• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
2 /*
3  * Copyright © 2000 SuSE, Inc.
4  * Copyright © 2007 Red Hat, Inc.
5  *
6  * Permission to use, copy, modify, distribute, and sell this software and its
7  * documentation for any purpose is hereby granted without fee, provided that
8  * the above copyright notice appear in all copies and that both that
9  * copyright notice and this permission notice appear in supporting
10  * documentation, and that the name of SuSE not be used in advertising or
11  * publicity pertaining to distribution of the software without specific,
12  * written prior permission.  SuSE makes no representations about the
13  * suitability of this software for any purpose.  It is provided "as is"
14  * without express or implied warranty.
15  *
16  * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
18  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22  *
23  * Author:  Keith Packard, SuSE, Inc.
24  */
25 
26 #ifdef HAVE_CONFIG_H
27 #include <config.h>
28 #endif
29 #include <string.h>
30 #include <stdlib.h>
31 #include "pixman-private.h"
32 #include "pixman-combine32.h"
33 #include "pixman-inlines.h"
34 
static force_inline uint32_t
fetch_24 (uint8_t *a)
{
    /* Load a 24-bit pixel from byte pointer 'a', which may not be
     * 4-byte aligned.  The parity test picks whichever byte/word split
     * keeps the uint16_t access on an even address.
     */
    if (((uintptr_t)a) & 1)
    {
#ifdef WORDS_BIGENDIAN
	/* leading byte = bits 16-23, aligned word at a+1 = bits 0-15 */
	return (*a << 16) | (*(uint16_t *)(a + 1));
#else
	/* leading byte = bits 0-7, aligned word at a+1 = bits 8-23 */
	return *a | (*(uint16_t *)(a + 1) << 8);
#endif
    }
    else
    {
#ifdef WORDS_BIGENDIAN
	/* aligned word = bits 8-23, trailing byte = bits 0-7 */
	return (*(uint16_t *)a << 8) | *(a + 2);
#else
	/* aligned word = bits 0-15, trailing byte = bits 16-23 */
	return *(uint16_t *)a | (*(a + 2) << 16);
#endif
    }
}
55 
static force_inline void
store_24 (uint8_t *a,
          uint32_t v)
{
    /* Store the low 24 bits of 'v' at byte pointer 'a'.  Mirrors
     * fetch_24: the address parity test chooses the byte/word split
     * that keeps the 16-bit store on an even address.
     */
    if (((uintptr_t)a) & 1)
    {
#ifdef WORDS_BIGENDIAN
	/* leading byte = bits 16-23, aligned word at a+1 = bits 0-15 */
	*a = (uint8_t) (v >> 16);
	*(uint16_t *)(a + 1) = (uint16_t) (v);
#else
	/* leading byte = bits 0-7, aligned word at a+1 = bits 8-23 */
	*a = (uint8_t) (v);
	*(uint16_t *)(a + 1) = (uint16_t) (v >> 8);
#endif
    }
    else
    {
#ifdef WORDS_BIGENDIAN
	/* aligned word = bits 8-23, trailing byte = bits 0-7 */
	*(uint16_t *)a = (uint16_t)(v >> 8);
	*(a + 2) = (uint8_t)v;
#else
	/* aligned word = bits 0-15, trailing byte = bits 16-23 */
	*(uint16_t *)a = (uint16_t)v;
	*(a + 2) = (uint8_t)(v >> 16);
#endif
    }
}
81 
82 static force_inline uint32_t
over(uint32_t src,uint32_t dest)83 over (uint32_t src,
84       uint32_t dest)
85 {
86     uint32_t a = ~src >> 24;
87 
88     UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);
89 
90     return dest;
91 }
92 
93 static force_inline uint32_t
in(uint32_t x,uint8_t y)94 in (uint32_t x,
95     uint8_t  y)
96 {
97     uint16_t a = y;
98 
99     UN8x4_MUL_UN8 (x, a);
100 
101     return x;
102 }
103 
104 /*
105  * Naming convention:
106  *
107  *  op_src_mask_dest
108  */
109 static void
fast_composite_over_x888_8_8888(pixman_implementation_t * imp,pixman_composite_info_t * info)110 fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
111                                  pixman_composite_info_t *info)
112 {
113     PIXMAN_COMPOSITE_ARGS (info);
114     uint32_t    *src, *src_line;
115     uint32_t    *dst, *dst_line;
116     uint8_t     *mask, *mask_line;
117     int src_stride, mask_stride, dst_stride;
118     uint8_t m;
119     uint32_t s, d;
120     int32_t w;
121 
122     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
123     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
124     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
125 
126     while (height--)
127     {
128 	src = src_line;
129 	src_line += src_stride;
130 	dst = dst_line;
131 	dst_line += dst_stride;
132 	mask = mask_line;
133 	mask_line += mask_stride;
134 
135 	w = width;
136 	while (w--)
137 	{
138 	    m = *mask++;
139 	    if (m)
140 	    {
141 		s = *src | 0xff000000;
142 
143 		if (m == 0xff)
144 		{
145 		    *dst = s;
146 		}
147 		else
148 		{
149 		    d = in (s, m);
150 		    *dst = over (d, *dst);
151 		}
152 	    }
153 	    src++;
154 	    dst++;
155 	}
156     }
157 }
158 
static void
fast_composite_in_n_8_8 (pixman_implementation_t *imp,
                         pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint8_t     *dst_line, *dst;
    uint8_t     *mask_line, *mask, m;
    int dst_stride, mask_stride;
    int32_t w;
    uint16_t t;

    /* IN operator: solid source, a8 mask, a8 destination.  Each
     * destination byte is scaled by (source alpha * mask).
     */
    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);

    if (srca == 0xff)
    {
	/* Opaque source: the scale factor reduces to the mask value. */
	while (height--)
	{
	    dst = dst_line;
	    dst_line += dst_stride;
	    mask = mask_line;
	    mask_line += mask_stride;
	    w = width;

	    while (w--)
	    {
		m = *mask++;

		/* m == 0xff is an identity multiply, so skip it. */
		if (m == 0)
		    *dst = 0;
		else if (m != 0xff)
		    *dst = MUL_UN8 (m, *dst, t);

		dst++;
	    }
	}
    }
    else
    {
	/* Translucent source: fold the source alpha into the mask
	 * value before scaling the destination.
	 */
	while (height--)
	{
	    dst = dst_line;
	    dst_line += dst_stride;
	    mask = mask_line;
	    mask_line += mask_stride;
	    w = width;

	    while (w--)
	    {
		m = *mask++;
		m = MUL_UN8 (m, srca, t);

		if (m == 0)
		    *dst = 0;
		else if (m != 0xff)
		    *dst = MUL_UN8 (m, *dst, t);

		dst++;
	    }
	}
    }
}
226 
227 static void
fast_composite_in_8_8(pixman_implementation_t * imp,pixman_composite_info_t * info)228 fast_composite_in_8_8 (pixman_implementation_t *imp,
229                        pixman_composite_info_t *info)
230 {
231     PIXMAN_COMPOSITE_ARGS (info);
232     uint8_t     *dst_line, *dst;
233     uint8_t     *src_line, *src;
234     int dst_stride, src_stride;
235     int32_t w;
236     uint8_t s;
237     uint16_t t;
238 
239     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
240     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
241 
242     while (height--)
243     {
244 	dst = dst_line;
245 	dst_line += dst_stride;
246 	src = src_line;
247 	src_line += src_stride;
248 	w = width;
249 
250 	while (w--)
251 	{
252 	    s = *src++;
253 
254 	    if (s == 0)
255 		*dst = 0;
256 	    else if (s != 0xff)
257 		*dst = MUL_UN8 (s, *dst, t);
258 
259 	    dst++;
260 	}
261     }
262 }
263 
264 static void
fast_composite_over_n_8_8888(pixman_implementation_t * imp,pixman_composite_info_t * info)265 fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
266                               pixman_composite_info_t *info)
267 {
268     PIXMAN_COMPOSITE_ARGS (info);
269     uint32_t src, srca;
270     uint32_t    *dst_line, *dst, d;
271     uint8_t     *mask_line, *mask, m;
272     int dst_stride, mask_stride;
273     int32_t w;
274 
275     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
276 
277     srca = src >> 24;
278     if (src == 0)
279 	return;
280 
281     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
282     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
283 
284     while (height--)
285     {
286 	dst = dst_line;
287 	dst_line += dst_stride;
288 	mask = mask_line;
289 	mask_line += mask_stride;
290 	w = width;
291 
292 	while (w--)
293 	{
294 	    m = *mask++;
295 	    if (m == 0xff)
296 	    {
297 		if (srca == 0xff)
298 		    *dst = src;
299 		else
300 		    *dst = over (src, *dst);
301 	    }
302 	    else if (m)
303 	    {
304 		d = in (src, m);
305 		*dst = over (d, *dst);
306 	    }
307 	    dst++;
308 	}
309     }
310 }
311 
312 static void
fast_composite_add_n_8888_8888_ca(pixman_implementation_t * imp,pixman_composite_info_t * info)313 fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
314 				   pixman_composite_info_t *info)
315 {
316     PIXMAN_COMPOSITE_ARGS (info);
317     uint32_t src, s;
318     uint32_t    *dst_line, *dst, d;
319     uint32_t    *mask_line, *mask, ma;
320     int dst_stride, mask_stride;
321     int32_t w;
322 
323     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
324 
325     if (src == 0)
326 	return;
327 
328     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
329     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
330 
331     while (height--)
332     {
333 	dst = dst_line;
334 	dst_line += dst_stride;
335 	mask = mask_line;
336 	mask_line += mask_stride;
337 	w = width;
338 
339 	while (w--)
340 	{
341 	    ma = *mask++;
342 
343 	    if (ma)
344 	    {
345 		d = *dst;
346 		s = src;
347 
348 		UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d);
349 
350 		*dst = s;
351 	    }
352 
353 	    dst++;
354 	}
355     }
356 }
357 
static void
fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
                                    pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca, s;
    uint32_t    *dst_line, *dst, d;
    uint32_t    *mask_line, *mask, ma;
    int dst_stride, mask_stride;
    int32_t w;

    /* OVER with a solid source and a component-alpha (a8r8g8b8) mask
     * onto an a8r8g8b8 destination.
     */
    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    if (src == 0)
	return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	mask = mask_line;
	mask_line += mask_stride;
	w = width;

	while (w--)
	{
	    ma = *mask++;
	    if (ma == 0xffffffff)
	    {
		/* Fully-on mask: reduces to a plain solid OVER, or a
		 * straight copy when the source is opaque. */
		if (srca == 0xff)
		    *dst = src;
		else
		    *dst = over (src, *dst);
	    }
	    else if (ma)
	    {
		/* s = src * mask; then the mask is scaled by the source
		 * alpha and its complement attenuates dest before s is
		 * added.  NOTE: the UN8x4 macros modify their first
		 * argument in place, so this statement order matters.
		 */
		d = *dst;
		s = src;

		UN8x4_MUL_UN8x4 (s, ma);
		UN8x4_MUL_UN8 (ma, srca);
		ma = ~ma;
		UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);

		*dst = d;
	    }

	    dst++;
	}
    }
}
413 
static void
fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint8_t     *dst_line, *dst;
    uint32_t d;
    uint8_t     *mask_line, *mask, m;
    int dst_stride, mask_stride;
    int32_t w;

    /* OVER: solid source through an a8 mask onto a 24-bit (0888)
     * destination.  Pixels are 3 bytes wide, so fetch_24/store_24
     * handle the unaligned loads and stores.
     */
    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    if (src == 0)
	return;

    /* bpp multiplier of 3: dst advances in bytes, 3 per pixel. */
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	mask = mask_line;
	mask_line += mask_stride;
	w = width;

	while (w--)
	{
	    m = *mask++;
	    if (m == 0xff)
	    {
		/* Full coverage: opaque source overwrites, otherwise
		 * composite against the fetched 24-bit pixel. */
		if (srca == 0xff)
		{
		    d = src;
		}
		else
		{
		    d = fetch_24 (dst);
		    d = over (src, d);
		}
		store_24 (dst, d);
	    }
	    else if (m)
	    {
		d = over (in (src, m), fetch_24 (dst));
		store_24 (dst, d);
	    }
	    dst += 3;
	}
    }
}
468 
469 static void
fast_composite_over_n_8_0565(pixman_implementation_t * imp,pixman_composite_info_t * info)470 fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
471                               pixman_composite_info_t *info)
472 {
473     PIXMAN_COMPOSITE_ARGS (info);
474     uint32_t src, srca;
475     uint16_t    *dst_line, *dst;
476     uint32_t d;
477     uint8_t     *mask_line, *mask, m;
478     int dst_stride, mask_stride;
479     int32_t w;
480 
481     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
482 
483     srca = src >> 24;
484     if (src == 0)
485 	return;
486 
487     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
488     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
489 
490     while (height--)
491     {
492 	dst = dst_line;
493 	dst_line += dst_stride;
494 	mask = mask_line;
495 	mask_line += mask_stride;
496 	w = width;
497 
498 	while (w--)
499 	{
500 	    m = *mask++;
501 	    if (m == 0xff)
502 	    {
503 		if (srca == 0xff)
504 		{
505 		    d = src;
506 		}
507 		else
508 		{
509 		    d = *dst;
510 		    d = over (src, convert_0565_to_0888 (d));
511 		}
512 		*dst = convert_8888_to_0565 (d);
513 	    }
514 	    else if (m)
515 	    {
516 		d = *dst;
517 		d = over (in (src, m), convert_0565_to_0888 (d));
518 		*dst = convert_8888_to_0565 (d);
519 	    }
520 	    dst++;
521 	}
522     }
523 }
524 
static void
fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
                                    pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t  src, srca, s;
    uint16_t  src16;
    uint16_t *dst_line, *dst;
    uint32_t  d;
    uint32_t *mask_line, *mask, ma;
    int dst_stride, mask_stride;
    int32_t w;

    /* OVER with a solid source and a component-alpha (a8r8g8b8) mask
     * onto an r5g6b5 destination.  Destination pixels are widened to
     * 8888 for the arithmetic and packed back to 565 on store.
     */
    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    if (src == 0)
	return;

    /* Pre-packed 565 source for the opaque fast path. */
    src16 = convert_8888_to_0565 (src);

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	mask = mask_line;
	mask_line += mask_stride;
	w = width;

	while (w--)
	{
	    ma = *mask++;
	    if (ma == 0xffffffff)
	    {
		/* Fully-on mask: plain solid OVER, or a direct 565
		 * store when the source is opaque. */
		if (srca == 0xff)
		{
		    *dst = src16;
		}
		else
		{
		    d = *dst;
		    d = over (src, convert_0565_to_0888 (d));
		    *dst = convert_8888_to_0565 (d);
		}
	    }
	    else if (ma)
	    {
		/* Same component-alpha blend as the 8888 variant;
		 * NOTE: the UN8x4 macros modify their first argument
		 * in place, so this statement order matters. */
		d = *dst;
		d = convert_0565_to_0888 (d);

		s = src;

		UN8x4_MUL_UN8x4 (s, ma);
		UN8x4_MUL_UN8 (ma, srca);
		ma = ~ma;
		UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);

		*dst = convert_8888_to_0565 (d);
	    }
	    dst++;
	}
    }
}
591 
592 static void
fast_composite_over_8888_8888(pixman_implementation_t * imp,pixman_composite_info_t * info)593 fast_composite_over_8888_8888 (pixman_implementation_t *imp,
594                                pixman_composite_info_t *info)
595 {
596     PIXMAN_COMPOSITE_ARGS (info);
597     uint32_t    *dst_line, *dst;
598     uint32_t    *src_line, *src, s;
599     int dst_stride, src_stride;
600     uint8_t a;
601     int32_t w;
602 
603     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
604     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
605 
606     while (height--)
607     {
608 	dst = dst_line;
609 	dst_line += dst_stride;
610 	src = src_line;
611 	src_line += src_stride;
612 	w = width;
613 
614 	while (w--)
615 	{
616 	    s = *src++;
617 	    a = s >> 24;
618 	    if (a == 0xff)
619 		*dst = s;
620 	    else if (s)
621 		*dst = over (s, *dst);
622 	    dst++;
623 	}
624     }
625 }
626 
627 static void
fast_composite_src_x888_8888(pixman_implementation_t * imp,pixman_composite_info_t * info)628 fast_composite_src_x888_8888 (pixman_implementation_t *imp,
629 			      pixman_composite_info_t *info)
630 {
631     PIXMAN_COMPOSITE_ARGS (info);
632     uint32_t    *dst_line, *dst;
633     uint32_t    *src_line, *src;
634     int dst_stride, src_stride;
635     int32_t w;
636 
637     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
638     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
639 
640     while (height--)
641     {
642 	dst = dst_line;
643 	dst_line += dst_stride;
644 	src = src_line;
645 	src_line += src_stride;
646 	w = width;
647 
648 	while (w--)
649 	    *dst++ = (*src++) | 0xff000000;
650     }
651 }
652 
#if 0
/* Disabled (not compiled): OVER from a8r8g8b8 onto a 24-bit 0888
 * destination, using fetch_24/store_24 for the 3-byte pixels.
 */
static void
fast_composite_over_8888_0888 (pixman_implementation_t *imp,
			       pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint8_t     *dst_line, *dst;
    uint32_t d;
    uint32_t    *src_line, *src, s;
    uint8_t a;
    int dst_stride, src_stride;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;
	w = width;

	while (w--)
	{
	    s = *src++;
	    a = s >> 24;
	    if (a)
	    {
		/* Opaque pixels overwrite; translucent ones blend
		 * against the fetched 24-bit destination pixel. */
		if (a == 0xff)
		    d = s;
		else
		    d = over (s, fetch_24 (dst));

		store_24 (dst, d);
	    }
	    dst += 3;
	}
    }
}
#endif
695 
696 static void
fast_composite_over_8888_0565(pixman_implementation_t * imp,pixman_composite_info_t * info)697 fast_composite_over_8888_0565 (pixman_implementation_t *imp,
698                                pixman_composite_info_t *info)
699 {
700     PIXMAN_COMPOSITE_ARGS (info);
701     uint16_t    *dst_line, *dst;
702     uint32_t d;
703     uint32_t    *src_line, *src, s;
704     uint8_t a;
705     int dst_stride, src_stride;
706     int32_t w;
707 
708     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
709     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
710 
711     while (height--)
712     {
713 	dst = dst_line;
714 	dst_line += dst_stride;
715 	src = src_line;
716 	src_line += src_stride;
717 	w = width;
718 
719 	while (w--)
720 	{
721 	    s = *src++;
722 	    a = s >> 24;
723 	    if (s)
724 	    {
725 		if (a == 0xff)
726 		{
727 		    d = s;
728 		}
729 		else
730 		{
731 		    d = *dst;
732 		    d = over (s, convert_0565_to_0888 (d));
733 		}
734 		*dst = convert_8888_to_0565 (d);
735 	    }
736 	    dst++;
737 	}
738     }
739 }
740 
741 static void
fast_composite_add_8_8(pixman_implementation_t * imp,pixman_composite_info_t * info)742 fast_composite_add_8_8 (pixman_implementation_t *imp,
743 			pixman_composite_info_t *info)
744 {
745     PIXMAN_COMPOSITE_ARGS (info);
746     uint8_t     *dst_line, *dst;
747     uint8_t     *src_line, *src;
748     int dst_stride, src_stride;
749     int32_t w;
750     uint8_t s, d;
751     uint16_t t;
752 
753     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
754     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
755 
756     while (height--)
757     {
758 	dst = dst_line;
759 	dst_line += dst_stride;
760 	src = src_line;
761 	src_line += src_stride;
762 	w = width;
763 
764 	while (w--)
765 	{
766 	    s = *src++;
767 	    if (s)
768 	    {
769 		if (s != 0xff)
770 		{
771 		    d = *dst;
772 		    t = d + s;
773 		    s = t | (0 - (t >> 8));
774 		}
775 		*dst = s;
776 	    }
777 	    dst++;
778 	}
779     }
780 }
781 
782 static void
fast_composite_add_0565_0565(pixman_implementation_t * imp,pixman_composite_info_t * info)783 fast_composite_add_0565_0565 (pixman_implementation_t *imp,
784                               pixman_composite_info_t *info)
785 {
786     PIXMAN_COMPOSITE_ARGS (info);
787     uint16_t    *dst_line, *dst;
788     uint32_t	d;
789     uint16_t    *src_line, *src;
790     uint32_t	s;
791     int dst_stride, src_stride;
792     int32_t w;
793 
794     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1);
795     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
796 
797     while (height--)
798     {
799 	dst = dst_line;
800 	dst_line += dst_stride;
801 	src = src_line;
802 	src_line += src_stride;
803 	w = width;
804 
805 	while (w--)
806 	{
807 	    s = *src++;
808 	    if (s)
809 	    {
810 		d = *dst;
811 		s = convert_0565_to_8888 (s);
812 		if (d)
813 		{
814 		    d = convert_0565_to_8888 (d);
815 		    UN8x4_ADD_UN8x4 (s, d);
816 		}
817 		*dst = convert_8888_to_0565 (s);
818 	    }
819 	    dst++;
820 	}
821     }
822 }
823 
824 static void
fast_composite_add_8888_8888(pixman_implementation_t * imp,pixman_composite_info_t * info)825 fast_composite_add_8888_8888 (pixman_implementation_t *imp,
826                               pixman_composite_info_t *info)
827 {
828     PIXMAN_COMPOSITE_ARGS (info);
829     uint32_t    *dst_line, *dst;
830     uint32_t    *src_line, *src;
831     int dst_stride, src_stride;
832     int32_t w;
833     uint32_t s, d;
834 
835     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
836     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
837 
838     while (height--)
839     {
840 	dst = dst_line;
841 	dst_line += dst_stride;
842 	src = src_line;
843 	src_line += src_stride;
844 	w = width;
845 
846 	while (w--)
847 	{
848 	    s = *src++;
849 	    if (s)
850 	    {
851 		if (s != 0xffffffff)
852 		{
853 		    d = *dst;
854 		    if (d)
855 			UN8x4_ADD_UN8x4 (s, d);
856 		}
857 		*dst = s;
858 	    }
859 	    dst++;
860 	}
861     }
862 }
863 
864 static void
fast_composite_add_n_8_8(pixman_implementation_t * imp,pixman_composite_info_t * info)865 fast_composite_add_n_8_8 (pixman_implementation_t *imp,
866 			  pixman_composite_info_t *info)
867 {
868     PIXMAN_COMPOSITE_ARGS (info);
869     uint8_t     *dst_line, *dst;
870     uint8_t     *mask_line, *mask;
871     int dst_stride, mask_stride;
872     int32_t w;
873     uint32_t src;
874     uint8_t sa;
875 
876     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
877     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
878     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
879     sa = (src >> 24);
880 
881     while (height--)
882     {
883 	dst = dst_line;
884 	dst_line += dst_stride;
885 	mask = mask_line;
886 	mask_line += mask_stride;
887 	w = width;
888 
889 	while (w--)
890 	{
891 	    uint16_t tmp;
892 	    uint16_t a;
893 	    uint32_t m, d;
894 	    uint32_t r;
895 
896 	    a = *mask++;
897 	    d = *dst;
898 
899 	    m = MUL_UN8 (sa, a, tmp);
900 	    r = ADD_UN8 (m, d, tmp);
901 
902 	    *dst++ = r;
903 	}
904     }
905 }
906 
/* Bit addressing within the 32-bit words of a 1-bpp image: on
 * big-endian hosts bit index 0 is the most significant bit and the
 * mask advances by shifting right; on little-endian hosts bit 0 is
 * the least significant bit and the mask advances by shifting left.
 */
#ifdef WORDS_BIGENDIAN
#define CREATE_BITMASK(n) (0x80000000 >> (n))
#define UPDATE_BITMASK(n) ((n) >> 1)
#else
#define CREATE_BITMASK(n) (1 << (n))
#define UPDATE_BITMASK(n) ((n) << 1)
#endif

/* Test / set bit n of the bit array starting at uint32_t pointer p. */
#define TEST_BIT(p, n)					\
    (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31))
#define SET_BIT(p, n)							\
    do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0);
919 
static void
fast_composite_add_1_1 (pixman_implementation_t *imp,
			pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t     *dst_line, *dst;
    uint32_t     *src_line, *src;
    int           dst_stride, src_stride;
    int32_t       w;

    /* ADD between two a1 (1 bpp) images: OR each set source bit into
     * the destination.  Lines are fetched at x = 0 and the x offsets
     * (src_x, dest_x) are applied per bit via TEST_BIT/SET_BIT, which
     * handle the host's bit order.
     */
    PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t,
                           src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, 0, dest_y, uint32_t,
                           dst_stride, dst_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;
	w = width;

	while (w--)
	{
	    /*
	     * TODO: improve performance by processing uint32_t data instead
	     *       of individual bits
	     */
	    if (TEST_BIT (src, src_x + w))
		SET_BIT (dst, dest_x + w);
	}
    }
}
954 
static void
fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t     src, srca;
    uint32_t    *dst, *dst_line;
    uint32_t    *mask, *mask_line;
    int          mask_stride, dst_stride;
    uint32_t     bitcache, bitmask;
    int32_t      w;

    /* OVER: solid source through an a1 (1 bpp) mask onto an a8r8g8b8
     * destination.  The mask word containing mask_x is cached in
     * 'bitcache' and walked one bit at a time with 'bitmask'; when the
     * mask bit is set the pixel is painted, otherwise left alone.
     */
    if (width <= 0)
	return;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    srca = src >> 24;
    if (src == 0)
	return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t,
                           dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
                           mask_stride, mask_line, 1);
    /* Advance to the 32-bit word containing mask_x; the bit offset
     * within the word is handled by CREATE_BITMASK (mask_x & 31). */
    mask_line += mask_x >> 5;

    if (srca == 0xff)
    {
	/* Opaque source: a set mask bit stores the color directly. */
	while (height--)
	{
	    dst = dst_line;
	    dst_line += dst_stride;
	    mask = mask_line;
	    mask_line += mask_stride;
	    w = width;

	    bitcache = *mask++;
	    bitmask = CREATE_BITMASK (mask_x & 31);

	    while (w--)
	    {
		/* bitmask shifts to 0 once the cached word is used up;
		 * reload the next word and restart at bit 0. */
		if (bitmask == 0)
		{
		    bitcache = *mask++;
		    bitmask = CREATE_BITMASK (0);
		}
		if (bitcache & bitmask)
		    *dst = src;
		bitmask = UPDATE_BITMASK (bitmask);
		dst++;
	    }
	}
    }
    else
    {
	/* Translucent source: a set mask bit composites with OVER. */
	while (height--)
	{
	    dst = dst_line;
	    dst_line += dst_stride;
	    mask = mask_line;
	    mask_line += mask_stride;
	    w = width;

	    bitcache = *mask++;
	    bitmask = CREATE_BITMASK (mask_x & 31);

	    while (w--)
	    {
		if (bitmask == 0)
		{
		    bitcache = *mask++;
		    bitmask = CREATE_BITMASK (0);
		}
		if (bitcache & bitmask)
		    *dst = over (src, *dst);
		bitmask = UPDATE_BITMASK (bitmask);
		dst++;
	    }
	}
    }
}
1036 
static void
fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t     src, srca;
    uint16_t    *dst, *dst_line;
    uint32_t    *mask, *mask_line;
    int          mask_stride, dst_stride;
    uint32_t     bitcache, bitmask;
    int32_t      w;
    uint32_t     d;
    uint16_t     src565;

    /* OVER: solid source through an a1 (1 bpp) mask onto an r5g6b5
     * destination.  Same bitcache/bitmask word-streaming scheme as
     * fast_composite_over_n_1_8888; destination pixels are widened to
     * 8888 for blending in the translucent path.
     */
    if (width <= 0)
	return;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    srca = src >> 24;
    if (src == 0)
	return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t,
                           dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
                           mask_stride, mask_line, 1);
    /* Advance to the 32-bit word containing mask_x; the bit offset
     * within the word is handled by CREATE_BITMASK (mask_x & 31). */
    mask_line += mask_x >> 5;

    if (srca == 0xff)
    {
	/* Opaque source: pre-pack the color once and store directly. */
	src565 = convert_8888_to_0565 (src);
	while (height--)
	{
	    dst = dst_line;
	    dst_line += dst_stride;
	    mask = mask_line;
	    mask_line += mask_stride;
	    w = width;

	    bitcache = *mask++;
	    bitmask = CREATE_BITMASK (mask_x & 31);

	    while (w--)
	    {
		/* Reload the next mask word once all 32 bits are used. */
		if (bitmask == 0)
		{
		    bitcache = *mask++;
		    bitmask = CREATE_BITMASK (0);
		}
		if (bitcache & bitmask)
		    *dst = src565;
		bitmask = UPDATE_BITMASK (bitmask);
		dst++;
	    }
	}
    }
    else
    {
	/* Translucent source: blend in 8888 and pack back to 565. */
	while (height--)
	{
	    dst = dst_line;
	    dst_line += dst_stride;
	    mask = mask_line;
	    mask_line += mask_stride;
	    w = width;

	    bitcache = *mask++;
	    bitmask = CREATE_BITMASK (mask_x & 31);

	    while (w--)
	    {
		if (bitmask == 0)
		{
		    bitcache = *mask++;
		    bitmask = CREATE_BITMASK (0);
		}
		if (bitcache & bitmask)
		{
		    d = over (src, convert_0565_to_0888 (*dst));
		    *dst = convert_8888_to_0565 (d);
		}
		bitmask = UPDATE_BITMASK (bitmask);
		dst++;
	    }
	}
    }
}
1124 
1125 /*
1126  * Simple bitblt
1127  */
1128 
1129 static void
fast_composite_solid_fill(pixman_implementation_t * imp,pixman_composite_info_t * info)1130 fast_composite_solid_fill (pixman_implementation_t *imp,
1131                            pixman_composite_info_t *info)
1132 {
1133     PIXMAN_COMPOSITE_ARGS (info);
1134     uint32_t src;
1135 
1136     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
1137 
1138     if (dest_image->bits.format == PIXMAN_a1)
1139     {
1140 	src = src >> 31;
1141     }
1142     else if (dest_image->bits.format == PIXMAN_a8)
1143     {
1144 	src = src >> 24;
1145     }
1146     else if (dest_image->bits.format == PIXMAN_r5g6b5 ||
1147              dest_image->bits.format == PIXMAN_b5g6r5)
1148     {
1149 	src = convert_8888_to_0565 (src);
1150     }
1151 
1152     pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
1153                  PIXMAN_FORMAT_BPP (dest_image->bits.format),
1154                  dest_x, dest_y,
1155                  width, height,
1156                  src);
1157 }
1158 
1159 static void
fast_composite_src_memcpy(pixman_implementation_t * imp,pixman_composite_info_t * info)1160 fast_composite_src_memcpy (pixman_implementation_t *imp,
1161 			   pixman_composite_info_t *info)
1162 {
1163     PIXMAN_COMPOSITE_ARGS (info);
1164     int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8;
1165     uint32_t n_bytes = width * bpp;
1166     int dst_stride, src_stride;
1167     uint8_t    *dst;
1168     uint8_t    *src;
1169 
1170     src_stride = src_image->bits.rowstride * 4;
1171     dst_stride = dest_image->bits.rowstride * 4;
1172 
1173     src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp;
1174     dst = (uint8_t *)dest_image->bits.bits + dest_y * dst_stride + dest_x * bpp;
1175 
1176     while (height--)
1177     {
1178 	memcpy (dst, src, n_bytes);
1179 
1180 	dst += dst_stride;
1181 	src += src_stride;
1182     }
1183 }
1184 
/* Instantiate nearest-neighbour scaling fast paths via the FAST_NEAREST
 * macro (pixman-inlines.h): one function per combination of
 * (src format, dst format, operator, repeat mode).
 */
FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER)
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE)
FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD)
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL)
FAST_NEAREST (x888_8888_cover, x888, 8888, uint32_t, uint32_t, SRC, COVER)
FAST_NEAREST (x888_8888_pad, x888, 8888, uint32_t, uint32_t, SRC, PAD)
FAST_NEAREST (x888_8888_normal, x888, 8888, uint32_t, uint32_t, SRC, NORMAL)
FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER)
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE)
FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD)
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL)
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER)
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE)
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
1205 
/* Sources narrower than this are pre-replicated into a temporary one-row
 * image so the inner composite calls operate on reasonably wide spans. */
#define REPEAT_MIN_WIDTH    32

/* Composite with a NORMAL (tiled) repeating source by decomposing the
 * destination into spans that each fall inside a single source tile, then
 * dispatching each span to a simpler COVER-clip fast path looked up via
 * _pixman_implementation_lookup_composite().
 *
 * For very narrow 8/16/32 bpp non-indexed sources, one source row is first
 * replicated into extended_src (wrapped in a stack-allocated
 * extended_src_image) so each span is at least REPEAT_MIN_WIDTH pixels wide.
 */
static void
fast_composite_tiled_repeat (pixman_implementation_t *imp,
			     pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    pixman_composite_func_t func;
    pixman_format_code_t mask_format;
    uint32_t src_flags, mask_flags;
    int32_t sx, sy;
    int32_t width_remain;
    int32_t num_pixels;
    int32_t src_width;
    int32_t i, j;
    pixman_image_t extended_src_image;
    uint32_t extended_src[REPEAT_MIN_WIDTH * 2];
    pixman_bool_t need_src_extension;
    uint32_t *src_line;
    int32_t src_stride;
    int32_t src_bpp;
    pixman_composite_info_t info2 = *info;

    /* Each span stays inside one tile, so the inner path may assume the
     * samples cover the clip and no repeat handling is needed. */
    src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) |
		    FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;

    if (mask_image)
    {
	mask_format = mask_image->common.extended_format_code;
	mask_flags = info->mask_flags;
    }
    else
    {
	mask_format = PIXMAN_null;
	mask_flags = FAST_PATH_IS_OPAQUE;
    }

    /* Find the fast path that will handle each single-tile span. */
    _pixman_implementation_lookup_composite (
	imp->toplevel, info->op,
	src_image->common.extended_format_code, src_flags,
	mask_format, mask_flags,
	dest_image->common.extended_format_code, info->dest_flags,
	&imp, &func);

    src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format);

    if (src_image->bits.width < REPEAT_MIN_WIDTH		&&
	(src_bpp == 32 || src_bpp == 16 || src_bpp == 8)	&&
	!src_image->bits.indexed)
    {
	/* Replicate the narrow source: pick the smallest multiple of the
	 * source width that is >= REPEAT_MIN_WIDTH and also covers the
	 * initial x offset plus the composite width. */
	sx = src_x;
	sx = MOD (sx, src_image->bits.width);
	sx += width;
	src_width = 0;

	while (src_width < REPEAT_MIN_WIDTH && src_width <= sx)
	    src_width += src_image->bits.width;

	/* Stride in uint32_t units, rounded up to whole words. */
	src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t);

	/* Initialize/validate stack-allocated temporary image */
	_pixman_bits_image_init (&extended_src_image, src_image->bits.format,
				 src_width, 1, &extended_src[0], src_stride,
				 FALSE);
	_pixman_image_validate (&extended_src_image);

	info2.src_image = &extended_src_image;
	need_src_extension = TRUE;
    }
    else
    {
	src_width = src_image->bits.width;
	need_src_extension = FALSE;
    }

    sx = src_x;
    sy = src_y;

    while (--height >= 0)
    {
	sx = MOD (sx, src_width);
	sy = MOD (sy, src_image->bits.height);

	if (need_src_extension)
	{
	    /* Refill the one-row extended source from source row sy,
	     * repeating the row contents until src_width is filled. */
	    if (src_bpp == 32)
	    {
		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1);

		for (i = 0; i < src_width; )
		{
		    for (j = 0; j < src_image->bits.width; j++, i++)
			extended_src[i] = src_line[j];
		}
	    }
	    else if (src_bpp == 16)
	    {
		uint16_t *src_line_16;

		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride,
				       src_line_16, 1);
		src_line = (uint32_t*)src_line_16;

		for (i = 0; i < src_width; )
		{
		    for (j = 0; j < src_image->bits.width; j++, i++)
			((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j];
		}
	    }
	    else if (src_bpp == 8)
	    {
		uint8_t *src_line_8;

		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride,
				       src_line_8, 1);
		src_line = (uint32_t*)src_line_8;

		for (i = 0; i < src_width; )
		{
		    for (j = 0; j < src_image->bits.width; j++, i++)
			((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j];
		}
	    }

	    info2.src_y = 0;
	}
	else
	{
	    info2.src_y = sy;
	}

	width_remain = width;

	/* Walk the row span by span; each span ends at a tile boundary,
	 * after which sampling restarts at source x = 0. */
	while (width_remain > 0)
	{
	    num_pixels = src_width - sx;

	    if (num_pixels > width_remain)
		num_pixels = width_remain;

	    info2.src_x = sx;
	    info2.width = num_pixels;
	    info2.height = 1;

	    func (imp, &info2);

	    width_remain -= num_pixels;
	    info2.mask_x += num_pixels;
	    info2.dest_x += num_pixels;
	    sx = 0;
	}

	/* Advance to the next destination row; mask/dest x reset to the
	 * caller-supplied origin, y coordinates advance by one. */
	sx = src_x;
	sy++;
	info2.mask_x = info->mask_x;
	info2.mask_y++;
	info2.dest_x = info->dest_x;
	info2.dest_y++;
    }

    if (need_src_extension)
	_pixman_image_fini (&extended_src_image);
}
1369 
1370 /* Use more unrolling for src_0565_0565 because it is typically CPU bound */
1371 static force_inline void
scaled_nearest_scanline_565_565_SRC(uint16_t * dst,const uint16_t * src,int32_t w,pixman_fixed_t vx,pixman_fixed_t unit_x,pixman_fixed_t max_vx,pixman_bool_t fully_transparent_src)1372 scaled_nearest_scanline_565_565_SRC (uint16_t *       dst,
1373 				     const uint16_t * src,
1374 				     int32_t          w,
1375 				     pixman_fixed_t   vx,
1376 				     pixman_fixed_t   unit_x,
1377 				     pixman_fixed_t   max_vx,
1378 				     pixman_bool_t    fully_transparent_src)
1379 {
1380     uint16_t tmp1, tmp2, tmp3, tmp4;
1381     while ((w -= 4) >= 0)
1382     {
1383 	tmp1 = *(src + pixman_fixed_to_int (vx));
1384 	vx += unit_x;
1385 	tmp2 = *(src + pixman_fixed_to_int (vx));
1386 	vx += unit_x;
1387 	tmp3 = *(src + pixman_fixed_to_int (vx));
1388 	vx += unit_x;
1389 	tmp4 = *(src + pixman_fixed_to_int (vx));
1390 	vx += unit_x;
1391 	*dst++ = tmp1;
1392 	*dst++ = tmp2;
1393 	*dst++ = tmp3;
1394 	*dst++ = tmp4;
1395     }
1396     if (w & 2)
1397     {
1398 	tmp1 = *(src + pixman_fixed_to_int (vx));
1399 	vx += unit_x;
1400 	tmp2 = *(src + pixman_fixed_to_int (vx));
1401 	vx += unit_x;
1402 	*dst++ = tmp1;
1403 	*dst++ = tmp2;
1404     }
1405     if (w & 1)
1406 	*dst = *(src + pixman_fixed_to_int (vx));
1407 }
1408 
/* Wrap the unrolled 565->565 scanline in the standard nearest-scaling
 * main loops for the COVER, NONE and PAD repeat modes. */
FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
		       scaled_nearest_scanline_565_565_SRC,
		       uint16_t, uint16_t, COVER)
FAST_NEAREST_MAINLOOP (565_565_none_SRC,
		       scaled_nearest_scanline_565_565_SRC,
		       uint16_t, uint16_t, NONE)
FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
		       scaled_nearest_scanline_565_565_SRC,
		       uint16_t, uint16_t, PAD)
1418 
1419 static force_inline uint32_t
fetch_nearest(pixman_repeat_t src_repeat,pixman_format_code_t format,uint32_t * src,int x,int src_width)1420 fetch_nearest (pixman_repeat_t src_repeat,
1421 	       pixman_format_code_t format,
1422 	       uint32_t *src, int x, int src_width)
1423 {
1424     if (repeat (src_repeat, &x, src_width))
1425     {
1426 	if (format == PIXMAN_x8r8g8b8 || format == PIXMAN_x8b8g8r8)
1427 	    return *(src + x) | 0xff000000;
1428 	else
1429 	    return *(src + x);
1430     }
1431     else
1432     {
1433 	return 0;
1434     }
1435 }
1436 
1437 static force_inline void
combine_over(uint32_t s,uint32_t * dst)1438 combine_over (uint32_t s, uint32_t *dst)
1439 {
1440     if (s)
1441     {
1442 	uint8_t ia = 0xff - (s >> 24);
1443 
1444 	if (ia)
1445 	    UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s);
1446 	else
1447 	    *dst = s;
1448     }
1449 }
1450 
1451 static force_inline void
combine_src(uint32_t s,uint32_t * dst)1452 combine_src (uint32_t s, uint32_t *dst)
1453 {
1454     *dst = s;
1455 }
1456 
/* Generic nearest-neighbour scaled composite for 32 bpp formats with a
 * scale-only transform (only matrix[0][0] and matrix[1][1] are used).
 * Handles SRC and OVER; any repeat mode is applied per pixel through
 * fetch_nearest()/repeat().  The inner loop is unrolled two pixels at a
 * time with a single-pixel tail.
 */
static void
fast_composite_scaled_nearest (pixman_implementation_t *imp,
			       pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t       *dst_line;
    uint32_t       *src_line;
    int             dst_stride, src_stride;
    int		    src_width, src_height;
    pixman_repeat_t src_repeat;
    pixman_fixed_t unit_x, unit_y;
    pixman_format_code_t src_format;
    pixman_vector_t v;
    pixman_fixed_t vy;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be
     * transformed from destination space to source space
     */
    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1);

    /* reference point is the center of the pixel */
    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;
    v.vector[2] = pixman_fixed_1;

    if (!pixman_transform_point_3d (src_image->common.transform, &v))
	return;

    /* Scale-only transform: per-pixel x step and per-row y step. */
    unit_x = src_image->common.transform->matrix[0][0];
    unit_y = src_image->common.transform->matrix[1][1];

    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
    v.vector[0] -= pixman_fixed_e;
    v.vector[1] -= pixman_fixed_e;

    src_height = src_image->bits.height;
    src_width = src_image->bits.width;
    src_repeat = src_image->common.repeat;
    src_format = src_image->bits.format;

    vy = v.vector[1];
    while (height--)
    {
        pixman_fixed_t vx = v.vector[0];
	int y = pixman_fixed_to_int (vy);
	uint32_t *dst = dst_line;

	dst_line += dst_stride;

        /* adjust the y location by a unit vector in the y direction
         * this is equivalent to transforming y+1 of the destination point to source space */
        vy += unit_y;

	if (!repeat (src_repeat, &y, src_height))
	{
	    /* Row maps outside the source: SRC clears it, OVER leaves it. */
	    if (op == PIXMAN_OP_SRC)
		memset (dst, 0, sizeof (*dst) * width);
	}
	else
	{
	    int w = width;

	    uint32_t *src = src_line + y * src_stride;

	    /* Two pixels per iteration. */
	    while (w >= 2)
	    {
		uint32_t s1, s2;
		int x1, x2;

		x1 = pixman_fixed_to_int (vx);
		vx += unit_x;

		x2 = pixman_fixed_to_int (vx);
		vx += unit_x;

		w -= 2;

		s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width);
		s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width);

		if (op == PIXMAN_OP_OVER)
		{
		    combine_over (s1, dst++);
		    combine_over (s2, dst++);
		}
		else
		{
		    combine_src (s1, dst++);
		    combine_src (s2, dst++);
		}
	    }

	    /* Remaining 0 or 1 pixel. */
	    while (w--)
	    {
		uint32_t s;
		int x;

		x = pixman_fixed_to_int (vx);
		vx += unit_x;

		s = fetch_nearest (src_repeat, src_format, src, x, src_width);

		if (op == PIXMAN_OP_OVER)
		    combine_over (s, dst++);
		else
		    combine_src (s, dst++);
	    }
	}
    }
}
1568 
/* Assumed cache line size used to pick the tile width for the rotation
 * blitters below. */
#define CACHE_LINE_SIZE 64

/* FAST_SIMPLE_ROTATE(suffix, pix_type) instantiates, for one pixel type,
 * six functions implementing SRC composites with exact +/-90 degree
 * rotation transforms:
 *
 *   blt_rotated_{90,270}_trivial_<suffix>  - naive per-pixel rotation blit
 *   blt_rotated_{90,270}_<suffix>          - tiled wrapper that splits the
 *       destination into cache-line-aligned vertical stripes (leading /
 *       aligned-middle / trailing) for better write locality
 *   fast_composite_rotate_{90,270}_<suffix> - composite entry points that
 *       derive the source origin from the transform's translation column
 *       and dispatch to the tiled blitters
 */
#define FAST_SIMPLE_ROTATE(suffix, pix_type)                                  \
                                                                              \
static void                                                                   \
blt_rotated_90_trivial_##suffix (pix_type       *dst,                         \
				 int             dst_stride,                  \
				 const pix_type *src,                         \
				 int             src_stride,                  \
				 int             w,                           \
				 int             h)                           \
{                                                                             \
    int x, y;                                                                 \
    for (y = 0; y < h; y++)                                                   \
    {                                                                         \
	const pix_type *s = src + (h - y - 1);                                \
	pix_type *d = dst + dst_stride * y;                                   \
	for (x = 0; x < w; x++)                                               \
	{                                                                     \
	    *d++ = *s;                                                        \
	    s += src_stride;                                                  \
	}                                                                     \
    }                                                                         \
}                                                                             \
                                                                              \
static void                                                                   \
blt_rotated_270_trivial_##suffix (pix_type       *dst,                        \
				  int             dst_stride,                 \
				  const pix_type *src,                        \
				  int             src_stride,                 \
				  int             w,                          \
				  int             h)                          \
{                                                                             \
    int x, y;                                                                 \
    for (y = 0; y < h; y++)                                                   \
    {                                                                         \
	const pix_type *s = src + src_stride * (w - 1) + y;                   \
	pix_type *d = dst + dst_stride * y;                                   \
	for (x = 0; x < w; x++)                                               \
	{                                                                     \
	    *d++ = *s;                                                        \
	    s -= src_stride;                                                  \
	}                                                                     \
    }                                                                         \
}                                                                             \
                                                                              \
static void                                                                   \
blt_rotated_90_##suffix (pix_type       *dst,                                 \
			 int             dst_stride,                          \
			 const pix_type *src,                                 \
			 int             src_stride,                          \
			 int             W,                                   \
			 int             H)                                   \
{                                                                             \
    int x;                                                                    \
    int leading_pixels = 0, trailing_pixels = 0;                              \
    const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
                                                                              \
    /*                                                                        \
     * split processing into handling destination as TILE_SIZExH cache line   \
     * aligned vertical stripes (optimistically assuming that destination     \
     * stride is a multiple of cache line, if not - it will be just a bit     \
     * slower)                                                                \
     */                                                                       \
                                                                              \
    if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))                               \
    {                                                                         \
	leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
	if (leading_pixels > W)                                               \
	    leading_pixels = W;                                               \
                                                                              \
	/* unaligned leading part NxH (where N < TILE_SIZE) */                \
	blt_rotated_90_trivial_##suffix (                                     \
	    dst,                                                              \
	    dst_stride,                                                       \
	    src,                                                              \
	    src_stride,                                                       \
	    leading_pixels,                                                   \
	    H);                                                               \
	                                                                      \
	dst += leading_pixels;                                                \
	src += leading_pixels * src_stride;                                   \
	W -= leading_pixels;                                                  \
    }                                                                         \
                                                                              \
    if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))                         \
    {                                                                         \
	trailing_pixels = (((uintptr_t)(dst + W) &                            \
			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
	if (trailing_pixels > W)                                              \
	    trailing_pixels = W;                                              \
	W -= trailing_pixels;                                                 \
    }                                                                         \
                                                                              \
    for (x = 0; x < W; x += TILE_SIZE)                                        \
    {                                                                         \
	/* aligned middle part TILE_SIZExH */                                 \
	blt_rotated_90_trivial_##suffix (                                     \
	    dst + x,                                                          \
	    dst_stride,                                                       \
	    src + src_stride * x,                                             \
	    src_stride,                                                       \
	    TILE_SIZE,                                                        \
	    H);                                                               \
    }                                                                         \
                                                                              \
    if (trailing_pixels)                                                      \
    {                                                                         \
	/* unaligned trailing part NxH (where N < TILE_SIZE) */               \
	blt_rotated_90_trivial_##suffix (                                     \
	    dst + W,                                                          \
	    dst_stride,                                                       \
	    src + W * src_stride,                                             \
	    src_stride,                                                       \
	    trailing_pixels,                                                  \
	    H);                                                               \
    }                                                                         \
}                                                                             \
                                                                              \
static void                                                                   \
blt_rotated_270_##suffix (pix_type       *dst,                                \
			  int             dst_stride,                         \
			  const pix_type *src,                                \
			  int             src_stride,                         \
			  int             W,                                  \
			  int             H)                                  \
{                                                                             \
    int x;                                                                    \
    int leading_pixels = 0, trailing_pixels = 0;                              \
    const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
                                                                              \
    /*                                                                        \
     * split processing into handling destination as TILE_SIZExH cache line   \
     * aligned vertical stripes (optimistically assuming that destination     \
     * stride is a multiple of cache line, if not - it will be just a bit     \
     * slower)                                                                \
     */                                                                       \
                                                                              \
    if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))                               \
    {                                                                         \
	leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
	if (leading_pixels > W)                                               \
	    leading_pixels = W;                                               \
                                                                              \
	/* unaligned leading part NxH (where N < TILE_SIZE) */                \
	blt_rotated_270_trivial_##suffix (                                    \
	    dst,                                                              \
	    dst_stride,                                                       \
	    src + src_stride * (W - leading_pixels),                          \
	    src_stride,                                                       \
	    leading_pixels,                                                   \
	    H);                                                               \
	                                                                      \
	dst += leading_pixels;                                                \
	W -= leading_pixels;                                                  \
    }                                                                         \
                                                                              \
    if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))                         \
    {                                                                         \
	trailing_pixels = (((uintptr_t)(dst + W) &                            \
			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
	if (trailing_pixels > W)                                              \
	    trailing_pixels = W;                                              \
	W -= trailing_pixels;                                                 \
	src += trailing_pixels * src_stride;                                  \
    }                                                                         \
                                                                              \
    for (x = 0; x < W; x += TILE_SIZE)                                        \
    {                                                                         \
	/* aligned middle part TILE_SIZExH */                                 \
	blt_rotated_270_trivial_##suffix (                                    \
	    dst + x,                                                          \
	    dst_stride,                                                       \
	    src + src_stride * (W - x - TILE_SIZE),                           \
	    src_stride,                                                       \
	    TILE_SIZE,                                                        \
	    H);                                                               \
    }                                                                         \
                                                                              \
    if (trailing_pixels)                                                      \
    {                                                                         \
	/* unaligned trailing part NxH (where N < TILE_SIZE) */               \
	blt_rotated_270_trivial_##suffix (                                    \
	    dst + W,                                                          \
	    dst_stride,                                                       \
	    src - trailing_pixels * src_stride,                               \
	    src_stride,                                                       \
	    trailing_pixels,                                                  \
	    H);                                                               \
    }                                                                         \
}                                                                             \
                                                                              \
static void                                                                   \
fast_composite_rotate_90_##suffix (pixman_implementation_t *imp,              \
				   pixman_composite_info_t *info)	      \
{									      \
    PIXMAN_COMPOSITE_ARGS (info);					      \
    pix_type       *dst_line;						      \
    pix_type       *src_line;                                                 \
    int             dst_stride, src_stride;                                   \
    int             src_x_t, src_y_t;                                         \
                                                                              \
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type,              \
			   dst_stride, dst_line, 1);                          \
    src_x_t = -src_y + pixman_fixed_to_int (                                  \
				src_image->common.transform->matrix[0][2] +   \
				pixman_fixed_1 / 2 - pixman_fixed_e) - height;\
    src_y_t = src_x + pixman_fixed_to_int (                                   \
				src_image->common.transform->matrix[1][2] +   \
				pixman_fixed_1 / 2 - pixman_fixed_e);         \
    PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,             \
			   src_stride, src_line, 1);                          \
    blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride,      \
			     width, height);                                  \
}                                                                             \
                                                                              \
static void                                                                   \
fast_composite_rotate_270_##suffix (pixman_implementation_t *imp,             \
				    pixman_composite_info_t *info)            \
{                                                                             \
    PIXMAN_COMPOSITE_ARGS (info);					      \
    pix_type       *dst_line;						      \
    pix_type       *src_line;                                                 \
    int             dst_stride, src_stride;                                   \
    int             src_x_t, src_y_t;                                         \
                                                                              \
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type,              \
			   dst_stride, dst_line, 1);                          \
    src_x_t = src_y + pixman_fixed_to_int (                                   \
				src_image->common.transform->matrix[0][2] +   \
				pixman_fixed_1 / 2 - pixman_fixed_e);         \
    src_y_t = -src_x + pixman_fixed_to_int (                                  \
				src_image->common.transform->matrix[1][2] +   \
				pixman_fixed_1 / 2 - pixman_fixed_e) - width; \
    PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,             \
			   src_stride, src_line, 1);                          \
    blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride,     \
			      width, height);                                 \
}
1810 
/* Instantiate the rotation fast paths for 8, 16 and 32 bpp pixels. */
FAST_SIMPLE_ROTATE (8, uint8_t)
FAST_SIMPLE_ROTATE (565, uint16_t)
FAST_SIMPLE_ROTATE (8888, uint32_t)
1814 
/*
 * Table of C fast paths.  Entries are matched in order against the
 * operator and the source/mask/destination formats and flags, so more
 * specific entries must come before more general ones; the
 * PIXMAN_OP_NONE entry terminates the table.
 */
static const pixman_fast_path_t c_fast_paths[] =
{
    /* OVER with a solid source and an a8 or a1 mask */
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5,   fast_composite_over_n_1_0565),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5,   fast_composite_over_n_1_0565),
    /* OVER with a solid source and a component-alpha mask */
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca),
    /* OVER with an x888 source and an a8 mask */
    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888),
    /* Unmasked OVER */
    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565),
    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
    /* ADD */
    PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, fast_composite_add_0565_0565),
    PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, fast_composite_add_0565_0565),
    PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
    PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
    PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8),
    PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1_1),
    PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
    /* SRC: solid fills and plain copies (memcpy where layouts match) */
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888),
    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
    /* IN */
    PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
    PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),

    /* Specialized nearest-neighbour scaling paths */
    SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888),

    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888),

    SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565),

    SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),

    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, x888_8888),

    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888),

    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),

/* Generic nearest-neighbour scaling entry; catches scaled cases the
 * specialized SIMPLE_NEAREST_* entries above did not. */
#define NEAREST_FAST_PATH(op,s,d)		\
    {   PIXMAN_OP_ ## op,			\
	PIXMAN_ ## s, SCALED_NEAREST_FLAGS,	\
	PIXMAN_null, 0,				\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,	\
	fast_composite_scaled_nearest,		\
    }

    NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8),
    NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8),

    NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8),
    NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8),

    NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8),
    NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8),

    NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
    NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),

/* 90/270 degree rotation entries; the fast_composite_rotate_* wrappers
 * are instantiated above by FAST_SIMPLE_ROTATE. */
#define SIMPLE_ROTATE_FLAGS(angle)					  \
    (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM	|			  \
     FAST_PATH_NEAREST_FILTER			|			  \
     FAST_PATH_SAMPLES_COVER_CLIP_NEAREST	|			  \
     FAST_PATH_STANDARD_FLAGS)

#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix)				  \
    {   PIXMAN_OP_ ## op,						  \
	PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90),				  \
	PIXMAN_null, 0,							  \
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				  \
	fast_composite_rotate_90_##suffix,				  \
    },									  \
    {   PIXMAN_OP_ ## op,						  \
	PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270),			  \
	PIXMAN_null, 0,							  \
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				  \
	fast_composite_rotate_270_##suffix,				  \
    }

    SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888),
    SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888),
    SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888),
    SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565),
    SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8),

    /* Simple repeat fast path entry. */
    {	PIXMAN_OP_any,
	PIXMAN_any,
	(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE |
	 FAST_PATH_NORMAL_REPEAT),
	PIXMAN_any, 0,
	PIXMAN_any, FAST_PATH_STD_DEST_FLAGS,
	fast_composite_tiled_repeat
    },

    /* End-of-table sentinel. */
    {   PIXMAN_OP_NONE	},
};
1976 
/*
 * Mask covering 'n' consecutive a1 pixels of a 32-bit scanline word,
 * starting at bit offset 'offs'.  On big-endian the leftmost pixel is
 * the most significant bit, so the run is placed at the top of the
 * word.  Requires n < 32 (1U << 32 would be undefined behavior);
 * pixman_fill1_line below never passes a larger n.
 */
#ifdef WORDS_BIGENDIAN
#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (32 - (offs) - (n)))
#else
#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs))
#endif
1982 
1983 static force_inline void
pixman_fill1_line(uint32_t * dst,int offs,int width,int v)1984 pixman_fill1_line (uint32_t *dst, int offs, int width, int v)
1985 {
1986     if (offs)
1987     {
1988 	int leading_pixels = 32 - offs;
1989 	if (leading_pixels >= width)
1990 	{
1991 	    if (v)
1992 		*dst |= A1_FILL_MASK (width, offs);
1993 	    else
1994 		*dst &= ~A1_FILL_MASK (width, offs);
1995 	    return;
1996 	}
1997 	else
1998 	{
1999 	    if (v)
2000 		*dst++ |= A1_FILL_MASK (leading_pixels, offs);
2001 	    else
2002 		*dst++ &= ~A1_FILL_MASK (leading_pixels, offs);
2003 	    width -= leading_pixels;
2004 	}
2005     }
2006     while (width >= 32)
2007     {
2008 	if (v)
2009 	    *dst++ = 0xFFFFFFFF;
2010 	else
2011 	    *dst++ = 0;
2012 	width -= 32;
2013     }
2014     if (width > 0)
2015     {
2016 	if (v)
2017 	    *dst |= A1_FILL_MASK (width, 0);
2018 	else
2019 	    *dst &= ~A1_FILL_MASK (width, 0);
2020     }
2021 }
2022 
2023 static void
pixman_fill1(uint32_t * bits,int stride,int x,int y,int width,int height,uint32_t filler)2024 pixman_fill1 (uint32_t *bits,
2025               int       stride,
2026               int       x,
2027               int       y,
2028               int       width,
2029               int       height,
2030               uint32_t  filler)
2031 {
2032     uint32_t *dst = bits + y * stride + (x >> 5);
2033     int offs = x & 31;
2034 
2035     if (filler & 1)
2036     {
2037 	while (height--)
2038 	{
2039 	    pixman_fill1_line (dst, offs, width, 1);
2040 	    dst += stride;
2041 	}
2042     }
2043     else
2044     {
2045 	while (height--)
2046 	{
2047 	    pixman_fill1_line (dst, offs, width, 0);
2048 	    dst += stride;
2049 	}
2050     }
2051 }
2052 
/*
 * Fill a rectangle of an 8 bpp image with a constant byte.  'stride'
 * is in uint32_t units (as everywhere in pixman); x, y, width, height
 * are in pixels.  Only the low 8 bits of 'filler' are used.
 */
static void
pixman_fill8 (uint32_t *bits,
              int       stride,
              int       x,
              int       y,
              int       width,
              int       height,
              uint32_t  filler)
{
    int byte_stride = stride * (int) sizeof (uint32_t);
    uint8_t *dst = (uint8_t *) bits;
    uint8_t v = filler & 0xff;

    /* Guard: a non-positive width converted to size_t would make
     * memset fill a huge range; the original loop was simply a no-op. */
    if (width <= 0)
	return;

    dst = dst + y * byte_stride + x;

    while (height--)
    {
	/* memset is the idiomatic (and typically vectorized) way to
	 * fill a run of bytes; <string.h> is already included. */
	memset (dst, v, width);
	dst += byte_stride;
    }
}
2077 
/*
 * Fill a rectangle of a 16 bpp image with a constant pixel.  'stride'
 * is in uint32_t units; x, y, width, height are in pixels.  Only the
 * low 16 bits of 'filler' are used.
 */
static void
pixman_fill16 (uint32_t *bits,
               int       stride,
               int       x,
               int       y,
               int       width,
               int       height,
               uint32_t  filler)
{
    /* Row stride converted to 16-bit units. */
    int short_stride =
	(stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
    uint16_t v = filler & 0xffff;
    uint16_t *row = (uint16_t *)bits + y * short_stride + x;

    while (height--)
    {
	uint16_t *p = row;
	uint16_t *end = row + width;

	while (p < end)
	    *p++ = v;

	row += short_stride;
    }
}
2103 
/*
 * Fill a rectangle of a 32 bpp image with a constant pixel.  'stride'
 * is in uint32_t units; x, y, width, height are in pixels.
 */
static void
pixman_fill32 (uint32_t *bits,
               int       stride,
               int       x,
               int       y,
               int       width,
               int       height,
               uint32_t  filler)
{
    uint32_t *row = bits + y * stride + x;

    while (height--)
    {
	uint32_t *p = row;
	uint32_t *end = row + width;

	while (p < end)
	    *p++ = filler;

	row += stride;
    }
}
2125 
2126 static pixman_bool_t
fast_path_fill(pixman_implementation_t * imp,uint32_t * bits,int stride,int bpp,int x,int y,int width,int height,uint32_t filler)2127 fast_path_fill (pixman_implementation_t *imp,
2128                 uint32_t *               bits,
2129                 int                      stride,
2130                 int                      bpp,
2131                 int                      x,
2132                 int                      y,
2133                 int                      width,
2134                 int                      height,
2135                 uint32_t		 filler)
2136 {
2137     switch (bpp)
2138     {
2139     case 1:
2140 	pixman_fill1 (bits, stride, x, y, width, height, filler);
2141 	break;
2142 
2143     case 8:
2144 	pixman_fill8 (bits, stride, x, y, width, height, filler);
2145 	break;
2146 
2147     case 16:
2148 	pixman_fill16 (bits, stride, x, y, width, height, filler);
2149 	break;
2150 
2151     case 32:
2152 	pixman_fill32 (bits, stride, x, y, width, height, filler);
2153 	break;
2154 
2155     default:
2156 	return FALSE;
2157     }
2158 
2159     return TRUE;
2160 }
2161 
2162 /*****************************************************************************/
2163 
/*
 * Narrow (a8r8g8b8) source fetcher for r5g6b5 images.  Expands one
 * scanline from iter->bits into iter->buffer, advances iter->bits to
 * the next scanline, and returns the buffer.  'mask' is unused.
 */
static uint32_t *
fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
{
    int32_t w = iter->width;
    uint32_t *dst = iter->buffer;
    const uint16_t *src = (const uint16_t *)iter->bits;

    iter->bits += iter->stride;

    /* Align the source buffer at 4 bytes boundary */
    if (w > 0 && ((uintptr_t)src & 3))
    {
	*dst++ = convert_0565_to_8888 (*src++);
	w--;
    }
    /* Process two pixels per iteration */
    while ((w -= 2) >= 0)
    {
	uint32_t sr, sb, sg, t0, t1;
	/* One aligned 32-bit load covers two 565 pixels. */
	uint32_t s = *(const uint32_t *)src;
	src += 2;
	/* Extract both pixels' channel pairs, then replicate the top
	 * bits into the low bits to widen 5/6-bit channels to 8 bits. */
	sr = (s >> 8) & 0x00F800F8;
	sb = (s << 3) & 0x00F800F8;
	sg = (s >> 3) & 0x00FC00FC;
	sr |= sr >> 5;
	sb |= sb >> 5;
	sg |= sg >> 6;
	/* Reassemble the two pixels as opaque a8r8g8b8. */
	t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) |
	     (sb & 0xFF) | 0xFF000000;
	t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) |
	     (sb >> 16) | 0xFF000000;
#ifdef WORDS_BIGENDIAN
	*dst++ = t1;
	*dst++ = t0;
#else
	*dst++ = t0;
	*dst++ = t1;
#endif
    }
    /* After the loop w is negative; its low bit is set exactly when
     * one pixel remains. */
    if (w & 1)
    {
	*dst = convert_0565_to_8888 (*src);
    }

    return iter->buffer;
}
2210 
2211 static uint32_t *
fast_dest_fetch_noop(pixman_iter_t * iter,const uint32_t * mask)2212 fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask)
2213 {
2214     iter->bits += iter->stride;
2215     return iter->buffer;
2216 }
2217 
2218 /* Helper function for a workaround, which tries to ensure that 0x1F001F
2219  * constant is always allocated in a register on RISC architectures.
2220  */
2221 static force_inline uint32_t
convert_8888_to_0565_workaround(uint32_t s,uint32_t x1F001F)2222 convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F)
2223 {
2224     uint32_t a, b;
2225     a = (s >> 3) & x1F001F;
2226     b = s & 0xFC00;
2227     a |= a >> 5;
2228     a |= b >> 5;
2229     return a;
2230 }
2231 
/*
 * Write-back companion to fast_fetch_r5g6b5: converts the a8r8g8b8
 * pixels in iter->buffer back to r5g6b5 and stores them to the
 * scanline the fetcher processed (iter->bits has already been advanced
 * past it, hence the "- iter->stride").
 */
static void
fast_write_back_r5g6b5 (pixman_iter_t *iter)
{
    int32_t w = iter->width;
    uint16_t *dst = (uint16_t *)(iter->bits - iter->stride);
    const uint32_t *src = iter->buffer;
    /* Workaround to ensure that x1F001F variable is allocated in a register */
    static volatile uint32_t volatile_x1F001F = 0x1F001F;
    uint32_t x1F001F = volatile_x1F001F;

    /* Main loop, unrolled four pixels per iteration. */
    while ((w -= 4) >= 0)
    {
	uint32_t s1 = *src++;
	uint32_t s2 = *src++;
	uint32_t s3 = *src++;
	uint32_t s4 = *src++;
	*dst++ = convert_8888_to_0565_workaround (s1, x1F001F);
	*dst++ = convert_8888_to_0565_workaround (s2, x1F001F);
	*dst++ = convert_8888_to_0565_workaround (s3, x1F001F);
	*dst++ = convert_8888_to_0565_workaround (s4, x1F001F);
    }
    /* Handle the 0-3 leftover pixels: after the loop w is negative,
     * but its low two bits still equal (width & 3). */
    if (w & 2)
    {
	*dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
	*dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
    }
    if (w & 1)
    {
	*dst = convert_8888_to_0565_workaround (*src, x1F001F);
    }
}
2263 
/* Associates a pixel format with its narrow scanline fetcher and, for
 * destination iterators, the matching write-back routine. */
typedef struct
{
    pixman_format_code_t	format;
    pixman_iter_get_scanline_t	get_scanline;
    pixman_iter_write_back_t	write_back;
} fetcher_info_t;
2270 
/* Formats with specialized iterators; the PIXMAN_null entry terminates
 * the table. */
static const fetcher_info_t fetchers[] =
{
    { PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 },
    { PIXMAN_null }
};
2276 
2277 static pixman_bool_t
fast_src_iter_init(pixman_implementation_t * imp,pixman_iter_t * iter)2278 fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
2279 {
2280     pixman_image_t *image = iter->image;
2281 
2282 #define FLAGS								\
2283     (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |		\
2284      FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
2285 
2286     if ((iter->iter_flags & ITER_NARROW)			&&
2287 	(iter->image_flags & FLAGS) == FLAGS)
2288     {
2289 	const fetcher_info_t *f;
2290 
2291 	for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
2292 	{
2293 	    if (image->common.extended_format_code == f->format)
2294 	    {
2295 		uint8_t *b = (uint8_t *)image->bits.bits;
2296 		int s = image->bits.rowstride * 4;
2297 
2298 		iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
2299 		iter->stride = s;
2300 
2301 		iter->get_scanline = f->get_scanline;
2302 		return TRUE;
2303 	    }
2304 	}
2305     }
2306 
2307     return FALSE;
2308 }
2309 
/*
 * Destination iterator initializer.  For narrow iterators over
 * standard destination images, look the format up in 'fetchers'; on a
 * match, point iter->bits at the first pixel and install the scanline
 * fetcher (or a no-op fetch when the existing destination pixels are
 * entirely ignored) and the write-back routine.
 */
static pixman_bool_t
fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
    pixman_image_t *image = iter->image;

    if ((iter->iter_flags & ITER_NARROW)		&&
	(iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS)
    {
	const fetcher_info_t *f;

	for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
	{
	    if (image->common.extended_format_code == f->format)
	    {
		uint8_t *b = (uint8_t *)image->bits.bits;
		int s = image->bits.rowstride * 4;	/* stride in bytes */

		iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
		iter->stride = s;

		if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
		    (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
		{
		    /* Destination will be fully overwritten; skip the read. */
		    iter->get_scanline = fast_dest_fetch_noop;
		}
		else
		{
		    iter->get_scanline = f->get_scanline;
		}
		iter->write_back = f->write_back;
		return TRUE;
	    }
	}
    }
    return FALSE;
}
2346 
2347 
2348 pixman_implementation_t *
_pixman_implementation_create_fast_path(pixman_implementation_t * fallback)2349 _pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
2350 {
2351     pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);
2352 
2353     imp->fill = fast_path_fill;
2354     imp->src_iter_init = fast_src_iter_init;
2355     imp->dest_iter_init = fast_dest_iter_init;
2356 
2357     return imp;
2358 }
2359