• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2009 Nokia Corporation
3  * Copyright © 2010 Movial Creative Technologies Oy
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  */
24 
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include "utils.h"
29 
30 #define SOLID_FLAG 1
31 #define CA_FLAG    2
32 
33 #define L1CACHE_SIZE (8 * 1024)
34 #define L2CACHE_SIZE (128 * 1024)
35 
36 /* This is applied to both L1 and L2 tests - alternatively, you could
37  * parameterise bench_L or split it into two functions. It could be
38  * read at runtime on some architectures, but it only really matters
39  * that it's a number that's an integer divisor of both cacheline
40  * lengths, and further, it only really matters for caches that don't
41  * do allocate-on-write. */
42 #define CACHELINE_LENGTH (32) /* bytes */
43 
44 #define WIDTH  1920
45 #define HEIGHT 1080
46 #define BUFSIZE (WIDTH * HEIGHT * 4)
47 #define XWIDTH 256
48 #define XHEIGHT 256
49 #define TILEWIDTH 32
50 #define TINYWIDTH 8
51 
52 #define EXCLUDE_OVERHEAD 1
53 
/* Shared pixel buffers.  bench_memcpy() copies BUFSIZE - 64 bytes back and
 * forth between src and dst, and bench_composite() hands all three to
 * pixman_image_create_bits() as image storage.  They are not allocated in
 * this part of the file. */
uint32_t *dst, *src, *mask;

/* Divisor for the "percent of bandwidth" figure printed by the M test in
 * bench_composite(); starts at zero here and is not assigned in this part
 * of the file. */
double bandwidth = 0.0;
59 
60 double
bench_memcpy()61 bench_memcpy ()
62 {
63     int64_t n = 0, total;
64     double  t1, t2;
65     int     x = 0;
66 
67     t1 = gettime ();
68     while (1)
69     {
70 	memcpy (dst, src, BUFSIZE - 64);
71 	memcpy (src, dst, BUFSIZE - 64);
72 	n += 4 * (BUFSIZE - 64);
73 	t2 = gettime ();
74 	if (t2 - t1 > 0.5)
75 	    break;
76     }
77     n = total = n * 5;
78     t1 = gettime ();
79     while (n > 0)
80     {
81 	if (++x >= 64)
82 	    x = 0;
83 	memcpy ((char *)dst + 1, (char *)src + x, BUFSIZE - 64);
84 	memcpy ((char *)src + 1, (char *)dst + x, BUFSIZE - 64);
85 	n -= 4 * (BUFSIZE - 64);
86     }
87     t2 = gettime ();
88     return (double)total / (t2 - t1);
89 }
90 
91 static pixman_bool_t use_scaling = FALSE;
92 static pixman_filter_t filter = PIXMAN_FILTER_NEAREST;
93 static pixman_bool_t use_csv_output = FALSE;
94 
95 /* nearly 1x scale factor */
96 static pixman_transform_t m =
97 {
98     {
99         { pixman_fixed_1 + 1, 0,              0              },
100         { 0,                  pixman_fixed_1, 0              },
101         { 0,                  0,              pixman_fixed_1 }
102     }
103 };
104 
105 static void
pixman_image_composite_wrapper(pixman_implementation_t * impl,pixman_composite_info_t * info)106 pixman_image_composite_wrapper (pixman_implementation_t *impl,
107 				pixman_composite_info_t *info)
108 {
109     if (use_scaling)
110     {
111         pixman_image_set_filter (info->src_image, filter, NULL, 0);
112         pixman_image_set_transform(info->src_image, &m);
113     }
114     pixman_image_composite (info->op,
115 			    info->src_image, info->mask_image, info->dest_image,
116 			    info->src_x, info->src_y,
117 			    info->mask_x, info->mask_y,
118 			    info->dest_x, info->dest_y,
119 			    info->width, info->height);
120 }
121 
122 static void
pixman_image_composite_empty(pixman_implementation_t * impl,pixman_composite_info_t * info)123 pixman_image_composite_empty (pixman_implementation_t *impl,
124 			      pixman_composite_info_t *info)
125 {
126     if (use_scaling)
127     {
128         pixman_image_set_filter (info->src_image, filter, NULL, 0);
129         pixman_image_set_transform(info->src_image, &m);
130     }
131     pixman_image_composite (info->op,
132 			    info->src_image, info->mask_image, info->dest_image,
133 			    0, 0, 0, 0, 0, 0, 1, 1);
134 }
135 
136 static inline void
call_func(pixman_composite_func_t func,pixman_op_t op,pixman_image_t * src_image,pixman_image_t * mask_image,pixman_image_t * dest_image,int32_t src_x,int32_t src_y,int32_t mask_x,int32_t mask_y,int32_t dest_x,int32_t dest_y,int32_t width,int32_t height)137 call_func (pixman_composite_func_t func,
138 	   pixman_op_t             op,
139 	   pixman_image_t *        src_image,
140 	   pixman_image_t *        mask_image,
141 	   pixman_image_t *        dest_image,
142 	   int32_t		   src_x,
143 	   int32_t		   src_y,
144 	   int32_t                 mask_x,
145 	   int32_t                 mask_y,
146 	   int32_t                 dest_x,
147 	   int32_t                 dest_y,
148 	   int32_t                 width,
149 	   int32_t                 height)
150 {
151     pixman_composite_info_t info;
152 
153     info.op = op;
154     info.src_image = src_image;
155     info.mask_image = mask_image;
156     info.dest_image = dest_image;
157     info.src_x = src_x;
158     info.src_y = src_y;
159     info.mask_x = mask_x;
160     info.mask_y = mask_y;
161     info.dest_x = dest_x;
162     info.dest_y = dest_y;
163     info.width = width;
164     info.height = height;
165 
166     func (0, &info);
167 }
168 
169 double
170 noinline
bench_L(pixman_op_t op,pixman_image_t * src_img,pixman_image_t * mask_img,pixman_image_t * dst_img,int64_t n,pixman_composite_func_t func,int width,int lines_count)171 bench_L  (pixman_op_t              op,
172           pixman_image_t *         src_img,
173           pixman_image_t *         mask_img,
174           pixman_image_t *         dst_img,
175           int64_t                  n,
176           pixman_composite_func_t  func,
177           int                      width,
178           int                      lines_count)
179 {
180     int64_t      i, j, k;
181     int          x = 0;
182     int          q = 0;
183 
184     for (i = 0; i < n; i++)
185     {
186         /* For caches without allocate-on-write, we need to force the
187          * destination buffer back into the cache on each iteration,
188          * otherwise if they are evicted during the test, they remain
189          * uncached. This doesn't matter for tests which read the
190          * destination buffer, or for caches that do allocate-on-write,
191          * but in those cases this loop just adds constant time, which
192          * should be successfully cancelled out.
193          */
194         for (j = 0; j < lines_count; j++)
195         {
196             for (k = 0; k < width + 62; k += CACHELINE_LENGTH / sizeof *dst)
197             {
198                 q += dst[j * WIDTH + k];
199             }
200             q += dst[j * WIDTH + width + 62];
201         }
202 	if (++x >= 64)
203 	    x = 0;
204 	call_func (func, op, src_img, mask_img, dst_img, x, 0, x, 0, 63 - x, 0, width, lines_count);
205     }
206 
207     return (double)n * lines_count * width;
208 }
209 
210 double
211 noinline
bench_M(pixman_op_t op,pixman_image_t * src_img,pixman_image_t * mask_img,pixman_image_t * dst_img,int64_t n,pixman_composite_func_t func)212 bench_M (pixman_op_t              op,
213          pixman_image_t *         src_img,
214          pixman_image_t *         mask_img,
215          pixman_image_t *         dst_img,
216          int64_t                  n,
217          pixman_composite_func_t  func)
218 {
219     int64_t i;
220     int     x = 0;
221 
222     for (i = 0; i < n; i++)
223     {
224 	if (++x >= 64)
225 	    x = 0;
226 	call_func (func, op, src_img, mask_img, dst_img, x, 0, x, 0, 1, 0, WIDTH - 64, HEIGHT);
227     }
228 
229     return (double)n * (WIDTH - 64) * HEIGHT;
230 }
231 
232 double
233 noinline
bench_HT(pixman_op_t op,pixman_image_t * src_img,pixman_image_t * mask_img,pixman_image_t * dst_img,int64_t n,pixman_composite_func_t func)234 bench_HT (pixman_op_t              op,
235           pixman_image_t *         src_img,
236           pixman_image_t *         mask_img,
237           pixman_image_t *         dst_img,
238           int64_t                  n,
239           pixman_composite_func_t  func)
240 {
241     double  pix_cnt = 0;
242     int     x = 0;
243     int     y = 0;
244     int64_t i;
245 
246     srand (0);
247     for (i = 0; i < n; i++)
248     {
249 	int w = (rand () % (TILEWIDTH * 2)) + 1;
250 	int h = (rand () % (TILEWIDTH * 2)) + 1;
251 	if (x + w > WIDTH)
252 	{
253 	    x = 0;
254 	    y += TILEWIDTH * 2;
255 	}
256 	if (y + h > HEIGHT)
257 	{
258 	    y = 0;
259 	}
260 	call_func (func, op, src_img, mask_img, dst_img, x, y, x, y, x, y, w, h);
261 	x += w;
262 	pix_cnt += w * h;
263     }
264     return pix_cnt;
265 }
266 
267 double
268 noinline
bench_VT(pixman_op_t op,pixman_image_t * src_img,pixman_image_t * mask_img,pixman_image_t * dst_img,int64_t n,pixman_composite_func_t func)269 bench_VT (pixman_op_t              op,
270           pixman_image_t *         src_img,
271           pixman_image_t *         mask_img,
272           pixman_image_t *         dst_img,
273           int64_t                  n,
274           pixman_composite_func_t  func)
275 {
276     double  pix_cnt = 0;
277     int     x = 0;
278     int     y = 0;
279     int64_t i;
280 
281     srand (0);
282     for (i = 0; i < n; i++)
283     {
284 	int w = (rand () % (TILEWIDTH * 2)) + 1;
285 	int h = (rand () % (TILEWIDTH * 2)) + 1;
286 	if (y + h > HEIGHT)
287 	{
288 	    y = 0;
289 	    x += TILEWIDTH * 2;
290 	}
291 	if (x + w > WIDTH)
292 	{
293 	    x = 0;
294 	}
295 	call_func (func, op, src_img, mask_img, dst_img, x, y, x, y, x, y, w, h);
296 	y += h;
297 	pix_cnt += w * h;
298     }
299     return pix_cnt;
300 }
301 
302 double
303 noinline
bench_R(pixman_op_t op,pixman_image_t * src_img,pixman_image_t * mask_img,pixman_image_t * dst_img,int64_t n,pixman_composite_func_t func,int maxw,int maxh)304 bench_R (pixman_op_t              op,
305          pixman_image_t *         src_img,
306          pixman_image_t *         mask_img,
307          pixman_image_t *         dst_img,
308          int64_t                  n,
309          pixman_composite_func_t  func,
310          int                      maxw,
311          int                      maxh)
312 {
313     double  pix_cnt = 0;
314     int64_t i;
315 
316     if (maxw <= TILEWIDTH * 2 || maxh <= TILEWIDTH * 2)
317     {
318 	printf("error: maxw <= TILEWIDTH * 2 || maxh <= TILEWIDTH * 2\n");
319         return 0;
320     }
321 
322     srand (0);
323     for (i = 0; i < n; i++)
324     {
325 	int w = (rand () % (TILEWIDTH * 2)) + 1;
326 	int h = (rand () % (TILEWIDTH * 2)) + 1;
327 	int sx = rand () % (maxw - TILEWIDTH * 2);
328 	int sy = rand () % (maxh - TILEWIDTH * 2);
329 	int dx = rand () % (maxw - TILEWIDTH * 2);
330 	int dy = rand () % (maxh - TILEWIDTH * 2);
331 	call_func (func, op, src_img, mask_img, dst_img, sx, sy, sx, sy, dx, dy, w, h);
332 	pix_cnt += w * h;
333     }
334     return pix_cnt;
335 }
336 
337 double
338 noinline
bench_RT(pixman_op_t op,pixman_image_t * src_img,pixman_image_t * mask_img,pixman_image_t * dst_img,int64_t n,pixman_composite_func_t func,int maxw,int maxh)339 bench_RT (pixman_op_t              op,
340           pixman_image_t *         src_img,
341           pixman_image_t *         mask_img,
342           pixman_image_t *         dst_img,
343           int64_t                  n,
344           pixman_composite_func_t  func,
345           int                      maxw,
346           int                      maxh)
347 {
348     double  pix_cnt = 0;
349     int64_t i;
350 
351     if (maxw <= TINYWIDTH * 2 || maxh <= TINYWIDTH * 2)
352     {
353 	printf("error: maxw <= TINYWIDTH * 2 || maxh <= TINYWIDTH * 2\n");
354         return 0;
355     }
356 
357     srand (0);
358     for (i = 0; i < n; i++)
359     {
360 	int w = (rand () % (TINYWIDTH * 2)) + 1;
361 	int h = (rand () % (TINYWIDTH * 2)) + 1;
362 	int sx = rand () % (maxw - TINYWIDTH * 2);
363 	int sy = rand () % (maxh - TINYWIDTH * 2);
364 	int dx = rand () % (maxw - TINYWIDTH * 2);
365 	int dy = rand () % (maxh - TINYWIDTH * 2);
366 	call_func (func, op, src_img, mask_img, dst_img, sx, sy, sx, sy, dx, dy, w, h);
367 	pix_cnt += w * h;
368     }
369     return pix_cnt;
370 }
371 
/*
 * Convert a pixel count and three timestamps into megapixels per second.
 *
 * t1..t2 brackets the overhead-only pass and t2..t3 the real pass; the
 * overhead duration is subtracted from the real duration before dividing.
 * No guard is applied when the corrected duration is zero or negative.
 */
static double
Mpx_per_sec (double pix_cnt, double t1, double t2, double t3)
{
    double net_seconds = (t3 - t2) - (t2 - t1);

    return pix_cnt / net_seconds / 1e6;
}
380 
381 void
bench_composite(const char * testname,int src_fmt,int src_flags,int op,int mask_fmt,int mask_flags,int dst_fmt,double npix)382 bench_composite (const char *testname,
383                  int         src_fmt,
384                  int         src_flags,
385                  int         op,
386                  int         mask_fmt,
387                  int         mask_flags,
388                  int         dst_fmt,
389                  double      npix)
390 {
391     pixman_image_t *                src_img;
392     pixman_image_t *                dst_img;
393     pixman_image_t *                mask_img;
394     pixman_image_t *                xsrc_img;
395     pixman_image_t *                xdst_img;
396     pixman_image_t *                xmask_img;
397     double                          t1, t2, t3, pix_cnt;
398     int64_t                         n, l1test_width, nlines;
399     double                             bytes_per_pix = 0;
400     pixman_bool_t                   bench_pixbuf = FALSE;
401 
402     pixman_composite_func_t func = pixman_image_composite_wrapper;
403 
404     if (!(src_flags & SOLID_FLAG))
405     {
406         bytes_per_pix += (src_fmt >> 24) / 8.0;
407         src_img = pixman_image_create_bits (src_fmt,
408                                             WIDTH, HEIGHT,
409                                             src,
410                                             WIDTH * 4);
411         xsrc_img = pixman_image_create_bits (src_fmt,
412                                              XWIDTH, XHEIGHT,
413                                              src,
414                                              XWIDTH * 4);
415     }
416     else
417     {
418         src_img = pixman_image_create_bits (src_fmt,
419                                             1, 1,
420                                             src,
421                                             4);
422         xsrc_img = pixman_image_create_bits (src_fmt,
423                                              1, 1,
424                                              src,
425                                              4);
426         pixman_image_set_repeat (src_img, PIXMAN_REPEAT_NORMAL);
427         pixman_image_set_repeat (xsrc_img, PIXMAN_REPEAT_NORMAL);
428     }
429 
430     bytes_per_pix += (dst_fmt >> 24) / 8.0;
431     dst_img = pixman_image_create_bits (dst_fmt,
432                                         WIDTH, HEIGHT,
433                                         dst,
434                                         WIDTH * 4);
435 
436     mask_img = NULL;
437     xmask_img = NULL;
438     if (strcmp (testname, "pixbuf") == 0 || strcmp (testname, "rpixbuf") == 0)
439     {
440         bench_pixbuf = TRUE;
441     }
442     if (!(mask_flags & SOLID_FLAG) && mask_fmt != PIXMAN_null)
443     {
444         bytes_per_pix += (mask_fmt >> 24) / ((op == PIXMAN_OP_SRC) ? 8.0 : 4.0);
445         mask_img = pixman_image_create_bits (mask_fmt,
446                                              WIDTH, HEIGHT,
447                                              bench_pixbuf ? src : mask,
448                                              WIDTH * 4);
449         xmask_img = pixman_image_create_bits (mask_fmt,
450                                              XWIDTH, XHEIGHT,
451                                              bench_pixbuf ? src : mask,
452                                              XWIDTH * 4);
453     }
454     else if (mask_fmt != PIXMAN_null)
455     {
456         mask_img = pixman_image_create_bits (mask_fmt,
457                                              1, 1,
458                                              mask,
459                                              4);
460         xmask_img = pixman_image_create_bits (mask_fmt,
461                                              1, 1,
462                                              mask,
463                                              4 * 4);
464        pixman_image_set_repeat (mask_img, PIXMAN_REPEAT_NORMAL);
465        pixman_image_set_repeat (xmask_img, PIXMAN_REPEAT_NORMAL);
466     }
467     if ((mask_flags & CA_FLAG) && mask_fmt != PIXMAN_null)
468     {
469        pixman_image_set_component_alpha (mask_img, 1);
470     }
471     xdst_img = pixman_image_create_bits (dst_fmt,
472                                          XWIDTH, XHEIGHT,
473                                          dst,
474                                          XWIDTH * 4);
475 
476     if (!use_csv_output)
477         printf ("%24s %c", testname, func != pixman_image_composite_wrapper ?
478                 '-' : '=');
479 
480     memcpy (dst, src, BUFSIZE);
481     memcpy (src, dst, BUFSIZE);
482 
483     l1test_width = L1CACHE_SIZE / 8 - 64;
484     if (l1test_width < 1)
485 	l1test_width = 1;
486     if (l1test_width > WIDTH - 64)
487 	l1test_width = WIDTH - 64;
488     n = 1 + npix / (l1test_width * 8);
489     t1 = gettime ();
490 #if EXCLUDE_OVERHEAD
491     pix_cnt = bench_L (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, l1test_width, 1);
492 #endif
493     t2 = gettime ();
494     pix_cnt = bench_L (op, src_img, mask_img, dst_img, n, func, l1test_width, 1);
495     t3 = gettime ();
496     if (use_csv_output)
497         printf ("%g,", Mpx_per_sec (pix_cnt, t1, t2, t3));
498     else
499         printf ("  L1:%7.2f", Mpx_per_sec (pix_cnt, t1, t2, t3));
500     fflush (stdout);
501 
502     memcpy (dst, src, BUFSIZE);
503     memcpy (src, dst, BUFSIZE);
504 
505     nlines = (L2CACHE_SIZE / l1test_width) /
506 	((PIXMAN_FORMAT_BPP(src_fmt) + PIXMAN_FORMAT_BPP(dst_fmt)) / 8);
507     if (nlines < 1)
508 	nlines = 1;
509     n = 1 + npix / (l1test_width * nlines);
510     t1 = gettime ();
511 #if EXCLUDE_OVERHEAD
512     pix_cnt = bench_L (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, l1test_width, nlines);
513 #endif
514     t2 = gettime ();
515     pix_cnt = bench_L (op, src_img, mask_img, dst_img, n, func, l1test_width, nlines);
516     t3 = gettime ();
517     if (use_csv_output)
518         printf ("%g,", Mpx_per_sec (pix_cnt, t1, t2, t3));
519     else
520         printf ("  L2:%7.2f", Mpx_per_sec (pix_cnt, t1, t2, t3));
521     fflush (stdout);
522 
523     memcpy (dst, src, BUFSIZE);
524     memcpy (src, dst, BUFSIZE);
525 
526     n = 1 + npix / (WIDTH * HEIGHT);
527     t1 = gettime ();
528 #if EXCLUDE_OVERHEAD
529     pix_cnt = bench_M (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty);
530 #endif
531     t2 = gettime ();
532     pix_cnt = bench_M (op, src_img, mask_img, dst_img, n, func);
533     t3 = gettime ();
534     if (use_csv_output)
535         printf ("%g,", Mpx_per_sec (pix_cnt, t1, t2, t3));
536     else
537         printf ("  M:%6.2f (%6.2f%%)", Mpx_per_sec (pix_cnt, t1, t2, t3),
538                 (pix_cnt / ((t3 - t2) - (t2 - t1)) * bytes_per_pix) * (100.0 / bandwidth) );
539     fflush (stdout);
540 
541     memcpy (dst, src, BUFSIZE);
542     memcpy (src, dst, BUFSIZE);
543 
544     n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
545     t1 = gettime ();
546 #if EXCLUDE_OVERHEAD
547     pix_cnt = bench_HT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty);
548 #endif
549     t2 = gettime ();
550     pix_cnt = bench_HT (op, src_img, mask_img, dst_img, n, func);
551     t3 = gettime ();
552     if (use_csv_output)
553         printf ("%g,", Mpx_per_sec (pix_cnt, t1, t2, t3));
554     else
555         printf ("  HT:%6.2f", Mpx_per_sec (pix_cnt, t1, t2, t3));
556     fflush (stdout);
557 
558     memcpy (dst, src, BUFSIZE);
559     memcpy (src, dst, BUFSIZE);
560 
561     n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
562     t1 = gettime ();
563 #if EXCLUDE_OVERHEAD
564     pix_cnt = bench_VT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty);
565 #endif
566     t2 = gettime ();
567     pix_cnt = bench_VT (op, src_img, mask_img, dst_img, n, func);
568     t3 = gettime ();
569     if (use_csv_output)
570         printf ("%g,", Mpx_per_sec (pix_cnt, t1, t2, t3));
571     else
572         printf ("  VT:%6.2f", Mpx_per_sec (pix_cnt, t1, t2, t3));
573     fflush (stdout);
574 
575     memcpy (dst, src, BUFSIZE);
576     memcpy (src, dst, BUFSIZE);
577 
578     n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
579     t1 = gettime ();
580 #if EXCLUDE_OVERHEAD
581     pix_cnt = bench_R (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, WIDTH, HEIGHT);
582 #endif
583     t2 = gettime ();
584     pix_cnt = bench_R (op, src_img, mask_img, dst_img, n, func, WIDTH, HEIGHT);
585     t3 = gettime ();
586     if (use_csv_output)
587         printf ("%g,", Mpx_per_sec (pix_cnt, t1, t2, t3));
588     else
589         printf ("  R:%6.2f", Mpx_per_sec (pix_cnt, t1, t2, t3));
590     fflush (stdout);
591 
592     memcpy (dst, src, BUFSIZE);
593     memcpy (src, dst, BUFSIZE);
594 
595     n = 1 + npix / (16 * TINYWIDTH * TINYWIDTH);
596     t1 = gettime ();
597 #if EXCLUDE_OVERHEAD
598     pix_cnt = bench_RT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, WIDTH, HEIGHT);
599 #endif
600     t2 = gettime ();
601     pix_cnt = bench_RT (op, src_img, mask_img, dst_img, n, func, WIDTH, HEIGHT);
602     t3 = gettime ();
603     if (use_csv_output)
604         printf ("%g\n", Mpx_per_sec (pix_cnt, t1, t2, t3));
605     else
606         printf ("  RT:%6.2f (%4.0fKops/s)\n", Mpx_per_sec (pix_cnt, t1, t2, t3), (double) n / ((t3 - t2) * 1000));
607 
608     if (mask_img) {
609 	pixman_image_unref (mask_img);
610 	pixman_image_unref (xmask_img);
611     }
612     pixman_image_unref (src_img);
613     pixman_image_unref (dst_img);
614     pixman_image_unref (xsrc_img);
615     pixman_image_unref (xdst_img);
616 }
617 
618 #define PIXMAN_OP_OUT_REV (PIXMAN_OP_OUT_REVERSE)
619 
/* One row of the benchmark table.  The fields after testname appear in the
 * same order as the format/flag/operator parameters of bench_composite (). */
typedef struct test_entry
{
    const char *testname;     /* label for the case */
    int         src_fmt;      /* source pixel format */
    int         src_flags;    /* SOLID_FLAG / CA_FLAG bits for the source */
    int         op;           /* compositing operator */
    int         mask_fmt;     /* mask pixel format, PIXMAN_null for none */
    int         mask_flags;   /* SOLID_FLAG / CA_FLAG bits for the mask */
    int         dst_fmt;      /* destination pixel format */
} test_entry_t;
632 
633 static const test_entry_t tests_tbl[] =
634 {
635     { "add_8_8_8",             PIXMAN_a8,          0, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a8 },
636     { "add_n_8_8",             PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a8 },
637     { "add_n_8_8888",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
638     { "add_n_8_x888",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
639     { "add_n_8_0565",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
640     { "add_n_8_1555",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
641     { "add_n_8_4444",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a4r4g4b4 },
642     { "add_n_8_2222",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a2r2g2b2 },
643     { "add_n_8_2x10",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_x2r10g10b10 },
644     { "add_n_8_2a10",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_a8,       0, PIXMAN_a2r10g10b10 },
645     { "add_n_8",               PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a8 },
646     { "add_n_8888",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
647     { "add_n_x888",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
648     { "add_n_0565",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
649     { "add_n_1555",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
650     { "add_n_4444",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a4r4g4b4 },
651     { "add_n_2222",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a2r2g2b2 },
652     { "add_n_2x10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
653     { "add_n_2a10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a2r10g10b10 },
654     { "add_8_8",               PIXMAN_a8,          0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a8 },
655     { "add_x888_x888",         PIXMAN_x8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
656     { "add_8888_8888",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
657     { "add_8888_0565",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
658     { "add_8888_1555",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
659     { "add_8888_4444",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a4r4g4b4 },
660     { "add_8888_2222",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a2r2g2b2 },
661     { "add_0565_0565",         PIXMAN_r5g6b5,      0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
662     { "add_1555_1555",         PIXMAN_a1r5g5b5,    0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
663     { "add_0565_2x10",         PIXMAN_r5g6b5,      0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
664     { "add_2a10_2a10",         PIXMAN_a2r10g10b10, 0, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a2r10g10b10 },
665     { "in_n_8_8",              PIXMAN_a8r8g8b8,    1, PIXMAN_OP_IN,      PIXMAN_a8,       0, PIXMAN_a8 },
666     { "in_8_8",                PIXMAN_a8,          0, PIXMAN_OP_IN,      PIXMAN_null,     0, PIXMAN_a8 },
667     { "src_n_2222",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a2r2g2b2 },
668     { "src_n_0565",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
669     { "src_n_1555",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
670     { "src_n_4444",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a4r4g4b4 },
671     { "src_n_x888",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
672     { "src_n_8888",            PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
673     { "src_n_2x10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
674     { "src_n_2a10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a2r10g10b10 },
675     { "src_8888_0565",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
676     { "src_0565_8888",         PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
677     { "src_8888_4444",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a4r4g4b4 },
678     { "src_8888_2222",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a2r2g2b2 },
679     { "src_8888_2x10",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
680     { "src_8888_2a10",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a2r10g10b10 },
681     { "src_0888_0565",         PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
682     { "src_0888_8888",         PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
683     { "src_0888_x888",         PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
684     { "src_0888_8888_rev",     PIXMAN_b8g8r8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
685     { "src_0888_0565_rev",     PIXMAN_b8g8r8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
686     { "src_x888_x888",         PIXMAN_x8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
687     { "src_x888_8888",         PIXMAN_x8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
688     { "src_8888_8888",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
689     { "src_0565_0565",         PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
690     { "src_1555_0565",         PIXMAN_a1r5g5b5,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
691     { "src_0565_1555",         PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
692     { "src_8_8",               PIXMAN_a8,          0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8 },
693     { "src_n_8",               PIXMAN_a8,          1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8 },
694     { "src_n_8_0565",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
695     { "src_n_8_1555",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
696     { "src_n_8_4444",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a4r4g4b4 },
697     { "src_n_8_2222",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a2r2g2b2 },
698     { "src_n_8_x888",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
699     { "src_n_8_8888",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
700     { "src_n_8_2x10",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_x2r10g10b10 },
701     { "src_n_8_2a10",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a2r10g10b10 },
702     { "src_8888_8_0565",       PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
703     { "src_0888_8_0565",       PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
704     { "src_0888_8_8888",       PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
705     { "src_0888_8_x888",       PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
706     { "src_x888_8_x888",       PIXMAN_x8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
707     { "src_x888_8_8888",       PIXMAN_x8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
708     { "src_0565_8_0565",       PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
709     { "src_1555_8_0565",       PIXMAN_a1r5g5b5,    0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
710     { "src_0565_8_1555",       PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
711     { "over_n_x888",           PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
712     { "over_n_8888",           PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
713     { "over_n_0565",           PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_r5g6b5 },
714     { "over_n_1555",           PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
715     { "over_8888_0565",        PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_r5g6b5 },
716     { "over_8888_8888",        PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
717     { "over_8888_x888",        PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
718     { "over_x888_8_0565",      PIXMAN_x8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_r5g6b5 },
719     { "over_x888_8_8888",      PIXMAN_x8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
720     { "over_n_8_0565",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_r5g6b5 },
721     { "over_n_8_1555",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
722     { "over_n_8_4444",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a4r4g4b4 },
723     { "over_n_8_2222",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a2r2g2b2 },
724     { "over_n_8_x888",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
725     { "over_n_8_8888",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
726     { "over_n_8_2x10",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_x2r10g10b10 },
727     { "over_n_8_2a10",         PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8,       0, PIXMAN_a2r10g10b10 },
728     { "over_n_8888_8888_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a8r8g8b8 },
729     { "over_n_8888_x888_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_x8r8g8b8 },
730     { "over_n_8888_0565_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_r5g6b5 },
731     { "over_n_8888_1555_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a1r5g5b5 },
732     { "over_n_8888_4444_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a4r4g4b4 },
733     { "over_n_8888_2222_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a2r2g2b2 },
734     { "over_n_8888_2x10_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_x2r10g10b10 },
735     { "over_n_8888_2a10_ca",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER,    PIXMAN_a8r8g8b8, 2, PIXMAN_a2r10g10b10 },
736     { "over_8888_n_8888",      PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       1, PIXMAN_a8r8g8b8 },
737     { "over_8888_n_x888",      PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       1, PIXMAN_x8r8g8b8 },
738     { "over_8888_n_0565",      PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       1, PIXMAN_r5g6b5 },
739     { "over_8888_n_1555",      PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       1, PIXMAN_a1r5g5b5 },
740     { "over_x888_n_8888",      PIXMAN_x8r8g8b8,    0, PIXMAN_OP_OVER,    PIXMAN_a8,       1, PIXMAN_a8r8g8b8 },
741     { "outrev_n_8_0565",       PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8,       0, PIXMAN_r5g6b5 },
742     { "outrev_n_8_1555",       PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
743     { "outrev_n_8_x888",       PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8,       0, PIXMAN_x8r8g8b8 },
744     { "outrev_n_8_8888",       PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8,       0, PIXMAN_a8r8g8b8 },
745     { "outrev_n_8888_0565_ca", PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_r5g6b5 },
746     { "outrev_n_8888_1555_ca", PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a1r5g5b5 },
747     { "outrev_n_8888_x888_ca", PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_x8r8g8b8 },
748     { "outrev_n_8888_8888_ca", PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a8r8g8b8 },
749     { "over_reverse_n_8888",   PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OVER_REVERSE, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
750     { "in_reverse_8888_8888",  PIXMAN_a8r8g8b8,    0, PIXMAN_OP_IN_REVERSE, PIXMAN_null,  0, PIXMAN_a8r8g8b8 },
751     { "pixbuf",                PIXMAN_x8b8g8r8,    0, PIXMAN_OP_SRC,     PIXMAN_a8b8g8r8, 0, PIXMAN_a8r8g8b8 },
752     { "rpixbuf",               PIXMAN_x8b8g8r8,    0, PIXMAN_OP_SRC,     PIXMAN_a8b8g8r8, 0, PIXMAN_a8b8g8r8 },
753 };
754 
/*
 * Test patterns that parse_test_pattern() resolves by an exact name match
 * before the generic "<op>_<fmt>_<fmt>[_<fmt>][_ca]" parser is tried,
 * because the generic parser cannot produce these entries.
 *
 * Names that also exist in tests_tbl (e.g. "src_n_8", "pixbuf", "rpixbuf")
 * must keep rows identical to their tests_tbl counterparts:
 * parser_self_test() parses every tests_tbl name and compares the result
 * field by field against tests_tbl.
 *
 * NOTE(review): columns presumably follow the test_entry_t member order
 * (name, source format, source flags, operator, mask format, mask flags,
 * destination format) -- confirm against the struct definition.
 */
static const test_entry_t special_patterns[] =
{
    { "add_n_2x10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
    { "add_n_2a10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_ADD,     PIXMAN_null,     0, PIXMAN_a2r10g10b10 },
    { "src_n_2x10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
    { "src_n_2a10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a2r10g10b10 },
    { "src_0888_8888_rev",     PIXMAN_b8g8r8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
    { "src_0888_0565_rev",     PIXMAN_b8g8r8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
    { "src_n_8",               PIXMAN_a8,          1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8 },
    { "pixbuf",                PIXMAN_x8b8g8r8,    0, PIXMAN_OP_SRC,     PIXMAN_a8b8g8r8, 0, PIXMAN_a8r8g8b8 },
    { "rpixbuf",               PIXMAN_x8b8g8r8,    0, PIXMAN_OP_SRC,     PIXMAN_a8b8g8r8, 0, PIXMAN_a8b8g8r8 },
};
767 
/*
 * Copy a prefix of 'string' into 'buf' and NUL-terminate it.
 *
 * The prefix starts at 'string' and stops just before the first '_' found
 * at or after 'scan_from'; when no '_' is left, it stops at 'end'.
 *
 * No bounds checking is done here: 'buf' must have room for the prefix
 * plus the terminating NUL.
 *
 * Returns the position inside 'string' where the prefix stopped (the
 * delimiter, or 'end').
 */
static const char *
copy_sub_string (char       *buf,
                 const char *string,
                 const char *scan_from,
                 const char *end)
{
    const char *stop = strchr (scan_from, '_');
    size_t len;

    if (stop == NULL)
        stop = end;

    len = stop - string;
    strncpy (buf, string, len);
    buf[len] = '\0';

    return stop;
}
788 
789 static pixman_op_t
parse_longest_operator(char * buf,const char ** strp,const char * end)790 parse_longest_operator (char *buf, const char **strp, const char *end)
791 {
792     const char *p = *strp;
793     const char *sub_end;
794     const char *best_end = p;
795     pixman_op_t best_op = PIXMAN_OP_NONE;
796     pixman_op_t op;
797 
798     while (p < end)
799     {
800         sub_end = copy_sub_string (buf, *strp, p, end);
801         op = operator_from_string (buf);
802         p = sub_end + 1;
803 
804         if (op != PIXMAN_OP_NONE)
805         {
806             best_end = p;
807             best_op = op;
808         }
809     }
810 
811     *strp = best_end;
812     return best_op;
813 }
814 
815 static pixman_format_code_t
parse_format(char * buf,const char ** p,const char * end)816 parse_format (char *buf, const char **p, const char *end)
817 {
818     pixman_format_code_t format;
819     const char *delim;
820 
821     if (*p >= end)
822         return PIXMAN_null;
823 
824     delim = copy_sub_string (buf, *p, *p, end);
825     format = format_from_string (buf);
826 
827     if (format != PIXMAN_null)
828         *p = delim + 1;
829 
830     return format;
831 }
832 
833 static int
parse_test_pattern(test_entry_t * test,const char * pattern)834 parse_test_pattern (test_entry_t *test, const char *pattern)
835 {
836     const char *p = pattern;
837     const char *end = pattern + strlen (pattern);
838     char buf[1024];
839     pixman_format_code_t format[3];
840     int i;
841 
842     if (strlen (pattern) > sizeof (buf) - 1)
843         return -1;
844 
845     /* Special cases that the parser cannot produce. */
846     for (i = 0; i < ARRAY_LENGTH (special_patterns); i++)
847     {
848         if (strcmp (pattern, special_patterns[i].testname) == 0)
849         {
850             *test = special_patterns[i];
851             return 0;
852         }
853     }
854 
855     test->testname = pattern;
856 
857     /* Extract operator, may contain delimiters,
858      * so take the longest string that matches.
859      */
860     test->op = parse_longest_operator (buf, &p, end);
861     if (test->op == PIXMAN_OP_NONE)
862         return -1;
863 
864     /* extract up to three pixel formats */
865     format[0] = parse_format (buf, &p, end);
866     format[1] = parse_format (buf, &p, end);
867     format[2] = parse_format (buf, &p, end);
868 
869     if (format[0] == PIXMAN_null || format[1] == PIXMAN_null)
870         return -1;
871 
872     /* recognize CA flag */
873     test->mask_flags = 0;
874     if (p < end)
875     {
876         if (strcmp (p, "ca") == 0)
877             test->mask_flags |= CA_FLAG;
878         else
879             return -1; /* trailing garbage */
880     }
881 
882     test->src_fmt = format[0];
883     if (format[2] == PIXMAN_null)
884     {
885         test->mask_fmt = PIXMAN_null;
886         test->dst_fmt = format[1];
887     }
888     else
889     {
890         test->mask_fmt = format[1];
891         test->dst_fmt = format[2];
892     }
893 
894     test->src_flags = 0;
895     if (test->src_fmt == PIXMAN_solid)
896     {
897         test->src_fmt = PIXMAN_a8r8g8b8;
898         test->src_flags |= SOLID_FLAG;
899     }
900 
901     if (test->mask_fmt == PIXMAN_solid)
902     {
903         if (test->mask_flags & CA_FLAG)
904             test->mask_fmt = PIXMAN_a8r8g8b8;
905         else
906             test->mask_fmt = PIXMAN_a8;
907 
908         test->mask_flags |= SOLID_FLAG;
909     }
910 
911     return 0;
912 }
913 
/*
 * Compare an integer field of a parsed test against its expected value.
 *
 * 'name' is the test name and 'field' the field being compared; both are
 * only used in the diagnostic printed on a mismatch.
 *
 * Returns 0 when the values are equal, 1 otherwise.
 */
static int
check_int (int got, int expected, const char *name, const char *field)
{
    if (got != expected)
    {
        printf ("%s: %s failure: expected %d, got %d.\n",
                name, field, expected, got);
        return 1;
    }

    return 0;
}
925 
/*
 * Compare a pixel format field of a parsed test against its expected
 * value.
 *
 * 'name' is the test name and 'field' the field being compared.  On a
 * mismatch both formats are printed by name (via format_name()) and as
 * hexadecimal codes.
 *
 * Returns 0 when the formats are equal, 1 otherwise.
 */
static int
check_format (int got, int expected, const char *name, const char *field)
{
    if (got != expected)
    {
        printf ("%s: %s failure: expected %s (%#x), got %s (%#x).\n",
                name, field,
                format_name (expected), expected,
                format_name (got), got);
        return 1;
    }

    return 0;
}
939 
940 static void
parser_self_test(void)941 parser_self_test (void)
942 {
943     const test_entry_t *ent;
944     test_entry_t test;
945     int fails = 0;
946     int i;
947 
948     for (i = 0; i < ARRAY_LENGTH (tests_tbl); i++)
949     {
950         ent = &tests_tbl[i];
951 
952         if (parse_test_pattern (&test, ent->testname) < 0)
953         {
954             printf ("parsing failed for '%s'\n", ent->testname);
955             fails++;
956             continue;
957         }
958 
959         fails += check_format (test.src_fmt, ent->src_fmt,
960                                ent->testname, "src_fmt");
961         fails += check_format (test.mask_fmt, ent->mask_fmt,
962                                ent->testname, "mask_fmt");
963         fails += check_format (test.dst_fmt, ent->dst_fmt,
964                                ent->testname, "dst_fmt");
965         fails += check_int    (test.src_flags, ent->src_flags,
966                                ent->testname, "src_flags");
967         fails += check_int    (test.mask_flags, ent->mask_flags,
968                                ent->testname, "mask_flags");
969         fails += check_int    (test.op, ent->op, ent->testname, "op");
970     }
971 
972     if (fails)
973     {
974         printf ("Parser self-test failed.\n");
975         exit (EXIT_FAILURE);
976     }
977 
978     if (!use_csv_output)
979         printf ("Parser self-test complete.\n");
980 }
981 
982 static void
print_test_details(const test_entry_t * test)983 print_test_details (const test_entry_t *test)
984 {
985     printf ("%s: %s, src %s%s, mask %s%s%s, dst %s\n",
986             test->testname,
987             operator_name (test->op),
988             format_name (test->src_fmt),
989             test->src_flags & SOLID_FLAG ? " solid" : "",
990             format_name (test->mask_fmt),
991             test->mask_flags & SOLID_FLAG ? " solid" : "",
992             test->mask_flags & CA_FLAG ? " CA" : "",
993             format_name (test->dst_fmt));
994 }
995 
996 static void
run_one_test(const char * pattern,double bandwidth_,pixman_bool_t prdetails)997 run_one_test (const char *pattern, double bandwidth_, pixman_bool_t prdetails)
998 {
999     test_entry_t test;
1000 
1001     if (parse_test_pattern (&test, pattern) < 0)
1002     {
1003         printf ("Error: Could not parse the test pattern '%s'.\n", pattern);
1004         return;
1005     }
1006 
1007     if (prdetails)
1008     {
1009         print_test_details (&test);
1010         printf ("---\n");
1011     }
1012 
1013     bench_composite (pattern,
1014                      test.src_fmt,
1015                      test.src_flags,
1016                      test.op,
1017                      test.mask_fmt,
1018                      test.mask_flags,
1019                      test.dst_fmt,
1020                      bandwidth_ / 8);
1021 }
1022 
1023 static void
run_default_tests(double bandwidth_)1024 run_default_tests (double bandwidth_)
1025 {
1026     int i;
1027 
1028     for (i = 0; i < ARRAY_LENGTH (tests_tbl); i++)
1029         run_one_test (tests_tbl[i].testname, bandwidth_, FALSE);
1030 }
1031 
1032 static void
print_explanation(void)1033 print_explanation (void)
1034 {
1035     printf ("Benchmark for a set of most commonly used functions\n");
1036     printf ("---\n");
1037     printf ("All results are presented in millions of pixels per second\n");
1038     printf ("L1  - small Xx1 rectangle (fitting L1 cache), always blitted at the same\n");
1039     printf ("      memory location with small drift in horizontal direction\n");
1040     printf ("L2  - small XxY rectangle (fitting L2 cache), always blitted at the same\n");
1041     printf ("      memory location with small drift in horizontal direction\n");
1042     printf ("M   - large %dx%d rectangle, always blitted at the same\n",
1043             WIDTH - 64, HEIGHT);
1044     printf ("      memory location with small drift in horizontal direction\n");
1045     printf ("HT  - random rectangles with %dx%d average size are copied from\n",
1046             TILEWIDTH, TILEWIDTH);
1047     printf ("      one %dx%d buffer to another, traversing from left to right\n",
1048             WIDTH, HEIGHT);
1049     printf ("      and from top to bottom\n");
1050     printf ("VT  - random rectangles with %dx%d average size are copied from\n",
1051             TILEWIDTH, TILEWIDTH);
1052     printf ("      one %dx%d buffer to another, traversing from top to bottom\n",
1053             WIDTH, HEIGHT);
1054     printf ("      and from left to right\n");
1055     printf ("R   - random rectangles with %dx%d average size are copied from\n",
1056             TILEWIDTH, TILEWIDTH);
1057     printf ("      random locations of one %dx%d buffer to another\n",
1058             WIDTH, HEIGHT);
1059     printf ("RT  - as R, but %dx%d average sized rectangles are copied\n",
1060             TINYWIDTH, TINYWIDTH);
1061     printf ("---\n");
1062 }
1063 
1064 static void
print_speed_scaling(double bw)1065 print_speed_scaling (double bw)
1066 {
1067     printf ("reference memcpy speed = %.1fMB/s (%.1fMP/s for 32bpp fills)\n",
1068             bw / 1000000., bw / 4000000);
1069 
1070     if (use_scaling)
1071     {
1072 	printf ("---\n");
1073 	if (filter == PIXMAN_FILTER_BILINEAR)
1074 	    printf ("BILINEAR scaling\n");
1075 	else if (filter == PIXMAN_FILTER_NEAREST)
1076 	    printf ("NEAREST scaling\n");
1077 	else
1078 	    printf ("UNKNOWN scaling\n");
1079     }
1080 
1081     printf ("---\n");
1082 }
1083 
/*
 * Print the command-line synopsis and the meaning of each option.
 * 'progname' is substituted into the synopsis line.
 */
static void
usage (const char *progname)
{
    printf ("Usage: %s [-b] [-n] [-c] [-m M] pattern\n", progname);
    fputs ("  -n : benchmark nearest scaling\n", stdout);
    fputs ("  -b : benchmark bilinear scaling\n", stdout);
    fputs ("  -c : print output as CSV data\n", stdout);
    fputs ("  -m M : set reference memcpy speed to M MB/s instead of measuring it\n", stdout);
}
1093 
1094 int
main(int argc,char * argv[])1095 main (int argc, char *argv[])
1096 {
1097     int i;
1098     const char *pattern = NULL;
1099 
1100     for (i = 1; i < argc; i++)
1101     {
1102 	if (argv[i][0] == '-')
1103 	{
1104 	    if (strchr (argv[i] + 1, 'b'))
1105 	    {
1106 		use_scaling = TRUE;
1107 		filter = PIXMAN_FILTER_BILINEAR;
1108 	    }
1109 	    else if (strchr (argv[i] + 1, 'n'))
1110 	    {
1111 		use_scaling = TRUE;
1112 		filter = PIXMAN_FILTER_NEAREST;
1113 	    }
1114 
1115 	    if (strchr (argv[i] + 1, 'c'))
1116 		use_csv_output = TRUE;
1117 
1118 	    if (strcmp (argv[i], "-m") == 0 && i + 1 < argc)
1119 		bandwidth = atof (argv[++i]) * 1e6;
1120 	}
1121 	else
1122 	{
1123 	    if (pattern)
1124 	    {
1125 		pattern = NULL;
1126 		printf ("Error: extra arguments given.\n");
1127 		break;
1128 	    }
1129 	    pattern = argv[i];
1130 	}
1131     }
1132 
1133     if (!pattern)
1134     {
1135 	usage (argv[0]);
1136 	return 1;
1137     }
1138 
1139     parser_self_test ();
1140 
1141     src = aligned_malloc (4096, BUFSIZE * 3);
1142     memset (src, 0xCC, BUFSIZE * 3);
1143     dst = src + (BUFSIZE / 4);
1144     mask = dst + (BUFSIZE / 4);
1145 
1146     if (!use_csv_output)
1147         print_explanation ();
1148 
1149     if (bandwidth < 1.0)
1150         bandwidth = bench_memcpy ();
1151     if (!use_csv_output)
1152         print_speed_scaling (bandwidth);
1153 
1154     if (strcmp (pattern, "all") == 0)
1155         run_default_tests (bandwidth);
1156     else
1157         run_one_test (pattern, bandwidth, !use_csv_output);
1158 
1159     free (src);
1160     return 0;
1161 }
1162