1 /*
2 * Copyright © 2009 Nokia Corporation
3 * Copyright © 2010 Movial Creative Technologies Oy
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include "utils.h"
29
/* Bits accepted in the src_flags / mask_flags arguments of bench_composite ()
 * (and stored in the corresponding columns of tests_tbl). */
#define SOLID_FLAG 1 /* image is created as a repeating 1x1 image */
#define CA_FLAG 2 /* component alpha is enabled on the mask image */

/* Assumed cache sizes (presumably in bytes - confirm); bench_composite ()
 * derives the rectangle sizes of the L1 and L2 tests from them. */
#define L1CACHE_SIZE (8 * 1024)
#define L2CACHE_SIZE (128 * 1024)
35
36 /* This is applied to both L1 and L2 tests - alternatively, you could
37 * parameterise bench_L or split it into two functions. It could be
38 * read at runtime on some architectures, but it only really matters
39 * that it's a number that's an integer divisor of both cacheline
40 * lengths, and further, it only really matters for caches that don't
 * do allocate-on-write. */
/* Step, expressed in bytes, used by bench_L () when it touches the
 * destination rows before each composite call. */
#define CACHELINE_LENGTH (32) /* bytes */

/* Dimensions, in pixels, of the main source/mask/destination images. */
#define WIDTH 1920
#define HEIGHT 1080
/* Size in bytes of one test buffer: WIDTH x HEIGHT pixels at 4 bytes each. */
#define BUFSIZE (WIDTH * HEIGHT * 4)
/* Dimensions of the secondary "x" images created in bench_composite (). */
#define XWIDTH 256
#define XHEIGHT 256
/* The HT, VT and R tests use rectangles of 1 .. 2 * TILEWIDTH pixels per side. */
#define TILEWIDTH 32
/* The RT test uses rectangles of 1 .. 2 * TINYWIDTH pixels per side. */
#define TINYWIDTH 8

/* When non-zero, bench_composite () first times each test with
 * pixman_image_composite_empty and subtracts that time from the result. */
#define EXCLUDE_OVERHEAD 1

/* Test buffers; main () points them at three consecutive BUFSIZE-byte
 * regions of a single allocation, in the order src, dst, mask. */
uint32_t *dst;
uint32_t *src;
uint32_t *mask;

/* Value returned by bench_memcpy (), stored by main (); used to scale the
 * amount of work per test and to print the percentage in the M column. */
double bandwidth = 0;
59
60 double
bench_memcpy()61 bench_memcpy ()
62 {
63 int64_t n = 0, total;
64 double t1, t2;
65 int x = 0;
66
67 t1 = gettime ();
68 while (1)
69 {
70 memcpy (dst, src, BUFSIZE - 64);
71 memcpy (src, dst, BUFSIZE - 64);
72 n += 4 * (BUFSIZE - 64);
73 t2 = gettime ();
74 if (t2 - t1 > 0.5)
75 break;
76 }
77 n = total = n * 5;
78 t1 = gettime ();
79 while (n > 0)
80 {
81 if (++x >= 64)
82 x = 0;
83 memcpy ((char *)dst + 1, (char *)src + x, BUFSIZE - 64);
84 memcpy ((char *)src + 1, (char *)dst + x, BUFSIZE - 64);
85 n -= 4 * (BUFSIZE - 64);
86 }
87 t2 = gettime ();
88 return (double)total / (t2 - t1);
89 }
90
/* Set to TRUE by the -b / -n command line options in main (); when set,
 * the composite wrappers apply `filter` and the transform `m` to the
 * source image before compositing. */
static pixman_bool_t use_scaling = FALSE;
/* Filter applied to the source image when use_scaling is TRUE. */
static pixman_filter_t filter = PIXMAN_FILTER_NEAREST;

/* nearly 1x scale factor: only the first matrix element differs from the
 * identity, by one unit above pixman_fixed_1 */
static pixman_transform_t m =
{
    {
        { pixman_fixed_1 + 1, 0, 0 },
        { 0, pixman_fixed_1, 0 },
        { 0, 0, pixman_fixed_1 }
    }
};
103
104 static void
pixman_image_composite_wrapper(pixman_implementation_t * impl,pixman_composite_info_t * info)105 pixman_image_composite_wrapper (pixman_implementation_t *impl,
106 pixman_composite_info_t *info)
107 {
108 if (use_scaling)
109 {
110 pixman_image_set_filter (info->src_image, filter, NULL, 0);
111 pixman_image_set_transform(info->src_image, &m);
112 }
113 pixman_image_composite (info->op,
114 info->src_image, info->mask_image, info->dest_image,
115 info->src_x, info->src_y,
116 info->mask_x, info->mask_y,
117 info->dest_x, info->dest_y,
118 info->width, info->height);
119 }
120
121 static void
pixman_image_composite_empty(pixman_implementation_t * impl,pixman_composite_info_t * info)122 pixman_image_composite_empty (pixman_implementation_t *impl,
123 pixman_composite_info_t *info)
124 {
125 if (use_scaling)
126 {
127 pixman_image_set_filter (info->src_image, filter, NULL, 0);
128 pixman_image_set_transform(info->src_image, &m);
129 }
130 pixman_image_composite (info->op,
131 info->src_image, info->mask_image, info->dest_image,
132 0, 0, 0, 0, 0, 0, 1, 1);
133 }
134
135 static inline void
call_func(pixman_composite_func_t func,pixman_op_t op,pixman_image_t * src_image,pixman_image_t * mask_image,pixman_image_t * dest_image,int32_t src_x,int32_t src_y,int32_t mask_x,int32_t mask_y,int32_t dest_x,int32_t dest_y,int32_t width,int32_t height)136 call_func (pixman_composite_func_t func,
137 pixman_op_t op,
138 pixman_image_t * src_image,
139 pixman_image_t * mask_image,
140 pixman_image_t * dest_image,
141 int32_t src_x,
142 int32_t src_y,
143 int32_t mask_x,
144 int32_t mask_y,
145 int32_t dest_x,
146 int32_t dest_y,
147 int32_t width,
148 int32_t height)
149 {
150 pixman_composite_info_t info;
151
152 info.op = op;
153 info.src_image = src_image;
154 info.mask_image = mask_image;
155 info.dest_image = dest_image;
156 info.src_x = src_x;
157 info.src_y = src_y;
158 info.mask_x = mask_x;
159 info.mask_y = mask_y;
160 info.dest_x = dest_x;
161 info.dest_y = dest_y;
162 info.width = width;
163 info.height = height;
164
165 func (0, &info);
166 }
167
168 void
169 noinline
bench_L(pixman_op_t op,pixman_image_t * src_img,pixman_image_t * mask_img,pixman_image_t * dst_img,int64_t n,pixman_composite_func_t func,int width,int lines_count)170 bench_L (pixman_op_t op,
171 pixman_image_t * src_img,
172 pixman_image_t * mask_img,
173 pixman_image_t * dst_img,
174 int64_t n,
175 pixman_composite_func_t func,
176 int width,
177 int lines_count)
178 {
179 int64_t i, j, k;
180 int x = 0;
181 int q = 0;
182 volatile int qx;
183
184 for (i = 0; i < n; i++)
185 {
186 /* For caches without allocate-on-write, we need to force the
187 * destination buffer back into the cache on each iteration,
188 * otherwise if they are evicted during the test, they remain
189 * uncached. This doesn't matter for tests which read the
190 * destination buffer, or for caches that do allocate-on-write,
191 * but in those cases this loop just adds constant time, which
192 * should be successfully cancelled out.
193 */
194 for (j = 0; j < lines_count; j++)
195 {
196 for (k = 0; k < width + 62; k += CACHELINE_LENGTH / sizeof *dst)
197 {
198 q += dst[j * WIDTH + k];
199 }
200 q += dst[j * WIDTH + width + 62];
201 }
202 if (++x >= 64)
203 x = 0;
204 call_func (func, op, src_img, mask_img, dst_img, x, 0, x, 0, 63 - x, 0, width, lines_count);
205 }
206 qx = q;
207 }
208
209 void
210 noinline
bench_M(pixman_op_t op,pixman_image_t * src_img,pixman_image_t * mask_img,pixman_image_t * dst_img,int64_t n,pixman_composite_func_t func)211 bench_M (pixman_op_t op,
212 pixman_image_t * src_img,
213 pixman_image_t * mask_img,
214 pixman_image_t * dst_img,
215 int64_t n,
216 pixman_composite_func_t func)
217 {
218 int64_t i;
219 int x = 0;
220
221 for (i = 0; i < n; i++)
222 {
223 if (++x >= 64)
224 x = 0;
225 call_func (func, op, src_img, mask_img, dst_img, x, 0, x, 0, 1, 0, WIDTH - 64, HEIGHT);
226 }
227 }
228
229 double
230 noinline
bench_HT(pixman_op_t op,pixman_image_t * src_img,pixman_image_t * mask_img,pixman_image_t * dst_img,int64_t n,pixman_composite_func_t func)231 bench_HT (pixman_op_t op,
232 pixman_image_t * src_img,
233 pixman_image_t * mask_img,
234 pixman_image_t * dst_img,
235 int64_t n,
236 pixman_composite_func_t func)
237 {
238 double pix_cnt = 0;
239 int x = 0;
240 int y = 0;
241 int64_t i;
242
243 srand (0);
244 for (i = 0; i < n; i++)
245 {
246 int w = (rand () % (TILEWIDTH * 2)) + 1;
247 int h = (rand () % (TILEWIDTH * 2)) + 1;
248 if (x + w > WIDTH)
249 {
250 x = 0;
251 y += TILEWIDTH * 2;
252 }
253 if (y + h > HEIGHT)
254 {
255 y = 0;
256 }
257 call_func (func, op, src_img, mask_img, dst_img, x, y, x, y, x, y, w, h);
258 x += w;
259 pix_cnt += w * h;
260 }
261 return pix_cnt;
262 }
263
264 double
265 noinline
bench_VT(pixman_op_t op,pixman_image_t * src_img,pixman_image_t * mask_img,pixman_image_t * dst_img,int64_t n,pixman_composite_func_t func)266 bench_VT (pixman_op_t op,
267 pixman_image_t * src_img,
268 pixman_image_t * mask_img,
269 pixman_image_t * dst_img,
270 int64_t n,
271 pixman_composite_func_t func)
272 {
273 double pix_cnt = 0;
274 int x = 0;
275 int y = 0;
276 int64_t i;
277
278 srand (0);
279 for (i = 0; i < n; i++)
280 {
281 int w = (rand () % (TILEWIDTH * 2)) + 1;
282 int h = (rand () % (TILEWIDTH * 2)) + 1;
283 if (y + h > HEIGHT)
284 {
285 y = 0;
286 x += TILEWIDTH * 2;
287 }
288 if (x + w > WIDTH)
289 {
290 x = 0;
291 }
292 call_func (func, op, src_img, mask_img, dst_img, x, y, x, y, x, y, w, h);
293 y += h;
294 pix_cnt += w * h;
295 }
296 return pix_cnt;
297 }
298
299 double
300 noinline
bench_R(pixman_op_t op,pixman_image_t * src_img,pixman_image_t * mask_img,pixman_image_t * dst_img,int64_t n,pixman_composite_func_t func,int maxw,int maxh)301 bench_R (pixman_op_t op,
302 pixman_image_t * src_img,
303 pixman_image_t * mask_img,
304 pixman_image_t * dst_img,
305 int64_t n,
306 pixman_composite_func_t func,
307 int maxw,
308 int maxh)
309 {
310 double pix_cnt = 0;
311 int64_t i;
312
313 if (maxw <= TILEWIDTH * 2 || maxh <= TILEWIDTH * 2)
314 {
315 printf("error: maxw <= TILEWIDTH * 2 || maxh <= TILEWIDTH * 2\n");
316 return 0;
317 }
318
319 srand (0);
320 for (i = 0; i < n; i++)
321 {
322 int w = (rand () % (TILEWIDTH * 2)) + 1;
323 int h = (rand () % (TILEWIDTH * 2)) + 1;
324 int sx = rand () % (maxw - TILEWIDTH * 2);
325 int sy = rand () % (maxh - TILEWIDTH * 2);
326 int dx = rand () % (maxw - TILEWIDTH * 2);
327 int dy = rand () % (maxh - TILEWIDTH * 2);
328 call_func (func, op, src_img, mask_img, dst_img, sx, sy, sx, sy, dx, dy, w, h);
329 pix_cnt += w * h;
330 }
331 return pix_cnt;
332 }
333
334 double
335 noinline
bench_RT(pixman_op_t op,pixman_image_t * src_img,pixman_image_t * mask_img,pixman_image_t * dst_img,int64_t n,pixman_composite_func_t func,int maxw,int maxh)336 bench_RT (pixman_op_t op,
337 pixman_image_t * src_img,
338 pixman_image_t * mask_img,
339 pixman_image_t * dst_img,
340 int64_t n,
341 pixman_composite_func_t func,
342 int maxw,
343 int maxh)
344 {
345 double pix_cnt = 0;
346 int64_t i;
347
348 if (maxw <= TINYWIDTH * 2 || maxh <= TINYWIDTH * 2)
349 {
350 printf("error: maxw <= TINYWIDTH * 2 || maxh <= TINYWIDTH * 2\n");
351 return 0;
352 }
353
354 srand (0);
355 for (i = 0; i < n; i++)
356 {
357 int w = (rand () % (TINYWIDTH * 2)) + 1;
358 int h = (rand () % (TINYWIDTH * 2)) + 1;
359 int sx = rand () % (maxw - TINYWIDTH * 2);
360 int sy = rand () % (maxh - TINYWIDTH * 2);
361 int dx = rand () % (maxw - TINYWIDTH * 2);
362 int dy = rand () % (maxh - TINYWIDTH * 2);
363 call_func (func, op, src_img, mask_img, dst_img, sx, sy, sx, sy, dx, dy, w, h);
364 pix_cnt += w * h;
365 }
366 return pix_cnt;
367 }
368
/*
 * Run the L1, L2, M, HT, VT, R and RT benchmarks for one compositing
 * operation and print a single line of results to stdout.
 *
 * testname   - label printed at the start of the line; the names "pixbuf"
 *              and "rpixbuf" additionally make a non-solid mask image
 *              use the src buffer instead of the mask buffer
 * src_fmt    - pixman format code of the source image
 * src_flags  - SOLID_FLAG selects a repeating 1x1 source image
 * op         - compositing operator passed to every composite call
 * mask_fmt   - pixman format code of the mask, or PIXMAN_null for no mask
 * mask_flags - SOLID_FLAG selects a repeating 1x1 mask image, CA_FLAG
 *              enables component alpha on the mask
 * dst_fmt    - pixman format code of the destination image
 * npix       - pixel budget from which the iteration count of each test
 *              is derived
 *
 * Each test is timed twice when EXCLUDE_OVERHEAD is non-zero: once with
 * pixman_image_composite_empty (t1..t2) and once with the real function
 * (t2..t3); the reported rate uses (t3 - t2) - (t2 - t1) as the time.
 */
void
bench_composite (char * testname,
                 int src_fmt,
                 int src_flags,
                 int op,
                 int mask_fmt,
                 int mask_flags,
                 int dst_fmt,
                 double npix)
{
    pixman_image_t * src_img;
    pixman_image_t * dst_img;
    pixman_image_t * mask_img;
    /* NOTE(review): the x*_img images are created and released below but
     * are not passed to any benchmark in this function. */
    pixman_image_t * xsrc_img;
    pixman_image_t * xdst_img;
    pixman_image_t * xmask_img;
    double t1, t2, t3, pix_cnt;
    int64_t n, l1test_width, nlines;
    /* accumulated from the format codes; used only for the M percentage */
    double bytes_per_pix = 0;
    pixman_bool_t bench_pixbuf = FALSE;

    pixman_composite_func_t func = pixman_image_composite_wrapper;

    /* Source: full-size image, or a repeating 1x1 image when solid. */
    if (!(src_flags & SOLID_FLAG))
    {
        bytes_per_pix += (src_fmt >> 24) / 8.0;
        src_img = pixman_image_create_bits (src_fmt,
                                            WIDTH, HEIGHT,
                                            src,
                                            WIDTH * 4);
        xsrc_img = pixman_image_create_bits (src_fmt,
                                             XWIDTH, XHEIGHT,
                                             src,
                                             XWIDTH * 4);
    }
    else
    {
        src_img = pixman_image_create_bits (src_fmt,
                                            1, 1,
                                            src,
                                            4);
        xsrc_img = pixman_image_create_bits (src_fmt,
                                             1, 1,
                                             src,
                                             4);
        pixman_image_set_repeat (src_img, PIXMAN_REPEAT_NORMAL);
        pixman_image_set_repeat (xsrc_img, PIXMAN_REPEAT_NORMAL);
    }

    /* Destination is always a full-size image over the dst buffer. */
    bytes_per_pix += (dst_fmt >> 24) / 8.0;
    dst_img = pixman_image_create_bits (dst_fmt,
                                        WIDTH, HEIGHT,
                                        dst,
                                        WIDTH * 4);

    mask_img = NULL;
    xmask_img = NULL;
    if (strcmp (testname, "pixbuf") == 0 || strcmp (testname, "rpixbuf") == 0)
    {
        bench_pixbuf = TRUE;
    }
    /* Mask: none, full-size, or a repeating 1x1 image when solid. */
    if (!(mask_flags & SOLID_FLAG) && mask_fmt != PIXMAN_null)
    {
        /* the mask contribution is halved for PIXMAN_OP_SRC */
        bytes_per_pix += (mask_fmt >> 24) / ((op == PIXMAN_OP_SRC) ? 8.0 : 4.0);
        mask_img = pixman_image_create_bits (mask_fmt,
                                             WIDTH, HEIGHT,
                                             bench_pixbuf ? src : mask,
                                             WIDTH * 4);
        xmask_img = pixman_image_create_bits (mask_fmt,
                                              XWIDTH, XHEIGHT,
                                              bench_pixbuf ? src : mask,
                                              XWIDTH * 4);
    }
    else if (mask_fmt != PIXMAN_null)
    {
        mask_img = pixman_image_create_bits (mask_fmt,
                                             1, 1,
                                             mask,
                                             4);
        xmask_img = pixman_image_create_bits (mask_fmt,
                                              1, 1,
                                              mask,
                                              4 * 4);
        pixman_image_set_repeat (mask_img, PIXMAN_REPEAT_NORMAL);
        pixman_image_set_repeat (xmask_img, PIXMAN_REPEAT_NORMAL);
    }
    if ((mask_flags & CA_FLAG) && mask_fmt != PIXMAN_null)
    {
        pixman_image_set_component_alpha (mask_img, 1);
    }
    xdst_img = pixman_image_create_bits (dst_fmt,
                                         XWIDTH, XHEIGHT,
                                         dst,
                                         XWIDTH * 4);


    /* func is always the wrapper here, so '=' is what gets printed */
    printf ("%24s %c", testname, func != pixman_image_composite_wrapper ?
            '-' : '=');

    /* The buffers are copied back and forth before every test. */
    memcpy (dst, src, BUFSIZE);
    memcpy (src, dst, BUFSIZE);

    /* L1: a single row, width derived from L1CACHE_SIZE and clamped to
     * the range 1 .. WIDTH - 64. */
    l1test_width = L1CACHE_SIZE / 8 - 64;
    if (l1test_width < 1)
        l1test_width = 1;
    if (l1test_width > WIDTH - 64)
        l1test_width = WIDTH - 64;
    n = 1 + npix / (l1test_width * 8);
    t1 = gettime ();
#if EXCLUDE_OVERHEAD
    bench_L (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, l1test_width, 1);
#endif
    t2 = gettime ();
    bench_L (op, src_img, mask_img, dst_img, n, func, l1test_width, 1);
    t3 = gettime ();
    printf (" L1:%7.2f", (double)n * l1test_width * 1 /
            ((t3 - t2) - (t2 - t1)) / 1000000.);
    fflush (stdout);

    memcpy (dst, src, BUFSIZE);
    memcpy (src, dst, BUFSIZE);

    /* L2: same width, with the row count derived from L2CACHE_SIZE and
     * the combined bytes per pixel of the source and destination formats. */
    nlines = (L2CACHE_SIZE / l1test_width) /
        ((PIXMAN_FORMAT_BPP(src_fmt) + PIXMAN_FORMAT_BPP(dst_fmt)) / 8);
    if (nlines < 1)
        nlines = 1;
    n = 1 + npix / (l1test_width * nlines);
    t1 = gettime ();
#if EXCLUDE_OVERHEAD
    bench_L (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, l1test_width, nlines);
#endif
    t2 = gettime ();
    bench_L (op, src_img, mask_img, dst_img, n, func, l1test_width, nlines);
    t3 = gettime ();
    printf (" L2:%7.2f", (double)n * l1test_width * nlines /
            ((t3 - t2) - (t2 - t1)) / 1000000.);
    fflush (stdout);

    memcpy (dst, src, BUFSIZE);
    memcpy (src, dst, BUFSIZE);

    /* M: large (WIDTH - 64) x HEIGHT rectangle; the second printed value
     * is the byte rate expressed as a percentage of `bandwidth`. */
    n = 1 + npix / (WIDTH * HEIGHT);
    t1 = gettime ();
#if EXCLUDE_OVERHEAD
    bench_M (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty);
#endif
    t2 = gettime ();
    bench_M (op, src_img, mask_img, dst_img, n, func);
    t3 = gettime ();
    printf (" M:%6.2f (%6.2f%%)",
            ((double)n * (WIDTH - 64) * HEIGHT / ((t3 - t2) - (t2 - t1))) / 1000000.,
            ((double)n * (WIDTH - 64) * HEIGHT / ((t3 - t2) - (t2 - t1)) * bytes_per_pix) * (100.0 / bandwidth) );
    fflush (stdout);

    memcpy (dst, src, BUFSIZE);
    memcpy (src, dst, BUFSIZE);

    /* HT: random tiles, horizontal traversal.  The pixel count returned
     * by the overhead run is overwritten by the real run's count. */
    n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
    t1 = gettime ();
#if EXCLUDE_OVERHEAD
    pix_cnt = bench_HT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty);
#endif
    t2 = gettime ();
    pix_cnt = bench_HT (op, src_img, mask_img, dst_img, n, func);
    t3 = gettime ();
    printf (" HT:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.);
    fflush (stdout);

    memcpy (dst, src, BUFSIZE);
    memcpy (src, dst, BUFSIZE);

    /* VT: random tiles, vertical traversal. */
    n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
    t1 = gettime ();
#if EXCLUDE_OVERHEAD
    pix_cnt = bench_VT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty);
#endif
    t2 = gettime ();
    pix_cnt = bench_VT (op, src_img, mask_img, dst_img, n, func);
    t3 = gettime ();
    printf (" VT:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.);
    fflush (stdout);

    memcpy (dst, src, BUFSIZE);
    memcpy (src, dst, BUFSIZE);

    /* R: random tiles at random locations. */
    n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
    t1 = gettime ();
#if EXCLUDE_OVERHEAD
    pix_cnt = bench_R (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, WIDTH, HEIGHT);
#endif
    t2 = gettime ();
    pix_cnt = bench_R (op, src_img, mask_img, dst_img, n, func, WIDTH, HEIGHT);
    t3 = gettime ();
    printf (" R:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.);
    fflush (stdout);

    memcpy (dst, src, BUFSIZE);
    memcpy (src, dst, BUFSIZE);

    /* RT: tiny random rectangles; the Kops/s figure uses the raw time of
     * the real run (t3 - t2), without subtracting the overhead run. */
    n = 1 + npix / (16 * TINYWIDTH * TINYWIDTH);
    t1 = gettime ();
#if EXCLUDE_OVERHEAD
    pix_cnt = bench_RT (op, src_img, mask_img, dst_img, n, pixman_image_composite_empty, WIDTH, HEIGHT);
#endif
    t2 = gettime ();
    pix_cnt = bench_RT (op, src_img, mask_img, dst_img, n, func, WIDTH, HEIGHT);
    t3 = gettime ();
    printf (" RT:%6.2f (%4.0fKops/s)\n", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000., (double) n / ((t3 - t2) * 1000));

    /* mask_img and xmask_img are always created together */
    if (mask_img) {
        pixman_image_unref (mask_img);
        pixman_image_unref (xmask_img);
    }
    pixman_image_unref (src_img);
    pixman_image_unref (dst_img);
    pixman_image_unref (xsrc_img);
    pixman_image_unref (xdst_img);
}
587
588 #define PIXMAN_OP_OUT_REV (PIXMAN_OP_OUT_REVERSE)
589
590 struct
591 {
592 char *testname;
593 int src_fmt;
594 int src_flags;
595 int op;
596 int mask_fmt;
597 int mask_flags;
598 int dst_fmt;
599 }
600 tests_tbl[] =
601 {
602 { "add_8_8_8", PIXMAN_a8, 0, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a8 },
603 { "add_n_8_8", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a8 },
604 { "add_n_8_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 },
605 { "add_n_8_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 },
606 { "add_n_8_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_r5g6b5 },
607 { "add_n_8_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 },
608 { "add_n_8_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a4r4g4b4 },
609 { "add_n_8_2222", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a2r2g2b2 },
610 { "add_n_8_2x10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_x2r10g10b10 },
611 { "add_n_8_2a10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_a8, 0, PIXMAN_a2r10g10b10 },
612 { "add_n_8", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a8 },
613 { "add_n_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
614 { "add_n_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_x8r8g8b8 },
615 { "add_n_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_r5g6b5 },
616 { "add_n_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a1r5g5b5 },
617 { "add_n_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a4r4g4b4 },
618 { "add_n_2222", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a2r2g2b2 },
619 { "add_n_2x10", PIXMAN_a2r10g10b10, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_x2r10g10b10 },
620 { "add_n_2a10", PIXMAN_a2r10g10b10, 1, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a2r10g10b10 },
621 { "add_8_8", PIXMAN_a8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a8 },
622 { "add_x888_x888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_x8r8g8b8 },
623 { "add_8888_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
624 { "add_8888_0565", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_r5g6b5 },
625 { "add_8888_1555", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a1r5g5b5 },
626 { "add_8888_4444", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a4r4g4b4 },
627 { "add_8888_2222", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a2r2g2b2 },
628 { "add_0565_0565", PIXMAN_r5g6b5, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_r5g6b5 },
629 { "add_1555_1555", PIXMAN_a1r5g5b5, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a1r5g5b5 },
630 { "add_0565_2x10", PIXMAN_r5g6b5, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_x2r10g10b10 },
631 { "add_2a10_2a10", PIXMAN_a2r10g10b10, 0, PIXMAN_OP_ADD, PIXMAN_null, 0, PIXMAN_a2r10g10b10 },
632 { "in_n_8_8", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_IN, PIXMAN_a8, 0, PIXMAN_a8 },
633 { "in_8_8", PIXMAN_a8, 0, PIXMAN_OP_IN, PIXMAN_null, 0, PIXMAN_a8 },
634 { "src_n_2222", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a2r2g2b2 },
635 { "src_n_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 },
636 { "src_n_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a1r5g5b5 },
637 { "src_n_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a4r4g4b4 },
638 { "src_n_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 },
639 { "src_n_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
640 { "src_n_2x10", PIXMAN_a2r10g10b10, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x2r10g10b10 },
641 { "src_n_2a10", PIXMAN_a2r10g10b10, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a2r10g10b10 },
642 { "src_8888_0565", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 },
643 { "src_0565_8888", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
644 { "src_8888_4444", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a4r4g4b4 },
645 { "src_8888_2222", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a2r2g2b2 },
646 { "src_8888_2x10", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x2r10g10b10 },
647 { "src_8888_2a10", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a2r10g10b10 },
648 { "src_0888_0565", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 },
649 { "src_0888_8888", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
650 { "src_0888_x888", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 },
651 { "src_0888_8888_rev", PIXMAN_b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 },
652 { "src_0888_0565_rev", PIXMAN_b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 },
653 { "src_x888_x888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 },
654 { "src_x888_8888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
655 { "src_8888_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
656 { "src_0565_0565", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 },
657 { "src_1555_0565", PIXMAN_a1r5g5b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 },
658 { "src_0565_1555", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a1r5g5b5 },
659 { "src_8_8", PIXMAN_a8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8 },
660 { "src_n_8", PIXMAN_a8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8 },
661 { "src_n_8_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 },
662 { "src_n_8_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 },
663 { "src_n_8_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a4r4g4b4 },
664 { "src_n_8_2222", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a2r2g2b2 },
665 { "src_n_8_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 },
666 { "src_n_8_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 },
667 { "src_n_8_2x10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_x2r10g10b10 },
668 { "src_n_8_2a10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a2r10g10b10 },
669 { "src_8888_8_0565", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 },
670 { "src_0888_8_0565", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 },
671 { "src_0888_8_8888", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 },
672 { "src_0888_8_x888", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 },
673 { "src_x888_8_x888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 },
674 { "src_x888_8_8888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 },
675 { "src_0565_8_0565", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 },
676 { "src_1555_8_0565", PIXMAN_a1r5g5b5, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 },
677 { "src_0565_8_1555", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 },
678 { "over_n_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_x8r8g8b8 },
679 { "over_n_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
680 { "over_n_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_r5g6b5 },
681 { "over_n_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_a1r5g5b5 },
682 { "over_8888_0565", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_r5g6b5 },
683 { "over_8888_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
684 { "over_8888_x888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_null, 0, PIXMAN_x8r8g8b8 },
685 { "over_x888_8_0565", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_r5g6b5 },
686 { "over_x888_8_8888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 },
687 { "over_n_8_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_r5g6b5 },
688 { "over_n_8_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 },
689 { "over_n_8_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_a4r4g4b4 },
690 { "over_n_8_2222", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_a2r2g2b2 },
691 { "over_n_8_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 },
692 { "over_n_8_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 },
693 { "over_n_8_2x10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_x2r10g10b10 },
694 { "over_n_8_2a10", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8, 0, PIXMAN_a2r10g10b10 },
695 { "over_n_8888_8888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_a8r8g8b8 },
696 { "over_n_8888_x888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_x8r8g8b8 },
697 { "over_n_8888_0565_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_r5g6b5 },
698 { "over_n_8888_1555_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_a1r5g5b5 },
699 { "over_n_8888_4444_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_a4r4g4b4 },
700 { "over_n_8888_2222_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_a2r2g2b2 },
701 { "over_n_8888_2x10_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_x2r10g10b10 },
702 { "over_n_8888_2a10_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, 2, PIXMAN_a2r10g10b10 },
703 { "over_8888_n_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 1, PIXMAN_a8r8g8b8 },
704 { "over_8888_n_x888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 1, PIXMAN_x8r8g8b8 },
705 { "over_8888_n_0565", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 1, PIXMAN_r5g6b5 },
706 { "over_8888_n_1555", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 1, PIXMAN_a1r5g5b5 },
707 { "over_x888_n_8888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_OVER, PIXMAN_a8, 1, PIXMAN_a8r8g8b8 },
708 { "outrev_n_8_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8, 0, PIXMAN_r5g6b5 },
709 { "outrev_n_8_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 },
710 { "outrev_n_8_x888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8, 0, PIXMAN_x8r8g8b8 },
711 { "outrev_n_8_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8, 0, PIXMAN_a8r8g8b8 },
712 { "outrev_n_8888_0565_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_r5g6b5 },
713 { "outrev_n_8888_1555_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a1r5g5b5 },
714 { "outrev_n_8888_x888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_x8r8g8b8 },
715 { "outrev_n_8888_8888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a8r8g8b8 },
716 { "over_reverse_n_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER_REVERSE, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
717 { "pixbuf", PIXMAN_x8b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, 0, PIXMAN_a8r8g8b8 },
718 { "rpixbuf", PIXMAN_x8b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, 0, PIXMAN_a8b8g8r8 },
719 };
720
721 int
main(int argc,char * argv[])722 main (int argc, char *argv[])
723 {
724 double x;
725 int i;
726 const char *pattern = NULL;
727 for (i = 1; i < argc; i++)
728 {
729 if (argv[i][0] == '-')
730 {
731 if (strchr (argv[i] + 1, 'b'))
732 {
733 use_scaling = TRUE;
734 filter = PIXMAN_FILTER_BILINEAR;
735 }
736 else if (strchr (argv[i] + 1, 'n'))
737 {
738 use_scaling = TRUE;
739 filter = PIXMAN_FILTER_NEAREST;
740 }
741 }
742 else
743 {
744 pattern = argv[i];
745 }
746 }
747
748 if (!pattern)
749 {
750 printf ("Usage: lowlevel-blt-bench [-b] [-n] pattern\n");
751 printf (" -n : benchmark nearest scaling\n");
752 printf (" -b : benchmark bilinear scaling\n");
753 return 1;
754 }
755
756 src = aligned_malloc (4096, BUFSIZE * 3);
757 memset (src, 0xCC, BUFSIZE * 3);
758 dst = src + (BUFSIZE / 4);
759 mask = dst + (BUFSIZE / 4);
760
761 printf ("Benchmark for a set of most commonly used functions\n");
762 printf ("---\n");
763 printf ("All results are presented in millions of pixels per second\n");
764 printf ("L1 - small Xx1 rectangle (fitting L1 cache), always blitted at the same\n");
765 printf (" memory location with small drift in horizontal direction\n");
766 printf ("L2 - small XxY rectangle (fitting L2 cache), always blitted at the same\n");
767 printf (" memory location with small drift in horizontal direction\n");
768 printf ("M - large %dx%d rectangle, always blitted at the same\n",
769 WIDTH - 64, HEIGHT);
770 printf (" memory location with small drift in horizontal direction\n");
771 printf ("HT - random rectangles with %dx%d average size are copied from\n",
772 TILEWIDTH, TILEWIDTH);
773 printf (" one %dx%d buffer to another, traversing from left to right\n",
774 WIDTH, HEIGHT);
775 printf (" and from top to bottom\n");
776 printf ("VT - random rectangles with %dx%d average size are copied from\n",
777 TILEWIDTH, TILEWIDTH);
778 printf (" one %dx%d buffer to another, traversing from top to bottom\n",
779 WIDTH, HEIGHT);
780 printf (" and from left to right\n");
781 printf ("R - random rectangles with %dx%d average size are copied from\n",
782 TILEWIDTH, TILEWIDTH);
783 printf (" random locations of one %dx%d buffer to another\n",
784 WIDTH, HEIGHT);
785 printf ("RT - as R, but %dx%d average sized rectangles are copied\n",
786 TINYWIDTH, TINYWIDTH);
787 printf ("---\n");
788 bandwidth = x = bench_memcpy ();
789 printf ("reference memcpy speed = %.1fMB/s (%.1fMP/s for 32bpp fills)\n",
790 x / 1000000., x / 4000000);
791 if (use_scaling)
792 {
793 printf ("---\n");
794 if (filter == PIXMAN_FILTER_BILINEAR)
795 printf ("BILINEAR scaling\n");
796 else if (filter == PIXMAN_FILTER_NEAREST)
797 printf ("NEAREST scaling\n");
798 else
799 printf ("UNKNOWN scaling\n");
800 }
801 printf ("---\n");
802
803 for (i = 0; i < ARRAY_LENGTH (tests_tbl); i++)
804 {
805 if (strcmp (pattern, "all") == 0 || strcmp (tests_tbl[i].testname, pattern) == 0)
806 {
807 bench_composite (tests_tbl[i].testname,
808 tests_tbl[i].src_fmt,
809 tests_tbl[i].src_flags,
810 tests_tbl[i].op,
811 tests_tbl[i].mask_fmt,
812 tests_tbl[i].mask_flags,
813 tests_tbl[i].dst_fmt,
814 bandwidth/8);
815 }
816 }
817
818 free (src);
819 return 0;
820 }
821