• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2014 RISC OS Open Ltd
3  *
4  * Permission to use, copy, modify, distribute, and sell this software and its
5  * documentation for any purpose is hereby granted without fee, provided that
6  * the above copyright notice appear in all copies and that both that
7  * copyright notice and this permission notice appear in supporting
8  * documentation, and that the name of the copyright holders not be used in
9  * advertising or publicity pertaining to distribution of the software without
10  * specific, written prior permission.  The copyright holders make no
11  * representations about the suitability of this software for any purpose.  It
12  * is provided "as is" without express or implied warranty.
13  *
14  * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
15  * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
16  * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
17  * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
18  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
19  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
20  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
21  * SOFTWARE.
22  *
23  * Author:  Ben Avison (bavison@riscosopen.org)
24  */
25 
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <ctype.h>
30 #include <stdint.h>
31 #include "utils.h"
32 
33 #ifdef HAVE_GETTIMEOFDAY
34 #include <sys/time.h>
35 #else
36 #include <time.h>
37 #endif
38 
39 #define WIDTH  1920
40 #define HEIGHT 1080
41 
42 /* How much data to read to flush all cached data to RAM */
43 #define MAX_L2CACHE_SIZE (8 * 1024 * 1024)
44 
45 #define PAGE_SIZE (4 * 1024)
46 
47 struct bench_info
48 {
49     pixman_op_t           op;
50     pixman_transform_t    transform;
51     pixman_image_t       *src_image;
52     pixman_image_t       *mask_image;
53     pixman_image_t       *dest_image;
54     int32_t               src_x;
55     int32_t               src_y;
56 };
57 
58 typedef struct bench_info bench_info_t;
59 
60 struct box_48_16
61 {
62     pixman_fixed_48_16_t        x1;
63     pixman_fixed_48_16_t        y1;
64     pixman_fixed_48_16_t        x2;
65     pixman_fixed_48_16_t        y2;
66 };
67 
68 typedef struct box_48_16 box_48_16_t;
69 
70 /* This function is copied verbatim from pixman.c. */
71 static pixman_bool_t
compute_transformed_extents(pixman_transform_t * transform,const pixman_box32_t * extents,box_48_16_t * transformed)72 compute_transformed_extents (pixman_transform_t   *transform,
73 			     const pixman_box32_t *extents,
74 			     box_48_16_t          *transformed)
75 {
76     pixman_fixed_48_16_t tx1, ty1, tx2, ty2;
77     pixman_fixed_t x1, y1, x2, y2;
78     int i;
79 
80     x1 = pixman_int_to_fixed (extents->x1) + pixman_fixed_1 / 2;
81     y1 = pixman_int_to_fixed (extents->y1) + pixman_fixed_1 / 2;
82     x2 = pixman_int_to_fixed (extents->x2) - pixman_fixed_1 / 2;
83     y2 = pixman_int_to_fixed (extents->y2) - pixman_fixed_1 / 2;
84 
85     if (!transform)
86     {
87 	transformed->x1 = x1;
88 	transformed->y1 = y1;
89 	transformed->x2 = x2;
90 	transformed->y2 = y2;
91 
92 	return TRUE;
93     }
94 
95     tx1 = ty1 = INT64_MAX;
96     tx2 = ty2 = INT64_MIN;
97 
98     for (i = 0; i < 4; ++i)
99     {
100 	pixman_fixed_48_16_t tx, ty;
101 	pixman_vector_t v;
102 
103 	v.vector[0] = (i & 0x01)? x1 : x2;
104 	v.vector[1] = (i & 0x02)? y1 : y2;
105 	v.vector[2] = pixman_fixed_1;
106 
107 	if (!pixman_transform_point (transform, &v))
108 	    return FALSE;
109 
110 	tx = (pixman_fixed_48_16_t)v.vector[0];
111 	ty = (pixman_fixed_48_16_t)v.vector[1];
112 
113 	if (tx < tx1)
114 	    tx1 = tx;
115 	if (ty < ty1)
116 	    ty1 = ty;
117 	if (tx > tx2)
118 	    tx2 = tx;
119 	if (ty > ty2)
120 	    ty2 = ty;
121     }
122 
123     transformed->x1 = tx1;
124     transformed->y1 = ty1;
125     transformed->x2 = tx2;
126     transformed->y2 = ty2;
127 
128     return TRUE;
129 }
130 
131 static void
create_image(uint32_t width,uint32_t height,pixman_format_code_t format,pixman_filter_t filter,uint32_t ** bits,pixman_image_t ** image)132 create_image (uint32_t                   width,
133               uint32_t                   height,
134               pixman_format_code_t       format,
135               pixman_filter_t            filter,
136               uint32_t                 **bits,
137               pixman_image_t           **image)
138 {
139     uint32_t stride = (width * PIXMAN_FORMAT_BPP (format) + 31) / 32 * 4;
140 
141     *bits = aligned_malloc (PAGE_SIZE, stride * height);
142     memset (*bits, 0xCC, stride * height);
143     *image = pixman_image_create_bits (format, width, height, *bits, stride);
144     pixman_image_set_repeat (*image, PIXMAN_REPEAT_NORMAL);
145     pixman_image_set_filter (*image, filter, NULL, 0);
146 }
147 
148 /* This needs to match the shortest cacheline length we expect to encounter */
149 #define CACHE_CLEAN_INCREMENT 32
150 
151 static void
flush_cache(void)152 flush_cache (void)
153 {
154     static const char clean_space[MAX_L2CACHE_SIZE];
155     volatile const char *x = clean_space;
156     const char *clean_end = clean_space + sizeof clean_space;
157 
158     while (x < clean_end)
159     {
160         (void) *x;
161         x += CACHE_CLEAN_INCREMENT;
162     }
163 }
164 
/* Obtain current time in microseconds modulo 2^32.
 *
 * With HAVE_GETTIMEOFDAY the wall-clock time from gettimeofday () is used;
 * otherwise the value is derived from clock ().
 *
 * Callers are expected to work with differences of two return values, so
 * wrap-around is harmless.  The arithmetic is done in uint32_t: unsigned
 * overflow is well defined and gives the modulo-2^32 result directly,
 * whereas multiplying a signed 32-bit time_t by 1000000 would overflow.
 */
uint32_t
gettimei (void)
{
#ifdef HAVE_GETTIMEOFDAY
    struct timeval tv;

    gettimeofday (&tv, NULL);
    return (uint32_t) tv.tv_sec * 1000000u + (uint32_t) tv.tv_usec;
#else
    return (uint32_t) ((uint64_t) clock () * 1000000 / CLOCKS_PER_SEC);
#endif
}
178 
179 static void
pixman_image_composite_wrapper(const pixman_composite_info_t * info)180 pixman_image_composite_wrapper (const pixman_composite_info_t *info)
181 {
182     pixman_image_composite (info->op,
183                             info->src_image, info->mask_image, info->dest_image,
184                             info->src_x, info->src_y,
185                             info->mask_x, info->mask_y,
186                             info->dest_x, info->dest_y,
187                             info->width, info->height);
188 }
189 
190 static void
pixman_image_composite_empty(const pixman_composite_info_t * info)191 pixman_image_composite_empty (const pixman_composite_info_t *info)
192 {
193     pixman_image_composite (info->op,
194                             info->src_image, info->mask_image, info->dest_image,
195                             info->src_x, info->src_y,
196                             info->mask_x, info->mask_y,
197                             info->dest_x, info->dest_y,
198                             1, 1);
199 }
200 
201 static void
bench(const bench_info_t * bi,uint32_t max_n,uint32_t max_time,uint32_t * ret_n,uint32_t * ret_time,void (* func)(const pixman_composite_info_t * info))202 bench (const bench_info_t *bi,
203        uint32_t            max_n,
204        uint32_t            max_time,
205        uint32_t           *ret_n,
206        uint32_t           *ret_time,
207        void              (*func) (const pixman_composite_info_t *info))
208 {
209     uint32_t n = 0;
210     uint32_t t0;
211     uint32_t t1;
212     uint32_t x = 0;
213     pixman_transform_t t;
214     pixman_composite_info_t info;
215 
216     t = bi->transform;
217     info.op = bi->op;
218     info.src_image = bi->src_image;
219     info.mask_image = bi->mask_image;
220     info.dest_image = bi->dest_image;
221     info.src_x = 0;
222     info.src_y = 0;
223     info.mask_x = 0;
224     info.mask_y = 0;
225     /* info.dest_x set below */
226     info.dest_y = 0;
227     info.width = WIDTH;
228     info.height = HEIGHT;
229 
230     t0 = gettimei ();
231 
232     do
233     {
234 
235         if (++x >= 64)
236             x = 0;
237 
238         info.dest_x = 63 - x;
239 
240         t.matrix[0][2] = pixman_int_to_fixed (bi->src_x + x);
241         t.matrix[1][2] = pixman_int_to_fixed (bi->src_y);
242         pixman_image_set_transform (bi->src_image, &t);
243 
244         if (bi->mask_image)
245             pixman_image_set_transform (bi->mask_image, &t);
246 
247         func (&info);
248         t1 = gettimei ();
249     }
250     while (++n < max_n && (t1 - t0) < max_time);
251 
252     if (ret_n)
253         *ret_n = n;
254 
255     *ret_time = t1 - t0;
256 }
257 
258 int
parse_fixed_argument(char * arg,pixman_fixed_t * value)259 parse_fixed_argument (char *arg, pixman_fixed_t *value)
260 {
261     char *tailptr;
262 
263     *value = pixman_double_to_fixed (strtod (arg, &tailptr));
264 
265     return *tailptr == '\0';
266 }
267 
268 int
parse_arguments(int argc,char * argv[],pixman_transform_t * t,pixman_op_t * op,pixman_format_code_t * src_format,pixman_format_code_t * mask_format,pixman_format_code_t * dest_format)269 parse_arguments (int                   argc,
270                  char                 *argv[],
271                  pixman_transform_t   *t,
272                  pixman_op_t          *op,
273                  pixman_format_code_t *src_format,
274                  pixman_format_code_t *mask_format,
275                  pixman_format_code_t *dest_format)
276 {
277     if (!parse_fixed_argument (*argv, &t->matrix[0][0]))
278         return 0;
279 
280     if (*++argv == NULL)
281         return 1;
282 
283     if (!parse_fixed_argument (*argv, &t->matrix[0][1]))
284         return 0;
285 
286     if (*++argv == NULL)
287         return 1;
288 
289     if (!parse_fixed_argument (*argv, &t->matrix[1][0]))
290         return 0;
291 
292     if (*++argv == NULL)
293         return 1;
294 
295     if (!parse_fixed_argument (*argv, &t->matrix[1][1]))
296         return 0;
297 
298     if (*++argv == NULL)
299         return 1;
300 
301     *op = operator_from_string (*argv);
302     if (*op == PIXMAN_OP_NONE)
303         return 0;
304 
305     if (*++argv == NULL)
306         return 1;
307 
308     *src_format = format_from_string (*argv);
309     if (*src_format == PIXMAN_null)
310         return 0;
311 
312     ++argv;
313     if (argv[0] && argv[1])
314     {
315         *mask_format = format_from_string (*argv);
316         if (*mask_format == PIXMAN_null)
317             return 0;
318         ++argv;
319     }
320     if (*argv)
321     {
322         *dest_format = format_from_string (*argv);
323         if (*dest_format == PIXMAN_null)
324             return 0;
325     }
326     return 1;
327 }
328 
329 static void
run_benchmark(const bench_info_t * bi)330 run_benchmark (const bench_info_t *bi)
331 {
332     uint32_t n;  /* number of iterations in at least 5 seconds */
333     uint32_t t1; /* time taken to do n iterations, microseconds */
334     uint32_t t2; /* calling overhead for n iterations, microseconds */
335 
336     flush_cache ();
337     bench (bi, UINT32_MAX, 5000000, &n, &t1, pixman_image_composite_wrapper);
338     bench (bi, n, UINT32_MAX, NULL, &t2, pixman_image_composite_empty);
339 
340     /* The result indicates the output rate in megapixels/second */
341     printf ("%6.2f\n", (double) n * WIDTH * HEIGHT / (t1 - t2));
342 }
343 
344 
345 int
main(int argc,char * argv[])346 main (int argc, char *argv[])
347 {
348     bench_info_t         binfo;
349     pixman_filter_t      filter      = PIXMAN_FILTER_NEAREST;
350     pixman_format_code_t src_format  = PIXMAN_a8r8g8b8;
351     pixman_format_code_t mask_format = 0;
352     pixman_format_code_t dest_format = PIXMAN_a8r8g8b8;
353     pixman_box32_t       dest_box    = { 0, 0, WIDTH, HEIGHT };
354     box_48_16_t          transformed = { 0 };
355     int32_t xmin, ymin, xmax, ymax;
356     uint32_t *src, *mask, *dest;
357 
358     binfo.op         = PIXMAN_OP_SRC;
359     binfo.mask_image = NULL;
360     pixman_transform_init_identity (&binfo.transform);
361 
362     ++argv;
363     if (*argv && (*argv)[0] == '-' && (*argv)[1] == 'n')
364     {
365         filter = PIXMAN_FILTER_NEAREST;
366         ++argv;
367         --argc;
368     }
369 
370     if (*argv && (*argv)[0] == '-' && (*argv)[1] == 'b')
371     {
372         filter = PIXMAN_FILTER_BILINEAR;
373         ++argv;
374         --argc;
375     }
376 
377     if (argc == 1 ||
378         !parse_arguments (argc, argv, &binfo.transform, &binfo.op,
379                           &src_format, &mask_format, &dest_format))
380     {
381         printf ("Usage: affine-bench [-n] [-b] axx [axy] [ayx] [ayy] [combine type]\n");
382         printf ("                    [src format] [mask format] [dest format]\n");
383         printf ("  -n : nearest scaling (default)\n");
384         printf ("  -b : bilinear scaling\n");
385         printf ("  axx : x_out:x_in factor\n");
386         printf ("  axy : x_out:y_in factor (default 0)\n");
387         printf ("  ayx : y_out:x_in factor (default 0)\n");
388         printf ("  ayy : y_out:y_in factor (default 1)\n");
389         printf ("  combine type : src, over, in etc (default src)\n");
390         printf ("  src format : a8r8g8b8, r5g6b5 etc (default a8r8g8b8)\n");
391         printf ("  mask format : as for src format, but no mask used if omitted\n");
392         printf ("  dest format : as for src format (default a8r8g8b8)\n");
393         printf ("The output is a single number in megapixels/second.\n");
394 
395         return EXIT_FAILURE;
396     }
397 
398     /* Compute required extents for source and mask image so they qualify
399      * for COVER fast paths and get the flags in pixman.c:analyze_extent().
400      * These computations are for FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,
401      * but at the same time they also allow COVER_CLIP_NEAREST.
402      */
403     compute_transformed_extents (&binfo.transform, &dest_box, &transformed);
404     xmin = pixman_fixed_to_int (transformed.x1 - pixman_fixed_1 / 2);
405     ymin = pixman_fixed_to_int (transformed.y1 - pixman_fixed_1 / 2);
406     xmax = pixman_fixed_to_int (transformed.x2 + pixman_fixed_1 / 2);
407     ymax = pixman_fixed_to_int (transformed.y2 + pixman_fixed_1 / 2);
408     /* Note:
409      * The upper limits can be reduced to the following when fetchers
410      * are guaranteed to not access pixels with zero weight. This concerns
411      * particularly all bilinear samplers.
412      *
413      * xmax = pixman_fixed_to_int (transformed.x2 + pixman_fixed_1 / 2 - pixman_fixed_e);
414      * ymax = pixman_fixed_to_int (transformed.y2 + pixman_fixed_1 / 2 - pixman_fixed_e);
415      * This is equivalent to subtracting 0.5 and rounding up, rather than
416      * subtracting 0.5, rounding down and adding 1.
417      */
418     binfo.src_x = -xmin;
419     binfo.src_y = -ymin;
420 
421     /* Always over-allocate width by 64 pixels for all src, mask and dst,
422      * so that we can iterate over an x-offset 0..63 in bench ().
423      * This is similar to lowlevel-blt-bench, which uses the same method
424      * to hit different cacheline misalignments.
425      */
426     create_image (xmax - xmin + 64, ymax - ymin + 1, src_format, filter,
427                   &src, &binfo.src_image);
428 
429     if (mask_format)
430     {
431         create_image (xmax - xmin + 64, ymax - ymin + 1, mask_format, filter,
432                       &mask, &binfo.mask_image);
433 
434         if ((PIXMAN_FORMAT_R(mask_format) ||
435              PIXMAN_FORMAT_G(mask_format) ||
436              PIXMAN_FORMAT_B(mask_format)))
437         {
438             pixman_image_set_component_alpha (binfo.mask_image, 1);
439         }
440     }
441 
442     create_image (WIDTH + 64, HEIGHT, dest_format, filter,
443                   &dest, &binfo.dest_image);
444 
445     run_benchmark (&binfo);
446 
447     return EXIT_SUCCESS;
448 }
449