1 /*
2 * Copyright © 2014 RISC OS Open Ltd
3 *
4 * Permission to use, copy, modify, distribute, and sell this software and its
5 * documentation for any purpose is hereby granted without fee, provided that
6 * the above copyright notice appear in all copies and that both that
7 * copyright notice and this permission notice appear in supporting
8 * documentation, and that the name of the copyright holders not be used in
9 * advertising or publicity pertaining to distribution of the software without
10 * specific, written prior permission. The copyright holders make no
11 * representations about the suitability of this software for any purpose. It
12 * is provided "as is" without express or implied warranty.
13 *
14 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
15 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
16 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
17 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
18 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
19 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
20 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
21 * SOFTWARE.
22 *
23 * Author: Ben Avison (bavison@riscosopen.org)
24 */
25
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <ctype.h>
30 #include <stdint.h>
31 #include "utils.h"
32
33 #ifdef HAVE_GETTIMEOFDAY
34 #include <sys/time.h>
35 #else
36 #include <time.h>
37 #endif
38
/* Dimensions, in pixels, of the destination area composited on each
 * benchmark iteration.
 */
#define WIDTH 1920
#define HEIGHT 1080

/* How much data to read to flush all cached data to RAM */
#define MAX_L2CACHE_SIZE (8 * 1024 * 1024)

/* Alignment, in bytes, requested for every pixel buffer allocation */
#define PAGE_SIZE (4 * 1024)
46
47 struct bench_info
48 {
49 pixman_op_t op;
50 pixman_transform_t transform;
51 pixman_image_t *src_image;
52 pixman_image_t *mask_image;
53 pixman_image_t *dest_image;
54 int32_t src_x;
55 int32_t src_y;
56 };
57
58 typedef struct bench_info bench_info_t;
59
60 struct box_48_16
61 {
62 pixman_fixed_48_16_t x1;
63 pixman_fixed_48_16_t y1;
64 pixman_fixed_48_16_t x2;
65 pixman_fixed_48_16_t y2;
66 };
67
68 typedef struct box_48_16 box_48_16_t;
69
70 /* This function is copied verbatim from pixman.c. */
71 static pixman_bool_t
compute_transformed_extents(pixman_transform_t * transform,const pixman_box32_t * extents,box_48_16_t * transformed)72 compute_transformed_extents (pixman_transform_t *transform,
73 const pixman_box32_t *extents,
74 box_48_16_t *transformed)
75 {
76 pixman_fixed_48_16_t tx1, ty1, tx2, ty2;
77 pixman_fixed_t x1, y1, x2, y2;
78 int i;
79
80 x1 = pixman_int_to_fixed (extents->x1) + pixman_fixed_1 / 2;
81 y1 = pixman_int_to_fixed (extents->y1) + pixman_fixed_1 / 2;
82 x2 = pixman_int_to_fixed (extents->x2) - pixman_fixed_1 / 2;
83 y2 = pixman_int_to_fixed (extents->y2) - pixman_fixed_1 / 2;
84
85 if (!transform)
86 {
87 transformed->x1 = x1;
88 transformed->y1 = y1;
89 transformed->x2 = x2;
90 transformed->y2 = y2;
91
92 return TRUE;
93 }
94
95 tx1 = ty1 = INT64_MAX;
96 tx2 = ty2 = INT64_MIN;
97
98 for (i = 0; i < 4; ++i)
99 {
100 pixman_fixed_48_16_t tx, ty;
101 pixman_vector_t v;
102
103 v.vector[0] = (i & 0x01)? x1 : x2;
104 v.vector[1] = (i & 0x02)? y1 : y2;
105 v.vector[2] = pixman_fixed_1;
106
107 if (!pixman_transform_point (transform, &v))
108 return FALSE;
109
110 tx = (pixman_fixed_48_16_t)v.vector[0];
111 ty = (pixman_fixed_48_16_t)v.vector[1];
112
113 if (tx < tx1)
114 tx1 = tx;
115 if (ty < ty1)
116 ty1 = ty;
117 if (tx > tx2)
118 tx2 = tx;
119 if (ty > ty2)
120 ty2 = ty;
121 }
122
123 transformed->x1 = tx1;
124 transformed->y1 = ty1;
125 transformed->x2 = tx2;
126 transformed->y2 = ty2;
127
128 return TRUE;
129 }
130
131 static void
create_image(uint32_t width,uint32_t height,pixman_format_code_t format,pixman_filter_t filter,uint32_t ** bits,pixman_image_t ** image)132 create_image (uint32_t width,
133 uint32_t height,
134 pixman_format_code_t format,
135 pixman_filter_t filter,
136 uint32_t **bits,
137 pixman_image_t **image)
138 {
139 uint32_t stride = (width * PIXMAN_FORMAT_BPP (format) + 31) / 32 * 4;
140
141 *bits = aligned_malloc (PAGE_SIZE, stride * height);
142 memset (*bits, 0xCC, stride * height);
143 *image = pixman_image_create_bits (format, width, height, *bits, stride);
144 pixman_image_set_repeat (*image, PIXMAN_REPEAT_NORMAL);
145 pixman_image_set_filter (*image, filter, NULL, 0);
146 }
147
148 /* This needs to match the shortest cacheline length we expect to encounter */
149 #define CACHE_CLEAN_INCREMENT 32
150
151 static void
flush_cache(void)152 flush_cache (void)
153 {
154 static const char clean_space[MAX_L2CACHE_SIZE];
155 volatile const char *x = clean_space;
156 const char *clean_end = clean_space + sizeof clean_space;
157
158 while (x < clean_end)
159 {
160 (void) *x;
161 x += CACHE_CLEAN_INCREMENT;
162 }
163 }
164
/* Obtain current time in microseconds modulo 2^32
 *
 * With HAVE_GETTIMEOFDAY the wall-clock time from gettimeofday () is used;
 * otherwise the value is derived from clock ().  Callers are expected to
 * work with differences between two return values, so wrap-around of the
 * 32-bit result is harmless.
 */
uint32_t
gettimei (void)
{
#ifdef HAVE_GETTIMEOFDAY
    struct timeval tv;

    gettimeofday (&tv, NULL);

    /* Reduce to uint32_t before multiplying so that the result wraps modulo
     * 2^32 by definition.  Multiplying tv_sec directly is signed arithmetic
     * in time_t, which overflows (undefined behaviour) when time_t is only
     * 32 bits wide.
     */
    return (uint32_t) tv.tv_sec * 1000000u + (uint32_t) tv.tv_usec;
#else
    /* Scale in 64 bits so the multiplication cannot overflow, then truncate */
    return (uint32_t) ((uint64_t) clock () * 1000000 / CLOCKS_PER_SEC);
#endif
}
178
179 static void
pixman_image_composite_wrapper(const pixman_composite_info_t * info)180 pixman_image_composite_wrapper (const pixman_composite_info_t *info)
181 {
182 pixman_image_composite (info->op,
183 info->src_image, info->mask_image, info->dest_image,
184 info->src_x, info->src_y,
185 info->mask_x, info->mask_y,
186 info->dest_x, info->dest_y,
187 info->width, info->height);
188 }
189
190 static void
pixman_image_composite_empty(const pixman_composite_info_t * info)191 pixman_image_composite_empty (const pixman_composite_info_t *info)
192 {
193 pixman_image_composite (info->op,
194 info->src_image, info->mask_image, info->dest_image,
195 info->src_x, info->src_y,
196 info->mask_x, info->mask_y,
197 info->dest_x, info->dest_y,
198 1, 1);
199 }
200
201 static void
bench(const bench_info_t * bi,uint32_t max_n,uint32_t max_time,uint32_t * ret_n,uint32_t * ret_time,void (* func)(const pixman_composite_info_t * info))202 bench (const bench_info_t *bi,
203 uint32_t max_n,
204 uint32_t max_time,
205 uint32_t *ret_n,
206 uint32_t *ret_time,
207 void (*func) (const pixman_composite_info_t *info))
208 {
209 uint32_t n = 0;
210 uint32_t t0;
211 uint32_t t1;
212 uint32_t x = 0;
213 pixman_transform_t t;
214 pixman_composite_info_t info;
215
216 t = bi->transform;
217 info.op = bi->op;
218 info.src_image = bi->src_image;
219 info.mask_image = bi->mask_image;
220 info.dest_image = bi->dest_image;
221 info.src_x = 0;
222 info.src_y = 0;
223 info.mask_x = 0;
224 info.mask_y = 0;
225 /* info.dest_x set below */
226 info.dest_y = 0;
227 info.width = WIDTH;
228 info.height = HEIGHT;
229
230 t0 = gettimei ();
231
232 do
233 {
234
235 if (++x >= 64)
236 x = 0;
237
238 info.dest_x = 63 - x;
239
240 t.matrix[0][2] = pixman_int_to_fixed (bi->src_x + x);
241 t.matrix[1][2] = pixman_int_to_fixed (bi->src_y);
242 pixman_image_set_transform (bi->src_image, &t);
243
244 if (bi->mask_image)
245 pixman_image_set_transform (bi->mask_image, &t);
246
247 func (&info);
248 t1 = gettimei ();
249 }
250 while (++n < max_n && (t1 - t0) < max_time);
251
252 if (ret_n)
253 *ret_n = n;
254
255 *ret_time = t1 - t0;
256 }
257
258 int
parse_fixed_argument(char * arg,pixman_fixed_t * value)259 parse_fixed_argument (char *arg, pixman_fixed_t *value)
260 {
261 char *tailptr;
262
263 *value = pixman_double_to_fixed (strtod (arg, &tailptr));
264
265 return *tailptr == '\0';
266 }
267
268 int
parse_arguments(int argc,char * argv[],pixman_transform_t * t,pixman_op_t * op,pixman_format_code_t * src_format,pixman_format_code_t * mask_format,pixman_format_code_t * dest_format)269 parse_arguments (int argc,
270 char *argv[],
271 pixman_transform_t *t,
272 pixman_op_t *op,
273 pixman_format_code_t *src_format,
274 pixman_format_code_t *mask_format,
275 pixman_format_code_t *dest_format)
276 {
277 if (!parse_fixed_argument (*argv, &t->matrix[0][0]))
278 return 0;
279
280 if (*++argv == NULL)
281 return 1;
282
283 if (!parse_fixed_argument (*argv, &t->matrix[0][1]))
284 return 0;
285
286 if (*++argv == NULL)
287 return 1;
288
289 if (!parse_fixed_argument (*argv, &t->matrix[1][0]))
290 return 0;
291
292 if (*++argv == NULL)
293 return 1;
294
295 if (!parse_fixed_argument (*argv, &t->matrix[1][1]))
296 return 0;
297
298 if (*++argv == NULL)
299 return 1;
300
301 *op = operator_from_string (*argv);
302 if (*op == PIXMAN_OP_NONE)
303 return 0;
304
305 if (*++argv == NULL)
306 return 1;
307
308 *src_format = format_from_string (*argv);
309 if (*src_format == PIXMAN_null)
310 return 0;
311
312 ++argv;
313 if (argv[0] && argv[1])
314 {
315 *mask_format = format_from_string (*argv);
316 if (*mask_format == PIXMAN_null)
317 return 0;
318 ++argv;
319 }
320 if (*argv)
321 {
322 *dest_format = format_from_string (*argv);
323 if (*dest_format == PIXMAN_null)
324 return 0;
325 }
326 return 1;
327 }
328
329 static void
run_benchmark(const bench_info_t * bi)330 run_benchmark (const bench_info_t *bi)
331 {
332 uint32_t n; /* number of iterations in at least 5 seconds */
333 uint32_t t1; /* time taken to do n iterations, microseconds */
334 uint32_t t2; /* calling overhead for n iterations, microseconds */
335
336 flush_cache ();
337 bench (bi, UINT32_MAX, 5000000, &n, &t1, pixman_image_composite_wrapper);
338 bench (bi, n, UINT32_MAX, NULL, &t2, pixman_image_composite_empty);
339
340 /* The result indicates the output rate in megapixels/second */
341 printf ("%6.2f\n", (double) n * WIDTH * HEIGHT / (t1 - t2));
342 }
343
344
345 int
main(int argc,char * argv[])346 main (int argc, char *argv[])
347 {
348 bench_info_t binfo;
349 pixman_filter_t filter = PIXMAN_FILTER_NEAREST;
350 pixman_format_code_t src_format = PIXMAN_a8r8g8b8;
351 pixman_format_code_t mask_format = 0;
352 pixman_format_code_t dest_format = PIXMAN_a8r8g8b8;
353 pixman_box32_t dest_box = { 0, 0, WIDTH, HEIGHT };
354 box_48_16_t transformed = { 0 };
355 int32_t xmin, ymin, xmax, ymax;
356 uint32_t *src, *mask, *dest;
357
358 binfo.op = PIXMAN_OP_SRC;
359 binfo.mask_image = NULL;
360 pixman_transform_init_identity (&binfo.transform);
361
362 ++argv;
363 if (*argv && (*argv)[0] == '-' && (*argv)[1] == 'n')
364 {
365 filter = PIXMAN_FILTER_NEAREST;
366 ++argv;
367 --argc;
368 }
369
370 if (*argv && (*argv)[0] == '-' && (*argv)[1] == 'b')
371 {
372 filter = PIXMAN_FILTER_BILINEAR;
373 ++argv;
374 --argc;
375 }
376
377 if (argc == 1 ||
378 !parse_arguments (argc, argv, &binfo.transform, &binfo.op,
379 &src_format, &mask_format, &dest_format))
380 {
381 printf ("Usage: affine-bench [-n] [-b] axx [axy] [ayx] [ayy] [combine type]\n");
382 printf (" [src format] [mask format] [dest format]\n");
383 printf (" -n : nearest scaling (default)\n");
384 printf (" -b : bilinear scaling\n");
385 printf (" axx : x_out:x_in factor\n");
386 printf (" axy : x_out:y_in factor (default 0)\n");
387 printf (" ayx : y_out:x_in factor (default 0)\n");
388 printf (" ayy : y_out:y_in factor (default 1)\n");
389 printf (" combine type : src, over, in etc (default src)\n");
390 printf (" src format : a8r8g8b8, r5g6b5 etc (default a8r8g8b8)\n");
391 printf (" mask format : as for src format, but no mask used if omitted\n");
392 printf (" dest format : as for src format (default a8r8g8b8)\n");
393 printf ("The output is a single number in megapixels/second.\n");
394
395 return EXIT_FAILURE;
396 }
397
398 /* Compute required extents for source and mask image so they qualify
399 * for COVER fast paths and get the flags in pixman.c:analyze_extent().
400 * These computations are for FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,
401 * but at the same time they also allow COVER_CLIP_NEAREST.
402 */
403 compute_transformed_extents (&binfo.transform, &dest_box, &transformed);
404 xmin = pixman_fixed_to_int (transformed.x1 - pixman_fixed_1 / 2);
405 ymin = pixman_fixed_to_int (transformed.y1 - pixman_fixed_1 / 2);
406 xmax = pixman_fixed_to_int (transformed.x2 + pixman_fixed_1 / 2);
407 ymax = pixman_fixed_to_int (transformed.y2 + pixman_fixed_1 / 2);
408 /* Note:
409 * The upper limits can be reduced to the following when fetchers
410 * are guaranteed to not access pixels with zero weight. This concerns
411 * particularly all bilinear samplers.
412 *
413 * xmax = pixman_fixed_to_int (transformed.x2 + pixman_fixed_1 / 2 - pixman_fixed_e);
414 * ymax = pixman_fixed_to_int (transformed.y2 + pixman_fixed_1 / 2 - pixman_fixed_e);
415 * This is equivalent to subtracting 0.5 and rounding up, rather than
416 * subtracting 0.5, rounding down and adding 1.
417 */
418 binfo.src_x = -xmin;
419 binfo.src_y = -ymin;
420
421 /* Always over-allocate width by 64 pixels for all src, mask and dst,
422 * so that we can iterate over an x-offset 0..63 in bench ().
423 * This is similar to lowlevel-blt-bench, which uses the same method
424 * to hit different cacheline misalignments.
425 */
426 create_image (xmax - xmin + 64, ymax - ymin + 1, src_format, filter,
427 &src, &binfo.src_image);
428
429 if (mask_format)
430 {
431 create_image (xmax - xmin + 64, ymax - ymin + 1, mask_format, filter,
432 &mask, &binfo.mask_image);
433
434 if ((PIXMAN_FORMAT_R(mask_format) ||
435 PIXMAN_FORMAT_G(mask_format) ||
436 PIXMAN_FORMAT_B(mask_format)))
437 {
438 pixman_image_set_component_alpha (binfo.mask_image, 1);
439 }
440 }
441
442 create_image (WIDTH + 64, HEIGHT, dest_format, filter,
443 &dest, &binfo.dest_image);
444
445 run_benchmark (&binfo);
446
447 return EXIT_SUCCESS;
448 }
449