1 /*
2 * Microbenchmark for math functions.
3 *
4 * Copyright (c) 2018-2020, Arm Limited.
5 * SPDX-License-Identifier: MIT
6 */
7
8 #undef _GNU_SOURCE
9 #define _GNU_SOURCE 1
10 #include <stdint.h>
11 #include <stdlib.h>
12 #include <stdio.h>
13 #include <string.h>
14 #include <time.h>
15 #include <math.h>
16 #include "mathlib.h"
17
#ifndef WANT_VMATH
/* Vector math code is built and benchmarked unless the build overrides
   this setting.  */
# define WANT_VMATH 1
#endif

/* Default number of timed measurements; the best one is reported.  */
#define MEASURE 60
/* Number of elements in each input array.  */
#define N 8000
/* Default number of passes over the input array per measurement.  */
#define ITER 125

/* Input arrays handed to the benchmarked functions.  */
static double A[N];
static float Af[N];
/* Values read from a trace file, and the number of slots in Trace.  */
static double *Trace;
static size_t trace_size;
/* Run-time repeat counts, initialised from the defaults above.  */
static long itercount = ITER;
static long measurecount = MEASURE;
36
#if __aarch64__ && WANT_VMATH
/* Vector of two doubles.  */
typedef __f64x2_t v_double;

#define v_double_len() 2

/* Build a vector from the two consecutive doubles starting at P.  */
static inline v_double
v_double_load (const double *p)
{
  v_double v = {p[0], p[1]};
  return v;
}

/* Build a vector with X in both lanes.  */
static inline v_double
v_double_dup (double x)
{
  v_double v = {x, x};
  return v;
}

/* Vector of four floats.  */
typedef __f32x4_t v_float;

#define v_float_len() 4

/* Build a vector from the four consecutive floats starting at P.  */
static inline v_float
v_float_load (const float *p)
{
  v_float v = {p[0], p[1], p[2], p[3]};
  return v;
}

/* Build a vector with X in all four lanes.  */
static inline v_float
v_float_dup (float x)
{
  v_float v = {x, x, x, x};
  return v;
}
#else
/* Scalar stand-ins so that the vector benchmark code still compiles:
   a "vector" is a single element.  */
typedef double v_double;
typedef float v_float;
#define v_double_len(x) 1
#define v_double_load(x) (x)[0]
#define v_double_dup(x) (x)
#define v_float_len(x) 1
#define v_float_load(x) (x)[0]
#define v_float_dup(x) (x)
#endif
80
/* Identity on double: returns its argument unchanged.  Listed in funtab as
   a benchmark target.  */
static double
dummy (double x)
{
  const double same = x;
  return same;
}
86
/* Identity on float: returns its argument unchanged.  Listed in funtab as
   a benchmark target.  */
static float
dummyf (float x)
{
  const float same = x;
  return same;
}
92
#if WANT_VMATH
#if __aarch64__
/* Identity functions on vector types.  */
static v_double
__v_dummy (v_double x)
{
  return x;
}

static v_float
__v_dummyf (v_float x)
{
  return x;
}

#ifdef __vpcs
/* Identity functions declared with the __vpcs attribute.  */
__vpcs static v_double
__vn_dummy (v_double x)
{
  return x;
}

__vpcs static v_float
__vn_dummyf (v_float x)
{
  return x;
}

/* One-argument adaptors for the two-argument __vpcs pow routines: the
   input is passed as both arguments.  */
__vpcs static v_float
xy__vn_powf (v_float x)
{
  v_float r = __vn_powf (x, x);
  return r;
}

__vpcs static v_float
xy_Z_powf (v_float x)
{
  v_float r = _ZGVnN4vv_powf (x, x);
  return r;
}

__vpcs static v_double
xy__vn_pow (v_double x)
{
  v_double r = __vn_pow (x, x);
  return r;
}

__vpcs static v_double
xy_Z_pow (v_double x)
{
  v_double r = _ZGVnN2vv_pow (x, x);
  return r;
}
#endif

/* One-argument adaptors for the __v_ pow routines: the input is passed as
   both arguments.  */
static v_float
xy__v_powf (v_float x)
{
  v_float r = __v_powf (x, x);
  return r;
}

static v_double
xy__v_pow (v_double x)
{
  v_double r = __v_pow (x, x);
  return r;
}
#endif

/* One-argument adaptors for the __s_ pow routines: the input is passed as
   both arguments.  */
static float
xy__s_powf (float x)
{
  float r = __s_powf (x, x);
  return r;
}

static double
xy__s_pow (double x)
{
  double r = __s_pow (x, x);
  return r;
}
#endif
170
171 static double
xypow(double x)172 xypow (double x)
173 {
174 return pow (x, x);
175 }
176
177 static float
xypowf(float x)178 xypowf (float x)
179 {
180 return powf (x, x);
181 }
182
183 static double
xpow(double x)184 xpow (double x)
185 {
186 return pow (x, 23.4);
187 }
188
189 static float
xpowf(float x)190 xpowf (float x)
191 {
192 return powf (x, 23.4f);
193 }
194
195 static double
ypow(double x)196 ypow (double x)
197 {
198 return pow (2.34, x);
199 }
200
201 static float
ypowf(float x)202 ypowf (float x)
203 {
204 return powf (2.34f, x);
205 }
206
207 static float
sincosf_wrap(float x)208 sincosf_wrap (float x)
209 {
210 float s, c;
211 sincosf (x, &s, &c);
212 return s + c;
213 }
214
215 static const struct fun
216 {
217 const char *name;
218 int prec;
219 int vec;
220 double lo;
221 double hi;
222 union
223 {
224 double (*d) (double);
225 float (*f) (float);
226 v_double (*vd) (v_double);
227 v_float (*vf) (v_float);
228 #ifdef __vpcs
229 __vpcs v_double (*vnd) (v_double);
230 __vpcs v_float (*vnf) (v_float);
231 #endif
232 } fun;
233 } funtab[] = {
234 #define D(func, lo, hi) {#func, 'd', 0, lo, hi, {.d = func}},
235 #define F(func, lo, hi) {#func, 'f', 0, lo, hi, {.f = func}},
236 #define VD(func, lo, hi) {#func, 'd', 'v', lo, hi, {.vd = func}},
237 #define VF(func, lo, hi) {#func, 'f', 'v', lo, hi, {.vf = func}},
238 #define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}},
239 #define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}},
240 D (dummy, 1.0, 2.0)
241 D (exp, -9.9, 9.9)
242 D (exp, 0.5, 1.0)
243 D (exp2, -9.9, 9.9)
244 D (log, 0.01, 11.1)
245 D (log, 0.999, 1.001)
246 D (log2, 0.01, 11.1)
247 D (log2, 0.999, 1.001)
248 {"pow", 'd', 0, 0.01, 11.1, {.d = xypow}},
249 D (xpow, 0.01, 11.1)
250 D (ypow, -9.9, 9.9)
251 D (erf, -6.0, 6.0)
252
253 F (dummyf, 1.0, 2.0)
254 F (expf, -9.9, 9.9)
255 F (exp2f, -9.9, 9.9)
256 F (logf, 0.01, 11.1)
257 F (log2f, 0.01, 11.1)
258 {"powf", 'f', 0, 0.01, 11.1, {.f = xypowf}},
259 F (xpowf, 0.01, 11.1)
260 F (ypowf, -9.9, 9.9)
261 {"sincosf", 'f', 0, 0.1, 0.7, {.f = sincosf_wrap}},
262 {"sincosf", 'f', 0, 0.8, 3.1, {.f = sincosf_wrap}},
263 {"sincosf", 'f', 0, -3.1, 3.1, {.f = sincosf_wrap}},
264 {"sincosf", 'f', 0, 3.3, 33.3, {.f = sincosf_wrap}},
265 {"sincosf", 'f', 0, 100, 1000, {.f = sincosf_wrap}},
266 {"sincosf", 'f', 0, 1e6, 1e32, {.f = sincosf_wrap}},
267 F (sinf, 0.1, 0.7)
268 F (sinf, 0.8, 3.1)
269 F (sinf, -3.1, 3.1)
270 F (sinf, 3.3, 33.3)
271 F (sinf, 100, 1000)
272 F (sinf, 1e6, 1e32)
273 F (cosf, 0.1, 0.7)
274 F (cosf, 0.8, 3.1)
275 F (cosf, -3.1, 3.1)
276 F (cosf, 3.3, 33.3)
277 F (cosf, 100, 1000)
278 F (cosf, 1e6, 1e32)
279 F (erff, -4.0, 4.0)
280 #if WANT_VMATH
281 D (__s_sin, -3.1, 3.1)
282 D (__s_cos, -3.1, 3.1)
283 D (__s_exp, -9.9, 9.9)
284 D (__s_log, 0.01, 11.1)
285 {"__s_pow", 'd', 0, 0.01, 11.1, {.d = xy__s_pow}},
286 F (__s_expf, -9.9, 9.9)
287 F (__s_expf_1u, -9.9, 9.9)
288 F (__s_exp2f, -9.9, 9.9)
289 F (__s_exp2f_1u, -9.9, 9.9)
290 F (__s_logf, 0.01, 11.1)
291 {"__s_powf", 'f', 0, 0.01, 11.1, {.f = xy__s_powf}},
292 F (__s_sinf, -3.1, 3.1)
293 F (__s_cosf, -3.1, 3.1)
294 #if __aarch64__
295 VD (__v_dummy, 1.0, 2.0)
296 VD (__v_sin, -3.1, 3.1)
297 VD (__v_cos, -3.1, 3.1)
298 VD (__v_exp, -9.9, 9.9)
299 VD (__v_log, 0.01, 11.1)
300 {"__v_pow", 'd', 'v', 0.01, 11.1, {.vd = xy__v_pow}},
301 VF (__v_dummyf, 1.0, 2.0)
302 VF (__v_expf, -9.9, 9.9)
303 VF (__v_expf_1u, -9.9, 9.9)
304 VF (__v_exp2f, -9.9, 9.9)
305 VF (__v_exp2f_1u, -9.9, 9.9)
306 VF (__v_logf, 0.01, 11.1)
307 {"__v_powf", 'f', 'v', 0.01, 11.1, {.vf = xy__v_powf}},
308 VF (__v_sinf, -3.1, 3.1)
309 VF (__v_cosf, -3.1, 3.1)
310 #ifdef __vpcs
311 VND (__vn_dummy, 1.0, 2.0)
312 VND (__vn_exp, -9.9, 9.9)
313 VND (_ZGVnN2v_exp, -9.9, 9.9)
314 VND (__vn_log, 0.01, 11.1)
315 VND (_ZGVnN2v_log, 0.01, 11.1)
316 {"__vn_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy__vn_pow}},
317 {"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}},
318 VND (__vn_sin, -3.1, 3.1)
319 VND (_ZGVnN2v_sin, -3.1, 3.1)
320 VND (__vn_cos, -3.1, 3.1)
321 VND (_ZGVnN2v_cos, -3.1, 3.1)
322 VNF (__vn_dummyf, 1.0, 2.0)
323 VNF (__vn_expf, -9.9, 9.9)
324 VNF (_ZGVnN4v_expf, -9.9, 9.9)
325 VNF (__vn_expf_1u, -9.9, 9.9)
326 VNF (__vn_exp2f, -9.9, 9.9)
327 VNF (_ZGVnN4v_exp2f, -9.9, 9.9)
328 VNF (__vn_exp2f_1u, -9.9, 9.9)
329 VNF (__vn_logf, 0.01, 11.1)
330 VNF (_ZGVnN4v_logf, 0.01, 11.1)
331 {"__vn_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy__vn_powf}},
332 {"_ZGVnN4vv_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy_Z_powf}},
333 VNF (__vn_sinf, -3.1, 3.1)
334 VNF (_ZGVnN4v_sinf, -3.1, 3.1)
335 VNF (__vn_cosf, -3.1, 3.1)
336 VNF (_ZGVnN4v_cosf, -3.1, 3.1)
337 #endif
338 #endif
339 #endif
340 {0},
341 #undef F
342 #undef D
343 #undef VF
344 #undef VD
345 #undef VNF
346 #undef VND
347 };
348
349 static void
gen_linear(double lo,double hi)350 gen_linear (double lo, double hi)
351 {
352 for (int i = 0; i < N; i++)
353 A[i] = (lo * (N - i) + hi * i) / N;
354 }
355
356 static void
genf_linear(double lo,double hi)357 genf_linear (double lo, double hi)
358 {
359 for (int i = 0; i < N; i++)
360 Af[i] = (float)(lo * (N - i) + hi * i) / N;
361 }
362
/* Reinterpret the 64 bits of I as a double (no numeric conversion).  */
static inline double
asdouble (uint64_t i)
{
  double value;

  memcpy (&value, &i, sizeof value);
  return value;
}
373
/* State of the pseudo-random generator used by frand.  */
static uint64_t seed = 0x0123456789abcdef;

/* Return the next pseudo-random double, scaled from [1, 2) - 1 onto the
   interval starting at LO with width HI - LO.  Advances `seed`.  */
static double
frand (double lo, double hi)
{
  uint64_t bits;
  double unit;

  seed = 6364136223846793005ULL * seed + 1;
  /* The top 52 bits of the state become the mantissa of a double whose
     exponent field is 0x3ff, i.e. a value in [1, 2).  */
  bits = (seed >> 12) | (0x3ffULL << 52);
  unit = asdouble (bits) - 1.0;
  return lo + (hi - lo) * unit;
}
382
383 static void
gen_rand(double lo,double hi)384 gen_rand (double lo, double hi)
385 {
386 for (int i = 0; i < N; i++)
387 A[i] = frand (lo, hi);
388 }
389
390 static void
genf_rand(double lo,double hi)391 genf_rand (double lo, double hi)
392 {
393 for (int i = 0; i < N; i++)
394 Af[i] = (float)frand (lo, hi);
395 }
396
397 static void
gen_trace(int index)398 gen_trace (int index)
399 {
400 for (int i = 0; i < N; i++)
401 A[i] = Trace[index + i];
402 }
403
404 static void
genf_trace(int index)405 genf_trace (int index)
406 {
407 for (int i = 0; i < N; i++)
408 Af[i] = (float)Trace[index + i];
409 }
410
411 static void
run_thruput(double f (double))412 run_thruput (double f (double))
413 {
414 for (int i = 0; i < N; i++)
415 f (A[i]);
416 }
417
418 static void
runf_thruput(float f (float))419 runf_thruput (float f (float))
420 {
421 for (int i = 0; i < N; i++)
422 f (Af[i]);
423 }
424
425 volatile double zero = 0;
426
427 static void
run_latency(double f (double))428 run_latency (double f (double))
429 {
430 double z = zero;
431 double prev = z;
432 for (int i = 0; i < N; i++)
433 prev = f (A[i] + prev * z);
434 }
435
436 static void
runf_latency(float f (float))437 runf_latency (float f (float))
438 {
439 float z = (float)zero;
440 float prev = z;
441 for (int i = 0; i < N; i++)
442 prev = f (Af[i] + prev * z);
443 }
444
445 static void
run_v_thruput(v_double f (v_double))446 run_v_thruput (v_double f (v_double))
447 {
448 for (int i = 0; i < N; i += v_double_len ())
449 f (v_double_load (A+i));
450 }
451
452 static void
runf_v_thruput(v_float f (v_float))453 runf_v_thruput (v_float f (v_float))
454 {
455 for (int i = 0; i < N; i += v_float_len ())
456 f (v_float_load (Af+i));
457 }
458
459 static void
run_v_latency(v_double f (v_double))460 run_v_latency (v_double f (v_double))
461 {
462 v_double z = v_double_dup (zero);
463 v_double prev = z;
464 for (int i = 0; i < N; i += v_double_len ())
465 prev = f (v_double_load (A+i) + prev * z);
466 }
467
468 static void
runf_v_latency(v_float f (v_float))469 runf_v_latency (v_float f (v_float))
470 {
471 v_float z = v_float_dup (zero);
472 v_float prev = z;
473 for (int i = 0; i < N; i += v_float_len ())
474 prev = f (v_float_load (Af+i) + prev * z);
475 }
476
#ifdef __vpcs
/* Same loops as the run_v_ and runf_v_ functions, for functions declared
   with the __vpcs attribute.  */

/* Call F on successive v_double vectors loaded from A, discarding the
   results.  */
static void
run_vn_thruput (__vpcs v_double f (v_double))
{
  int pos = 0;

  while (pos < N)
    {
      f (v_double_load (A + pos));
      pos += v_double_len ();
    }
}

/* Call F on successive v_float vectors loaded from Af, discarding the
   results.  */
static void
runf_vn_thruput (__vpcs v_float f (v_float))
{
  int pos = 0;

  while (pos < N)
    {
      f (v_float_load (Af + pos));
      pos += v_float_len ();
    }
}

/* Dependency-chained calls of F over the vectors of A.  */
static void
run_vn_latency (__vpcs v_double f (v_double))
{
  v_double z = v_double_dup (zero);
  v_double last = z;
  int pos = 0;

  while (pos < N)
    {
      last = f (v_double_load (A + pos) + last * z);
      pos += v_double_len ();
    }
}

/* Dependency-chained calls of F over the vectors of Af.  */
static void
runf_vn_latency (__vpcs v_float f (v_float))
{
  v_float z = v_float_dup (zero);
  v_float last = z;
  int pos = 0;

  while (pos < N)
    {
      last = f (v_float_load (Af + pos) + last * z);
      pos += v_float_len ();
    }
}
#endif
510
/* Return a timestamp in nanoseconds.

   The value is only meaningful as the difference between two calls.  The
   monotonic clock is used instead of the wall clock so that clock
   adjustments made while a measurement is running cannot shorten, lengthen
   or reverse the measured interval.  Aborts if the clock cannot be read.  */
static uint64_t
tic (void)
{
  struct timespec ts;

  if (clock_gettime (CLOCK_MONOTONIC, &ts) != 0)
    abort ();
  return (uint64_t) ts.tv_sec * 1000000000ULL + (uint64_t) ts.tv_nsec;
}
519
/* Time RUN (F) and leave the smallest elapsed time, in the units returned
   by tic, in the caller's variable `dt`.  RUN (F) is executed once untimed,
   then `measurecount` measurements of `itercount` executions each are
   taken.  `dt` starts at -1, which is the maximum value when `dt` is
   unsigned.  */
#define TIMEIT(run, f) do { \
    dt = -1; \
    run (f); /* Warm up.  */ \
    for (int rep_ = 0; rep_ < measurecount; rep_++) \
      { \
        uint64_t start_ = tic (); \
        for (int pass_ = 0; pass_ < itercount; pass_++) \
          run (f); \
        uint64_t elapsed_ = tic () - start_; \
        if (elapsed_ < dt) \
          dt = elapsed_; \
      } \
  } while (0)
533
534 static void
bench1(const struct fun * f,int type,double lo,double hi)535 bench1 (const struct fun *f, int type, double lo, double hi)
536 {
537 uint64_t dt = 0;
538 uint64_t ns100;
539 const char *s = type == 't' ? "rthruput" : "latency";
540 int vlen = 1;
541
542 if (f->vec && f->prec == 'd')
543 vlen = v_double_len();
544 else if (f->vec && f->prec == 'f')
545 vlen = v_float_len();
546
547 if (f->prec == 'd' && type == 't' && f->vec == 0)
548 TIMEIT (run_thruput, f->fun.d);
549 else if (f->prec == 'd' && type == 'l' && f->vec == 0)
550 TIMEIT (run_latency, f->fun.d);
551 else if (f->prec == 'f' && type == 't' && f->vec == 0)
552 TIMEIT (runf_thruput, f->fun.f);
553 else if (f->prec == 'f' && type == 'l' && f->vec == 0)
554 TIMEIT (runf_latency, f->fun.f);
555 else if (f->prec == 'd' && type == 't' && f->vec == 'v')
556 TIMEIT (run_v_thruput, f->fun.vd);
557 else if (f->prec == 'd' && type == 'l' && f->vec == 'v')
558 TIMEIT (run_v_latency, f->fun.vd);
559 else if (f->prec == 'f' && type == 't' && f->vec == 'v')
560 TIMEIT (runf_v_thruput, f->fun.vf);
561 else if (f->prec == 'f' && type == 'l' && f->vec == 'v')
562 TIMEIT (runf_v_latency, f->fun.vf);
563 #ifdef __vpcs
564 else if (f->prec == 'd' && type == 't' && f->vec == 'n')
565 TIMEIT (run_vn_thruput, f->fun.vnd);
566 else if (f->prec == 'd' && type == 'l' && f->vec == 'n')
567 TIMEIT (run_vn_latency, f->fun.vnd);
568 else if (f->prec == 'f' && type == 't' && f->vec == 'n')
569 TIMEIT (runf_vn_thruput, f->fun.vnf);
570 else if (f->prec == 'f' && type == 'l' && f->vec == 'n')
571 TIMEIT (runf_vn_latency, f->fun.vnf);
572 #endif
573
574 if (type == 't')
575 {
576 ns100 = (100 * dt + itercount * N / 2) / (itercount * N);
577 printf ("%9s %8s: %4u.%02u ns/elem %10llu ns in [%g %g]\n", f->name, s,
578 (unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
579 (unsigned long long) dt, lo, hi);
580 }
581 else if (type == 'l')
582 {
583 ns100 = (100 * dt + itercount * N / vlen / 2) / (itercount * N / vlen);
584 printf ("%9s %8s: %4u.%02u ns/call %10llu ns in [%g %g]\n", f->name, s,
585 (unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
586 (unsigned long long) dt, lo, hi);
587 }
588 fflush (stdout);
589 }
590
591 static void
bench(const struct fun * f,double lo,double hi,int type,int gen)592 bench (const struct fun *f, double lo, double hi, int type, int gen)
593 {
594 if (f->prec == 'd' && gen == 'r')
595 gen_rand (lo, hi);
596 else if (f->prec == 'd' && gen == 'l')
597 gen_linear (lo, hi);
598 else if (f->prec == 'd' && gen == 't')
599 gen_trace (0);
600 else if (f->prec == 'f' && gen == 'r')
601 genf_rand (lo, hi);
602 else if (f->prec == 'f' && gen == 'l')
603 genf_linear (lo, hi);
604 else if (f->prec == 'f' && gen == 't')
605 genf_trace (0);
606
607 if (gen == 't')
608 hi = trace_size / N;
609
610 if (type == 'b' || type == 't')
611 bench1 (f, 't', lo, hi);
612
613 if (type == 'b' || type == 'l')
614 bench1 (f, 'l', lo, hi);
615
616 for (int i = N; i < trace_size; i += N)
617 {
618 if (f->prec == 'd')
619 gen_trace (i);
620 else
621 genf_trace (i);
622
623 lo = i / N;
624 if (type == 'b' || type == 't')
625 bench1 (f, 't', lo, hi);
626
627 if (type == 'b' || type == 'l')
628 bench1 (f, 'l', lo, hi);
629 }
630 }
631
632 static void
readtrace(const char * name)633 readtrace (const char *name)
634 {
635 int n = 0;
636 FILE *f = strcmp (name, "-") == 0 ? stdin : fopen (name, "r");
637 if (!f)
638 {
639 printf ("openning \"%s\" failed: %m\n", name);
640 exit (1);
641 }
642 for (;;)
643 {
644 if (n >= trace_size)
645 {
646 trace_size += N;
647 Trace = realloc (Trace, trace_size * sizeof (Trace[0]));
648 if (Trace == NULL)
649 {
650 printf ("out of memory\n");
651 exit (1);
652 }
653 }
654 if (fscanf (f, "%lf", Trace + n) != 1)
655 break;
656 n++;
657 }
658 if (ferror (f) || n == 0)
659 {
660 printf ("reading \"%s\" failed: %m\n", name);
661 exit (1);
662 }
663 fclose (f);
664 if (n % N == 0)
665 trace_size = n;
666 for (int i = 0; n < trace_size; n++, i++)
667 Trace[n] = Trace[i];
668 }
669
670 static void
usage(void)671 usage (void)
672 {
673 printf ("usage: ./mathbench [-g rand|linear|trace] [-t latency|thruput|both] "
674 "[-i low high] [-f tracefile] [-m measurements] [-c iterations] func "
675 "[func2 ..]\n");
676 printf ("func:\n");
677 printf ("%7s [run all benchmarks]\n", "all");
678 for (const struct fun *f = funtab; f->name; f++)
679 printf ("%7s [low: %g high: %g]\n", f->name, f->lo, f->hi);
680 exit (1);
681 }
682
683 int
main(int argc,char * argv[])684 main (int argc, char *argv[])
685 {
686 int usergen = 0, gen = 'r', type = 'b', all = 0;
687 double lo = 0, hi = 0;
688 const char *tracefile = "-";
689
690 argv++;
691 argc--;
692 for (;;)
693 {
694 if (argc <= 0)
695 usage ();
696 if (argv[0][0] != '-')
697 break;
698 else if (argc >= 3 && strcmp (argv[0], "-i") == 0)
699 {
700 usergen = 1;
701 lo = strtod (argv[1], 0);
702 hi = strtod (argv[2], 0);
703 argv += 3;
704 argc -= 3;
705 }
706 else if (argc >= 2 && strcmp (argv[0], "-m") == 0)
707 {
708 measurecount = strtol (argv[1], 0, 0);
709 argv += 2;
710 argc -= 2;
711 }
712 else if (argc >= 2 && strcmp (argv[0], "-c") == 0)
713 {
714 itercount = strtol (argv[1], 0, 0);
715 argv += 2;
716 argc -= 2;
717 }
718 else if (argc >= 2 && strcmp (argv[0], "-g") == 0)
719 {
720 gen = argv[1][0];
721 if (strchr ("rlt", gen) == 0)
722 usage ();
723 argv += 2;
724 argc -= 2;
725 }
726 else if (argc >= 2 && strcmp (argv[0], "-f") == 0)
727 {
728 gen = 't'; /* -f implies -g trace. */
729 tracefile = argv[1];
730 argv += 2;
731 argc -= 2;
732 }
733 else if (argc >= 2 && strcmp (argv[0], "-t") == 0)
734 {
735 type = argv[1][0];
736 if (strchr ("ltb", type) == 0)
737 usage ();
738 argv += 2;
739 argc -= 2;
740 }
741 else
742 usage ();
743 }
744 if (gen == 't')
745 {
746 readtrace (tracefile);
747 lo = hi = 0;
748 usergen = 1;
749 }
750 while (argc > 0)
751 {
752 int found = 0;
753 all = strcmp (argv[0], "all") == 0;
754 for (const struct fun *f = funtab; f->name; f++)
755 if (all || strcmp (argv[0], f->name) == 0)
756 {
757 found = 1;
758 if (!usergen)
759 {
760 lo = f->lo;
761 hi = f->hi;
762 }
763 bench (f, lo, hi, type, gen);
764 if (usergen && !all)
765 break;
766 }
767 if (!found)
768 printf ("unknown function: %s\n", argv[0]);
769 argv++;
770 argc--;
771 }
772 return 0;
773 }
774