• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Microbenchmark for math functions.
3  *
4  * Copyright (c) 2018-2020, Arm Limited.
5  * SPDX-License-Identifier: MIT
6  */
7 
8 #undef _GNU_SOURCE
9 #define _GNU_SOURCE 1
10 #include <stdint.h>
11 #include <stdlib.h>
12 #include <stdio.h>
13 #include <string.h>
14 #include <time.h>
15 #include <math.h>
16 #include "mathlib.h"
17 
#ifndef WANT_VMATH
/* Enable the build of vector math code.  */
# define WANT_VMATH 1
#endif

/* Number of measurements, best result is reported.  */
#define MEASURE 60
/* Array size.  */
#define N 8000
/* Iterations over the array.  */
#define ITER 125

/* Input trace read from a file (see readtrace) and its element count.  */
static double *Trace;
static size_t trace_size;
/* Benchmark inputs in double and single precision.  */
static double A[N];
static float Af[N];
/* Runtime-adjustable copies of MEASURE and ITER (options -m and -c).  */
static long measurecount = MEASURE;
static long itercount = ITER;
36 
#if __aarch64__ && WANT_VMATH
/* Vector of 2 doubles.  */
typedef __f64x2_t v_double;

#define v_double_len() 2

/* Load 2 consecutive doubles starting at P into a vector.  */
static inline v_double
v_double_load (const double *p)
{
  return (v_double){p[0], p[1]};
}

/* Broadcast X to every lane.  */
static inline v_double
v_double_dup (double x)
{
  return (v_double){x, x};
}

/* Vector of 4 floats.  */
typedef __f32x4_t v_float;

#define v_float_len() 4

/* Load 4 consecutive floats starting at P into a vector.  */
static inline v_float
v_float_load (const float *p)
{
  return (v_float){p[0], p[1], p[2], p[3]};
}

/* Broadcast X to every lane.  */
static inline v_float
v_float_dup (float x)
{
  return (v_float){x, x, x, x};
}
#else
/* dummy definitions to make things compile: a "vector" is one scalar.  */
typedef double v_double;
typedef float v_float;
#define v_double_len(x) 1
#define v_double_load(x) ((x)[0])
#define v_double_dup(x) (x)
#define v_float_len(x) 1
#define v_float_load(x) ((x)[0])
#define v_float_dup(x) (x)
#endif
80 
/* Identity function: benchmarking it measures the harness overhead for
   double precision scalar calls.  */
static double
dummy (double x)
{
  return x;
}
86 
/* Identity function: benchmarking it measures the harness overhead for
   single precision scalar calls.  */
static float
dummyf (float x)
{
  return x;
}
92 
#if WANT_VMATH
#if __aarch64__
/* Identity functions for the vector calling conventions.  */
static v_double
__v_dummy (v_double x)
{
  return x;
}

static v_float
__v_dummyf (v_float x)
{
  return x;
}

#ifdef __vpcs
__vpcs static v_double
__vn_dummy (v_double x)
{
  return x;
}

__vpcs static v_float
__vn_dummyf (v_float x)
{
  return x;
}

/* One-argument adapters for the two-argument pow variants: each calls the
   underlying routine with X as both base and exponent so that it fits the
   single-argument function table.  */
__vpcs static v_float
xy__vn_powf (v_float x)
{
  return __vn_powf (x, x);
}

__vpcs static v_float
xy_Z_powf (v_float x)
{
  return _ZGVnN4vv_powf (x, x);
}

__vpcs static v_double
xy__vn_pow (v_double x)
{
  return __vn_pow (x, x);
}

__vpcs static v_double
xy_Z_pow (v_double x)
{
  return _ZGVnN2vv_pow (x, x);
}
#endif

static v_float
xy__v_powf (v_float x)
{
  return __v_powf (x, x);
}

static v_double
xy__v_pow (v_double x)
{
  return __v_pow (x, x);
}
#endif

static float
xy__s_powf (float x)
{
  return __s_powf (x, x);
}

static double
xy__s_pow (double x)
{
  return __s_pow (x, x);
}
#endif
170 
171 static double
xypow(double x)172 xypow (double x)
173 {
174   return pow (x, x);
175 }
176 
177 static float
xypowf(float x)178 xypowf (float x)
179 {
180   return powf (x, x);
181 }
182 
183 static double
xpow(double x)184 xpow (double x)
185 {
186   return pow (x, 23.4);
187 }
188 
189 static float
xpowf(float x)190 xpowf (float x)
191 {
192   return powf (x, 23.4f);
193 }
194 
195 static double
ypow(double x)196 ypow (double x)
197 {
198   return pow (2.34, x);
199 }
200 
201 static float
ypowf(float x)202 ypowf (float x)
203 {
204   return powf (2.34f, x);
205 }
206 
207 static float
sincosf_wrap(float x)208 sincosf_wrap (float x)
209 {
210   float s, c;
211   sincosf (x, &s, &c);
212   return s + c;
213 }
214 
215 static const struct fun
216 {
217   const char *name;
218   int prec;
219   int vec;
220   double lo;
221   double hi;
222   union
223   {
224     double (*d) (double);
225     float (*f) (float);
226     v_double (*vd) (v_double);
227     v_float (*vf) (v_float);
228 #ifdef __vpcs
229     __vpcs v_double (*vnd) (v_double);
230     __vpcs v_float (*vnf) (v_float);
231 #endif
232   } fun;
233 } funtab[] = {
234 #define D(func, lo, hi) {#func, 'd', 0, lo, hi, {.d = func}},
235 #define F(func, lo, hi) {#func, 'f', 0, lo, hi, {.f = func}},
236 #define VD(func, lo, hi) {#func, 'd', 'v', lo, hi, {.vd = func}},
237 #define VF(func, lo, hi) {#func, 'f', 'v', lo, hi, {.vf = func}},
238 #define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}},
239 #define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}},
240 D (dummy, 1.0, 2.0)
241 D (exp, -9.9, 9.9)
242 D (exp, 0.5, 1.0)
243 D (exp2, -9.9, 9.9)
244 D (log, 0.01, 11.1)
245 D (log, 0.999, 1.001)
246 D (log2, 0.01, 11.1)
247 D (log2, 0.999, 1.001)
248 {"pow", 'd', 0, 0.01, 11.1, {.d = xypow}},
249 D (xpow, 0.01, 11.1)
250 D (ypow, -9.9, 9.9)
251 D (erf, -6.0, 6.0)
252 
253 F (dummyf, 1.0, 2.0)
254 F (expf, -9.9, 9.9)
255 F (exp2f, -9.9, 9.9)
256 F (logf, 0.01, 11.1)
257 F (log2f, 0.01, 11.1)
258 {"powf", 'f', 0, 0.01, 11.1, {.f = xypowf}},
259 F (xpowf, 0.01, 11.1)
260 F (ypowf, -9.9, 9.9)
261 {"sincosf", 'f', 0, 0.1, 0.7, {.f = sincosf_wrap}},
262 {"sincosf", 'f', 0, 0.8, 3.1, {.f = sincosf_wrap}},
263 {"sincosf", 'f', 0, -3.1, 3.1, {.f = sincosf_wrap}},
264 {"sincosf", 'f', 0, 3.3, 33.3, {.f = sincosf_wrap}},
265 {"sincosf", 'f', 0, 100, 1000, {.f = sincosf_wrap}},
266 {"sincosf", 'f', 0, 1e6, 1e32, {.f = sincosf_wrap}},
267 F (sinf, 0.1, 0.7)
268 F (sinf, 0.8, 3.1)
269 F (sinf, -3.1, 3.1)
270 F (sinf, 3.3, 33.3)
271 F (sinf, 100, 1000)
272 F (sinf, 1e6, 1e32)
273 F (cosf, 0.1, 0.7)
274 F (cosf, 0.8, 3.1)
275 F (cosf, -3.1, 3.1)
276 F (cosf, 3.3, 33.3)
277 F (cosf, 100, 1000)
278 F (cosf, 1e6, 1e32)
279 F (erff, -4.0, 4.0)
280 #if WANT_VMATH
281 D (__s_sin, -3.1, 3.1)
282 D (__s_cos, -3.1, 3.1)
283 D (__s_exp, -9.9, 9.9)
284 D (__s_log, 0.01, 11.1)
285 {"__s_pow", 'd', 0, 0.01, 11.1, {.d = xy__s_pow}},
286 F (__s_expf, -9.9, 9.9)
287 F (__s_expf_1u, -9.9, 9.9)
288 F (__s_exp2f, -9.9, 9.9)
289 F (__s_exp2f_1u, -9.9, 9.9)
290 F (__s_logf, 0.01, 11.1)
291 {"__s_powf", 'f', 0, 0.01, 11.1, {.f = xy__s_powf}},
292 F (__s_sinf, -3.1, 3.1)
293 F (__s_cosf, -3.1, 3.1)
294 #if __aarch64__
295 VD (__v_dummy, 1.0, 2.0)
296 VD (__v_sin, -3.1, 3.1)
297 VD (__v_cos, -3.1, 3.1)
298 VD (__v_exp, -9.9, 9.9)
299 VD (__v_log, 0.01, 11.1)
300 {"__v_pow", 'd', 'v', 0.01, 11.1, {.vd = xy__v_pow}},
301 VF (__v_dummyf, 1.0, 2.0)
302 VF (__v_expf, -9.9, 9.9)
303 VF (__v_expf_1u, -9.9, 9.9)
304 VF (__v_exp2f, -9.9, 9.9)
305 VF (__v_exp2f_1u, -9.9, 9.9)
306 VF (__v_logf, 0.01, 11.1)
307 {"__v_powf", 'f', 'v', 0.01, 11.1, {.vf = xy__v_powf}},
308 VF (__v_sinf, -3.1, 3.1)
309 VF (__v_cosf, -3.1, 3.1)
310 #ifdef __vpcs
311 VND (__vn_dummy, 1.0, 2.0)
312 VND (__vn_exp, -9.9, 9.9)
313 VND (_ZGVnN2v_exp, -9.9, 9.9)
314 VND (__vn_log, 0.01, 11.1)
315 VND (_ZGVnN2v_log, 0.01, 11.1)
316 {"__vn_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy__vn_pow}},
317 {"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}},
318 VND (__vn_sin, -3.1, 3.1)
319 VND (_ZGVnN2v_sin, -3.1, 3.1)
320 VND (__vn_cos, -3.1, 3.1)
321 VND (_ZGVnN2v_cos, -3.1, 3.1)
322 VNF (__vn_dummyf, 1.0, 2.0)
323 VNF (__vn_expf, -9.9, 9.9)
324 VNF (_ZGVnN4v_expf, -9.9, 9.9)
325 VNF (__vn_expf_1u, -9.9, 9.9)
326 VNF (__vn_exp2f, -9.9, 9.9)
327 VNF (_ZGVnN4v_exp2f, -9.9, 9.9)
328 VNF (__vn_exp2f_1u, -9.9, 9.9)
329 VNF (__vn_logf, 0.01, 11.1)
330 VNF (_ZGVnN4v_logf, 0.01, 11.1)
331 {"__vn_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy__vn_powf}},
332 {"_ZGVnN4vv_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy_Z_powf}},
333 VNF (__vn_sinf, -3.1, 3.1)
334 VNF (_ZGVnN4v_sinf, -3.1, 3.1)
335 VNF (__vn_cosf, -3.1, 3.1)
336 VNF (_ZGVnN4v_cosf, -3.1, 3.1)
337 #endif
338 #endif
339 #endif
340 {0},
341 #undef F
342 #undef D
343 #undef VF
344 #undef VD
345 #undef VNF
346 #undef VND
347 };
348 
349 static void
gen_linear(double lo,double hi)350 gen_linear (double lo, double hi)
351 {
352   for (int i = 0; i < N; i++)
353     A[i] = (lo * (N - i) + hi * i) / N;
354 }
355 
356 static void
genf_linear(double lo,double hi)357 genf_linear (double lo, double hi)
358 {
359   for (int i = 0; i < N; i++)
360     Af[i] = (float)(lo * (N - i) + hi * i) / N;
361 }
362 
/* Reinterpret the bits of I as a double.  */
static inline double
asdouble (uint64_t i)
{
  union
  {
    uint64_t i;
    double f;
  } u = {i};
  return u.f;
}

/* State of the pseudo random generator used by frand.  */
static uint64_t seed = 0x0123456789abcdef;

/* Return the next pseudo random value, scaled from [0, 1) onto the interval
   starting at LO with width HI - LO.  */
static double
frand (double lo, double hi)
{
  /* 64-bit linear congruential step.  */
  seed = 6364136223846793005ULL * seed + 1;
  /* The top 52 bits of the state become the mantissa of a double with the
     exponent of 1.0, giving a value in [1, 2); subtracting 1.0 maps it to
     [0, 1).  */
  return lo + (hi - lo) * (asdouble (seed >> 12 | 0x3ffULL << 52) - 1.0);
}
382 
383 static void
gen_rand(double lo,double hi)384 gen_rand (double lo, double hi)
385 {
386   for (int i = 0; i < N; i++)
387     A[i] = frand (lo, hi);
388 }
389 
390 static void
genf_rand(double lo,double hi)391 genf_rand (double lo, double hi)
392 {
393   for (int i = 0; i < N; i++)
394     Af[i] = (float)frand (lo, hi);
395 }
396 
397 static void
gen_trace(int index)398 gen_trace (int index)
399 {
400   for (int i = 0; i < N; i++)
401     A[i] = Trace[index + i];
402 }
403 
404 static void
genf_trace(int index)405 genf_trace (int index)
406 {
407   for (int i = 0; i < N; i++)
408     Af[i] = (float)Trace[index + i];
409 }
410 
411 static void
run_thruput(double f (double))412 run_thruput (double f (double))
413 {
414   for (int i = 0; i < N; i++)
415     f (A[i]);
416 }
417 
418 static void
runf_thruput(float f (float))419 runf_thruput (float f (float))
420 {
421   for (int i = 0; i < N; i++)
422     f (Af[i]);
423 }
424 
425 volatile double zero = 0;
426 
427 static void
run_latency(double f (double))428 run_latency (double f (double))
429 {
430   double z = zero;
431   double prev = z;
432   for (int i = 0; i < N; i++)
433     prev = f (A[i] + prev * z);
434 }
435 
436 static void
runf_latency(float f (float))437 runf_latency (float f (float))
438 {
439   float z = (float)zero;
440   float prev = z;
441   for (int i = 0; i < N; i++)
442     prev = f (Af[i] + prev * z);
443 }
444 
445 static void
run_v_thruput(v_double f (v_double))446 run_v_thruput (v_double f (v_double))
447 {
448   for (int i = 0; i < N; i += v_double_len ())
449     f (v_double_load (A+i));
450 }
451 
452 static void
runf_v_thruput(v_float f (v_float))453 runf_v_thruput (v_float f (v_float))
454 {
455   for (int i = 0; i < N; i += v_float_len ())
456     f (v_float_load (Af+i));
457 }
458 
459 static void
run_v_latency(v_double f (v_double))460 run_v_latency (v_double f (v_double))
461 {
462   v_double z = v_double_dup (zero);
463   v_double prev = z;
464   for (int i = 0; i < N; i += v_double_len ())
465     prev = f (v_double_load (A+i) + prev * z);
466 }
467 
468 static void
runf_v_latency(v_float f (v_float))469 runf_v_latency (v_float f (v_float))
470 {
471   v_float z = v_float_dup (zero);
472   v_float prev = z;
473   for (int i = 0; i < N; i += v_float_len ())
474     prev = f (v_float_load (Af+i) + prev * z);
475 }
476 
#ifdef __vpcs
/* Variants of the vector runners for functions declared with __vpcs.  */

/* Independent calls of F over A, v_double_len () elements at a time.  */
static void
run_vn_thruput (__vpcs v_double f (v_double))
{
  for (int i = 0; i < N; i += v_double_len ())
    f (v_double_load (A+i));
}

/* Independent calls of F over Af, v_float_len () elements at a time.  */
static void
runf_vn_thruput (__vpcs v_float f (v_float))
{
  for (int i = 0; i < N; i += v_float_len ())
    f (v_float_load (Af+i));
}

/* Dependent calls of F over A: each argument includes prev * z, where z is
   a vector filled with the value read from zero.  */
static void
run_vn_latency (__vpcs v_double f (v_double))
{
  v_double z = v_double_dup (zero);
  v_double prev = z;
  for (int i = 0; i < N; i += v_double_len ())
    prev = f (v_double_load (A+i) + prev * z);
}

/* Dependent calls of F over Af: each argument includes prev * z, where z is
   a vector filled with the value read from zero.  */
static void
runf_vn_latency (__vpcs v_float f (v_float))
{
  v_float z = v_float_dup (zero);
  v_float prev = z;
  for (int i = 0; i < N; i += v_float_len ())
    prev = f (v_float_load (Af+i) + prev * z);
}
#endif
510 
/* Return a timestamp in nanoseconds; aborts if the clock cannot be read.
   The monotonic clock is used so that differences between two timestamps
   are not disturbed by adjustments of the system time during a
   measurement.  */
static uint64_t
tic (void)
{
  struct timespec ts;
  if (clock_gettime (CLOCK_MONOTONIC, &ts))
    abort ();
  return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}
519 
/* Time run (f) and store the best result in the uint64_t variable dt, which
   must be declared in the enclosing scope.  After one warm up call, the
   time of itercount consecutive calls is measured measurecount times and
   the minimum (in ns, as reported by tic) is kept.  dt starts at -1, i.e.
   the largest uint64_t value, so the first measurement always replaces
   it.  */
#define TIMEIT(run, f) do { \
  dt = -1; \
  run (f); /* Warm up.  */ \
  for (int j = 0; j < measurecount; j++) \
    { \
      uint64_t t0 = tic (); \
      for (int i = 0; i < itercount; i++) \
	run (f); \
      uint64_t t1 = tic (); \
      if (t1 - t0 < dt) \
	dt = t1 - t0; \
    } \
} while (0)
533 
534 static void
bench1(const struct fun * f,int type,double lo,double hi)535 bench1 (const struct fun *f, int type, double lo, double hi)
536 {
537   uint64_t dt = 0;
538   uint64_t ns100;
539   const char *s = type == 't' ? "rthruput" : "latency";
540   int vlen = 1;
541 
542   if (f->vec && f->prec == 'd')
543     vlen = v_double_len();
544   else if (f->vec && f->prec == 'f')
545     vlen = v_float_len();
546 
547   if (f->prec == 'd' && type == 't' && f->vec == 0)
548     TIMEIT (run_thruput, f->fun.d);
549   else if (f->prec == 'd' && type == 'l' && f->vec == 0)
550     TIMEIT (run_latency, f->fun.d);
551   else if (f->prec == 'f' && type == 't' && f->vec == 0)
552     TIMEIT (runf_thruput, f->fun.f);
553   else if (f->prec == 'f' && type == 'l' && f->vec == 0)
554     TIMEIT (runf_latency, f->fun.f);
555   else if (f->prec == 'd' && type == 't' && f->vec == 'v')
556     TIMEIT (run_v_thruput, f->fun.vd);
557   else if (f->prec == 'd' && type == 'l' && f->vec == 'v')
558     TIMEIT (run_v_latency, f->fun.vd);
559   else if (f->prec == 'f' && type == 't' && f->vec == 'v')
560     TIMEIT (runf_v_thruput, f->fun.vf);
561   else if (f->prec == 'f' && type == 'l' && f->vec == 'v')
562     TIMEIT (runf_v_latency, f->fun.vf);
563 #ifdef __vpcs
564   else if (f->prec == 'd' && type == 't' && f->vec == 'n')
565     TIMEIT (run_vn_thruput, f->fun.vnd);
566   else if (f->prec == 'd' && type == 'l' && f->vec == 'n')
567     TIMEIT (run_vn_latency, f->fun.vnd);
568   else if (f->prec == 'f' && type == 't' && f->vec == 'n')
569     TIMEIT (runf_vn_thruput, f->fun.vnf);
570   else if (f->prec == 'f' && type == 'l' && f->vec == 'n')
571     TIMEIT (runf_vn_latency, f->fun.vnf);
572 #endif
573 
574   if (type == 't')
575     {
576       ns100 = (100 * dt + itercount * N / 2) / (itercount * N);
577       printf ("%9s %8s: %4u.%02u ns/elem %10llu ns in [%g %g]\n", f->name, s,
578 	      (unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
579 	      (unsigned long long) dt, lo, hi);
580     }
581   else if (type == 'l')
582     {
583       ns100 = (100 * dt + itercount * N / vlen / 2) / (itercount * N / vlen);
584       printf ("%9s %8s: %4u.%02u ns/call %10llu ns in [%g %g]\n", f->name, s,
585 	      (unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
586 	      (unsigned long long) dt, lo, hi);
587     }
588   fflush (stdout);
589 }
590 
591 static void
bench(const struct fun * f,double lo,double hi,int type,int gen)592 bench (const struct fun *f, double lo, double hi, int type, int gen)
593 {
594   if (f->prec == 'd' && gen == 'r')
595     gen_rand (lo, hi);
596   else if (f->prec == 'd' && gen == 'l')
597     gen_linear (lo, hi);
598   else if (f->prec == 'd' && gen == 't')
599     gen_trace (0);
600   else if (f->prec == 'f' && gen == 'r')
601     genf_rand (lo, hi);
602   else if (f->prec == 'f' && gen == 'l')
603     genf_linear (lo, hi);
604   else if (f->prec == 'f' && gen == 't')
605     genf_trace (0);
606 
607   if (gen == 't')
608     hi = trace_size / N;
609 
610   if (type == 'b' || type == 't')
611     bench1 (f, 't', lo, hi);
612 
613   if (type == 'b' || type == 'l')
614     bench1 (f, 'l', lo, hi);
615 
616   for (int i = N; i < trace_size; i += N)
617     {
618       if (f->prec == 'd')
619 	gen_trace (i);
620       else
621 	genf_trace (i);
622 
623       lo = i / N;
624       if (type == 'b' || type == 't')
625 	bench1 (f, 't', lo, hi);
626 
627       if (type == 'b' || type == 'l')
628 	bench1 (f, 'l', lo, hi);
629     }
630 }
631 
632 static void
readtrace(const char * name)633 readtrace (const char *name)
634 {
635 	int n = 0;
636 	FILE *f = strcmp (name, "-") == 0 ? stdin : fopen (name, "r");
637 	if (!f)
638 	  {
639 	    printf ("openning \"%s\" failed: %m\n", name);
640 	    exit (1);
641 	  }
642 	for (;;)
643 	  {
644 	    if (n >= trace_size)
645 	      {
646 		trace_size += N;
647 		Trace = realloc (Trace, trace_size * sizeof (Trace[0]));
648 		if (Trace == NULL)
649 		  {
650 		    printf ("out of memory\n");
651 		    exit (1);
652 		  }
653 	      }
654 	    if (fscanf (f, "%lf", Trace + n) != 1)
655 	      break;
656 	    n++;
657 	  }
658 	if (ferror (f) || n == 0)
659 	  {
660 	    printf ("reading \"%s\" failed: %m\n", name);
661 	    exit (1);
662 	  }
663 	fclose (f);
664 	if (n % N == 0)
665 	  trace_size = n;
666 	for (int i = 0; n < trace_size; n++, i++)
667 	  Trace[n] = Trace[i];
668 }
669 
670 static void
usage(void)671 usage (void)
672 {
673   printf ("usage: ./mathbench [-g rand|linear|trace] [-t latency|thruput|both] "
674 	  "[-i low high] [-f tracefile] [-m measurements] [-c iterations] func "
675 	  "[func2 ..]\n");
676   printf ("func:\n");
677   printf ("%7s [run all benchmarks]\n", "all");
678   for (const struct fun *f = funtab; f->name; f++)
679     printf ("%7s [low: %g high: %g]\n", f->name, f->lo, f->hi);
680   exit (1);
681 }
682 
683 int
main(int argc,char * argv[])684 main (int argc, char *argv[])
685 {
686   int usergen = 0, gen = 'r', type = 'b', all = 0;
687   double lo = 0, hi = 0;
688   const char *tracefile = "-";
689 
690   argv++;
691   argc--;
692   for (;;)
693     {
694       if (argc <= 0)
695 	usage ();
696       if (argv[0][0] != '-')
697 	break;
698       else if (argc >= 3 && strcmp (argv[0], "-i") == 0)
699 	{
700 	  usergen = 1;
701 	  lo = strtod (argv[1], 0);
702 	  hi = strtod (argv[2], 0);
703 	  argv += 3;
704 	  argc -= 3;
705 	}
706       else if (argc >= 2 && strcmp (argv[0], "-m") == 0)
707 	{
708 	  measurecount = strtol (argv[1], 0, 0);
709 	  argv += 2;
710 	  argc -= 2;
711 	}
712       else if (argc >= 2 && strcmp (argv[0], "-c") == 0)
713 	{
714 	  itercount = strtol (argv[1], 0, 0);
715 	  argv += 2;
716 	  argc -= 2;
717 	}
718       else if (argc >= 2 && strcmp (argv[0], "-g") == 0)
719 	{
720 	  gen = argv[1][0];
721 	  if (strchr ("rlt", gen) == 0)
722 	    usage ();
723 	  argv += 2;
724 	  argc -= 2;
725 	}
726       else if (argc >= 2 && strcmp (argv[0], "-f") == 0)
727 	{
728 	  gen = 't';  /* -f implies -g trace.  */
729 	  tracefile = argv[1];
730 	  argv += 2;
731 	  argc -= 2;
732 	}
733       else if (argc >= 2 && strcmp (argv[0], "-t") == 0)
734 	{
735 	  type = argv[1][0];
736 	  if (strchr ("ltb", type) == 0)
737 	    usage ();
738 	  argv += 2;
739 	  argc -= 2;
740 	}
741       else
742 	usage ();
743     }
744   if (gen == 't')
745     {
746       readtrace (tracefile);
747       lo = hi = 0;
748       usergen = 1;
749     }
750   while (argc > 0)
751     {
752       int found = 0;
753       all = strcmp (argv[0], "all") == 0;
754       for (const struct fun *f = funtab; f->name; f++)
755 	if (all || strcmp (argv[0], f->name) == 0)
756 	  {
757 	    found = 1;
758 	    if (!usergen)
759 	      {
760 		lo = f->lo;
761 		hi = f->hi;
762 	      }
763 	    bench (f, lo, hi, type, gen);
764 	    if (usergen && !all)
765 	      break;
766 	  }
767       if (!found)
768 	printf ("unknown function: %s\n", argv[0]);
769       argv++;
770       argc--;
771     }
772   return 0;
773 }
774