• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Microbenchmark for math functions.
3  *
4  * Copyright (c) 2018, Arm Limited.
5  * SPDX-License-Identifier: MIT
6  */
7 
8 #undef _GNU_SOURCE
9 #define _GNU_SOURCE 1
10 #include <stdint.h>
11 #include <stdlib.h>
12 #include <stdio.h>
13 #include <string.h>
14 #include <time.h>
15 #include <math.h>
16 #include "mathlib.h"
17 
/* Default number of timed measurements per benchmark; only the fastest
   one is reported.  */
#define MEASURE 60
/* Number of elements in each input array.  */
#define N 8000
/* Default number of passes over the input array per measurement.  */
#define ITER 125

/* Input arrays filled by the gen* helpers and read by the run* loops.  */
static double A[N];
static float Af[N];
/* Buffer of trace samples loaded by readtrace and its size in elements.  */
static double *Trace;
static size_t trace_size;
/* Measurement and iteration counts in effect; main overrides them when
   -m or -c is given.  */
static long measurecount = MEASURE;
static long itercount = ITER;
31 
/* Identity function in double precision.  Listed in funtab as "dummy",
   presumably as a baseline for the cost of the benchmark loop itself.  */
static double
dummy (double x)
{
  double unchanged = x;
  return unchanged;
}
37 
/* Identity function in single precision.  Listed in funtab as "dummyf",
   presumably as a baseline for the cost of the benchmark loop itself.  */
static float
dummyf (float x)
{
  float unchanged = x;
  return unchanged;
}
43 
44 static double
xypow(double x)45 xypow (double x)
46 {
47   return pow (x, x);
48 }
49 
50 static float
xypowf(float x)51 xypowf (float x)
52 {
53   return powf (x, x);
54 }
55 
56 static double
xpow(double x)57 xpow (double x)
58 {
59   return pow (x, 23.4);
60 }
61 
62 static float
xpowf(float x)63 xpowf (float x)
64 {
65   return powf (x, 23.4f);
66 }
67 
68 static double
ypow(double x)69 ypow (double x)
70 {
71   return pow (2.34, x);
72 }
73 
74 static float
ypowf(float x)75 ypowf (float x)
76 {
77   return powf (2.34f, x);
78 }
79 
80 static float
sincosf_wrap(float x)81 sincosf_wrap (float x)
82 {
83   float s, c;
84   sincosf (x, &s, &c);
85   return s + c;
86 }
87 
88 static const struct fun
89 {
90   const char *name;
91   int prec;
92   double lo;
93   double hi;
94   union
95   {
96     double (*d) (double);
97     float (*f) (float);
98   } fun;
99 } funtab[] = {
100 #define D(func, lo, hi) {#func, 'd', lo, hi, {.d = func}},
101 #define F(func, lo, hi) {#func, 'f', lo, hi, {.f = func}},
102 D (dummy, 1.0, 2.0)
103 D (exp, -9.9, 9.9)
104 D (exp, 0.5, 1.0)
105 D (exp2, -9.9, 9.9)
106 D (log, 0.01, 11.1)
107 D (log, 0.999, 1.001)
108 D (log2, 0.01, 11.1)
109 D (log2, 0.999, 1.001)
110 {"pow", 'd', 0.01, 11.1, {.d = xypow}},
111 D (xpow, 0.01, 11.1)
112 D (ypow, -9.9, 9.9)
113 
114 F (dummyf, 1.0, 2.0)
115 F (expf, -9.9, 9.9)
116 F (exp2f, -9.9, 9.9)
117 F (logf, 0.01, 11.1)
118 F (log2f, 0.01, 11.1)
119 {"powf", 'f', 0.01, 11.1, {.f = xypowf}},
120 F (xpowf, 0.01, 11.1)
121 F (ypowf, -9.9, 9.9)
122 {"sincosf", 'f', 0.1, 0.7, {.f = sincosf_wrap}},
123 {"sincosf", 'f', 0.8, 3.1, {.f = sincosf_wrap}},
124 {"sincosf", 'f', -3.1, 3.1, {.f = sincosf_wrap}},
125 {"sincosf", 'f', 3.3, 33.3, {.f = sincosf_wrap}},
126 {"sincosf", 'f', 100, 1000, {.f = sincosf_wrap}},
127 {"sincosf", 'f', 1e6, 1e32, {.f = sincosf_wrap}},
128 F (sinf, 0.1, 0.7)
129 F (sinf, 0.8, 3.1)
130 F (sinf, -3.1, 3.1)
131 F (sinf, 3.3, 33.3)
132 F (sinf, 100, 1000)
133 F (sinf, 1e6, 1e32)
134 F (cosf, 0.1, 0.7)
135 F (cosf, 0.8, 3.1)
136 F (cosf, -3.1, 3.1)
137 F (cosf, 3.3, 33.3)
138 F (cosf, 100, 1000)
139 F (cosf, 1e6, 1e32)
140 {0},
141 #undef F
142 #undef D
143 };
144 
145 static void
gen_linear(double lo,double hi)146 gen_linear (double lo, double hi)
147 {
148   for (int i = 0; i < N; i++)
149     A[i] = (lo * (N - i) + hi * i) / N;
150 }
151 
152 static void
genf_linear(double lo,double hi)153 genf_linear (double lo, double hi)
154 {
155   for (int i = 0; i < N; i++)
156     Af[i] = (float)(lo * (N - i) + hi * i) / N;
157 }
158 
/* Reinterpret the 64 bits of I as a double (no numeric conversion).  */
static inline double
asdouble (uint64_t i)
{
  double value;

  memcpy (&value, &i, sizeof value);
  return value;
}
169 
/* State of the pseudo-random generator.  It starts from a fixed value, so
   every run produces the same sequence.  */
static uint64_t seed = 0x0123456789abcdef;

/* Return the next pseudo-random double, scaled from [0, 1) onto the
   interval between LO and HI.  */
static double
frand (double lo, double hi)
{
  /* Linear congruential step; unsigned arithmetic wraps modulo 2^64.  */
  seed = seed * 6364136223846793005ULL + 1;

  /* The top 52 bits of the state become the fraction field below the
     exponent pattern 0x3ff, which for an IEEE binary64 double is a value
     in [1, 2); subtracting 1 moves it to [0, 1).  */
  uint64_t bits = (seed >> 12) | (0x3ffULL << 52);
  double unit = asdouble (bits) - 1.0;

  return lo + (hi - lo) * unit;
}
178 
179 static void
gen_rand(double lo,double hi)180 gen_rand (double lo, double hi)
181 {
182   for (int i = 0; i < N; i++)
183     A[i] = frand (lo, hi);
184 }
185 
186 static void
genf_rand(double lo,double hi)187 genf_rand (double lo, double hi)
188 {
189   for (int i = 0; i < N; i++)
190     Af[i] = (float)frand (lo, hi);
191 }
192 
193 static void
gen_trace(int index)194 gen_trace (int index)
195 {
196   for (int i = 0; i < N; i++)
197     A[i] = Trace[index + i];
198 }
199 
200 static void
genf_trace(int index)201 genf_trace (int index)
202 {
203   for (int i = 0; i < N; i++)
204     Af[i] = (float)Trace[index + i];
205 }
206 
207 static void
run_thruput(double f (double))208 run_thruput (double f (double))
209 {
210   for (int i = 0; i < N; i++)
211     f (A[i]);
212 }
213 
214 static void
runf_thruput(float f (float))215 runf_thruput (float f (float))
216 {
217   for (int i = 0; i < N; i++)
218     f (Af[i]);
219 }
220 
/* Always holds 0, but is volatile so the compiler cannot assume that.  The
   latency loops multiply the previous result by this value to chain
   successive calls together without changing their inputs.  */
volatile double zero = 0.0;
222 
223 static void
run_latency(double f (double))224 run_latency (double f (double))
225 {
226   double z = zero;
227   double prev = z;
228   for (int i = 0; i < N; i++)
229     prev = f (A[i] + prev * z);
230 }
231 
232 static void
runf_latency(float f (float))233 runf_latency (float f (float))
234 {
235   float z = (float)zero;
236   float prev = z;
237   for (int i = 0; i < N; i++)
238     prev = f (Af[i] + prev * z);
239 }
240 
/* Return a timestamp in nanoseconds for measuring elapsed time.

   The monotonic clock is used when available: the realtime clock can be
   stepped while a measurement is running (NTP, manual adjustment), which
   would yield bogus or even negative intervals.  Only differences between
   two return values are meaningful.  Aborts if the clock cannot be read.  */
static uint64_t
tic (void)
{
#ifdef CLOCK_MONOTONIC
  const clockid_t clock_id = CLOCK_MONOTONIC;
#else
  const clockid_t clock_id = CLOCK_REALTIME;
#endif
  struct timespec ts;

  if (clock_gettime (clock_id, &ts))
    abort ();
  return (uint64_t) ts.tv_sec * 1000000000ULL + (uint64_t) ts.tv_nsec;
}
249 
/* Time RUN (F) and store the best (smallest) elapsed time, in the units
   returned by tic, in the variable `dt' of the enclosing scope, which must
   be an unsigned 64-bit integer.  After one untimed warm-up call, the macro
   takes `measurecount' measurements of `itercount' consecutive RUN (F)
   calls each.  `dt' starts at its maximum value (-1 converted to unsigned).
   The loop counters are long to match the types of the two counts; an int
   counter would overflow for counts above INT_MAX.  */
#define TIMEIT(run, f) do { \
  dt = -1; \
  run (f); /* Warm up.  */ \
  for (long j = 0; j < measurecount; j++) \
    { \
      uint64_t t0 = tic (); \
      for (long i = 0; i < itercount; i++) \
        run (f); \
      uint64_t t1 = tic (); \
      if (t1 - t0 < dt) \
        dt = t1 - t0; \
    } \
} while (0)
263 
264 static void
bench1(const struct fun * f,int type,double lo,double hi)265 bench1 (const struct fun *f, int type, double lo, double hi)
266 {
267   uint64_t dt = 0;
268   uint64_t ns100;
269   const char *s = type == 't' ? "rthruput" : "latency";
270 
271   if (f->prec == 'd' && type == 't')
272     TIMEIT (run_thruput, f->fun.d);
273   else if (f->prec == 'd' && type == 'l')
274     TIMEIT (run_latency, f->fun.d);
275   else if (f->prec == 'f' && type == 't')
276     TIMEIT (runf_thruput, f->fun.f);
277   else if (f->prec == 'f' && type == 'l')
278     TIMEIT (runf_latency, f->fun.f);
279 
280   ns100 = (100 * dt + itercount * N / 2) / (itercount * N);
281   printf ("%7s %8s: %4u.%02u ns/elem %10llu ns in [%g %g]\n", f->name, s,
282 	  (unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
283 	  (unsigned long long) dt, lo, hi);
284   fflush (stdout);
285 }
286 
287 static void
bench(const struct fun * f,double lo,double hi,int type,int gen)288 bench (const struct fun *f, double lo, double hi, int type, int gen)
289 {
290   if (f->prec == 'd' && gen == 'r')
291     gen_rand (lo, hi);
292   else if (f->prec == 'd' && gen == 'l')
293     gen_linear (lo, hi);
294   else if (f->prec == 'd' && gen == 't')
295     gen_trace (0);
296   else if (f->prec == 'f' && gen == 'r')
297     genf_rand (lo, hi);
298   else if (f->prec == 'f' && gen == 'l')
299     genf_linear (lo, hi);
300   else if (f->prec == 'f' && gen == 't')
301     genf_trace (0);
302 
303   if (gen == 't')
304     hi = trace_size / N;
305 
306   if (type == 'b' || type == 't')
307     bench1 (f, 't', lo, hi);
308 
309   if (type == 'b' || type == 'l')
310     bench1 (f, 'l', lo, hi);
311 
312   for (int i = N; i < trace_size; i += N)
313     {
314       if (f->prec == 'd')
315 	gen_trace (i);
316       else
317 	genf_trace (i);
318 
319       lo = i / N;
320       if (type == 'b' || type == 't')
321 	bench1 (f, 't', lo, hi);
322 
323       if (type == 'b' || type == 'l')
324 	bench1 (f, 'l', lo, hi);
325     }
326 }
327 
328 static void
readtrace(const char * name)329 readtrace (const char *name)
330 {
331 	int n = 0;
332 	FILE *f = strcmp (name, "-") == 0 ? stdin : fopen (name, "r");
333 	if (!f)
334 	  {
335 	    printf ("openning \"%s\" failed: %m\n", name);
336 	    exit (1);
337 	  }
338 	for (;;)
339 	  {
340 	    if (n >= trace_size)
341 	      {
342 		trace_size += N;
343 		Trace = realloc (Trace, trace_size * sizeof (Trace[0]));
344 		if (Trace == NULL)
345 		  {
346 		    printf ("out of memory\n");
347 		    exit (1);
348 		  }
349 	      }
350 	    if (fscanf (f, "%lf", Trace + n) != 1)
351 	      break;
352 	    n++;
353 	  }
354 	if (ferror (f) || n == 0)
355 	  {
356 	    printf ("reading \"%s\" failed: %m\n", name);
357 	    exit (1);
358 	  }
359 	fclose (f);
360 	if (n % N == 0)
361 	  trace_size = n;
362 	for (int i = 0; n < trace_size; n++, i++)
363 	  Trace[n] = Trace[i];
364 }
365 
366 static void
usage(void)367 usage (void)
368 {
369   printf ("usage: ./mathbench [-g rand|linear|trace] [-t latency|thruput|both] "
370 	  "[-i low high] [-f tracefile] [-m measurements] [-c iterations] func "
371 	  "[func2 ..]\n");
372   printf ("func:\n");
373   printf ("%7s [run all benchmarks]\n", "all");
374   for (const struct fun *f = funtab; f->name; f++)
375     printf ("%7s [low: %g high: %g]\n", f->name, f->lo, f->hi);
376   exit (1);
377 }
378 
379 int
main(int argc,char * argv[])380 main (int argc, char *argv[])
381 {
382   int usergen = 0, gen = 'r', type = 'b', all = 0;
383   double lo = 0, hi = 0;
384   const char *tracefile = "-";
385 
386   argv++;
387   argc--;
388   for (;;)
389     {
390       if (argc <= 0)
391 	usage ();
392       if (argv[0][0] != '-')
393 	break;
394       else if (argc >= 3 && strcmp (argv[0], "-i") == 0)
395 	{
396 	  usergen = 1;
397 	  lo = strtod (argv[1], 0);
398 	  hi = strtod (argv[2], 0);
399 	  argv += 3;
400 	  argc -= 3;
401 	}
402       else if (argc >= 2 && strcmp (argv[0], "-m") == 0)
403 	{
404 	  measurecount = strtol (argv[1], 0, 0);
405 	  argv += 2;
406 	  argc -= 2;
407 	}
408       else if (argc >= 2 && strcmp (argv[0], "-c") == 0)
409 	{
410 	  itercount = strtol (argv[1], 0, 0);
411 	  argv += 2;
412 	  argc -= 2;
413 	}
414       else if (argc >= 2 && strcmp (argv[0], "-g") == 0)
415 	{
416 	  gen = argv[1][0];
417 	  if (strchr ("rlt", gen) == 0)
418 	    usage ();
419 	  argv += 2;
420 	  argc -= 2;
421 	}
422       else if (argc >= 2 && strcmp (argv[0], "-f") == 0)
423 	{
424 	  gen = 't';  /* -f implies -g trace.  */
425 	  tracefile = argv[1];
426 	  argv += 2;
427 	  argc -= 2;
428 	}
429       else if (argc >= 2 && strcmp (argv[0], "-t") == 0)
430 	{
431 	  type = argv[1][0];
432 	  if (strchr ("ltb", type) == 0)
433 	    usage ();
434 	  argv += 2;
435 	  argc -= 2;
436 	}
437       else
438 	usage ();
439     }
440   if (gen == 't')
441     {
442       readtrace (tracefile);
443       lo = hi = 0;
444       usergen = 1;
445     }
446   while (argc > 0)
447     {
448       int found = 0;
449       all = strcmp (argv[0], "all") == 0;
450       for (const struct fun *f = funtab; f->name; f++)
451 	if (all || strcmp (argv[0], f->name) == 0)
452 	  {
453 	    found = 1;
454 	    if (!usergen)
455 	      {
456 		lo = f->lo;
457 		hi = f->hi;
458 	      }
459 	    bench (f, lo, hi, type, gen);
460 	    if (usergen && !all)
461 	      break;
462 	  }
463       if (!found)
464 	printf ("unknown function: %s\n", argv[0]);
465       argv++;
466       argc--;
467     }
468   return 0;
469 }
470