1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2020 Facebook */
3 #define _GNU_SOURCE
#include <argp.h>
#include <errno.h>
#include <limits.h>
#include <linux/compiler.h>
#include <sys/time.h>
#include <sched.h>
#include <fcntl.h>
#include <pthread.h>
#include <sys/sysinfo.h>
#include <sys/resource.h>
#include <signal.h>
#include "bench.h"
#include "testing_helpers.h"
15
16 struct env env = {
17 .warmup_sec = 1,
18 .duration_sec = 5,
19 .affinity = false,
20 .consumer_cnt = 1,
21 .producer_cnt = 1,
22 };
23
libbpf_print_fn(enum libbpf_print_level level,const char * format,va_list args)24 static int libbpf_print_fn(enum libbpf_print_level level,
25 const char *format, va_list args)
26 {
27 if (level == LIBBPF_DEBUG && !env.verbose)
28 return 0;
29 return vfprintf(stderr, format, args);
30 }
31
setup_libbpf(void)32 void setup_libbpf(void)
33 {
34 libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
35 libbpf_set_print(libbpf_print_fn);
36 }
37
false_hits_report_progress(int iter,struct bench_res * res,long delta_ns)38 void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns)
39 {
40 long total = res->false_hits + res->hits + res->drops;
41
42 printf("Iter %3d (%7.3lfus): ",
43 iter, (delta_ns - 1000000000) / 1000.0);
44
45 printf("%ld false hits of %ld total operations. Percentage = %2.2f %%\n",
46 res->false_hits, total, ((float)res->false_hits / total) * 100);
47 }
48
/* Print the overall share of false hits across all collected samples.
 *
 * @res: array of per-sample counters
 * @res_cnt: number of elements in @res
 *
 * The total operation count is the sum of hits, false hits and drops
 * over all samples.
 */
void false_hits_report_final(struct bench_res res[], int res_cnt)
{
	long total_hits = 0, total_drops = 0, total_false_hits = 0, total_ops = 0;
	float pct = 0;
	int i;

	for (i = 0; i < res_cnt; i++) {
		total_hits += res[i].hits;
		total_false_hits += res[i].false_hits;
		total_drops += res[i].drops;
	}
	total_ops = total_hits + total_false_hits + total_drops;

	/* no samples or no operations at all: the ratio would be 0/0, so
	 * report 0% instead of printing NaN
	 */
	if (total_ops)
		pct = ((float)total_false_hits / total_ops) * 100;

	printf("Summary: %ld false hits of %ld total operations. ",
	       total_false_hits, total_ops);
	printf("Percentage = %2.2f %%\n", pct);
}
66
hits_drops_report_progress(int iter,struct bench_res * res,long delta_ns)67 void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns)
68 {
69 double hits_per_sec, drops_per_sec;
70 double hits_per_prod;
71
72 hits_per_sec = res->hits / 1000000.0 / (delta_ns / 1000000000.0);
73 hits_per_prod = hits_per_sec / env.producer_cnt;
74 drops_per_sec = res->drops / 1000000.0 / (delta_ns / 1000000000.0);
75
76 printf("Iter %3d (%7.3lfus): ",
77 iter, (delta_ns - 1000000000) / 1000.0);
78
79 printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s, total operations %8.3lfM/s\n",
80 hits_per_sec, hits_per_prod, drops_per_sec, hits_per_sec + drops_per_sec);
81 }
82
hits_drops_report_final(struct bench_res res[],int res_cnt)83 void hits_drops_report_final(struct bench_res res[], int res_cnt)
84 {
85 int i;
86 double hits_mean = 0.0, drops_mean = 0.0, total_ops_mean = 0.0;
87 double hits_stddev = 0.0, drops_stddev = 0.0, total_ops_stddev = 0.0;
88 double total_ops;
89
90 for (i = 0; i < res_cnt; i++) {
91 hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
92 drops_mean += res[i].drops / 1000000.0 / (0.0 + res_cnt);
93 }
94 total_ops_mean = hits_mean + drops_mean;
95
96 if (res_cnt > 1) {
97 for (i = 0; i < res_cnt; i++) {
98 hits_stddev += (hits_mean - res[i].hits / 1000000.0) *
99 (hits_mean - res[i].hits / 1000000.0) /
100 (res_cnt - 1.0);
101 drops_stddev += (drops_mean - res[i].drops / 1000000.0) *
102 (drops_mean - res[i].drops / 1000000.0) /
103 (res_cnt - 1.0);
104 total_ops = res[i].hits + res[i].drops;
105 total_ops_stddev += (total_ops_mean - total_ops / 1000000.0) *
106 (total_ops_mean - total_ops / 1000000.0) /
107 (res_cnt - 1.0);
108 }
109 hits_stddev = sqrt(hits_stddev);
110 drops_stddev = sqrt(drops_stddev);
111 total_ops_stddev = sqrt(total_ops_stddev);
112 }
113 printf("Summary: hits %8.3lf \u00B1 %5.3lfM/s (%7.3lfM/prod), ",
114 hits_mean, hits_stddev, hits_mean / env.producer_cnt);
115 printf("drops %8.3lf \u00B1 %5.3lfM/s, ",
116 drops_mean, drops_stddev);
117 printf("total operations %8.3lf \u00B1 %5.3lfM/s\n",
118 total_ops_mean, total_ops_stddev);
119 }
120
ops_report_progress(int iter,struct bench_res * res,long delta_ns)121 void ops_report_progress(int iter, struct bench_res *res, long delta_ns)
122 {
123 double hits_per_sec, hits_per_prod;
124
125 hits_per_sec = res->hits / 1000000.0 / (delta_ns / 1000000000.0);
126 hits_per_prod = hits_per_sec / env.producer_cnt;
127
128 printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0);
129
130 printf("hits %8.3lfM/s (%7.3lfM/prod)\n", hits_per_sec, hits_per_prod);
131 }
132
ops_report_final(struct bench_res res[],int res_cnt)133 void ops_report_final(struct bench_res res[], int res_cnt)
134 {
135 double hits_mean = 0.0, hits_stddev = 0.0;
136 int i;
137
138 for (i = 0; i < res_cnt; i++)
139 hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
140
141 if (res_cnt > 1) {
142 for (i = 0; i < res_cnt; i++)
143 hits_stddev += (hits_mean - res[i].hits / 1000000.0) *
144 (hits_mean - res[i].hits / 1000000.0) /
145 (res_cnt - 1.0);
146
147 hits_stddev = sqrt(hits_stddev);
148 }
149 printf("Summary: throughput %8.3lf \u00B1 %5.3lf M ops/s (%7.3lfM ops/prod), ",
150 hits_mean, hits_stddev, hits_mean / env.producer_cnt);
151 printf("latency %8.3lf ns/op\n", 1000.0 / hits_mean * env.producer_cnt);
152 }
153
/* Program identification strings. argp_program_version and
 * argp_program_bug_address are the global names glibc's argp consults
 * for its version and bug-report output.
 */
const char *argp_program_version = "benchmark";
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";

/* Help text passed to argp as the parser's .doc string. */
const char argp_program_doc[] =
	"benchmark Generic benchmarking framework.\n\n"
	"This tool runs benchmarks.\n\n"
	"USAGE: benchmark <bench-name>\n\n"
	"EXAMPLES:\n"
	" # run 'count-local' benchmark with 1 producer and 1 consumer\n"
	" benchmark count-local\n"
	" # run 'count-local' with 16 producer and 8 consumer thread, pinned to CPUs\n"
	" benchmark -p16 -c8 -a count-local\n";
168
/* Option keys for the long-only options; the values lie outside the
 * range of the single-character keys used by the other options.
 */
enum {
	ARG_PROD_AFFINITY_SET = 1000,
	ARG_CONS_AFFINITY_SET,
};
173
174 static const struct argp_option opts[] = {
175 { "list", 'l', NULL, 0, "List available benchmarks"},
176 { "duration", 'd', "SEC", 0, "Duration of benchmark, seconds"},
177 { "warmup", 'w', "SEC", 0, "Warm-up period, seconds"},
178 { "producers", 'p', "NUM", 0, "Number of producer threads"},
179 { "consumers", 'c', "NUM", 0, "Number of consumer threads"},
180 { "verbose", 'v', NULL, 0, "Verbose debug output"},
181 { "affinity", 'a', NULL, 0, "Set consumer/producer thread affinity"},
182 { "prod-affinity", ARG_PROD_AFFINITY_SET, "CPUSET", 0,
183 "Set of CPUs for producer threads; implies --affinity"},
184 { "cons-affinity", ARG_CONS_AFFINITY_SET, "CPUSET", 0,
185 "Set of CPUs for consumer threads; implies --affinity"},
186 {},
187 };
188
/* Option parsers defined by individual benchmark implementations. */
extern struct argp bench_ringbufs_argp;
extern struct argp bench_bloom_map_argp;
extern struct argp bench_bpf_loop_argp;
extern struct argp bench_strncmp_argp;

/* Child parsers attached to the top-level parser, each with the header
 * shown above its options in the help output; the empty entry
 * terminates the table.
 */
static const struct argp_child bench_parsers[] = {
	{ .argp = &bench_ringbufs_argp, .header = "Ring buffers benchmark" },
	{ .argp = &bench_bloom_map_argp, .header = "Bloom filter map benchmark" },
	{ .argp = &bench_bpf_loop_argp, .header = "bpf_loop helper benchmark" },
	{ .argp = &bench_strncmp_argp, .header = "bpf_strncmp helper benchmark" },
	{},
};
201
parse_arg(int key,char * arg,struct argp_state * state)202 static error_t parse_arg(int key, char *arg, struct argp_state *state)
203 {
204 static int pos_args;
205
206 switch (key) {
207 case 'v':
208 env.verbose = true;
209 break;
210 case 'l':
211 env.list = true;
212 break;
213 case 'd':
214 env.duration_sec = strtol(arg, NULL, 10);
215 if (env.duration_sec <= 0) {
216 fprintf(stderr, "Invalid duration: %s\n", arg);
217 argp_usage(state);
218 }
219 break;
220 case 'w':
221 env.warmup_sec = strtol(arg, NULL, 10);
222 if (env.warmup_sec <= 0) {
223 fprintf(stderr, "Invalid warm-up duration: %s\n", arg);
224 argp_usage(state);
225 }
226 break;
227 case 'p':
228 env.producer_cnt = strtol(arg, NULL, 10);
229 if (env.producer_cnt <= 0) {
230 fprintf(stderr, "Invalid producer count: %s\n", arg);
231 argp_usage(state);
232 }
233 break;
234 case 'c':
235 env.consumer_cnt = strtol(arg, NULL, 10);
236 if (env.consumer_cnt <= 0) {
237 fprintf(stderr, "Invalid consumer count: %s\n", arg);
238 argp_usage(state);
239 }
240 break;
241 case 'a':
242 env.affinity = true;
243 break;
244 case ARG_PROD_AFFINITY_SET:
245 env.affinity = true;
246 if (parse_num_list(arg, &env.prod_cpus.cpus,
247 &env.prod_cpus.cpus_len)) {
248 fprintf(stderr, "Invalid format of CPU set for producers.");
249 argp_usage(state);
250 }
251 break;
252 case ARG_CONS_AFFINITY_SET:
253 env.affinity = true;
254 if (parse_num_list(arg, &env.cons_cpus.cpus,
255 &env.cons_cpus.cpus_len)) {
256 fprintf(stderr, "Invalid format of CPU set for consumers.");
257 argp_usage(state);
258 }
259 break;
260 case ARGP_KEY_ARG:
261 if (pos_args++) {
262 fprintf(stderr,
263 "Unrecognized positional argument: %s\n", arg);
264 argp_usage(state);
265 }
266 env.bench_name = strdup(arg);
267 break;
268 default:
269 return ARGP_ERR_UNKNOWN;
270 }
271 return 0;
272 }
273
parse_cmdline_args(int argc,char ** argv)274 static void parse_cmdline_args(int argc, char **argv)
275 {
276 static const struct argp argp = {
277 .options = opts,
278 .parser = parse_arg,
279 .doc = argp_program_doc,
280 .children = bench_parsers,
281 };
282 if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
283 exit(1);
284 if (!env.list && !env.bench_name) {
285 argp_help(&argp, stderr, ARGP_HELP_DOC, "bench");
286 exit(1);
287 }
288 }
289
290 static void collect_measurements(long delta_ns);
291
292 static __u64 last_time_ns;
sigalarm_handler(int signo)293 static void sigalarm_handler(int signo)
294 {
295 long new_time_ns = get_time_ns();
296 long delta_ns = new_time_ns - last_time_ns;
297
298 collect_measurements(delta_ns);
299
300 last_time_ns = new_time_ns;
301 }
302
303 /* set up periodic 1-second timer */
setup_timer()304 static void setup_timer()
305 {
306 static struct sigaction sigalarm_action = {
307 .sa_handler = sigalarm_handler,
308 };
309 struct itimerval timer_settings = {};
310 int err;
311
312 last_time_ns = get_time_ns();
313 err = sigaction(SIGALRM, &sigalarm_action, NULL);
314 if (err < 0) {
315 fprintf(stderr, "failed to install SIGALRM handler: %d\n", -errno);
316 exit(1);
317 }
318 timer_settings.it_interval.tv_sec = 1;
319 timer_settings.it_value.tv_sec = 1;
320 err = setitimer(ITIMER_REAL, &timer_settings, NULL);
321 if (err < 0) {
322 fprintf(stderr, "failed to arm interval timer: %d\n", -errno);
323 exit(1);
324 }
325 }
326
/* Pin @thread to the single CPU @cpu; exits with status 1 on failure.
 *
 * pthread_setaffinity_np() reports failure through its return value and
 * does not set errno, so the returned error number is what gets
 * printed (negated, like the other error messages in this file).
 */
static void set_thread_affinity(pthread_t thread, int cpu)
{
	cpu_set_t cpuset;
	int err;

	CPU_ZERO(&cpuset);
	CPU_SET(cpu, &cpuset);
	err = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
	if (err) {
		fprintf(stderr, "setting affinity to CPU #%d failed: %d\n",
			cpu, -err);
		exit(1);
	}
}
339
next_cpu(struct cpu_set * cpu_set)340 static int next_cpu(struct cpu_set *cpu_set)
341 {
342 if (cpu_set->cpus) {
343 int i;
344
345 /* find next available CPU */
346 for (i = cpu_set->next_cpu; i < cpu_set->cpus_len; i++) {
347 if (cpu_set->cpus[i]) {
348 cpu_set->next_cpu = i + 1;
349 return i;
350 }
351 }
352 fprintf(stderr, "Not enough CPUs specified, need CPU #%d or higher.\n", i);
353 exit(1);
354 }
355
356 return cpu_set->next_cpu++;
357 }
358
/* Run-time state of the benchmark being executed. */
static struct bench_state {
	/* number of samples stored in results so far */
	int res_cnt;
	/* per-sample results, allocated in setup_benchmark() */
	struct bench_res *results;
	/* consumer thread handles, env.consumer_cnt entries */
	pthread_t *consumers;
	/* producer thread handles, env.producer_cnt entries */
	pthread_t *producers;
} state;

/* the selected benchmark; stays NULL until setup_benchmark() finds it */
const struct bench *bench = NULL;
367
/* Benchmark descriptors defined in other source files. */

/* counting benchmarks */
extern const struct bench bench_count_global;
extern const struct bench bench_count_local;

/* rename benchmarks */
extern const struct bench bench_rename_base;
extern const struct bench bench_rename_kprobe;
extern const struct bench bench_rename_kretprobe;
extern const struct bench bench_rename_rawtp;
extern const struct bench bench_rename_fentry;
extern const struct bench bench_rename_fexit;

/* trigger benchmarks */
extern const struct bench bench_trig_base;
extern const struct bench bench_trig_tp;
extern const struct bench bench_trig_rawtp;
extern const struct bench bench_trig_kprobe;
extern const struct bench bench_trig_fentry;
extern const struct bench bench_trig_fentry_sleep;
extern const struct bench bench_trig_fmodret;
extern const struct bench bench_trig_uprobe_base;
extern const struct bench bench_trig_uprobe_with_nop;
extern const struct bench bench_trig_uretprobe_with_nop;
extern const struct bench bench_trig_uprobe_without_nop;
extern const struct bench bench_trig_uretprobe_without_nop;

/* ring buffer / perf buffer benchmarks */
extern const struct bench bench_rb_libbpf;
extern const struct bench bench_rb_custom;
extern const struct bench bench_pb_libbpf;
extern const struct bench bench_pb_custom;

/* bloom filter map benchmarks */
extern const struct bench bench_bloom_lookup;
extern const struct bench bench_bloom_update;
extern const struct bench bench_bloom_false_positive;
extern const struct bench bench_hashmap_without_bloom;
extern const struct bench bench_hashmap_with_bloom;

/* helper benchmarks */
extern const struct bench bench_bpf_loop;
extern const struct bench bench_strncmp_no_helper;
extern const struct bench bench_strncmp_helper;

/* Table of all known benchmarks, searched by name when a benchmark is
 * selected and printed in this order when listing.
 */
static const struct bench *benchs[] = {
	/* counting */
	&bench_count_global,
	&bench_count_local,
	/* rename */
	&bench_rename_base,
	&bench_rename_kprobe,
	&bench_rename_kretprobe,
	&bench_rename_rawtp,
	&bench_rename_fentry,
	&bench_rename_fexit,
	/* trigger */
	&bench_trig_base,
	&bench_trig_tp,
	&bench_trig_rawtp,
	&bench_trig_kprobe,
	&bench_trig_fentry,
	&bench_trig_fentry_sleep,
	&bench_trig_fmodret,
	&bench_trig_uprobe_base,
	&bench_trig_uprobe_with_nop,
	&bench_trig_uretprobe_with_nop,
	&bench_trig_uprobe_without_nop,
	&bench_trig_uretprobe_without_nop,
	/* ring buffer / perf buffer */
	&bench_rb_libbpf,
	&bench_rb_custom,
	&bench_pb_libbpf,
	&bench_pb_custom,
	/* bloom filter map */
	&bench_bloom_lookup,
	&bench_bloom_update,
	&bench_bloom_false_positive,
	&bench_hashmap_without_bloom,
	&bench_hashmap_with_bloom,
	/* helpers */
	&bench_bpf_loop,
	&bench_strncmp_no_helper,
	&bench_strncmp_helper,
};
435
setup_benchmark()436 static void setup_benchmark()
437 {
438 int i, err;
439
440 if (!env.bench_name) {
441 fprintf(stderr, "benchmark name is not specified\n");
442 exit(1);
443 }
444
445 for (i = 0; i < ARRAY_SIZE(benchs); i++) {
446 if (strcmp(benchs[i]->name, env.bench_name) == 0) {
447 bench = benchs[i];
448 break;
449 }
450 }
451 if (!bench) {
452 fprintf(stderr, "benchmark '%s' not found\n", env.bench_name);
453 exit(1);
454 }
455
456 printf("Setting up benchmark '%s'...\n", bench->name);
457
458 state.producers = calloc(env.producer_cnt, sizeof(*state.producers));
459 state.consumers = calloc(env.consumer_cnt, sizeof(*state.consumers));
460 state.results = calloc(env.duration_sec + env.warmup_sec + 2,
461 sizeof(*state.results));
462 if (!state.producers || !state.consumers || !state.results)
463 exit(1);
464
465 if (bench->validate)
466 bench->validate();
467 if (bench->setup)
468 bench->setup();
469
470 for (i = 0; i < env.consumer_cnt; i++) {
471 err = pthread_create(&state.consumers[i], NULL,
472 bench->consumer_thread, (void *)(long)i);
473 if (err) {
474 fprintf(stderr, "failed to create consumer thread #%d: %d\n",
475 i, -errno);
476 exit(1);
477 }
478 if (env.affinity)
479 set_thread_affinity(state.consumers[i],
480 next_cpu(&env.cons_cpus));
481 }
482
483 /* unless explicit producer CPU list is specified, continue after
484 * last consumer CPU
485 */
486 if (!env.prod_cpus.cpus)
487 env.prod_cpus.next_cpu = env.cons_cpus.next_cpu;
488
489 for (i = 0; i < env.producer_cnt; i++) {
490 err = pthread_create(&state.producers[i], NULL,
491 bench->producer_thread, (void *)(long)i);
492 if (err) {
493 fprintf(stderr, "failed to create producer thread #%d: %d\n",
494 i, -errno);
495 exit(1);
496 }
497 if (env.affinity)
498 set_thread_affinity(state.producers[i],
499 next_cpu(&env.prod_cpus));
500 }
501
502 printf("Benchmark '%s' started.\n", bench->name);
503 }
504
505 static pthread_mutex_t bench_done_mtx = PTHREAD_MUTEX_INITIALIZER;
506 static pthread_cond_t bench_done = PTHREAD_COND_INITIALIZER;
507
collect_measurements(long delta_ns)508 static void collect_measurements(long delta_ns) {
509 int iter = state.res_cnt++;
510 struct bench_res *res = &state.results[iter];
511
512 bench->measure(res);
513
514 if (bench->report_progress)
515 bench->report_progress(iter, res, delta_ns);
516
517 if (iter == env.duration_sec + env.warmup_sec) {
518 pthread_mutex_lock(&bench_done_mtx);
519 pthread_cond_signal(&bench_done);
520 pthread_mutex_unlock(&bench_done_mtx);
521 }
522 }
523
main(int argc,char ** argv)524 int main(int argc, char **argv)
525 {
526 parse_cmdline_args(argc, argv);
527
528 if (env.list) {
529 int i;
530
531 printf("Available benchmarks:\n");
532 for (i = 0; i < ARRAY_SIZE(benchs); i++) {
533 printf("- %s\n", benchs[i]->name);
534 }
535 return 0;
536 }
537
538 setup_benchmark();
539
540 setup_timer();
541
542 pthread_mutex_lock(&bench_done_mtx);
543 pthread_cond_wait(&bench_done, &bench_done_mtx);
544 pthread_mutex_unlock(&bench_done_mtx);
545
546 if (bench->report_final)
547 /* skip first sample */
548 bench->report_final(state.results + env.warmup_sec,
549 state.res_cnt - env.warmup_sec);
550
551 return 0;
552 }
553