• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2020 Facebook */
#include <asm/barrier.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
#include <sys/epoll.h>
#include <sys/mman.h>
#include <argp.h>
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include "bench.h"
#include "ringbuf_bench.skel.h"
#include "perfbuf_bench.skel.h"
13 
/*
 * Tunables shared by every benchmark in this file. parse_arg() overwrites
 * back2back, ringbuf_use_output, batch_cnt, sampled and sample_rate from the
 * command line; ringbuf_sz and perfbuf_sz are only ever set here.
 */
static struct {
	bool back2back; /* consumer, not producer, triggers follow-up batches */
	int batch_cnt; /* copied into the BPF programs' rodata batch_cnt */
	bool sampled; /* request a notification only every sample_rate records */
	int sample_rate;
	int ringbuf_sz; /* per-ringbuf, in bytes */
	bool ringbuf_use_output; /* use slower output API */
	int perfbuf_sz; /* per-CPU size, in pages */
} args = {
	.back2back = false,
	.batch_cnt = 500,
	.sampled = false,
	.sample_rate = 500,
	.ringbuf_sz = 512 * 1024,
	.ringbuf_use_output = false,
	.perfbuf_sz = 128,
};
31 
/* Keys identifying the --rb-* options in opts[]; parse_arg() switches on them. */
enum {
	ARG_RB_BACK2BACK = 2000,
	ARG_RB_USE_OUTPUT = 2001,
	ARG_RB_BATCH_CNT = 2002,
	ARG_RB_SAMPLED = 2003,
	ARG_RB_SAMPLE_RATE = 2004,
};
39 
/*
 * Command-line options understood by this benchmark family. Entries with a
 * non-NULL third field ("CNT", "RATE") take a value; the others are flags.
 * The empty entry terminates the table.
 */
static const struct argp_option opts[] = {
	{ "rb-b2b", ARG_RB_BACK2BACK, NULL, 0, "Back-to-back mode"},
	{ "rb-use-output", ARG_RB_USE_OUTPUT, NULL, 0, "Use bpf_ringbuf_output() instead of bpf_ringbuf_reserve()"},
	{ "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
	{ "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
	{ "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
	{},
};
48 
parse_arg(int key,char * arg,struct argp_state * state)49 static error_t parse_arg(int key, char *arg, struct argp_state *state)
50 {
51 	switch (key) {
52 	case ARG_RB_BACK2BACK:
53 		args.back2back = true;
54 		break;
55 	case ARG_RB_USE_OUTPUT:
56 		args.ringbuf_use_output = true;
57 		break;
58 	case ARG_RB_BATCH_CNT:
59 		args.batch_cnt = strtol(arg, NULL, 10);
60 		if (args.batch_cnt < 0) {
61 			fprintf(stderr, "Invalid batch count.");
62 			argp_usage(state);
63 		}
64 		break;
65 	case ARG_RB_SAMPLED:
66 		args.sampled = true;
67 		break;
68 	case ARG_RB_SAMPLE_RATE:
69 		args.sample_rate = strtol(arg, NULL, 10);
70 		if (args.sample_rate < 0) {
71 			fprintf(stderr, "Invalid perfbuf sample rate.");
72 			argp_usage(state);
73 		}
74 		break;
75 	default:
76 		return ARGP_ERR_UNKNOWN;
77 	}
78 	return 0;
79 }
80 
/*
 * exported into benchmark runner
 *
 * Bundles the --rb-* option table with its parser callback.
 */
const struct argp bench_ringbufs_argp = {
	.options = opts,
	.parser = parse_arg,
};
86 
87 /* RINGBUF-LIBBPF benchmark */
88 
/*
 * Samples consumed since the last measurement: consumers atomic_inc() it,
 * the *_measure() callbacks atomically swap it back to zero.
 */
static struct counter buf_hits;
90 
/*
 * Issue one getpgid syscall and ignore its result.
 *
 * NOTE(review): presumably the benchmark BPF programs hook this syscall and
 * emit one batch of records per call - confirm against the BPF sources.
 */
static inline void bufs_trigger_batch(void)
{
	(void)syscall(__NR_getpgid);
}
95 
bufs_validate()96 static void bufs_validate()
97 {
98 	if (env.consumer_cnt != 1) {
99 		fprintf(stderr, "rb-libbpf benchmark doesn't support multi-consumer!\n");
100 		exit(1);
101 	}
102 
103 	if (args.back2back && env.producer_cnt > 1) {
104 		fprintf(stderr, "back-to-back mode makes sense only for single-producer case!\n");
105 		exit(1);
106 	}
107 }
108 
bufs_sample_producer(void * input)109 static void *bufs_sample_producer(void *input)
110 {
111 	if (args.back2back) {
112 		/* initial batch to get everything started */
113 		bufs_trigger_batch();
114 		return NULL;
115 	}
116 
117 	while (true)
118 		bufs_trigger_batch();
119 	return NULL;
120 }
121 
/* State of the rb-libbpf benchmark, filled in by ringbuf_libbpf_setup(). */
static struct ringbuf_libbpf_ctx {
	struct ringbuf_bench *skel; /* loaded BPF skeleton */
	struct ring_buffer *ringbuf; /* libbpf ring buffer polled by the consumer */
} ringbuf_libbpf_ctx;
126 
ringbuf_libbpf_measure(struct bench_res * res)127 static void ringbuf_libbpf_measure(struct bench_res *res)
128 {
129 	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
130 
131 	res->hits = atomic_swap(&buf_hits.value, 0);
132 	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
133 }
134 
ringbuf_setup_skeleton()135 static struct ringbuf_bench *ringbuf_setup_skeleton()
136 {
137 	struct ringbuf_bench *skel;
138 
139 	setup_libbpf();
140 
141 	skel = ringbuf_bench__open();
142 	if (!skel) {
143 		fprintf(stderr, "failed to open skeleton\n");
144 		exit(1);
145 	}
146 
147 	skel->rodata->batch_cnt = args.batch_cnt;
148 	skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0;
149 
150 	if (args.sampled)
151 		/* record data + header take 16 bytes */
152 		skel->rodata->wakeup_data_size = args.sample_rate * 16;
153 
154 	bpf_map__resize(skel->maps.ringbuf, args.ringbuf_sz);
155 
156 	if (ringbuf_bench__load(skel)) {
157 		fprintf(stderr, "failed to load skeleton\n");
158 		exit(1);
159 	}
160 
161 	return skel;
162 }
163 
buf_process_sample(void * ctx,void * data,size_t len)164 static int buf_process_sample(void *ctx, void *data, size_t len)
165 {
166 	atomic_inc(&buf_hits.value);
167 	return 0;
168 }
169 
ringbuf_libbpf_setup()170 static void ringbuf_libbpf_setup()
171 {
172 	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
173 	struct bpf_link *link;
174 
175 	ctx->skel = ringbuf_setup_skeleton();
176 	ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf),
177 					buf_process_sample, NULL, NULL);
178 	if (!ctx->ringbuf) {
179 		fprintf(stderr, "failed to create ringbuf\n");
180 		exit(1);
181 	}
182 
183 	link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
184 	if (IS_ERR(link)) {
185 		fprintf(stderr, "failed to attach program!\n");
186 		exit(1);
187 	}
188 }
189 
ringbuf_libbpf_consumer(void * input)190 static void *ringbuf_libbpf_consumer(void *input)
191 {
192 	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
193 
194 	while (ring_buffer__poll(ctx->ringbuf, -1) >= 0) {
195 		if (args.back2back)
196 			bufs_trigger_batch();
197 	}
198 	fprintf(stderr, "ringbuf polling failed!\n");
199 	return NULL;
200 }
201 
202 /* RINGBUF-CUSTOM benchmark */
/* Hand-rolled view of a BPF ringbuf map, set up by ringbuf_custom_setup(). */
struct ringbuf_custom {
	__u64 *consumer_pos; /* writable mapping of the map at offset 0 */
	__u64 *producer_pos; /* read-only mapping at offset page_size */
	__u64 mask; /* args.ringbuf_sz - 1, wraps positions into data */
	void *data; /* record area, one page after producer_pos */
	int map_fd;
};
210 
/* State of the rb-custom benchmark, filled in by ringbuf_custom_setup(). */
static struct ringbuf_custom_ctx {
	struct ringbuf_bench *skel; /* loaded BPF skeleton */
	struct ringbuf_custom ringbuf; /* mmap()'ed view of the ringbuf map */
	int epoll_fd; /* epoll instance watching ringbuf.map_fd */
	struct epoll_event event; /* used both to register and to wait */
} ringbuf_custom_ctx;
217 
ringbuf_custom_measure(struct bench_res * res)218 static void ringbuf_custom_measure(struct bench_res *res)
219 {
220 	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
221 
222 	res->hits = atomic_swap(&buf_hits.value, 0);
223 	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
224 }
225 
ringbuf_custom_setup()226 static void ringbuf_custom_setup()
227 {
228 	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
229 	const size_t page_size = getpagesize();
230 	struct bpf_link *link;
231 	struct ringbuf_custom *r;
232 	void *tmp;
233 	int err;
234 
235 	ctx->skel = ringbuf_setup_skeleton();
236 
237 	ctx->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
238 	if (ctx->epoll_fd < 0) {
239 		fprintf(stderr, "failed to create epoll fd: %d\n", -errno);
240 		exit(1);
241 	}
242 
243 	r = &ctx->ringbuf;
244 	r->map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
245 	r->mask = args.ringbuf_sz - 1;
246 
247 	/* Map writable consumer page */
248 	tmp = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
249 		   r->map_fd, 0);
250 	if (tmp == MAP_FAILED) {
251 		fprintf(stderr, "failed to mmap consumer page: %d\n", -errno);
252 		exit(1);
253 	}
254 	r->consumer_pos = tmp;
255 
256 	/* Map read-only producer page and data pages. */
257 	tmp = mmap(NULL, page_size + 2 * args.ringbuf_sz, PROT_READ, MAP_SHARED,
258 		   r->map_fd, page_size);
259 	if (tmp == MAP_FAILED) {
260 		fprintf(stderr, "failed to mmap data pages: %d\n", -errno);
261 		exit(1);
262 	}
263 	r->producer_pos = tmp;
264 	r->data = tmp + page_size;
265 
266 	ctx->event.events = EPOLLIN;
267 	err = epoll_ctl(ctx->epoll_fd, EPOLL_CTL_ADD, r->map_fd, &ctx->event);
268 	if (err < 0) {
269 		fprintf(stderr, "failed to epoll add ringbuf: %d\n", -errno);
270 		exit(1);
271 	}
272 
273 	link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
274 	if (IS_ERR(link)) {
275 		fprintf(stderr, "failed to attach program\n");
276 		exit(1);
277 	}
278 }
279 
/*
 * Flag bits kept in the top two bits of a record's 32-bit length header.
 * Unsigned constants: shifting a signed 1 into bit 31 is undefined behavior.
 */
#define RINGBUF_BUSY_BIT (1U << 31)
#define RINGBUF_DISCARD_BIT (1U << 30)
/* bytes added to a record's payload length for its header */
#define RINGBUF_META_LEN 8

/*
 * Convert a raw record length header into the number of bytes the record
 * occupies in the ring.
 *
 * @len: header value; the busy and discard flag bits are ignored
 *
 * Returns the payload length plus RINGBUF_META_LEN, rounded up to a multiple
 * of 8.
 */
static inline int roundup_len(__u32 len)
{
	/* clear out top 2 bits */
	len &= ~(RINGBUF_BUSY_BIT | RINGBUF_DISCARD_BIT);
	/* add length prefix */
	len += RINGBUF_META_LEN;
	/* round up to 8 byte alignment */
	return (len + 7) & ~7U;
}
294 
ringbuf_custom_process_ring(struct ringbuf_custom * r)295 static void ringbuf_custom_process_ring(struct ringbuf_custom *r)
296 {
297 	unsigned long cons_pos, prod_pos;
298 	int *len_ptr, len;
299 	bool got_new_data;
300 
301 	cons_pos = smp_load_acquire(r->consumer_pos);
302 	while (true) {
303 		got_new_data = false;
304 		prod_pos = smp_load_acquire(r->producer_pos);
305 		while (cons_pos < prod_pos) {
306 			len_ptr = r->data + (cons_pos & r->mask);
307 			len = smp_load_acquire(len_ptr);
308 
309 			/* sample not committed yet, bail out for now */
310 			if (len & RINGBUF_BUSY_BIT)
311 				return;
312 
313 			got_new_data = true;
314 			cons_pos += roundup_len(len);
315 
316 			atomic_inc(&buf_hits.value);
317 		}
318 		if (got_new_data)
319 			smp_store_release(r->consumer_pos, cons_pos);
320 		else
321 			break;
322 	};
323 }
324 
ringbuf_custom_consumer(void * input)325 static void *ringbuf_custom_consumer(void *input)
326 {
327 	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
328 	int cnt;
329 
330 	do {
331 		if (args.back2back)
332 			bufs_trigger_batch();
333 		cnt = epoll_wait(ctx->epoll_fd, &ctx->event, 1, -1);
334 		if (cnt > 0)
335 			ringbuf_custom_process_ring(&ctx->ringbuf);
336 	} while (cnt >= 0);
337 	fprintf(stderr, "ringbuf polling failed!\n");
338 	return 0;
339 }
340 
341 /* PERFBUF-LIBBPF benchmark */
/*
 * State shared by the pb-libbpf and pb-custom benchmarks, filled in by
 * perfbuf_libbpf_setup().
 */
static struct perfbuf_libbpf_ctx {
	struct perfbuf_bench *skel; /* loaded BPF skeleton */
	struct perf_buffer *perfbuf; /* created by perf_buffer__new_raw() */
} perfbuf_libbpf_ctx;
346 
perfbuf_measure(struct bench_res * res)347 static void perfbuf_measure(struct bench_res *res)
348 {
349 	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
350 
351 	res->hits = atomic_swap(&buf_hits.value, 0);
352 	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
353 }
354 
perfbuf_setup_skeleton()355 static struct perfbuf_bench *perfbuf_setup_skeleton()
356 {
357 	struct perfbuf_bench *skel;
358 
359 	setup_libbpf();
360 
361 	skel = perfbuf_bench__open();
362 	if (!skel) {
363 		fprintf(stderr, "failed to open skeleton\n");
364 		exit(1);
365 	}
366 
367 	skel->rodata->batch_cnt = args.batch_cnt;
368 
369 	if (perfbuf_bench__load(skel)) {
370 		fprintf(stderr, "failed to load skeleton\n");
371 		exit(1);
372 	}
373 
374 	return skel;
375 }
376 
377 static enum bpf_perf_event_ret
perfbuf_process_sample_raw(void * input_ctx,int cpu,struct perf_event_header * e)378 perfbuf_process_sample_raw(void *input_ctx, int cpu,
379 			   struct perf_event_header *e)
380 {
381 	switch (e->type) {
382 	case PERF_RECORD_SAMPLE:
383 		atomic_inc(&buf_hits.value);
384 		break;
385 	case PERF_RECORD_LOST:
386 		break;
387 	default:
388 		return LIBBPF_PERF_EVENT_ERROR;
389 	}
390 	return LIBBPF_PERF_EVENT_CONT;
391 }
392 
perfbuf_libbpf_setup()393 static void perfbuf_libbpf_setup()
394 {
395 	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
396 	struct perf_event_attr attr;
397 	struct perf_buffer_raw_opts pb_opts = {
398 		.event_cb = perfbuf_process_sample_raw,
399 		.ctx = (void *)(long)0,
400 		.attr = &attr,
401 	};
402 	struct bpf_link *link;
403 
404 	ctx->skel = perfbuf_setup_skeleton();
405 
406 	memset(&attr, 0, sizeof(attr));
407 	attr.config = PERF_COUNT_SW_BPF_OUTPUT,
408 	attr.type = PERF_TYPE_SOFTWARE;
409 	attr.sample_type = PERF_SAMPLE_RAW;
410 	/* notify only every Nth sample */
411 	if (args.sampled) {
412 		attr.sample_period = args.sample_rate;
413 		attr.wakeup_events = args.sample_rate;
414 	} else {
415 		attr.sample_period = 1;
416 		attr.wakeup_events = 1;
417 	}
418 
419 	if (args.sample_rate > args.batch_cnt) {
420 		fprintf(stderr, "sample rate %d is too high for given batch count %d\n",
421 			args.sample_rate, args.batch_cnt);
422 		exit(1);
423 	}
424 
425 	ctx->perfbuf = perf_buffer__new_raw(bpf_map__fd(ctx->skel->maps.perfbuf),
426 					    args.perfbuf_sz, &pb_opts);
427 	if (!ctx->perfbuf) {
428 		fprintf(stderr, "failed to create perfbuf\n");
429 		exit(1);
430 	}
431 
432 	link = bpf_program__attach(ctx->skel->progs.bench_perfbuf);
433 	if (IS_ERR(link)) {
434 		fprintf(stderr, "failed to attach program\n");
435 		exit(1);
436 	}
437 }
438 
perfbuf_libbpf_consumer(void * input)439 static void *perfbuf_libbpf_consumer(void *input)
440 {
441 	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
442 
443 	while (perf_buffer__poll(ctx->perfbuf, -1) >= 0) {
444 		if (args.back2back)
445 			bufs_trigger_batch();
446 	}
447 	fprintf(stderr, "perfbuf polling failed!\n");
448 	return NULL;
449 }
450 
451 /* PERFBUF-CUSTOM benchmark */
452 
453 /* copies of internal libbpf definitions */
/*
 * Local definition of a per-CPU perf buffer. perfbuf_custom_consumer()
 * obtains pointers to it from epoll event data and reads only ->base.
 * NOTE(review): presumably the field layout has to match the libbpf build in
 * use - verify whenever libbpf is updated.
 */
struct perf_cpu_buf {
	struct perf_buffer *pb;
	void *base; /* mmap()'ed memory */
	void *buf; /* for reconstructing segmented data */
	size_t buf_size;
	int fd;
	int cpu;
	int map_key;
};
463 
/*
 * Local definition of the perf buffer object, letting
 * perfbuf_custom_consumer() read page_size, mmap_size, events, cpu_cnt and
 * epoll_fd from the pointer returned by perf_buffer__new_raw().
 * NOTE(review): presumably the field layout has to match the libbpf build in
 * use - verify whenever libbpf is updated.
 */
struct perf_buffer {
	perf_buffer_event_fn event_cb;
	perf_buffer_sample_fn sample_cb;
	perf_buffer_lost_fn lost_cb;
	void *ctx; /* passed into callbacks */

	size_t page_size;
	size_t mmap_size;
	struct perf_cpu_buf **cpu_bufs;
	struct epoll_event *events;
	int cpu_cnt; /* number of allocated CPU buffers */
	int epoll_fd; /* perf event FD */
	int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
};
478 
perfbuf_custom_consumer(void * input)479 static void *perfbuf_custom_consumer(void *input)
480 {
481 	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
482 	struct perf_buffer *pb = ctx->perfbuf;
483 	struct perf_cpu_buf *cpu_buf;
484 	struct perf_event_mmap_page *header;
485 	size_t mmap_mask = pb->mmap_size - 1;
486 	struct perf_event_header *ehdr;
487 	__u64 data_head, data_tail;
488 	size_t ehdr_size;
489 	void *base;
490 	int i, cnt;
491 
492 	while (true) {
493 		if (args.back2back)
494 			bufs_trigger_batch();
495 		cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, -1);
496 		if (cnt <= 0) {
497 			fprintf(stderr, "perf epoll failed: %d\n", -errno);
498 			exit(1);
499 		}
500 
501 		for (i = 0; i < cnt; ++i) {
502 			cpu_buf = pb->events[i].data.ptr;
503 			header = cpu_buf->base;
504 			base = ((void *)header) + pb->page_size;
505 
506 			data_head = ring_buffer_read_head(header);
507 			data_tail = header->data_tail;
508 			while (data_head != data_tail) {
509 				ehdr = base + (data_tail & mmap_mask);
510 				ehdr_size = ehdr->size;
511 
512 				if (ehdr->type == PERF_RECORD_SAMPLE)
513 					atomic_inc(&buf_hits.value);
514 
515 				data_tail += ehdr_size;
516 			}
517 			ring_buffer_write_tail(header, data_tail);
518 		}
519 	}
520 	return NULL;
521 }
522 
/* "rb-libbpf": BPF ringbuf consumed through libbpf's ring_buffer API. */
const struct bench bench_rb_libbpf = {
	.name = "rb-libbpf",
	.validate = bufs_validate,
	.setup = ringbuf_libbpf_setup,
	.producer_thread = bufs_sample_producer,
	.consumer_thread = ringbuf_libbpf_consumer,
	.measure = ringbuf_libbpf_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};
533 
/* "rb-custom": BPF ringbuf consumed by the hand-rolled mmap/epoll reader. */
const struct bench bench_rb_custom = {
	.name = "rb-custom",
	.validate = bufs_validate,
	.setup = ringbuf_custom_setup,
	.producer_thread = bufs_sample_producer,
	.consumer_thread = ringbuf_custom_consumer,
	.measure = ringbuf_custom_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};
544 
/* "pb-libbpf": perf buffer consumed through perf_buffer__poll(). */
const struct bench bench_pb_libbpf = {
	.name = "pb-libbpf",
	.validate = bufs_validate,
	.setup = perfbuf_libbpf_setup,
	.producer_thread = bufs_sample_producer,
	.consumer_thread = perfbuf_libbpf_consumer,
	.measure = perfbuf_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};
555 
/*
 * "pb-custom": perf buffer created by the same setup as pb-libbpf but
 * consumed by the hand-rolled reader in perfbuf_custom_consumer().
 */
const struct bench bench_pb_custom = {
	.name = "pb-custom",
	.validate = bufs_validate,
	.setup = perfbuf_libbpf_setup,
	.producer_thread = bufs_sample_producer,
	.consumer_thread = perfbuf_custom_consumer,
	.measure = perfbuf_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};
566 
567