// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * AF_XDP user-space access library.
 *
 * Copyright(c) 2018 - 2019 Intel Corporation.
 *
 * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
 */

#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <asm/barrier.h>
#include <linux/compiler.h>
#include <linux/ethtool.h>
#include <linux/filter.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_xdp.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/types.h>

#include "bpf.h"
#include "libbpf.h"
#include "libbpf_internal.h"
#include "xsk.h"

#ifndef SOL_XDP
 #define SOL_XDP 283
#endif

#ifndef AF_XDP
 #define AF_XDP 44
#endif

#ifndef PF_XDP
 #define PF_XDP AF_XDP
#endif

struct xsk_umem {
	struct xsk_ring_prod *fill;
	struct xsk_ring_cons *comp;
	char *umem_area;
	struct xsk_umem_config config;
	int fd;
	int refcount;
};

struct xsk_socket {
	struct xsk_ring_cons *rx;
	struct xsk_ring_prod *tx;
	__u64 outstanding_tx;
	struct xsk_umem *umem;
	struct xsk_socket_config config;
	int fd;
	int ifindex;
	int prog_fd;
	int xsks_map_fd;
	__u32 queue_id;
	char ifname[IFNAMSIZ];
};

struct xsk_nl_info {
	bool xdp_prog_attached;
	int ifindex;
	int fd;
};

/* Up until and including Linux 5.3 */
struct xdp_ring_offset_v1 {
	__u64 producer;
	__u64 consumer;
	__u64 desc;
};

/* Up until and including Linux 5.3 */
struct xdp_mmap_offsets_v1 {
	struct xdp_ring_offset_v1 rx;
	struct xdp_ring_offset_v1 tx;
	struct xdp_ring_offset_v1 fr;
	struct xdp_ring_offset_v1 cr;
};

int xsk_umem__fd(const struct xsk_umem *umem)
{
	return umem ? umem->fd : -EINVAL;
}

int xsk_socket__fd(const struct xsk_socket *xsk)
{
	return xsk ? xsk->fd : -EINVAL;
}

static bool xsk_page_aligned(void *buffer)
{
	unsigned long addr = (unsigned long)buffer;

	return !(addr & (getpagesize() - 1));
}

static void xsk_set_umem_config(struct xsk_umem_config *cfg,
				const struct xsk_umem_config *usr_cfg)
{
	if (!usr_cfg) {
		cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
		cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
		cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
		cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
		cfg->flags = XSK_UMEM__DEFAULT_FLAGS;
		return;
	}

	cfg->fill_size = usr_cfg->fill_size;
	cfg->comp_size = usr_cfg->comp_size;
	cfg->frame_size = usr_cfg->frame_size;
	cfg->frame_headroom = usr_cfg->frame_headroom;
	cfg->flags = usr_cfg->flags;
}

static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
				     const struct xsk_socket_config *usr_cfg)
{
	if (!usr_cfg) {
		cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
		cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
		cfg->libbpf_flags = 0;
		cfg->xdp_flags = 0;
		cfg->bind_flags = 0;
		return 0;
	}

	if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)
		return -EINVAL;

	cfg->rx_size = usr_cfg->rx_size;
	cfg->tx_size = usr_cfg->tx_size;
	cfg->libbpf_flags = usr_cfg->libbpf_flags;
	cfg->xdp_flags = usr_cfg->xdp_flags;
	cfg->bind_flags = usr_cfg->bind_flags;

	return 0;
}

static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off)
{
	struct xdp_mmap_offsets_v1 off_v1;

	/* getsockopt on a kernel <= 5.3 returns no flags fields.
	 * Copy the offsets over to their places in the >= 5.4 format
	 * and put the flags where they would have been on such a
	 * kernel: right after each ring's consumer index.
	 */
	memcpy(&off_v1, off, sizeof(off_v1));

	off->rx.producer = off_v1.rx.producer;
	off->rx.consumer = off_v1.rx.consumer;
	off->rx.desc = off_v1.rx.desc;
	off->rx.flags = off_v1.rx.consumer + sizeof(__u32);

	off->tx.producer = off_v1.tx.producer;
	off->tx.consumer = off_v1.tx.consumer;
	off->tx.desc = off_v1.tx.desc;
	off->tx.flags = off_v1.tx.consumer + sizeof(__u32);

	off->fr.producer = off_v1.fr.producer;
	off->fr.consumer = off_v1.fr.consumer;
	off->fr.desc = off_v1.fr.desc;
	off->fr.flags = off_v1.fr.consumer + sizeof(__u32);

	off->cr.producer = off_v1.cr.producer;
	off->cr.consumer = off_v1.cr.consumer;
	off->cr.desc = off_v1.cr.desc;
	off->cr.flags = off_v1.cr.consumer + sizeof(__u32);
}

static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
{
	socklen_t optlen;
	int err;

	optlen = sizeof(*off);
	err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen);
	if (err)
		return err;

	if (optlen == sizeof(*off))
		return 0;

	if (optlen == sizeof(struct xdp_mmap_offsets_v1)) {
		xsk_mmap_offsets_v1(off);
		return 0;
	}

	return -EINVAL;
}

int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
			    __u64 size, struct xsk_ring_prod *fill,
			    struct xsk_ring_cons *comp,
			    const struct xsk_umem_config *usr_config)
{
	struct xdp_mmap_offsets off;
	struct xdp_umem_reg mr;
	struct xsk_umem *umem;
	void *map;
	int err;

	if (!umem_area || !umem_ptr || !fill || !comp)
		return -EFAULT;
	if (!size && !xsk_page_aligned(umem_area))
		return -EINVAL;

	umem = calloc(1, sizeof(*umem));
	if (!umem)
		return -ENOMEM;

	umem->fd = socket(AF_XDP, SOCK_RAW, 0);
	if (umem->fd < 0) {
		err = -errno;
		goto out_umem_alloc;
	}

	umem->umem_area = umem_area;
	xsk_set_umem_config(&umem->config, usr_config);

	memset(&mr, 0, sizeof(mr));
	mr.addr = (uintptr_t)umem_area;
	mr.len = size;
	mr.chunk_size = umem->config.frame_size;
	mr.headroom = umem->config.frame_headroom;
	mr.flags = umem->config.flags;

	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
	if (err) {
		err = -errno;
		goto out_socket;
	}
	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_FILL_RING,
			 &umem->config.fill_size,
			 sizeof(umem->config.fill_size));
	if (err) {
		err = -errno;
		goto out_socket;
	}
	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
			 &umem->config.comp_size,
			 sizeof(umem->config.comp_size));
	if (err) {
		err = -errno;
		goto out_socket;
	}

	err = xsk_get_mmap_offsets(umem->fd, &off);
	if (err) {
		err = -errno;
		goto out_socket;
	}

	map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
		   XDP_UMEM_PGOFF_FILL_RING);
	if (map == MAP_FAILED) {
		err = -errno;
		goto out_socket;
	}

	umem->fill = fill;
	fill->mask = umem->config.fill_size - 1;
	fill->size = umem->config.fill_size;
	fill->producer = map + off.fr.producer;
	fill->consumer = map + off.fr.consumer;
	fill->flags = map + off.fr.flags;
	fill->ring = map + off.fr.desc;
	fill->cached_cons = umem->config.fill_size;

	map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
		   XDP_UMEM_PGOFF_COMPLETION_RING);
	if (map == MAP_FAILED) {
		err = -errno;
		goto out_mmap;
	}

	umem->comp = comp;
	comp->mask = umem->config.comp_size - 1;
	comp->size = umem->config.comp_size;
	comp->producer = map + off.cr.producer;
	comp->consumer = map + off.cr.consumer;
	comp->flags = map + off.cr.flags;
	comp->ring = map + off.cr.desc;

	*umem_ptr = umem;
	return 0;

out_mmap:
	munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
out_socket:
	close(umem->fd);
out_umem_alloc:
	free(umem);
	return err;
}
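
/* A minimal usage sketch for the create call above (not part of the
 * library). NUM_FRAMES is a hypothetical application-chosen constant;
 * everything else is the public xsk.h API:
 *
 *	struct xsk_ring_prod fill;
 *	struct xsk_ring_cons comp;
 *	struct xsk_umem *umem;
 *	__u64 size = NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE;
 *	void *bufs;
 *
 *	// Anonymous mmap yields the page-aligned area the library expects.
 *	bufs = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *	if (bufs == MAP_FAILED)
 *		exit(1);
 *	// A NULL config selects the defaults from xsk_set_umem_config().
 *	if (xsk_umem__create(&umem, bufs, size, &fill, &comp, NULL))
 *		exit(1);
 */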

struct xsk_umem_config_v1 {
	__u32 fill_size;
	__u32 comp_size;
	__u32 frame_size;
	__u32 frame_headroom;
};

int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area,
			    __u64 size, struct xsk_ring_prod *fill,
			    struct xsk_ring_cons *comp,
			    const struct xsk_umem_config *usr_config)
{
	struct xsk_umem_config config;

	memcpy(&config, usr_config, sizeof(struct xsk_umem_config_v1));
	config.flags = 0;

	return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp,
					&config);
}
COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2)
DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4)
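
/* COMPAT_VERSION() and DEFAULT_VERSION() come from libbpf_internal.h and
 * emit GNU symbol-versioning directives, roughly equivalent to:
 *
 *	asm(".symver xsk_umem__create_v0_0_2,xsk_umem__create@LIBBPF_0.0.2");
 *	asm(".symver xsk_umem__create_v0_0_4,xsk_umem__create@@LIBBPF_0.0.4");
 *
 * Binaries linked against libbpf 0.0.2 keep the old, flag-less config
 * layout; new links resolve xsk_umem__create to the v0_0_4 entry point.
 */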

static int xsk_load_xdp_prog(struct xsk_socket *xsk)
{
	static const int log_buf_size = 16 * 1024;
	char log_buf[log_buf_size];
	int err, prog_fd;

	/* This is the C-program:
	 * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
	 * {
	 *     int index = ctx->rx_queue_index;
	 *
	 *     // A set entry here means that the corresponding queue_id
	 *     // has an active AF_XDP socket bound to it.
	 *     if (bpf_map_lookup_elem(&xsks_map, &index))
	 *         return bpf_redirect_map(&xsks_map, index, 0);
	 *
	 *     return XDP_PASS;
	 * }
	 */
	struct bpf_insn prog[] = {
		/* r1 = *(u32 *)(r1 + 16) */
		BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 16),
		/* *(u32 *)(r10 - 4) = r1 */
		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_1, -4),
		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
		BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
		BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
		BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
		/* r0 = XDP_PASS (2), the default return value */
		BPF_MOV32_IMM(BPF_REG_0, 2),
		/* if r1 == 0 goto +5 */
		BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
		/* r2 = *(u32 *)(r10 - 4) */
		BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4),
		BPF_MOV32_IMM(BPF_REG_3, 0),
		BPF_EMIT_CALL(BPF_FUNC_redirect_map),
		/* The jump is to this instruction */
		BPF_EXIT_INSN(),
	};
	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);

	prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt,
				   "LGPL-2.1 or BSD-2-Clause", 0, log_buf,
				   log_buf_size);
	if (prog_fd < 0) {
		pr_warning("BPF log buffer:\n%s", log_buf);
		return prog_fd;
	}

	err = bpf_set_link_xdp_fd(xsk->ifindex, prog_fd, xsk->config.xdp_flags);
	if (err) {
		close(prog_fd);
		return err;
	}

	xsk->prog_fd = prog_fd;
	return 0;
}

static int xsk_get_max_queues(struct xsk_socket *xsk)
{
	struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
	struct ifreq ifr = {};
	int fd, err, ret;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0)
		return -errno;

	ifr.ifr_data = (void *)&channels;
	memcpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ - 1);
	ifr.ifr_name[IFNAMSIZ - 1] = '\0';
	err = ioctl(fd, SIOCETHTOOL, &ifr);
	if (err && errno != EOPNOTSUPP) {
		ret = -errno;
		goto out;
	}

	if (err) {
		/* If the device says it has no channels, then all traffic
		 * is sent to a single stream, so max queues = 1.
		 */
		ret = 1;
	} else {
		/* Take the max of rx, tx, combined. Drivers return
		 * the number of channels in different ways.
		 */
		ret = max(channels.max_rx, channels.max_tx);
		ret = max(ret, (int)channels.max_combined);
	}

out:
	close(fd);
	return ret;
}

static int xsk_create_bpf_maps(struct xsk_socket *xsk)
{
	int max_queues;
	int fd;

	max_queues = xsk_get_max_queues(xsk);
	if (max_queues < 0)
		return max_queues;

	fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map",
				 sizeof(int), sizeof(int), max_queues, 0);
	if (fd < 0)
		return fd;

	xsk->xsks_map_fd = fd;

	return 0;
}

static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
{
	bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
	close(xsk->xsks_map_fd);
}

static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
{
	__u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
	__u32 map_len = sizeof(struct bpf_map_info);
	struct bpf_prog_info prog_info = {};
	struct bpf_map_info map_info;
	int fd, err;

	err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
	if (err)
		return err;

	num_maps = prog_info.nr_map_ids;

	map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids));
	if (!map_ids)
		return -ENOMEM;

	memset(&prog_info, 0, prog_len);
	prog_info.nr_map_ids = num_maps;
	prog_info.map_ids = (__u64)(unsigned long)map_ids;

	err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
	if (err)
		goto out_map_ids;

	xsk->xsks_map_fd = -1;

	for (i = 0; i < prog_info.nr_map_ids; i++) {
		fd = bpf_map_get_fd_by_id(map_ids[i]);
		if (fd < 0)
			continue;

		err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
		if (err) {
			close(fd);
			continue;
		}

		if (!strcmp(map_info.name, "xsks_map")) {
			xsk->xsks_map_fd = fd;
			continue;
		}

		close(fd);
	}

	err = 0;
	if (xsk->xsks_map_fd == -1)
		err = -ENOENT;

out_map_ids:
	free(map_ids);
	return err;
}

static int xsk_set_bpf_maps(struct xsk_socket *xsk)
{
	return bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id,
				   &xsk->fd, 0);
}
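
/* The update above is the user-space half of the contract with the XDP
 * program built in xsk_load_xdp_prog(). A sketch of how the sides meet:
 *
 *	xsks_map[queue_id] = xsk->fd;               // here, from user space
 *	bpf_redirect_map(&xsks_map, queue_id, 0);   // in the XDP program
 *
 * so frames arriving on queue_id are redirected to this socket's RX ring.
 */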

static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
{
	__u32 prog_id = 0;
	int err;

	err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id,
				  xsk->config.xdp_flags);
	if (err)
		return err;

	if (!prog_id) {
		err = xsk_create_bpf_maps(xsk);
		if (err)
			return err;

		err = xsk_load_xdp_prog(xsk);
		if (err) {
			xsk_delete_bpf_maps(xsk);
			return err;
		}
	} else {
		xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
		if (xsk->prog_fd < 0)
			return -errno;
		err = xsk_lookup_bpf_maps(xsk);
		if (err) {
			close(xsk->prog_fd);
			return err;
		}
	}

	err = xsk_set_bpf_maps(xsk);
	if (err) {
		xsk_delete_bpf_maps(xsk);
		close(xsk->prog_fd);
		return err;
	}

	return 0;
}

int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
		       __u32 queue_id, struct xsk_umem *umem,
		       struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
		       const struct xsk_socket_config *usr_config)
{
	void *rx_map = NULL, *tx_map = NULL;
	struct sockaddr_xdp sxdp = {};
	struct xdp_mmap_offsets off;
	struct xsk_socket *xsk;
	int err;

	if (!umem || !xsk_ptr || !rx || !tx)
		return -EFAULT;

	if (umem->refcount) {
		pr_warning("Error: shared umems not supported by libbpf.\n");
		return -EBUSY;
	}

	xsk = calloc(1, sizeof(*xsk));
	if (!xsk)
		return -ENOMEM;

	if (umem->refcount++ > 0) {
		xsk->fd = socket(AF_XDP, SOCK_RAW, 0);
		if (xsk->fd < 0) {
			err = -errno;
			goto out_xsk_alloc;
		}
	} else {
		xsk->fd = umem->fd;
	}

	xsk->outstanding_tx = 0;
	xsk->queue_id = queue_id;
	xsk->umem = umem;
	xsk->ifindex = if_nametoindex(ifname);
	if (!xsk->ifindex) {
		err = -errno;
		goto out_socket;
	}
	memcpy(xsk->ifname, ifname, IFNAMSIZ - 1);
	xsk->ifname[IFNAMSIZ - 1] = '\0';

	err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
	if (err)
		goto out_socket;

	if (rx) {
		err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
				 &xsk->config.rx_size,
				 sizeof(xsk->config.rx_size));
		if (err) {
			err = -errno;
			goto out_socket;
		}
	}
	if (tx) {
		err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
				 &xsk->config.tx_size,
				 sizeof(xsk->config.tx_size));
		if (err) {
			err = -errno;
			goto out_socket;
		}
	}

	err = xsk_get_mmap_offsets(xsk->fd, &off);
	if (err) {
		err = -errno;
		goto out_socket;
	}

	if (rx) {
		rx_map = mmap(NULL, off.rx.desc +
			      xsk->config.rx_size * sizeof(struct xdp_desc),
			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
			      xsk->fd, XDP_PGOFF_RX_RING);
		if (rx_map == MAP_FAILED) {
			err = -errno;
			goto out_socket;
		}

		rx->mask = xsk->config.rx_size - 1;
		rx->size = xsk->config.rx_size;
		rx->producer = rx_map + off.rx.producer;
		rx->consumer = rx_map + off.rx.consumer;
		rx->flags = rx_map + off.rx.flags;
		rx->ring = rx_map + off.rx.desc;
	}
	xsk->rx = rx;

	if (tx) {
		tx_map = mmap(NULL, off.tx.desc +
			      xsk->config.tx_size * sizeof(struct xdp_desc),
			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
			      xsk->fd, XDP_PGOFF_TX_RING);
		if (tx_map == MAP_FAILED) {
			err = -errno;
			goto out_mmap_rx;
		}

		tx->mask = xsk->config.tx_size - 1;
		tx->size = xsk->config.tx_size;
		tx->producer = tx_map + off.tx.producer;
		tx->consumer = tx_map + off.tx.consumer;
		tx->flags = tx_map + off.tx.flags;
		tx->ring = tx_map + off.tx.desc;
		tx->cached_cons = xsk->config.tx_size;
	}
	xsk->tx = tx;

	sxdp.sxdp_family = PF_XDP;
	sxdp.sxdp_ifindex = xsk->ifindex;
	sxdp.sxdp_queue_id = xsk->queue_id;
	sxdp.sxdp_flags = xsk->config.bind_flags;

	err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
	if (err) {
		err = -errno;
		goto out_mmap_tx;
	}

	xsk->prog_fd = -1;

	if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
		err = xsk_setup_xdp_prog(xsk);
		if (err)
			goto out_mmap_tx;
	}

	*xsk_ptr = xsk;
	return 0;

out_mmap_tx:
	if (tx)
		munmap(tx_map, off.tx.desc +
		       xsk->config.tx_size * sizeof(struct xdp_desc));
out_mmap_rx:
	if (rx)
		munmap(rx_map, off.rx.desc +
		       xsk->config.rx_size * sizeof(struct xdp_desc));
out_socket:
	if (--umem->refcount)
		close(xsk->fd);
out_xsk_alloc:
	free(xsk);
	return err;
}
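
/* A minimal usage sketch for the create call above, assuming a umem built
 * as in the xsk_umem__create() sketch earlier; "eth0" and queue 0 are
 * illustrative:
 *
 *	struct xsk_ring_cons rx;
 *	struct xsk_ring_prod tx;
 *	struct xsk_socket *xsk;
 *
 *	// NULL config: default ring sizes, no libbpf/xdp/bind flags, and
 *	// the built-in redirect program is loaded automatically.
 *	if (xsk_socket__create(&xsk, "eth0", 0, umem, &rx, &tx, NULL))
 *		exit(1);
 */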

int xsk_umem__delete(struct xsk_umem *umem)
{
	struct xdp_mmap_offsets off;
	int err;

	if (!umem)
		return 0;

	if (umem->refcount)
		return -EBUSY;

	err = xsk_get_mmap_offsets(umem->fd, &off);
	if (!err) {
		munmap(umem->fill->ring - off.fr.desc,
		       off.fr.desc + umem->config.fill_size * sizeof(__u64));
		munmap(umem->comp->ring - off.cr.desc,
		       off.cr.desc + umem->config.comp_size * sizeof(__u64));
	}

	close(umem->fd);
	free(umem);

	return 0;
}

void xsk_socket__delete(struct xsk_socket *xsk)
{
	size_t desc_sz = sizeof(struct xdp_desc);
	struct xdp_mmap_offsets off;
	int err;

	if (!xsk)
		return;

	if (xsk->prog_fd != -1) {
		xsk_delete_bpf_maps(xsk);
		close(xsk->prog_fd);
	}

	err = xsk_get_mmap_offsets(xsk->fd, &off);
	if (!err) {
		if (xsk->rx) {
			munmap(xsk->rx->ring - off.rx.desc,
			       off.rx.desc + xsk->config.rx_size * desc_sz);
		}
		if (xsk->tx) {
			munmap(xsk->tx->ring - off.tx.desc,
			       off.tx.desc + xsk->config.tx_size * desc_sz);
		}
	}

	xsk->umem->refcount--;
	/* Do not close an fd that also has an associated umem connected
	 * to it.
	 */
	if (xsk->fd != xsk->umem->fd)
		close(xsk->fd);
	free(xsk);
}
768