1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2
3 /*
4 * Common eBPF ELF object loading operations.
5 *
6 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
7 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
8 * Copyright (C) 2015 Huawei Inc.
9 * Copyright (C) 2017 Nicira, Inc.
10 * Copyright (C) 2019 Isovalent, Inc.
11 */
12
13 #ifndef _GNU_SOURCE
14 #define _GNU_SOURCE
15 #endif
16 #include <stdlib.h>
17 #include <stdio.h>
18 #include <stdarg.h>
19 #include <libgen.h>
20 #include <inttypes.h>
21 #include <limits.h>
22 #include <string.h>
23 #include <unistd.h>
24 #include <endian.h>
25 #include <fcntl.h>
26 #include <errno.h>
27 #include <ctype.h>
28 #include <asm/unistd.h>
29 #include <linux/err.h>
30 #include <linux/kernel.h>
31 #include <linux/bpf.h>
32 #include <linux/btf.h>
33 #include <linux/filter.h>
34 #include <linux/limits.h>
35 #include <linux/perf_event.h>
36 #include <linux/bpf_perf_event.h>
37 #include <linux/ring_buffer.h>
38 #include <sys/epoll.h>
39 #include <sys/ioctl.h>
40 #include <sys/mman.h>
41 #include <sys/stat.h>
42 #include <sys/types.h>
43 #include <sys/vfs.h>
44 #include <sys/utsname.h>
45 #include <sys/resource.h>
46 #include <libelf.h>
47 #include <gelf.h>
48 #include <zlib.h>
49
50 #include "libbpf.h"
51 #include "bpf.h"
52 #include "btf.h"
53 #include "str_error.h"
54 #include "libbpf_internal.h"
55 #include "hashmap.h"
56 #include "bpf_gen_internal.h"
57 #include "zip.h"
58
59 #ifndef BPF_FS_MAGIC
60 #define BPF_FS_MAGIC 0xcafe4a11
61 #endif
62
63 #define MAX_EVENT_NAME_LEN 64
64
65 #define BPF_FS_DEFAULT_PATH "/sys/fs/bpf"
66
67 #define BPF_INSN_SZ (sizeof(struct bpf_insn))
68
69 /* vfprintf() in __base_pr() uses a nonliteral format string. It may break
70  * compilation if the user enables that warning. Disable it explicitly.
71 */
72 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
73
74 #define __printf(a, b) __attribute__((format(printf, a, b)))
75
76 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
77 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
78 static int map_set_def_max_entries(struct bpf_map *map);
79
80 static const char * const attach_type_name[] = {
81 [BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress",
82 [BPF_CGROUP_INET_EGRESS] = "cgroup_inet_egress",
83 [BPF_CGROUP_INET_SOCK_CREATE] = "cgroup_inet_sock_create",
84 [BPF_CGROUP_INET_SOCK_RELEASE] = "cgroup_inet_sock_release",
85 [BPF_CGROUP_SOCK_OPS] = "cgroup_sock_ops",
86 [BPF_CGROUP_DEVICE] = "cgroup_device",
87 [BPF_CGROUP_INET4_BIND] = "cgroup_inet4_bind",
88 [BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind",
89 [BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect",
90 [BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect",
91 [BPF_CGROUP_UNIX_CONNECT] = "cgroup_unix_connect",
92 [BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind",
93 [BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind",
94 [BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername",
95 [BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername",
96 [BPF_CGROUP_UNIX_GETPEERNAME] = "cgroup_unix_getpeername",
97 [BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname",
98 [BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname",
99 [BPF_CGROUP_UNIX_GETSOCKNAME] = "cgroup_unix_getsockname",
100 [BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg",
101 [BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg",
102 [BPF_CGROUP_UNIX_SENDMSG] = "cgroup_unix_sendmsg",
103 [BPF_CGROUP_SYSCTL] = "cgroup_sysctl",
104 [BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg",
105 [BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg",
106 [BPF_CGROUP_UNIX_RECVMSG] = "cgroup_unix_recvmsg",
107 [BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt",
108 [BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt",
109 [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
110 [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
111 [BPF_SK_SKB_VERDICT] = "sk_skb_verdict",
112 [BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
113 [BPF_LIRC_MODE2] = "lirc_mode2",
114 [BPF_FLOW_DISSECTOR] = "flow_dissector",
115 [BPF_TRACE_RAW_TP] = "trace_raw_tp",
116 [BPF_TRACE_FENTRY] = "trace_fentry",
117 [BPF_TRACE_FEXIT] = "trace_fexit",
118 [BPF_MODIFY_RETURN] = "modify_return",
119 [BPF_LSM_MAC] = "lsm_mac",
120 [BPF_LSM_CGROUP] = "lsm_cgroup",
121 [BPF_SK_LOOKUP] = "sk_lookup",
122 [BPF_TRACE_ITER] = "trace_iter",
123 [BPF_XDP_DEVMAP] = "xdp_devmap",
124 [BPF_XDP_CPUMAP] = "xdp_cpumap",
125 [BPF_XDP] = "xdp",
126 [BPF_SK_REUSEPORT_SELECT] = "sk_reuseport_select",
127 [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_reuseport_select_or_migrate",
128 [BPF_PERF_EVENT] = "perf_event",
129 [BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi",
130 [BPF_STRUCT_OPS] = "struct_ops",
131 [BPF_NETFILTER] = "netfilter",
132 [BPF_TCX_INGRESS] = "tcx_ingress",
133 [BPF_TCX_EGRESS] = "tcx_egress",
134 [BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi",
135 [BPF_NETKIT_PRIMARY] = "netkit_primary",
136 [BPF_NETKIT_PEER] = "netkit_peer",
137 [BPF_TRACE_KPROBE_SESSION] = "trace_kprobe_session",
138 };
139
140 static const char * const link_type_name[] = {
141 [BPF_LINK_TYPE_UNSPEC] = "unspec",
142 [BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
143 [BPF_LINK_TYPE_TRACING] = "tracing",
144 [BPF_LINK_TYPE_CGROUP] = "cgroup",
145 [BPF_LINK_TYPE_ITER] = "iter",
146 [BPF_LINK_TYPE_NETNS] = "netns",
147 [BPF_LINK_TYPE_XDP] = "xdp",
148 [BPF_LINK_TYPE_PERF_EVENT] = "perf_event",
149 [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi",
150 [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops",
151 [BPF_LINK_TYPE_NETFILTER] = "netfilter",
152 [BPF_LINK_TYPE_TCX] = "tcx",
153 [BPF_LINK_TYPE_UPROBE_MULTI] = "uprobe_multi",
154 [BPF_LINK_TYPE_NETKIT] = "netkit",
155 [BPF_LINK_TYPE_SOCKMAP] = "sockmap",
156 };
157
158 static const char * const map_type_name[] = {
159 [BPF_MAP_TYPE_UNSPEC] = "unspec",
160 [BPF_MAP_TYPE_HASH] = "hash",
161 [BPF_MAP_TYPE_ARRAY] = "array",
162 [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array",
163 [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array",
164 [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash",
165 [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array",
166 [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace",
167 [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array",
168 [BPF_MAP_TYPE_LRU_HASH] = "lru_hash",
169 [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash",
170 [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie",
171 [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps",
172 [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps",
173 [BPF_MAP_TYPE_DEVMAP] = "devmap",
174 [BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash",
175 [BPF_MAP_TYPE_SOCKMAP] = "sockmap",
176 [BPF_MAP_TYPE_CPUMAP] = "cpumap",
177 [BPF_MAP_TYPE_XSKMAP] = "xskmap",
178 [BPF_MAP_TYPE_SOCKHASH] = "sockhash",
179 [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage",
180 [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray",
181 [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage",
182 [BPF_MAP_TYPE_QUEUE] = "queue",
183 [BPF_MAP_TYPE_STACK] = "stack",
184 [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage",
185 [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops",
186 [BPF_MAP_TYPE_RINGBUF] = "ringbuf",
187 [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
188 [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage",
189 [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
190 [BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf",
191 [BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage",
192 [BPF_MAP_TYPE_ARENA] = "arena",
193 };
194
195 static const char * const prog_type_name[] = {
196 [BPF_PROG_TYPE_UNSPEC] = "unspec",
197 [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
198 [BPF_PROG_TYPE_KPROBE] = "kprobe",
199 [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
200 [BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
201 [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
202 [BPF_PROG_TYPE_XDP] = "xdp",
203 [BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
204 [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
205 [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
206 [BPF_PROG_TYPE_LWT_IN] = "lwt_in",
207 [BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
208 [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
209 [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
210 [BPF_PROG_TYPE_SK_SKB] = "sk_skb",
211 [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
212 [BPF_PROG_TYPE_SK_MSG] = "sk_msg",
213 [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
214 [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
215 [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local",
216 [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
217 [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
218 [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
219 [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
220 [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
221 [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
222 [BPF_PROG_TYPE_TRACING] = "tracing",
223 [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
224 [BPF_PROG_TYPE_EXT] = "ext",
225 [BPF_PROG_TYPE_LSM] = "lsm",
226 [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
227 [BPF_PROG_TYPE_SYSCALL] = "syscall",
228 [BPF_PROG_TYPE_NETFILTER] = "netfilter",
229 };
230
231 static int __base_pr(enum libbpf_print_level level, const char *format,
232 va_list args)
233 {
234 const char *env_var = "LIBBPF_LOG_LEVEL";
235 static enum libbpf_print_level min_level = LIBBPF_INFO;
236 static bool initialized;
237
238 if (!initialized) {
239 char *verbosity;
240
241 initialized = true;
242 verbosity = getenv(env_var);
243 if (verbosity) {
244 if (strcasecmp(verbosity, "warn") == 0)
245 min_level = LIBBPF_WARN;
246 else if (strcasecmp(verbosity, "debug") == 0)
247 min_level = LIBBPF_DEBUG;
248 else if (strcasecmp(verbosity, "info") == 0)
249 min_level = LIBBPF_INFO;
250 else
251 fprintf(stderr, "libbpf: unrecognized '%s' envvar value: '%s', should be one of 'warn', 'debug', or 'info'.\n",
252 env_var, verbosity);
253 }
254 }
255
256 /* if too verbose, skip logging */
257 if (level > min_level)
258 return 0;
259
260 return vfprintf(stderr, format, args);
261 }
262
263 static libbpf_print_fn_t __libbpf_pr = __base_pr;
264
265 libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
266 {
267 libbpf_print_fn_t old_print_fn;
268
269 old_print_fn = __atomic_exchange_n(&__libbpf_pr, fn, __ATOMIC_RELAXED);
270
271 return old_print_fn;
272 }
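/* Usage sketch (illustration only, not part of libbpf itself): an application
 * can route libbpf's logging through its own callback, e.g.:
 *
 *	static int my_print(enum libbpf_print_level level,
 *			    const char *format, va_list args)
 *	{
 *		if (level == LIBBPF_DEBUG)
 *			return 0;
 *		return vfprintf(stderr, format, args);
 *	}
 *
 *	libbpf_set_print(my_print);
 *
 * Passing NULL disables all libbpf output, since libbpf_print() below skips
 * printing when no callback is installed.
 */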
273
274 __printf(2, 3)
275 void libbpf_print(enum libbpf_print_level level, const char *format, ...)
276 {
277 va_list args;
278 int old_errno;
279 libbpf_print_fn_t print_fn;
280
281 print_fn = __atomic_load_n(&__libbpf_pr, __ATOMIC_RELAXED);
282 if (!print_fn)
283 return;
284
285 old_errno = errno;
286
287 va_start(args, format);
288 print_fn(level, format, args);
289 va_end(args);
290
291 errno = old_errno;
292 }
293
294 static void pr_perm_msg(int err)
295 {
296 struct rlimit limit;
297 char buf[100];
298
299 if (err != -EPERM || geteuid() != 0)
300 return;
301
302 err = getrlimit(RLIMIT_MEMLOCK, &limit);
303 if (err)
304 return;
305
306 if (limit.rlim_cur == RLIM_INFINITY)
307 return;
308
309 if (limit.rlim_cur < 1024)
310 snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
311 else if (limit.rlim_cur < 1024*1024)
312 snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
313 else
314 snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));
315
316 pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
317 buf);
318 }
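/* Background sketch (typical application-side workaround, not libbpf API): on
 * kernels that still charge BPF memory against RLIMIT_MEMLOCK, loaders often
 * raise the limit programmatically, which is the equivalent of the
 * 'ulimit -l' hint above:
 *
 *	struct rlimit r = { RLIM_INFINITY, RLIM_INFINITY };
 *
 *	if (setrlimit(RLIMIT_MEMLOCK, &r))
 *		perror("setrlimit(RLIMIT_MEMLOCK)");
 */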
319
320 #define STRERR_BUFSIZE 128
321
322 /* Copied from tools/perf/util/util.h */
323 #ifndef zfree
324 # define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
325 #endif
326
327 #ifndef zclose
328 # define zclose(fd) ({ \
329 int ___err = 0; \
330 if ((fd) >= 0) \
331 ___err = close((fd)); \
332 fd = -1; \
333 ___err; })
334 #endif
335
336 static inline __u64 ptr_to_u64(const void *ptr)
337 {
338 return (__u64) (unsigned long) ptr;
339 }
340
341 int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
342 {
343 /* as of v1.0 libbpf_set_strict_mode() is a no-op */
344 return 0;
345 }
346
347 __u32 libbpf_major_version(void)
348 {
349 return LIBBPF_MAJOR_VERSION;
350 }
351
352 __u32 libbpf_minor_version(void)
353 {
354 return LIBBPF_MINOR_VERSION;
355 }
356
357 const char *libbpf_version_string(void)
358 {
359 #define __S(X) #X
360 #define _S(X) __S(X)
361 return "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION);
362 #undef _S
363 #undef __S
364 }
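/* Worked example (sketch): if LIBBPF_MAJOR_VERSION were 1 and
 * LIBBPF_MINOR_VERSION were 5, _S() expands the macro arguments first and
 * __S() then stringifies them, so the expression becomes "v" "1" "." "5",
 * i.e. "v1.5". The two-level macro is what forces expansion before
 * stringification.
 */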
365
366 enum reloc_type {
367 RELO_LD64,
368 RELO_CALL,
369 RELO_DATA,
370 RELO_EXTERN_LD64,
371 RELO_EXTERN_CALL,
372 RELO_SUBPROG_ADDR,
373 RELO_CORE,
374 };
375
376 struct reloc_desc {
377 enum reloc_type type;
378 int insn_idx;
379 union {
380 const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */
381 struct {
382 int map_idx;
383 int sym_off;
384 int ext_idx;
385 };
386 };
387 };
388
389 /* stored as sec_def->cookie for all libbpf-supported SEC()s */
390 enum sec_def_flags {
391 SEC_NONE = 0,
392 /* expected_attach_type is optional, if kernel doesn't support that */
393 SEC_EXP_ATTACH_OPT = 1,
394 /* legacy, only used by libbpf_get_type_names() and
395 * libbpf_attach_type_by_name(), not used by libbpf itself at all.
396 	 * This used to be associated with cgroup (and a few other) BPF programs
397 * that were attachable through BPF_PROG_ATTACH command. Pretty
398 * meaningless nowadays, though.
399 */
400 SEC_ATTACHABLE = 2,
401 SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT,
402 /* attachment target is specified through BTF ID in either kernel or
403 * other BPF program's BTF object
404 */
405 SEC_ATTACH_BTF = 4,
406 /* BPF program type allows sleeping/blocking in kernel */
407 SEC_SLEEPABLE = 8,
408 	/* BPF program supports non-linear XDP buffers */
409 SEC_XDP_FRAGS = 16,
410 /* Setup proper attach type for usdt probes. */
411 SEC_USDT = 32,
412 };
413
414 struct bpf_sec_def {
415 char *sec;
416 enum bpf_prog_type prog_type;
417 enum bpf_attach_type expected_attach_type;
418 long cookie;
419 int handler_id;
420
421 libbpf_prog_setup_fn_t prog_setup_fn;
422 libbpf_prog_prepare_load_fn_t prog_prepare_load_fn;
423 libbpf_prog_attach_fn_t prog_attach_fn;
424 };
425
426 /*
427 * bpf_prog should be a better name but it has been used in
428 * linux/filter.h.
429 */
430 struct bpf_program {
431 char *name;
432 char *sec_name;
433 size_t sec_idx;
434 const struct bpf_sec_def *sec_def;
435 /* this program's instruction offset (in number of instructions)
436 * within its containing ELF section
437 */
438 size_t sec_insn_off;
439 /* number of original instructions in ELF section belonging to this
440 	 * program, not taking into account subprogram instructions possibly
441 * appended later during relocation
442 */
443 size_t sec_insn_cnt;
444 /* Offset (in number of instructions) of the start of instruction
445 * belonging to this BPF program within its containing main BPF
446 * program. For the entry-point (main) BPF program, this is always
447 	 * zero. For a sub-program, this gets reset before each of the main BPF
448 	 * programs is processed and relocated, and is used to determine
449 	 * whether the sub-program was already appended to the main program, and
450 * if yes, at which instruction offset.
451 */
452 size_t sub_insn_off;
453
454 /* instructions that belong to BPF program; insns[0] is located at
455 * sec_insn_off instruction within its ELF section in ELF file, so
456 * when mapping ELF file instruction index to the local instruction,
457 * one needs to subtract sec_insn_off; and vice versa.
458 */
459 struct bpf_insn *insns;
460 	/* actual number of instructions in this BPF program's image; for
461 * entry-point BPF programs this includes the size of main program
462 * itself plus all the used sub-programs, appended at the end
463 */
464 size_t insns_cnt;
465
466 struct reloc_desc *reloc_desc;
467 int nr_reloc;
468
469 /* BPF verifier log settings */
470 char *log_buf;
471 size_t log_size;
472 __u32 log_level;
473
474 struct bpf_object *obj;
475
476 int fd;
477 bool autoload;
478 bool autoattach;
479 bool sym_global;
480 bool mark_btf_static;
481 enum bpf_prog_type type;
482 enum bpf_attach_type expected_attach_type;
483 int exception_cb_idx;
484
485 int prog_ifindex;
486 __u32 attach_btf_obj_fd;
487 __u32 attach_btf_id;
488 __u32 attach_prog_fd;
489
490 void *func_info;
491 __u32 func_info_rec_size;
492 __u32 func_info_cnt;
493
494 void *line_info;
495 __u32 line_info_rec_size;
496 __u32 line_info_cnt;
497 __u32 prog_flags;
498 };
499
500 struct bpf_struct_ops {
501 struct bpf_program **progs;
502 __u32 *kern_func_off;
503 /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
504 void *data;
505 /* e.g. struct bpf_struct_ops_tcp_congestion_ops in
506 * btf_vmlinux's format.
507 * struct bpf_struct_ops_tcp_congestion_ops {
508 * [... some other kernel fields ...]
509 * struct tcp_congestion_ops data;
510 * }
511 * kern_vdata-size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
512 * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
513 * from "data".
514 */
515 void *kern_vdata;
516 __u32 type_id;
517 };
518
519 #define DATA_SEC ".data"
520 #define BSS_SEC ".bss"
521 #define RODATA_SEC ".rodata"
522 #define KCONFIG_SEC ".kconfig"
523 #define KSYMS_SEC ".ksyms"
524 #define STRUCT_OPS_SEC ".struct_ops"
525 #define STRUCT_OPS_LINK_SEC ".struct_ops.link"
526 #define ARENA_SEC ".addr_space.1"
527
528 enum libbpf_map_type {
529 LIBBPF_MAP_UNSPEC,
530 LIBBPF_MAP_DATA,
531 LIBBPF_MAP_BSS,
532 LIBBPF_MAP_RODATA,
533 LIBBPF_MAP_KCONFIG,
534 };
535
536 struct bpf_map_def {
537 unsigned int type;
538 unsigned int key_size;
539 unsigned int value_size;
540 unsigned int max_entries;
541 unsigned int map_flags;
542 };
543
544 struct bpf_map {
545 struct bpf_object *obj;
546 char *name;
547 /* real_name is defined for special internal maps (.rodata*,
548 * .data*, .bss, .kconfig) and preserves their original ELF section
549 * name. This is important to be able to find corresponding BTF
550 * DATASEC information.
551 */
552 char *real_name;
553 int fd;
554 int sec_idx;
555 size_t sec_offset;
556 int map_ifindex;
557 int inner_map_fd;
558 struct bpf_map_def def;
559 __u32 numa_node;
560 __u32 btf_var_idx;
561 int mod_btf_fd;
562 __u32 btf_key_type_id;
563 __u32 btf_value_type_id;
564 __u32 btf_vmlinux_value_type_id;
565 enum libbpf_map_type libbpf_type;
566 void *mmaped;
567 struct bpf_struct_ops *st_ops;
568 struct bpf_map *inner_map;
569 void **init_slots;
570 int init_slots_sz;
571 char *pin_path;
572 bool pinned;
573 bool reused;
574 bool autocreate;
575 bool autoattach;
576 __u64 map_extra;
577 };
578
579 enum extern_type {
580 EXT_UNKNOWN,
581 EXT_KCFG,
582 EXT_KSYM,
583 };
584
585 enum kcfg_type {
586 KCFG_UNKNOWN,
587 KCFG_CHAR,
588 KCFG_BOOL,
589 KCFG_INT,
590 KCFG_TRISTATE,
591 KCFG_CHAR_ARR,
592 };
593
594 struct extern_desc {
595 enum extern_type type;
596 int sym_idx;
597 int btf_id;
598 int sec_btf_id;
599 char *name;
600 char *essent_name;
601 bool is_set;
602 bool is_weak;
603 union {
604 struct {
605 enum kcfg_type type;
606 int sz;
607 int align;
608 int data_off;
609 bool is_signed;
610 } kcfg;
611 struct {
612 unsigned long long addr;
613
614 /* target btf_id of the corresponding kernel var. */
615 int kernel_btf_obj_fd;
616 int kernel_btf_id;
617
618 /* local btf_id of the ksym extern's type. */
619 __u32 type_id;
620 /* BTF fd index to be patched in for insn->off, this is
621 * 0 for vmlinux BTF, index in obj->fd_array for module
622 * BTF
623 */
624 __s16 btf_fd_idx;
625 } ksym;
626 };
627 };
628
629 struct module_btf {
630 struct btf *btf;
631 char *name;
632 __u32 id;
633 int fd;
634 int fd_array_idx;
635 };
636
637 enum sec_type {
638 SEC_UNUSED = 0,
639 SEC_RELO,
640 SEC_BSS,
641 SEC_DATA,
642 SEC_RODATA,
643 SEC_ST_OPS,
644 };
645
646 struct elf_sec_desc {
647 enum sec_type sec_type;
648 Elf64_Shdr *shdr;
649 Elf_Data *data;
650 };
651
652 struct elf_state {
653 int fd;
654 const void *obj_buf;
655 size_t obj_buf_sz;
656 Elf *elf;
657 Elf64_Ehdr *ehdr;
658 Elf_Data *symbols;
659 Elf_Data *arena_data;
660 size_t shstrndx; /* section index for section name strings */
661 size_t strtabidx;
662 struct elf_sec_desc *secs;
663 size_t sec_cnt;
664 int btf_maps_shndx;
665 __u32 btf_maps_sec_btf_id;
666 int text_shndx;
667 int symbols_shndx;
668 bool has_st_ops;
669 int arena_data_shndx;
670 };
671
672 struct usdt_manager;
673
674 struct bpf_object {
675 char name[BPF_OBJ_NAME_LEN];
676 char license[64];
677 __u32 kern_version;
678
679 struct bpf_program *programs;
680 size_t nr_programs;
681 struct bpf_map *maps;
682 size_t nr_maps;
683 size_t maps_cap;
684
685 char *kconfig;
686 struct extern_desc *externs;
687 int nr_extern;
688 int kconfig_map_idx;
689
690 bool loaded;
691 bool has_subcalls;
692 bool has_rodata;
693
694 struct bpf_gen *gen_loader;
695
696 /* Information when doing ELF related work. Only valid if efile.elf is not NULL */
697 struct elf_state efile;
698
699 struct btf *btf;
700 struct btf_ext *btf_ext;
701
702 /* Parse and load BTF vmlinux if any of the programs in the object need
703 * it at load time.
704 */
705 struct btf *btf_vmlinux;
706 /* Path to the custom BTF to be used for BPF CO-RE relocations as an
707 * override for vmlinux BTF.
708 */
709 char *btf_custom_path;
710 /* vmlinux BTF override for CO-RE relocations */
711 struct btf *btf_vmlinux_override;
712 /* Lazily initialized kernel module BTFs */
713 struct module_btf *btf_modules;
714 bool btf_modules_loaded;
715 size_t btf_module_cnt;
716 size_t btf_module_cap;
717
718 /* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */
719 char *log_buf;
720 size_t log_size;
721 __u32 log_level;
722
723 int *fd_array;
724 size_t fd_array_cap;
725 size_t fd_array_cnt;
726
727 struct usdt_manager *usdt_man;
728
729 int arena_map_idx;
730 void *arena_data;
731 size_t arena_data_sz;
732
733 struct kern_feature_cache *feat_cache;
734 char *token_path;
735 int token_fd;
736
737 char path[];
738 };
739
740 static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
741 static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
742 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
743 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
744 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn);
745 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
746 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
747 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx);
748 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx);
749
750 void bpf_program__unload(struct bpf_program *prog)
751 {
752 if (!prog)
753 return;
754
755 zclose(prog->fd);
756
757 zfree(&prog->func_info);
758 zfree(&prog->line_info);
759 }
760
761 static void bpf_program__exit(struct bpf_program *prog)
762 {
763 if (!prog)
764 return;
765
766 bpf_program__unload(prog);
767 zfree(&prog->name);
768 zfree(&prog->sec_name);
769 zfree(&prog->insns);
770 zfree(&prog->reloc_desc);
771
772 prog->nr_reloc = 0;
773 prog->insns_cnt = 0;
774 prog->sec_idx = -1;
775 }
776
777 static bool insn_is_subprog_call(const struct bpf_insn *insn)
778 {
779 return BPF_CLASS(insn->code) == BPF_JMP &&
780 BPF_OP(insn->code) == BPF_CALL &&
781 BPF_SRC(insn->code) == BPF_K &&
782 insn->src_reg == BPF_PSEUDO_CALL &&
783 insn->dst_reg == 0 &&
784 insn->off == 0;
785 }
786
787 static bool is_call_insn(const struct bpf_insn *insn)
788 {
789 return insn->code == (BPF_JMP | BPF_CALL);
790 }
791
792 static bool insn_is_pseudo_func(struct bpf_insn *insn)
793 {
794 return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
795 }
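/* Illustration (sketch of the encodings these helpers test for): a BPF-to-BPF
 * subprogram call has .code == (BPF_JMP | BPF_CALL) with
 * .src_reg == BPF_PSEUDO_CALL and .imm holding an instruction delta to the
 * callee, whereas a kernel helper call keeps src_reg == 0 and carries the
 * helper ID in .imm. A "pseudo func" is an ldimm64 instruction
 * (.code == (BPF_LD | BPF_IMM | BPF_DW)) with .src_reg == BPF_PSEUDO_FUNC,
 * loading the address of a BPF subprogram.
 */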
796
797 static int
798 bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
799 const char *name, size_t sec_idx, const char *sec_name,
800 size_t sec_off, void *insn_data, size_t insn_data_sz)
801 {
802 if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
803 pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
804 sec_name, name, sec_off, insn_data_sz);
805 return -EINVAL;
806 }
807
808 memset(prog, 0, sizeof(*prog));
809 prog->obj = obj;
810
811 prog->sec_idx = sec_idx;
812 prog->sec_insn_off = sec_off / BPF_INSN_SZ;
813 prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
814 /* insns_cnt can later be increased by appending used subprograms */
815 prog->insns_cnt = prog->sec_insn_cnt;
816
817 prog->type = BPF_PROG_TYPE_UNSPEC;
818 prog->fd = -1;
819 prog->exception_cb_idx = -1;
820
821 /* libbpf's convention for SEC("?abc...") is that it's just like
822 * SEC("abc...") but the corresponding bpf_program starts out with
823 * autoload set to false.
824 */
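	/* Usage sketch (illustration, not part of this file): a program written
	 * as SEC("?kprobe/do_unlinkat") in BPF C source is parsed as usual, but
	 * starts with autoload == false; the application can opt it back in with
	 * bpf_program__set_autoload(prog, true) before bpf_object__load().
	 */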
825 if (sec_name[0] == '?') {
826 prog->autoload = false;
827 /* from now on forget there was ? in section name */
828 sec_name++;
829 } else {
830 prog->autoload = true;
831 }
832
833 prog->autoattach = true;
834
835 /* inherit object's log_level */
836 prog->log_level = obj->log_level;
837
838 prog->sec_name = strdup(sec_name);
839 if (!prog->sec_name)
840 goto errout;
841
842 prog->name = strdup(name);
843 if (!prog->name)
844 goto errout;
845
846 prog->insns = malloc(insn_data_sz);
847 if (!prog->insns)
848 goto errout;
849 memcpy(prog->insns, insn_data, insn_data_sz);
850
851 return 0;
852 errout:
853 pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
854 bpf_program__exit(prog);
855 return -ENOMEM;
856 }
857
858 static int
859 bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
860 const char *sec_name, int sec_idx)
861 {
862 Elf_Data *symbols = obj->efile.symbols;
863 struct bpf_program *prog, *progs;
864 void *data = sec_data->d_buf;
865 size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
866 int nr_progs, err, i;
867 const char *name;
868 Elf64_Sym *sym;
869
870 progs = obj->programs;
871 nr_progs = obj->nr_programs;
872 nr_syms = symbols->d_size / sizeof(Elf64_Sym);
873
874 for (i = 0; i < nr_syms; i++) {
875 sym = elf_sym_by_idx(obj, i);
876
877 if (sym->st_shndx != sec_idx)
878 continue;
879 if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
880 continue;
881
882 prog_sz = sym->st_size;
883 sec_off = sym->st_value;
884
885 name = elf_sym_str(obj, sym->st_name);
886 if (!name) {
887 pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
888 sec_name, sec_off);
889 return -LIBBPF_ERRNO__FORMAT;
890 }
891
892 if (sec_off + prog_sz > sec_sz || sec_off + prog_sz < sec_off) {
893 pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
894 sec_name, sec_off);
895 return -LIBBPF_ERRNO__FORMAT;
896 }
897
898 if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
899 pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
900 return -ENOTSUP;
901 }
902
903 pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
904 sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);
905
906 progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
907 if (!progs) {
908 /*
909 			 * In this case the original obj->programs
910 			 * is still valid, so no special treatment is needed
911 			 * for bpf_object__close().
912 */
913 pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
914 sec_name, name);
915 return -ENOMEM;
916 }
917 obj->programs = progs;
918
919 prog = &progs[nr_progs];
920
921 err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
922 sec_off, data + sec_off, prog_sz);
923 if (err)
924 return err;
925
926 if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL)
927 prog->sym_global = true;
928
929 /* if function is a global/weak symbol, but has restricted
930 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
931 * as static to enable more permissive BPF verification mode
932 * with more outside context available to BPF verifier
933 */
934 if (prog->sym_global && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
935 || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL))
936 prog->mark_btf_static = true;
937
938 nr_progs++;
939 obj->nr_programs = nr_progs;
940 }
941
942 return 0;
943 }
944
945 static const struct btf_member *
946 find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
947 {
948 struct btf_member *m;
949 int i;
950
951 for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
952 if (btf_member_bit_offset(t, i) == bit_offset)
953 return m;
954 }
955
956 return NULL;
957 }
958
959 static const struct btf_member *
960 find_member_by_name(const struct btf *btf, const struct btf_type *t,
961 const char *name)
962 {
963 struct btf_member *m;
964 int i;
965
966 for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
967 if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
968 return m;
969 }
970
971 return NULL;
972 }
973
974 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
975 __u16 kind, struct btf **res_btf,
976 struct module_btf **res_mod_btf);
977
978 #define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
979 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
980 const char *name, __u32 kind);
981
982 static int
983 find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
984 struct module_btf **mod_btf,
985 const struct btf_type **type, __u32 *type_id,
986 const struct btf_type **vtype, __u32 *vtype_id,
987 const struct btf_member **data_member)
988 {
989 const struct btf_type *kern_type, *kern_vtype;
990 const struct btf_member *kern_data_member;
991 struct btf *btf = NULL;
992 __s32 kern_vtype_id, kern_type_id;
993 char tname[256];
994 __u32 i;
995
996 snprintf(tname, sizeof(tname), "%.*s",
997 (int)bpf_core_essential_name_len(tname_raw), tname_raw);
998
999 kern_type_id = find_ksym_btf_id(obj, tname, BTF_KIND_STRUCT,
1000 &btf, mod_btf);
1001 if (kern_type_id < 0) {
1002 pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
1003 tname);
1004 return kern_type_id;
1005 }
1006 kern_type = btf__type_by_id(btf, kern_type_id);
1007
1008 /* Find the corresponding "map_value" type that will be used
1009 * in map_update(BPF_MAP_TYPE_STRUCT_OPS). For example,
1010 * find "struct bpf_struct_ops_tcp_congestion_ops" from the
1011 * btf_vmlinux.
1012 */
1013 kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
1014 tname, BTF_KIND_STRUCT);
1015 if (kern_vtype_id < 0) {
1016 pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
1017 STRUCT_OPS_VALUE_PREFIX, tname);
1018 return kern_vtype_id;
1019 }
1020 kern_vtype = btf__type_by_id(btf, kern_vtype_id);
1021
1022 /* Find "struct tcp_congestion_ops" from
1023 * struct bpf_struct_ops_tcp_congestion_ops {
1024 * [ ... ]
1025 * struct tcp_congestion_ops data;
1026 * }
1027 */
1028 kern_data_member = btf_members(kern_vtype);
1029 for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
1030 if (kern_data_member->type == kern_type_id)
1031 break;
1032 }
1033 if (i == btf_vlen(kern_vtype)) {
1034 pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
1035 tname, STRUCT_OPS_VALUE_PREFIX, tname);
1036 return -EINVAL;
1037 }
1038
1039 *type = kern_type;
1040 *type_id = kern_type_id;
1041 *vtype = kern_vtype;
1042 *vtype_id = kern_vtype_id;
1043 *data_member = kern_data_member;
1044
1045 return 0;
1046 }
1047
1048 static bool bpf_map__is_struct_ops(const struct bpf_map *map)
1049 {
1050 return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
1051 }
1052
1053 static bool is_valid_st_ops_program(struct bpf_object *obj,
1054 const struct bpf_program *prog)
1055 {
1056 int i;
1057
1058 for (i = 0; i < obj->nr_programs; i++) {
1059 if (&obj->programs[i] == prog)
1060 return prog->type == BPF_PROG_TYPE_STRUCT_OPS;
1061 }
1062
1063 return false;
1064 }
1065
1066 /* For each struct_ops program P, referenced from some struct_ops map M,
1067 * enable P.autoload if there are Ms for which M.autocreate is true,
1068 * disable P.autoload if for all Ms M.autocreate is false.
1069 * Don't change P.autoload for programs that are not referenced from any maps.
1070 */
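/* Example (sketch): if P is referenced from maps M1 and M2 and only
 * M1.autocreate is true, P.autoload ends up true; if both are false, it ends
 * up false; a program referenced by no struct_ops map keeps whatever autoload
 * value it already had.
 */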
1071 static int bpf_object_adjust_struct_ops_autoload(struct bpf_object *obj)
1072 {
1073 struct bpf_program *prog, *slot_prog;
1074 struct bpf_map *map;
1075 int i, j, k, vlen;
1076
1077 for (i = 0; i < obj->nr_programs; ++i) {
1078 int should_load = false;
1079 int use_cnt = 0;
1080
1081 prog = &obj->programs[i];
1082 if (prog->type != BPF_PROG_TYPE_STRUCT_OPS)
1083 continue;
1084
1085 for (j = 0; j < obj->nr_maps; ++j) {
1086 const struct btf_type *type;
1087
1088 map = &obj->maps[j];
1089 if (!bpf_map__is_struct_ops(map))
1090 continue;
1091
1092 type = btf__type_by_id(obj->btf, map->st_ops->type_id);
1093 vlen = btf_vlen(type);
1094 for (k = 0; k < vlen; ++k) {
1095 slot_prog = map->st_ops->progs[k];
1096 if (prog != slot_prog)
1097 continue;
1098
1099 use_cnt++;
1100 if (map->autocreate)
1101 should_load = true;
1102 }
1103 }
1104 if (use_cnt)
1105 prog->autoload = should_load;
1106 }
1107
1108 return 0;
1109 }
1110
1111 /* Init the map's fields that depend on kern_btf */
1112 static int bpf_map__init_kern_struct_ops(struct bpf_map *map)
1113 {
1114 const struct btf_member *member, *kern_member, *kern_data_member;
1115 const struct btf_type *type, *kern_type, *kern_vtype;
1116 __u32 i, kern_type_id, kern_vtype_id, kern_data_off;
1117 struct bpf_object *obj = map->obj;
1118 const struct btf *btf = obj->btf;
1119 struct bpf_struct_ops *st_ops;
1120 const struct btf *kern_btf;
1121 struct module_btf *mod_btf = NULL;
1122 void *data, *kern_data;
1123 const char *tname;
1124 int err;
1125
1126 st_ops = map->st_ops;
1127 type = btf__type_by_id(btf, st_ops->type_id);
1128 tname = btf__name_by_offset(btf, type->name_off);
1129 err = find_struct_ops_kern_types(obj, tname, &mod_btf,
1130 &kern_type, &kern_type_id,
1131 &kern_vtype, &kern_vtype_id,
1132 &kern_data_member);
1133 if (err)
1134 return err;
1135
1136 kern_btf = mod_btf ? mod_btf->btf : obj->btf_vmlinux;
1137
1138 pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
1139 map->name, st_ops->type_id, kern_type_id, kern_vtype_id);
1140
1141 map->mod_btf_fd = mod_btf ? mod_btf->fd : -1;
1142 map->def.value_size = kern_vtype->size;
1143 map->btf_vmlinux_value_type_id = kern_vtype_id;
1144
1145 st_ops->kern_vdata = calloc(1, kern_vtype->size);
1146 if (!st_ops->kern_vdata)
1147 return -ENOMEM;
1148
1149 data = st_ops->data;
1150 kern_data_off = kern_data_member->offset / 8;
1151 kern_data = st_ops->kern_vdata + kern_data_off;
1152
1153 member = btf_members(type);
1154 for (i = 0; i < btf_vlen(type); i++, member++) {
1155 const struct btf_type *mtype, *kern_mtype;
1156 __u32 mtype_id, kern_mtype_id;
1157 void *mdata, *kern_mdata;
1158 struct bpf_program *prog;
1159 __s64 msize, kern_msize;
1160 __u32 moff, kern_moff;
1161 __u32 kern_member_idx;
1162 const char *mname;
1163
1164 mname = btf__name_by_offset(btf, member->name_off);
1165 moff = member->offset / 8;
1166 mdata = data + moff;
1167 msize = btf__resolve_size(btf, member->type);
1168 if (msize < 0) {
1169 pr_warn("struct_ops init_kern %s: failed to resolve the size of member %s\n",
1170 map->name, mname);
1171 return msize;
1172 }
1173
1174 kern_member = find_member_by_name(kern_btf, kern_type, mname);
1175 if (!kern_member) {
1176 if (!libbpf_is_mem_zeroed(mdata, msize)) {
1177 pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
1178 map->name, mname);
1179 return -ENOTSUP;
1180 }
1181
1182 if (st_ops->progs[i]) {
1183 /* If we had declaratively set struct_ops callback, we need to
1184 * force its autoload to false, because it doesn't have
1185 * a chance of succeeding from POV of the current struct_ops map.
1186 * If this program is still referenced somewhere else, though,
1187 * then bpf_object_adjust_struct_ops_autoload() will update its
1188 * autoload accordingly.
1189 */
1190 st_ops->progs[i]->autoload = false;
1191 st_ops->progs[i] = NULL;
1192 }
1193
1194 /* Skip all-zero/NULL fields if they are not present in the kernel BTF */
1195 pr_info("struct_ops %s: member %s not found in kernel, skipping it as it's set to zero\n",
1196 map->name, mname);
1197 continue;
1198 }
1199
1200 kern_member_idx = kern_member - btf_members(kern_type);
1201 if (btf_member_bitfield_size(type, i) ||
1202 btf_member_bitfield_size(kern_type, kern_member_idx)) {
1203 pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
1204 map->name, mname);
1205 return -ENOTSUP;
1206 }
1207
1208 kern_moff = kern_member->offset / 8;
1209 kern_mdata = kern_data + kern_moff;
1210
1211 mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
1212 kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
1213 &kern_mtype_id);
1214 if (BTF_INFO_KIND(mtype->info) !=
1215 BTF_INFO_KIND(kern_mtype->info)) {
1216 pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
1217 map->name, mname, BTF_INFO_KIND(mtype->info),
1218 BTF_INFO_KIND(kern_mtype->info));
1219 return -ENOTSUP;
1220 }
1221
1222 if (btf_is_ptr(mtype)) {
1223 prog = *(void **)mdata;
1224 /* just like for !kern_member case above, reset declaratively
1225 			 * set (at compile time) program's autoload to false,
1226 * if user replaced it with another program or NULL
1227 */
1228 if (st_ops->progs[i] && st_ops->progs[i] != prog)
1229 st_ops->progs[i]->autoload = false;
1230
1231 /* Update the value from the shadow type */
1232 st_ops->progs[i] = prog;
1233 if (!prog)
1234 continue;
1235
1236 if (!is_valid_st_ops_program(obj, prog)) {
1237 pr_warn("struct_ops init_kern %s: member %s is not a struct_ops program\n",
1238 map->name, mname);
1239 return -ENOTSUP;
1240 }
1241
1242 kern_mtype = skip_mods_and_typedefs(kern_btf,
1243 kern_mtype->type,
1244 &kern_mtype_id);
1245
1246 /* mtype->type must be a func_proto which was
1247 * guaranteed in bpf_object__collect_st_ops_relos(),
1248 * so only check kern_mtype for func_proto here.
1249 */
1250 if (!btf_is_func_proto(kern_mtype)) {
1251 pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
1252 map->name, mname);
1253 return -ENOTSUP;
1254 }
1255
1256 if (mod_btf)
1257 prog->attach_btf_obj_fd = mod_btf->fd;
1258
1259 /* if we haven't yet processed this BPF program, record proper
1260 * attach_btf_id and member_idx
1261 */
1262 if (!prog->attach_btf_id) {
1263 prog->attach_btf_id = kern_type_id;
1264 prog->expected_attach_type = kern_member_idx;
1265 }
1266
1267 /* struct_ops BPF prog can be re-used between multiple
1268 * .struct_ops & .struct_ops.link as long as it's the
1269 * same struct_ops struct definition and the same
1270 * function pointer field
1271 */
1272 if (prog->attach_btf_id != kern_type_id) {
1273 pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: attach_btf_id %u != kern_type_id %u\n",
1274 map->name, mname, prog->name, prog->sec_name, prog->type,
1275 prog->attach_btf_id, kern_type_id);
1276 return -EINVAL;
1277 }
1278 if (prog->expected_attach_type != kern_member_idx) {
1279 pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: expected_attach_type %u != kern_member_idx %u\n",
1280 map->name, mname, prog->name, prog->sec_name, prog->type,
1281 prog->expected_attach_type, kern_member_idx);
1282 return -EINVAL;
1283 }
1284
1285 st_ops->kern_func_off[i] = kern_data_off + kern_moff;
1286
1287 pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
1288 map->name, mname, prog->name, moff,
1289 kern_moff);
1290
1291 continue;
1292 }
1293
1294 kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
1295 if (kern_msize < 0 || msize != kern_msize) {
1296 pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
1297 map->name, mname, (ssize_t)msize,
1298 (ssize_t)kern_msize);
1299 return -ENOTSUP;
1300 }
1301
1302 pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
1303 map->name, mname, (unsigned int)msize,
1304 moff, kern_moff);
1305 memcpy(kern_mdata, mdata, msize);
1306 }
1307
1308 return 0;
1309 }
1310
1311 static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
1312 {
1313 struct bpf_map *map;
1314 size_t i;
1315 int err;
1316
1317 for (i = 0; i < obj->nr_maps; i++) {
1318 map = &obj->maps[i];
1319
1320 if (!bpf_map__is_struct_ops(map))
1321 continue;
1322
1323 if (!map->autocreate)
1324 continue;
1325
1326 err = bpf_map__init_kern_struct_ops(map);
1327 if (err)
1328 return err;
1329 }
1330
1331 return 0;
1332 }
1333
1334 static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
1335 int shndx, Elf_Data *data)
1336 {
1337 const struct btf_type *type, *datasec;
1338 const struct btf_var_secinfo *vsi;
1339 struct bpf_struct_ops *st_ops;
1340 const char *tname, *var_name;
1341 __s32 type_id, datasec_id;
1342 const struct btf *btf;
1343 struct bpf_map *map;
1344 __u32 i;
1345
1346 if (shndx == -1)
1347 return 0;
1348
1349 btf = obj->btf;
1350 datasec_id = btf__find_by_name_kind(btf, sec_name,
1351 BTF_KIND_DATASEC);
1352 if (datasec_id < 0) {
1353 pr_warn("struct_ops init: DATASEC %s not found\n",
1354 sec_name);
1355 return -EINVAL;
1356 }
1357
1358 datasec = btf__type_by_id(btf, datasec_id);
1359 vsi = btf_var_secinfos(datasec);
1360 for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
1361 type = btf__type_by_id(obj->btf, vsi->type);
1362 var_name = btf__name_by_offset(obj->btf, type->name_off);
1363
1364 type_id = btf__resolve_type(obj->btf, vsi->type);
1365 if (type_id < 0) {
1366 pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
1367 vsi->type, sec_name);
1368 return -EINVAL;
1369 }
1370
1371 type = btf__type_by_id(obj->btf, type_id);
1372 tname = btf__name_by_offset(obj->btf, type->name_off);
1373 if (!tname[0]) {
1374 pr_warn("struct_ops init: anonymous type is not supported\n");
1375 return -ENOTSUP;
1376 }
1377 if (!btf_is_struct(type)) {
1378 pr_warn("struct_ops init: %s is not a struct\n", tname);
1379 return -EINVAL;
1380 }
1381
1382 map = bpf_object__add_map(obj);
1383 if (IS_ERR(map))
1384 return PTR_ERR(map);
1385
1386 map->sec_idx = shndx;
1387 map->sec_offset = vsi->offset;
1388 map->name = strdup(var_name);
1389 if (!map->name)
1390 return -ENOMEM;
1391 map->btf_value_type_id = type_id;
1392
1393 /* Follow same convention as for programs autoload:
1394 * SEC("?.struct_ops") means map is not created by default.
1395 */
1396 if (sec_name[0] == '?') {
1397 map->autocreate = false;
1398 /* from now on forget there was ? in section name */
1399 sec_name++;
1400 }
1401
1402 map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
1403 map->def.key_size = sizeof(int);
1404 map->def.value_size = type->size;
1405 map->def.max_entries = 1;
1406 map->def.map_flags = strcmp(sec_name, STRUCT_OPS_LINK_SEC) == 0 ? BPF_F_LINK : 0;
1407 map->autoattach = true;
1408
1409 map->st_ops = calloc(1, sizeof(*map->st_ops));
1410 if (!map->st_ops)
1411 return -ENOMEM;
1412 st_ops = map->st_ops;
1413 st_ops->data = malloc(type->size);
1414 st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
1415 st_ops->kern_func_off = malloc(btf_vlen(type) *
1416 sizeof(*st_ops->kern_func_off));
1417 if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
1418 return -ENOMEM;
1419
1420 if (vsi->offset + type->size > data->d_size) {
1421 pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
1422 var_name, sec_name);
1423 return -EINVAL;
1424 }
1425
1426 memcpy(st_ops->data,
1427 data->d_buf + vsi->offset,
1428 type->size);
1429 st_ops->type_id = type_id;
1430
1431 pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
1432 tname, type_id, var_name, vsi->offset);
1433 }
1434
1435 return 0;
1436 }
1437
1438 static int bpf_object_init_struct_ops(struct bpf_object *obj)
1439 {
1440 const char *sec_name;
1441 int sec_idx, err;
1442
1443 for (sec_idx = 0; sec_idx < obj->efile.sec_cnt; ++sec_idx) {
1444 struct elf_sec_desc *desc = &obj->efile.secs[sec_idx];
1445
1446 if (desc->sec_type != SEC_ST_OPS)
1447 continue;
1448
1449 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1450 if (!sec_name)
1451 return -LIBBPF_ERRNO__FORMAT;
1452
1453 err = init_struct_ops_maps(obj, sec_name, sec_idx, desc->data);
1454 if (err)
1455 return err;
1456 }
1457
1458 return 0;
1459 }
1460
1461 static struct bpf_object *bpf_object__new(const char *path,
1462 const void *obj_buf,
1463 size_t obj_buf_sz,
1464 const char *obj_name)
1465 {
1466 struct bpf_object *obj;
1467 char *end;
1468
1469 obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
1470 if (!obj) {
1471 pr_warn("alloc memory failed for %s\n", path);
1472 return ERR_PTR(-ENOMEM);
1473 }
1474
1475 strcpy(obj->path, path);
1476 if (obj_name) {
1477 libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name));
1478 } else {
1479 /* Using basename() GNU version which doesn't modify arg. */
1480 libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name));
1481 end = strchr(obj->name, '.');
1482 if (end)
1483 *end = 0;
1484 }
1485
1486 obj->efile.fd = -1;
1487 /*
1488 * Caller of this function should also call
1489 * bpf_object__elf_finish() after data collection to return
1490 	 * obj_buf to the user. If not, we would have to duplicate the buffer
1491 	 * to avoid the user freeing it before ELF processing finishes.
1492 */
1493 obj->efile.obj_buf = obj_buf;
1494 obj->efile.obj_buf_sz = obj_buf_sz;
1495 obj->efile.btf_maps_shndx = -1;
1496 obj->kconfig_map_idx = -1;
1497 obj->arena_map_idx = -1;
1498
1499 obj->kern_version = get_kernel_version();
1500 obj->loaded = false;
1501
1502 return obj;
1503 }
1504
1505 static void bpf_object__elf_finish(struct bpf_object *obj)
1506 {
1507 if (!obj->efile.elf)
1508 return;
1509
1510 elf_end(obj->efile.elf);
1511 obj->efile.elf = NULL;
1512 obj->efile.symbols = NULL;
1513 obj->efile.arena_data = NULL;
1514
1515 zfree(&obj->efile.secs);
1516 obj->efile.sec_cnt = 0;
1517 zclose(obj->efile.fd);
1518 obj->efile.obj_buf = NULL;
1519 obj->efile.obj_buf_sz = 0;
1520 }
1521
1522 static int bpf_object__elf_init(struct bpf_object *obj)
1523 {
1524 Elf64_Ehdr *ehdr;
1525 int err = 0;
1526 Elf *elf;
1527
1528 if (obj->efile.elf) {
1529 pr_warn("elf: init internal error\n");
1530 return -LIBBPF_ERRNO__LIBELF;
1531 }
1532
1533 if (obj->efile.obj_buf_sz > 0) {
1534 /* obj_buf should have been validated by bpf_object__open_mem(). */
1535 elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz);
1536 } else {
1537 obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC);
1538 if (obj->efile.fd < 0) {
1539 char errmsg[STRERR_BUFSIZE], *cp;
1540
1541 err = -errno;
1542 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
1543 pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
1544 return err;
1545 }
1546
1547 elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
1548 }
1549
1550 if (!elf) {
1551 pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
1552 err = -LIBBPF_ERRNO__LIBELF;
1553 goto errout;
1554 }
1555
1556 obj->efile.elf = elf;
1557
1558 if (elf_kind(elf) != ELF_K_ELF) {
1559 err = -LIBBPF_ERRNO__FORMAT;
1560 pr_warn("elf: '%s' is not a proper ELF object\n", obj->path);
1561 goto errout;
1562 }
1563
1564 if (gelf_getclass(elf) != ELFCLASS64) {
1565 err = -LIBBPF_ERRNO__FORMAT;
1566 pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path);
1567 goto errout;
1568 }
1569
1570 obj->efile.ehdr = ehdr = elf64_getehdr(elf);
1571 if (!obj->efile.ehdr) {
1572 pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
1573 err = -LIBBPF_ERRNO__FORMAT;
1574 goto errout;
1575 }
1576
1577 if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) {
1578 pr_warn("elf: failed to get section names section index for %s: %s\n",
1579 obj->path, elf_errmsg(-1));
1580 err = -LIBBPF_ERRNO__FORMAT;
1581 goto errout;
1582 }
1583
1584 /* ELF is corrupted/truncated, avoid calling elf_strptr. */
1585 if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) {
1586 pr_warn("elf: failed to get section names strings from %s: %s\n",
1587 obj->path, elf_errmsg(-1));
1588 err = -LIBBPF_ERRNO__FORMAT;
1589 goto errout;
1590 }
1591
1592 /* Old LLVM set e_machine to EM_NONE */
1593 if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) {
1594 pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
1595 err = -LIBBPF_ERRNO__FORMAT;
1596 goto errout;
1597 }
1598
1599 return 0;
1600 errout:
1601 bpf_object__elf_finish(obj);
1602 return err;
1603 }
1604
1605 static int bpf_object__check_endianness(struct bpf_object *obj)
1606 {
1607 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
1608 if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
1609 return 0;
1610 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
1611 if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
1612 return 0;
1613 #else
1614 # error "Unrecognized __BYTE_ORDER__"
1615 #endif
1616 pr_warn("elf: endianness mismatch in %s.\n", obj->path);
1617 return -LIBBPF_ERRNO__ENDIAN;
1618 }
1619
1620 static int
1621 bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
1622 {
1623 if (!data) {
1624 pr_warn("invalid license section in %s\n", obj->path);
1625 return -LIBBPF_ERRNO__FORMAT;
1626 }
1627 /* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't
1628 * go over allowed ELF data section buffer
1629 */
1630 libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license)));
1631 pr_debug("license of %s is %s\n", obj->path, obj->license);
1632 return 0;
1633 }
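/* Background sketch (common BPF source convention, shown for illustration):
 * the license data typically originates from a definition like
 *
 *	char LICENSE[] SEC("license") = "Dual BSD/GPL";
 *
 * in the BPF C source, which the compiler emits into the "license" ELF
 * section consumed above.
 */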
1634
1635 static int
1636 bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
1637 {
1638 __u32 kver;
1639
1640 if (!data || size != sizeof(kver)) {
1641 pr_warn("invalid kver section in %s\n", obj->path);
1642 return -LIBBPF_ERRNO__FORMAT;
1643 }
1644 memcpy(&kver, data, sizeof(kver));
1645 obj->kern_version = kver;
1646 pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
1647 return 0;
1648 }
1649
1650 static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
1651 {
1652 if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
1653 type == BPF_MAP_TYPE_HASH_OF_MAPS)
1654 return true;
1655 return false;
1656 }
1657
1658 static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size)
1659 {
1660 Elf_Data *data;
1661 Elf_Scn *scn;
1662
1663 if (!name)
1664 return -EINVAL;
1665
1666 scn = elf_sec_by_name(obj, name);
1667 data = elf_sec_data(obj, scn);
1668 if (data) {
1669 *size = data->d_size;
1670 return 0; /* found it */
1671 }
1672
1673 return -ENOENT;
1674 }
1675
1676 static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name)
1677 {
1678 Elf_Data *symbols = obj->efile.symbols;
1679 const char *sname;
1680 size_t si;
1681
1682 for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
1683 Elf64_Sym *sym = elf_sym_by_idx(obj, si);
1684
1685 if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
1686 continue;
1687
1688 if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
1689 ELF64_ST_BIND(sym->st_info) != STB_WEAK)
1690 continue;
1691
1692 sname = elf_sym_str(obj, sym->st_name);
1693 if (!sname) {
1694 pr_warn("failed to get sym name string for var %s\n", name);
1695 return ERR_PTR(-EIO);
1696 }
1697 if (strcmp(name, sname) == 0)
1698 return sym;
1699 }
1700
1701 return ERR_PTR(-ENOENT);
1702 }
1703
1704 /* Some versions of Android don't provide memfd_create() in their libc
1705 * implementation, so avoid complications and just go straight to Linux
1706 * syscall.
1707 */
1708 static int sys_memfd_create(const char *name, unsigned flags)
1709 {
1710 return syscall(__NR_memfd_create, name, flags);
1711 }
1712
1713 #ifndef MFD_CLOEXEC
1714 #define MFD_CLOEXEC 0x0001U
1715 #endif
1716
1717 static int create_placeholder_fd(void)
1718 {
1719 int fd;
1720
1721 fd = ensure_good_fd(sys_memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC));
1722 if (fd < 0)
1723 return -errno;
1724 return fd;
1725 }
1726
1727 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
1728 {
1729 struct bpf_map *map;
1730 int err;
1731
1732 err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap,
1733 sizeof(*obj->maps), obj->nr_maps + 1);
1734 if (err)
1735 return ERR_PTR(err);
1736
1737 map = &obj->maps[obj->nr_maps++];
1738 map->obj = obj;
1739 /* Preallocate map FD without actually creating BPF map just yet.
1740 * These map FD "placeholders" will be reused later without changing
1741 * FD value when map is actually created in the kernel.
1742 *
1743 * This is useful to be able to perform BPF program relocations
1744 * without having to create BPF maps before that step. This allows us
1745 * to finalize and load BTF very late in BPF object's loading phase,
1746 * right before BPF maps have to be created and BPF programs have to
1747 * be loaded. By having these map FD placeholders we can perform all
1748 * the sanitizations, relocations, and any other adjustments before we
1749 * start creating actual BPF kernel objects (BTF, maps, progs).
1750 */
1751 map->fd = create_placeholder_fd();
1752 if (map->fd < 0)
1753 return ERR_PTR(map->fd);
1754 map->inner_map_fd = -1;
1755 map->autocreate = true;
1756
1757 return map;
1758 }
1759
1760 static size_t array_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
1761 {
1762 const long page_sz = sysconf(_SC_PAGE_SIZE);
1763 size_t map_sz;
1764
1765 map_sz = (size_t)roundup(value_sz, 8) * max_entries;
1766 map_sz = roundup(map_sz, page_sz);
1767 return map_sz;
1768 }
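/* Worked example (sketch, assuming a 4096-byte page size): value_sz = 4 and
 * max_entries = 1000 gives roundup(4, 8) * 1000 = 8000 bytes, which rounds up
 * to 8192 bytes (two pages) of mmap()'able space.
 */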
1769
1770 static size_t bpf_map_mmap_sz(const struct bpf_map *map)
1771 {
1772 const long page_sz = sysconf(_SC_PAGE_SIZE);
1773
1774 switch (map->def.type) {
1775 case BPF_MAP_TYPE_ARRAY:
1776 return array_map_mmap_sz(map->def.value_size, map->def.max_entries);
1777 case BPF_MAP_TYPE_ARENA:
1778 return page_sz * map->def.max_entries;
1779 default:
1780 return 0; /* not supported */
1781 }
1782 }
1783
1784 static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz)
1785 {
1786 void *mmaped;
1787
1788 if (!map->mmaped)
1789 return -EINVAL;
1790
1791 if (old_sz == new_sz)
1792 return 0;
1793
1794 mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1795 if (mmaped == MAP_FAILED)
1796 return -errno;
1797
1798 memcpy(mmaped, map->mmaped, min(old_sz, new_sz));
1799 munmap(map->mmaped, old_sz);
1800 map->mmaped = mmaped;
1801 return 0;
1802 }
1803
1804 static char *internal_map_name(struct bpf_object *obj, const char *real_name)
1805 {
1806 char map_name[BPF_OBJ_NAME_LEN], *p;
1807 int pfx_len, sfx_len = max((size_t)7, strlen(real_name));
1808
1809 /* This is one of the more confusing parts of libbpf for various
1810 * reasons, some of which are historical. The original idea for naming
1811 * internal names was to include as much of BPF object name prefix as
1812 * possible, so that it can be distinguished from similar internal
1813 * maps of a different BPF object.
1814 * As an example, let's say we have bpf_object named 'my_object_name'
1815 * and internal map corresponding to '.rodata' ELF section. The final
1816 * map name advertised to user and to the kernel will be
1817 * 'my_objec.rodata', taking first 8 characters of object name and
1818 * entire 7 characters of '.rodata'.
1819 * Somewhat confusingly, if internal map ELF section name is shorter
1820 * than 7 characters, e.g., '.bss', we still reserve 7 characters
1821 * for the suffix, even though we only have 4 actual characters, and
1822 * resulting map will be called 'my_objec.bss', not even using all 15
1823 * characters allowed by the kernel. Oh well, at least the truncated
1824 * object name is somewhat consistent in this case. But if the map
1825 * name is '.kconfig', we'll still have entirety of '.kconfig' added
1826 * (8 chars) and thus will be left with only first 7 characters of the
1827 * object name ('my_obje'). Happy guessing, user, that the final map
1828 * name will be "my_obje.kconfig".
1829 * Now, with libbpf starting to support arbitrarily named .rodata.*
1830 * and .data.* data sections, it's possible that ELF section name is
1831 * longer than allowed 15 chars, so we now need to be careful to take
1832 * only up to 15 first characters of ELF name, taking no BPF object
1833 * name characters at all. So '.rodata.abracadabra' will result in
1834 * '.rodata.abracad' kernel and user-visible name.
1835 * We need to keep this convoluted logic intact for .data, .bss and
1836 * .rodata maps, but for new custom .data.custom and .rodata.custom
1837 * maps we use their ELF names as is, not prepending bpf_object name
1838 * in front. We still need to truncate them to 15 characters for the
1839 * kernel. Full name can be recovered for such maps by using DATASEC
1840 * BTF type associated with such map's value type, though.
1841 */
1842 if (sfx_len >= BPF_OBJ_NAME_LEN)
1843 sfx_len = BPF_OBJ_NAME_LEN - 1;
1844
1845 /* if there are two or more dots in map name, it's a custom dot map */
1846 if (strchr(real_name + 1, '.') != NULL)
1847 pfx_len = 0;
1848 else
1849 pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name));
1850
1851 snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
1852 sfx_len, real_name);
1853
1854 /* sanitize map name to characters allowed by kernel */
1855 for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
1856 if (!isalnum(*p) && *p != '_' && *p != '.')
1857 *p = '_';
1858
1859 return strdup(map_name);
1860 }
1861
1862 static int
1863 map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map);
1864
1865 /* Internal BPF map is mmap()'able only if at least one of corresponding
1866 * DATASEC's VARs are to be exposed through BPF skeleton. I.e., it's a GLOBAL
1867 * variable and it's not marked as __hidden (which turns it into, effectively,
1868 * a STATIC variable).
1869 */
1870 static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
1871 {
1872 const struct btf_type *t, *vt;
1873 struct btf_var_secinfo *vsi;
1874 int i, n;
1875
1876 if (!map->btf_value_type_id)
1877 return false;
1878
1879 t = btf__type_by_id(obj->btf, map->btf_value_type_id);
1880 if (!btf_is_datasec(t))
1881 return false;
1882
1883 vsi = btf_var_secinfos(t);
1884 for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) {
1885 vt = btf__type_by_id(obj->btf, vsi->type);
1886 if (!btf_is_var(vt))
1887 continue;
1888
1889 if (btf_var(vt)->linkage != BTF_VAR_STATIC)
1890 return true;
1891 }
1892
1893 return false;
1894 }
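/* Illustrative example of the rule above on the BPF C side (assuming
 * bpf_helpers.h's __hidden macro):
 *
 *     int exposed_cnt = 1;            global VAR, keeps .data mmap()'able
 *     __hidden int internal_cnt = 1;  hidden, treated as a static BTF VAR
 *
 * The .data map stays mmap()'able because at least one of its variables
 * (exposed_cnt) is global and not hidden.
 */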
1895
1896 static int
1897 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
1898 const char *real_name, int sec_idx, void *data, size_t data_sz)
1899 {
1900 struct bpf_map_def *def;
1901 struct bpf_map *map;
1902 size_t mmap_sz;
1903 int err;
1904
1905 map = bpf_object__add_map(obj);
1906 if (IS_ERR(map))
1907 return PTR_ERR(map);
1908
1909 map->libbpf_type = type;
1910 map->sec_idx = sec_idx;
1911 map->sec_offset = 0;
1912 map->real_name = strdup(real_name);
1913 map->name = internal_map_name(obj, real_name);
1914 if (!map->real_name || !map->name) {
1915 zfree(&map->real_name);
1916 zfree(&map->name);
1917 return -ENOMEM;
1918 }
1919
1920 def = &map->def;
1921 def->type = BPF_MAP_TYPE_ARRAY;
1922 def->key_size = sizeof(int);
1923 def->value_size = data_sz;
1924 def->max_entries = 1;
1925 def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
1926 ? BPF_F_RDONLY_PROG : 0;
1927
1928 /* failures are fine because of maps like .rodata.str1.1 */
1929 (void) map_fill_btf_type_info(obj, map);
1930
1931 if (map_is_mmapable(obj, map))
1932 def->map_flags |= BPF_F_MMAPABLE;
1933
1934 pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
1935 map->name, map->sec_idx, map->sec_offset, def->map_flags);
1936
1937 mmap_sz = bpf_map_mmap_sz(map);
1938 map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
1939 MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1940 if (map->mmaped == MAP_FAILED) {
1941 err = -errno;
1942 map->mmaped = NULL;
1943 pr_warn("failed to alloc map '%s' content buffer: %d\n",
1944 map->name, err);
1945 zfree(&map->real_name);
1946 zfree(&map->name);
1947 return err;
1948 }
1949
1950 if (data)
1951 memcpy(map->mmaped, data, data_sz);
1952
1953 pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
1954 return 0;
1955 }
1956
1957 static int bpf_object__init_global_data_maps(struct bpf_object *obj)
1958 {
1959 struct elf_sec_desc *sec_desc;
1960 const char *sec_name;
1961 int err = 0, sec_idx;
1962
1963 /*
1964 * Populate obj->maps with libbpf internal maps.
1965 */
1966 for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) {
1967 sec_desc = &obj->efile.secs[sec_idx];
1968
1969 /* Skip recognized sections with size 0. */
1970 if (!sec_desc->data || sec_desc->data->d_size == 0)
1971 continue;
1972
1973 switch (sec_desc->sec_type) {
1974 case SEC_DATA:
1975 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1976 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
1977 sec_name, sec_idx,
1978 sec_desc->data->d_buf,
1979 sec_desc->data->d_size);
1980 break;
1981 case SEC_RODATA:
1982 obj->has_rodata = true;
1983 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1984 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
1985 sec_name, sec_idx,
1986 sec_desc->data->d_buf,
1987 sec_desc->data->d_size);
1988 break;
1989 case SEC_BSS:
1990 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1991 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
1992 sec_name, sec_idx,
1993 NULL,
1994 sec_desc->data->d_size);
1995 break;
1996 default:
1997 /* skip */
1998 break;
1999 }
2000 if (err)
2001 return err;
2002 }
2003 return 0;
2004 }
2005
2006
2007 static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
2008 const void *name)
2009 {
2010 int i;
2011
2012 for (i = 0; i < obj->nr_extern; i++) {
2013 if (strcmp(obj->externs[i].name, name) == 0)
2014 return &obj->externs[i];
2015 }
2016 return NULL;
2017 }
2018
2019 static struct extern_desc *find_extern_by_name_with_len(const struct bpf_object *obj,
2020 const void *name, int len)
2021 {
2022 const char *ext_name;
2023 int i;
2024
2025 for (i = 0; i < obj->nr_extern; i++) {
2026 ext_name = obj->externs[i].name;
2027 if (strlen(ext_name) == len && strncmp(ext_name, name, len) == 0)
2028 return &obj->externs[i];
2029 }
2030 return NULL;
2031 }
2032
2033 static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
2034 char value)
2035 {
2036 switch (ext->kcfg.type) {
2037 case KCFG_BOOL:
2038 if (value == 'm') {
2039 pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n",
2040 ext->name, value);
2041 return -EINVAL;
2042 }
2043 *(bool *)ext_val = value == 'y' ? true : false;
2044 break;
2045 case KCFG_TRISTATE:
2046 if (value == 'y')
2047 *(enum libbpf_tristate *)ext_val = TRI_YES;
2048 else if (value == 'm')
2049 *(enum libbpf_tristate *)ext_val = TRI_MODULE;
2050 else /* value == 'n' */
2051 *(enum libbpf_tristate *)ext_val = TRI_NO;
2052 break;
2053 case KCFG_CHAR:
2054 *(char *)ext_val = value;
2055 break;
2056 case KCFG_UNKNOWN:
2057 case KCFG_INT:
2058 case KCFG_CHAR_ARR:
2059 default:
2060 pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n",
2061 ext->name, value);
2062 return -EINVAL;
2063 }
2064 ext->is_set = true;
2065 return 0;
2066 }
2067
2068 static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
2069 const char *value)
2070 {
2071 size_t len;
2072
2073 if (ext->kcfg.type != KCFG_CHAR_ARR) {
2074 pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n",
2075 ext->name, value);
2076 return -EINVAL;
2077 }
2078
2079 len = strlen(value);
2080 if (len < 2 || value[len - 1] != '"') {
2081 pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
2082 ext->name, value);
2083 return -EINVAL;
2084 }
2085
2086 /* strip quotes */
2087 len -= 2;
2088 if (len >= ext->kcfg.sz) {
2089 pr_warn("extern (kcfg) '%s': long string '%s' (%zu bytes) truncated to %d bytes\n",
2090 ext->name, value, len, ext->kcfg.sz - 1);
2091 len = ext->kcfg.sz - 1;
2092 }
2093 memcpy(ext_val, value + 1, len);
2094 ext_val[len] = '\0';
2095 ext->is_set = true;
2096 return 0;
2097 }
2098
2099 static int parse_u64(const char *value, __u64 *res)
2100 {
2101 char *value_end;
2102 int err;
2103
2104 errno = 0;
2105 *res = strtoull(value, &value_end, 0);
2106 if (errno) {
2107 err = -errno;
2108 pr_warn("failed to parse '%s' as integer: %d\n", value, err);
2109 return err;
2110 }
2111 if (*value_end) {
2112 pr_warn("failed to parse '%s' as integer completely\n", value);
2113 return -EINVAL;
2114 }
2115 return 0;
2116 }
2117
2118 static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
2119 {
2120 int bit_sz = ext->kcfg.sz * 8;
2121
2122 if (ext->kcfg.sz == 8)
2123 return true;
2124
2125 /* Validate that value stored in u64 fits in integer of `ext->sz`
2126 * bytes size without any loss of information. If the target integer
2127 * is signed, we rely on the following limits of integer type of
2128 * Y bits and subsequent transformation:
2129 *
2130 * -2^(Y-1) <= X <= 2^(Y-1) - 1
2131 * 0 <= X + 2^(Y-1) <= 2^Y - 1
2132 * 0 <= X + 2^(Y-1) < 2^Y
2133 *
2134 * For unsigned target integer, check that all the (64 - Y) bits are
2135 * zero.
2136 */
2137 if (ext->kcfg.is_signed)
2138 return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
2139 else
2140 return (v >> bit_sz) == 0;
2141 }
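/* Worked example (illustrative): for a 1-byte signed extern (bit_sz = 8),
 * v = 200 gives 200 + 128 = 328 >= 256 and is rejected (doesn't fit in a
 * signed char), while v = (__u64)-5 wraps to 123 after adding 128 and is
 * accepted. For a 1-byte unsigned extern, v = 200 passes the (v >> 8) == 0
 * check, while v = 300 does not.
 */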
2142
2143 static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
2144 __u64 value)
2145 {
2146 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR &&
2147 ext->kcfg.type != KCFG_BOOL) {
2148 pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n",
2149 ext->name, (unsigned long long)value);
2150 return -EINVAL;
2151 }
2152 if (ext->kcfg.type == KCFG_BOOL && value > 1) {
2153 pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n",
2154 ext->name, (unsigned long long)value);
2155 return -EINVAL;
2156
2157 }
2158 if (!is_kcfg_value_in_range(ext, value)) {
2159 pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n",
2160 ext->name, (unsigned long long)value, ext->kcfg.sz);
2161 return -ERANGE;
2162 }
2163 switch (ext->kcfg.sz) {
2164 case 1:
2165 *(__u8 *)ext_val = value;
2166 break;
2167 case 2:
2168 *(__u16 *)ext_val = value;
2169 break;
2170 case 4:
2171 *(__u32 *)ext_val = value;
2172 break;
2173 case 8:
2174 *(__u64 *)ext_val = value;
2175 break;
2176 default:
2177 return -EINVAL;
2178 }
2179 ext->is_set = true;
2180 return 0;
2181 }
2182
2183 static int bpf_object__process_kconfig_line(struct bpf_object *obj,
2184 char *buf, void *data)
2185 {
2186 struct extern_desc *ext;
2187 char *sep, *value;
2188 int len, err = 0;
2189 void *ext_val;
2190 __u64 num;
2191
2192 if (!str_has_pfx(buf, "CONFIG_"))
2193 return 0;
2194
2195 sep = strchr(buf, '=');
2196 if (!sep) {
2197 pr_warn("failed to parse '%s': no separator\n", buf);
2198 return -EINVAL;
2199 }
2200
2201 /* Trim ending '\n' */
2202 len = strlen(buf);
2203 if (buf[len - 1] == '\n')
2204 buf[len - 1] = '\0';
2205 /* Split on '=' and ensure that a value is present. */
2206 *sep = '\0';
2207 if (!sep[1]) {
2208 *sep = '=';
2209 pr_warn("failed to parse '%s': no value\n", buf);
2210 return -EINVAL;
2211 }
2212
2213 ext = find_extern_by_name(obj, buf);
2214 if (!ext || ext->is_set)
2215 return 0;
2216
2217 ext_val = data + ext->kcfg.data_off;
2218 value = sep + 1;
2219
2220 switch (*value) {
2221 case 'y': case 'n': case 'm':
2222 err = set_kcfg_value_tri(ext, ext_val, *value);
2223 break;
2224 case '"':
2225 err = set_kcfg_value_str(ext, ext_val, value);
2226 break;
2227 default:
2228 /* assume integer */
2229 err = parse_u64(value, &num);
2230 if (err) {
2231 pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value);
2232 return err;
2233 }
2234 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
2235 pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value);
2236 return -EINVAL;
2237 }
2238 err = set_kcfg_value_num(ext, ext_val, num);
2239 break;
2240 }
2241 if (err)
2242 return err;
2243 pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value);
2244 return 0;
2245 }
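/* Illustrative sketch of the lines this parses and the BPF-side externs
 * they would fill in (extern declarations assume bpf_helpers.h's __kconfig
 * attribute; names below are just examples):
 *
 *     CONFIG_BPF_SYSCALL=y            extern bool CONFIG_BPF_SYSCALL __kconfig;
 *     CONFIG_HZ=250                   extern int CONFIG_HZ __kconfig;
 *     CONFIG_LOCALVERSION="-custom"   extern char CONFIG_LOCALVERSION[8] __kconfig;
 *
 * Lines without a CONFIG_ prefix or without a matching extern are skipped.
 */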
2246
2247 static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
2248 {
2249 char buf[PATH_MAX];
2250 struct utsname uts;
2251 int len, err = 0;
2252 gzFile file;
2253
2254 uname(&uts);
2255 len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
2256 if (len < 0)
2257 return -EINVAL;
2258 else if (len >= PATH_MAX)
2259 return -ENAMETOOLONG;
2260
2261 /* gzopen also accepts uncompressed files. */
2262 file = gzopen(buf, "re");
2263 if (!file)
2264 file = gzopen("/proc/config.gz", "re");
2265
2266 if (!file) {
2267 pr_warn("failed to open system Kconfig\n");
2268 return -ENOENT;
2269 }
2270
2271 while (gzgets(file, buf, sizeof(buf))) {
2272 err = bpf_object__process_kconfig_line(obj, buf, data);
2273 if (err) {
2274 pr_warn("error parsing system Kconfig line '%s': %d\n",
2275 buf, err);
2276 goto out;
2277 }
2278 }
2279
2280 out:
2281 gzclose(file);
2282 return err;
2283 }
2284
2285 static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
2286 const char *config, void *data)
2287 {
2288 char buf[PATH_MAX];
2289 int err = 0;
2290 FILE *file;
2291
2292 file = fmemopen((void *)config, strlen(config), "r");
2293 if (!file) {
2294 err = -errno;
2295 pr_warn("failed to open in-memory Kconfig: %d\n", err);
2296 return err;
2297 }
2298
2299 while (fgets(buf, sizeof(buf), file)) {
2300 err = bpf_object__process_kconfig_line(obj, buf, data);
2301 if (err) {
2302 pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
2303 buf, err);
2304 break;
2305 }
2306 }
2307
2308 fclose(file);
2309 return err;
2310 }
2311
2312 static int bpf_object__init_kconfig_map(struct bpf_object *obj)
2313 {
2314 struct extern_desc *last_ext = NULL, *ext;
2315 size_t map_sz;
2316 int i, err;
2317
2318 for (i = 0; i < obj->nr_extern; i++) {
2319 ext = &obj->externs[i];
2320 if (ext->type == EXT_KCFG)
2321 last_ext = ext;
2322 }
2323
2324 if (!last_ext)
2325 return 0;
2326
2327 map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
2328 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
2329 ".kconfig", obj->efile.symbols_shndx,
2330 NULL, map_sz);
2331 if (err)
2332 return err;
2333
2334 obj->kconfig_map_idx = obj->nr_maps - 1;
2335
2336 return 0;
2337 }
2338
2339 const struct btf_type *
2340 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
2341 {
2342 const struct btf_type *t = btf__type_by_id(btf, id);
2343
2344 if (res_id)
2345 *res_id = id;
2346
2347 while (btf_is_mod(t) || btf_is_typedef(t)) {
2348 if (res_id)
2349 *res_id = t->type;
2350 t = btf__type_by_id(btf, t->type);
2351 }
2352
2353 return t;
2354 }
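/* Illustrative example: given
 *
 *     typedef const volatile struct foo foo_cv_t;
 *
 * skip_mods_and_typedefs(), starting from foo_cv_t's type ID, walks through
 * the TYPEDEF, CONST, and VOLATILE entries and returns the underlying
 * STRUCT 'foo', with *res_id (if provided) set to that STRUCT's type ID.
 */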
2355
2356 static const struct btf_type *
2357 resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
2358 {
2359 const struct btf_type *t;
2360
2361 t = skip_mods_and_typedefs(btf, id, NULL);
2362 if (!btf_is_ptr(t))
2363 return NULL;
2364
2365 t = skip_mods_and_typedefs(btf, t->type, res_id);
2366
2367 return btf_is_func_proto(t) ? t : NULL;
2368 }
2369
2370 static const char *__btf_kind_str(__u16 kind)
2371 {
2372 switch (kind) {
2373 case BTF_KIND_UNKN: return "void";
2374 case BTF_KIND_INT: return "int";
2375 case BTF_KIND_PTR: return "ptr";
2376 case BTF_KIND_ARRAY: return "array";
2377 case BTF_KIND_STRUCT: return "struct";
2378 case BTF_KIND_UNION: return "union";
2379 case BTF_KIND_ENUM: return "enum";
2380 case BTF_KIND_FWD: return "fwd";
2381 case BTF_KIND_TYPEDEF: return "typedef";
2382 case BTF_KIND_VOLATILE: return "volatile";
2383 case BTF_KIND_CONST: return "const";
2384 case BTF_KIND_RESTRICT: return "restrict";
2385 case BTF_KIND_FUNC: return "func";
2386 case BTF_KIND_FUNC_PROTO: return "func_proto";
2387 case BTF_KIND_VAR: return "var";
2388 case BTF_KIND_DATASEC: return "datasec";
2389 case BTF_KIND_FLOAT: return "float";
2390 case BTF_KIND_DECL_TAG: return "decl_tag";
2391 case BTF_KIND_TYPE_TAG: return "type_tag";
2392 case BTF_KIND_ENUM64: return "enum64";
2393 default: return "unknown";
2394 }
2395 }
2396
2397 const char *btf_kind_str(const struct btf_type *t)
2398 {
2399 return __btf_kind_str(btf_kind(t));
2400 }
2401
2402 /*
2403 * Fetch integer attribute of BTF map definition. Such attributes are
2404 * represented using a pointer to an array, in which dimensionality of array
2405 * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
2406 * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
2407 * type definition, while using only sizeof(void *) space in ELF data section.
2408 */
2409 static bool get_map_field_int(const char *map_name, const struct btf *btf,
2410 const struct btf_member *m, __u32 *res)
2411 {
2412 const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
2413 const char *name = btf__name_by_offset(btf, m->name_off);
2414 const struct btf_array *arr_info;
2415 const struct btf_type *arr_t;
2416
2417 if (!btf_is_ptr(t)) {
2418 pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
2419 map_name, name, btf_kind_str(t));
2420 return false;
2421 }
2422
2423 arr_t = btf__type_by_id(btf, t->type);
2424 if (!arr_t) {
2425 pr_warn("map '%s': attr '%s': type [%u] not found.\n",
2426 map_name, name, t->type);
2427 return false;
2428 }
2429 if (!btf_is_array(arr_t)) {
2430 pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
2431 map_name, name, btf_kind_str(arr_t));
2432 return false;
2433 }
2434 arr_info = btf_array(arr_t);
2435 *res = arr_info->nelems;
2436 return true;
2437 }
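/* Illustrative example (assuming bpf_helpers.h's __uint macro, which
 * expands __uint(name, val) into 'int (*name)[val]'): a BPF-side field like
 *
 *     __uint(max_entries, 128);
 *
 * becomes the member 'int (*max_entries)[128]', and get_map_field_int()
 * recovers 128 from the array's nelems.
 */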
2438
2439 static bool get_map_field_long(const char *map_name, const struct btf *btf,
2440 const struct btf_member *m, __u64 *res)
2441 {
2442 const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
2443 const char *name = btf__name_by_offset(btf, m->name_off);
2444
2445 if (btf_is_ptr(t)) {
2446 __u32 res32;
2447 bool ret;
2448
2449 ret = get_map_field_int(map_name, btf, m, &res32);
2450 if (ret)
2451 *res = (__u64)res32;
2452 return ret;
2453 }
2454
2455 if (!btf_is_enum(t) && !btf_is_enum64(t)) {
2456 pr_warn("map '%s': attr '%s': expected ENUM or ENUM64, got %s.\n",
2457 map_name, name, btf_kind_str(t));
2458 return false;
2459 }
2460
2461 if (btf_vlen(t) != 1) {
2462 pr_warn("map '%s': attr '%s': invalid __ulong\n",
2463 map_name, name);
2464 return false;
2465 }
2466
2467 if (btf_is_enum(t)) {
2468 const struct btf_enum *e = btf_enum(t);
2469
2470 *res = e->val;
2471 } else {
2472 const struct btf_enum64 *e = btf_enum64(t);
2473
2474 *res = btf_enum64_value(e);
2475 }
2476 return true;
2477 }
2478
2479 static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name)
2480 {
2481 int len;
2482
2483 len = snprintf(buf, buf_sz, "%s/%s", path, name);
2484 if (len < 0)
2485 return -EINVAL;
2486 if (len >= buf_sz)
2487 return -ENAMETOOLONG;
2488
2489 return 0;
2490 }
2491
2492 static int build_map_pin_path(struct bpf_map *map, const char *path)
2493 {
2494 char buf[PATH_MAX];
2495 int err;
2496
2497 if (!path)
2498 path = BPF_FS_DEFAULT_PATH;
2499
2500 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
2501 if (err)
2502 return err;
2503
2504 return bpf_map__set_pin_path(map, buf);
2505 }
2506
2507 /* should match definition in bpf_helpers.h */
2508 enum libbpf_pin_type {
2509 LIBBPF_PIN_NONE,
2510 /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
2511 LIBBPF_PIN_BY_NAME,
2512 };
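/* Illustrative example: a BTF-defined map declared on the BPF side with
 *
 *     __uint(pinning, LIBBPF_PIN_BY_NAME);
 *
 * gets its pin path built by build_map_pin_path() above, typically
 * "/sys/fs/bpf/<map_name>" unless a custom pin_root_path was passed in
 * bpf_object_open_opts.
 */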
2513
2514 int parse_btf_map_def(const char *map_name, struct btf *btf,
2515 const struct btf_type *def_t, bool strict,
2516 struct btf_map_def *map_def, struct btf_map_def *inner_def)
2517 {
2518 const struct btf_type *t;
2519 const struct btf_member *m;
2520 bool is_inner = inner_def == NULL;
2521 int vlen, i;
2522
2523 vlen = btf_vlen(def_t);
2524 m = btf_members(def_t);
2525 for (i = 0; i < vlen; i++, m++) {
2526 const char *name = btf__name_by_offset(btf, m->name_off);
2527
2528 if (!name) {
2529 pr_warn("map '%s': invalid field #%d.\n", map_name, i);
2530 return -EINVAL;
2531 }
2532 if (strcmp(name, "type") == 0) {
2533 if (!get_map_field_int(map_name, btf, m, &map_def->map_type))
2534 return -EINVAL;
2535 map_def->parts |= MAP_DEF_MAP_TYPE;
2536 } else if (strcmp(name, "max_entries") == 0) {
2537 if (!get_map_field_int(map_name, btf, m, &map_def->max_entries))
2538 return -EINVAL;
2539 map_def->parts |= MAP_DEF_MAX_ENTRIES;
2540 } else if (strcmp(name, "map_flags") == 0) {
2541 if (!get_map_field_int(map_name, btf, m, &map_def->map_flags))
2542 return -EINVAL;
2543 map_def->parts |= MAP_DEF_MAP_FLAGS;
2544 } else if (strcmp(name, "numa_node") == 0) {
2545 if (!get_map_field_int(map_name, btf, m, &map_def->numa_node))
2546 return -EINVAL;
2547 map_def->parts |= MAP_DEF_NUMA_NODE;
2548 } else if (strcmp(name, "key_size") == 0) {
2549 __u32 sz;
2550
2551 if (!get_map_field_int(map_name, btf, m, &sz))
2552 return -EINVAL;
2553 if (map_def->key_size && map_def->key_size != sz) {
2554 pr_warn("map '%s': conflicting key size %u != %u.\n",
2555 map_name, map_def->key_size, sz);
2556 return -EINVAL;
2557 }
2558 map_def->key_size = sz;
2559 map_def->parts |= MAP_DEF_KEY_SIZE;
2560 } else if (strcmp(name, "key") == 0) {
2561 __s64 sz;
2562
2563 t = btf__type_by_id(btf, m->type);
2564 if (!t) {
2565 pr_warn("map '%s': key type [%d] not found.\n",
2566 map_name, m->type);
2567 return -EINVAL;
2568 }
2569 if (!btf_is_ptr(t)) {
2570 pr_warn("map '%s': key spec is not PTR: %s.\n",
2571 map_name, btf_kind_str(t));
2572 return -EINVAL;
2573 }
2574 sz = btf__resolve_size(btf, t->type);
2575 if (sz < 0) {
2576 pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
2577 map_name, t->type, (ssize_t)sz);
2578 return sz;
2579 }
2580 if (map_def->key_size && map_def->key_size != sz) {
2581 pr_warn("map '%s': conflicting key size %u != %zd.\n",
2582 map_name, map_def->key_size, (ssize_t)sz);
2583 return -EINVAL;
2584 }
2585 map_def->key_size = sz;
2586 map_def->key_type_id = t->type;
2587 map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE;
2588 } else if (strcmp(name, "value_size") == 0) {
2589 __u32 sz;
2590
2591 if (!get_map_field_int(map_name, btf, m, &sz))
2592 return -EINVAL;
2593 if (map_def->value_size && map_def->value_size != sz) {
2594 pr_warn("map '%s': conflicting value size %u != %u.\n",
2595 map_name, map_def->value_size, sz);
2596 return -EINVAL;
2597 }
2598 map_def->value_size = sz;
2599 map_def->parts |= MAP_DEF_VALUE_SIZE;
2600 } else if (strcmp(name, "value") == 0) {
2601 __s64 sz;
2602
2603 t = btf__type_by_id(btf, m->type);
2604 if (!t) {
2605 pr_warn("map '%s': value type [%d] not found.\n",
2606 map_name, m->type);
2607 return -EINVAL;
2608 }
2609 if (!btf_is_ptr(t)) {
2610 pr_warn("map '%s': value spec is not PTR: %s.\n",
2611 map_name, btf_kind_str(t));
2612 return -EINVAL;
2613 }
2614 sz = btf__resolve_size(btf, t->type);
2615 if (sz < 0) {
2616 pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
2617 map_name, t->type, (ssize_t)sz);
2618 return sz;
2619 }
2620 if (map_def->value_size && map_def->value_size != sz) {
2621 pr_warn("map '%s': conflicting value size %u != %zd.\n",
2622 map_name, map_def->value_size, (ssize_t)sz);
2623 return -EINVAL;
2624 }
2625 map_def->value_size = sz;
2626 map_def->value_type_id = t->type;
2627 map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
2628 }
2629 else if (strcmp(name, "values") == 0) {
2630 bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type);
2631 bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY;
2632 const char *desc = is_map_in_map ? "map-in-map inner" : "prog-array value";
2633 char inner_map_name[128];
2634 int err;
2635
2636 if (is_inner) {
2637 pr_warn("map '%s': multi-level inner maps not supported.\n",
2638 map_name);
2639 return -ENOTSUP;
2640 }
2641 if (i != vlen - 1) {
2642 pr_warn("map '%s': '%s' member should be last.\n",
2643 map_name, name);
2644 return -EINVAL;
2645 }
2646 if (!is_map_in_map && !is_prog_array) {
2647 pr_warn("map '%s': should be map-in-map or prog-array.\n",
2648 map_name);
2649 return -ENOTSUP;
2650 }
2651 if (map_def->value_size && map_def->value_size != 4) {
2652 pr_warn("map '%s': conflicting value size %u != 4.\n",
2653 map_name, map_def->value_size);
2654 return -EINVAL;
2655 }
2656 map_def->value_size = 4;
2657 t = btf__type_by_id(btf, m->type);
2658 if (!t) {
2659 pr_warn("map '%s': %s type [%d] not found.\n",
2660 map_name, desc, m->type);
2661 return -EINVAL;
2662 }
2663 if (!btf_is_array(t) || btf_array(t)->nelems) {
2664 pr_warn("map '%s': %s spec is not a zero-sized array.\n",
2665 map_name, desc);
2666 return -EINVAL;
2667 }
2668 t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
2669 if (!btf_is_ptr(t)) {
2670 pr_warn("map '%s': %s def is of unexpected kind %s.\n",
2671 map_name, desc, btf_kind_str(t));
2672 return -EINVAL;
2673 }
2674 t = skip_mods_and_typedefs(btf, t->type, NULL);
2675 if (is_prog_array) {
2676 if (!btf_is_func_proto(t)) {
2677 pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n",
2678 map_name, btf_kind_str(t));
2679 return -EINVAL;
2680 }
2681 continue;
2682 }
2683 if (!btf_is_struct(t)) {
2684 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2685 map_name, btf_kind_str(t));
2686 return -EINVAL;
2687 }
2688
2689 snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name);
2690 err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL);
2691 if (err)
2692 return err;
2693
2694 map_def->parts |= MAP_DEF_INNER_MAP;
2695 } else if (strcmp(name, "pinning") == 0) {
2696 __u32 val;
2697
2698 if (is_inner) {
2699 pr_warn("map '%s': inner def can't be pinned.\n", map_name);
2700 return -EINVAL;
2701 }
2702 if (!get_map_field_int(map_name, btf, m, &val))
2703 return -EINVAL;
2704 if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
2705 pr_warn("map '%s': invalid pinning value %u.\n",
2706 map_name, val);
2707 return -EINVAL;
2708 }
2709 map_def->pinning = val;
2710 map_def->parts |= MAP_DEF_PINNING;
2711 } else if (strcmp(name, "map_extra") == 0) {
2712 __u64 map_extra;
2713
2714 if (!get_map_field_long(map_name, btf, m, &map_extra))
2715 return -EINVAL;
2716 map_def->map_extra = map_extra;
2717 map_def->parts |= MAP_DEF_MAP_EXTRA;
2718 } else {
2719 if (strict) {
2720 pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
2721 return -ENOTSUP;
2722 }
2723 pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
2724 }
2725 }
2726
2727 if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
2728 pr_warn("map '%s': map type isn't specified.\n", map_name);
2729 return -EINVAL;
2730 }
2731
2732 return 0;
2733 }
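/* Illustrative sketch of a BTF-defined map that parse_btf_map_def() above
 * handles, as written on the BPF C side (assuming bpf_helpers.h's __uint,
 * __type and SEC macros; 'struct my_value' is just an example type):
 *
 *     struct {
 *             __uint(type, BPF_MAP_TYPE_HASH);
 *             __uint(max_entries, 1024);
 *             __type(key, __u32);
 *             __type(value, struct my_value);
 *     } my_map SEC(".maps");
 *
 * This fills map_type, max_entries, key/value sizes and BTF type IDs, and
 * sets the corresponding MAP_DEF_* bits in map_def->parts.
 */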
2734
2735 static size_t adjust_ringbuf_sz(size_t sz)
2736 {
2737 __u32 page_sz = sysconf(_SC_PAGE_SIZE);
2738 __u32 mul;
2739
2740 /* if user forgot to set any size, make sure they see error */
2741 if (sz == 0)
2742 return 0;
2743 /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
2744 * a power-of-2 multiple of kernel's page size. If user diligently
2745 * satisfied these conditions, pass the size through.
2746 */
2747 if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
2748 return sz;
2749
2750 /* Otherwise find closest (page_sz * power_of_2) product bigger than
2751 * user-set size to satisfy both user size request and kernel
2752 * requirements and substitute correct max_entries for map creation.
2753 */
2754 for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
2755 if (mul * page_sz > sz)
2756 return mul * page_sz;
2757 }
2758
2759 /* if it's impossible to satisfy the conditions (i.e., user size is
2760 * very close to UINT_MAX but is not a power-of-2 multiple of
2761 * page_size) then just return original size and let kernel reject it
2762 */
2763 return sz;
2764 }
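/* Worked example (illustrative): with a 4096-byte page, a requested size of
 * 5000 is neither page-aligned nor a power-of-2 multiple of the page size,
 * so the loop above settles on 2 * 4096 = 8192. A request of 16384 (4
 * pages, a power of 2) is passed through unchanged.
 */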
2765
2766 static bool map_is_ringbuf(const struct bpf_map *map)
2767 {
2768 return map->def.type == BPF_MAP_TYPE_RINGBUF ||
2769 map->def.type == BPF_MAP_TYPE_USER_RINGBUF;
2770 }
2771
2772 static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
2773 {
2774 map->def.type = def->map_type;
2775 map->def.key_size = def->key_size;
2776 map->def.value_size = def->value_size;
2777 map->def.max_entries = def->max_entries;
2778 map->def.map_flags = def->map_flags;
2779 map->map_extra = def->map_extra;
2780
2781 map->numa_node = def->numa_node;
2782 map->btf_key_type_id = def->key_type_id;
2783 map->btf_value_type_id = def->value_type_id;
2784
2785 /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
2786 if (map_is_ringbuf(map))
2787 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
2788
2789 if (def->parts & MAP_DEF_MAP_TYPE)
2790 pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
2791
2792 if (def->parts & MAP_DEF_KEY_TYPE)
2793 pr_debug("map '%s': found key [%u], sz = %u.\n",
2794 map->name, def->key_type_id, def->key_size);
2795 else if (def->parts & MAP_DEF_KEY_SIZE)
2796 pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);
2797
2798 if (def->parts & MAP_DEF_VALUE_TYPE)
2799 pr_debug("map '%s': found value [%u], sz = %u.\n",
2800 map->name, def->value_type_id, def->value_size);
2801 else if (def->parts & MAP_DEF_VALUE_SIZE)
2802 pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size);
2803
2804 if (def->parts & MAP_DEF_MAX_ENTRIES)
2805 pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries);
2806 if (def->parts & MAP_DEF_MAP_FLAGS)
2807 pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags);
2808 if (def->parts & MAP_DEF_MAP_EXTRA)
2809 pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name,
2810 (unsigned long long)def->map_extra);
2811 if (def->parts & MAP_DEF_PINNING)
2812 pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning);
2813 if (def->parts & MAP_DEF_NUMA_NODE)
2814 pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node);
2815
2816 if (def->parts & MAP_DEF_INNER_MAP)
2817 pr_debug("map '%s': found inner map definition.\n", map->name);
2818 }
2819
2820 static const char *btf_var_linkage_str(__u32 linkage)
2821 {
2822 switch (linkage) {
2823 case BTF_VAR_STATIC: return "static";
2824 case BTF_VAR_GLOBAL_ALLOCATED: return "global";
2825 case BTF_VAR_GLOBAL_EXTERN: return "extern";
2826 default: return "unknown";
2827 }
2828 }
2829
2830 static int bpf_object__init_user_btf_map(struct bpf_object *obj,
2831 const struct btf_type *sec,
2832 int var_idx, int sec_idx,
2833 const Elf_Data *data, bool strict,
2834 const char *pin_root_path)
2835 {
2836 struct btf_map_def map_def = {}, inner_def = {};
2837 const struct btf_type *var, *def;
2838 const struct btf_var_secinfo *vi;
2839 const struct btf_var *var_extra;
2840 const char *map_name;
2841 struct bpf_map *map;
2842 int err;
2843
2844 vi = btf_var_secinfos(sec) + var_idx;
2845 var = btf__type_by_id(obj->btf, vi->type);
2846 var_extra = btf_var(var);
2847 map_name = btf__name_by_offset(obj->btf, var->name_off);
2848
2849 if (map_name == NULL || map_name[0] == '\0') {
2850 pr_warn("map #%d: empty name.\n", var_idx);
2851 return -EINVAL;
2852 }
2853 if ((__u64)vi->offset + vi->size > data->d_size) {
2854 pr_warn("map '%s' BTF data is corrupted.\n", map_name);
2855 return -EINVAL;
2856 }
2857 if (!btf_is_var(var)) {
2858 pr_warn("map '%s': unexpected var kind %s.\n",
2859 map_name, btf_kind_str(var));
2860 return -EINVAL;
2861 }
2862 if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
2863 pr_warn("map '%s': unsupported map linkage %s.\n",
2864 map_name, btf_var_linkage_str(var_extra->linkage));
2865 return -EOPNOTSUPP;
2866 }
2867
2868 def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
2869 if (!btf_is_struct(def)) {
2870 pr_warn("map '%s': unexpected def kind %s.\n",
2871 map_name, btf_kind_str(def));
2872 return -EINVAL;
2873 }
2874 if (def->size > vi->size) {
2875 pr_warn("map '%s': invalid def size.\n", map_name);
2876 return -EINVAL;
2877 }
2878
2879 map = bpf_object__add_map(obj);
2880 if (IS_ERR(map))
2881 return PTR_ERR(map);
2882 map->name = strdup(map_name);
2883 if (!map->name) {
2884 pr_warn("map '%s': failed to alloc map name.\n", map_name);
2885 return -ENOMEM;
2886 }
2887 map->libbpf_type = LIBBPF_MAP_UNSPEC;
2888 map->def.type = BPF_MAP_TYPE_UNSPEC;
2889 map->sec_idx = sec_idx;
2890 map->sec_offset = vi->offset;
2891 map->btf_var_idx = var_idx;
2892 pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
2893 map_name, map->sec_idx, map->sec_offset);
2894
2895 err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def);
2896 if (err)
2897 return err;
2898
2899 fill_map_from_def(map, &map_def);
2900
2901 if (map_def.pinning == LIBBPF_PIN_BY_NAME) {
2902 err = build_map_pin_path(map, pin_root_path);
2903 if (err) {
2904 pr_warn("map '%s': couldn't build pin path.\n", map->name);
2905 return err;
2906 }
2907 }
2908
2909 if (map_def.parts & MAP_DEF_INNER_MAP) {
2910 map->inner_map = calloc(1, sizeof(*map->inner_map));
2911 if (!map->inner_map)
2912 return -ENOMEM;
2913 map->inner_map->fd = create_placeholder_fd();
2914 if (map->inner_map->fd < 0)
2915 return map->inner_map->fd;
2916 map->inner_map->sec_idx = sec_idx;
2917 map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
2918 if (!map->inner_map->name)
2919 return -ENOMEM;
2920 sprintf(map->inner_map->name, "%s.inner", map_name);
2921
2922 fill_map_from_def(map->inner_map, &inner_def);
2923 }
2924
2925 err = map_fill_btf_type_info(obj, map);
2926 if (err)
2927 return err;
2928
2929 return 0;
2930 }
2931
2932 static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map,
2933 const char *sec_name, int sec_idx,
2934 void *data, size_t data_sz)
2935 {
2936 const long page_sz = sysconf(_SC_PAGE_SIZE);
2937 size_t mmap_sz;
2938
2939 mmap_sz = bpf_map_mmap_sz(map);
2940 if (roundup(data_sz, page_sz) > mmap_sz) {
2941 pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n",
2942 sec_name, mmap_sz, data_sz);
2943 return -E2BIG;
2944 }
2945
2946 obj->arena_data = malloc(data_sz);
2947 if (!obj->arena_data)
2948 return -ENOMEM;
2949 memcpy(obj->arena_data, data, data_sz);
2950 obj->arena_data_sz = data_sz;
2951
2952 /* make bpf_map__init_value() work for ARENA maps */
2953 map->mmaped = obj->arena_data;
2954
2955 return 0;
2956 }
2957
2958 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
2959 const char *pin_root_path)
2960 {
2961 const struct btf_type *sec = NULL;
2962 int nr_types, i, vlen, err;
2963 const struct btf_type *t;
2964 const char *name;
2965 Elf_Data *data;
2966 Elf_Scn *scn;
2967
2968 if (obj->efile.btf_maps_shndx < 0)
2969 return 0;
2970
2971 scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
2972 data = elf_sec_data(obj, scn);
2973 if (!scn || !data) {
2974 pr_warn("elf: failed to get %s map definitions for %s\n",
2975 MAPS_ELF_SEC, obj->path);
2976 return -EINVAL;
2977 }
2978
2979 nr_types = btf__type_cnt(obj->btf);
2980 for (i = 1; i < nr_types; i++) {
2981 t = btf__type_by_id(obj->btf, i);
2982 if (!btf_is_datasec(t))
2983 continue;
2984 name = btf__name_by_offset(obj->btf, t->name_off);
2985 if (strcmp(name, MAPS_ELF_SEC) == 0) {
2986 sec = t;
2987 obj->efile.btf_maps_sec_btf_id = i;
2988 break;
2989 }
2990 }
2991
2992 if (!sec) {
2993 pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
2994 return -ENOENT;
2995 }
2996
2997 vlen = btf_vlen(sec);
2998 for (i = 0; i < vlen; i++) {
2999 err = bpf_object__init_user_btf_map(obj, sec, i,
3000 obj->efile.btf_maps_shndx,
3001 data, strict,
3002 pin_root_path);
3003 if (err)
3004 return err;
3005 }
3006
3007 for (i = 0; i < obj->nr_maps; i++) {
3008 struct bpf_map *map = &obj->maps[i];
3009
3010 if (map->def.type != BPF_MAP_TYPE_ARENA)
3011 continue;
3012
3013 if (obj->arena_map_idx >= 0) {
3014 pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n",
3015 map->name, obj->maps[obj->arena_map_idx].name);
3016 return -EINVAL;
3017 }
3018 obj->arena_map_idx = i;
3019
3020 if (obj->efile.arena_data) {
3021 err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx,
3022 obj->efile.arena_data->d_buf,
3023 obj->efile.arena_data->d_size);
3024 if (err)
3025 return err;
3026 }
3027 }
3028 if (obj->efile.arena_data && obj->arena_map_idx < 0) {
3029 pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be explicitly declared in SEC(\".maps\")\n",
3030 ARENA_SEC);
3031 return -ENOENT;
3032 }
3033
3034 return 0;
3035 }
3036
3037 static int bpf_object__init_maps(struct bpf_object *obj,
3038 const struct bpf_object_open_opts *opts)
3039 {
3040 const char *pin_root_path;
3041 bool strict;
3042 int err = 0;
3043
3044 strict = !OPTS_GET(opts, relaxed_maps, false);
3045 pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
3046
3047 err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
3048 err = err ?: bpf_object__init_global_data_maps(obj);
3049 err = err ?: bpf_object__init_kconfig_map(obj);
3050 err = err ?: bpf_object_init_struct_ops(obj);
3051
3052 return err;
3053 }
3054
3055 static bool section_have_execinstr(struct bpf_object *obj, int idx)
3056 {
3057 Elf64_Shdr *sh;
3058
3059 sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx));
3060 if (!sh)
3061 return false;
3062
3063 return sh->sh_flags & SHF_EXECINSTR;
3064 }
3065
3066 static bool starts_with_qmark(const char *s)
3067 {
3068 return s && s[0] == '?';
3069 }
3070
3071 static bool btf_needs_sanitization(struct bpf_object *obj)
3072 {
3073 bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
3074 bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
3075 bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
3076 bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
3077 bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
3078 bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
3079 bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
3080 bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
3081
3082 return !has_func || !has_datasec || !has_func_global || !has_float ||
3083 !has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec;
3084 }
3085
3086 static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
3087 {
3088 bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
3089 bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
3090 bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
3091 bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
3092 bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
3093 bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
3094 bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
3095 bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
3096 int enum64_placeholder_id = 0;
3097 struct btf_type *t;
3098 int i, j, vlen;
3099
3100 for (i = 1; i < btf__type_cnt(btf); i++) {
3101 t = (struct btf_type *)btf__type_by_id(btf, i);
3102
3103 if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) {
3104 /* replace VAR/DECL_TAG with INT */
3105 t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
3106 /*
3107 * using size = 1 is the safest choice, 4 will be too
3108 * big and cause kernel BTF validation failure if
3109 * original variable took less than 4 bytes
3110 */
3111 t->size = 1;
3112 *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
3113 } else if (!has_datasec && btf_is_datasec(t)) {
3114 /* replace DATASEC with STRUCT */
3115 const struct btf_var_secinfo *v = btf_var_secinfos(t);
3116 struct btf_member *m = btf_members(t);
3117 struct btf_type *vt;
3118 char *name;
3119
3120 name = (char *)btf__name_by_offset(btf, t->name_off);
3121 while (*name) {
3122 if (*name == '.' || *name == '?')
3123 *name = '_';
3124 name++;
3125 }
3126
3127 vlen = btf_vlen(t);
3128 t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
3129 for (j = 0; j < vlen; j++, v++, m++) {
3130 /* order of field assignments is important */
3131 m->offset = v->offset * 8;
3132 m->type = v->type;
3133 /* preserve variable name as member name */
3134 vt = (void *)btf__type_by_id(btf, v->type);
3135 m->name_off = vt->name_off;
3136 }
3137 } else if (!has_qmark_datasec && btf_is_datasec(t) &&
3138 starts_with_qmark(btf__name_by_offset(btf, t->name_off))) {
3139 /* replace '?' prefix with '_' for DATASEC names */
3140 char *name;
3141
3142 name = (char *)btf__name_by_offset(btf, t->name_off);
3143 if (name[0] == '?')
3144 name[0] = '_';
3145 } else if (!has_func && btf_is_func_proto(t)) {
3146 /* replace FUNC_PROTO with ENUM */
3147 vlen = btf_vlen(t);
3148 t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
3149 t->size = sizeof(__u32); /* kernel enforced */
3150 } else if (!has_func && btf_is_func(t)) {
3151 /* replace FUNC with TYPEDEF */
3152 t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
3153 } else if (!has_func_global && btf_is_func(t)) {
3154 /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
3155 t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
3156 } else if (!has_float && btf_is_float(t)) {
3157 /* replace FLOAT with an equally-sized empty STRUCT;
3158 * since C compilers do not accept e.g. "float" as a
3159 * valid struct name, make it anonymous
3160 */
3161 t->name_off = 0;
3162 t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
3163 } else if (!has_type_tag && btf_is_type_tag(t)) {
3164 /* replace TYPE_TAG with a CONST */
3165 t->name_off = 0;
3166 t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0);
3167 } else if (!has_enum64 && btf_is_enum(t)) {
3168 /* clear the kflag */
3169 t->info = btf_type_info(btf_kind(t), btf_vlen(t), false);
3170 } else if (!has_enum64 && btf_is_enum64(t)) {
3171 /* replace ENUM64 with a union */
3172 struct btf_member *m;
3173
3174 if (enum64_placeholder_id == 0) {
3175 enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0);
3176 if (enum64_placeholder_id < 0)
3177 return enum64_placeholder_id;
3178
3179 t = (struct btf_type *)btf__type_by_id(btf, i);
3180 }
3181
3182 m = btf_members(t);
3183 vlen = btf_vlen(t);
3184 t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen);
3185 for (j = 0; j < vlen; j++, m++) {
3186 m->type = enum64_placeholder_id;
3187 m->offset = 0;
3188 }
3189 }
3190 }
3191
3192 return 0;
3193 }
3194
3195 static bool libbpf_needs_btf(const struct bpf_object *obj)
3196 {
3197 return obj->efile.btf_maps_shndx >= 0 ||
3198 obj->efile.has_st_ops ||
3199 obj->nr_extern > 0;
3200 }
3201
3202 static bool kernel_needs_btf(const struct bpf_object *obj)
3203 {
3204 return obj->efile.has_st_ops;
3205 }
3206
3207 static int bpf_object__init_btf(struct bpf_object *obj,
3208 Elf_Data *btf_data,
3209 Elf_Data *btf_ext_data)
3210 {
3211 int err = -ENOENT;
3212
3213 if (btf_data) {
3214 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
3215 err = libbpf_get_error(obj->btf);
3216 if (err) {
3217 obj->btf = NULL;
3218 pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err);
3219 goto out;
3220 }
3221 /* enforce 8-byte pointers for BPF-targeted BTFs */
3222 btf__set_pointer_size(obj->btf, 8);
3223 }
3224 if (btf_ext_data) {
3225 struct btf_ext_info *ext_segs[3];
3226 int seg_num, sec_num;
3227
3228 if (!obj->btf) {
3229 pr_debug("Ignoring ELF section %s because the ELF section %s it depends on is not found.\n",
3230 BTF_EXT_ELF_SEC, BTF_ELF_SEC);
3231 goto out;
3232 }
3233 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
3234 err = libbpf_get_error(obj->btf_ext);
3235 if (err) {
3236 pr_warn("Error loading ELF section %s: %d. Ignoring it and continuing.\n",
3237 BTF_EXT_ELF_SEC, err);
3238 obj->btf_ext = NULL;
3239 goto out;
3240 }
3241
3242 /* setup .BTF.ext to ELF section mapping */
3243 ext_segs[0] = &obj->btf_ext->func_info;
3244 ext_segs[1] = &obj->btf_ext->line_info;
3245 ext_segs[2] = &obj->btf_ext->core_relo_info;
3246 for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) {
3247 struct btf_ext_info *seg = ext_segs[seg_num];
3248 const struct btf_ext_info_sec *sec;
3249 const char *sec_name;
3250 Elf_Scn *scn;
3251
3252 if (seg->sec_cnt == 0)
3253 continue;
3254
3255 seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs));
3256 if (!seg->sec_idxs) {
3257 err = -ENOMEM;
3258 goto out;
3259 }
3260
3261 sec_num = 0;
3262 for_each_btf_ext_sec(seg, sec) {
3263 /* preventively increment index to avoid doing
3264 * this before every continue below
3265 */
3266 sec_num++;
3267
3268 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
3269 if (str_is_empty(sec_name))
3270 continue;
3271 scn = elf_sec_by_name(obj, sec_name);
3272 if (!scn)
3273 continue;
3274
3275 seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn);
3276 }
3277 }
3278 }
3279 out:
3280 if (err && libbpf_needs_btf(obj)) {
3281 pr_warn("BTF is required, but is missing or corrupted.\n");
3282 return err;
3283 }
3284 return 0;
3285 }
3286
3287 static int compare_vsi_off(const void *_a, const void *_b)
3288 {
3289 const struct btf_var_secinfo *a = _a;
3290 const struct btf_var_secinfo *b = _b;
3291
3292 return a->offset - b->offset;
3293 }
3294
3295 static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
3296 struct btf_type *t)
3297 {
3298 __u32 size = 0, i, vars = btf_vlen(t);
3299 const char *sec_name = btf__name_by_offset(btf, t->name_off);
3300 struct btf_var_secinfo *vsi;
3301 bool fixup_offsets = false;
3302 int err;
3303
3304 if (!sec_name) {
3305 pr_debug("No name found in string section for DATASEC kind.\n");
3306 return -ENOENT;
3307 }
3308
3309 /* Extern-backing datasecs (.ksyms, .kconfig) have their size and
3310 * variable offsets set at the previous step. Further, not every
3311 * extern BTF VAR has corresponding ELF symbol preserved, so we skip
3312 * all fixups altogether for such sections and go straight to sorting
3313 * VARs within their DATASEC.
3314 */
3315 if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0)
3316 goto sort_vars;
3317
3318 /* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to
3319 * fix this up. But BPF static linker already fixes this up and fills
3320 * all the sizes and offsets during static linking. So this step has
3321 * to be optional. But the STV_HIDDEN handling is non-optional for any
3322 * non-extern DATASEC, so the variable fixup loop below handles both
3323 * functions at the same time, paying the cost of BTF VAR <-> ELF
3324 * symbol matching just once.
3325 */
3326 if (t->size == 0) {
3327 err = find_elf_sec_sz(obj, sec_name, &size);
3328 if (err || !size) {
3329 pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n",
3330 sec_name, size, err);
3331 return -ENOENT;
3332 }
3333
3334 t->size = size;
3335 fixup_offsets = true;
3336 }
3337
3338 for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
3339 const struct btf_type *t_var;
3340 struct btf_var *var;
3341 const char *var_name;
3342 Elf64_Sym *sym;
3343
3344 t_var = btf__type_by_id(btf, vsi->type);
3345 if (!t_var || !btf_is_var(t_var)) {
3346 pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name);
3347 return -EINVAL;
3348 }
3349
3350 var = btf_var(t_var);
3351 if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN)
3352 continue;
3353
3354 var_name = btf__name_by_offset(btf, t_var->name_off);
3355 if (!var_name) {
3356 pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n",
3357 sec_name, i);
3358 return -ENOENT;
3359 }
3360
3361 sym = find_elf_var_sym(obj, var_name);
3362 if (IS_ERR(sym)) {
3363 pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n",
3364 sec_name, var_name);
3365 return -ENOENT;
3366 }
3367
3368 if (fixup_offsets)
3369 vsi->offset = sym->st_value;
3370
3371 /* if variable is a global/weak symbol, but has restricted
3372 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR
3373 * as static. This follows similar logic for functions (BPF
3374 * subprogs) and influences libbpf's further decisions about
3375 * whether to make global data BPF array maps as
3376 * BPF_F_MMAPABLE.
3377 */
3378 if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
3379 || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)
3380 var->linkage = BTF_VAR_STATIC;
3381 }
3382
3383 sort_vars:
3384 qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off);
3385 return 0;
3386 }
3387
3388 static int bpf_object_fixup_btf(struct bpf_object *obj)
3389 {
3390 int i, n, err = 0;
3391
3392 if (!obj->btf)
3393 return 0;
3394
3395 n = btf__type_cnt(obj->btf);
3396 for (i = 1; i < n; i++) {
3397 struct btf_type *t = btf_type_by_id(obj->btf, i);
3398
3399 /* Loader needs to fix up some of the things the compiler
3400 * couldn't get its hands on while emitting BTF, namely
3401 * DATASEC sizes and global variable offsets. We use the
3402 * info from the ELF itself for this purpose.
3403 */
3404 if (btf_is_datasec(t)) {
3405 err = btf_fixup_datasec(obj, obj->btf, t);
3406 if (err)
3407 return err;
3408 }
3409 }
3410
3411 return 0;
3412 }
3413
3414 static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
3415 {
3416 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
3417 prog->type == BPF_PROG_TYPE_LSM)
3418 return true;
3419
3420 /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
3421 * also need vmlinux BTF
3422 */
3423 if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
3424 return true;
3425
3426 return false;
3427 }
3428
3429 static bool map_needs_vmlinux_btf(struct bpf_map *map)
3430 {
3431 return bpf_map__is_struct_ops(map);
3432 }
3433
3434 static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
3435 {
3436 struct bpf_program *prog;
3437 struct bpf_map *map;
3438 int i;
3439
3440 /* CO-RE relocations need kernel BTF, only when btf_custom_path
3441 * is not specified
3442 */
3443 if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
3444 return true;
3445
3446 /* Support for typed ksyms needs kernel BTF */
3447 for (i = 0; i < obj->nr_extern; i++) {
3448 const struct extern_desc *ext;
3449
3450 ext = &obj->externs[i];
3451 if (ext->type == EXT_KSYM && ext->ksym.type_id)
3452 return true;
3453 }
3454
3455 bpf_object__for_each_program(prog, obj) {
3456 if (!prog->autoload)
3457 continue;
3458 if (prog_needs_vmlinux_btf(prog))
3459 return true;
3460 }
3461
3462 bpf_object__for_each_map(map, obj) {
3463 if (map_needs_vmlinux_btf(map))
3464 return true;
3465 }
3466
3467 return false;
3468 }
3469
3470 static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
3471 {
3472 int err;
3473
3474 /* btf_vmlinux could be loaded earlier */
3475 if (obj->btf_vmlinux || obj->gen_loader)
3476 return 0;
3477
3478 if (!force && !obj_needs_vmlinux_btf(obj))
3479 return 0;
3480
3481 obj->btf_vmlinux = btf__load_vmlinux_btf();
3482 err = libbpf_get_error(obj->btf_vmlinux);
3483 if (err) {
3484 pr_warn("Error loading vmlinux BTF: %d\n", err);
3485 obj->btf_vmlinux = NULL;
3486 return err;
3487 }
3488 return 0;
3489 }
3490
3491 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
3492 {
3493 struct btf *kern_btf = obj->btf;
3494 bool btf_mandatory, sanitize;
3495 int i, err = 0;
3496
3497 if (!obj->btf)
3498 return 0;
3499
3500 if (!kernel_supports(obj, FEAT_BTF)) {
3501 if (kernel_needs_btf(obj)) {
3502 err = -EOPNOTSUPP;
3503 goto report;
3504 }
3505 pr_debug("Kernel doesn't support BTF, skipping BTF upload.\n");
3506 return 0;
3507 }
3508
3509 /* Even though some subprogs are global/weak, user might prefer more
3510 * permissive BPF verification process that BPF verifier performs for
3511 * static functions, taking into account more context from the caller
3512 * functions. In such case, they need to mark such subprogs with
3513 * __attribute__((visibility("hidden"))) and libbpf will adjust
3514 * corresponding FUNC BTF type to be marked as static and trigger more
3515 * involved BPF verification process.
3516 */
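/* Illustrative example (assuming bpf_helpers.h's __hidden macro): marking a
 * subprogram in BPF C as
 *
 *     __hidden int my_subprog(struct my_ctx *ctx);
 *
 * is expected to set prog->mark_btf_static during ELF parsing, and the loop
 * below then downgrades the matching FUNC BTF entry from BTF_FUNC_GLOBAL to
 * BTF_FUNC_STATIC before BTF is loaded into the kernel.
 */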
3517 for (i = 0; i < obj->nr_programs; i++) {
3518 struct bpf_program *prog = &obj->programs[i];
3519 struct btf_type *t;
3520 const char *name;
3521 int j, n;
3522
3523 if (!prog->mark_btf_static || !prog_is_subprog(obj, prog))
3524 continue;
3525
3526 n = btf__type_cnt(obj->btf);
3527 for (j = 1; j < n; j++) {
3528 t = btf_type_by_id(obj->btf, j);
3529 if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL)
3530 continue;
3531
3532 name = btf__str_by_offset(obj->btf, t->name_off);
3533 if (strcmp(name, prog->name) != 0)
3534 continue;
3535
3536 t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0);
3537 break;
3538 }
3539 }
3540
3541 sanitize = btf_needs_sanitization(obj);
3542 if (sanitize) {
3543 const void *raw_data;
3544 __u32 sz;
3545
3546 /* clone BTF to sanitize a copy and leave the original intact */
3547 raw_data = btf__raw_data(obj->btf, &sz);
3548 kern_btf = btf__new(raw_data, sz);
3549 err = libbpf_get_error(kern_btf);
3550 if (err)
3551 return err;
3552
3553 /* enforce 8-byte pointers for BPF-targeted BTFs */
3554 btf__set_pointer_size(obj->btf, 8);
3555 err = bpf_object__sanitize_btf(obj, kern_btf);
3556 if (err)
3557 return err;
3558 }
3559
3560 if (obj->gen_loader) {
3561 __u32 raw_size = 0;
3562 const void *raw_data = btf__raw_data(kern_btf, &raw_size);
3563
3564 if (!raw_data)
3565 return -ENOMEM;
3566 bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size);
3567 /* Pretend to have a valid FD to pass various fd >= 0 checks.
3568 * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
3569 */
3570 btf__set_fd(kern_btf, 0);
3571 } else {
3572 /* currently BPF_BTF_LOAD only supports log_level 1 */
3573 err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size,
3574 obj->log_level ? 1 : 0, obj->token_fd);
3575 }
3576 if (sanitize) {
3577 if (!err) {
3578 /* move fd to libbpf's BTF */
3579 btf__set_fd(obj->btf, btf__fd(kern_btf));
3580 btf__set_fd(kern_btf, -1);
3581 }
3582 btf__free(kern_btf);
3583 }
3584 report:
3585 if (err) {
3586 btf_mandatory = kernel_needs_btf(obj);
3587 pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
3588 btf_mandatory ? "BTF is mandatory, can't proceed."
3589 : "BTF is optional, ignoring.");
3590 if (!btf_mandatory)
3591 err = 0;
3592 }
3593 return err;
3594 }
3595
3596 static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
3597 {
3598 const char *name;
3599
3600 name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
3601 if (!name) {
3602 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
3603 off, obj->path, elf_errmsg(-1));
3604 return NULL;
3605 }
3606
3607 return name;
3608 }
3609
3610 static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
3611 {
3612 const char *name;
3613
3614 name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
3615 if (!name) {
3616 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
3617 off, obj->path, elf_errmsg(-1));
3618 return NULL;
3619 }
3620
3621 return name;
3622 }
3623
3624 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
3625 {
3626 Elf_Scn *scn;
3627
3628 scn = elf_getscn(obj->efile.elf, idx);
3629 if (!scn) {
3630 pr_warn("elf: failed to get section(%zu) from %s: %s\n",
3631 idx, obj->path, elf_errmsg(-1));
3632 return NULL;
3633 }
3634 return scn;
3635 }
3636
3637 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
3638 {
3639 Elf_Scn *scn = NULL;
3640 Elf *elf = obj->efile.elf;
3641 const char *sec_name;
3642
3643 while ((scn = elf_nextscn(elf, scn)) != NULL) {
3644 sec_name = elf_sec_name(obj, scn);
3645 if (!sec_name)
3646 return NULL;
3647
3648 if (strcmp(sec_name, name) != 0)
3649 continue;
3650
3651 return scn;
3652 }
3653 return NULL;
3654 }
3655
3656 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn)
3657 {
3658 Elf64_Shdr *shdr;
3659
3660 if (!scn)
3661 return NULL;
3662
3663 shdr = elf64_getshdr(scn);
3664 if (!shdr) {
3665 pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
3666 elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3667 return NULL;
3668 }
3669
3670 return shdr;
3671 }
3672
3673 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
3674 {
3675 const char *name;
3676 Elf64_Shdr *sh;
3677
3678 if (!scn)
3679 return NULL;
3680
3681 sh = elf_sec_hdr(obj, scn);
3682 if (!sh)
3683 return NULL;
3684
3685 name = elf_sec_str(obj, sh->sh_name);
3686 if (!name) {
3687 pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
3688 elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3689 return NULL;
3690 }
3691
3692 return name;
3693 }
3694
3695 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
3696 {
3697 Elf_Data *data;
3698
3699 if (!scn)
3700 return NULL;
3701
3702 data = elf_getdata(scn, 0);
3703 if (!data) {
3704 pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
3705 elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
3706 obj->path, elf_errmsg(-1));
3707 return NULL;
3708 }
3709
3710 return data;
3711 }
3712
3713 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx)
3714 {
3715 if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym))
3716 return NULL;
3717
3718 return (Elf64_Sym *)obj->efile.symbols->d_buf + idx;
3719 }
3720
3721 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx)
3722 {
3723 if (idx >= data->d_size / sizeof(Elf64_Rel))
3724 return NULL;
3725
3726 return (Elf64_Rel *)data->d_buf + idx;
3727 }
3728
3729 static bool is_sec_name_dwarf(const char *name)
3730 {
3731 /* approximation, but the actual list is too long */
3732 return str_has_pfx(name, ".debug_");
3733 }
3734
3735 static bool ignore_elf_section(Elf64_Shdr *hdr, const char *name)
3736 {
3737 /* no special handling of .strtab */
3738 if (hdr->sh_type == SHT_STRTAB)
3739 return true;
3740
3741 /* ignore .llvm_addrsig section as well */
3742 if (hdr->sh_type == SHT_LLVM_ADDRSIG)
3743 return true;
3744
3745 /* no subprograms will lead to an empty .text section, ignore it */
3746 if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
3747 strcmp(name, ".text") == 0)
3748 return true;
3749
3750 /* DWARF sections */
3751 if (is_sec_name_dwarf(name))
3752 return true;
3753
3754 if (str_has_pfx(name, ".rel")) {
3755 name += sizeof(".rel") - 1;
3756 /* DWARF section relocations */
3757 if (is_sec_name_dwarf(name))
3758 return true;
3759
3760 /* .BTF and .BTF.ext don't need relocations */
3761 if (strcmp(name, BTF_ELF_SEC) == 0 ||
3762 strcmp(name, BTF_EXT_ELF_SEC) == 0)
3763 return true;
3764 }
3765
3766 return false;
3767 }
3768
3769 static int cmp_progs(const void *_a, const void *_b)
3770 {
3771 const struct bpf_program *a = _a;
3772 const struct bpf_program *b = _b;
3773
3774 if (a->sec_idx != b->sec_idx)
3775 return a->sec_idx < b->sec_idx ? -1 : 1;
3776
3777 /* sec_insn_off can't be the same within the section */
3778 return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
3779 }
3780
3781 static int bpf_object__elf_collect(struct bpf_object *obj)
3782 {
3783 struct elf_sec_desc *sec_desc;
3784 Elf *elf = obj->efile.elf;
3785 Elf_Data *btf_ext_data = NULL;
3786 Elf_Data *btf_data = NULL;
3787 int idx = 0, err = 0;
3788 const char *name;
3789 Elf_Data *data;
3790 Elf_Scn *scn;
3791 Elf64_Shdr *sh;
3792
3793 /* ELF section indices are 0-based, but sec #0 is special "invalid"
3794 * section. Since section count retrieved by elf_getshdrnum() does
3795 * include sec #0, it is already the necessary size of an array to keep
3796 * all the sections.
3797 */
3798 if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) {
3799 pr_warn("elf: failed to get the number of sections for %s: %s\n",
3800 obj->path, elf_errmsg(-1));
3801 return -LIBBPF_ERRNO__FORMAT;
3802 }
3803 obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs));
3804 if (!obj->efile.secs)
3805 return -ENOMEM;
3806
3807 /* a bunch of ELF parsing functionality depends on processing symbols,
3808 * so do the first pass and find the symbol table
3809 */
3810 scn = NULL;
3811 while ((scn = elf_nextscn(elf, scn)) != NULL) {
3812 sh = elf_sec_hdr(obj, scn);
3813 if (!sh)
3814 return -LIBBPF_ERRNO__FORMAT;
3815
3816 if (sh->sh_type == SHT_SYMTAB) {
3817 if (obj->efile.symbols) {
3818 pr_warn("elf: multiple symbol tables in %s\n", obj->path);
3819 return -LIBBPF_ERRNO__FORMAT;
3820 }
3821
3822 data = elf_sec_data(obj, scn);
3823 if (!data)
3824 return -LIBBPF_ERRNO__FORMAT;
3825
3826 idx = elf_ndxscn(scn);
3827
3828 obj->efile.symbols = data;
3829 obj->efile.symbols_shndx = idx;
3830 obj->efile.strtabidx = sh->sh_link;
3831 }
3832 }
3833
3834 if (!obj->efile.symbols) {
3835 pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n",
3836 obj->path);
3837 return -ENOENT;
3838 }
3839
3840 scn = NULL;
3841 while ((scn = elf_nextscn(elf, scn)) != NULL) {
3842 idx = elf_ndxscn(scn);
3843 sec_desc = &obj->efile.secs[idx];
3844
3845 sh = elf_sec_hdr(obj, scn);
3846 if (!sh)
3847 return -LIBBPF_ERRNO__FORMAT;
3848
3849 name = elf_sec_str(obj, sh->sh_name);
3850 if (!name)
3851 return -LIBBPF_ERRNO__FORMAT;
3852
3853 if (ignore_elf_section(sh, name))
3854 continue;
3855
3856 data = elf_sec_data(obj, scn);
3857 if (!data)
3858 return -LIBBPF_ERRNO__FORMAT;
3859
3860 pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
3861 idx, name, (unsigned long)data->d_size,
3862 (int)sh->sh_link, (unsigned long)sh->sh_flags,
3863 (int)sh->sh_type);
3864
3865 if (strcmp(name, "license") == 0) {
3866 err = bpf_object__init_license(obj, data->d_buf, data->d_size);
3867 if (err)
3868 return err;
3869 } else if (strcmp(name, "version") == 0) {
3870 err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
3871 if (err)
3872 return err;
3873 } else if (strcmp(name, "maps") == 0) {
3874 pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n");
3875 return -ENOTSUP;
3876 } else if (strcmp(name, MAPS_ELF_SEC) == 0) {
3877 obj->efile.btf_maps_shndx = idx;
3878 } else if (strcmp(name, BTF_ELF_SEC) == 0) {
3879 if (sh->sh_type != SHT_PROGBITS)
3880 return -LIBBPF_ERRNO__FORMAT;
3881 btf_data = data;
3882 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
3883 if (sh->sh_type != SHT_PROGBITS)
3884 return -LIBBPF_ERRNO__FORMAT;
3885 btf_ext_data = data;
3886 } else if (sh->sh_type == SHT_SYMTAB) {
3887 /* already processed during the first pass above */
3888 } else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) {
3889 if (sh->sh_flags & SHF_EXECINSTR) {
3890 if (strcmp(name, ".text") == 0)
3891 obj->efile.text_shndx = idx;
3892 err = bpf_object__add_programs(obj, data, name, idx);
3893 if (err)
3894 return err;
3895 } else if (strcmp(name, DATA_SEC) == 0 ||
3896 str_has_pfx(name, DATA_SEC ".")) {
3897 sec_desc->sec_type = SEC_DATA;
3898 sec_desc->shdr = sh;
3899 sec_desc->data = data;
3900 } else if (strcmp(name, RODATA_SEC) == 0 ||
3901 str_has_pfx(name, RODATA_SEC ".")) {
3902 sec_desc->sec_type = SEC_RODATA;
3903 sec_desc->shdr = sh;
3904 sec_desc->data = data;
3905 } else if (strcmp(name, STRUCT_OPS_SEC) == 0 ||
3906 strcmp(name, STRUCT_OPS_LINK_SEC) == 0 ||
3907 strcmp(name, "?" STRUCT_OPS_SEC) == 0 ||
3908 strcmp(name, "?" STRUCT_OPS_LINK_SEC) == 0) {
3909 sec_desc->sec_type = SEC_ST_OPS;
3910 sec_desc->shdr = sh;
3911 sec_desc->data = data;
3912 obj->efile.has_st_ops = true;
3913 } else if (strcmp(name, ARENA_SEC) == 0) {
3914 obj->efile.arena_data = data;
3915 obj->efile.arena_data_shndx = idx;
3916 } else {
3917 pr_info("elf: skipping unrecognized data section(%d) %s\n",
3918 idx, name);
3919 }
3920 } else if (sh->sh_type == SHT_REL) {
3921 int targ_sec_idx = sh->sh_info; /* points to other section */
3922
3923 if (sh->sh_entsize != sizeof(Elf64_Rel) ||
3924 targ_sec_idx >= obj->efile.sec_cnt)
3925 return -LIBBPF_ERRNO__FORMAT;
3926
3927 /* Only do relo for section with exec instructions */
3928 if (!section_have_execinstr(obj, targ_sec_idx) &&
3929 strcmp(name, ".rel" STRUCT_OPS_SEC) &&
3930 strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) &&
3931 strcmp(name, ".rel?" STRUCT_OPS_SEC) &&
3932 strcmp(name, ".rel?" STRUCT_OPS_LINK_SEC) &&
3933 strcmp(name, ".rel" MAPS_ELF_SEC)) {
3934 pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
3935 idx, name, targ_sec_idx,
3936 elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>");
3937 continue;
3938 }
3939
3940 sec_desc->sec_type = SEC_RELO;
3941 sec_desc->shdr = sh;
3942 sec_desc->data = data;
3943 } else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 ||
3944 str_has_pfx(name, BSS_SEC "."))) {
3945 sec_desc->sec_type = SEC_BSS;
3946 sec_desc->shdr = sh;
3947 sec_desc->data = data;
3948 } else {
3949 pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
3950 (size_t)sh->sh_size);
3951 }
3952 }
3953
3954 if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
3955 pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
3956 return -LIBBPF_ERRNO__FORMAT;
3957 }
3958
3959 /* sort BPF programs by section name and in-section instruction offset
3960 * for faster search
3961 */
3962 if (obj->nr_programs)
3963 qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
3964
3965 return bpf_object__init_btf(obj, btf_data, btf_ext_data);
3966 }
3967
3968 static bool sym_is_extern(const Elf64_Sym *sym)
3969 {
3970 int bind = ELF64_ST_BIND(sym->st_info);
3971 /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
3972 return sym->st_shndx == SHN_UNDEF &&
3973 (bind == STB_GLOBAL || bind == STB_WEAK) &&
3974 ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE;
3975 }
3976
3977 static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx)
3978 {
3979 int bind = ELF64_ST_BIND(sym->st_info);
3980 int type = ELF64_ST_TYPE(sym->st_info);
3981
3982 /* in .text section */
3983 if (sym->st_shndx != text_shndx)
3984 return false;
3985
3986 /* local function */
3987 if (bind == STB_LOCAL && type == STT_SECTION)
3988 return true;
3989
3990 /* global function */
3991 return (bind == STB_GLOBAL || bind == STB_WEAK) && type == STT_FUNC;
3992 }
3993
3994 static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
3995 {
3996 const struct btf_type *t;
3997 const char *tname;
3998 int i, n;
3999
4000 if (!btf)
4001 return -ESRCH;
4002
4003 n = btf__type_cnt(btf);
4004 for (i = 1; i < n; i++) {
4005 t = btf__type_by_id(btf, i);
4006
4007 if (!btf_is_var(t) && !btf_is_func(t))
4008 continue;
4009
4010 tname = btf__name_by_offset(btf, t->name_off);
4011 if (strcmp(tname, ext_name))
4012 continue;
4013
4014 if (btf_is_var(t) &&
4015 btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
4016 return -EINVAL;
4017
4018 if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
4019 return -EINVAL;
4020
4021 return i;
4022 }
4023
4024 return -ENOENT;
4025 }
4026
4027 static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) {
4028 const struct btf_var_secinfo *vs;
4029 const struct btf_type *t;
4030 int i, j, n;
4031
4032 if (!btf)
4033 return -ESRCH;
4034
4035 n = btf__type_cnt(btf);
4036 for (i = 1; i < n; i++) {
4037 t = btf__type_by_id(btf, i);
4038
4039 if (!btf_is_datasec(t))
4040 continue;
4041
4042 vs = btf_var_secinfos(t);
4043 for (j = 0; j < btf_vlen(t); j++, vs++) {
4044 if (vs->type == ext_btf_id)
4045 return i;
4046 }
4047 }
4048
4049 return -ENOENT;
4050 }
4051
4052 static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
4053 bool *is_signed)
4054 {
4055 const struct btf_type *t;
4056 const char *name;
4057
4058 t = skip_mods_and_typedefs(btf, id, NULL);
4059 name = btf__name_by_offset(btf, t->name_off);
4060
4061 if (is_signed)
4062 *is_signed = false;
4063 switch (btf_kind(t)) {
4064 case BTF_KIND_INT: {
4065 int enc = btf_int_encoding(t);
4066
4067 if (enc & BTF_INT_BOOL)
4068 return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
4069 if (is_signed)
4070 *is_signed = enc & BTF_INT_SIGNED;
4071 if (t->size == 1)
4072 return KCFG_CHAR;
4073 if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
4074 return KCFG_UNKNOWN;
4075 return KCFG_INT;
4076 }
4077 case BTF_KIND_ENUM:
4078 if (t->size != 4)
4079 return KCFG_UNKNOWN;
4080 if (strcmp(name, "libbpf_tristate"))
4081 return KCFG_UNKNOWN;
4082 return KCFG_TRISTATE;
4083 case BTF_KIND_ENUM64:
4084 if (strcmp(name, "libbpf_tristate"))
4085 return KCFG_UNKNOWN;
4086 return KCFG_TRISTATE;
4087 case BTF_KIND_ARRAY:
4088 if (btf_array(t)->nelems == 0)
4089 return KCFG_UNKNOWN;
4090 if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
4091 return KCFG_UNKNOWN;
4092 return KCFG_CHAR_ARR;
4093 default:
4094 return KCFG_UNKNOWN;
4095 }
4096 }
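/* For illustration, a hedged sketch of how BPF-side Kconfig externs map to
 * the kcfg types classified above (the __kconfig convention and
 * enum libbpf_tristate come from bpf_helpers.h; the particular names below
 * are just examples):
 *
 *	extern int LINUX_KERNEL_VERSION __kconfig;		// KCFG_INT
 *	extern bool CONFIG_BPF_SYSCALL __kconfig;		// KCFG_BOOL
 *	extern enum libbpf_tristate CONFIG_MODULES __kconfig;	// KCFG_TRISTATE
 *	extern char CONFIG_LOCALVERSION[64] __kconfig;		// KCFG_CHAR_ARR
 */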
4097
4098 static int cmp_externs(const void *_a, const void *_b)
4099 {
4100 const struct extern_desc *a = _a;
4101 const struct extern_desc *b = _b;
4102
4103 if (a->type != b->type)
4104 return a->type < b->type ? -1 : 1;
4105
4106 if (a->type == EXT_KCFG) {
4107 /* descending order by alignment requirements */
4108 if (a->kcfg.align != b->kcfg.align)
4109 return a->kcfg.align > b->kcfg.align ? -1 : 1;
4110 /* ascending order by size, within same alignment class */
4111 if (a->kcfg.sz != b->kcfg.sz)
4112 return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
4113 }
4114
4115 /* resolve ties by name */
4116 return strcmp(a->name, b->name);
4117 }
4118
4119 static int find_int_btf_id(const struct btf *btf)
4120 {
4121 const struct btf_type *t;
4122 int i, n;
4123
4124 n = btf__type_cnt(btf);
4125 for (i = 1; i < n; i++) {
4126 t = btf__type_by_id(btf, i);
4127
4128 if (btf_is_int(t) && btf_int_bits(t) == 32)
4129 return i;
4130 }
4131
4132 return 0;
4133 }
4134
4135 static int add_dummy_ksym_var(struct btf *btf)
4136 {
4137 int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
4138 const struct btf_var_secinfo *vs;
4139 const struct btf_type *sec;
4140
4141 if (!btf)
4142 return 0;
4143
4144 sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
4145 BTF_KIND_DATASEC);
4146 if (sec_btf_id < 0)
4147 return 0;
4148
4149 sec = btf__type_by_id(btf, sec_btf_id);
4150 vs = btf_var_secinfos(sec);
4151 for (i = 0; i < btf_vlen(sec); i++, vs++) {
4152 const struct btf_type *vt;
4153
4154 vt = btf__type_by_id(btf, vs->type);
4155 if (btf_is_func(vt))
4156 break;
4157 }
4158
4159 /* No func in ksyms sec. No need to add dummy var. */
4160 if (i == btf_vlen(sec))
4161 return 0;
4162
4163 int_btf_id = find_int_btf_id(btf);
4164 dummy_var_btf_id = btf__add_var(btf,
4165 "dummy_ksym",
4166 BTF_VAR_GLOBAL_ALLOCATED,
4167 int_btf_id);
4168 if (dummy_var_btf_id < 0)
4169 pr_warn("cannot create a dummy_ksym var\n");
4170
4171 return dummy_var_btf_id;
4172 }
4173
4174 static int bpf_object__collect_externs(struct bpf_object *obj)
4175 {
4176 struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
4177 const struct btf_type *t;
4178 struct extern_desc *ext;
4179 int i, n, off, dummy_var_btf_id;
4180 const char *ext_name, *sec_name;
4181 size_t ext_essent_len;
4182 Elf_Scn *scn;
4183 Elf64_Shdr *sh;
4184
4185 if (!obj->efile.symbols)
4186 return 0;
4187
4188 scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
4189 sh = elf_sec_hdr(obj, scn);
4190 if (!sh || sh->sh_entsize != sizeof(Elf64_Sym))
4191 return -LIBBPF_ERRNO__FORMAT;
4192
4193 dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
4194 if (dummy_var_btf_id < 0)
4195 return dummy_var_btf_id;
4196
4197 n = sh->sh_size / sh->sh_entsize;
4198 pr_debug("looking for externs among %d symbols...\n", n);
4199
4200 for (i = 0; i < n; i++) {
4201 Elf64_Sym *sym = elf_sym_by_idx(obj, i);
4202
4203 if (!sym)
4204 return -LIBBPF_ERRNO__FORMAT;
4205 if (!sym_is_extern(sym))
4206 continue;
4207 ext_name = elf_sym_str(obj, sym->st_name);
4208 if (!ext_name || !ext_name[0])
4209 continue;
4210
4211 ext = obj->externs;
4212 ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
4213 if (!ext)
4214 return -ENOMEM;
4215 obj->externs = ext;
4216 ext = &ext[obj->nr_extern];
4217 memset(ext, 0, sizeof(*ext));
4218 obj->nr_extern++;
4219
4220 ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
4221 if (ext->btf_id <= 0) {
4222 pr_warn("failed to find BTF for extern '%s': %d\n",
4223 ext_name, ext->btf_id);
4224 return ext->btf_id;
4225 }
4226 t = btf__type_by_id(obj->btf, ext->btf_id);
4227 ext->name = strdup(btf__name_by_offset(obj->btf, t->name_off));
4228 if (!ext->name)
4229 return -ENOMEM;
4230 ext->sym_idx = i;
4231 ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK;
4232
4233 ext_essent_len = bpf_core_essential_name_len(ext->name);
4234 ext->essent_name = NULL;
4235 if (ext_essent_len != strlen(ext->name)) {
4236 ext->essent_name = strndup(ext->name, ext_essent_len);
4237 if (!ext->essent_name)
4238 return -ENOMEM;
4239 }
4240
4241 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
4242 if (ext->sec_btf_id <= 0) {
4243 pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
4244 ext_name, ext->btf_id, ext->sec_btf_id);
4245 return ext->sec_btf_id;
4246 }
4247 sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
4248 sec_name = btf__name_by_offset(obj->btf, sec->name_off);
4249
4250 if (strcmp(sec_name, KCONFIG_SEC) == 0) {
4251 if (btf_is_func(t)) {
4252 pr_warn("extern function %s is unsupported under %s section\n",
4253 ext->name, KCONFIG_SEC);
4254 return -ENOTSUP;
4255 }
4256 kcfg_sec = sec;
4257 ext->type = EXT_KCFG;
4258 ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
4259 if (ext->kcfg.sz <= 0) {
4260 pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
4261 ext_name, ext->kcfg.sz);
4262 return ext->kcfg.sz;
4263 }
4264 ext->kcfg.align = btf__align_of(obj->btf, t->type);
4265 if (ext->kcfg.align <= 0) {
4266 pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
4267 ext_name, ext->kcfg.align);
4268 return -EINVAL;
4269 }
4270 ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
4271 &ext->kcfg.is_signed);
4272 if (ext->kcfg.type == KCFG_UNKNOWN) {
4273 pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name);
4274 return -ENOTSUP;
4275 }
4276 } else if (strcmp(sec_name, KSYMS_SEC) == 0) {
4277 ksym_sec = sec;
4278 ext->type = EXT_KSYM;
4279 skip_mods_and_typedefs(obj->btf, t->type,
4280 &ext->ksym.type_id);
4281 } else {
4282 pr_warn("unrecognized extern section '%s'\n", sec_name);
4283 return -ENOTSUP;
4284 }
4285 }
4286 pr_debug("collected %d externs total\n", obj->nr_extern);
4287
4288 if (!obj->nr_extern)
4289 return 0;
4290
4291 /* sort externs by type, for kcfg ones also by (align, size, name) */
4292 qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
4293
4294 /* for .ksyms section, we need to turn all externs into allocated
4295 * variables in BTF to pass kernel verification; we do this by
4296 * pretending that each extern is an int-sized (4-byte) variable
4297 */
4298 if (ksym_sec) {
4299 /* find existing 4-byte integer type in BTF to use for fake
4300 * extern variables in DATASEC
4301 */
4302 int int_btf_id = find_int_btf_id(obj->btf);
4303 /* For extern function, a dummy_var added earlier
4304 * will be used to replace the vs->type and
4305 * its name string will be used to refill
4306 * the missing param's name.
4307 */
4308 const struct btf_type *dummy_var;
4309
4310 dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id);
4311 for (i = 0; i < obj->nr_extern; i++) {
4312 ext = &obj->externs[i];
4313 if (ext->type != EXT_KSYM)
4314 continue;
4315 pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
4316 i, ext->sym_idx, ext->name);
4317 }
4318
4319 sec = ksym_sec;
4320 n = btf_vlen(sec);
4321 for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
4322 struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
4323 struct btf_type *vt;
4324
4325 vt = (void *)btf__type_by_id(obj->btf, vs->type);
4326 ext_name = btf__name_by_offset(obj->btf, vt->name_off);
4327 ext = find_extern_by_name(obj, ext_name);
4328 if (!ext) {
4329 pr_warn("failed to find extern definition for BTF %s '%s'\n",
4330 btf_kind_str(vt), ext_name);
4331 return -ESRCH;
4332 }
4333 if (btf_is_func(vt)) {
4334 const struct btf_type *func_proto;
4335 struct btf_param *param;
4336 int j;
4337
4338 func_proto = btf__type_by_id(obj->btf,
4339 vt->type);
4340 param = btf_params(func_proto);
4341 /* Reuse the dummy_var string if the
4342 * func proto does not have param name.
4343 */
4344 for (j = 0; j < btf_vlen(func_proto); j++)
4345 if (param[j].type && !param[j].name_off)
4346 param[j].name_off =
4347 dummy_var->name_off;
4348 vs->type = dummy_var_btf_id;
4349 vt->info &= ~0xffff;
4350 vt->info |= BTF_FUNC_GLOBAL;
4351 } else {
4352 btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
4353 vt->type = int_btf_id;
4354 }
4355 vs->offset = off;
4356 vs->size = sizeof(int);
4357 }
4358 sec->size = off;
4359 }
4360
4361 if (kcfg_sec) {
4362 sec = kcfg_sec;
4363 /* for kcfg externs calculate their offsets within a .kconfig map */
4364 off = 0;
4365 for (i = 0; i < obj->nr_extern; i++) {
4366 ext = &obj->externs[i];
4367 if (ext->type != EXT_KCFG)
4368 continue;
4369
4370 ext->kcfg.data_off = roundup(off, ext->kcfg.align);
4371 off = ext->kcfg.data_off + ext->kcfg.sz;
4372 pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
4373 i, ext->sym_idx, ext->kcfg.data_off, ext->name);
4374 }
4375 sec->size = off;
4376 n = btf_vlen(sec);
4377 for (i = 0; i < n; i++) {
4378 struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
4379
4380 t = btf__type_by_id(obj->btf, vs->type);
4381 ext_name = btf__name_by_offset(obj->btf, t->name_off);
4382 ext = find_extern_by_name(obj, ext_name);
4383 if (!ext) {
4384 pr_warn("failed to find extern definition for BTF var '%s'\n",
4385 ext_name);
4386 return -ESRCH;
4387 }
4388 btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
4389 vs->offset = ext->kcfg.data_off;
4390 }
4391 }
4392 return 0;
4393 }
4394
4395 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog)
4396 {
4397 return prog->sec_idx == obj->efile.text_shndx;
4398 }
4399
4400 struct bpf_program *
4401 bpf_object__find_program_by_name(const struct bpf_object *obj,
4402 const char *name)
4403 {
4404 struct bpf_program *prog;
4405
4406 bpf_object__for_each_program(prog, obj) {
4407 if (prog_is_subprog(obj, prog))
4408 continue;
4409 if (!strcmp(prog->name, name))
4410 return prog;
4411 }
4412 return errno = ENOENT, NULL;
4413 }
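/* A minimal usage sketch from the application side (assuming "obj" was
 * obtained via bpf_object__open() and "handle_exec" is a program defined in
 * that object; both names are hypothetical):
 *
 *	struct bpf_program *prog;
 *
 *	prog = bpf_object__find_program_by_name(obj, "handle_exec");
 *	if (!prog)		// errno is set to ENOENT
 *		return -errno;
 */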
4414
4415 static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
4416 int shndx)
4417 {
4418 switch (obj->efile.secs[shndx].sec_type) {
4419 case SEC_BSS:
4420 case SEC_DATA:
4421 case SEC_RODATA:
4422 return true;
4423 default:
4424 return false;
4425 }
4426 }
4427
4428 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
4429 int shndx)
4430 {
4431 return shndx == obj->efile.btf_maps_shndx;
4432 }
4433
4434 static enum libbpf_map_type
4435 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
4436 {
4437 if (shndx == obj->efile.symbols_shndx)
4438 return LIBBPF_MAP_KCONFIG;
4439
4440 switch (obj->efile.secs[shndx].sec_type) {
4441 case SEC_BSS:
4442 return LIBBPF_MAP_BSS;
4443 case SEC_DATA:
4444 return LIBBPF_MAP_DATA;
4445 case SEC_RODATA:
4446 return LIBBPF_MAP_RODATA;
4447 default:
4448 return LIBBPF_MAP_UNSPEC;
4449 }
4450 }
4451
4452 static int bpf_program__record_reloc(struct bpf_program *prog,
4453 struct reloc_desc *reloc_desc,
4454 __u32 insn_idx, const char *sym_name,
4455 const Elf64_Sym *sym, const Elf64_Rel *rel)
4456 {
4457 struct bpf_insn *insn = &prog->insns[insn_idx];
4458 size_t map_idx, nr_maps = prog->obj->nr_maps;
4459 struct bpf_object *obj = prog->obj;
4460 __u32 shdr_idx = sym->st_shndx;
4461 enum libbpf_map_type type;
4462 const char *sym_sec_name;
4463 struct bpf_map *map;
4464
4465 if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
4466 pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
4467 prog->name, sym_name, insn_idx, insn->code);
4468 return -LIBBPF_ERRNO__RELOC;
4469 }
4470
4471 if (sym_is_extern(sym)) {
4472 int sym_idx = ELF64_R_SYM(rel->r_info);
4473 int i, n = obj->nr_extern;
4474 struct extern_desc *ext;
4475
4476 for (i = 0; i < n; i++) {
4477 ext = &obj->externs[i];
4478 if (ext->sym_idx == sym_idx)
4479 break;
4480 }
4481 if (i >= n) {
4482 pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
4483 prog->name, sym_name, sym_idx);
4484 return -LIBBPF_ERRNO__RELOC;
4485 }
4486 pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
4487 prog->name, i, ext->name, ext->sym_idx, insn_idx);
4488 if (insn->code == (BPF_JMP | BPF_CALL))
4489 reloc_desc->type = RELO_EXTERN_CALL;
4490 else
4491 reloc_desc->type = RELO_EXTERN_LD64;
4492 reloc_desc->insn_idx = insn_idx;
4493 reloc_desc->ext_idx = i;
4494 return 0;
4495 }
4496
4497 /* sub-program call relocation */
4498 if (is_call_insn(insn)) {
4499 if (insn->src_reg != BPF_PSEUDO_CALL) {
4500 pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
4501 return -LIBBPF_ERRNO__RELOC;
4502 }
4503 /* text_shndx can be 0, if no default "main" program exists */
4504 if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
4505 sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
4506 pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
4507 prog->name, sym_name, sym_sec_name);
4508 return -LIBBPF_ERRNO__RELOC;
4509 }
4510 if (sym->st_value % BPF_INSN_SZ) {
4511 pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
4512 prog->name, sym_name, (size_t)sym->st_value);
4513 return -LIBBPF_ERRNO__RELOC;
4514 }
4515 reloc_desc->type = RELO_CALL;
4516 reloc_desc->insn_idx = insn_idx;
4517 reloc_desc->sym_off = sym->st_value;
4518 return 0;
4519 }
4520
4521 if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
4522 pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
4523 prog->name, sym_name, shdr_idx);
4524 return -LIBBPF_ERRNO__RELOC;
4525 }
4526
4527 /* loading subprog addresses */
4528 if (sym_is_subprog(sym, obj->efile.text_shndx)) {
4529 /* global_func: sym->st_value = offset in the section, insn->imm = 0.
4530 * local_func: sym->st_value = 0, insn->imm = offset in the section.
4531 */
4532 if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
4533 pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
4534 prog->name, sym_name, (size_t)sym->st_value, insn->imm);
4535 return -LIBBPF_ERRNO__RELOC;
4536 }
4537
4538 reloc_desc->type = RELO_SUBPROG_ADDR;
4539 reloc_desc->insn_idx = insn_idx;
4540 reloc_desc->sym_off = sym->st_value;
4541 return 0;
4542 }
4543
4544 type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
4545 sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
4546
4547 /* arena data relocation */
4548 if (shdr_idx == obj->efile.arena_data_shndx) {
4549 if (obj->arena_map_idx < 0) {
4550 pr_warn("prog '%s': bad arena data relocation at insn %u, no arena maps defined\n",
4551 prog->name, insn_idx);
4552 return -LIBBPF_ERRNO__RELOC;
4553 }
4554 reloc_desc->type = RELO_DATA;
4555 reloc_desc->insn_idx = insn_idx;
4556 reloc_desc->map_idx = obj->arena_map_idx;
4557 reloc_desc->sym_off = sym->st_value;
4558
4559 map = &obj->maps[obj->arena_map_idx];
4560 pr_debug("prog '%s': found arena map %d (%s, sec %d, off %zu) for insn %u\n",
4561 prog->name, obj->arena_map_idx, map->name, map->sec_idx,
4562 map->sec_offset, insn_idx);
4563 return 0;
4564 }
4565
4566 /* generic map reference relocation */
4567 if (type == LIBBPF_MAP_UNSPEC) {
4568 if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
4569 pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
4570 prog->name, sym_name, sym_sec_name);
4571 return -LIBBPF_ERRNO__RELOC;
4572 }
4573 for (map_idx = 0; map_idx < nr_maps; map_idx++) {
4574 map = &obj->maps[map_idx];
4575 if (map->libbpf_type != type ||
4576 map->sec_idx != sym->st_shndx ||
4577 map->sec_offset != sym->st_value)
4578 continue;
4579 pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
4580 prog->name, map_idx, map->name, map->sec_idx,
4581 map->sec_offset, insn_idx);
4582 break;
4583 }
4584 if (map_idx >= nr_maps) {
4585 pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
4586 prog->name, sym_sec_name, (size_t)sym->st_value);
4587 return -LIBBPF_ERRNO__RELOC;
4588 }
4589 reloc_desc->type = RELO_LD64;
4590 reloc_desc->insn_idx = insn_idx;
4591 reloc_desc->map_idx = map_idx;
4592 reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
4593 return 0;
4594 }
4595
4596 /* global data map relocation */
4597 if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
4598 pr_warn("prog '%s': bad data relo against section '%s'\n",
4599 prog->name, sym_sec_name);
4600 return -LIBBPF_ERRNO__RELOC;
4601 }
4602 for (map_idx = 0; map_idx < nr_maps; map_idx++) {
4603 map = &obj->maps[map_idx];
4604 if (map->libbpf_type != type || map->sec_idx != sym->st_shndx)
4605 continue;
4606 pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
4607 prog->name, map_idx, map->name, map->sec_idx,
4608 map->sec_offset, insn_idx);
4609 break;
4610 }
4611 if (map_idx >= nr_maps) {
4612 pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
4613 prog->name, sym_sec_name);
4614 return -LIBBPF_ERRNO__RELOC;
4615 }
4616
4617 reloc_desc->type = RELO_DATA;
4618 reloc_desc->insn_idx = insn_idx;
4619 reloc_desc->map_idx = map_idx;
4620 reloc_desc->sym_off = sym->st_value;
4621 return 0;
4622 }
4623
4624 static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
4625 {
4626 return insn_idx >= prog->sec_insn_off &&
4627 insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
4628 }
4629
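/* Find the program that contains the instruction at (sec_idx, insn_idx).
 * Programs were sorted by cmp_progs() (by sec_idx, then sec_insn_off), so a
 * binary search for the right-most program with sec_insn_off <= insn_idx is
 * sufficient; the final prog_contains_insn() check below guards against
 * gaps between programs and against a mismatched section.
 */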
4630 static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
4631 size_t sec_idx, size_t insn_idx)
4632 {
4633 int l = 0, r = obj->nr_programs - 1, m;
4634 struct bpf_program *prog;
4635
4636 if (!obj->nr_programs)
4637 return NULL;
4638
4639 while (l < r) {
4640 m = l + (r - l + 1) / 2;
4641 prog = &obj->programs[m];
4642
4643 if (prog->sec_idx < sec_idx ||
4644 (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
4645 l = m;
4646 else
4647 r = m - 1;
4648 }
4649 /* matching program could be at index l, but it still might be the
4650 * wrong one, so we need to double check conditions for the last time
4651 */
4652 prog = &obj->programs[l];
4653 if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
4654 return prog;
4655 return NULL;
4656 }
4657
4658 static int
4659 bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data)
4660 {
4661 const char *relo_sec_name, *sec_name;
4662 size_t sec_idx = shdr->sh_info, sym_idx;
4663 struct bpf_program *prog;
4664 struct reloc_desc *relos;
4665 int err, i, nrels;
4666 const char *sym_name;
4667 __u32 insn_idx;
4668 Elf_Scn *scn;
4669 Elf_Data *scn_data;
4670 Elf64_Sym *sym;
4671 Elf64_Rel *rel;
4672
4673 if (sec_idx >= obj->efile.sec_cnt)
4674 return -EINVAL;
4675
4676 scn = elf_sec_by_idx(obj, sec_idx);
4677 scn_data = elf_sec_data(obj, scn);
4678 if (!scn_data)
4679 return -LIBBPF_ERRNO__FORMAT;
4680
4681 relo_sec_name = elf_sec_str(obj, shdr->sh_name);
4682 sec_name = elf_sec_name(obj, scn);
4683 if (!relo_sec_name || !sec_name)
4684 return -EINVAL;
4685
4686 pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
4687 relo_sec_name, sec_idx, sec_name);
4688 nrels = shdr->sh_size / shdr->sh_entsize;
4689
4690 for (i = 0; i < nrels; i++) {
4691 rel = elf_rel_by_idx(data, i);
4692 if (!rel) {
4693 pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
4694 return -LIBBPF_ERRNO__FORMAT;
4695 }
4696
4697 sym_idx = ELF64_R_SYM(rel->r_info);
4698 sym = elf_sym_by_idx(obj, sym_idx);
4699 if (!sym) {
4700 pr_warn("sec '%s': symbol #%zu not found for relo #%d\n",
4701 relo_sec_name, sym_idx, i);
4702 return -LIBBPF_ERRNO__FORMAT;
4703 }
4704
4705 if (sym->st_shndx >= obj->efile.sec_cnt) {
4706 pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n",
4707 relo_sec_name, sym_idx, (size_t)sym->st_shndx, i);
4708 return -LIBBPF_ERRNO__FORMAT;
4709 }
4710
4711 if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) {
4712 pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
4713 relo_sec_name, (size_t)rel->r_offset, i);
4714 return -LIBBPF_ERRNO__FORMAT;
4715 }
4716
4717 insn_idx = rel->r_offset / BPF_INSN_SZ;
4718 /* relocations against static functions are recorded as
4719 * relocations against the section that contains a function;
4720 * in such case, symbol will be STT_SECTION and sym.st_name
4721 * will point to empty string (0), so fetch section name
4722 * instead
4723 */
4724 if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0)
4725 sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx));
4726 else
4727 sym_name = elf_sym_str(obj, sym->st_name);
4728 sym_name = sym_name ?: "<?>";
4729
4730 pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
4731 relo_sec_name, i, insn_idx, sym_name);
4732
4733 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
4734 if (!prog) {
4735 pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n",
4736 relo_sec_name, i, sec_name, insn_idx);
4737 continue;
4738 }
4739
4740 relos = libbpf_reallocarray(prog->reloc_desc,
4741 prog->nr_reloc + 1, sizeof(*relos));
4742 if (!relos)
4743 return -ENOMEM;
4744 prog->reloc_desc = relos;
4745
4746 /* adjust insn_idx to local BPF program frame of reference */
4747 insn_idx -= prog->sec_insn_off;
4748 err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
4749 insn_idx, sym_name, sym, rel);
4750 if (err)
4751 return err;
4752
4753 prog->nr_reloc++;
4754 }
4755 return 0;
4756 }
4757
4758 static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map)
4759 {
4760 int id;
4761
4762 if (!obj->btf)
4763 return -ENOENT;
4764
4765 /* if it's BTF-defined map, we don't need to search for type IDs.
4766 * For struct_ops map, it does not need btf_key_type_id and
4767 * btf_value_type_id.
4768 */
4769 if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map))
4770 return 0;
4771
4772 /*
4773 * LLVM annotates global data differently in BTF, that is,
4774 * only as '.data', '.bss' or '.rodata'.
4775 */
4776 if (!bpf_map__is_internal(map))
4777 return -ENOENT;
4778
4779 id = btf__find_by_name(obj->btf, map->real_name);
4780 if (id < 0)
4781 return id;
4782
4783 map->btf_key_type_id = 0;
4784 map->btf_value_type_id = id;
4785 return 0;
4786 }
4787
4788 static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
4789 {
4790 char file[PATH_MAX], buff[4096];
4791 FILE *fp;
4792 __u32 val;
4793 int err;
4794
4795 snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
4796 memset(info, 0, sizeof(*info));
4797
4798 fp = fopen(file, "re");
4799 if (!fp) {
4800 err = -errno;
4801 pr_warn("failed to open %s: %d. No procfs support?\n", file,
4802 err);
4803 return err;
4804 }
4805
4806 while (fgets(buff, sizeof(buff), fp)) {
4807 if (sscanf(buff, "map_type:\t%u", &val) == 1)
4808 info->type = val;
4809 else if (sscanf(buff, "key_size:\t%u", &val) == 1)
4810 info->key_size = val;
4811 else if (sscanf(buff, "value_size:\t%u", &val) == 1)
4812 info->value_size = val;
4813 else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
4814 info->max_entries = val;
4815 else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
4816 info->map_flags = val;
4817 }
4818
4819 fclose(fp);
4820
4821 return 0;
4822 }
4823
4824 bool bpf_map__autocreate(const struct bpf_map *map)
4825 {
4826 return map->autocreate;
4827 }
4828
4829 int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
4830 {
4831 if (map->obj->loaded)
4832 return libbpf_err(-EBUSY);
4833
4834 map->autocreate = autocreate;
4835 return 0;
4836 }
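/* A hedged usage sketch: disabling auto-creation of an optional map before
 * bpf_object__load() (the map name "optional_map" is hypothetical):
 *
 *	struct bpf_map *m = bpf_object__find_map_by_name(obj, "optional_map");
 *
 *	if (m)
 *		bpf_map__set_autocreate(m, false);
 *	// must be done before bpf_object__load(obj), otherwise -EBUSY
 */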
4837
4838 int bpf_map__set_autoattach(struct bpf_map *map, bool autoattach)
4839 {
4840 if (!bpf_map__is_struct_ops(map))
4841 return libbpf_err(-EINVAL);
4842
4843 map->autoattach = autoattach;
4844 return 0;
4845 }
4846
4847 bool bpf_map__autoattach(const struct bpf_map *map)
4848 {
4849 return map->autoattach;
4850 }
4851
4852 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
4853 {
4854 struct bpf_map_info info;
4855 __u32 len = sizeof(info), name_len;
4856 int new_fd, err;
4857 char *new_name;
4858
4859 memset(&info, 0, len);
4860 err = bpf_map_get_info_by_fd(fd, &info, &len);
4861 if (err && errno == EINVAL)
4862 err = bpf_get_map_info_from_fdinfo(fd, &info);
4863 if (err)
4864 return libbpf_err(err);
4865
4866 name_len = strlen(info.name);
4867 if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0)
4868 new_name = strdup(map->name);
4869 else
4870 new_name = strdup(info.name);
4871
4872 if (!new_name)
4873 return libbpf_err(-errno);
4874
4875 /*
4876 * Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set.
4877 * This is similar to what we do in ensure_good_fd(), but without
4878 * closing original FD.
4879 */
4880 new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
4881 if (new_fd < 0) {
4882 err = -errno;
4883 goto err_free_new_name;
4884 }
4885
4886 err = reuse_fd(map->fd, new_fd);
4887 if (err)
4888 goto err_free_new_name;
4889
4890 free(map->name);
4891
4892 map->name = new_name;
4893 map->def.type = info.type;
4894 map->def.key_size = info.key_size;
4895 map->def.value_size = info.value_size;
4896 map->def.max_entries = info.max_entries;
4897 map->def.map_flags = info.map_flags;
4898 map->btf_key_type_id = info.btf_key_type_id;
4899 map->btf_value_type_id = info.btf_value_type_id;
4900 map->reused = true;
4901 map->map_extra = info.map_extra;
4902
4903 return 0;
4904
4905 err_free_new_name:
4906 free(new_name);
4907 return libbpf_err(err);
4908 }
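/* A minimal application-side sketch of reusing an already pinned map before
 * load (the pin path is hypothetical); bpf_object__reuse_map() further down
 * does essentially the same automatically for maps with a pin_path set:
 *
 *	int pin_fd = bpf_obj_get("/sys/fs/bpf/my_map");
 *
 *	if (pin_fd >= 0) {
 *		err = bpf_map__reuse_fd(map, pin_fd);
 *		close(pin_fd);
 *	}
 */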
4909
4910 __u32 bpf_map__max_entries(const struct bpf_map *map)
4911 {
4912 return map->def.max_entries;
4913 }
4914
4915 struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
4916 {
4917 if (!bpf_map_type__is_map_in_map(map->def.type))
4918 return errno = EINVAL, NULL;
4919
4920 return map->inner_map;
4921 }
4922
4923 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
4924 {
4925 if (map->obj->loaded)
4926 return libbpf_err(-EBUSY);
4927
4928 map->def.max_entries = max_entries;
4929
4930 /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
4931 if (map_is_ringbuf(map))
4932 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
4933
4934 return 0;
4935 }
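/* A hedged usage sketch: resizing a BPF ringbuf before load; the requested
 * size is rounded up to a multiple of the page size by adjust_ringbuf_sz()
 * (the map name "events" is hypothetical):
 *
 *	struct bpf_map *rb = bpf_object__find_map_by_name(obj, "events");
 *
 *	err = bpf_map__set_max_entries(rb, 256 * 1024);
 *	// must happen before bpf_object__load(obj)
 */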
4936
4937 static int bpf_object_prepare_token(struct bpf_object *obj)
4938 {
4939 const char *bpffs_path;
4940 int bpffs_fd = -1, token_fd, err;
4941 bool mandatory;
4942 enum libbpf_print_level level;
4943
4944 /* token is explicitly prevented */
4945 if (obj->token_path && obj->token_path[0] == '\0') {
4946 pr_debug("object '%s': token is prevented, skipping...\n", obj->name);
4947 return 0;
4948 }
4949
4950 mandatory = obj->token_path != NULL;
4951 level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG;
4952
4953 bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH;
4954 bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR);
4955 if (bpffs_fd < 0) {
4956 err = -errno;
4957 __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n",
4958 obj->name, err, bpffs_path,
4959 mandatory ? "" : ", skipping optional step...");
4960 return mandatory ? err : 0;
4961 }
4962
4963 token_fd = bpf_token_create(bpffs_fd, 0);
4964 close(bpffs_fd);
4965 if (token_fd < 0) {
4966 if (!mandatory && token_fd == -ENOENT) {
4967 pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n",
4968 obj->name, bpffs_path);
4969 return 0;
4970 }
4971 __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n",
4972 obj->name, token_fd, bpffs_path,
4973 mandatory ? "" : ", skipping optional step...");
4974 return mandatory ? token_fd : 0;
4975 }
4976
4977 obj->feat_cache = calloc(1, sizeof(*obj->feat_cache));
4978 if (!obj->feat_cache) {
4979 close(token_fd);
4980 return -ENOMEM;
4981 }
4982
4983 obj->token_fd = token_fd;
4984 obj->feat_cache->token_fd = token_fd;
4985
4986 return 0;
4987 }
4988
4989 static int
4990 bpf_object__probe_loading(struct bpf_object *obj)
4991 {
4992 char *cp, errmsg[STRERR_BUFSIZE];
4993 struct bpf_insn insns[] = {
4994 BPF_MOV64_IMM(BPF_REG_0, 0),
4995 BPF_EXIT_INSN(),
4996 };
4997 int ret, insn_cnt = ARRAY_SIZE(insns);
4998 LIBBPF_OPTS(bpf_prog_load_opts, opts,
4999 .token_fd = obj->token_fd,
5000 .prog_flags = obj->token_fd ? BPF_F_TOKEN_FD : 0,
5001 );
5002
5003 if (obj->gen_loader)
5004 return 0;
5005
5006 ret = bump_rlimit_memlock();
5007 if (ret)
5008 pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret);
5009
5010 /* make sure basic loading works */
5011 ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts);
5012 if (ret < 0)
5013 ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
5014 if (ret < 0) {
5015 ret = errno;
5016 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
5017 pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
5018 "program. Make sure your kernel supports BPF "
5019 "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
5020 "set to a big enough value.\n", __func__, cp, ret);
5021 return -ret;
5022 }
5023 close(ret);
5024
5025 return 0;
5026 }
5027
5028 bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
5029 {
5030 if (obj->gen_loader)
5031 /* To generate loader program assume the latest kernel
5032 * to avoid doing extra prog_load, map_create syscalls.
5033 */
5034 return true;
5035
5036 if (obj->token_fd)
5037 return feat_supported(obj->feat_cache, feat_id);
5038
5039 return feat_supported(NULL, feat_id);
5040 }
5041
5042 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
5043 {
5044 struct bpf_map_info map_info;
5045 char msg[STRERR_BUFSIZE];
5046 __u32 map_info_len = sizeof(map_info);
5047 int err;
5048
5049 memset(&map_info, 0, map_info_len);
5050 err = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len);
5051 if (err && errno == EINVAL)
5052 err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
5053 if (err) {
5054 pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
5055 libbpf_strerror_r(errno, msg, sizeof(msg)));
5056 return false;
5057 }
5058
5059 return (map_info.type == map->def.type &&
5060 map_info.key_size == map->def.key_size &&
5061 map_info.value_size == map->def.value_size &&
5062 map_info.max_entries == map->def.max_entries &&
5063 map_info.map_flags == map->def.map_flags &&
5064 map_info.map_extra == map->map_extra);
5065 }
5066
5067 static int
5068 bpf_object__reuse_map(struct bpf_map *map)
5069 {
5070 char *cp, errmsg[STRERR_BUFSIZE];
5071 int err, pin_fd;
5072
5073 pin_fd = bpf_obj_get(map->pin_path);
5074 if (pin_fd < 0) {
5075 err = -errno;
5076 if (err == -ENOENT) {
5077 pr_debug("found no pinned map to reuse at '%s'\n",
5078 map->pin_path);
5079 return 0;
5080 }
5081
5082 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
5083 pr_warn("couldn't retrieve pinned map '%s': %s\n",
5084 map->pin_path, cp);
5085 return err;
5086 }
5087
5088 if (!map_is_reuse_compat(map, pin_fd)) {
5089 pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
5090 map->pin_path);
5091 close(pin_fd);
5092 return -EINVAL;
5093 }
5094
5095 err = bpf_map__reuse_fd(map, pin_fd);
5096 close(pin_fd);
5097 if (err)
5098 return err;
5099
5100 map->pinned = true;
5101 pr_debug("reused pinned map at '%s'\n", map->pin_path);
5102
5103 return 0;
5104 }
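/* For context, a hedged sketch of how an application opts into the reuse
 * path above: set a pin path before load, and bpf_object__load() will reuse
 * a compatible pinned map if one already exists (the path is hypothetical):
 *
 *	err = bpf_map__set_pin_path(map, "/sys/fs/bpf/my_map");
 *	// later, bpf_object__load(obj) ends up calling bpf_object__reuse_map()
 */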
5105
5106 static int
5107 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
5108 {
5109 enum libbpf_map_type map_type = map->libbpf_type;
5110 char *cp, errmsg[STRERR_BUFSIZE];
5111 int err, zero = 0;
5112 size_t mmap_sz;
5113
5114 if (obj->gen_loader) {
5115 bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
5116 map->mmaped, map->def.value_size);
5117 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
5118 bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
5119 return 0;
5120 }
5121
5122 err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
5123 if (err) {
5124 err = -errno;
5125 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
5126 pr_warn("map '%s': failed to set initial contents: %s\n",
5127 bpf_map__name(map), cp);
5128 return err;
5129 }
5130
5131 /* Freeze .rodata and .kconfig map as read-only from syscall side. */
5132 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
5133 err = bpf_map_freeze(map->fd);
5134 if (err) {
5135 err = -errno;
5136 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
5137 pr_warn("map '%s': failed to freeze as read-only: %s\n",
5138 bpf_map__name(map), cp);
5139 return err;
5140 }
5141 }
5142
5143 /* Remap anonymous mmap()-ed "map initialization image" as
5144 * a BPF map-backed mmap()-ed memory, but preserving the same
5145 * memory address. This will cause the kernel to change the process's
5146 * page table to point to a different piece of kernel memory,
5147 * but from the userspace point of view the memory address (and its
5148 * contents, being identical at this point) will stay the
5149 * same. This mapping will be released by bpf_object__close()
5150 * as per normal clean up procedure.
5151 */
5152 mmap_sz = bpf_map_mmap_sz(map);
5153 if (map->def.map_flags & BPF_F_MMAPABLE) {
5154 void *mmaped;
5155 int prot;
5156
5157 if (map->def.map_flags & BPF_F_RDONLY_PROG)
5158 prot = PROT_READ;
5159 else
5160 prot = PROT_READ | PROT_WRITE;
5161 mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map->fd, 0);
5162 if (mmaped == MAP_FAILED) {
5163 err = -errno;
5164 pr_warn("map '%s': failed to re-mmap() contents: %d\n",
5165 bpf_map__name(map), err);
5166 return err;
5167 }
5168 map->mmaped = mmaped;
5169 } else if (map->mmaped) {
5170 munmap(map->mmaped, mmap_sz);
5171 map->mmaped = NULL;
5172 }
5173
5174 return 0;
5175 }
5176
5177 static void bpf_map__destroy(struct bpf_map *map);
5178
5179 static bool map_is_created(const struct bpf_map *map)
5180 {
5181 return map->obj->loaded || map->reused;
5182 }
5183
5184 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
5185 {
5186 LIBBPF_OPTS(bpf_map_create_opts, create_attr);
5187 struct bpf_map_def *def = &map->def;
5188 const char *map_name = NULL;
5189 int err = 0, map_fd;
5190
5191 if (kernel_supports(obj, FEAT_PROG_NAME))
5192 map_name = map->name;
5193 create_attr.map_ifindex = map->map_ifindex;
5194 create_attr.map_flags = def->map_flags;
5195 create_attr.numa_node = map->numa_node;
5196 create_attr.map_extra = map->map_extra;
5197 create_attr.token_fd = obj->token_fd;
5198 if (obj->token_fd)
5199 create_attr.map_flags |= BPF_F_TOKEN_FD;
5200
5201 if (bpf_map__is_struct_ops(map)) {
5202 create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
5203 if (map->mod_btf_fd >= 0) {
5204 create_attr.value_type_btf_obj_fd = map->mod_btf_fd;
5205 create_attr.map_flags |= BPF_F_VTYPE_BTF_OBJ_FD;
5206 }
5207 }
5208
5209 if (obj->btf && btf__fd(obj->btf) >= 0) {
5210 create_attr.btf_fd = btf__fd(obj->btf);
5211 create_attr.btf_key_type_id = map->btf_key_type_id;
5212 create_attr.btf_value_type_id = map->btf_value_type_id;
5213 }
5214
5215 if (bpf_map_type__is_map_in_map(def->type)) {
5216 if (map->inner_map) {
5217 err = map_set_def_max_entries(map->inner_map);
5218 if (err)
5219 return err;
5220 err = bpf_object__create_map(obj, map->inner_map, true);
5221 if (err) {
5222 pr_warn("map '%s': failed to create inner map: %d\n",
5223 map->name, err);
5224 return err;
5225 }
5226 map->inner_map_fd = map->inner_map->fd;
5227 }
5228 if (map->inner_map_fd >= 0)
5229 create_attr.inner_map_fd = map->inner_map_fd;
5230 }
5231
5232 switch (def->type) {
5233 case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
5234 case BPF_MAP_TYPE_CGROUP_ARRAY:
5235 case BPF_MAP_TYPE_STACK_TRACE:
5236 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
5237 case BPF_MAP_TYPE_HASH_OF_MAPS:
5238 case BPF_MAP_TYPE_DEVMAP:
5239 case BPF_MAP_TYPE_DEVMAP_HASH:
5240 case BPF_MAP_TYPE_CPUMAP:
5241 case BPF_MAP_TYPE_XSKMAP:
5242 case BPF_MAP_TYPE_SOCKMAP:
5243 case BPF_MAP_TYPE_SOCKHASH:
5244 case BPF_MAP_TYPE_QUEUE:
5245 case BPF_MAP_TYPE_STACK:
5246 case BPF_MAP_TYPE_ARENA:
5247 create_attr.btf_fd = 0;
5248 create_attr.btf_key_type_id = 0;
5249 create_attr.btf_value_type_id = 0;
5250 map->btf_key_type_id = 0;
5251 map->btf_value_type_id = 0;
5252 break;
5253 case BPF_MAP_TYPE_STRUCT_OPS:
5254 create_attr.btf_value_type_id = 0;
5255 break;
5256 default:
5257 break;
5258 }
5259
5260 if (obj->gen_loader) {
5261 bpf_gen__map_create(obj->gen_loader, def->type, map_name,
5262 def->key_size, def->value_size, def->max_entries,
5263 &create_attr, is_inner ? -1 : map - obj->maps);
5264 /* We keep pretending we have a valid FD to pass various fd >= 0
5265 * checks by just keeping original placeholder FDs in place.
5266 * See bpf_object__add_map() comment.
5267 * This placeholder fd will not be used with any syscall and
5268 * will be reset to -1 eventually.
5269 */
5270 map_fd = map->fd;
5271 } else {
5272 map_fd = bpf_map_create(def->type, map_name,
5273 def->key_size, def->value_size,
5274 def->max_entries, &create_attr);
5275 }
5276 if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) {
5277 char *cp, errmsg[STRERR_BUFSIZE];
5278
5279 err = -errno;
5280 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
5281 pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
5282 map->name, cp, err);
5283 create_attr.btf_fd = 0;
5284 create_attr.btf_key_type_id = 0;
5285 create_attr.btf_value_type_id = 0;
5286 map->btf_key_type_id = 0;
5287 map->btf_value_type_id = 0;
5288 map_fd = bpf_map_create(def->type, map_name,
5289 def->key_size, def->value_size,
5290 def->max_entries, &create_attr);
5291 }
5292
5293 if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
5294 if (obj->gen_loader)
5295 map->inner_map->fd = -1;
5296 bpf_map__destroy(map->inner_map);
5297 zfree(&map->inner_map);
5298 }
5299
5300 if (map_fd < 0)
5301 return map_fd;
5302
5303 /* obj->gen_loader case, prevent reuse_fd() from closing map_fd */
5304 if (map->fd == map_fd)
5305 return 0;
5306
5307 /* Keep placeholder FD value but now point it to the BPF map object.
5308 * This way everything that relied on this map's FD (e.g., relocated
5309 * ldimm64 instructions) will stay valid and won't need adjustments.
5310 	 * map->fd stays valid but now points to what map_fd points to.
5311 */
5312 return reuse_fd(map->fd, map_fd);
5313 }
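/* Illustrative sketch (not libbpf's actual implementation): reuse_fd() above
 * is assumed to atomically repoint the pre-allocated placeholder FD at the
 * newly created map and close the temporary FD, roughly like:
 *
 *	static int reuse_fd_sketch(int placeholder_fd, int new_fd)
 *	{
 *		int err = dup3(new_fd, placeholder_fd, O_CLOEXEC) < 0 ? -errno : 0;
 *
 *		close(new_fd);
 *		return err;
 *	}
 */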
5314
5315 static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map)
5316 {
5317 const struct bpf_map *targ_map;
5318 unsigned int i;
5319 int fd, err = 0;
5320
5321 for (i = 0; i < map->init_slots_sz; i++) {
5322 if (!map->init_slots[i])
5323 continue;
5324
5325 targ_map = map->init_slots[i];
5326 fd = targ_map->fd;
5327
5328 if (obj->gen_loader) {
5329 bpf_gen__populate_outer_map(obj->gen_loader,
5330 map - obj->maps, i,
5331 targ_map - obj->maps);
5332 } else {
5333 err = bpf_map_update_elem(map->fd, &i, &fd, 0);
5334 }
5335 if (err) {
5336 err = -errno;
5337 pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
5338 map->name, i, targ_map->name, fd, err);
5339 return err;
5340 }
5341 pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
5342 map->name, i, targ_map->name, fd);
5343 }
5344
5345 zfree(&map->init_slots);
5346 map->init_slots_sz = 0;
5347
5348 return 0;
5349 }
5350
5351 static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map)
5352 {
5353 const struct bpf_program *targ_prog;
5354 unsigned int i;
5355 int fd, err;
5356
5357 if (obj->gen_loader)
5358 return -ENOTSUP;
5359
5360 for (i = 0; i < map->init_slots_sz; i++) {
5361 if (!map->init_slots[i])
5362 continue;
5363
5364 targ_prog = map->init_slots[i];
5365 fd = bpf_program__fd(targ_prog);
5366
5367 err = bpf_map_update_elem(map->fd, &i, &fd, 0);
5368 if (err) {
5369 err = -errno;
5370 pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n",
5371 map->name, i, targ_prog->name, fd, err);
5372 return err;
5373 }
5374 pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n",
5375 map->name, i, targ_prog->name, fd);
5376 }
5377
5378 zfree(&map->init_slots);
5379 map->init_slots_sz = 0;
5380
5381 return 0;
5382 }
5383
5384 static int bpf_object_init_prog_arrays(struct bpf_object *obj)
5385 {
5386 struct bpf_map *map;
5387 int i, err;
5388
5389 for (i = 0; i < obj->nr_maps; i++) {
5390 map = &obj->maps[i];
5391
5392 if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY)
5393 continue;
5394
5395 err = init_prog_array_slots(obj, map);
5396 if (err < 0)
5397 return err;
5398 }
5399 return 0;
5400 }
5401
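/* If a BPF_MAP_TYPE_PERF_EVENT_ARRAY map was declared with max_entries == 0,
 * size it to the number of possible CPUs (e.g., 8 entries on an 8-CPU system).
 */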
5402 static int map_set_def_max_entries(struct bpf_map *map)
5403 {
5404 if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) {
5405 int nr_cpus;
5406
5407 nr_cpus = libbpf_num_possible_cpus();
5408 if (nr_cpus < 0) {
5409 pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
5410 map->name, nr_cpus);
5411 return nr_cpus;
5412 }
5413 pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
5414 map->def.max_entries = nr_cpus;
5415 }
5416
5417 return 0;
5418 }
5419
5420 static int
5421 bpf_object__create_maps(struct bpf_object *obj)
5422 {
5423 struct bpf_map *map;
5424 char *cp, errmsg[STRERR_BUFSIZE];
5425 unsigned int i, j;
5426 int err;
5427 bool retried;
5428
5429 for (i = 0; i < obj->nr_maps; i++) {
5430 map = &obj->maps[i];
5431
5432 /* To support old kernels, we skip creating global data maps
5433 * (.rodata, .data, .kconfig, etc); later on, during program
5434 * loading, if we detect that at least one of the to-be-loaded
5435 * programs is referencing any global data map, we'll error
5436 * out with program name and relocation index logged.
5437 		 * This approach allows us to accommodate Clang emitting
5438 		 * unnecessary .rodata.str1.1 sections for string literals,
5439 		 * and also allows CO-RE applications to use
5440 		 * global variables in some BPF programs, but not in others.
5441 * If those global variable-using programs are not loaded at
5442 * runtime due to bpf_program__set_autoload(prog, false),
5443 * bpf_object loading will succeed just fine even on old
5444 * kernels.
5445 */
5446 if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA))
5447 map->autocreate = false;
5448
5449 if (!map->autocreate) {
5450 pr_debug("map '%s': skipped auto-creating...\n", map->name);
5451 continue;
5452 }
5453
5454 err = map_set_def_max_entries(map);
5455 if (err)
5456 goto err_out;
5457
5458 retried = false;
5459 retry:
5460 if (map->pin_path) {
5461 err = bpf_object__reuse_map(map);
5462 if (err) {
5463 pr_warn("map '%s': error reusing pinned map\n",
5464 map->name);
5465 goto err_out;
5466 }
5467 if (retried && map->fd < 0) {
5468 pr_warn("map '%s': cannot find pinned map\n",
5469 map->name);
5470 err = -ENOENT;
5471 goto err_out;
5472 }
5473 }
5474
5475 if (map->reused) {
5476 pr_debug("map '%s': skipping creation (preset fd=%d)\n",
5477 map->name, map->fd);
5478 } else {
5479 err = bpf_object__create_map(obj, map, false);
5480 if (err)
5481 goto err_out;
5482
5483 pr_debug("map '%s': created successfully, fd=%d\n",
5484 map->name, map->fd);
5485
5486 if (bpf_map__is_internal(map)) {
5487 err = bpf_object__populate_internal_map(obj, map);
5488 if (err < 0)
5489 goto err_out;
5490 } else if (map->def.type == BPF_MAP_TYPE_ARENA) {
5491 map->mmaped = mmap((void *)(long)map->map_extra,
5492 bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
5493 map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED,
5494 map->fd, 0);
5495 if (map->mmaped == MAP_FAILED) {
5496 err = -errno;
5497 map->mmaped = NULL;
5498 pr_warn("map '%s': failed to mmap arena: %d\n",
5499 map->name, err);
5500 return err;
5501 }
5502 if (obj->arena_data) {
5503 memcpy(map->mmaped, obj->arena_data, obj->arena_data_sz);
5504 zfree(&obj->arena_data);
5505 }
5506 }
5507 if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
5508 err = init_map_in_map_slots(obj, map);
5509 if (err < 0)
5510 goto err_out;
5511 }
5512 }
5513
5514 if (map->pin_path && !map->pinned) {
5515 err = bpf_map__pin(map, NULL);
5516 if (err) {
5517 if (!retried && err == -EEXIST) {
5518 retried = true;
5519 goto retry;
5520 }
5521 pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
5522 map->name, map->pin_path, err);
5523 goto err_out;
5524 }
5525 }
5526 }
5527
5528 return 0;
5529
5530 err_out:
5531 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
5532 pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
5533 pr_perm_msg(err);
5534 for (j = 0; j < i; j++)
5535 zclose(obj->maps[j].fd);
5536 return err;
5537 }
5538
5539 static bool bpf_core_is_flavor_sep(const char *s)
5540 {
5541 /* check X___Y name pattern, where X and Y are not underscores */
5542 return s[0] != '_' && /* X */
5543 s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */
5544 s[4] != '_'; /* Y */
5545 }
5546
5547 /* Given 'some_struct_name___with_flavor', return the length of the name prefix
5548  * before the last triple underscore. The struct name part after the last triple
5549  * underscore is ignored during BPF CO-RE relocation matching.
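 * For example, for "task_struct___flavored" the essential name prefix is
 * "task_struct", so this function returns 11.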
5550 */
5551 size_t bpf_core_essential_name_len(const char *name)
5552 {
5553 size_t n = strlen(name);
5554 int i;
5555
5556 for (i = n - 5; i >= 0; i--) {
5557 if (bpf_core_is_flavor_sep(name + i))
5558 return i + 1;
5559 }
5560 return n;
5561 }
5562
5563 void bpf_core_free_cands(struct bpf_core_cand_list *cands)
5564 {
5565 if (!cands)
5566 return;
5567
5568 free(cands->cands);
5569 free(cands);
5570 }
5571
5572 int bpf_core_add_cands(struct bpf_core_cand *local_cand,
5573 size_t local_essent_len,
5574 const struct btf *targ_btf,
5575 const char *targ_btf_name,
5576 int targ_start_id,
5577 struct bpf_core_cand_list *cands)
5578 {
5579 struct bpf_core_cand *new_cands, *cand;
5580 const struct btf_type *t, *local_t;
5581 const char *targ_name, *local_name;
5582 size_t targ_essent_len;
5583 int n, i;
5584
5585 local_t = btf__type_by_id(local_cand->btf, local_cand->id);
5586 local_name = btf__str_by_offset(local_cand->btf, local_t->name_off);
5587
5588 n = btf__type_cnt(targ_btf);
5589 for (i = targ_start_id; i < n; i++) {
5590 t = btf__type_by_id(targ_btf, i);
5591 if (!btf_kind_core_compat(t, local_t))
5592 continue;
5593
5594 targ_name = btf__name_by_offset(targ_btf, t->name_off);
5595 if (str_is_empty(targ_name))
5596 continue;
5597
5598 targ_essent_len = bpf_core_essential_name_len(targ_name);
5599 if (targ_essent_len != local_essent_len)
5600 continue;
5601
5602 if (strncmp(local_name, targ_name, local_essent_len) != 0)
5603 continue;
5604
5605 pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n",
5606 local_cand->id, btf_kind_str(local_t),
5607 local_name, i, btf_kind_str(t), targ_name,
5608 targ_btf_name);
5609 new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,
5610 sizeof(*cands->cands));
5611 if (!new_cands)
5612 return -ENOMEM;
5613
5614 cand = &new_cands[cands->len];
5615 cand->btf = targ_btf;
5616 cand->id = i;
5617
5618 cands->cands = new_cands;
5619 cands->len++;
5620 }
5621 return 0;
5622 }
5623
5624 static int load_module_btfs(struct bpf_object *obj)
5625 {
5626 struct bpf_btf_info info;
5627 struct module_btf *mod_btf;
5628 struct btf *btf;
5629 char name[64];
5630 __u32 id = 0, len;
5631 int err, fd;
5632
5633 if (obj->btf_modules_loaded)
5634 return 0;
5635
5636 if (obj->gen_loader)
5637 return 0;
5638
5639 /* don't do this again, even if we find no module BTFs */
5640 obj->btf_modules_loaded = true;
5641
5642 /* kernel too old to support module BTFs */
5643 if (!kernel_supports(obj, FEAT_MODULE_BTF))
5644 return 0;
5645
5646 while (true) {
5647 err = bpf_btf_get_next_id(id, &id);
5648 if (err && errno == ENOENT)
5649 return 0;
5650 if (err && errno == EPERM) {
5651 pr_debug("skipping module BTFs loading, missing privileges\n");
5652 return 0;
5653 }
5654 if (err) {
5655 err = -errno;
5656 pr_warn("failed to iterate BTF objects: %d\n", err);
5657 return err;
5658 }
5659
5660 fd = bpf_btf_get_fd_by_id(id);
5661 if (fd < 0) {
5662 if (errno == ENOENT)
5663 continue; /* expected race: BTF was unloaded */
5664 err = -errno;
5665 pr_warn("failed to get BTF object #%d FD: %d\n", id, err);
5666 return err;
5667 }
5668
5669 len = sizeof(info);
5670 memset(&info, 0, sizeof(info));
5671 info.name = ptr_to_u64(name);
5672 info.name_len = sizeof(name);
5673
5674 err = bpf_btf_get_info_by_fd(fd, &info, &len);
5675 if (err) {
5676 err = -errno;
5677 pr_warn("failed to get BTF object #%d info: %d\n", id, err);
5678 goto err_out;
5679 }
5680
5681 /* ignore non-module BTFs */
5682 if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) {
5683 close(fd);
5684 continue;
5685 }
5686
5687 btf = btf_get_from_fd(fd, obj->btf_vmlinux);
5688 err = libbpf_get_error(btf);
5689 if (err) {
5690 pr_warn("failed to load module [%s]'s BTF object #%d: %d\n",
5691 name, id, err);
5692 goto err_out;
5693 }
5694
5695 err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
5696 sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
5697 if (err)
5698 goto err_out;
5699
5700 mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
5701
5702 mod_btf->btf = btf;
5703 mod_btf->id = id;
5704 mod_btf->fd = fd;
5705 mod_btf->name = strdup(name);
5706 if (!mod_btf->name) {
5707 err = -ENOMEM;
5708 goto err_out;
5709 }
5710 continue;
5711
5712 err_out:
5713 close(fd);
5714 return err;
5715 }
5716
5717 return 0;
5718 }
5719
5720 static struct bpf_core_cand_list *
5721 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
5722 {
5723 struct bpf_core_cand local_cand = {};
5724 struct bpf_core_cand_list *cands;
5725 const struct btf *main_btf;
5726 const struct btf_type *local_t;
5727 const char *local_name;
5728 size_t local_essent_len;
5729 int err, i;
5730
5731 local_cand.btf = local_btf;
5732 local_cand.id = local_type_id;
5733 local_t = btf__type_by_id(local_btf, local_type_id);
5734 if (!local_t)
5735 return ERR_PTR(-EINVAL);
5736
5737 local_name = btf__name_by_offset(local_btf, local_t->name_off);
5738 if (str_is_empty(local_name))
5739 return ERR_PTR(-EINVAL);
5740 local_essent_len = bpf_core_essential_name_len(local_name);
5741
5742 cands = calloc(1, sizeof(*cands));
5743 if (!cands)
5744 return ERR_PTR(-ENOMEM);
5745
5746 /* Attempt to find target candidates in vmlinux BTF first */
5747 main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
5748 err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
5749 if (err)
5750 goto err_out;
5751
5752 	/* if vmlinux BTF has any candidate, don't go looking for module BTFs */
5753 if (cands->len)
5754 return cands;
5755
5756 /* if vmlinux BTF was overridden, don't attempt to load module BTFs */
5757 if (obj->btf_vmlinux_override)
5758 return cands;
5759
5760 /* now look through module BTFs, trying to still find candidates */
5761 err = load_module_btfs(obj);
5762 if (err)
5763 goto err_out;
5764
5765 for (i = 0; i < obj->btf_module_cnt; i++) {
5766 err = bpf_core_add_cands(&local_cand, local_essent_len,
5767 obj->btf_modules[i].btf,
5768 obj->btf_modules[i].name,
5769 btf__type_cnt(obj->btf_vmlinux),
5770 cands);
5771 if (err)
5772 goto err_out;
5773 }
5774
5775 return cands;
5776 err_out:
5777 bpf_core_free_cands(cands);
5778 return ERR_PTR(err);
5779 }
5780
5781 /* Check local and target types for compatibility. This check is used for
5782  * type-based CO-RE relocations and follows slightly different rules than
5783 * field-based relocations. This function assumes that root types were already
5784 * checked for name match. Beyond that initial root-level name check, names
5785 * are completely ignored. Compatibility rules are as follows:
5786 * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
5787 * kind should match for local and target types (i.e., STRUCT is not
5788 * compatible with UNION);
5789 * - for ENUMs, the size is ignored;
5790 * - for INT, size and signedness are ignored;
5791 * - for ARRAY, dimensionality is ignored, element types are checked for
5792 * compatibility recursively;
5793 * - CONST/VOLATILE/RESTRICT modifiers are ignored;
5794  * - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
5795 * - FUNC_PROTOs are compatible if they have compatible signature: same
5796 * number of input args and compatible return and argument types.
5797 * These rules are not set in stone and probably will be adjusted as we get
5798 * more experience with using BPF CO-RE relocations.
5799 */
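/* For example, under these rules two STRUCTs are compatible regardless of
 * their members, 'int' is compatible with 'long long' (INT size/signedness
 * are ignored), but 'struct foo' is never compatible with 'union foo' since
 * the kind must match.
 */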
5800 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
5801 const struct btf *targ_btf, __u32 targ_id)
5802 {
5803 return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32);
5804 }
5805
5806 int bpf_core_types_match(const struct btf *local_btf, __u32 local_id,
5807 const struct btf *targ_btf, __u32 targ_id)
5808 {
5809 return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32);
5810 }
5811
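/* The cand_cache used below is keyed directly by the local BTF type ID, so an
 * identity hash and plain integer equality are sufficient.
 */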
5812 static size_t bpf_core_hash_fn(const long key, void *ctx)
5813 {
5814 return key;
5815 }
5816
5817 static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx)
5818 {
5819 return k1 == k2;
5820 }
5821
5822 static int record_relo_core(struct bpf_program *prog,
5823 const struct bpf_core_relo *core_relo, int insn_idx)
5824 {
5825 struct reloc_desc *relos, *relo;
5826
5827 relos = libbpf_reallocarray(prog->reloc_desc,
5828 prog->nr_reloc + 1, sizeof(*relos));
5829 if (!relos)
5830 return -ENOMEM;
5831 relo = &relos[prog->nr_reloc];
5832 relo->type = RELO_CORE;
5833 relo->insn_idx = insn_idx;
5834 relo->core_relo = core_relo;
5835 prog->reloc_desc = relos;
5836 prog->nr_reloc++;
5837 return 0;
5838 }
5839
5840 static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx)
5841 {
5842 struct reloc_desc *relo;
5843 int i;
5844
5845 for (i = 0; i < prog->nr_reloc; i++) {
5846 relo = &prog->reloc_desc[i];
5847 if (relo->type != RELO_CORE || relo->insn_idx != insn_idx)
5848 continue;
5849
5850 return relo->core_relo;
5851 }
5852
5853 return NULL;
5854 }
5855
5856 static int bpf_core_resolve_relo(struct bpf_program *prog,
5857 const struct bpf_core_relo *relo,
5858 int relo_idx,
5859 const struct btf *local_btf,
5860 struct hashmap *cand_cache,
5861 struct bpf_core_relo_res *targ_res)
5862 {
5863 struct bpf_core_spec specs_scratch[3] = {};
5864 struct bpf_core_cand_list *cands = NULL;
5865 const char *prog_name = prog->name;
5866 const struct btf_type *local_type;
5867 const char *local_name;
5868 __u32 local_id = relo->type_id;
5869 int err;
5870
5871 local_type = btf__type_by_id(local_btf, local_id);
5872 if (!local_type)
5873 return -EINVAL;
5874
5875 local_name = btf__name_by_offset(local_btf, local_type->name_off);
5876 if (!local_name)
5877 return -EINVAL;
5878
5879 if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
5880 !hashmap__find(cand_cache, local_id, &cands)) {
5881 cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
5882 if (IS_ERR(cands)) {
5883 pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
5884 prog_name, relo_idx, local_id, btf_kind_str(local_type),
5885 local_name, PTR_ERR(cands));
5886 return PTR_ERR(cands);
5887 }
5888 err = hashmap__set(cand_cache, local_id, cands, NULL, NULL);
5889 if (err) {
5890 bpf_core_free_cands(cands);
5891 return err;
5892 }
5893 }
5894
5895 return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch,
5896 targ_res);
5897 }
5898
5899 static int
5900 bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
5901 {
5902 const struct btf_ext_info_sec *sec;
5903 struct bpf_core_relo_res targ_res;
5904 const struct bpf_core_relo *rec;
5905 const struct btf_ext_info *seg;
5906 struct hashmap_entry *entry;
5907 struct hashmap *cand_cache = NULL;
5908 struct bpf_program *prog;
5909 struct bpf_insn *insn;
5910 const char *sec_name;
5911 int i, err = 0, insn_idx, sec_idx, sec_num;
5912
5913 if (obj->btf_ext->core_relo_info.len == 0)
5914 return 0;
5915
5916 if (targ_btf_path) {
5917 obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
5918 err = libbpf_get_error(obj->btf_vmlinux_override);
5919 if (err) {
5920 pr_warn("failed to parse target BTF: %d\n", err);
5921 return err;
5922 }
5923 }
5924
5925 cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
5926 if (IS_ERR(cand_cache)) {
5927 err = PTR_ERR(cand_cache);
5928 goto out;
5929 }
5930
5931 seg = &obj->btf_ext->core_relo_info;
5932 sec_num = 0;
5933 for_each_btf_ext_sec(seg, sec) {
5934 sec_idx = seg->sec_idxs[sec_num];
5935 sec_num++;
5936
5937 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
5938 if (str_is_empty(sec_name)) {
5939 err = -EINVAL;
5940 goto out;
5941 }
5942
5943 pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info);
5944
5945 for_each_btf_ext_rec(seg, sec, i, rec) {
5946 if (rec->insn_off % BPF_INSN_SZ)
5947 return -EINVAL;
5948 insn_idx = rec->insn_off / BPF_INSN_SZ;
5949 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
5950 if (!prog) {
5951 /* When __weak subprog is "overridden" by another instance
5952 				 * of the subprog from a different object file, the linker still
5953 				 * appends all the .BTF.ext info that used to belong to that
5954 				 * eliminated subprogram.
5955 				 * This is similar to what the x86-64 linker does for relocations.
5956 				 * So we just ignore such relocations, like we ignore
5957 				 * subprog instructions when discovering subprograms.
5958 */
5959 pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n",
5960 sec_name, i, insn_idx);
5961 continue;
5962 }
5963 /* no need to apply CO-RE relocation if the program is
5964 * not going to be loaded
5965 */
5966 if (!prog->autoload)
5967 continue;
5968
5969 /* adjust insn_idx from section frame of reference to the local
5970 * program's frame of reference; (sub-)program code is not yet
5971 * relocated, so it's enough to just subtract in-section offset
5972 */
5973 insn_idx = insn_idx - prog->sec_insn_off;
5974 if (insn_idx >= prog->insns_cnt)
5975 return -EINVAL;
5976 insn = &prog->insns[insn_idx];
5977
5978 err = record_relo_core(prog, rec, insn_idx);
5979 if (err) {
5980 pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n",
5981 prog->name, i, err);
5982 goto out;
5983 }
5984
5985 if (prog->obj->gen_loader)
5986 continue;
5987
5988 err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res);
5989 if (err) {
5990 pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
5991 prog->name, i, err);
5992 goto out;
5993 }
5994
5995 err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res);
5996 if (err) {
5997 pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n",
5998 prog->name, i, insn_idx, err);
5999 goto out;
6000 }
6001 }
6002 }
6003
6004 out:
6005 /* obj->btf_vmlinux and module BTFs are freed after object load */
6006 btf__free(obj->btf_vmlinux_override);
6007 obj->btf_vmlinux_override = NULL;
6008
6009 if (!IS_ERR_OR_NULL(cand_cache)) {
6010 hashmap__for_each_entry(cand_cache, entry, i) {
6011 bpf_core_free_cands(entry->pvalue);
6012 }
6013 hashmap__free(cand_cache);
6014 }
6015 return err;
6016 }
6017
6018 /* base map load ldimm64 special constant, used also for log fixup logic */
6019 #define POISON_LDIMM64_MAP_BASE 2001000000
6020 #define POISON_LDIMM64_MAP_PFX "200100"
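/* For example, a poisoned ldimm64 that loads map #5 ends up with imm set to
 * 2001000000 + 5 = 2001000005, which the log fixup code can recognize by the
 * "200100" prefix.
 */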
6021
6022 static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx,
6023 int insn_idx, struct bpf_insn *insn,
6024 int map_idx, const struct bpf_map *map)
6025 {
6026 int i;
6027
6028 pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n",
6029 prog->name, relo_idx, insn_idx, map_idx, map->name);
6030
6031 /* we turn single ldimm64 into two identical invalid calls */
6032 for (i = 0; i < 2; i++) {
6033 insn->code = BPF_JMP | BPF_CALL;
6034 insn->dst_reg = 0;
6035 insn->src_reg = 0;
6036 insn->off = 0;
6037 /* if this instruction is reachable (not a dead code),
6038 * verifier will complain with something like:
6039 * invalid func unknown#2001000123
6040 * where lower 123 is map index into obj->maps[] array
6041 */
6042 insn->imm = POISON_LDIMM64_MAP_BASE + map_idx;
6043
6044 insn++;
6045 }
6046 }
6047
6048 /* unresolved kfunc call special constant, used also for log fixup logic */
6049 #define POISON_CALL_KFUNC_BASE 2002000000
6050 #define POISON_CALL_KFUNC_PFX "2002"
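/* Similarly, an unresolved kfunc call for extern #3 gets imm set to
 * 2002000000 + 3 = 2002000003, recognizable by the "2002" prefix.
 */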
6051
6052 static void poison_kfunc_call(struct bpf_program *prog, int relo_idx,
6053 int insn_idx, struct bpf_insn *insn,
6054 int ext_idx, const struct extern_desc *ext)
6055 {
6056 pr_debug("prog '%s': relo #%d: poisoning insn #%d that calls kfunc '%s'\n",
6057 prog->name, relo_idx, insn_idx, ext->name);
6058
6059 /* we turn kfunc call into invalid helper call with identifiable constant */
6060 insn->code = BPF_JMP | BPF_CALL;
6061 insn->dst_reg = 0;
6062 insn->src_reg = 0;
6063 insn->off = 0;
6064 /* if this instruction is reachable (not a dead code),
6065 * verifier will complain with something like:
6066 	 *   invalid func unknown#2002000123
6067 * where lower 123 is extern index into obj->externs[] array
6068 */
6069 insn->imm = POISON_CALL_KFUNC_BASE + ext_idx;
6070 }
6071
6072 /* Relocate data references within program code:
6073 * - map references;
6074 * - global variable references;
6075 * - extern references.
6076 */
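/* Note that a BPF_LD_IMM64 (ldimm64) instruction occupies two struct bpf_insn
 * slots, which is why the code below writes both insn[0].imm (e.g., map FD or
 * BTF ID) and insn[1].imm (e.g., value offset or BTF object FD).
 */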
6077 static int
6078 bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
6079 {
6080 int i;
6081
6082 for (i = 0; i < prog->nr_reloc; i++) {
6083 struct reloc_desc *relo = &prog->reloc_desc[i];
6084 struct bpf_insn *insn = &prog->insns[relo->insn_idx];
6085 const struct bpf_map *map;
6086 struct extern_desc *ext;
6087
6088 switch (relo->type) {
6089 case RELO_LD64:
6090 map = &obj->maps[relo->map_idx];
6091 if (obj->gen_loader) {
6092 insn[0].src_reg = BPF_PSEUDO_MAP_IDX;
6093 insn[0].imm = relo->map_idx;
6094 } else if (map->autocreate) {
6095 insn[0].src_reg = BPF_PSEUDO_MAP_FD;
6096 insn[0].imm = map->fd;
6097 } else {
6098 poison_map_ldimm64(prog, i, relo->insn_idx, insn,
6099 relo->map_idx, map);
6100 }
6101 break;
6102 case RELO_DATA:
6103 map = &obj->maps[relo->map_idx];
6104 insn[1].imm = insn[0].imm + relo->sym_off;
6105 if (obj->gen_loader) {
6106 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
6107 insn[0].imm = relo->map_idx;
6108 } else if (map->autocreate) {
6109 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
6110 insn[0].imm = map->fd;
6111 } else {
6112 poison_map_ldimm64(prog, i, relo->insn_idx, insn,
6113 relo->map_idx, map);
6114 }
6115 break;
6116 case RELO_EXTERN_LD64:
6117 ext = &obj->externs[relo->ext_idx];
6118 if (ext->type == EXT_KCFG) {
6119 if (obj->gen_loader) {
6120 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
6121 insn[0].imm = obj->kconfig_map_idx;
6122 } else {
6123 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
6124 insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
6125 }
6126 insn[1].imm = ext->kcfg.data_off;
6127 } else /* EXT_KSYM */ {
6128 if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */
6129 insn[0].src_reg = BPF_PSEUDO_BTF_ID;
6130 insn[0].imm = ext->ksym.kernel_btf_id;
6131 insn[1].imm = ext->ksym.kernel_btf_obj_fd;
6132 } else { /* typeless ksyms or unresolved typed ksyms */
6133 insn[0].imm = (__u32)ext->ksym.addr;
6134 insn[1].imm = ext->ksym.addr >> 32;
6135 }
6136 }
6137 break;
6138 case RELO_EXTERN_CALL:
6139 ext = &obj->externs[relo->ext_idx];
6140 insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
6141 if (ext->is_set) {
6142 insn[0].imm = ext->ksym.kernel_btf_id;
6143 insn[0].off = ext->ksym.btf_fd_idx;
6144 } else { /* unresolved weak kfunc call */
6145 poison_kfunc_call(prog, i, relo->insn_idx, insn,
6146 relo->ext_idx, ext);
6147 }
6148 break;
6149 case RELO_SUBPROG_ADDR:
6150 if (insn[0].src_reg != BPF_PSEUDO_FUNC) {
6151 pr_warn("prog '%s': relo #%d: bad insn\n",
6152 prog->name, i);
6153 return -EINVAL;
6154 }
6155 /* handled already */
6156 break;
6157 case RELO_CALL:
6158 /* handled already */
6159 break;
6160 case RELO_CORE:
6161 /* will be handled by bpf_program_record_relos() */
6162 break;
6163 default:
6164 pr_warn("prog '%s': relo #%d: bad relo type %d\n",
6165 prog->name, i, relo->type);
6166 return -EINVAL;
6167 }
6168 }
6169
6170 return 0;
6171 }
6172
6173 static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
6174 const struct bpf_program *prog,
6175 const struct btf_ext_info *ext_info,
6176 void **prog_info, __u32 *prog_rec_cnt,
6177 __u32 *prog_rec_sz)
6178 {
6179 void *copy_start = NULL, *copy_end = NULL;
6180 void *rec, *rec_end, *new_prog_info;
6181 const struct btf_ext_info_sec *sec;
6182 size_t old_sz, new_sz;
6183 int i, sec_num, sec_idx, off_adj;
6184
6185 sec_num = 0;
6186 for_each_btf_ext_sec(ext_info, sec) {
6187 sec_idx = ext_info->sec_idxs[sec_num];
6188 sec_num++;
6189 if (prog->sec_idx != sec_idx)
6190 continue;
6191
6192 for_each_btf_ext_rec(ext_info, sec, i, rec) {
6193 __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
6194
6195 if (insn_off < prog->sec_insn_off)
6196 continue;
6197 if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
6198 break;
6199
6200 if (!copy_start)
6201 copy_start = rec;
6202 copy_end = rec + ext_info->rec_size;
6203 }
6204
6205 if (!copy_start)
6206 return -ENOENT;
6207
6208 /* append func/line info of a given (sub-)program to the main
6209 * program func/line info
6210 */
6211 old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
6212 new_sz = old_sz + (copy_end - copy_start);
6213 new_prog_info = realloc(*prog_info, new_sz);
6214 if (!new_prog_info)
6215 return -ENOMEM;
6216 *prog_info = new_prog_info;
6217 *prog_rec_cnt = new_sz / ext_info->rec_size;
6218 memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
6219
6220 /* Kernel instruction offsets are in units of 8-byte
6221 * instructions, while .BTF.ext instruction offsets generated
6222 * by Clang are in units of bytes. So convert Clang offsets
6223 * into kernel offsets and adjust offset according to program
6224 * relocated position.
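		 * For example, a .BTF.ext byte offset of 24 becomes kernel insn
		 * offset 24 / 8 = 3, which is then shifted by off_adj.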
6225 */
6226 off_adj = prog->sub_insn_off - prog->sec_insn_off;
6227 rec = new_prog_info + old_sz;
6228 rec_end = new_prog_info + new_sz;
6229 for (; rec < rec_end; rec += ext_info->rec_size) {
6230 __u32 *insn_off = rec;
6231
6232 *insn_off = *insn_off / BPF_INSN_SZ + off_adj;
6233 }
6234 *prog_rec_sz = ext_info->rec_size;
6235 return 0;
6236 }
6237
6238 return -ENOENT;
6239 }
6240
6241 static int
6242 reloc_prog_func_and_line_info(const struct bpf_object *obj,
6243 struct bpf_program *main_prog,
6244 const struct bpf_program *prog)
6245 {
6246 int err;
6247
6248 /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
6249 * support func/line info
6250 */
6251 if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
6252 return 0;
6253
6254 /* only attempt func info relocation if main program's func_info
6255 * relocation was successful
6256 */
6257 if (main_prog != prog && !main_prog->func_info)
6258 goto line_info;
6259
6260 err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
6261 &main_prog->func_info,
6262 &main_prog->func_info_cnt,
6263 &main_prog->func_info_rec_size);
6264 if (err) {
6265 if (err != -ENOENT) {
6266 pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
6267 prog->name, err);
6268 return err;
6269 }
6270 if (main_prog->func_info) {
6271 /*
6272 			 * Some info has already been found, but there is a problem
6273 			 * with the last btf_ext reloc. We have to error out.
6274 */
6275 pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
6276 return err;
6277 }
6278 /* Have problem loading the very first info. Ignore the rest. */
6279 pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
6280 prog->name);
6281 }
6282
6283 line_info:
6284 /* don't relocate line info if main program's relocation failed */
6285 if (main_prog != prog && !main_prog->line_info)
6286 return 0;
6287
6288 err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
6289 &main_prog->line_info,
6290 &main_prog->line_info_cnt,
6291 &main_prog->line_info_rec_size);
6292 if (err) {
6293 if (err != -ENOENT) {
6294 pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
6295 prog->name, err);
6296 return err;
6297 }
6298 if (main_prog->line_info) {
6299 /*
6300 			 * Some info has already been found, but there is a problem
6301 			 * with the last btf_ext reloc. We have to error out.
6302 */
6303 pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
6304 return err;
6305 }
6306 /* Have problem loading the very first info. Ignore the rest. */
6307 pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
6308 prog->name);
6309 }
6310 return 0;
6311 }
6312
6313 static int cmp_relo_by_insn_idx(const void *key, const void *elem)
6314 {
6315 size_t insn_idx = *(const size_t *)key;
6316 const struct reloc_desc *relo = elem;
6317
6318 if (insn_idx == relo->insn_idx)
6319 return 0;
6320 return insn_idx < relo->insn_idx ? -1 : 1;
6321 }
6322
6323 static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
6324 {
6325 if (!prog->nr_reloc)
6326 return NULL;
6327 return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
6328 sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
6329 }
6330
6331 static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog)
6332 {
6333 int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
6334 struct reloc_desc *relos;
6335 int i;
6336
6337 if (main_prog == subprog)
6338 return 0;
6339 relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
6340 /* if new count is zero, reallocarray can return a valid NULL result;
6341 * in this case the previous pointer will be freed, so we *have to*
6342 	 * reassign the old pointer to the new value (even if it's NULL)
6343 */
6344 if (!relos && new_cnt)
6345 return -ENOMEM;
6346 if (subprog->nr_reloc)
6347 memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
6348 sizeof(*relos) * subprog->nr_reloc);
6349
6350 for (i = main_prog->nr_reloc; i < new_cnt; i++)
6351 relos[i].insn_idx += subprog->sub_insn_off;
6352 /* After insn_idx adjustment the 'relos' array is still sorted
6353 * by insn_idx and doesn't break bsearch.
6354 */
6355 main_prog->reloc_desc = relos;
6356 main_prog->nr_reloc = new_cnt;
6357 return 0;
6358 }
6359
6360 static int
6361 bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog,
6362 struct bpf_program *subprog)
6363 {
6364 struct bpf_insn *insns;
6365 size_t new_cnt;
6366 int err;
6367
6368 subprog->sub_insn_off = main_prog->insns_cnt;
6369
6370 new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
6371 insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
6372 if (!insns) {
6373 pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
6374 return -ENOMEM;
6375 }
6376 main_prog->insns = insns;
6377 main_prog->insns_cnt = new_cnt;
6378
6379 memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
6380 subprog->insns_cnt * sizeof(*insns));
6381
6382 pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
6383 main_prog->name, subprog->insns_cnt, subprog->name);
6384
6385 /* The subprog insns are now appended. Append its relos too. */
6386 err = append_subprog_relos(main_prog, subprog);
6387 if (err)
6388 return err;
6389 return 0;
6390 }
6391
6392 static int
6393 bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
6394 struct bpf_program *prog)
6395 {
6396 size_t sub_insn_idx, insn_idx;
6397 struct bpf_program *subprog;
6398 struct reloc_desc *relo;
6399 struct bpf_insn *insn;
6400 int err;
6401
6402 err = reloc_prog_func_and_line_info(obj, main_prog, prog);
6403 if (err)
6404 return err;
6405
6406 for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
6407 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6408 if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
6409 continue;
6410
6411 relo = find_prog_insn_relo(prog, insn_idx);
6412 if (relo && relo->type == RELO_EXTERN_CALL)
6413 /* kfunc relocations will be handled later
6414 * in bpf_object__relocate_data()
6415 */
6416 continue;
6417 if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
6418 pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
6419 prog->name, insn_idx, relo->type);
6420 return -LIBBPF_ERRNO__RELOC;
6421 }
6422 if (relo) {
6423 /* sub-program instruction index is a combination of
6424 * an offset of a symbol pointed to by relocation and
6425 * call instruction's imm field; for global functions,
6426 * call always has imm = -1, but for static functions
6427 * relocation is against STT_SECTION and insn->imm
6428 * points to a start of a static function
6429 *
6430 * for subprog addr relocation, the relo->sym_off + insn->imm is
6431 * the byte offset in the corresponding section.
6432 */
6433 if (relo->type == RELO_CALL)
6434 sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
6435 else
6436 sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
6437 } else if (insn_is_pseudo_func(insn)) {
6438 /*
6439 * RELO_SUBPROG_ADDR relo is always emitted even if both
6440 * functions are in the same section, so it shouldn't reach here.
6441 */
6442 pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
6443 prog->name, insn_idx);
6444 return -LIBBPF_ERRNO__RELOC;
6445 } else {
6446 /* if subprogram call is to a static function within
6447 * the same ELF section, there won't be any relocation
6448 * emitted, but it also means there is no additional
6449 * offset necessary, insns->imm is relative to
6450 * instruction's original position within the section
6451 */
6452 sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
6453 }
6454
6455 /* we enforce that sub-programs should be in .text section */
6456 subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
6457 if (!subprog) {
6458 pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
6459 prog->name);
6460 return -LIBBPF_ERRNO__RELOC;
6461 }
6462
6463 /* if it's the first call instruction calling into this
6464 * subprogram (meaning this subprog hasn't been processed
6465 * yet) within the context of current main program:
6466 		 *   - append it at the end of main program's instruction block;
6467 		 *   - process it recursively, while the current program is put on hold;
6468 		 *   - if that subprogram calls some other not-yet-processed
6469 		 *   subprogram, the same thing will happen recursively until
6470 		 *   there are no more unprocessed subprograms left to append
6471 * and relocate.
6472 */
6473 if (subprog->sub_insn_off == 0) {
6474 err = bpf_object__append_subprog_code(obj, main_prog, subprog);
6475 if (err)
6476 return err;
6477 err = bpf_object__reloc_code(obj, main_prog, subprog);
6478 if (err)
6479 return err;
6480 }
6481
6482 /* main_prog->insns memory could have been re-allocated, so
6483 * calculate pointer again
6484 */
6485 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6486 /* calculate correct instruction position within current main
6487 * prog; each main prog can have a different set of
6488 * subprograms appended (potentially in different order as
6489 * well), so position of any subprog can be different for
6490 * different main programs
6491 */
6492 insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
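		/* For example, if this call insn ends up at overall index 100 within
		 * main_prog and the subprog was appended starting at index 200, imm
		 * becomes 200 - 100 - 1 = 99, i.e. jump over the next 99 instructions.
		 */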
6493
6494 pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
6495 prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
6496 }
6497
6498 return 0;
6499 }
6500
6501 /*
6502 * Relocate sub-program calls.
6503 *
6504 * Algorithm operates as follows. Each entry-point BPF program (referred to as
6505  * main prog) is processed separately. Each subprog (a non-entry function
6506  * that can be called from either entry progs or other subprogs) gets its
6507  * sub_insn_off reset to zero. This serves as an indicator that this subprogram
6508  * hasn't yet been appended and relocated within the current main prog. Once it's
6509  * relocated, sub_insn_off will point at the position within the current main prog
6510 * where given subprog was appended. This will further be used to relocate all
6511 * the call instructions jumping into this subprog.
6512 *
6513 * We start with main program and process all call instructions. If the call
6514 * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
6515 * is zero), subprog instructions are appended at the end of main program's
6516 * instruction array. Then main program is "put on hold" while we recursively
6517 * process newly appended subprogram. If that subprogram calls into another
6518 * subprogram that hasn't been appended, new subprogram is appended again to
6519 * the *main* prog's instructions (subprog's instructions are always left
6520 * untouched, as they need to be in unmodified state for subsequent main progs
6521 * and subprog instructions are always sent only as part of a main prog) and
6522 * the process continues recursively. Once all the subprogs called from a main
6523 * prog or any of its subprogs are appended (and relocated), all their
6524 * positions within finalized instructions array are known, so it's easy to
6525 * rewrite call instructions with correct relative offsets, corresponding to
6526 * desired target subprog.
6527 *
6528  * It's important to realize that some subprogs might not be called from some
6529 * main prog and any of its called/used subprogs. Those will keep their
6530 * subprog->sub_insn_off as zero at all times and won't be appended to current
6531 * main prog and won't be relocated within the context of current main prog.
6532 * They might still be used from other main progs later.
6533 *
6534 * Visually this process can be shown as below. Suppose we have two main
6535 * programs mainA and mainB and BPF object contains three subprogs: subA,
6536 * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
6537 * subC both call subB:
6538 *
6539 * +--------+ +-------+
6540 * | v v |
6541 * +--+---+ +--+-+-+ +---+--+
6542 * | subA | | subB | | subC |
6543 * +--+---+ +------+ +---+--+
6544 * ^ ^
6545 * | |
6546 * +---+-------+ +------+----+
6547 * | mainA | | mainB |
6548 * +-----------+ +-----------+
6549 *
6550 * We'll start relocating mainA, will find subA, append it and start
6551 * processing sub A recursively:
6552 *
6553 * +-----------+------+
6554 * | mainA | subA |
6555 * +-----------+------+
6556 *
6557 * At this point we notice that subB is used from subA, so we append it and
6558 * relocate (there are no further subcalls from subB):
6559 *
6560 * +-----------+------+------+
6561 * | mainA | subA | subB |
6562 * +-----------+------+------+
6563 *
6564 * At this point, we relocate subA calls, then go one level up and finish with
6565  * relocating mainA calls. mainA is done.
6566 *
6567 * For mainB process is similar but results in different order. We start with
6568 * mainB and skip subA and subB, as mainB never calls them (at least
6569 * directly), but we see subC is needed, so we append and start processing it:
6570 *
6571 * +-----------+------+
6572 * | mainB | subC |
6573 * +-----------+------+
6574 * Now we see subC needs subB, so we go back to it, append and relocate it:
6575 *
6576 * +-----------+------+------+
6577 * | mainB | subC | subB |
6578 * +-----------+------+------+
6579 *
6580 * At this point we unwind recursion, relocate calls in subC, then in mainB.
6581 */
6582 static int
6583 bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
6584 {
6585 struct bpf_program *subprog;
6586 int i, err;
6587
6588 /* mark all subprogs as not relocated (yet) within the context of
6589 * current main program
6590 */
6591 for (i = 0; i < obj->nr_programs; i++) {
6592 subprog = &obj->programs[i];
6593 if (!prog_is_subprog(obj, subprog))
6594 continue;
6595
6596 subprog->sub_insn_off = 0;
6597 }
6598
6599 err = bpf_object__reloc_code(obj, prog, prog);
6600 if (err)
6601 return err;
6602
6603 return 0;
6604 }
6605
6606 static void
6607 bpf_object__free_relocs(struct bpf_object *obj)
6608 {
6609 struct bpf_program *prog;
6610 int i;
6611
6612 /* free up relocation descriptors */
6613 for (i = 0; i < obj->nr_programs; i++) {
6614 prog = &obj->programs[i];
6615 zfree(&prog->reloc_desc);
6616 prog->nr_reloc = 0;
6617 }
6618 }
6619
6620 static int cmp_relocs(const void *_a, const void *_b)
6621 {
6622 const struct reloc_desc *a = _a;
6623 const struct reloc_desc *b = _b;
6624
6625 if (a->insn_idx != b->insn_idx)
6626 return a->insn_idx < b->insn_idx ? -1 : 1;
6627
6628 /* no two relocations should have the same insn_idx, but ... */
6629 if (a->type != b->type)
6630 return a->type < b->type ? -1 : 1;
6631
6632 return 0;
6633 }
6634
6635 static void bpf_object__sort_relos(struct bpf_object *obj)
6636 {
6637 int i;
6638
6639 for (i = 0; i < obj->nr_programs; i++) {
6640 struct bpf_program *p = &obj->programs[i];
6641
6642 if (!p->nr_reloc)
6643 continue;
6644
6645 qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
6646 }
6647 }
6648
6649 static int bpf_prog_assign_exc_cb(struct bpf_object *obj, struct bpf_program *prog)
6650 {
6651 const char *str = "exception_callback:";
6652 size_t pfx_len = strlen(str);
6653 int i, j, n;
6654
6655 if (!obj->btf || !kernel_supports(obj, FEAT_BTF_DECL_TAG))
6656 return 0;
6657
6658 n = btf__type_cnt(obj->btf);
6659 for (i = 1; i < n; i++) {
6660 const char *name;
6661 struct btf_type *t;
6662
6663 t = btf_type_by_id(obj->btf, i);
6664 if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1)
6665 continue;
6666
6667 name = btf__str_by_offset(obj->btf, t->name_off);
6668 if (strncmp(name, str, pfx_len) != 0)
6669 continue;
6670
6671 t = btf_type_by_id(obj->btf, t->type);
6672 if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
6673 pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n",
6674 prog->name);
6675 return -EINVAL;
6676 }
6677 if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off)) != 0)
6678 continue;
6679 		/* If multiple callbacks are specified for the same prog,
6680 * the verifier will eventually return an error for this
6681 * case, hence simply skip appending a subprog.
6682 */
6683 if (prog->exception_cb_idx >= 0) {
6684 prog->exception_cb_idx = -1;
6685 break;
6686 }
6687
6688 name += pfx_len;
6689 if (str_is_empty(name)) {
6690 pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n",
6691 prog->name);
6692 return -EINVAL;
6693 }
6694
6695 for (j = 0; j < obj->nr_programs; j++) {
6696 struct bpf_program *subprog = &obj->programs[j];
6697
6698 if (!prog_is_subprog(obj, subprog))
6699 continue;
6700 if (strcmp(name, subprog->name) != 0)
6701 continue;
6702 			/* Enforce non-hidden, as from the verifier's point of
6703 			 * view it expects global functions, whereas
6704 			 * mark_btf_static fixes up the linkage as static.
6705 */
6706 if (!subprog->sym_global || subprog->mark_btf_static) {
6707 pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n",
6708 prog->name, subprog->name);
6709 return -EINVAL;
6710 }
6711 /* Let's see if we already saw a static exception callback with the same name */
6712 if (prog->exception_cb_idx >= 0) {
6713 pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n",
6714 prog->name, subprog->name);
6715 return -EINVAL;
6716 }
6717 prog->exception_cb_idx = j;
6718 break;
6719 }
6720
6721 if (prog->exception_cb_idx >= 0)
6722 continue;
6723
6724 pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name);
6725 return -ENOENT;
6726 }
6727
6728 return 0;
6729 }
6730
6731 static struct {
6732 enum bpf_prog_type prog_type;
6733 const char *ctx_name;
6734 } global_ctx_map[] = {
6735 { BPF_PROG_TYPE_CGROUP_DEVICE, "bpf_cgroup_dev_ctx" },
6736 { BPF_PROG_TYPE_CGROUP_SKB, "__sk_buff" },
6737 { BPF_PROG_TYPE_CGROUP_SOCK, "bpf_sock" },
6738 { BPF_PROG_TYPE_CGROUP_SOCK_ADDR, "bpf_sock_addr" },
6739 { BPF_PROG_TYPE_CGROUP_SOCKOPT, "bpf_sockopt" },
6740 { BPF_PROG_TYPE_CGROUP_SYSCTL, "bpf_sysctl" },
6741 { BPF_PROG_TYPE_FLOW_DISSECTOR, "__sk_buff" },
6742 { BPF_PROG_TYPE_KPROBE, "bpf_user_pt_regs_t" },
6743 { BPF_PROG_TYPE_LWT_IN, "__sk_buff" },
6744 { BPF_PROG_TYPE_LWT_OUT, "__sk_buff" },
6745 { BPF_PROG_TYPE_LWT_SEG6LOCAL, "__sk_buff" },
6746 { BPF_PROG_TYPE_LWT_XMIT, "__sk_buff" },
6747 { BPF_PROG_TYPE_NETFILTER, "bpf_nf_ctx" },
6748 { BPF_PROG_TYPE_PERF_EVENT, "bpf_perf_event_data" },
6749 { BPF_PROG_TYPE_RAW_TRACEPOINT, "bpf_raw_tracepoint_args" },
6750 { BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" },
6751 { BPF_PROG_TYPE_SCHED_ACT, "__sk_buff" },
6752 { BPF_PROG_TYPE_SCHED_CLS, "__sk_buff" },
6753 { BPF_PROG_TYPE_SK_LOOKUP, "bpf_sk_lookup" },
6754 { BPF_PROG_TYPE_SK_MSG, "sk_msg_md" },
6755 { BPF_PROG_TYPE_SK_REUSEPORT, "sk_reuseport_md" },
6756 { BPF_PROG_TYPE_SK_SKB, "__sk_buff" },
6757 { BPF_PROG_TYPE_SOCK_OPS, "bpf_sock_ops" },
6758 { BPF_PROG_TYPE_SOCKET_FILTER, "__sk_buff" },
6759 { BPF_PROG_TYPE_XDP, "xdp_md" },
6760 /* all other program types don't have "named" context structs */
6761 };
6762
6763 /* forward declarations for arch-specific underlying types of bpf_user_pt_regs_t typedef,
6764 * for below __builtin_types_compatible_p() checks;
6765 * with this approach we don't need any extra arch-specific #ifdef guards
6766 */
6767 struct pt_regs;
6768 struct user_pt_regs;
6769 struct user_regs_struct;
6770
6771 static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog,
6772 const char *subprog_name, int arg_idx,
6773 int arg_type_id, const char *ctx_name)
6774 {
6775 const struct btf_type *t;
6776 const char *tname;
6777
6778 /* check if existing parameter already matches verifier expectations */
6779 t = skip_mods_and_typedefs(btf, arg_type_id, NULL);
6780 if (!btf_is_ptr(t))
6781 goto out_warn;
6782
6783 /* typedef bpf_user_pt_regs_t is a special PITA case, valid for kprobe
6784 * and perf_event programs, so check this case early on and forget
6785 * about it for subsequent checks
6786 */
6787 while (btf_is_mod(t))
6788 t = btf__type_by_id(btf, t->type);
6789 if (btf_is_typedef(t) &&
6790 (prog->type == BPF_PROG_TYPE_KPROBE || prog->type == BPF_PROG_TYPE_PERF_EVENT)) {
6791 tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>";
6792 if (strcmp(tname, "bpf_user_pt_regs_t") == 0)
6793 return false; /* canonical type for kprobe/perf_event */
6794 }
6795
6796 /* now we can ignore typedefs moving forward */
6797 t = skip_mods_and_typedefs(btf, t->type, NULL);
6798
6799 /* if it's `void *`, definitely fix up BTF info */
6800 if (btf_is_void(t))
6801 return true;
6802
6803 /* if it's already proper canonical type, no need to fix up */
6804 tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>";
6805 if (btf_is_struct(t) && strcmp(tname, ctx_name) == 0)
6806 return false;
6807
6808 /* special cases */
6809 switch (prog->type) {
6810 case BPF_PROG_TYPE_KPROBE:
6811 /* `struct pt_regs *` is expected, but we need to fix up */
6812 if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
6813 return true;
6814 break;
6815 case BPF_PROG_TYPE_PERF_EVENT:
6816 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) &&
6817 btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
6818 return true;
6819 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) &&
6820 btf_is_struct(t) && strcmp(tname, "user_pt_regs") == 0)
6821 return true;
6822 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) &&
6823 btf_is_struct(t) && strcmp(tname, "user_regs_struct") == 0)
6824 return true;
6825 break;
6826 case BPF_PROG_TYPE_RAW_TRACEPOINT:
6827 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
6828 /* allow u64* as ctx */
6829 if (btf_is_int(t) && t->size == 8)
6830 return true;
6831 break;
6832 default:
6833 break;
6834 }
6835
6836 out_warn:
6837 pr_warn("prog '%s': subprog '%s' arg#%d is expected to be of `struct %s *` type\n",
6838 prog->name, subprog_name, arg_idx, ctx_name);
6839 return false;
6840 }
6841
6842 static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_program *prog)
6843 {
6844 int fn_id, fn_proto_id, ret_type_id, orig_proto_id;
6845 int i, err, arg_cnt, fn_name_off, linkage;
6846 struct btf_type *fn_t, *fn_proto_t, *t;
6847 struct btf_param *p;
6848
6849 /* caller already validated FUNC -> FUNC_PROTO validity */
6850 fn_t = btf_type_by_id(btf, orig_fn_id);
6851 fn_proto_t = btf_type_by_id(btf, fn_t->type);
6852
6853 /* Note that each btf__add_xxx() operation invalidates
6854 * all btf_type and string pointers, so we need to be
6855 * very careful when cloning BTF types. BTF type
6856 * pointers have to be always refetched. And to avoid
6857 * problems with invalidated string pointers, we
6858 * add empty strings initially, then just fix up
6859 * name_off offsets in place. Offsets are stable for
6860 * existing strings, so that works out.
6861 */
6862 fn_name_off = fn_t->name_off; /* we are about to invalidate fn_t */
6863 linkage = btf_func_linkage(fn_t);
6864 orig_proto_id = fn_t->type; /* original FUNC_PROTO ID */
6865 ret_type_id = fn_proto_t->type; /* fn_proto_t will be invalidated */
6866 arg_cnt = btf_vlen(fn_proto_t);
6867
6868 /* clone FUNC_PROTO and its params */
6869 fn_proto_id = btf__add_func_proto(btf, ret_type_id);
6870 if (fn_proto_id < 0)
6871 return -EINVAL;
6872
6873 for (i = 0; i < arg_cnt; i++) {
6874 int name_off;
6875
6876 /* copy original parameter data */
6877 t = btf_type_by_id(btf, orig_proto_id);
6878 p = &btf_params(t)[i];
6879 name_off = p->name_off;
6880
6881 err = btf__add_func_param(btf, "", p->type);
6882 if (err)
6883 return err;
6884
6885 fn_proto_t = btf_type_by_id(btf, fn_proto_id);
6886 p = &btf_params(fn_proto_t)[i];
6887 p->name_off = name_off; /* use remembered str offset */
6888 }
6889
6890 /* clone FUNC now, btf__add_func() enforces non-empty name, so use
6891 * entry program's name as a placeholder, which we replace immediately
6892 * with original name_off
6893 */
6894 fn_id = btf__add_func(btf, prog->name, linkage, fn_proto_id);
6895 if (fn_id < 0)
6896 return -EINVAL;
6897
6898 fn_t = btf_type_by_id(btf, fn_id);
6899 fn_t->name_off = fn_name_off; /* reuse original string */
6900
6901 return fn_id;
6902 }
6903
6904 /* Check if main program or global subprog's function prototype has `arg:ctx`
6905 * argument tags, and, if necessary, substitute correct type to match what BPF
6906 * verifier would expect, taking into account specific program type. This
6907  * allows supporting the __arg_ctx tag transparently on old kernels that don't yet
6908 * have a native support for it in the verifier, making user's life much
6909 * easier.
6910 */
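/* For example (illustrative user-side declaration, not part of libbpf), a
 * global subprog whose parameter is tagged as:
 *
 *	__noinline int handle_ctx(struct xdp_md *ctx __arg_ctx);
 *
 * relies on the "arg:ctx" decl tag that __arg_ctx is assumed to expand to;
 * on kernels without native support, the fixup below rewrites the argument's
 * BTF type so the verifier still treats it as PTR_TO_CTX.
 */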
6911 static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_program *prog)
6912 {
6913 const char *ctx_name = NULL, *ctx_tag = "arg:ctx", *fn_name;
6914 struct bpf_func_info_min *func_rec;
6915 struct btf_type *fn_t, *fn_proto_t;
6916 struct btf *btf = obj->btf;
6917 const struct btf_type *t;
6918 struct btf_param *p;
6919 int ptr_id = 0, struct_id, tag_id, orig_fn_id;
6920 int i, n, arg_idx, arg_cnt, err, rec_idx;
6921 int *orig_ids;
6922
6923 /* no .BTF.ext, no problem */
6924 if (!obj->btf_ext || !prog->func_info)
6925 return 0;
6926
6927 /* don't do any fix ups if kernel natively supports __arg_ctx */
6928 if (kernel_supports(obj, FEAT_ARG_CTX_TAG))
6929 return 0;
6930
6931 /* some BPF program types just don't have named context structs, so
6932 * this fallback mechanism doesn't work for them
6933 */
6934 for (i = 0; i < ARRAY_SIZE(global_ctx_map); i++) {
6935 if (global_ctx_map[i].prog_type != prog->type)
6936 continue;
6937 ctx_name = global_ctx_map[i].ctx_name;
6938 break;
6939 }
6940 if (!ctx_name)
6941 return 0;
6942
6943 /* remember original func BTF IDs to detect if we already cloned them */
6944 orig_ids = calloc(prog->func_info_cnt, sizeof(*orig_ids));
6945 if (!orig_ids)
6946 return -ENOMEM;
6947 for (i = 0; i < prog->func_info_cnt; i++) {
6948 func_rec = prog->func_info + prog->func_info_rec_size * i;
6949 orig_ids[i] = func_rec->type_id;
6950 }
6951
6952 /* go through each DECL_TAG with "arg:ctx" and see if it points to one
6953 * of our subprogs; if yes and subprog is global and needs adjustment,
6954 * clone and adjust FUNC -> FUNC_PROTO combo
6955 */
6956 for (i = 1, n = btf__type_cnt(btf); i < n; i++) {
6957 /* only DECL_TAG with "arg:ctx" value are interesting */
6958 t = btf__type_by_id(btf, i);
6959 if (!btf_is_decl_tag(t))
6960 continue;
6961 if (strcmp(btf__str_by_offset(btf, t->name_off), ctx_tag) != 0)
6962 continue;
6963
6964 /* only global funcs need adjustment, if at all */
6965 orig_fn_id = t->type;
6966 fn_t = btf_type_by_id(btf, orig_fn_id);
6967 if (!btf_is_func(fn_t) || btf_func_linkage(fn_t) != BTF_FUNC_GLOBAL)
6968 continue;
6969
6970 /* sanity check FUNC -> FUNC_PROTO chain, just in case */
6971 fn_proto_t = btf_type_by_id(btf, fn_t->type);
6972 if (!fn_proto_t || !btf_is_func_proto(fn_proto_t))
6973 continue;
6974
6975 /* find corresponding func_info record */
6976 func_rec = NULL;
6977 for (rec_idx = 0; rec_idx < prog->func_info_cnt; rec_idx++) {
6978 if (orig_ids[rec_idx] == t->type) {
6979 func_rec = prog->func_info + prog->func_info_rec_size * rec_idx;
6980 break;
6981 }
6982 }
6983 /* current main program doesn't call into this subprog */
6984 if (!func_rec)
6985 continue;
6986
6987 /* some more sanity checking of DECL_TAG */
6988 arg_cnt = btf_vlen(fn_proto_t);
6989 arg_idx = btf_decl_tag(t)->component_idx;
6990 if (arg_idx < 0 || arg_idx >= arg_cnt)
6991 continue;
6992
6993 /* check if we should fix up argument type */
6994 p = &btf_params(fn_proto_t)[arg_idx];
6995 fn_name = btf__str_by_offset(btf, fn_t->name_off) ?: "<anon>";
6996 if (!need_func_arg_type_fixup(btf, prog, fn_name, arg_idx, p->type, ctx_name))
6997 continue;
6998
6999 /* clone fn/fn_proto, unless we already did it for another arg */
7000 if (func_rec->type_id == orig_fn_id) {
7001 int fn_id;
7002
7003 fn_id = clone_func_btf_info(btf, orig_fn_id, prog);
7004 if (fn_id < 0) {
7005 err = fn_id;
7006 goto err_out;
7007 }
7008
7009 /* point func_info record to a cloned FUNC type */
7010 func_rec->type_id = fn_id;
7011 }
7012
7013 /* create PTR -> STRUCT type chain to mark PTR_TO_CTX argument;
7014 * we do it just once per main BPF program, as all global
7015 		 * funcs share the same program type, so we need only one
7016 		 * PTR -> STRUCT type chain
7017 */
7018 if (ptr_id == 0) {
7019 struct_id = btf__add_struct(btf, ctx_name, 0);
7020 ptr_id = btf__add_ptr(btf, struct_id);
7021 if (ptr_id < 0 || struct_id < 0) {
7022 err = -EINVAL;
7023 goto err_out;
7024 }
7025 }
7026
7027 /* for completeness, clone DECL_TAG and point it to cloned param */
7028 tag_id = btf__add_decl_tag(btf, ctx_tag, func_rec->type_id, arg_idx);
7029 if (tag_id < 0) {
7030 err = -EINVAL;
7031 goto err_out;
7032 }
7033
7034 /* all the BTF manipulations invalidated pointers, refetch them */
7035 fn_t = btf_type_by_id(btf, func_rec->type_id);
7036 fn_proto_t = btf_type_by_id(btf, fn_t->type);
7037
7038 /* fix up type ID pointed to by param */
7039 p = &btf_params(fn_proto_t)[arg_idx];
7040 p->type = ptr_id;
7041 }
7042
7043 free(orig_ids);
7044 return 0;
7045 err_out:
7046 free(orig_ids);
7047 return err;
7048 }
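
/* Example usage (illustrative sketch, assuming the __arg_ctx and __noinline
 * macros from bpf_helpers.h and the SEC()/BPF_PROG() macros from
 * bpf_helpers.h/bpf_tracing.h):
 *
 *	__noinline int handle_one(void *ctx __arg_ctx, int val)
 *	{
 *		return val;
 *	}
 *
 *	SEC("tp_btf/sched_switch")
 *	int BPF_PROG(on_switch)
 *	{
 *		return handle_one(ctx, 1);
 *	}
 *
 * On kernels without native __arg_ctx support, the fixup above clones
 * handle_one's FUNC/FUNC_PROTO BTF and retypes the tagged argument so the
 * verifier still treats it as PTR_TO_CTX.
 */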
7049
7050 static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
7051 {
7052 struct bpf_program *prog;
7053 size_t i, j;
7054 int err;
7055
7056 if (obj->btf_ext) {
7057 err = bpf_object__relocate_core(obj, targ_btf_path);
7058 if (err) {
7059 pr_warn("failed to perform CO-RE relocations: %d\n",
7060 err);
7061 return err;
7062 }
7063 bpf_object__sort_relos(obj);
7064 }
7065
7066 	/* Before relocating calls, pre-process relocations and mark
7067 	 * the few ld_imm64 instructions that point to subprogs.
7068 	 * Otherwise bpf_object__reloc_code() would later have to consider
7069 	 * all ld_imm64 insns as relocation candidates. That would
7070 	 * slow down relocation, since the number of find_prog_insn_relo()
7071 	 * calls would increase and most of them would fail to find a relo.
7072 */
7073 for (i = 0; i < obj->nr_programs; i++) {
7074 prog = &obj->programs[i];
7075 for (j = 0; j < prog->nr_reloc; j++) {
7076 struct reloc_desc *relo = &prog->reloc_desc[j];
7077 struct bpf_insn *insn = &prog->insns[relo->insn_idx];
7078
7079 /* mark the insn, so it's recognized by insn_is_pseudo_func() */
7080 if (relo->type == RELO_SUBPROG_ADDR)
7081 insn[0].src_reg = BPF_PSEUDO_FUNC;
7082 }
7083 }
7084
7085 /* relocate subprogram calls and append used subprograms to main
7086 * programs; each copy of subprogram code needs to be relocated
7087 * differently for each main program, because its code location might
7088 * have changed.
7089 * Append subprog relos to main programs to allow data relos to be
7090 * processed after text is completely relocated.
7091 */
7092 for (i = 0; i < obj->nr_programs; i++) {
7093 prog = &obj->programs[i];
7094 /* sub-program's sub-calls are relocated within the context of
7095 * its main program only
7096 */
7097 if (prog_is_subprog(obj, prog))
7098 continue;
7099 if (!prog->autoload)
7100 continue;
7101
7102 err = bpf_object__relocate_calls(obj, prog);
7103 if (err) {
7104 pr_warn("prog '%s': failed to relocate calls: %d\n",
7105 prog->name, err);
7106 return err;
7107 }
7108
7109 err = bpf_prog_assign_exc_cb(obj, prog);
7110 if (err)
7111 return err;
7112 /* Now, also append exception callback if it has not been done already. */
7113 if (prog->exception_cb_idx >= 0) {
7114 struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx];
7115
7116 /* Calling exception callback directly is disallowed, which the
7117 * verifier will reject later. In case it was processed already,
7118 * we can skip this step, otherwise for all other valid cases we
7119 * have to append exception callback now.
7120 */
7121 if (subprog->sub_insn_off == 0) {
7122 err = bpf_object__append_subprog_code(obj, prog, subprog);
7123 if (err)
7124 return err;
7125 err = bpf_object__reloc_code(obj, prog, subprog);
7126 if (err)
7127 return err;
7128 }
7129 }
7130 }
7131 for (i = 0; i < obj->nr_programs; i++) {
7132 prog = &obj->programs[i];
7133 if (prog_is_subprog(obj, prog))
7134 continue;
7135 if (!prog->autoload)
7136 continue;
7137
7138 /* Process data relos for main programs */
7139 err = bpf_object__relocate_data(obj, prog);
7140 if (err) {
7141 pr_warn("prog '%s': failed to relocate data references: %d\n",
7142 prog->name, err);
7143 return err;
7144 }
7145
7146 /* Fix up .BTF.ext information, if necessary */
7147 err = bpf_program_fixup_func_info(obj, prog);
7148 if (err) {
7149 pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %d\n",
7150 prog->name, err);
7151 return err;
7152 }
7153 }
7154
7155 return 0;
7156 }
7157
7158 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
7159 Elf64_Shdr *shdr, Elf_Data *data);
7160
7161 static int bpf_object__collect_map_relos(struct bpf_object *obj,
7162 Elf64_Shdr *shdr, Elf_Data *data)
7163 {
7164 const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
7165 int i, j, nrels, new_sz;
7166 const struct btf_var_secinfo *vi = NULL;
7167 const struct btf_type *sec, *var, *def;
7168 struct bpf_map *map = NULL, *targ_map = NULL;
7169 struct bpf_program *targ_prog = NULL;
7170 bool is_prog_array, is_map_in_map;
7171 const struct btf_member *member;
7172 const char *name, *mname, *type;
7173 unsigned int moff;
7174 Elf64_Sym *sym;
7175 Elf64_Rel *rel;
7176 void *tmp;
7177
7178 if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
7179 return -EINVAL;
7180 sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
7181 if (!sec)
7182 return -EINVAL;
7183
7184 nrels = shdr->sh_size / shdr->sh_entsize;
7185 for (i = 0; i < nrels; i++) {
7186 rel = elf_rel_by_idx(data, i);
7187 if (!rel) {
7188 pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
7189 return -LIBBPF_ERRNO__FORMAT;
7190 }
7191
7192 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
7193 if (!sym) {
7194 pr_warn(".maps relo #%d: symbol %zx not found\n",
7195 i, (size_t)ELF64_R_SYM(rel->r_info));
7196 return -LIBBPF_ERRNO__FORMAT;
7197 }
7198 name = elf_sym_str(obj, sym->st_name) ?: "<?>";
7199
7200 pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n",
7201 i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value,
7202 (size_t)rel->r_offset, sym->st_name, name);
7203
7204 for (j = 0; j < obj->nr_maps; j++) {
7205 map = &obj->maps[j];
7206 if (map->sec_idx != obj->efile.btf_maps_shndx)
7207 continue;
7208
7209 vi = btf_var_secinfos(sec) + map->btf_var_idx;
7210 if (vi->offset <= rel->r_offset &&
7211 rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size)
7212 break;
7213 }
7214 if (j == obj->nr_maps) {
7215 pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n",
7216 i, name, (size_t)rel->r_offset);
7217 return -EINVAL;
7218 }
7219
7220 is_map_in_map = bpf_map_type__is_map_in_map(map->def.type);
7221 is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY;
7222 type = is_map_in_map ? "map" : "prog";
7223 if (is_map_in_map) {
7224 if (sym->st_shndx != obj->efile.btf_maps_shndx) {
7225 pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
7226 i, name);
7227 return -LIBBPF_ERRNO__RELOC;
7228 }
7229 if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
7230 map->def.key_size != sizeof(int)) {
7231 pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
7232 i, map->name, sizeof(int));
7233 return -EINVAL;
7234 }
7235 targ_map = bpf_object__find_map_by_name(obj, name);
7236 if (!targ_map) {
7237 pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n",
7238 i, name);
7239 return -ESRCH;
7240 }
7241 } else if (is_prog_array) {
7242 targ_prog = bpf_object__find_program_by_name(obj, name);
7243 if (!targ_prog) {
7244 pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n",
7245 i, name);
7246 return -ESRCH;
7247 }
7248 if (targ_prog->sec_idx != sym->st_shndx ||
7249 targ_prog->sec_insn_off * 8 != sym->st_value ||
7250 prog_is_subprog(obj, targ_prog)) {
7251 pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n",
7252 i, name);
7253 return -LIBBPF_ERRNO__RELOC;
7254 }
7255 } else {
7256 return -EINVAL;
7257 }
7258
7259 var = btf__type_by_id(obj->btf, vi->type);
7260 def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
7261 if (btf_vlen(def) == 0)
7262 return -EINVAL;
7263 member = btf_members(def) + btf_vlen(def) - 1;
7264 mname = btf__name_by_offset(obj->btf, member->name_off);
7265 if (strcmp(mname, "values"))
7266 return -EINVAL;
7267
7268 moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
7269 if (rel->r_offset - vi->offset < moff)
7270 return -EINVAL;
7271
7272 moff = rel->r_offset - vi->offset - moff;
7273 /* here we use BPF pointer size, which is always 64 bit, as we
7274 * are parsing ELF that was built for BPF target
7275 */
7276 if (moff % bpf_ptr_sz)
7277 return -EINVAL;
7278 moff /= bpf_ptr_sz;
7279 if (moff >= map->init_slots_sz) {
7280 new_sz = moff + 1;
7281 tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
7282 if (!tmp)
7283 return -ENOMEM;
7284 map->init_slots = tmp;
7285 memset(map->init_slots + map->init_slots_sz, 0,
7286 (new_sz - map->init_slots_sz) * host_ptr_sz);
7287 map->init_slots_sz = new_sz;
7288 }
7289 map->init_slots[moff] = is_map_in_map ? (void *)targ_map : (void *)targ_prog;
7290
7291 pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n",
7292 i, map->name, moff, type, name);
7293 }
7294
7295 return 0;
7296 }
7297
7298 static int bpf_object__collect_relos(struct bpf_object *obj)
7299 {
7300 int i, err;
7301
7302 for (i = 0; i < obj->efile.sec_cnt; i++) {
7303 struct elf_sec_desc *sec_desc = &obj->efile.secs[i];
7304 Elf64_Shdr *shdr;
7305 Elf_Data *data;
7306 int idx;
7307
7308 if (sec_desc->sec_type != SEC_RELO)
7309 continue;
7310
7311 shdr = sec_desc->shdr;
7312 data = sec_desc->data;
7313 idx = shdr->sh_info;
7314
7315 if (shdr->sh_type != SHT_REL || idx < 0 || idx >= obj->efile.sec_cnt) {
7316 pr_warn("internal error at %d\n", __LINE__);
7317 return -LIBBPF_ERRNO__INTERNAL;
7318 }
7319
7320 if (obj->efile.secs[idx].sec_type == SEC_ST_OPS)
7321 err = bpf_object__collect_st_ops_relos(obj, shdr, data);
7322 else if (idx == obj->efile.btf_maps_shndx)
7323 err = bpf_object__collect_map_relos(obj, shdr, data);
7324 else
7325 err = bpf_object__collect_prog_relos(obj, shdr, data);
7326 if (err)
7327 return err;
7328 }
7329
7330 bpf_object__sort_relos(obj);
7331 return 0;
7332 }
7333
7334 static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
7335 {
7336 if (BPF_CLASS(insn->code) == BPF_JMP &&
7337 BPF_OP(insn->code) == BPF_CALL &&
7338 BPF_SRC(insn->code) == BPF_K &&
7339 insn->src_reg == 0 &&
7340 insn->dst_reg == 0) {
7341 *func_id = insn->imm;
7342 return true;
7343 }
7344 return false;
7345 }
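
/* Example (illustrative sketch): a helper call matched by the check above is
 * encoded as a BPF_JMP | BPF_CALL instruction with zeroed registers and the
 * helper ID in the immediate, e.g.:
 *
 *	struct bpf_insn call_insn = {
 *		.code = BPF_JMP | BPF_CALL,
 *		.dst_reg = 0,
 *		.src_reg = 0,
 *		.off = 0,
 *		.imm = BPF_FUNC_probe_read_kernel,
 *	};
 *	enum bpf_func_id func_id;
 *
 *	insn_is_helper_call(&call_insn, &func_id);
 *	(func_id is now BPF_FUNC_probe_read_kernel)
 */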
7346
7347 static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
7348 {
7349 struct bpf_insn *insn = prog->insns;
7350 enum bpf_func_id func_id;
7351 int i;
7352
7353 if (obj->gen_loader)
7354 return 0;
7355
7356 for (i = 0; i < prog->insns_cnt; i++, insn++) {
7357 if (!insn_is_helper_call(insn, &func_id))
7358 continue;
7359
7360 /* on kernels that don't yet support
7361 * bpf_probe_read_{kernel,user}[_str] helpers, fall back
7362 * to bpf_probe_read() which works well for old kernels
7363 */
7364 switch (func_id) {
7365 case BPF_FUNC_probe_read_kernel:
7366 case BPF_FUNC_probe_read_user:
7367 if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
7368 insn->imm = BPF_FUNC_probe_read;
7369 break;
7370 case BPF_FUNC_probe_read_kernel_str:
7371 case BPF_FUNC_probe_read_user_str:
7372 if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
7373 insn->imm = BPF_FUNC_probe_read_str;
7374 break;
7375 default:
7376 break;
7377 }
7378 }
7379 return 0;
7380 }
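
/* Example (illustrative sketch, assuming vmlinux.h, bpf_helpers.h and
 * bpf_tracing.h on the BPF side): a program that calls
 * bpf_probe_read_kernel() still loads on kernels predating that helper,
 * because the sanitizer above rewrites insn->imm to BPF_FUNC_probe_read:
 *
 *	SEC("kprobe/do_unlinkat")
 *	int probe_unlink(struct pt_regs *ctx)
 *	{
 *		struct filename *name = (void *)PT_REGS_PARM2(ctx);
 *		const char *p;
 *
 *		bpf_probe_read_kernel(&p, sizeof(p), &name->name);
 *		return 0;
 *	}
 */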
7381
7382 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
7383 int *btf_obj_fd, int *btf_type_id);
7384
7385 /* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */
7386 static int libbpf_prepare_prog_load(struct bpf_program *prog,
7387 struct bpf_prog_load_opts *opts, long cookie)
7388 {
7389 enum sec_def_flags def = cookie;
7390
7391 /* old kernels might not support specifying expected_attach_type */
7392 if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE))
7393 opts->expected_attach_type = 0;
7394
7395 if (def & SEC_SLEEPABLE)
7396 opts->prog_flags |= BPF_F_SLEEPABLE;
7397
7398 if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
7399 opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
7400
7401 /* special check for usdt to use uprobe_multi link */
7402 if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK)) {
7403 /* for BPF_TRACE_UPROBE_MULTI, user might want to query expected_attach_type
7404 * in prog, and expected_attach_type we set in kernel is from opts, so we
7405 * update both.
7406 */
7407 prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI;
7408 opts->expected_attach_type = BPF_TRACE_UPROBE_MULTI;
7409 }
7410
7411 if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) {
7412 int btf_obj_fd = 0, btf_type_id = 0, err;
7413 const char *attach_name;
7414
7415 attach_name = strchr(prog->sec_name, '/');
7416 if (!attach_name) {
7417 /* if BPF program is annotated with just SEC("fentry")
7418 * (or similar) without declaratively specifying
7419 * target, then it is expected that target will be
7420 * specified with bpf_program__set_attach_target() at
7421 * runtime before BPF object load step. If not, then
7422 * there is nothing to load into the kernel as BPF
7423 * verifier won't be able to validate BPF program
7424 * correctness anyways.
7425 */
7426 pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n",
7427 prog->name);
7428 return -EINVAL;
7429 }
7430 attach_name++; /* skip over / */
7431
7432 err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id);
7433 if (err)
7434 return err;
7435
7436 /* cache resolved BTF FD and BTF type ID in the prog */
7437 prog->attach_btf_obj_fd = btf_obj_fd;
7438 prog->attach_btf_id = btf_type_id;
7439
7440 /* but by now libbpf common logic is not utilizing
7441 		 * prog->attach_btf_obj_fd/prog->attach_btf_id anymore because
7442 * this callback is called after opts were populated by
7443 * libbpf, so this callback has to update opts explicitly here
7444 */
7445 opts->attach_btf_obj_fd = btf_obj_fd;
7446 opts->attach_btf_id = btf_type_id;
7447 }
7448 return 0;
7449 }
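
/* Example (illustrative sketch; object/program/function names are
 * placeholders): a program annotated with a bare SEC("fentry") can have its
 * attach target supplied programmatically before load, which is what the
 * warning above asks for:
 *
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
 *	struct bpf_program *prog;
 *
 *	prog = bpf_object__find_program_by_name(obj, "handler");
 *	bpf_program__set_attach_target(prog, 0, "tcp_v4_connect");
 *	bpf_object__load(obj);
 *
 * (error handling omitted for brevity)
 */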
7450
7451 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
7452
7453 static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
7454 struct bpf_insn *insns, int insns_cnt,
7455 const char *license, __u32 kern_version, int *prog_fd)
7456 {
7457 LIBBPF_OPTS(bpf_prog_load_opts, load_attr);
7458 const char *prog_name = NULL;
7459 char *cp, errmsg[STRERR_BUFSIZE];
7460 size_t log_buf_size = 0;
7461 char *log_buf = NULL, *tmp;
7462 bool own_log_buf = true;
7463 __u32 log_level = prog->log_level;
7464 int ret, err;
7465
7466 	/* Be more helpful by rejecting programs that can't be validated early,
7467 	 * with a more meaningful and actionable error message.
7468 */
7469 switch (prog->type) {
7470 case BPF_PROG_TYPE_UNSPEC:
7471 /*
7472 * The program type must be set. Most likely we couldn't find a proper
7473 * section definition at load time, and thus we didn't infer the type.
7474 */
7475 pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n",
7476 prog->name, prog->sec_name);
7477 return -EINVAL;
7478 case BPF_PROG_TYPE_STRUCT_OPS:
7479 if (prog->attach_btf_id == 0) {
7480 pr_warn("prog '%s': SEC(\"struct_ops\") program isn't referenced anywhere, did you forget to use it?\n",
7481 prog->name);
7482 return -EINVAL;
7483 }
7484 break;
7485 default:
7486 break;
7487 }
7488
7489 if (!insns || !insns_cnt)
7490 return -EINVAL;
7491
7492 if (kernel_supports(obj, FEAT_PROG_NAME))
7493 prog_name = prog->name;
7494 load_attr.attach_prog_fd = prog->attach_prog_fd;
7495 load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
7496 load_attr.attach_btf_id = prog->attach_btf_id;
7497 load_attr.kern_version = kern_version;
7498 load_attr.prog_ifindex = prog->prog_ifindex;
7499 load_attr.expected_attach_type = prog->expected_attach_type;
7500
7501 /* specify func_info/line_info only if kernel supports them */
7502 if (obj->btf && btf__fd(obj->btf) >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) {
7503 load_attr.prog_btf_fd = btf__fd(obj->btf);
7504 load_attr.func_info = prog->func_info;
7505 load_attr.func_info_rec_size = prog->func_info_rec_size;
7506 load_attr.func_info_cnt = prog->func_info_cnt;
7507 load_attr.line_info = prog->line_info;
7508 load_attr.line_info_rec_size = prog->line_info_rec_size;
7509 load_attr.line_info_cnt = prog->line_info_cnt;
7510 }
7511 load_attr.log_level = log_level;
7512 load_attr.prog_flags = prog->prog_flags;
7513 load_attr.fd_array = obj->fd_array;
7514
7515 load_attr.token_fd = obj->token_fd;
7516 if (obj->token_fd)
7517 load_attr.prog_flags |= BPF_F_TOKEN_FD;
7518
7519 /* adjust load_attr if sec_def provides custom preload callback */
7520 if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) {
7521 err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie);
7522 if (err < 0) {
7523 pr_warn("prog '%s': failed to prepare load attributes: %d\n",
7524 prog->name, err);
7525 return err;
7526 }
7527 insns = prog->insns;
7528 insns_cnt = prog->insns_cnt;
7529 }
7530
7531 if (obj->gen_loader) {
7532 bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name,
7533 license, insns, insns_cnt, &load_attr,
7534 prog - obj->programs);
7535 *prog_fd = -1;
7536 return 0;
7537 }
7538
7539 retry_load:
7540 /* if log_level is zero, we don't request logs initially even if
7541 * custom log_buf is specified; if the program load fails, then we'll
7542 * bump log_level to 1 and use either custom log_buf or we'll allocate
7543 * our own and retry the load to get details on what failed
7544 */
7545 if (log_level) {
7546 if (prog->log_buf) {
7547 log_buf = prog->log_buf;
7548 log_buf_size = prog->log_size;
7549 own_log_buf = false;
7550 } else if (obj->log_buf) {
7551 log_buf = obj->log_buf;
7552 log_buf_size = obj->log_size;
7553 own_log_buf = false;
7554 } else {
7555 log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2);
7556 tmp = realloc(log_buf, log_buf_size);
7557 if (!tmp) {
7558 ret = -ENOMEM;
7559 goto out;
7560 }
7561 log_buf = tmp;
7562 log_buf[0] = '\0';
7563 own_log_buf = true;
7564 }
7565 }
7566
7567 load_attr.log_buf = log_buf;
7568 load_attr.log_size = log_buf_size;
7569 load_attr.log_level = log_level;
7570
7571 ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr);
7572 if (ret >= 0) {
7573 if (log_level && own_log_buf) {
7574 pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
7575 prog->name, log_buf);
7576 }
7577
7578 if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) {
7579 struct bpf_map *map;
7580 int i;
7581
7582 for (i = 0; i < obj->nr_maps; i++) {
7583 map = &prog->obj->maps[i];
7584 if (map->libbpf_type != LIBBPF_MAP_RODATA)
7585 continue;
7586
7587 if (bpf_prog_bind_map(ret, map->fd, NULL)) {
7588 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
7589 pr_warn("prog '%s': failed to bind map '%s': %s\n",
7590 prog->name, map->real_name, cp);
7591 /* Don't fail hard if can't bind rodata. */
7592 }
7593 }
7594 }
7595
7596 *prog_fd = ret;
7597 ret = 0;
7598 goto out;
7599 }
7600
7601 if (log_level == 0) {
7602 log_level = 1;
7603 goto retry_load;
7604 }
7605 /* On ENOSPC, increase log buffer size and retry, unless custom
7606 * log_buf is specified.
7607 * Be careful to not overflow u32, though. Kernel's log buf size limit
7608 * isn't part of UAPI so it can always be bumped to full 4GB. So don't
7609 * multiply by 2 unless we are sure we'll fit within 32 bits.
7610 * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2).
7611 */
7612 if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2)
7613 goto retry_load;
7614
7615 ret = -errno;
7616
7617 /* post-process verifier log to improve error descriptions */
7618 fixup_verifier_log(prog, log_buf, log_buf_size);
7619
7620 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
7621 pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp);
7622 pr_perm_msg(ret);
7623
7624 if (own_log_buf && log_buf && log_buf[0] != '\0') {
7625 pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
7626 prog->name, log_buf);
7627 }
7628
7629 out:
7630 if (own_log_buf)
7631 free(log_buf);
7632 return ret;
7633 }
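
/* Example (illustrative sketch): callers who want to keep the verifier log
 * can hand in their own buffer and log level before load:
 *
 *	static char verifier_log[1024 * 1024];
 *
 *	bpf_program__set_log_buf(prog, verifier_log, sizeof(verifier_log));
 *	bpf_program__set_log_level(prog, 1);
 *
 * With a user-provided buffer, own_log_buf stays false above, so the
 * grow-and-retry logic is skipped and ENOSPC is reported back to the caller.
 */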
7634
7635 static char *find_prev_line(char *buf, char *cur)
7636 {
7637 char *p;
7638
7639 	if (cur == buf) /* reached the beginning of the log buf */
7640 return NULL;
7641
7642 p = cur - 1;
7643 while (p - 1 >= buf && *(p - 1) != '\n')
7644 p--;
7645
7646 return p;
7647 }
7648
7649 static void patch_log(char *buf, size_t buf_sz, size_t log_sz,
7650 char *orig, size_t orig_sz, const char *patch)
7651 {
7652 /* size of the remaining log content to the right from the to-be-replaced part */
7653 size_t rem_sz = (buf + log_sz) - (orig + orig_sz);
7654 size_t patch_sz = strlen(patch);
7655
7656 if (patch_sz != orig_sz) {
7657 /* If patch line(s) are longer than original piece of verifier log,
7658 * shift log contents by (patch_sz - orig_sz) bytes to the right
7659 * starting from after to-be-replaced part of the log.
7660 *
7661 * If patch line(s) are shorter than original piece of verifier log,
7662 * shift log contents by (orig_sz - patch_sz) bytes to the left
7663 * starting from after to-be-replaced part of the log
7664 *
7665 * We need to be careful about not overflowing available
7666 * buf_sz capacity. If that's the case, we'll truncate the end
7667 * of the original log, as necessary.
7668 */
7669 if (patch_sz > orig_sz) {
7670 if (orig + patch_sz >= buf + buf_sz) {
7671 /* patch is big enough to cover remaining space completely */
7672 patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1;
7673 rem_sz = 0;
7674 } else if (patch_sz - orig_sz > buf_sz - log_sz) {
7675 /* patch causes part of remaining log to be truncated */
7676 rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz);
7677 }
7678 }
7679 /* shift remaining log to the right by calculated amount */
7680 memmove(orig + patch_sz, orig + orig_sz, rem_sz);
7681 }
7682
7683 memcpy(orig, patch, patch_sz);
7684 }
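
/* Worked example (illustrative): with buf_sz = 32, log_sz = 20, a 5-byte
 * span at orig and a 10-byte patch, the 5 extra bytes still fit into buf_sz,
 * so the tail after the span is shifted right by 5 bytes and the patch is
 * copied over the span. Had the shifted tail run past buf + buf_sz, the tail
 * (and, if necessary, the patch itself) would have been truncated instead.
 */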
7685
7686 static void fixup_log_failed_core_relo(struct bpf_program *prog,
7687 char *buf, size_t buf_sz, size_t log_sz,
7688 char *line1, char *line2, char *line3)
7689 {
7690 /* Expected log for failed and not properly guarded CO-RE relocation:
7691 * line1 -> 123: (85) call unknown#195896080
7692 * line2 -> invalid func unknown#195896080
7693 * line3 -> <anything else or end of buffer>
7694 *
7695 * "123" is the index of the instruction that was poisoned. We extract
7696 * instruction index to find corresponding CO-RE relocation and
7697 * replace this part of the log with more relevant information about
7698 * failed CO-RE relocation.
7699 */
7700 const struct bpf_core_relo *relo;
7701 struct bpf_core_spec spec;
7702 char patch[512], spec_buf[256];
7703 int insn_idx, err, spec_len;
7704
7705 if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1)
7706 return;
7707
7708 relo = find_relo_core(prog, insn_idx);
7709 if (!relo)
7710 return;
7711
7712 err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec);
7713 if (err)
7714 return;
7715
7716 spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec);
7717 snprintf(patch, sizeof(patch),
7718 "%d: <invalid CO-RE relocation>\n"
7719 "failed to resolve CO-RE relocation %s%s\n",
7720 insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : "");
7721
7722 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7723 }
7724
7725 static void fixup_log_missing_map_load(struct bpf_program *prog,
7726 char *buf, size_t buf_sz, size_t log_sz,
7727 char *line1, char *line2, char *line3)
7728 {
7729 /* Expected log for failed and not properly guarded map reference:
7730 * line1 -> 123: (85) call unknown#2001000345
7731 * line2 -> invalid func unknown#2001000345
7732 * line3 -> <anything else or end of buffer>
7733 *
7734 * "123" is the index of the instruction that was poisoned.
7735 * "345" in "2001000345" is a map index in obj->maps to fetch map name.
7736 */
7737 struct bpf_object *obj = prog->obj;
7738 const struct bpf_map *map;
7739 int insn_idx, map_idx;
7740 char patch[128];
7741
7742 if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2)
7743 return;
7744
7745 map_idx -= POISON_LDIMM64_MAP_BASE;
7746 if (map_idx < 0 || map_idx >= obj->nr_maps)
7747 return;
7748 map = &obj->maps[map_idx];
7749
7750 snprintf(patch, sizeof(patch),
7751 "%d: <invalid BPF map reference>\n"
7752 "BPF map '%s' is referenced but wasn't created\n",
7753 insn_idx, map->name);
7754
7755 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7756 }
7757
7758 static void fixup_log_missing_kfunc_call(struct bpf_program *prog,
7759 char *buf, size_t buf_sz, size_t log_sz,
7760 char *line1, char *line2, char *line3)
7761 {
7762 /* Expected log for failed and not properly guarded kfunc call:
7763 * line1 -> 123: (85) call unknown#2002000345
7764 * line2 -> invalid func unknown#2002000345
7765 * line3 -> <anything else or end of buffer>
7766 *
7767 * "123" is the index of the instruction that was poisoned.
7768 * "345" in "2002000345" is an extern index in obj->externs to fetch kfunc name.
7769 */
7770 struct bpf_object *obj = prog->obj;
7771 const struct extern_desc *ext;
7772 int insn_idx, ext_idx;
7773 char patch[128];
7774
7775 if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &ext_idx) != 2)
7776 return;
7777
7778 ext_idx -= POISON_CALL_KFUNC_BASE;
7779 if (ext_idx < 0 || ext_idx >= obj->nr_extern)
7780 return;
7781 ext = &obj->externs[ext_idx];
7782
7783 snprintf(patch, sizeof(patch),
7784 "%d: <invalid kfunc call>\n"
7785 "kfunc '%s' is referenced but wasn't resolved\n",
7786 insn_idx, ext->name);
7787
7788 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7789 }
7790
7791 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz)
7792 {
7793 /* look for familiar error patterns in last N lines of the log */
7794 const size_t max_last_line_cnt = 10;
7795 char *prev_line, *cur_line, *next_line;
7796 size_t log_sz;
7797 int i;
7798
7799 if (!buf)
7800 return;
7801
7802 log_sz = strlen(buf) + 1;
7803 next_line = buf + log_sz - 1;
7804
7805 for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) {
7806 cur_line = find_prev_line(buf, next_line);
7807 if (!cur_line)
7808 return;
7809
7810 if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) {
7811 prev_line = find_prev_line(buf, cur_line);
7812 if (!prev_line)
7813 continue;
7814
7815 /* failed CO-RE relocation case */
7816 fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz,
7817 prev_line, cur_line, next_line);
7818 return;
7819 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_LDIMM64_MAP_PFX)) {
7820 prev_line = find_prev_line(buf, cur_line);
7821 if (!prev_line)
7822 continue;
7823
7824 /* reference to uncreated BPF map */
7825 fixup_log_missing_map_load(prog, buf, buf_sz, log_sz,
7826 prev_line, cur_line, next_line);
7827 return;
7828 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_CALL_KFUNC_PFX)) {
7829 prev_line = find_prev_line(buf, cur_line);
7830 if (!prev_line)
7831 continue;
7832
7833 /* reference to unresolved kfunc */
7834 fixup_log_missing_kfunc_call(prog, buf, buf_sz, log_sz,
7835 prev_line, cur_line, next_line);
7836 return;
7837 }
7838 }
7839 }
7840
7841 static int bpf_program_record_relos(struct bpf_program *prog)
7842 {
7843 struct bpf_object *obj = prog->obj;
7844 int i;
7845
7846 for (i = 0; i < prog->nr_reloc; i++) {
7847 struct reloc_desc *relo = &prog->reloc_desc[i];
7848 struct extern_desc *ext = &obj->externs[relo->ext_idx];
7849 int kind;
7850
7851 switch (relo->type) {
7852 case RELO_EXTERN_LD64:
7853 if (ext->type != EXT_KSYM)
7854 continue;
7855 kind = btf_is_var(btf__type_by_id(obj->btf, ext->btf_id)) ?
7856 BTF_KIND_VAR : BTF_KIND_FUNC;
7857 bpf_gen__record_extern(obj->gen_loader, ext->name,
7858 ext->is_weak, !ext->ksym.type_id,
7859 true, kind, relo->insn_idx);
7860 break;
7861 case RELO_EXTERN_CALL:
7862 bpf_gen__record_extern(obj->gen_loader, ext->name,
7863 ext->is_weak, false, false, BTF_KIND_FUNC,
7864 relo->insn_idx);
7865 break;
7866 case RELO_CORE: {
7867 struct bpf_core_relo cr = {
7868 .insn_off = relo->insn_idx * 8,
7869 .type_id = relo->core_relo->type_id,
7870 .access_str_off = relo->core_relo->access_str_off,
7871 .kind = relo->core_relo->kind,
7872 };
7873
7874 bpf_gen__record_relo_core(obj->gen_loader, &cr);
7875 break;
7876 }
7877 default:
7878 continue;
7879 }
7880 }
7881 return 0;
7882 }
7883
7884 static int
7885 bpf_object__load_progs(struct bpf_object *obj, int log_level)
7886 {
7887 struct bpf_program *prog;
7888 size_t i;
7889 int err;
7890
7891 for (i = 0; i < obj->nr_programs; i++) {
7892 prog = &obj->programs[i];
7893 err = bpf_object__sanitize_prog(obj, prog);
7894 if (err)
7895 return err;
7896 }
7897
7898 for (i = 0; i < obj->nr_programs; i++) {
7899 prog = &obj->programs[i];
7900 if (prog_is_subprog(obj, prog))
7901 continue;
7902 if (!prog->autoload) {
7903 pr_debug("prog '%s': skipped loading\n", prog->name);
7904 continue;
7905 }
7906 prog->log_level |= log_level;
7907
7908 if (obj->gen_loader)
7909 bpf_program_record_relos(prog);
7910
7911 err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt,
7912 obj->license, obj->kern_version, &prog->fd);
7913 if (err) {
7914 pr_warn("prog '%s': failed to load: %d\n", prog->name, err);
7915 return err;
7916 }
7917 }
7918
7919 bpf_object__free_relocs(obj);
7920 return 0;
7921 }
7922
7923 static const struct bpf_sec_def *find_sec_def(const char *sec_name);
7924
7925 static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts)
7926 {
7927 struct bpf_program *prog;
7928 int err;
7929
7930 bpf_object__for_each_program(prog, obj) {
7931 prog->sec_def = find_sec_def(prog->sec_name);
7932 if (!prog->sec_def) {
7933 /* couldn't guess, but user might manually specify */
7934 pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
7935 prog->name, prog->sec_name);
7936 continue;
7937 }
7938
7939 prog->type = prog->sec_def->prog_type;
7940 prog->expected_attach_type = prog->sec_def->expected_attach_type;
7941
7942 /* sec_def can have custom callback which should be called
7943 * after bpf_program is initialized to adjust its properties
7944 */
7945 if (prog->sec_def->prog_setup_fn) {
7946 err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie);
7947 if (err < 0) {
7948 pr_warn("prog '%s': failed to initialize: %d\n",
7949 prog->name, err);
7950 return err;
7951 }
7952 }
7953 }
7954
7955 return 0;
7956 }
7957
7958 static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz,
7959 const char *obj_name,
7960 const struct bpf_object_open_opts *opts)
7961 {
7962 const char *kconfig, *btf_tmp_path, *token_path;
7963 struct bpf_object *obj;
7964 int err;
7965 char *log_buf;
7966 size_t log_size;
7967 __u32 log_level;
7968
7969 if (obj_buf && !obj_name)
7970 return ERR_PTR(-EINVAL);
7971
7972 if (elf_version(EV_CURRENT) == EV_NONE) {
7973 pr_warn("failed to init libelf for %s\n",
7974 path ? : "(mem buf)");
7975 return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
7976 }
7977
7978 if (!OPTS_VALID(opts, bpf_object_open_opts))
7979 return ERR_PTR(-EINVAL);
7980
7981 obj_name = OPTS_GET(opts, object_name, NULL) ?: obj_name;
7982 if (obj_buf) {
7983 path = obj_name;
7984 pr_debug("loading object '%s' from buffer\n", obj_name);
7985 } else {
7986 pr_debug("loading object from %s\n", path);
7987 }
7988
7989 log_buf = OPTS_GET(opts, kernel_log_buf, NULL);
7990 log_size = OPTS_GET(opts, kernel_log_size, 0);
7991 log_level = OPTS_GET(opts, kernel_log_level, 0);
7992 if (log_size > UINT_MAX)
7993 return ERR_PTR(-EINVAL);
7994 if (log_size && !log_buf)
7995 return ERR_PTR(-EINVAL);
7996
7997 token_path = OPTS_GET(opts, bpf_token_path, NULL);
7998 /* if user didn't specify bpf_token_path explicitly, check if
7999 * LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as bpf_token_path
8000 * option
8001 */
8002 if (!token_path)
8003 token_path = getenv("LIBBPF_BPF_TOKEN_PATH");
8004 if (token_path && strlen(token_path) >= PATH_MAX)
8005 return ERR_PTR(-ENAMETOOLONG);
8006
8007 obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
8008 if (IS_ERR(obj))
8009 return obj;
8010
8011 obj->log_buf = log_buf;
8012 obj->log_size = log_size;
8013 obj->log_level = log_level;
8014
8015 if (token_path) {
8016 obj->token_path = strdup(token_path);
8017 if (!obj->token_path) {
8018 err = -ENOMEM;
8019 goto out;
8020 }
8021 }
8022
8023 btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
8024 if (btf_tmp_path) {
8025 if (strlen(btf_tmp_path) >= PATH_MAX) {
8026 err = -ENAMETOOLONG;
8027 goto out;
8028 }
8029 obj->btf_custom_path = strdup(btf_tmp_path);
8030 if (!obj->btf_custom_path) {
8031 err = -ENOMEM;
8032 goto out;
8033 }
8034 }
8035
8036 kconfig = OPTS_GET(opts, kconfig, NULL);
8037 if (kconfig) {
8038 obj->kconfig = strdup(kconfig);
8039 if (!obj->kconfig) {
8040 err = -ENOMEM;
8041 goto out;
8042 }
8043 }
8044
8045 err = bpf_object__elf_init(obj);
8046 err = err ? : bpf_object__check_endianness(obj);
8047 err = err ? : bpf_object__elf_collect(obj);
8048 err = err ? : bpf_object__collect_externs(obj);
8049 err = err ? : bpf_object_fixup_btf(obj);
8050 err = err ? : bpf_object__init_maps(obj, opts);
8051 err = err ? : bpf_object_init_progs(obj, opts);
8052 err = err ? : bpf_object__collect_relos(obj);
8053 if (err)
8054 goto out;
8055
8056 bpf_object__elf_finish(obj);
8057
8058 return obj;
8059 out:
8060 bpf_object__close(obj);
8061 return ERR_PTR(err);
8062 }
8063
8064 struct bpf_object *
8065 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
8066 {
8067 if (!path)
8068 return libbpf_err_ptr(-EINVAL);
8069
8070 return libbpf_ptr(bpf_object_open(path, NULL, 0, NULL, opts));
8071 }
8072
8073 struct bpf_object *bpf_object__open(const char *path)
8074 {
8075 return bpf_object__open_file(path, NULL);
8076 }
8077
8078 struct bpf_object *
8079 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
8080 const struct bpf_object_open_opts *opts)
8081 {
8082 char tmp_name[64];
8083
8084 if (!obj_buf || obj_buf_sz == 0)
8085 return libbpf_err_ptr(-EINVAL);
8086
8087 /* create a (quite useless) default "name" for this memory buffer object */
8088 snprintf(tmp_name, sizeof(tmp_name), "%lx-%zx", (unsigned long)obj_buf, obj_buf_sz);
8089
8090 return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, tmp_name, opts));
8091 }
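
/* Example (illustrative sketch; paths are placeholders): typical callers use
 * the public wrappers above, optionally with open opts:
 *
 *	LIBBPF_OPTS(bpf_object_open_opts, opts,
 *		.kernel_log_level = 1,
 *		.btf_custom_path = "/tmp/custom_vmlinux.btf",
 *	);
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", &opts);
 *
 *	if (!obj)
 *		return -errno;
 *
 * On failure the wrappers return NULL and set errno (libbpf_ptr() behavior).
 */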
8092
8093 static int bpf_object_unload(struct bpf_object *obj)
8094 {
8095 size_t i;
8096
8097 if (!obj)
8098 return libbpf_err(-EINVAL);
8099
8100 for (i = 0; i < obj->nr_maps; i++) {
8101 zclose(obj->maps[i].fd);
8102 if (obj->maps[i].st_ops)
8103 zfree(&obj->maps[i].st_ops->kern_vdata);
8104 }
8105
8106 for (i = 0; i < obj->nr_programs; i++)
8107 bpf_program__unload(&obj->programs[i]);
8108
8109 return 0;
8110 }
8111
8112 static int bpf_object__sanitize_maps(struct bpf_object *obj)
8113 {
8114 struct bpf_map *m;
8115
8116 bpf_object__for_each_map(m, obj) {
8117 if (!bpf_map__is_internal(m))
8118 continue;
8119 if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
8120 m->def.map_flags &= ~BPF_F_MMAPABLE;
8121 }
8122
8123 return 0;
8124 }
8125
8126 typedef int (*kallsyms_cb_t)(unsigned long long sym_addr, char sym_type,
8127 const char *sym_name, void *ctx);
8128
8129 static int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx)
8130 {
8131 char sym_type, sym_name[500];
8132 unsigned long long sym_addr;
8133 int ret, err = 0;
8134 FILE *f;
8135
8136 f = fopen("/proc/kallsyms", "re");
8137 if (!f) {
8138 err = -errno;
8139 pr_warn("failed to open /proc/kallsyms: %d\n", err);
8140 return err;
8141 }
8142
8143 while (true) {
8144 ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
8145 &sym_addr, &sym_type, sym_name);
8146 if (ret == EOF && feof(f))
8147 break;
8148 if (ret != 3) {
8149 pr_warn("failed to read kallsyms entry: %d\n", ret);
8150 err = -EINVAL;
8151 break;
8152 }
8153
8154 err = cb(sym_addr, sym_type, sym_name, ctx);
8155 if (err)
8156 break;
8157 }
8158
8159 fclose(f);
8160 return err;
8161 }
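
/* Example (illustrative sketch; names are hypothetical): a minimal callback
 * matching kallsyms_cb_t that records the address of one symbol of interest:
 *
 *	struct sym_query { const char *name; unsigned long long addr; };
 *
 *	static int find_sym_cb(unsigned long long sym_addr, char sym_type,
 *			       const char *sym_name, void *ctx)
 *	{
 *		struct sym_query *q = ctx;
 *
 *		if (strcmp(sym_name, q->name) == 0)
 *			q->addr = sym_addr;
 *		return 0;
 *	}
 *
 * Returning a non-zero value from the callback stops the parse early. It
 * would be driven via libbpf_kallsyms_parse(find_sym_cb, &query).
 */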
8162
8163 static int kallsyms_cb(unsigned long long sym_addr, char sym_type,
8164 const char *sym_name, void *ctx)
8165 {
8166 struct bpf_object *obj = ctx;
8167 const struct btf_type *t;
8168 struct extern_desc *ext;
8169 char *res;
8170
8171 res = strstr(sym_name, ".llvm.");
8172 if (sym_type == 'd' && res)
8173 ext = find_extern_by_name_with_len(obj, sym_name, res - sym_name);
8174 else
8175 ext = find_extern_by_name(obj, sym_name);
8176 if (!ext || ext->type != EXT_KSYM)
8177 return 0;
8178
8179 t = btf__type_by_id(obj->btf, ext->btf_id);
8180 if (!btf_is_var(t))
8181 return 0;
8182
8183 if (ext->is_set && ext->ksym.addr != sym_addr) {
8184 pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n",
8185 sym_name, ext->ksym.addr, sym_addr);
8186 return -EINVAL;
8187 }
8188 if (!ext->is_set) {
8189 ext->is_set = true;
8190 ext->ksym.addr = sym_addr;
8191 pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr);
8192 }
8193 return 0;
8194 }
8195
8196 static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
8197 {
8198 return libbpf_kallsyms_parse(kallsyms_cb, obj);
8199 }
8200
8201 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
8202 __u16 kind, struct btf **res_btf,
8203 struct module_btf **res_mod_btf)
8204 {
8205 struct module_btf *mod_btf;
8206 struct btf *btf;
8207 int i, id, err;
8208
8209 btf = obj->btf_vmlinux;
8210 mod_btf = NULL;
8211 id = btf__find_by_name_kind(btf, ksym_name, kind);
8212
8213 if (id == -ENOENT) {
8214 err = load_module_btfs(obj);
8215 if (err)
8216 return err;
8217
8218 for (i = 0; i < obj->btf_module_cnt; i++) {
8219 /* we assume module_btf's BTF FD is always >0 */
8220 mod_btf = &obj->btf_modules[i];
8221 btf = mod_btf->btf;
8222 id = btf__find_by_name_kind_own(btf, ksym_name, kind);
8223 if (id != -ENOENT)
8224 break;
8225 }
8226 }
8227 if (id <= 0)
8228 return -ESRCH;
8229
8230 *res_btf = btf;
8231 *res_mod_btf = mod_btf;
8232 return id;
8233 }
8234
8235 static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
8236 struct extern_desc *ext)
8237 {
8238 const struct btf_type *targ_var, *targ_type;
8239 __u32 targ_type_id, local_type_id;
8240 struct module_btf *mod_btf = NULL;
8241 const char *targ_var_name;
8242 struct btf *btf = NULL;
8243 int id, err;
8244
8245 id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf);
8246 if (id < 0) {
8247 if (id == -ESRCH && ext->is_weak)
8248 return 0;
8249 pr_warn("extern (var ksym) '%s': not found in kernel BTF\n",
8250 ext->name);
8251 return id;
8252 }
8253
8254 /* find local type_id */
8255 local_type_id = ext->ksym.type_id;
8256
8257 /* find target type_id */
8258 targ_var = btf__type_by_id(btf, id);
8259 targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
8260 targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
8261
8262 err = bpf_core_types_are_compat(obj->btf, local_type_id,
8263 btf, targ_type_id);
8264 if (err <= 0) {
8265 const struct btf_type *local_type;
8266 const char *targ_name, *local_name;
8267
8268 local_type = btf__type_by_id(obj->btf, local_type_id);
8269 local_name = btf__name_by_offset(obj->btf, local_type->name_off);
8270 targ_name = btf__name_by_offset(btf, targ_type->name_off);
8271
8272 pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
8273 ext->name, local_type_id,
8274 btf_kind_str(local_type), local_name, targ_type_id,
8275 btf_kind_str(targ_type), targ_name);
8276 return -EINVAL;
8277 }
8278
8279 ext->is_set = true;
8280 ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
8281 ext->ksym.kernel_btf_id = id;
8282 pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n",
8283 ext->name, id, btf_kind_str(targ_var), targ_var_name);
8284
8285 return 0;
8286 }
8287
8288 static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
8289 struct extern_desc *ext)
8290 {
8291 int local_func_proto_id, kfunc_proto_id, kfunc_id;
8292 struct module_btf *mod_btf = NULL;
8293 const struct btf_type *kern_func;
8294 struct btf *kern_btf = NULL;
8295 int ret;
8296
8297 local_func_proto_id = ext->ksym.type_id;
8298
8299 kfunc_id = find_ksym_btf_id(obj, ext->essent_name ?: ext->name, BTF_KIND_FUNC, &kern_btf,
8300 &mod_btf);
8301 if (kfunc_id < 0) {
8302 if (kfunc_id == -ESRCH && ext->is_weak)
8303 return 0;
8304 pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n",
8305 ext->name);
8306 return kfunc_id;
8307 }
8308
8309 kern_func = btf__type_by_id(kern_btf, kfunc_id);
8310 kfunc_proto_id = kern_func->type;
8311
8312 ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
8313 kern_btf, kfunc_proto_id);
8314 if (ret <= 0) {
8315 if (ext->is_weak)
8316 return 0;
8317
8318 pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with %s [%d]\n",
8319 ext->name, local_func_proto_id,
8320 mod_btf ? mod_btf->name : "vmlinux", kfunc_proto_id);
8321 return -EINVAL;
8322 }
8323
8324 /* set index for module BTF fd in fd_array, if unset */
8325 if (mod_btf && !mod_btf->fd_array_idx) {
8326 /* insn->off is s16 */
8327 if (obj->fd_array_cnt == INT16_MAX) {
8328 pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n",
8329 ext->name, mod_btf->fd_array_idx);
8330 return -E2BIG;
8331 }
8332 /* Cannot use index 0 for module BTF fd */
8333 if (!obj->fd_array_cnt)
8334 obj->fd_array_cnt = 1;
8335
8336 ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int),
8337 obj->fd_array_cnt + 1);
8338 if (ret)
8339 return ret;
8340 mod_btf->fd_array_idx = obj->fd_array_cnt;
8341 /* we assume module BTF FD is always >0 */
8342 obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd;
8343 }
8344
8345 ext->is_set = true;
8346 ext->ksym.kernel_btf_id = kfunc_id;
8347 ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0;
8348 /* Also set kernel_btf_obj_fd to make sure that bpf_object__relocate_data()
8349 * populates FD into ld_imm64 insn when it's used to point to kfunc.
8350 * {kernel_btf_id, btf_fd_idx} -> fixup bpf_call.
8351 * {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64.
8352 */
8353 ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
8354 pr_debug("extern (func ksym) '%s': resolved to %s [%d]\n",
8355 ext->name, mod_btf ? mod_btf->name : "vmlinux", kfunc_id);
8356
8357 return 0;
8358 }
8359
8360 static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
8361 {
8362 const struct btf_type *t;
8363 struct extern_desc *ext;
8364 int i, err;
8365
8366 for (i = 0; i < obj->nr_extern; i++) {
8367 ext = &obj->externs[i];
8368 if (ext->type != EXT_KSYM || !ext->ksym.type_id)
8369 continue;
8370
8371 if (obj->gen_loader) {
8372 ext->is_set = true;
8373 ext->ksym.kernel_btf_obj_fd = 0;
8374 ext->ksym.kernel_btf_id = 0;
8375 continue;
8376 }
8377 t = btf__type_by_id(obj->btf, ext->btf_id);
8378 if (btf_is_var(t))
8379 err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
8380 else
8381 err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
8382 if (err)
8383 return err;
8384 }
8385 return 0;
8386 }
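
/* Example (illustrative sketch, assuming the __ksym/__weak macros from
 * bpf_helpers.h and vmlinux.h types): ksym externs resolved by the logic
 * above are declared on the BPF side like:
 *
 *	extern const struct rq runqueues __ksym;              (typed variable ksym)
 *	extern void bpf_rcu_read_lock(void) __ksym;           (kfunc)
 *	extern void bpf_rcu_read_unlock(void) __ksym __weak;  (optional kfunc)
 *
 * Typed variable ksyms are matched against kernel BTF in
 * bpf_object__resolve_ksym_var_btf_id(), kfuncs in
 * bpf_object__resolve_ksym_func_btf_id(); typeless ksyms (no BTF type ID)
 * are resolved from /proc/kallsyms instead.
 */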
8387
8388 static int bpf_object__resolve_externs(struct bpf_object *obj,
8389 const char *extra_kconfig)
8390 {
8391 bool need_config = false, need_kallsyms = false;
8392 bool need_vmlinux_btf = false;
8393 struct extern_desc *ext;
8394 void *kcfg_data = NULL;
8395 int err, i;
8396
8397 if (obj->nr_extern == 0)
8398 return 0;
8399
8400 if (obj->kconfig_map_idx >= 0)
8401 kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
8402
8403 for (i = 0; i < obj->nr_extern; i++) {
8404 ext = &obj->externs[i];
8405
8406 if (ext->type == EXT_KSYM) {
8407 if (ext->ksym.type_id)
8408 need_vmlinux_btf = true;
8409 else
8410 need_kallsyms = true;
8411 continue;
8412 } else if (ext->type == EXT_KCFG) {
8413 void *ext_ptr = kcfg_data + ext->kcfg.data_off;
8414 __u64 value = 0;
8415
8416 /* Kconfig externs need actual /proc/config.gz */
8417 if (str_has_pfx(ext->name, "CONFIG_")) {
8418 need_config = true;
8419 continue;
8420 }
8421
8422 			/* Virtual kcfg externs are handled specially by libbpf */
8423 if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
8424 value = get_kernel_version();
8425 if (!value) {
8426 pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name);
8427 return -EINVAL;
8428 }
8429 } else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) {
8430 value = kernel_supports(obj, FEAT_BPF_COOKIE);
8431 } else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) {
8432 value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER);
8433 } else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) {
8434 /* Currently libbpf supports only CONFIG_ and LINUX_ prefixed
8435 * __kconfig externs, where LINUX_ ones are virtual and filled out
8436 				 * specially by libbpf (their values don't come from Kconfig).
8437 * If LINUX_xxx variable is not recognized by libbpf, but is marked
8438 * __weak, it defaults to zero value, just like for CONFIG_xxx
8439 * externs.
8440 */
8441 pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name);
8442 return -EINVAL;
8443 }
8444
8445 err = set_kcfg_value_num(ext, ext_ptr, value);
8446 if (err)
8447 return err;
8448 pr_debug("extern (kcfg) '%s': set to 0x%llx\n",
8449 ext->name, (long long)value);
8450 } else {
8451 pr_warn("extern '%s': unrecognized extern kind\n", ext->name);
8452 return -EINVAL;
8453 }
8454 }
8455 if (need_config && extra_kconfig) {
8456 err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
8457 if (err)
8458 return -EINVAL;
8459 need_config = false;
8460 for (i = 0; i < obj->nr_extern; i++) {
8461 ext = &obj->externs[i];
8462 if (ext->type == EXT_KCFG && !ext->is_set) {
8463 need_config = true;
8464 break;
8465 }
8466 }
8467 }
8468 if (need_config) {
8469 err = bpf_object__read_kconfig_file(obj, kcfg_data);
8470 if (err)
8471 return -EINVAL;
8472 }
8473 if (need_kallsyms) {
8474 err = bpf_object__read_kallsyms_file(obj);
8475 if (err)
8476 return -EINVAL;
8477 }
8478 if (need_vmlinux_btf) {
8479 err = bpf_object__resolve_ksyms_btf_id(obj);
8480 if (err)
8481 return -EINVAL;
8482 }
8483 for (i = 0; i < obj->nr_extern; i++) {
8484 ext = &obj->externs[i];
8485
8486 if (!ext->is_set && !ext->is_weak) {
8487 pr_warn("extern '%s' (strong): not resolved\n", ext->name);
8488 return -ESRCH;
8489 } else if (!ext->is_set) {
8490 pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n",
8491 ext->name);
8492 }
8493 }
8494
8495 return 0;
8496 }
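
/* Example (illustrative sketch, assuming the __kconfig/__weak macros from
 * bpf_helpers.h): the externs handled above are declared on the BPF side as:
 *
 *	extern unsigned int LINUX_KERNEL_VERSION __kconfig;
 *	extern _Bool LINUX_HAS_BPF_COOKIE __kconfig __weak;
 *	extern int CONFIG_HZ __kconfig __weak;
 *
 * LINUX_* values are filled in by libbpf itself; CONFIG_* values come from
 * /proc/config.gz (or the kconfig string passed via bpf_object_open_opts).
 * Weak externs default to zero when they can't be resolved.
 */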
8497
8498 static void bpf_map_prepare_vdata(const struct bpf_map *map)
8499 {
8500 const struct btf_type *type;
8501 struct bpf_struct_ops *st_ops;
8502 __u32 i;
8503
8504 st_ops = map->st_ops;
8505 type = btf__type_by_id(map->obj->btf, st_ops->type_id);
8506 for (i = 0; i < btf_vlen(type); i++) {
8507 struct bpf_program *prog = st_ops->progs[i];
8508 void *kern_data;
8509 int prog_fd;
8510
8511 if (!prog)
8512 continue;
8513
8514 prog_fd = bpf_program__fd(prog);
8515 kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
8516 *(unsigned long *)kern_data = prog_fd;
8517 }
8518 }
8519
8520 static int bpf_object_prepare_struct_ops(struct bpf_object *obj)
8521 {
8522 struct bpf_map *map;
8523 int i;
8524
8525 for (i = 0; i < obj->nr_maps; i++) {
8526 map = &obj->maps[i];
8527
8528 if (!bpf_map__is_struct_ops(map))
8529 continue;
8530
8531 if (!map->autocreate)
8532 continue;
8533
8534 bpf_map_prepare_vdata(map);
8535 }
8536
8537 return 0;
8538 }
8539
8540 static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path)
8541 {
8542 int err, i;
8543
8544 if (!obj)
8545 return libbpf_err(-EINVAL);
8546
8547 if (obj->loaded) {
8548 pr_warn("object '%s': load can't be attempted twice\n", obj->name);
8549 return libbpf_err(-EINVAL);
8550 }
8551
8552 if (obj->gen_loader)
8553 bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps);
8554
8555 err = bpf_object_prepare_token(obj);
8556 err = err ? : bpf_object__probe_loading(obj);
8557 err = err ? : bpf_object__load_vmlinux_btf(obj, false);
8558 err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
8559 err = err ? : bpf_object__sanitize_maps(obj);
8560 err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
8561 err = err ? : bpf_object_adjust_struct_ops_autoload(obj);
8562 err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
8563 err = err ? : bpf_object__sanitize_and_load_btf(obj);
8564 err = err ? : bpf_object__create_maps(obj);
8565 err = err ? : bpf_object__load_progs(obj, extra_log_level);
8566 err = err ? : bpf_object_init_prog_arrays(obj);
8567 err = err ? : bpf_object_prepare_struct_ops(obj);
8568
8569 if (obj->gen_loader) {
8570 /* reset FDs */
8571 if (obj->btf)
8572 btf__set_fd(obj->btf, -1);
8573 if (!err)
8574 err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
8575 }
8576
8577 /* clean up fd_array */
8578 zfree(&obj->fd_array);
8579
8580 /* clean up module BTFs */
8581 for (i = 0; i < obj->btf_module_cnt; i++) {
8582 close(obj->btf_modules[i].fd);
8583 btf__free(obj->btf_modules[i].btf);
8584 free(obj->btf_modules[i].name);
8585 }
8586 free(obj->btf_modules);
8587
8588 /* clean up vmlinux BTF */
8589 btf__free(obj->btf_vmlinux);
8590 obj->btf_vmlinux = NULL;
8591
8592 	obj->loaded = true; /* doesn't matter if successful or not */
8593
8594 if (err)
8595 goto out;
8596
8597 return 0;
8598 out:
8599 /* unpin any maps that were auto-pinned during load */
8600 for (i = 0; i < obj->nr_maps; i++)
8601 if (obj->maps[i].pinned && !obj->maps[i].reused)
8602 bpf_map__unpin(&obj->maps[i], NULL);
8603
8604 bpf_object_unload(obj);
8605 pr_warn("failed to load object '%s'\n", obj->path);
8606 return libbpf_err(err);
8607 }
8608
8609 int bpf_object__load(struct bpf_object *obj)
8610 {
8611 return bpf_object_load(obj, 0, NULL);
8612 }
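
/* Example (illustrative sketch; the object path is a placeholder): the usual
 * open -> load sequence built on top of the steps orchestrated above:
 *
 *	struct bpf_object *obj;
 *	int err;
 *
 *	obj = bpf_object__open_file("prog.bpf.o", NULL);
 *	if (!obj)
 *		return -errno;
 *
 *	err = bpf_object__load(obj);
 *	if (err) {
 *		bpf_object__close(obj);
 *		return err;
 *	}
 *
 * After a successful load, programs can be attached and, when done, the
 * whole object released with bpf_object__close().
 */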
8613
8614 static int make_parent_dir(const char *path)
8615 {
8616 char *cp, errmsg[STRERR_BUFSIZE];
8617 char *dname, *dir;
8618 int err = 0;
8619
8620 dname = strdup(path);
8621 if (dname == NULL)
8622 return -ENOMEM;
8623
8624 dir = dirname(dname);
8625 if (mkdir(dir, 0700) && errno != EEXIST)
8626 err = -errno;
8627
8628 free(dname);
8629 if (err) {
8630 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
8631 pr_warn("failed to mkdir %s: %s\n", path, cp);
8632 }
8633 return err;
8634 }
8635
8636 static int check_path(const char *path)
8637 {
8638 char *cp, errmsg[STRERR_BUFSIZE];
8639 struct statfs st_fs;
8640 char *dname, *dir;
8641 int err = 0;
8642
8643 if (path == NULL)
8644 return -EINVAL;
8645
8646 dname = strdup(path);
8647 if (dname == NULL)
8648 return -ENOMEM;
8649
8650 dir = dirname(dname);
8651 if (statfs(dir, &st_fs)) {
8652 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
8653 pr_warn("failed to statfs %s: %s\n", dir, cp);
8654 err = -errno;
8655 }
8656 free(dname);
8657
8658 if (!err && st_fs.f_type != BPF_FS_MAGIC) {
8659 pr_warn("specified path %s is not on BPF FS\n", path);
8660 err = -EINVAL;
8661 }
8662
8663 return err;
8664 }
8665
8666 int bpf_program__pin(struct bpf_program *prog, const char *path)
8667 {
8668 char *cp, errmsg[STRERR_BUFSIZE];
8669 int err;
8670
8671 if (prog->fd < 0) {
8672 pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name);
8673 return libbpf_err(-EINVAL);
8674 }
8675
8676 err = make_parent_dir(path);
8677 if (err)
8678 return libbpf_err(err);
8679
8680 err = check_path(path);
8681 if (err)
8682 return libbpf_err(err);
8683
8684 if (bpf_obj_pin(prog->fd, path)) {
8685 err = -errno;
8686 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
8687 pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp);
8688 return libbpf_err(err);
8689 }
8690
8691 pr_debug("prog '%s': pinned at '%s'\n", prog->name, path);
8692 return 0;
8693 }
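
/* Usage sketch (illustrative; assumes a loaded object and a program named
 * "handle_exec", with a placeholder bpffs path):
 *
 *	struct bpf_program *prog;
 *	int err;
 *
 *	prog = bpf_object__find_program_by_name(obj, "handle_exec");
 *	if (!prog)
 *		return -ESRCH;
 *	err = bpf_program__pin(prog, "/sys/fs/bpf/handle_exec");
 *	if (err)
 *		fprintf(stderr, "failed to pin program: %d\n", err);
 */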
8694
8695 int bpf_program__unpin(struct bpf_program *prog, const char *path)
8696 {
8697 int err;
8698
8699 if (prog->fd < 0) {
8700 pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name);
8701 return libbpf_err(-EINVAL);
8702 }
8703
8704 err = check_path(path);
8705 if (err)
8706 return libbpf_err(err);
8707
8708 err = unlink(path);
8709 if (err)
8710 return libbpf_err(-errno);
8711
8712 pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path);
8713 return 0;
8714 }
8715
8716 int bpf_map__pin(struct bpf_map *map, const char *path)
8717 {
8718 char *cp, errmsg[STRERR_BUFSIZE];
8719 int err;
8720
8721 if (map == NULL) {
8722 pr_warn("invalid map pointer\n");
8723 return libbpf_err(-EINVAL);
8724 }
8725
8726 if (map->fd < 0) {
8727 pr_warn("map '%s': can't pin BPF map without FD (was it created?)\n", map->name);
8728 return libbpf_err(-EINVAL);
8729 }
8730
8731 if (map->pin_path) {
8732 if (path && strcmp(path, map->pin_path)) {
8733 pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
8734 bpf_map__name(map), map->pin_path, path);
8735 return libbpf_err(-EINVAL);
8736 } else if (map->pinned) {
8737 pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
8738 bpf_map__name(map), map->pin_path);
8739 return 0;
8740 }
8741 } else {
8742 if (!path) {
8743 pr_warn("missing a path to pin map '%s' at\n",
8744 bpf_map__name(map));
8745 return libbpf_err(-EINVAL);
8746 } else if (map->pinned) {
8747 pr_warn("map '%s' already pinned\n", bpf_map__name(map));
8748 return libbpf_err(-EEXIST);
8749 }
8750
8751 map->pin_path = strdup(path);
8752 if (!map->pin_path) {
8753 err = -errno;
8754 goto out_err;
8755 }
8756 }
8757
8758 err = make_parent_dir(map->pin_path);
8759 if (err)
8760 return libbpf_err(err);
8761
8762 err = check_path(map->pin_path);
8763 if (err)
8764 return libbpf_err(err);
8765
8766 if (bpf_obj_pin(map->fd, map->pin_path)) {
8767 err = -errno;
8768 goto out_err;
8769 }
8770
8771 map->pinned = true;
8772 pr_debug("pinned map '%s'\n", map->pin_path);
8773
8774 return 0;
8775
8776 out_err:
8777 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
8778 pr_warn("failed to pin map: %s\n", cp);
8779 return libbpf_err(err);
8780 }
8781
8782 int bpf_map__unpin(struct bpf_map *map, const char *path)
8783 {
8784 int err;
8785
8786 if (map == NULL) {
8787 pr_warn("invalid map pointer\n");
8788 return libbpf_err(-EINVAL);
8789 }
8790
8791 if (map->pin_path) {
8792 if (path && strcmp(path, map->pin_path)) {
8793 pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
8794 bpf_map__name(map), map->pin_path, path);
8795 return libbpf_err(-EINVAL);
8796 }
8797 path = map->pin_path;
8798 } else if (!path) {
8799 pr_warn("no path to unpin map '%s' from\n",
8800 bpf_map__name(map));
8801 return libbpf_err(-EINVAL);
8802 }
8803
8804 err = check_path(path);
8805 if (err)
8806 return libbpf_err(err);
8807
8808 err = unlink(path);
8809 if (err != 0)
8810 return libbpf_err(-errno);
8811
8812 map->pinned = false;
8813 pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
8814
8815 return 0;
8816 }
8817
8818 int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
8819 {
8820 char *new = NULL;
8821
8822 if (path) {
8823 new = strdup(path);
8824 if (!new)
8825 return libbpf_err(-errno);
8826 }
8827
8828 free(map->pin_path);
8829 map->pin_path = new;
8830 return 0;
8831 }
8832
8833 __alias(bpf_map__pin_path)
8834 const char *bpf_map__get_pin_path(const struct bpf_map *map);
8835
8836 const char *bpf_map__pin_path(const struct bpf_map *map)
8837 {
8838 return map->pin_path;
8839 }
8840
8841 bool bpf_map__is_pinned(const struct bpf_map *map)
8842 {
8843 return map->pinned;
8844 }
8845
8846 static void sanitize_pin_path(char *s)
8847 {
8848 /* bpffs disallows periods in path names */
8849 while (*s) {
8850 if (*s == '.')
8851 *s = '_';
8852 s++;
8853 }
8854 }
8855
8856 int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
8857 {
8858 struct bpf_map *map;
8859 int err;
8860
8861 if (!obj)
8862 return libbpf_err(-ENOENT);
8863
8864 if (!obj->loaded) {
8865 pr_warn("object not yet loaded; load it first\n");
8866 return libbpf_err(-ENOENT);
8867 }
8868
8869 bpf_object__for_each_map(map, obj) {
8870 char *pin_path = NULL;
8871 char buf[PATH_MAX];
8872
8873 if (!map->autocreate)
8874 continue;
8875
8876 if (path) {
8877 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
8878 if (err)
8879 goto err_unpin_maps;
8880 sanitize_pin_path(buf);
8881 pin_path = buf;
8882 } else if (!map->pin_path) {
8883 continue;
8884 }
8885
8886 err = bpf_map__pin(map, pin_path);
8887 if (err)
8888 goto err_unpin_maps;
8889 }
8890
8891 return 0;
8892
8893 err_unpin_maps:
8894 while ((map = bpf_object__prev_map(obj, map))) {
8895 if (!map->pin_path)
8896 continue;
8897
8898 bpf_map__unpin(map, NULL);
8899 }
8900
8901 return libbpf_err(err);
8902 }
8903
8904 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
8905 {
8906 struct bpf_map *map;
8907 int err;
8908
8909 if (!obj)
8910 return libbpf_err(-ENOENT);
8911
8912 bpf_object__for_each_map(map, obj) {
8913 char *pin_path = NULL;
8914 char buf[PATH_MAX];
8915
8916 if (path) {
8917 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
8918 if (err)
8919 return libbpf_err(err);
8920 sanitize_pin_path(buf);
8921 pin_path = buf;
8922 } else if (!map->pin_path) {
8923 continue;
8924 }
8925
8926 err = bpf_map__unpin(map, pin_path);
8927 if (err)
8928 return libbpf_err(err);
8929 }
8930
8931 return 0;
8932 }
8933
8934 int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
8935 {
8936 struct bpf_program *prog;
8937 char buf[PATH_MAX];
8938 int err;
8939
8940 if (!obj)
8941 return libbpf_err(-ENOENT);
8942
8943 if (!obj->loaded) {
8944 pr_warn("object not yet loaded; load it first\n");
8945 return libbpf_err(-ENOENT);
8946 }
8947
8948 bpf_object__for_each_program(prog, obj) {
8949 err = pathname_concat(buf, sizeof(buf), path, prog->name);
8950 if (err)
8951 goto err_unpin_programs;
8952
8953 err = bpf_program__pin(prog, buf);
8954 if (err)
8955 goto err_unpin_programs;
8956 }
8957
8958 return 0;
8959
8960 err_unpin_programs:
8961 while ((prog = bpf_object__prev_program(obj, prog))) {
8962 if (pathname_concat(buf, sizeof(buf), path, prog->name))
8963 continue;
8964
8965 bpf_program__unpin(prog, buf);
8966 }
8967
8968 return libbpf_err(err);
8969 }
8970
8971 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
8972 {
8973 struct bpf_program *prog;
8974 int err;
8975
8976 if (!obj)
8977 return libbpf_err(-ENOENT);
8978
8979 bpf_object__for_each_program(prog, obj) {
8980 char buf[PATH_MAX];
8981
8982 err = pathname_concat(buf, sizeof(buf), path, prog->name);
8983 if (err)
8984 return libbpf_err(err);
8985
8986 err = bpf_program__unpin(prog, buf);
8987 if (err)
8988 return libbpf_err(err);
8989 }
8990
8991 return 0;
8992 }
8993
8994 int bpf_object__pin(struct bpf_object *obj, const char *path)
8995 {
8996 int err;
8997
8998 err = bpf_object__pin_maps(obj, path);
8999 if (err)
9000 return libbpf_err(err);
9001
9002 err = bpf_object__pin_programs(obj, path);
9003 if (err) {
9004 bpf_object__unpin_maps(obj, path);
9005 return libbpf_err(err);
9006 }
9007
9008 return 0;
9009 }
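
/* Usage sketch (illustrative): pin all maps and programs of a loaded object
 * under one bpffs directory and later remove the pins again; the directory
 * path is a placeholder.
 *
 *	err = bpf_object__pin(obj, "/sys/fs/bpf/my_obj");
 *	if (err)
 *		return err;
 *	...
 *	err = bpf_object__unpin(obj, "/sys/fs/bpf/my_obj");
 */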
9010
9011 int bpf_object__unpin(struct bpf_object *obj, const char *path)
9012 {
9013 int err;
9014
9015 err = bpf_object__unpin_programs(obj, path);
9016 if (err)
9017 return libbpf_err(err);
9018
9019 err = bpf_object__unpin_maps(obj, path);
9020 if (err)
9021 return libbpf_err(err);
9022
9023 return 0;
9024 }
9025
9026 static void bpf_map__destroy(struct bpf_map *map)
9027 {
9028 if (map->inner_map) {
9029 bpf_map__destroy(map->inner_map);
9030 zfree(&map->inner_map);
9031 }
9032
9033 zfree(&map->init_slots);
9034 map->init_slots_sz = 0;
9035
9036 if (map->mmaped && map->mmaped != map->obj->arena_data)
9037 munmap(map->mmaped, bpf_map_mmap_sz(map));
9038 map->mmaped = NULL;
9039
9040 if (map->st_ops) {
9041 zfree(&map->st_ops->data);
9042 zfree(&map->st_ops->progs);
9043 zfree(&map->st_ops->kern_func_off);
9044 zfree(&map->st_ops);
9045 }
9046
9047 zfree(&map->name);
9048 zfree(&map->real_name);
9049 zfree(&map->pin_path);
9050
9051 if (map->fd >= 0)
9052 zclose(map->fd);
9053 }
9054
9055 void bpf_object__close(struct bpf_object *obj)
9056 {
9057 size_t i;
9058
9059 if (IS_ERR_OR_NULL(obj))
9060 return;
9061
9062 usdt_manager_free(obj->usdt_man);
9063 obj->usdt_man = NULL;
9064
9065 bpf_gen__free(obj->gen_loader);
9066 bpf_object__elf_finish(obj);
9067 bpf_object_unload(obj);
9068 btf__free(obj->btf);
9069 btf__free(obj->btf_vmlinux);
9070 btf_ext__free(obj->btf_ext);
9071
9072 for (i = 0; i < obj->nr_maps; i++)
9073 bpf_map__destroy(&obj->maps[i]);
9074
9075 zfree(&obj->btf_custom_path);
9076 zfree(&obj->kconfig);
9077
9078 for (i = 0; i < obj->nr_extern; i++) {
9079 zfree(&obj->externs[i].name);
9080 zfree(&obj->externs[i].essent_name);
9081 }
9082
9083 zfree(&obj->externs);
9084 obj->nr_extern = 0;
9085
9086 zfree(&obj->maps);
9087 obj->nr_maps = 0;
9088
9089 if (obj->programs && obj->nr_programs) {
9090 for (i = 0; i < obj->nr_programs; i++)
9091 bpf_program__exit(&obj->programs[i]);
9092 }
9093 zfree(&obj->programs);
9094
9095 zfree(&obj->feat_cache);
9096 zfree(&obj->token_path);
9097 if (obj->token_fd > 0)
9098 close(obj->token_fd);
9099
9100 zfree(&obj->arena_data);
9101
9102 free(obj);
9103 }
9104
9105 const char *bpf_object__name(const struct bpf_object *obj)
9106 {
9107 return obj ? obj->name : libbpf_err_ptr(-EINVAL);
9108 }
9109
9110 unsigned int bpf_object__kversion(const struct bpf_object *obj)
9111 {
9112 return obj ? obj->kern_version : 0;
9113 }
9114
9115 int bpf_object__token_fd(const struct bpf_object *obj)
9116 {
9117 return obj->token_fd ?: -1;
9118 }
9119
9120 struct btf *bpf_object__btf(const struct bpf_object *obj)
9121 {
9122 return obj ? obj->btf : NULL;
9123 }
9124
9125 int bpf_object__btf_fd(const struct bpf_object *obj)
9126 {
9127 return obj->btf ? btf__fd(obj->btf) : -1;
9128 }
9129
9130 int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
9131 {
9132 if (obj->loaded)
9133 return libbpf_err(-EINVAL);
9134
9135 obj->kern_version = kern_version;
9136
9137 return 0;
9138 }
9139
9140 int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
9141 {
9142 struct bpf_gen *gen;
9143
9144 if (!opts)
9145 return -EFAULT;
9146 if (!OPTS_VALID(opts, gen_loader_opts))
9147 return -EINVAL;
9148 gen = calloc(sizeof(*gen), 1);
9149 if (!gen)
9150 return -ENOMEM;
9151 gen->opts = opts;
9152 obj->gen_loader = gen;
9153 return 0;
9154 }
9155
9156 static struct bpf_program *
9157 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
9158 bool forward)
9159 {
9160 size_t nr_programs = obj->nr_programs;
9161 ssize_t idx;
9162
9163 if (!nr_programs)
9164 return NULL;
9165
9166 if (!p)
9167 /* Iter from the beginning */
9168 return forward ? &obj->programs[0] :
9169 &obj->programs[nr_programs - 1];
9170
9171 if (p->obj != obj) {
9172 pr_warn("error: program handler doesn't match object\n");
9173 return errno = EINVAL, NULL;
9174 }
9175
9176 idx = (p - obj->programs) + (forward ? 1 : -1);
9177 if (idx >= obj->nr_programs || idx < 0)
9178 return NULL;
9179 return &obj->programs[idx];
9180 }
9181
9182 struct bpf_program *
9183 bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
9184 {
9185 struct bpf_program *prog = prev;
9186
9187 do {
9188 prog = __bpf_program__iter(prog, obj, true);
9189 } while (prog && prog_is_subprog(obj, prog));
9190
9191 return prog;
9192 }
9193
9194 struct bpf_program *
9195 bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next)
9196 {
9197 struct bpf_program *prog = next;
9198
9199 do {
9200 prog = __bpf_program__iter(prog, obj, false);
9201 } while (prog && prog_is_subprog(obj, prog));
9202
9203 return prog;
9204 }
9205
9206 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
9207 {
9208 prog->prog_ifindex = ifindex;
9209 }
9210
9211 const char *bpf_program__name(const struct bpf_program *prog)
9212 {
9213 return prog->name;
9214 }
9215
9216 const char *bpf_program__section_name(const struct bpf_program *prog)
9217 {
9218 return prog->sec_name;
9219 }
9220
9221 bool bpf_program__autoload(const struct bpf_program *prog)
9222 {
9223 return prog->autoload;
9224 }
9225
9226 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
9227 {
9228 if (prog->obj->loaded)
9229 return libbpf_err(-EINVAL);
9230
9231 prog->autoload = autoload;
9232 return 0;
9233 }
9234
9235 bool bpf_program__autoattach(const struct bpf_program *prog)
9236 {
9237 return prog->autoattach;
9238 }
9239
9240 void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach)
9241 {
9242 prog->autoattach = autoattach;
9243 }
9244
9245 const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog)
9246 {
9247 return prog->insns;
9248 }
9249
9250 size_t bpf_program__insn_cnt(const struct bpf_program *prog)
9251 {
9252 return prog->insns_cnt;
9253 }
9254
9255 int bpf_program__set_insns(struct bpf_program *prog,
9256 struct bpf_insn *new_insns, size_t new_insn_cnt)
9257 {
9258 struct bpf_insn *insns;
9259
9260 if (prog->obj->loaded)
9261 return -EBUSY;
9262
9263 insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns));
9264 /* NULL is a valid return from reallocarray if the new count is zero */
9265 if (!insns && new_insn_cnt) {
9266 pr_warn("prog '%s': failed to realloc prog code\n", prog->name);
9267 return -ENOMEM;
9268 }
9269 memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns));
9270
9271 prog->insns = insns;
9272 prog->insns_cnt = new_insn_cnt;
9273 return 0;
9274 }
9275
9276 int bpf_program__fd(const struct bpf_program *prog)
9277 {
9278 if (!prog)
9279 return libbpf_err(-EINVAL);
9280
9281 if (prog->fd < 0)
9282 return libbpf_err(-ENOENT);
9283
9284 return prog->fd;
9285 }
9286
9287 __alias(bpf_program__type)
9288 enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
9289
9290 enum bpf_prog_type bpf_program__type(const struct bpf_program *prog)
9291 {
9292 return prog->type;
9293 }
9294
9295 static size_t custom_sec_def_cnt;
9296 static struct bpf_sec_def *custom_sec_defs;
9297 static struct bpf_sec_def custom_fallback_def;
9298 static bool has_custom_fallback_def;
9299 static int last_custom_sec_def_handler_id;
9300
9301 int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
9302 {
9303 if (prog->obj->loaded)
9304 return libbpf_err(-EBUSY);
9305
9306 /* if type is not changed, do nothing */
9307 if (prog->type == type)
9308 return 0;
9309
9310 prog->type = type;
9311
9312 /* If a program type was changed, we need to reset associated SEC()
9313 * handler, as it will be invalid now. The only exception is a generic
9314 * fallback handler, which by definition is program type-agnostic and
9315 * is a catch-all custom handler, optionally set by the application,
9316 * so should be able to handle any type of BPF program.
9317 */
9318 if (prog->sec_def != &custom_fallback_def)
9319 prog->sec_def = NULL;
9320 return 0;
9321 }
9322
9323 __alias(bpf_program__expected_attach_type)
9324 enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
9325
9326 enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog)
9327 {
9328 return prog->expected_attach_type;
9329 }
9330
9331 int bpf_program__set_expected_attach_type(struct bpf_program *prog,
9332 enum bpf_attach_type type)
9333 {
9334 if (prog->obj->loaded)
9335 return libbpf_err(-EBUSY);
9336
9337 prog->expected_attach_type = type;
9338 return 0;
9339 }
9340
9341 __u32 bpf_program__flags(const struct bpf_program *prog)
9342 {
9343 return prog->prog_flags;
9344 }
9345
9346 int bpf_program__set_flags(struct bpf_program *prog, __u32 flags)
9347 {
9348 if (prog->obj->loaded)
9349 return libbpf_err(-EBUSY);
9350
9351 prog->prog_flags = flags;
9352 return 0;
9353 }
9354
9355 __u32 bpf_program__log_level(const struct bpf_program *prog)
9356 {
9357 return prog->log_level;
9358 }
9359
9360 int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level)
9361 {
9362 if (prog->obj->loaded)
9363 return libbpf_err(-EBUSY);
9364
9365 prog->log_level = log_level;
9366 return 0;
9367 }
9368
9369 const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size)
9370 {
9371 *log_size = prog->log_size;
9372 return prog->log_buf;
9373 }
9374
9375 int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size)
9376 {
9377 if (log_size && !log_buf)
9378 return -EINVAL;
9379 if (prog->log_size > UINT_MAX)
9380 return -EINVAL;
9381 if (prog->obj->loaded)
9382 return -EBUSY;
9383
9384 prog->log_buf = log_buf;
9385 prog->log_size = log_size;
9386 return 0;
9387 }
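
/* Usage sketch (illustrative): route the verifier log of a single program
 * into a caller-owned buffer before load; buffer size and log level are
 * arbitrary example values.
 *
 *	static char verifier_log[256 * 1024];
 *
 *	bpf_program__set_log_buf(prog, verifier_log, sizeof(verifier_log));
 *	bpf_program__set_log_level(prog, 1);
 *	if (bpf_object__load(obj))
 *		fprintf(stderr, "verifier log:\n%s\n", verifier_log);
 */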
9388
9389 #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \
9390 .sec = (char *)sec_pfx, \
9391 .prog_type = BPF_PROG_TYPE_##ptype, \
9392 .expected_attach_type = atype, \
9393 .cookie = (long)(flags), \
9394 .prog_prepare_load_fn = libbpf_prepare_prog_load, \
9395 __VA_ARGS__ \
9396 }
9397
9398 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9399 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9400 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9401 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9402 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9403 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9404 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9405 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9406 static int attach_kprobe_session(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9407 static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9408 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9409 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9410
9411 static const struct bpf_sec_def section_defs[] = {
9412 SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE),
9413 SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE),
9414 SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE),
9415 SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe),
9416 SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe),
9417 SEC_DEF("uprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
9418 SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe),
9419 SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe),
9420 SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
9421 SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
9422 SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
9423 SEC_DEF("kprobe.session+", KPROBE, BPF_TRACE_KPROBE_SESSION, SEC_NONE, attach_kprobe_session),
9424 SEC_DEF("uprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi),
9425 SEC_DEF("uretprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi),
9426 SEC_DEF("uprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi),
9427 SEC_DEF("uretprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi),
9428 SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall),
9429 SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall),
9430 SEC_DEF("usdt+", KPROBE, 0, SEC_USDT, attach_usdt),
9431 SEC_DEF("usdt.s+", KPROBE, 0, SEC_USDT | SEC_SLEEPABLE, attach_usdt),
9432 SEC_DEF("tc/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), /* alias for tcx */
9433 SEC_DEF("tc/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), /* alias for tcx */
9434 SEC_DEF("tcx/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE),
9435 SEC_DEF("tcx/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE),
9436 SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
9437 SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
9438 SEC_DEF("action", SCHED_ACT, 0, SEC_NONE), /* deprecated / legacy, use tcx */
9439 SEC_DEF("netkit/primary", SCHED_CLS, BPF_NETKIT_PRIMARY, SEC_NONE),
9440 SEC_DEF("netkit/peer", SCHED_CLS, BPF_NETKIT_PEER, SEC_NONE),
9441 SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp),
9442 SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp),
9443 SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
9444 SEC_DEF("raw_tp+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
9445 SEC_DEF("raw_tracepoint.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
9446 SEC_DEF("raw_tp.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
9447 SEC_DEF("tp_btf+", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace),
9448 SEC_DEF("fentry+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace),
9449 SEC_DEF("fmod_ret+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace),
9450 SEC_DEF("fexit+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace),
9451 SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
9452 SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
9453 SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
9454 SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace),
9455 SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
9456 SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
9457 SEC_DEF("lsm_cgroup+", LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF),
9458 SEC_DEF("iter+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
9459 SEC_DEF("iter.s+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
9460 SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE),
9461 SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
9462 SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
9463 SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
9464 SEC_DEF("xdp/cpumap", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
9465 SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS),
9466 SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT),
9467 SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE),
9468 SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE),
9469 SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE),
9470 SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE),
9471 SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE),
9472 SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT),
9473 SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT),
9474 SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT),
9475 SEC_DEF("sk_skb/verdict", SK_SKB, BPF_SK_SKB_VERDICT, SEC_ATTACHABLE_OPT),
9476 SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE),
9477 SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT),
9478 SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT),
9479 SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT),
9480 SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT),
9481 SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT),
9482 SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE),
9483 SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE),
9484 SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE),
9485 SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT),
9486 SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE),
9487 SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE),
9488 SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE),
9489 SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE),
9490 SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE),
9491 SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE),
9492 SEC_DEF("cgroup/connect_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE),
9493 SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE),
9494 SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE),
9495 SEC_DEF("cgroup/sendmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE),
9496 SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE),
9497 SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE),
9498 SEC_DEF("cgroup/recvmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE),
9499 SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE),
9500 SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE),
9501 SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE),
9502 SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE),
9503 SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE),
9504 SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE),
9505 SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE),
9506 SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE),
9507 SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE),
9508 SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT),
9509 SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE),
9510 SEC_DEF("struct_ops.s+", STRUCT_OPS, 0, SEC_SLEEPABLE),
9511 SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE),
9512 SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE),
9513 };
9514
9515 int libbpf_register_prog_handler(const char *sec,
9516 enum bpf_prog_type prog_type,
9517 enum bpf_attach_type exp_attach_type,
9518 const struct libbpf_prog_handler_opts *opts)
9519 {
9520 struct bpf_sec_def *sec_def;
9521
9522 if (!OPTS_VALID(opts, libbpf_prog_handler_opts))
9523 return libbpf_err(-EINVAL);
9524
9525 if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */
9526 return libbpf_err(-E2BIG);
9527
9528 if (sec) {
9529 sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1,
9530 sizeof(*sec_def));
9531 if (!sec_def)
9532 return libbpf_err(-ENOMEM);
9533
9534 custom_sec_defs = sec_def;
9535 sec_def = &custom_sec_defs[custom_sec_def_cnt];
9536 } else {
9537 if (has_custom_fallback_def)
9538 return libbpf_err(-EBUSY);
9539
9540 sec_def = &custom_fallback_def;
9541 }
9542
9543 sec_def->sec = sec ? strdup(sec) : NULL;
9544 if (sec && !sec_def->sec)
9545 return libbpf_err(-ENOMEM);
9546
9547 sec_def->prog_type = prog_type;
9548 sec_def->expected_attach_type = exp_attach_type;
9549 sec_def->cookie = OPTS_GET(opts, cookie, 0);
9550
9551 sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL);
9552 sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL);
9553 sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL);
9554
9555 sec_def->handler_id = ++last_custom_sec_def_handler_id;
9556
9557 if (sec)
9558 custom_sec_def_cnt++;
9559 else
9560 has_custom_fallback_def = true;
9561
9562 return sec_def->handler_id;
9563 }
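
/* Usage sketch (illustrative): register a custom handler for a made-up
 * SEC("abc") section name that maps such programs to BPF_PROG_TYPE_KPROBE;
 * my_attach_fn is a hypothetical callback defined by the application. The
 * returned id can later be passed to libbpf_unregister_prog_handler().
 *
 *	LIBBPF_OPTS(libbpf_prog_handler_opts, opts,
 *		.prog_attach_fn = my_attach_fn,
 *	);
 *	int handler_id = libbpf_register_prog_handler("abc", BPF_PROG_TYPE_KPROBE,
 *						      0, &opts);
 */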
9564
9565 int libbpf_unregister_prog_handler(int handler_id)
9566 {
9567 struct bpf_sec_def *sec_defs;
9568 int i;
9569
9570 if (handler_id <= 0)
9571 return libbpf_err(-EINVAL);
9572
9573 if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) {
9574 memset(&custom_fallback_def, 0, sizeof(custom_fallback_def));
9575 has_custom_fallback_def = false;
9576 return 0;
9577 }
9578
9579 for (i = 0; i < custom_sec_def_cnt; i++) {
9580 if (custom_sec_defs[i].handler_id == handler_id)
9581 break;
9582 }
9583
9584 if (i == custom_sec_def_cnt)
9585 return libbpf_err(-ENOENT);
9586
9587 free(custom_sec_defs[i].sec);
9588 for (i = i + 1; i < custom_sec_def_cnt; i++)
9589 custom_sec_defs[i - 1] = custom_sec_defs[i];
9590 custom_sec_def_cnt--;
9591
9592 /* try to shrink the array, but it's ok if we couldn't */
9593 sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs));
9594 /* if new count is zero, reallocarray can return a valid NULL result;
9595 * in this case the previous pointer will be freed, so we *have to*
9596 * reassign old pointer to the new value (even if it's NULL)
9597 */
9598 if (sec_defs || custom_sec_def_cnt == 0)
9599 custom_sec_defs = sec_defs;
9600
9601 return 0;
9602 }
9603
9604 static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name)
9605 {
9606 size_t len = strlen(sec_def->sec);
9607
9608 /* "type/" always has to have proper SEC("type/extras") form */
9609 if (sec_def->sec[len - 1] == '/') {
9610 if (str_has_pfx(sec_name, sec_def->sec))
9611 return true;
9612 return false;
9613 }
9614
9615 /* "type+" means it can be either exact SEC("type") or
9616 * well-formed SEC("type/extras") with proper '/' separator
9617 */
9618 if (sec_def->sec[len - 1] == '+') {
9619 len--;
9620 /* not even a prefix */
9621 if (strncmp(sec_name, sec_def->sec, len) != 0)
9622 return false;
9623 /* exact match or has '/' separator */
9624 if (sec_name[len] == '\0' || sec_name[len] == '/')
9625 return true;
9626 return false;
9627 }
9628
9629 return strcmp(sec_name, sec_def->sec) == 0;
9630 }
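
/* For example, under these rules SEC_DEF("kprobe+", ...) matches both
 * SEC("kprobe") and SEC("kprobe/do_unlinkat"), a definition ending in '/'
 * matches any section name starting with that prefix, and SEC_DEF("xdp", ...)
 * matches only the exact SEC("xdp").
 */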
9631
9632 static const struct bpf_sec_def *find_sec_def(const char *sec_name)
9633 {
9634 const struct bpf_sec_def *sec_def;
9635 int i, n;
9636
9637 n = custom_sec_def_cnt;
9638 for (i = 0; i < n; i++) {
9639 sec_def = &custom_sec_defs[i];
9640 if (sec_def_matches(sec_def, sec_name))
9641 return sec_def;
9642 }
9643
9644 n = ARRAY_SIZE(section_defs);
9645 for (i = 0; i < n; i++) {
9646 sec_def = &section_defs[i];
9647 if (sec_def_matches(sec_def, sec_name))
9648 return sec_def;
9649 }
9650
9651 if (has_custom_fallback_def)
9652 return &custom_fallback_def;
9653
9654 return NULL;
9655 }
9656
9657 #define MAX_TYPE_NAME_SIZE 32
9658
9659 static char *libbpf_get_type_names(bool attach_type)
9660 {
9661 int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
9662 char *buf;
9663
9664 buf = malloc(len);
9665 if (!buf)
9666 return NULL;
9667
9668 buf[0] = '\0';
9669 /* Forge string buf with all available names */
9670 for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
9671 const struct bpf_sec_def *sec_def = &section_defs[i];
9672
9673 if (attach_type) {
9674 if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
9675 continue;
9676
9677 if (!(sec_def->cookie & SEC_ATTACHABLE))
9678 continue;
9679 }
9680
9681 if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
9682 free(buf);
9683 return NULL;
9684 }
9685 strcat(buf, " ");
9686 strcat(buf, section_defs[i].sec);
9687 }
9688
9689 return buf;
9690 }
9691
9692 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
9693 enum bpf_attach_type *expected_attach_type)
9694 {
9695 const struct bpf_sec_def *sec_def;
9696 char *type_names;
9697
9698 if (!name)
9699 return libbpf_err(-EINVAL);
9700
9701 sec_def = find_sec_def(name);
9702 if (sec_def) {
9703 *prog_type = sec_def->prog_type;
9704 *expected_attach_type = sec_def->expected_attach_type;
9705 return 0;
9706 }
9707
9708 pr_debug("failed to guess program type from ELF section '%s'\n", name);
9709 type_names = libbpf_get_type_names(false);
9710 if (type_names != NULL) {
9711 pr_debug("supported section(type) names are:%s\n", type_names);
9712 free(type_names);
9713 }
9714
9715 return libbpf_err(-ESRCH);
9716 }
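
/* Usage sketch (illustrative): map an ELF section name onto program and
 * expected attach types; per section_defs[] above, "cgroup/connect4" yields
 * BPF_PROG_TYPE_CGROUP_SOCK_ADDR and BPF_CGROUP_INET4_CONNECT.
 *
 *	enum bpf_prog_type prog_type;
 *	enum bpf_attach_type attach_type;
 *	int err;
 *
 *	err = libbpf_prog_type_by_name("cgroup/connect4", &prog_type, &attach_type);
 *	if (err)
 *		return err;
 */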
9717
9718 const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t)
9719 {
9720 if (t < 0 || t >= ARRAY_SIZE(attach_type_name))
9721 return NULL;
9722
9723 return attach_type_name[t];
9724 }
9725
9726 const char *libbpf_bpf_link_type_str(enum bpf_link_type t)
9727 {
9728 if (t < 0 || t >= ARRAY_SIZE(link_type_name))
9729 return NULL;
9730
9731 return link_type_name[t];
9732 }
9733
9734 const char *libbpf_bpf_map_type_str(enum bpf_map_type t)
9735 {
9736 if (t < 0 || t >= ARRAY_SIZE(map_type_name))
9737 return NULL;
9738
9739 return map_type_name[t];
9740 }
9741
9742 const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t)
9743 {
9744 if (t < 0 || t >= ARRAY_SIZE(prog_type_name))
9745 return NULL;
9746
9747 return prog_type_name[t];
9748 }
9749
9750 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
9751 int sec_idx,
9752 size_t offset)
9753 {
9754 struct bpf_map *map;
9755 size_t i;
9756
9757 for (i = 0; i < obj->nr_maps; i++) {
9758 map = &obj->maps[i];
9759 if (!bpf_map__is_struct_ops(map))
9760 continue;
9761 if (map->sec_idx == sec_idx &&
9762 map->sec_offset <= offset &&
9763 offset - map->sec_offset < map->def.value_size)
9764 return map;
9765 }
9766
9767 return NULL;
9768 }
9769
9770 /* Collect the reloc from ELF, populate the st_ops->progs[], and update
9771 * st_ops->data for shadow type.
9772 */
9773 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
9774 Elf64_Shdr *shdr, Elf_Data *data)
9775 {
9776 const struct btf_type *type;
9777 const struct btf_member *member;
9778 struct bpf_struct_ops *st_ops;
9779 struct bpf_program *prog;
9780 unsigned int shdr_idx;
9781 const struct btf *btf;
9782 struct bpf_map *map;
9783 unsigned int moff, insn_idx;
9784 const char *name;
9785 __u32 member_idx;
9786 Elf64_Sym *sym;
9787 Elf64_Rel *rel;
9788 int i, nrels;
9789
9790 btf = obj->btf;
9791 nrels = shdr->sh_size / shdr->sh_entsize;
9792 for (i = 0; i < nrels; i++) {
9793 rel = elf_rel_by_idx(data, i);
9794 if (!rel) {
9795 pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
9796 return -LIBBPF_ERRNO__FORMAT;
9797 }
9798
9799 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
9800 if (!sym) {
9801 pr_warn("struct_ops reloc: symbol %zx not found\n",
9802 (size_t)ELF64_R_SYM(rel->r_info));
9803 return -LIBBPF_ERRNO__FORMAT;
9804 }
9805
9806 name = elf_sym_str(obj, sym->st_name) ?: "<?>";
9807 map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset);
9808 if (!map) {
9809 pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n",
9810 (size_t)rel->r_offset);
9811 return -EINVAL;
9812 }
9813
9814 moff = rel->r_offset - map->sec_offset;
9815 shdr_idx = sym->st_shndx;
9816 st_ops = map->st_ops;
9817 pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
9818 map->name,
9819 (long long)(rel->r_info >> 32),
9820 (long long)sym->st_value,
9821 shdr_idx, (size_t)rel->r_offset,
9822 map->sec_offset, sym->st_name, name);
9823
9824 if (shdr_idx >= SHN_LORESERVE) {
9825 pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n",
9826 map->name, (size_t)rel->r_offset, shdr_idx);
9827 return -LIBBPF_ERRNO__RELOC;
9828 }
9829 if (sym->st_value % BPF_INSN_SZ) {
9830 pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
9831 map->name, (unsigned long long)sym->st_value);
9832 return -LIBBPF_ERRNO__FORMAT;
9833 }
9834 insn_idx = sym->st_value / BPF_INSN_SZ;
9835
9836 type = btf__type_by_id(btf, st_ops->type_id);
9837 member = find_member_by_offset(type, moff * 8);
9838 if (!member) {
9839 pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
9840 map->name, moff);
9841 return -EINVAL;
9842 }
9843 member_idx = member - btf_members(type);
9844 name = btf__name_by_offset(btf, member->name_off);
9845
9846 if (!resolve_func_ptr(btf, member->type, NULL)) {
9847 pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
9848 map->name, name);
9849 return -EINVAL;
9850 }
9851
9852 prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
9853 if (!prog) {
9854 pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
9855 map->name, shdr_idx, name);
9856 return -EINVAL;
9857 }
9858
9859 /* prevent the use of BPF prog with invalid type */
9860 if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
9861 pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n",
9862 map->name, prog->name);
9863 return -EINVAL;
9864 }
9865
9866 st_ops->progs[member_idx] = prog;
9867
9868 /* st_ops->data will be exposed to users, being returned by
9869 * bpf_map__initial_value() as a pointer to the shadow
9870 * type. All function pointers in the original struct type
9871 * should be converted to a pointer to struct bpf_program
9872 * in the shadow type.
9873 */
9874 *((struct bpf_program **)(st_ops->data + moff)) = prog;
9875 }
9876
9877 return 0;
9878 }
9879
9880 #define BTF_TRACE_PREFIX "btf_trace_"
9881 #define BTF_LSM_PREFIX "bpf_lsm_"
9882 #define BTF_ITER_PREFIX "bpf_iter_"
9883 #define BTF_MAX_NAME_SIZE 128
9884
9885 void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
9886 const char **prefix, int *kind)
9887 {
9888 switch (attach_type) {
9889 case BPF_TRACE_RAW_TP:
9890 *prefix = BTF_TRACE_PREFIX;
9891 *kind = BTF_KIND_TYPEDEF;
9892 break;
9893 case BPF_LSM_MAC:
9894 case BPF_LSM_CGROUP:
9895 *prefix = BTF_LSM_PREFIX;
9896 *kind = BTF_KIND_FUNC;
9897 break;
9898 case BPF_TRACE_ITER:
9899 *prefix = BTF_ITER_PREFIX;
9900 *kind = BTF_KIND_FUNC;
9901 break;
9902 default:
9903 *prefix = "";
9904 *kind = BTF_KIND_FUNC;
9905 }
9906 }
9907
9908 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
9909 const char *name, __u32 kind)
9910 {
9911 char btf_type_name[BTF_MAX_NAME_SIZE];
9912 int ret;
9913
9914 ret = snprintf(btf_type_name, sizeof(btf_type_name),
9915 "%s%s", prefix, name);
9916 /* snprintf returns the number of characters written excluding the
9917 * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
9918 * indicates truncation.
9919 */
9920 if (ret < 0 || ret >= sizeof(btf_type_name))
9921 return -ENAMETOOLONG;
9922 return btf__find_by_name_kind(btf, btf_type_name, kind);
9923 }
9924
9925 static inline int find_attach_btf_id(struct btf *btf, const char *name,
9926 enum bpf_attach_type attach_type)
9927 {
9928 const char *prefix;
9929 int kind;
9930
9931 btf_get_kernel_prefix_kind(attach_type, &prefix, &kind);
9932 return find_btf_by_prefix_kind(btf, prefix, name, kind);
9933 }
9934
9935 int libbpf_find_vmlinux_btf_id(const char *name,
9936 enum bpf_attach_type attach_type)
9937 {
9938 struct btf *btf;
9939 int err;
9940
9941 btf = btf__load_vmlinux_btf();
9942 err = libbpf_get_error(btf);
9943 if (err) {
9944 pr_warn("vmlinux BTF is not found\n");
9945 return libbpf_err(err);
9946 }
9947
9948 err = find_attach_btf_id(btf, name, attach_type);
9949 if (err <= 0)
9950 pr_warn("%s is not found in vmlinux BTF\n", name);
9951
9952 btf__free(btf);
9953 return libbpf_err(err);
9954 }
9955
9956 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
9957 {
9958 struct bpf_prog_info info;
9959 __u32 info_len = sizeof(info);
9960 struct btf *btf;
9961 int err;
9962
9963 memset(&info, 0, info_len);
9964 err = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len);
9965 if (err) {
9966 pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %d\n",
9967 attach_prog_fd, err);
9968 return err;
9969 }
9970
9971 err = -EINVAL;
9972 if (!info.btf_id) {
9973 pr_warn("The target program doesn't have BTF\n");
9974 goto out;
9975 }
9976 btf = btf__load_from_kernel_by_id(info.btf_id);
9977 err = libbpf_get_error(btf);
9978 if (err) {
9979 pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err);
9980 goto out;
9981 }
9982 err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
9983 btf__free(btf);
9984 if (err <= 0) {
9985 pr_warn("%s is not found in prog's BTF\n", name);
9986 goto out;
9987 }
9988 out:
9989 return err;
9990 }
9991
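/* Resolve attach_name to a BTF object FD and type ID. attach_name is either
 * a plain name looked up in vmlinux BTF first and then in all module BTFs,
 * or a "module:function" pair (e.g. a hypothetical "my_module:my_func"),
 * which restricts the lookup to the named module ("vmlinux" selects vmlinux
 * BTF itself).
 */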
9992 static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,
9993 enum bpf_attach_type attach_type,
9994 int *btf_obj_fd, int *btf_type_id)
9995 {
9996 int ret, i, mod_len;
9997 const char *fn_name, *mod_name = NULL;
9998
9999 fn_name = strchr(attach_name, ':');
10000 if (fn_name) {
10001 mod_name = attach_name;
10002 mod_len = fn_name - mod_name;
10003 fn_name++;
10004 }
10005
10006 if (!mod_name || strncmp(mod_name, "vmlinux", mod_len) == 0) {
10007 ret = find_attach_btf_id(obj->btf_vmlinux,
10008 mod_name ? fn_name : attach_name,
10009 attach_type);
10010 if (ret > 0) {
10011 *btf_obj_fd = 0; /* vmlinux BTF */
10012 *btf_type_id = ret;
10013 return 0;
10014 }
10015 if (ret != -ENOENT)
10016 return ret;
10017 }
10018
10019 ret = load_module_btfs(obj);
10020 if (ret)
10021 return ret;
10022
10023 for (i = 0; i < obj->btf_module_cnt; i++) {
10024 const struct module_btf *mod = &obj->btf_modules[i];
10025
10026 if (mod_name && strncmp(mod->name, mod_name, mod_len) != 0)
10027 continue;
10028
10029 ret = find_attach_btf_id(mod->btf,
10030 mod_name ? fn_name : attach_name,
10031 attach_type);
10032 if (ret > 0) {
10033 *btf_obj_fd = mod->fd;
10034 *btf_type_id = ret;
10035 return 0;
10036 }
10037 if (ret == -ENOENT)
10038 continue;
10039
10040 return ret;
10041 }
10042
10043 return -ESRCH;
10044 }
10045
10046 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
10047 int *btf_obj_fd, int *btf_type_id)
10048 {
10049 enum bpf_attach_type attach_type = prog->expected_attach_type;
10050 __u32 attach_prog_fd = prog->attach_prog_fd;
10051 int err = 0;
10052
10053 /* BPF program's BTF ID */
10054 if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) {
10055 if (!attach_prog_fd) {
10056 pr_warn("prog '%s': attach program FD is not set\n", prog->name);
10057 return -EINVAL;
10058 }
10059 err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
10060 if (err < 0) {
10061 pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
10062 prog->name, attach_prog_fd, attach_name, err);
10063 return err;
10064 }
10065 *btf_obj_fd = 0;
10066 *btf_type_id = err;
10067 return 0;
10068 }
10069
10070 /* kernel/module BTF ID */
10071 if (prog->obj->gen_loader) {
10072 bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type);
10073 *btf_obj_fd = 0;
10074 *btf_type_id = 1;
10075 } else {
10076 err = find_kernel_btf_id(prog->obj, attach_name,
10077 attach_type, btf_obj_fd,
10078 btf_type_id);
10079 }
10080 if (err) {
10081 pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n",
10082 prog->name, attach_name, err);
10083 return err;
10084 }
10085 return 0;
10086 }
10087
10088 int libbpf_attach_type_by_name(const char *name,
10089 enum bpf_attach_type *attach_type)
10090 {
10091 char *type_names;
10092 const struct bpf_sec_def *sec_def;
10093
10094 if (!name)
10095 return libbpf_err(-EINVAL);
10096
10097 sec_def = find_sec_def(name);
10098 if (!sec_def) {
10099 pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
10100 type_names = libbpf_get_type_names(true);
10101 if (type_names != NULL) {
10102 pr_debug("attachable section(type) names are:%s\n", type_names);
10103 free(type_names);
10104 }
10105
10106 return libbpf_err(-EINVAL);
10107 }
10108
10109 if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
10110 return libbpf_err(-EINVAL);
10111 if (!(sec_def->cookie & SEC_ATTACHABLE))
10112 return libbpf_err(-EINVAL);
10113
10114 *attach_type = sec_def->expected_attach_type;
10115 return 0;
10116 }
10117
10118 int bpf_map__fd(const struct bpf_map *map)
10119 {
10120 if (!map)
10121 return libbpf_err(-EINVAL);
10122 if (!map_is_created(map))
10123 return -1;
10124 return map->fd;
10125 }
10126
10127 static bool map_uses_real_name(const struct bpf_map *map)
10128 {
10129 /* Since libbpf started to support custom .data.* and .rodata.* maps,
10130 * their user-visible name differs from kernel-visible name. Users see
10131 * such map's corresponding ELF section name as a map name.
10132 * This check distinguishes .data/.rodata from .data.* and .rodata.*
10133 * maps to know which name has to be returned to the user.
10134 */
10135 if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
10136 return true;
10137 if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
10138 return true;
10139 return false;
10140 }
10141
10142 const char *bpf_map__name(const struct bpf_map *map)
10143 {
10144 if (!map)
10145 return NULL;
10146
10147 if (map_uses_real_name(map))
10148 return map->real_name;
10149
10150 return map->name;
10151 }
10152
10153 enum bpf_map_type bpf_map__type(const struct bpf_map *map)
10154 {
10155 return map->def.type;
10156 }
10157
10158 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
10159 {
10160 if (map_is_created(map))
10161 return libbpf_err(-EBUSY);
10162 map->def.type = type;
10163 return 0;
10164 }
10165
10166 __u32 bpf_map__map_flags(const struct bpf_map *map)
10167 {
10168 return map->def.map_flags;
10169 }
10170
10171 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
10172 {
10173 if (map_is_created(map))
10174 return libbpf_err(-EBUSY);
10175 map->def.map_flags = flags;
10176 return 0;
10177 }
10178
10179 __u64 bpf_map__map_extra(const struct bpf_map *map)
10180 {
10181 return map->map_extra;
10182 }
10183
10184 int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra)
10185 {
10186 if (map_is_created(map))
10187 return libbpf_err(-EBUSY);
10188 map->map_extra = map_extra;
10189 return 0;
10190 }
10191
10192 __u32 bpf_map__numa_node(const struct bpf_map *map)
10193 {
10194 return map->numa_node;
10195 }
10196
10197 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
10198 {
10199 if (map_is_created(map))
10200 return libbpf_err(-EBUSY);
10201 map->numa_node = numa_node;
10202 return 0;
10203 }
10204
10205 __u32 bpf_map__key_size(const struct bpf_map *map)
10206 {
10207 return map->def.key_size;
10208 }
10209
10210 int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
10211 {
10212 if (map_is_created(map))
10213 return libbpf_err(-EBUSY);
10214 map->def.key_size = size;
10215 return 0;
10216 }
10217
10218 __u32 bpf_map__value_size(const struct bpf_map *map)
10219 {
10220 return map->def.value_size;
10221 }
10222
10223 static int map_btf_datasec_resize(struct bpf_map *map, __u32 size)
10224 {
10225 struct btf *btf;
10226 struct btf_type *datasec_type, *var_type;
10227 struct btf_var_secinfo *var;
10228 const struct btf_type *array_type;
10229 const struct btf_array *array;
10230 int vlen, element_sz, new_array_id;
10231 __u32 nr_elements;
10232
10233 /* check btf existence */
10234 btf = bpf_object__btf(map->obj);
10235 if (!btf)
10236 return -ENOENT;
10237
10238 /* verify map is datasec */
10239 datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map));
10240 if (!btf_is_datasec(datasec_type)) {
10241 pr_warn("map '%s': cannot be resized, map value type is not a datasec\n",
10242 bpf_map__name(map));
10243 return -EINVAL;
10244 }
10245
10246 /* verify datasec has at least one var */
10247 vlen = btf_vlen(datasec_type);
10248 if (vlen == 0) {
10249 pr_warn("map '%s': cannot be resized, map value datasec is empty\n",
10250 bpf_map__name(map));
10251 return -EINVAL;
10252 }
10253
10254 /* verify last var in the datasec is an array */
10255 var = &btf_var_secinfos(datasec_type)[vlen - 1];
10256 var_type = btf_type_by_id(btf, var->type);
10257 array_type = skip_mods_and_typedefs(btf, var_type->type, NULL);
10258 if (!btf_is_array(array_type)) {
10259 pr_warn("map '%s': cannot be resized, last var must be an array\n",
10260 bpf_map__name(map));
10261 return -EINVAL;
10262 }
10263
10264 /* verify request size aligns with array */
10265 array = btf_array(array_type);
10266 element_sz = btf__resolve_size(btf, array->type);
10267 if (element_sz <= 0 || (size - var->offset) % element_sz != 0) {
10268 pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n",
10269 bpf_map__name(map), element_sz, size);
10270 return -EINVAL;
10271 }
10272
10273 /* create a new array based on the existing array, but with new length */
10274 nr_elements = (size - var->offset) / element_sz;
10275 new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements);
10276 if (new_array_id < 0)
10277 return new_array_id;
10278
10279 /* adding a new btf type invalidates existing pointers to btf objects,
10280 * so refresh pointers before proceeding
10281 */
10282 datasec_type = btf_type_by_id(btf, map->btf_value_type_id);
10283 var = &btf_var_secinfos(datasec_type)[vlen - 1];
10284 var_type = btf_type_by_id(btf, var->type);
10285
10286 /* finally update btf info */
10287 datasec_type->size = size;
10288 var->size = size - var->offset;
10289 var_type->type = new_array_id;
10290
10291 return 0;
10292 }
10293
10294 int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
10295 {
10296 if (map->obj->loaded || map->reused)
10297 return libbpf_err(-EBUSY);
10298
10299 if (map->mmaped) {
10300 size_t mmap_old_sz, mmap_new_sz;
10301 int err;
10302
10303 if (map->def.type != BPF_MAP_TYPE_ARRAY)
10304 return -EOPNOTSUPP;
10305
10306 mmap_old_sz = bpf_map_mmap_sz(map);
10307 mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries);
10308 err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz);
10309 if (err) {
10310 pr_warn("map '%s': failed to resize memory-mapped region: %d\n",
10311 bpf_map__name(map), err);
10312 return err;
10313 }
10314 err = map_btf_datasec_resize(map, size);
10315 if (err && err != -ENOENT) {
10316 pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %d\n",
10317 bpf_map__name(map), err);
10318 map->btf_value_type_id = 0;
10319 map->btf_key_type_id = 0;
10320 }
10321 }
10322
10323 map->def.value_size = size;
10324 return 0;
10325 }
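
/* Usage sketch (illustrative): grow a global-data array map before load;
 * the map name and new size are made-up examples. For mmap()'ed array maps
 * this resizes the mapped region and, when possible, the datasec BTF above.
 *
 *	struct bpf_map *map;
 *
 *	map = bpf_object__find_map_by_name(obj, ".data.my_buf");
 *	if (!map)
 *		return -ESRCH;
 *	err = bpf_map__set_value_size(map, 64 * 1024);
 */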
10326
10327 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
10328 {
10329 return map ? map->btf_key_type_id : 0;
10330 }
10331
10332 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
10333 {
10334 return map ? map->btf_value_type_id : 0;
10335 }
10336
10337 int bpf_map__set_initial_value(struct bpf_map *map,
10338 const void *data, size_t size)
10339 {
10340 size_t actual_sz;
10341
10342 if (map->obj->loaded || map->reused)
10343 return libbpf_err(-EBUSY);
10344
10345 if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG)
10346 return libbpf_err(-EINVAL);
10347
10348 if (map->def.type == BPF_MAP_TYPE_ARENA)
10349 actual_sz = map->obj->arena_data_sz;
10350 else
10351 actual_sz = map->def.value_size;
10352 if (size != actual_sz)
10353 return libbpf_err(-EINVAL);
10354
10355 memcpy(map->mmaped, data, size);
10356 return 0;
10357 }
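/*
 * Usage sketch (illustrative only, not part of libbpf): resizing a
 * mmap-able global-data map and seeding its contents before load. The
 * object handle "obj" and the ".data.cfg" map name are assumptions made
 * for the example; the map's last global variable must be an array, as
 * map_btf_datasec_resize() above requires, and both calls must happen
 * before bpf_object__load().
 *
 *	struct bpf_map *m = bpf_object__find_map_by_name(obj, ".data.cfg");
 *	char buf[8192] = {};
 *
 *	if (!m)
 *		return -errno;
 *	if (bpf_map__set_value_size(m, sizeof(buf)))
 *		return -1;
 *	if (bpf_map__set_initial_value(m, buf, sizeof(buf)))
 *		return -1;
 */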
10358
10359 void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize)
10360 {
10361 if (bpf_map__is_struct_ops(map)) {
10362 if (psize)
10363 *psize = map->def.value_size;
10364 return map->st_ops->data;
10365 }
10366
10367 if (!map->mmaped)
10368 return NULL;
10369
10370 if (map->def.type == BPF_MAP_TYPE_ARENA)
10371 *psize = map->obj->arena_data_sz;
10372 else
10373 *psize = map->def.value_size;
10374
10375 return map->mmaped;
10376 }
10377
10378 bool bpf_map__is_internal(const struct bpf_map *map)
10379 {
10380 return map->libbpf_type != LIBBPF_MAP_UNSPEC;
10381 }
10382
10383 __u32 bpf_map__ifindex(const struct bpf_map *map)
10384 {
10385 return map->map_ifindex;
10386 }
10387
10388 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
10389 {
10390 if (map_is_created(map))
10391 return libbpf_err(-EBUSY);
10392 map->map_ifindex = ifindex;
10393 return 0;
10394 }
10395
10396 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
10397 {
10398 if (!bpf_map_type__is_map_in_map(map->def.type)) {
10399 pr_warn("error: unsupported map type\n");
10400 return libbpf_err(-EINVAL);
10401 }
10402 if (map->inner_map_fd != -1) {
10403 pr_warn("error: inner_map_fd already specified\n");
10404 return libbpf_err(-EINVAL);
10405 }
10406 if (map->inner_map) {
10407 bpf_map__destroy(map->inner_map);
10408 zfree(&map->inner_map);
10409 }
10410 map->inner_map_fd = fd;
10411 return 0;
10412 }
10413
10414 static struct bpf_map *
10415 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
10416 {
10417 ssize_t idx;
10418 struct bpf_map *s, *e;
10419
10420 if (!obj || !obj->maps)
10421 return errno = EINVAL, NULL;
10422
10423 s = obj->maps;
10424 e = obj->maps + obj->nr_maps;
10425
10426 if ((m < s) || (m >= e)) {
10427 pr_warn("error in %s: map handler doesn't belong to object\n",
10428 __func__);
10429 return errno = EINVAL, NULL;
10430 }
10431
10432 idx = (m - obj->maps) + i;
10433 if (idx >= obj->nr_maps || idx < 0)
10434 return NULL;
10435 return &obj->maps[idx];
10436 }
10437
10438 struct bpf_map *
10439 bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
10440 {
10441 if (prev == NULL && obj != NULL)
10442 return obj->maps;
10443
10444 return __bpf_map__iter(prev, obj, 1);
10445 }
10446
10447 struct bpf_map *
10448 bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next)
10449 {
10450 if (next == NULL && obj != NULL) {
10451 if (!obj->nr_maps)
10452 return NULL;
10453 return obj->maps + obj->nr_maps - 1;
10454 }
10455
10456 return __bpf_map__iter(next, obj, -1);
10457 }
10458
10459 struct bpf_map *
10460 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
10461 {
10462 struct bpf_map *pos;
10463
10464 bpf_object__for_each_map(pos, obj) {
10465 /* if it's a special internal map name (which always starts
10466  * with a dot) then check if that special name matches the
10467 * real map name (ELF section name)
10468 */
10469 if (name[0] == '.') {
10470 if (pos->real_name && strcmp(pos->real_name, name) == 0)
10471 return pos;
10472 continue;
10473 }
10474 /* otherwise map name has to be an exact match */
10475 if (map_uses_real_name(pos)) {
10476 if (strcmp(pos->real_name, name) == 0)
10477 return pos;
10478 continue;
10479 }
10480 if (strcmp(pos->name, name) == 0)
10481 return pos;
10482 }
10483 return errno = ENOENT, NULL;
10484 }
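/*
 * Lookup sketch (illustrative, not part of libbpf): internal maps are
 * found by their dot-prefixed, ELF-section-derived real name, while
 * ordinary maps match on the map name itself. "obj" is an assumed,
 * already-opened bpf_object; the map names are examples.
 *
 *	struct bpf_map *rodata = bpf_object__find_map_by_name(obj, ".rodata");
 *	struct bpf_map *events = bpf_object__find_map_by_name(obj, "events");
 *
 *	if (!rodata || !events)
 *		fprintf(stderr, "map lookup failed: %d\n", -errno);
 */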
10485
10486 int
10487 bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
10488 {
10489 return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
10490 }
10491
10492 static int validate_map_op(const struct bpf_map *map, size_t key_sz,
10493 size_t value_sz, bool check_value_sz)
10494 {
10495 if (!map_is_created(map)) /* map is not yet created */
10496 return -ENOENT;
10497
10498 if (map->def.key_size != key_sz) {
10499 pr_warn("map '%s': unexpected key size %zu provided, expected %u\n",
10500 map->name, key_sz, map->def.key_size);
10501 return -EINVAL;
10502 }
10503
10504 if (map->fd < 0) {
10505 pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name);
10506 return -EINVAL;
10507 }
10508
10509 if (!check_value_sz)
10510 return 0;
10511
10512 switch (map->def.type) {
10513 case BPF_MAP_TYPE_PERCPU_ARRAY:
10514 case BPF_MAP_TYPE_PERCPU_HASH:
10515 case BPF_MAP_TYPE_LRU_PERCPU_HASH:
10516 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: {
10517 int num_cpu = libbpf_num_possible_cpus();
10518 size_t elem_sz = roundup(map->def.value_size, 8);
10519
10520 if (value_sz != num_cpu * elem_sz) {
10521 pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n",
10522 map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz);
10523 return -EINVAL;
10524 }
10525 break;
10526 }
10527 default:
10528 if (map->def.value_size != value_sz) {
10529 pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
10530 map->name, value_sz, map->def.value_size);
10531 return -EINVAL;
10532 }
10533 break;
10534 }
10535 return 0;
10536 }
10537
10538 int bpf_map__lookup_elem(const struct bpf_map *map,
10539 const void *key, size_t key_sz,
10540 void *value, size_t value_sz, __u64 flags)
10541 {
10542 int err;
10543
10544 err = validate_map_op(map, key_sz, value_sz, true);
10545 if (err)
10546 return libbpf_err(err);
10547
10548 return bpf_map_lookup_elem_flags(map->fd, key, value, flags);
10549 }
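/*
 * Per-CPU sizing sketch (illustrative, not part of libbpf): for PERCPU_*
 * maps the value buffer passed to bpf_map__lookup_elem() must cover every
 * possible CPU, with each element rounded up to 8 bytes, exactly as
 * validate_map_op() above checks. "counters" is an assumed per-CPU array
 * map with 4-byte keys and 8-byte values.
 *
 *	int ncpus = libbpf_num_possible_cpus();
 *	__u32 key = 0;
 *	__u64 *vals = calloc(ncpus, sizeof(__u64));
 *	int err;
 *
 *	err = bpf_map__lookup_elem(counters, &key, sizeof(key),
 *				   vals, ncpus * sizeof(__u64), 0);
 *	free(vals);
 */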
10550
10551 int bpf_map__update_elem(const struct bpf_map *map,
10552 const void *key, size_t key_sz,
10553 const void *value, size_t value_sz, __u64 flags)
10554 {
10555 int err;
10556
10557 err = validate_map_op(map, key_sz, value_sz, true);
10558 if (err)
10559 return libbpf_err(err);
10560
10561 return bpf_map_update_elem(map->fd, key, value, flags);
10562 }
10563
10564 int bpf_map__delete_elem(const struct bpf_map *map,
10565 const void *key, size_t key_sz, __u64 flags)
10566 {
10567 int err;
10568
10569 err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
10570 if (err)
10571 return libbpf_err(err);
10572
10573 return bpf_map_delete_elem_flags(map->fd, key, flags);
10574 }
10575
10576 int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
10577 const void *key, size_t key_sz,
10578 void *value, size_t value_sz, __u64 flags)
10579 {
10580 int err;
10581
10582 err = validate_map_op(map, key_sz, value_sz, true);
10583 if (err)
10584 return libbpf_err(err);
10585
10586 return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags);
10587 }
10588
10589 int bpf_map__get_next_key(const struct bpf_map *map,
10590 const void *cur_key, void *next_key, size_t key_sz)
10591 {
10592 int err;
10593
10594 err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
10595 if (err)
10596 return libbpf_err(err);
10597
10598 return bpf_map_get_next_key(map->fd, cur_key, next_key);
10599 }
10600
10601 long libbpf_get_error(const void *ptr)
10602 {
10603 if (!IS_ERR_OR_NULL(ptr))
10604 return 0;
10605
10606 if (IS_ERR(ptr))
10607 errno = -PTR_ERR(ptr);
10608
10609 /* If ptr == NULL, then errno should be already set by the failing
10610 * API, because libbpf never returns NULL on success and it now always
10611 * sets errno on error. So no extra errno handling for ptr == NULL
10612 * case.
10613 */
10614 return -errno;
10615 }
10616
10617 /* Replace link's underlying BPF program with the new one */
10618 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
10619 {
10620 int ret;
10621 int prog_fd = bpf_program__fd(prog);
10622
10623 if (prog_fd < 0) {
10624 pr_warn("prog '%s': can't use BPF program without FD (was it loaded?)\n",
10625 prog->name);
10626 return libbpf_err(-EINVAL);
10627 }
10628
10629 ret = bpf_link_update(bpf_link__fd(link), prog_fd, NULL);
10630 return libbpf_err_errno(ret);
10631 }
10632
10633 /* Release "ownership" of the underlying BPF resource (typically, a BPF
10634  * program attached to some BPF hook, e.g., tracepoint, kprobe, etc). A
10635  * disconnected link, when destroyed through a bpf_link__destroy() call,
10636  * won't attempt to detach/unregister that BPF resource. This is useful in
10637  * situations where, say, the attached BPF program has to outlive the
10638  * userspace program that attached it. Depending on the type of BPF program,
10639  * though, additional steps (like pinning the BPF program in BPF FS) might be
10640  * necessary to ensure that the exit of the userspace program doesn't trigger
10641  * automatic detachment and cleanup inside the kernel.
10642  */
10643 void bpf_link__disconnect(struct bpf_link *link)
10644 {
10645 link->disconnected = true;
10646 }
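/*
 * Pin-and-disconnect sketch (illustrative, not part of libbpf): keep an
 * attachment alive after the loading process exits by pinning the link in
 * BPF FS and disconnecting it, so that bpf_link__destroy() below only
 * frees memory. "link" is an assumed, already-attached bpf_link and the
 * pin path is a made-up example.
 *
 *	int err = bpf_link__pin(link, "/sys/fs/bpf/my_prog_link");
 *
 *	if (err) {
 *		fprintf(stderr, "pin failed: %d\n", err);
 *	} else {
 *		bpf_link__disconnect(link);
 *		bpf_link__destroy(link);
 *	}
 */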
10647
10648 int bpf_link__destroy(struct bpf_link *link)
10649 {
10650 int err = 0;
10651
10652 if (IS_ERR_OR_NULL(link))
10653 return 0;
10654
10655 if (!link->disconnected && link->detach)
10656 err = link->detach(link);
10657 if (link->pin_path)
10658 free(link->pin_path);
10659 if (link->dealloc)
10660 link->dealloc(link);
10661 else
10662 free(link);
10663
10664 return libbpf_err(err);
10665 }
10666
10667 int bpf_link__fd(const struct bpf_link *link)
10668 {
10669 return link->fd;
10670 }
10671
10672 const char *bpf_link__pin_path(const struct bpf_link *link)
10673 {
10674 return link->pin_path;
10675 }
10676
10677 static int bpf_link__detach_fd(struct bpf_link *link)
10678 {
10679 return libbpf_err_errno(close(link->fd));
10680 }
10681
10682 struct bpf_link *bpf_link__open(const char *path)
10683 {
10684 struct bpf_link *link;
10685 int fd;
10686
10687 fd = bpf_obj_get(path);
10688 if (fd < 0) {
10689 fd = -errno;
10690 pr_warn("failed to open link at %s: %d\n", path, fd);
10691 return libbpf_err_ptr(fd);
10692 }
10693
10694 link = calloc(1, sizeof(*link));
10695 if (!link) {
10696 close(fd);
10697 return libbpf_err_ptr(-ENOMEM);
10698 }
10699 link->detach = &bpf_link__detach_fd;
10700 link->fd = fd;
10701
10702 link->pin_path = strdup(path);
10703 if (!link->pin_path) {
10704 bpf_link__destroy(link);
10705 return libbpf_err_ptr(-ENOMEM);
10706 }
10707
10708 return link;
10709 }
10710
10711 int bpf_link__detach(struct bpf_link *link)
10712 {
10713 return bpf_link_detach(link->fd) ? -errno : 0;
10714 }
10715
10716 int bpf_link__pin(struct bpf_link *link, const char *path)
10717 {
10718 int err;
10719
10720 if (link->pin_path)
10721 return libbpf_err(-EBUSY);
10722 err = make_parent_dir(path);
10723 if (err)
10724 return libbpf_err(err);
10725 err = check_path(path);
10726 if (err)
10727 return libbpf_err(err);
10728
10729 link->pin_path = strdup(path);
10730 if (!link->pin_path)
10731 return libbpf_err(-ENOMEM);
10732
10733 if (bpf_obj_pin(link->fd, link->pin_path)) {
10734 err = -errno;
10735 zfree(&link->pin_path);
10736 return libbpf_err(err);
10737 }
10738
10739 pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
10740 return 0;
10741 }
10742
10743 int bpf_link__unpin(struct bpf_link *link)
10744 {
10745 int err;
10746
10747 if (!link->pin_path)
10748 return libbpf_err(-EINVAL);
10749
10750 err = unlink(link->pin_path);
10751 if (err != 0)
10752 return -errno;
10753
10754 pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
10755 zfree(&link->pin_path);
10756 return 0;
10757 }
10758
10759 struct bpf_link_perf {
10760 struct bpf_link link;
10761 int perf_event_fd;
10762 /* legacy kprobe support: keep track of probe identifier and type */
10763 char *legacy_probe_name;
10764 bool legacy_is_kprobe;
10765 bool legacy_is_retprobe;
10766 };
10767
10768 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe);
10769 static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe);
10770
10771 static int bpf_link_perf_detach(struct bpf_link *link)
10772 {
10773 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
10774 int err = 0;
10775
10776 if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
10777 err = -errno;
10778
10779 if (perf_link->perf_event_fd != link->fd)
10780 close(perf_link->perf_event_fd);
10781 close(link->fd);
10782
10783 /* legacy uprobe/kprobe needs to be removed after perf event fd closure */
10784 if (perf_link->legacy_probe_name) {
10785 if (perf_link->legacy_is_kprobe) {
10786 err = remove_kprobe_event_legacy(perf_link->legacy_probe_name,
10787 perf_link->legacy_is_retprobe);
10788 } else {
10789 err = remove_uprobe_event_legacy(perf_link->legacy_probe_name,
10790 perf_link->legacy_is_retprobe);
10791 }
10792 }
10793
10794 return err;
10795 }
10796
10797 static void bpf_link_perf_dealloc(struct bpf_link *link)
10798 {
10799 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
10800
10801 free(perf_link->legacy_probe_name);
10802 free(perf_link);
10803 }
10804
10805 struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd,
10806 const struct bpf_perf_event_opts *opts)
10807 {
10808 char errmsg[STRERR_BUFSIZE];
10809 struct bpf_link_perf *link;
10810 int prog_fd, link_fd = -1, err;
10811 bool force_ioctl_attach;
10812
10813 if (!OPTS_VALID(opts, bpf_perf_event_opts))
10814 return libbpf_err_ptr(-EINVAL);
10815
10816 if (pfd < 0) {
10817 pr_warn("prog '%s': invalid perf event FD %d\n",
10818 prog->name, pfd);
10819 return libbpf_err_ptr(-EINVAL);
10820 }
10821 prog_fd = bpf_program__fd(prog);
10822 if (prog_fd < 0) {
10823 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
10824 prog->name);
10825 return libbpf_err_ptr(-EINVAL);
10826 }
10827
10828 link = calloc(1, sizeof(*link));
10829 if (!link)
10830 return libbpf_err_ptr(-ENOMEM);
10831 link->link.detach = &bpf_link_perf_detach;
10832 link->link.dealloc = &bpf_link_perf_dealloc;
10833 link->perf_event_fd = pfd;
10834
10835 force_ioctl_attach = OPTS_GET(opts, force_ioctl_attach, false);
10836 if (kernel_supports(prog->obj, FEAT_PERF_LINK) && !force_ioctl_attach) {
10837 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
10838 .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));
10839
10840 link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts);
10841 if (link_fd < 0) {
10842 err = -errno;
10843 pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n",
10844 prog->name, pfd,
10845 err, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10846 goto err_out;
10847 }
10848 link->link.fd = link_fd;
10849 } else {
10850 if (OPTS_GET(opts, bpf_cookie, 0)) {
10851 pr_warn("prog '%s': user context value is not supported\n", prog->name);
10852 err = -EOPNOTSUPP;
10853 goto err_out;
10854 }
10855
10856 if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
10857 err = -errno;
10858 pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n",
10859 prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10860 if (err == -EPROTO)
10861 pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
10862 prog->name, pfd);
10863 goto err_out;
10864 }
10865 link->link.fd = pfd;
10866 }
10867 if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
10868 err = -errno;
10869 pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
10870 prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10871 goto err_out;
10872 }
10873
10874 return &link->link;
10875 err_out:
10876 if (link_fd >= 0)
10877 close(link_fd);
10878 free(link);
10879 return libbpf_err_ptr(err);
10880 }
10881
10882 struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd)
10883 {
10884 return bpf_program__attach_perf_event_opts(prog, pfd, NULL);
10885 }
10886
10887 /*
10888  * this function is expected to parse an integer in the range [0, 2^31-1]
10889  * from the given file using scanf format string fmt. If the actual parsed
10890  * value is negative, the result might be indistinguishable from an error.
10891 */
10892 static int parse_uint_from_file(const char *file, const char *fmt)
10893 {
10894 char buf[STRERR_BUFSIZE];
10895 int err, ret;
10896 FILE *f;
10897
10898 f = fopen(file, "re");
10899 if (!f) {
10900 err = -errno;
10901 pr_debug("failed to open '%s': %s\n", file,
10902 libbpf_strerror_r(err, buf, sizeof(buf)));
10903 return err;
10904 }
10905 err = fscanf(f, fmt, &ret);
10906 if (err != 1) {
10907 err = err == EOF ? -EIO : -errno;
10908 pr_debug("failed to parse '%s': %s\n", file,
10909 libbpf_strerror_r(err, buf, sizeof(buf)));
10910 fclose(f);
10911 return err;
10912 }
10913 fclose(f);
10914 return ret;
10915 }
10916
10917 static int determine_kprobe_perf_type(void)
10918 {
10919 const char *file = "/sys/bus/event_source/devices/kprobe/type";
10920
10921 return parse_uint_from_file(file, "%d\n");
10922 }
10923
10924 static int determine_uprobe_perf_type(void)
10925 {
10926 const char *file = "/sys/bus/event_source/devices/uprobe/type";
10927
10928 return parse_uint_from_file(file, "%d\n");
10929 }
10930
10931 static int determine_kprobe_retprobe_bit(void)
10932 {
10933 const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
10934
10935 return parse_uint_from_file(file, "config:%d\n");
10936 }
10937
10938 static int determine_uprobe_retprobe_bit(void)
10939 {
10940 const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
10941
10942 return parse_uint_from_file(file, "config:%d\n");
10943 }
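/*
 * Value sketch (illustrative; exact values vary by system): on kernels
 * that expose the kprobe/uprobe PMUs, the sysfs files read by the four
 * helpers above each contain a single line, for example
 * /sys/bus/event_source/devices/kprobe/type holding "6" and
 * /sys/bus/event_source/devices/kprobe/format/retprobe holding "config:0".
 * The "%d\n" parse therefore yields the dynamic PMU type to place in
 * perf_event_attr.type, and "config:%d\n" yields the bit to set in
 * perf_event_attr.config for a kretprobe/uretprobe.
 */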
10944
10945 #define PERF_UPROBE_REF_CTR_OFFSET_BITS 32
10946 #define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32
10947
10948 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
10949 uint64_t offset, int pid, size_t ref_ctr_off)
10950 {
10951 const size_t attr_sz = sizeof(struct perf_event_attr);
10952 struct perf_event_attr attr;
10953 char errmsg[STRERR_BUFSIZE];
10954 int type, pfd;
10955
10956 if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
10957 return -EINVAL;
10958
10959 memset(&attr, 0, attr_sz);
10960
10961 type = uprobe ? determine_uprobe_perf_type()
10962 : determine_kprobe_perf_type();
10963 if (type < 0) {
10964 pr_warn("failed to determine %s perf type: %s\n",
10965 uprobe ? "uprobe" : "kprobe",
10966 libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
10967 return type;
10968 }
10969 if (retprobe) {
10970 int bit = uprobe ? determine_uprobe_retprobe_bit()
10971 : determine_kprobe_retprobe_bit();
10972
10973 if (bit < 0) {
10974 pr_warn("failed to determine %s retprobe bit: %s\n",
10975 uprobe ? "uprobe" : "kprobe",
10976 libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
10977 return bit;
10978 }
10979 attr.config |= 1 << bit;
10980 }
10981 attr.size = attr_sz;
10982 attr.type = type;
10983 attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
10984 attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
10985 attr.config2 = offset; /* kprobe_addr or probe_offset */
10986
10987 /* pid filter is meaningful only for uprobes */
10988 pfd = syscall(__NR_perf_event_open, &attr,
10989 pid < 0 ? -1 : pid /* pid */,
10990 pid == -1 ? 0 : -1 /* cpu */,
10991 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
10992 return pfd >= 0 ? pfd : -errno;
10993 }
10994
10995 static int append_to_file(const char *file, const char *fmt, ...)
10996 {
10997 int fd, n, err = 0;
10998 va_list ap;
10999 char buf[1024];
11000
11001 va_start(ap, fmt);
11002 n = vsnprintf(buf, sizeof(buf), fmt, ap);
11003 va_end(ap);
11004
11005 if (n < 0 || n >= sizeof(buf))
11006 return -EINVAL;
11007
11008 fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
11009 if (fd < 0)
11010 return -errno;
11011
11012 if (write(fd, buf, n) < 0)
11013 err = -errno;
11014
11015 close(fd);
11016 return err;
11017 }
11018
11019 #define DEBUGFS "/sys/kernel/debug/tracing"
11020 #define TRACEFS "/sys/kernel/tracing"
11021
11022 static bool use_debugfs(void)
11023 {
11024 static int has_debugfs = -1;
11025
11026 if (has_debugfs < 0)
11027 has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0;
11028
11029 return has_debugfs == 1;
11030 }
11031
11032 static const char *tracefs_path(void)
11033 {
11034 return use_debugfs() ? DEBUGFS : TRACEFS;
11035 }
11036
11037 static const char *tracefs_kprobe_events(void)
11038 {
11039 return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events";
11040 }
11041
11042 static const char *tracefs_uprobe_events(void)
11043 {
11044 return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events";
11045 }
11046
11047 static const char *tracefs_available_filter_functions(void)
11048 {
11049 return use_debugfs() ? DEBUGFS"/available_filter_functions"
11050 : TRACEFS"/available_filter_functions";
11051 }
11052
11053 static const char *tracefs_available_filter_functions_addrs(void)
11054 {
11055 return use_debugfs() ? DEBUGFS"/available_filter_functions_addrs"
11056 : TRACEFS"/available_filter_functions_addrs";
11057 }
11058
11059 static void gen_probe_legacy_event_name(char *buf, size_t buf_sz,
11060 const char *name, size_t offset)
11061 {
11062 static int index = 0;
11063 int i;
11064
11065 snprintf(buf, buf_sz, "libbpf_%u_%d_%s_0x%zx", getpid(),
11066 __sync_fetch_and_add(&index, 1), name, offset);
11067
11068 /* sanitize name in the probe name */
11069 for (i = 0; buf[i]; i++) {
11070 if (!isalnum(buf[i]))
11071 buf[i] = '_';
11072 }
11073 }
11074
11075 static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
11076 const char *kfunc_name, size_t offset)
11077 {
11078 return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx",
11079 retprobe ? 'r' : 'p',
11080 retprobe ? "kretprobes" : "kprobes",
11081 probe_name, kfunc_name, offset);
11082 }
11083
11084 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe)
11085 {
11086 return append_to_file(tracefs_kprobe_events(), "-:%s/%s",
11087 retprobe ? "kretprobes" : "kprobes", probe_name);
11088 }
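/*
 * Format sketch (illustrative): given a generated probe name like
 * "libbpf_1234_0_do_sys_open_0x0" (a hypothetical value from
 * gen_probe_legacy_event_name()), the helpers above append lines of the
 * following shape to <tracefs>/kprobe_events:
 *
 *	p:kprobes/libbpf_1234_0_do_sys_open_0x0 do_sys_open+0x0
 *	r:kretprobes/libbpf_1234_0_do_sys_open_0x0 do_sys_open+0x0
 *	-:kprobes/libbpf_1234_0_do_sys_open_0x0
 *
 * i.e. 'p'/'r' creates a kprobe/kretprobe event and '-' removes it again.
 */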
11089
11090 static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe)
11091 {
11092 char file[256];
11093
11094 snprintf(file, sizeof(file), "%s/events/%s/%s/id",
11095 tracefs_path(), retprobe ? "kretprobes" : "kprobes", probe_name);
11096
11097 return parse_uint_from_file(file, "%d\n");
11098 }
11099
11100 static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
11101 const char *kfunc_name, size_t offset, int pid)
11102 {
11103 const size_t attr_sz = sizeof(struct perf_event_attr);
11104 struct perf_event_attr attr;
11105 char errmsg[STRERR_BUFSIZE];
11106 int type, pfd, err;
11107
11108 err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset);
11109 if (err < 0) {
11110 pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n",
11111 kfunc_name, offset,
11112 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11113 return err;
11114 }
11115 type = determine_kprobe_perf_type_legacy(probe_name, retprobe);
11116 if (type < 0) {
11117 err = type;
11118 pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n",
11119 kfunc_name, offset,
11120 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11121 goto err_clean_legacy;
11122 }
11123
11124 memset(&attr, 0, attr_sz);
11125 attr.size = attr_sz;
11126 attr.config = type;
11127 attr.type = PERF_TYPE_TRACEPOINT;
11128
11129 pfd = syscall(__NR_perf_event_open, &attr,
11130 pid < 0 ? -1 : pid, /* pid */
11131 pid == -1 ? 0 : -1, /* cpu */
11132 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
11133 if (pfd < 0) {
11134 err = -errno;
11135 pr_warn("legacy kprobe perf_event_open() failed: %s\n",
11136 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11137 goto err_clean_legacy;
11138 }
11139 return pfd;
11140
11141 err_clean_legacy:
11142 /* Clear the newly added legacy kprobe_event */
11143 remove_kprobe_event_legacy(probe_name, retprobe);
11144 return err;
11145 }
11146
11147 static const char *arch_specific_syscall_pfx(void)
11148 {
11149 #if defined(__x86_64__)
11150 return "x64";
11151 #elif defined(__i386__)
11152 return "ia32";
11153 #elif defined(__s390x__)
11154 return "s390x";
11155 #elif defined(__s390__)
11156 return "s390";
11157 #elif defined(__arm__)
11158 return "arm";
11159 #elif defined(__aarch64__)
11160 return "arm64";
11161 #elif defined(__mips__)
11162 return "mips";
11163 #elif defined(__riscv)
11164 return "riscv";
11165 #elif defined(__powerpc__)
11166 return "powerpc";
11167 #elif defined(__powerpc64__)
11168 return "powerpc64";
11169 #else
11170 return NULL;
11171 #endif
11172 }
11173
11174 int probe_kern_syscall_wrapper(int token_fd)
11175 {
11176 char syscall_name[64];
11177 const char *ksys_pfx;
11178
11179 ksys_pfx = arch_specific_syscall_pfx();
11180 if (!ksys_pfx)
11181 return 0;
11182
11183 snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx);
11184
11185 if (determine_kprobe_perf_type() >= 0) {
11186 int pfd;
11187
11188 pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0);
11189 if (pfd >= 0)
11190 close(pfd);
11191
11192 return pfd >= 0 ? 1 : 0;
11193 } else { /* legacy mode */
11194 char probe_name[MAX_EVENT_NAME_LEN];
11195
11196 gen_probe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
11197 if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
11198 return 0;
11199
11200 (void)remove_kprobe_event_legacy(probe_name, false);
11201 return 1;
11202 }
11203 }
11204
11205 struct bpf_link *
11206 bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
11207 const char *func_name,
11208 const struct bpf_kprobe_opts *opts)
11209 {
11210 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
11211 enum probe_attach_mode attach_mode;
11212 char errmsg[STRERR_BUFSIZE];
11213 char *legacy_probe = NULL;
11214 struct bpf_link *link;
11215 size_t offset;
11216 bool retprobe, legacy;
11217 int pfd, err;
11218
11219 if (!OPTS_VALID(opts, bpf_kprobe_opts))
11220 return libbpf_err_ptr(-EINVAL);
11221
11222 attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
11223 retprobe = OPTS_GET(opts, retprobe, false);
11224 offset = OPTS_GET(opts, offset, 0);
11225 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
11226
11227 legacy = determine_kprobe_perf_type() < 0;
11228 switch (attach_mode) {
11229 case PROBE_ATTACH_MODE_LEGACY:
11230 legacy = true;
11231 pe_opts.force_ioctl_attach = true;
11232 break;
11233 case PROBE_ATTACH_MODE_PERF:
11234 if (legacy)
11235 return libbpf_err_ptr(-ENOTSUP);
11236 pe_opts.force_ioctl_attach = true;
11237 break;
11238 case PROBE_ATTACH_MODE_LINK:
11239 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
11240 return libbpf_err_ptr(-ENOTSUP);
11241 break;
11242 case PROBE_ATTACH_MODE_DEFAULT:
11243 break;
11244 default:
11245 return libbpf_err_ptr(-EINVAL);
11246 }
11247
11248 if (!legacy) {
11249 pfd = perf_event_open_probe(false /* uprobe */, retprobe,
11250 func_name, offset,
11251 -1 /* pid */, 0 /* ref_ctr_off */);
11252 } else {
11253 char probe_name[MAX_EVENT_NAME_LEN];
11254
11255 gen_probe_legacy_event_name(probe_name, sizeof(probe_name),
11256 func_name, offset);
11257
11258 legacy_probe = strdup(probe_name);
11259 if (!legacy_probe)
11260 return libbpf_err_ptr(-ENOMEM);
11261
11262 pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name,
11263 offset, -1 /* pid */);
11264 }
11265 if (pfd < 0) {
11266 err = -errno;
11267 pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n",
11268 prog->name, retprobe ? "kretprobe" : "kprobe",
11269 func_name, offset,
11270 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11271 goto err_out;
11272 }
11273 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
11274 err = libbpf_get_error(link);
11275 if (err) {
11276 close(pfd);
11277 pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n",
11278 prog->name, retprobe ? "kretprobe" : "kprobe",
11279 func_name, offset,
11280 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11281 goto err_clean_legacy;
11282 }
11283 if (legacy) {
11284 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
11285
11286 perf_link->legacy_probe_name = legacy_probe;
11287 perf_link->legacy_is_kprobe = true;
11288 perf_link->legacy_is_retprobe = retprobe;
11289 }
11290
11291 return link;
11292
11293 err_clean_legacy:
11294 if (legacy)
11295 remove_kprobe_event_legacy(legacy_probe, retprobe);
11296 err_out:
11297 free(legacy_probe);
11298 return libbpf_err_ptr(err);
11299 }
11300
11301 struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog,
11302 bool retprobe,
11303 const char *func_name)
11304 {
11305 DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
11306 .retprobe = retprobe,
11307 );
11308
11309 return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
11310 }
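/*
 * Attach sketch (illustrative, not part of libbpf): attaching an already
 * loaded program "prog" to a kernel function with an explicit offset and
 * BPF cookie through the opts-based API; the function name and values are
 * examples only.
 *
 *	LIBBPF_OPTS(bpf_kprobe_opts, kopts,
 *		.offset = 0x10,
 *		.bpf_cookie = 0xdeadbeef,
 *	);
 *	struct bpf_link *l;
 *
 *	l = bpf_program__attach_kprobe_opts(prog, "tcp_v4_connect", &kopts);
 *	if (!l)
 *		fprintf(stderr, "kprobe attach failed: %d\n", -errno);
 */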
11311
11312 struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog,
11313 const char *syscall_name,
11314 const struct bpf_ksyscall_opts *opts)
11315 {
11316 LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
11317 char func_name[128];
11318
11319 if (!OPTS_VALID(opts, bpf_ksyscall_opts))
11320 return libbpf_err_ptr(-EINVAL);
11321
11322 if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) {
11323 /* arch_specific_syscall_pfx() should never return NULL here
11324  * because it is guarded by kernel_supports(). However, since the
11325  * compiler does not know that, we keep an explicit fallback
11326  * conditional as well.
11327 */
11328 snprintf(func_name, sizeof(func_name), "__%s_sys_%s",
11329 arch_specific_syscall_pfx() ? : "", syscall_name);
11330 } else {
11331 snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name);
11332 }
11333
11334 kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false);
11335 kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
11336
11337 return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts);
11338 }
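/*
 * Syscall-probe sketch (illustrative, not part of libbpf): on a kernel
 * with syscall wrappers this resolves to "__<arch>_sys_openat" (e.g.
 * "__x64_sys_openat"), otherwise to "__se_sys_openat". "prog" is an
 * assumed, already-loaded ksyscall program.
 *
 *	LIBBPF_OPTS(bpf_ksyscall_opts, kopts, .retprobe = false);
 *	struct bpf_link *l = bpf_program__attach_ksyscall(prog, "openat", &kopts);
 *
 *	if (!l)
 *		fprintf(stderr, "ksyscall attach failed: %d\n", -errno);
 */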
11339
11340 /* Adapted from perf/util/string.c */
11341 bool glob_match(const char *str, const char *pat)
11342 {
11343 while (*str && *pat && *pat != '*') {
11344 if (*pat == '?') { /* Matches any single character */
11345 str++;
11346 pat++;
11347 continue;
11348 }
11349 if (*str != *pat)
11350 return false;
11351 str++;
11352 pat++;
11353 }
11354 /* Check wild card */
11355 if (*pat == '*') {
11356 while (*pat == '*')
11357 pat++;
11358 if (!*pat) /* Tail wild card matches all */
11359 return true;
11360 while (*str)
11361 if (glob_match(str++, pat))
11362 return true;
11363 }
11364 return !*str && !*pat;
11365 }
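/*
 * Matching sketch (illustrative): '*' matches any number of characters
 * (including none) and '?' matches exactly one, so for example:
 *
 *	glob_match("tcp_v4_connect", "tcp_*")        == true
 *	glob_match("tcp_v4_connect", "tcp_v?_conn*") == true
 *	glob_match("tcp_v4_connect", "udp_*")        == false
 */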
11366
11367 struct kprobe_multi_resolve {
11368 const char *pattern;
11369 unsigned long *addrs;
11370 size_t cap;
11371 size_t cnt;
11372 };
11373
11374 struct avail_kallsyms_data {
11375 char **syms;
11376 size_t cnt;
11377 struct kprobe_multi_resolve *res;
11378 };
11379
11380 static int avail_func_cmp(const void *a, const void *b)
11381 {
11382 return strcmp(*(const char **)a, *(const char **)b);
11383 }
11384
11385 static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type,
11386 const char *sym_name, void *ctx)
11387 {
11388 struct avail_kallsyms_data *data = ctx;
11389 struct kprobe_multi_resolve *res = data->res;
11390 int err;
11391
11392 if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp))
11393 return 0;
11394
11395 err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, sizeof(*res->addrs), res->cnt + 1);
11396 if (err)
11397 return err;
11398
11399 res->addrs[res->cnt++] = (unsigned long)sym_addr;
11400 return 0;
11401 }
11402
11403 static int libbpf_available_kallsyms_parse(struct kprobe_multi_resolve *res)
11404 {
11405 const char *available_functions_file = tracefs_available_filter_functions();
11406 struct avail_kallsyms_data data;
11407 char sym_name[500];
11408 FILE *f;
11409 int err = 0, ret, i;
11410 char **syms = NULL;
11411 size_t cap = 0, cnt = 0;
11412
11413 f = fopen(available_functions_file, "re");
11414 if (!f) {
11415 err = -errno;
11416 pr_warn("failed to open %s: %d\n", available_functions_file, err);
11417 return err;
11418 }
11419
11420 while (true) {
11421 char *name;
11422
11423 ret = fscanf(f, "%499s%*[^\n]\n", sym_name);
11424 if (ret == EOF && feof(f))
11425 break;
11426
11427 if (ret != 1) {
11428 pr_warn("failed to parse available_filter_functions entry: %d\n", ret);
11429 err = -EINVAL;
11430 goto cleanup;
11431 }
11432
11433 if (!glob_match(sym_name, res->pattern))
11434 continue;
11435
11436 err = libbpf_ensure_mem((void **)&syms, &cap, sizeof(*syms), cnt + 1);
11437 if (err)
11438 goto cleanup;
11439
11440 name = strdup(sym_name);
11441 if (!name) {
11442 err = -errno;
11443 goto cleanup;
11444 }
11445
11446 syms[cnt++] = name;
11447 }
11448
11449 /* no entries found, bail out */
11450 if (cnt == 0) {
11451 err = -ENOENT;
11452 goto cleanup;
11453 }
11454
11455 /* sort available functions */
11456 qsort(syms, cnt, sizeof(*syms), avail_func_cmp);
11457
11458 data.syms = syms;
11459 data.res = res;
11460 data.cnt = cnt;
11461 libbpf_kallsyms_parse(avail_kallsyms_cb, &data);
11462
11463 if (res->cnt == 0)
11464 err = -ENOENT;
11465
11466 cleanup:
11467 for (i = 0; i < cnt; i++)
11468 free((char *)syms[i]);
11469 free(syms);
11470
11471 fclose(f);
11472 return err;
11473 }
11474
11475 static bool has_available_filter_functions_addrs(void)
11476 {
11477 return access(tracefs_available_filter_functions_addrs(), R_OK) != -1;
11478 }
11479
11480 static int libbpf_available_kprobes_parse(struct kprobe_multi_resolve *res)
11481 {
11482 const char *available_path = tracefs_available_filter_functions_addrs();
11483 char sym_name[500];
11484 FILE *f;
11485 int ret, err = 0;
11486 unsigned long long sym_addr;
11487
11488 f = fopen(available_path, "re");
11489 if (!f) {
11490 err = -errno;
11491 pr_warn("failed to open %s: %d\n", available_path, err);
11492 return err;
11493 }
11494
11495 while (true) {
11496 ret = fscanf(f, "%llx %499s%*[^\n]\n", &sym_addr, sym_name);
11497 if (ret == EOF && feof(f))
11498 break;
11499
11500 if (ret != 2) {
11501 pr_warn("failed to parse available_filter_functions_addrs entry: %d\n",
11502 ret);
11503 err = -EINVAL;
11504 goto cleanup;
11505 }
11506
11507 if (!glob_match(sym_name, res->pattern))
11508 continue;
11509
11510 err = libbpf_ensure_mem((void **)&res->addrs, &res->cap,
11511 sizeof(*res->addrs), res->cnt + 1);
11512 if (err)
11513 goto cleanup;
11514
11515 res->addrs[res->cnt++] = (unsigned long)sym_addr;
11516 }
11517
11518 if (res->cnt == 0)
11519 err = -ENOENT;
11520
11521 cleanup:
11522 fclose(f);
11523 return err;
11524 }
11525
11526 struct bpf_link *
11527 bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
11528 const char *pattern,
11529 const struct bpf_kprobe_multi_opts *opts)
11530 {
11531 LIBBPF_OPTS(bpf_link_create_opts, lopts);
11532 struct kprobe_multi_resolve res = {
11533 .pattern = pattern,
11534 };
11535 enum bpf_attach_type attach_type;
11536 struct bpf_link *link = NULL;
11537 char errmsg[STRERR_BUFSIZE];
11538 const unsigned long *addrs;
11539 int err, link_fd, prog_fd;
11540 bool retprobe, session;
11541 const __u64 *cookies;
11542 const char **syms;
11543 size_t cnt;
11544
11545 if (!OPTS_VALID(opts, bpf_kprobe_multi_opts))
11546 return libbpf_err_ptr(-EINVAL);
11547
11548 prog_fd = bpf_program__fd(prog);
11549 if (prog_fd < 0) {
11550 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
11551 prog->name);
11552 return libbpf_err_ptr(-EINVAL);
11553 }
11554
11555 syms = OPTS_GET(opts, syms, false);
11556 addrs = OPTS_GET(opts, addrs, false);
11557 cnt = OPTS_GET(opts, cnt, false);
11558 cookies = OPTS_GET(opts, cookies, false);
11559
11560 if (!pattern && !addrs && !syms)
11561 return libbpf_err_ptr(-EINVAL);
11562 if (pattern && (addrs || syms || cookies || cnt))
11563 return libbpf_err_ptr(-EINVAL);
11564 if (!pattern && !cnt)
11565 return libbpf_err_ptr(-EINVAL);
11566 if (addrs && syms)
11567 return libbpf_err_ptr(-EINVAL);
11568
11569 if (pattern) {
11570 if (has_available_filter_functions_addrs())
11571 err = libbpf_available_kprobes_parse(&res);
11572 else
11573 err = libbpf_available_kallsyms_parse(&res);
11574 if (err)
11575 goto error;
11576 addrs = res.addrs;
11577 cnt = res.cnt;
11578 }
11579
11580 retprobe = OPTS_GET(opts, retprobe, false);
11581 session = OPTS_GET(opts, session, false);
11582
11583 if (retprobe && session)
11584 return libbpf_err_ptr(-EINVAL);
11585
11586 attach_type = session ? BPF_TRACE_KPROBE_SESSION : BPF_TRACE_KPROBE_MULTI;
11587
11588 lopts.kprobe_multi.syms = syms;
11589 lopts.kprobe_multi.addrs = addrs;
11590 lopts.kprobe_multi.cookies = cookies;
11591 lopts.kprobe_multi.cnt = cnt;
11592 lopts.kprobe_multi.flags = retprobe ? BPF_F_KPROBE_MULTI_RETURN : 0;
11593
11594 link = calloc(1, sizeof(*link));
11595 if (!link) {
11596 err = -ENOMEM;
11597 goto error;
11598 }
11599 link->detach = &bpf_link__detach_fd;
11600
11601 link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts);
11602 if (link_fd < 0) {
11603 err = -errno;
11604 pr_warn("prog '%s': failed to attach: %s\n",
11605 prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11606 goto error;
11607 }
11608 link->fd = link_fd;
11609 free(res.addrs);
11610 return link;
11611
11612 error:
11613 free(link);
11614 free(res.addrs);
11615 return libbpf_err_ptr(err);
11616 }
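/*
 * Multi-attach sketch (illustrative, not part of libbpf): either a glob
 * pattern or an explicit syms/cookies array may be supplied, but not both.
 * "prog" is an assumed, already-loaded kprobe.multi program; the symbol
 * names and cookie values are examples.
 *
 *	const char *syms[] = { "tcp_v4_connect", "tcp_v6_connect" };
 *	__u64 cookies[] = { 4, 6 };
 *	LIBBPF_OPTS(bpf_kprobe_multi_opts, mopts,
 *		.syms = syms,
 *		.cookies = cookies,
 *		.cnt = 2,
 *	);
 *	struct bpf_link *l;
 *
 *	l = bpf_program__attach_kprobe_multi_opts(prog, NULL, &mopts);
 *
 * or, pattern-based:
 *
 *	l = bpf_program__attach_kprobe_multi_opts(prog, "tcp_*_connect", NULL);
 */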
11617
11618 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11619 {
11620 DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
11621 unsigned long offset = 0;
11622 const char *func_name;
11623 char *func;
11624 int n;
11625
11626 *link = NULL;
11627
11628 /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */
11629 if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0)
11630 return 0;
11631
11632 opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/");
11633 if (opts.retprobe)
11634 func_name = prog->sec_name + sizeof("kretprobe/") - 1;
11635 else
11636 func_name = prog->sec_name + sizeof("kprobe/") - 1;
11637
11638 n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
11639 if (n < 1) {
11640 pr_warn("kprobe name is invalid: %s\n", func_name);
11641 return -EINVAL;
11642 }
11643 if (opts.retprobe && offset != 0) {
11644 free(func);
11645 pr_warn("kretprobes do not support offset specification\n");
11646 return -EINVAL;
11647 }
11648
11649 opts.offset = offset;
11650 *link = bpf_program__attach_kprobe_opts(prog, func, &opts);
11651 free(func);
11652 return libbpf_get_error(*link);
11653 }
11654
11655 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11656 {
11657 LIBBPF_OPTS(bpf_ksyscall_opts, opts);
11658 const char *syscall_name;
11659
11660 *link = NULL;
11661
11662 /* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */
11663 if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0)
11664 return 0;
11665
11666 opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/");
11667 if (opts.retprobe)
11668 syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1;
11669 else
11670 syscall_name = prog->sec_name + sizeof("ksyscall/") - 1;
11671
11672 *link = bpf_program__attach_ksyscall(prog, syscall_name, &opts);
11673 return *link ? 0 : -errno;
11674 }
11675
11676 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11677 {
11678 LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
11679 const char *spec;
11680 char *pattern;
11681 int n;
11682
11683 *link = NULL;
11684
11685 /* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */
11686 if (strcmp(prog->sec_name, "kprobe.multi") == 0 ||
11687 strcmp(prog->sec_name, "kretprobe.multi") == 0)
11688 return 0;
11689
11690 opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/");
11691 if (opts.retprobe)
11692 spec = prog->sec_name + sizeof("kretprobe.multi/") - 1;
11693 else
11694 spec = prog->sec_name + sizeof("kprobe.multi/") - 1;
11695
11696 n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern);
11697 if (n < 1) {
11698 pr_warn("kprobe multi pattern is invalid: %s\n", spec);
11699 return -EINVAL;
11700 }
11701
11702 *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts);
11703 free(pattern);
11704 return libbpf_get_error(*link);
11705 }
11706
11707 static int attach_kprobe_session(const struct bpf_program *prog, long cookie,
11708 struct bpf_link **link)
11709 {
11710 LIBBPF_OPTS(bpf_kprobe_multi_opts, opts, .session = true);
11711 const char *spec;
11712 char *pattern;
11713 int n;
11714
11715 *link = NULL;
11716
11717 /* no auto-attach for SEC("kprobe.session") */
11718 if (strcmp(prog->sec_name, "kprobe.session") == 0)
11719 return 0;
11720
11721 spec = prog->sec_name + sizeof("kprobe.session/") - 1;
11722 n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern);
11723 if (n < 1) {
11724 pr_warn("kprobe session pattern is invalid: %s\n", spec);
11725 return -EINVAL;
11726 }
11727
11728 *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts);
11729 free(pattern);
11730 return *link ? 0 : -errno;
11731 }
11732
11733 static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11734 {
11735 char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
11736 LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
11737 int n, ret = -EINVAL;
11738
11739 *link = NULL;
11740
11741 n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
11742 &probe_type, &binary_path, &func_name);
11743 switch (n) {
11744 case 1:
11745 /* handle SEC("u[ret]probe.multi") - format is valid, but auto-attach is impossible. */
11746 ret = 0;
11747 break;
11748 case 3:
11749 opts.retprobe = str_has_pfx(probe_type, "uretprobe.multi");
11750 *link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts);
11751 ret = libbpf_get_error(*link);
11752 break;
11753 default:
11754 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
11755 prog->sec_name);
11756 break;
11757 }
11758 free(probe_type);
11759 free(binary_path);
11760 free(func_name);
11761 return ret;
11762 }
11763
11764 static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
11765 const char *binary_path, size_t offset)
11766 {
11767 return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx",
11768 retprobe ? 'r' : 'p',
11769 retprobe ? "uretprobes" : "uprobes",
11770 probe_name, binary_path, offset);
11771 }
11772
11773 static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe)
11774 {
11775 return append_to_file(tracefs_uprobe_events(), "-:%s/%s",
11776 retprobe ? "uretprobes" : "uprobes", probe_name);
11777 }
11778
11779 static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe)
11780 {
11781 char file[512];
11782
11783 snprintf(file, sizeof(file), "%s/events/%s/%s/id",
11784 tracefs_path(), retprobe ? "uretprobes" : "uprobes", probe_name);
11785
11786 return parse_uint_from_file(file, "%d\n");
11787 }
11788
11789 static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
11790 const char *binary_path, size_t offset, int pid)
11791 {
11792 const size_t attr_sz = sizeof(struct perf_event_attr);
11793 struct perf_event_attr attr;
11794 int type, pfd, err;
11795
11796 err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset);
11797 if (err < 0) {
11798 pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n",
11799 binary_path, (size_t)offset, err);
11800 return err;
11801 }
11802 type = determine_uprobe_perf_type_legacy(probe_name, retprobe);
11803 if (type < 0) {
11804 err = type;
11805 pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n",
11806 binary_path, offset, err);
11807 goto err_clean_legacy;
11808 }
11809
11810 memset(&attr, 0, attr_sz);
11811 attr.size = attr_sz;
11812 attr.config = type;
11813 attr.type = PERF_TYPE_TRACEPOINT;
11814
11815 pfd = syscall(__NR_perf_event_open, &attr,
11816 pid < 0 ? -1 : pid, /* pid */
11817 pid == -1 ? 0 : -1, /* cpu */
11818 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
11819 if (pfd < 0) {
11820 err = -errno;
11821 pr_warn("legacy uprobe perf_event_open() failed: %d\n", err);
11822 goto err_clean_legacy;
11823 }
11824 return pfd;
11825
11826 err_clean_legacy:
11827 /* Clear the newly added legacy uprobe_event */
11828 remove_uprobe_event_legacy(probe_name, retprobe);
11829 return err;
11830 }
11831
11832 /* Find offset of function name in archive specified by path. Currently
11833 * supported are .zip files that do not compress their contents, as used on
11834 * Android in the form of APKs, for example. "file_name" is the name of the ELF
11835 * file inside the archive. "func_name" matches symbol name or name@@LIB for
11836 * library functions.
11837 *
11838  * An overview of the APK format is provided here:
11839 * https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents
11840 */
11841 static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name,
11842 const char *func_name)
11843 {
11844 struct zip_archive *archive;
11845 struct zip_entry entry;
11846 long ret;
11847 Elf *elf;
11848
11849 archive = zip_archive_open(archive_path);
11850 if (IS_ERR(archive)) {
11851 ret = PTR_ERR(archive);
11852 pr_warn("zip: failed to open %s: %ld\n", archive_path, ret);
11853 return ret;
11854 }
11855
11856 ret = zip_archive_find_entry(archive, file_name, &entry);
11857 if (ret) {
11858 pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name,
11859 archive_path, ret);
11860 goto out;
11861 }
11862 pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path,
11863 (unsigned long)entry.data_offset);
11864
11865 if (entry.compression) {
11866 pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name,
11867 archive_path);
11868 ret = -LIBBPF_ERRNO__FORMAT;
11869 goto out;
11870 }
11871
11872 elf = elf_memory((void *)entry.data, entry.data_length);
11873 if (!elf) {
11874 pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path,
11875 elf_errmsg(-1));
11876 ret = -LIBBPF_ERRNO__LIBELF;
11877 goto out;
11878 }
11879
11880 ret = elf_find_func_offset(elf, file_name, func_name);
11881 if (ret > 0) {
11882 pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n",
11883 func_name, file_name, archive_path, entry.data_offset, ret,
11884 ret + entry.data_offset);
11885 ret += entry.data_offset;
11886 }
11887 elf_end(elf);
11888
11889 out:
11890 zip_archive_close(archive);
11891 return ret;
11892 }
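/*
 * Archive sketch (illustrative, not part of libbpf): combined with the
 * "!/" separator handled in bpf_program__attach_uprobe_opts() below, this
 * lets a uprobe target an ELF stored uncompressed inside an APK. The paths
 * and the function name are made up; "prog" is an assumed uprobe program.
 *
 *	LIBBPF_OPTS(bpf_uprobe_opts, uopts,
 *		.func_name = "Java_com_example_Native_work",
 *	);
 *	struct bpf_link *l;
 *
 *	l = bpf_program__attach_uprobe_opts(prog, -1,
 *		"/data/app/com.example/base.apk!/lib/arm64-v8a/libnative.so",
 *		0, &uopts);
 */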
11893
11894 static const char *arch_specific_lib_paths(void)
11895 {
11896 /*
11897 * Based on https://packages.debian.org/sid/libc6.
11898 *
11899 * Assume that the traced program is built for the same architecture
11900 * as libbpf, which should cover the vast majority of cases.
11901 */
11902 #if defined(__x86_64__)
11903 return "/lib/x86_64-linux-gnu";
11904 #elif defined(__i386__)
11905 return "/lib/i386-linux-gnu";
11906 #elif defined(__s390x__)
11907 return "/lib/s390x-linux-gnu";
11908 #elif defined(__s390__)
11909 return "/lib/s390-linux-gnu";
11910 #elif defined(__arm__) && defined(__SOFTFP__)
11911 return "/lib/arm-linux-gnueabi";
11912 #elif defined(__arm__) && !defined(__SOFTFP__)
11913 return "/lib/arm-linux-gnueabihf";
11914 #elif defined(__aarch64__)
11915 return "/lib/aarch64-linux-gnu";
11916 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64
11917 return "/lib/mips64el-linux-gnuabi64";
11918 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32
11919 return "/lib/mipsel-linux-gnu";
11920 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
11921 return "/lib/powerpc64le-linux-gnu";
11922 #elif defined(__sparc__) && defined(__arch64__)
11923 return "/lib/sparc64-linux-gnu";
11924 #elif defined(__riscv) && __riscv_xlen == 64
11925 return "/lib/riscv64-linux-gnu";
11926 #else
11927 return NULL;
11928 #endif
11929 }
11930
11931 /* Get full path to program/shared library. */
11932 static int resolve_full_path(const char *file, char *result, size_t result_sz)
11933 {
11934 const char *search_paths[3] = {};
11935 int i, perm;
11936
11937 if (str_has_sfx(file, ".so") || strstr(file, ".so.")) {
11938 search_paths[0] = getenv("LD_LIBRARY_PATH");
11939 search_paths[1] = "/usr/lib64:/usr/lib";
11940 search_paths[2] = arch_specific_lib_paths();
11941 perm = R_OK;
11942 } else {
11943 search_paths[0] = getenv("PATH");
11944 search_paths[1] = "/usr/bin:/usr/sbin";
11945 perm = R_OK | X_OK;
11946 }
11947
11948 for (i = 0; i < ARRAY_SIZE(search_paths); i++) {
11949 const char *s;
11950
11951 if (!search_paths[i])
11952 continue;
11953 for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) {
11954 char *next_path;
11955 int seg_len;
11956
11957 if (s[0] == ':')
11958 s++;
11959 next_path = strchr(s, ':');
11960 seg_len = next_path ? next_path - s : strlen(s);
11961 if (!seg_len)
11962 continue;
11963 snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
11964 /* ensure it has required permissions */
11965 if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0)
11966 continue;
11967 pr_debug("resolved '%s' to '%s'\n", file, result);
11968 return 0;
11969 }
11970 }
11971 return -ENOENT;
11972 }
11973
11974 struct bpf_link *
11975 bpf_program__attach_uprobe_multi(const struct bpf_program *prog,
11976 pid_t pid,
11977 const char *path,
11978 const char *func_pattern,
11979 const struct bpf_uprobe_multi_opts *opts)
11980 {
11981 const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL;
11982 LIBBPF_OPTS(bpf_link_create_opts, lopts);
11983 unsigned long *resolved_offsets = NULL;
11984 int err = 0, link_fd, prog_fd;
11985 struct bpf_link *link = NULL;
11986 char errmsg[STRERR_BUFSIZE];
11987 char full_path[PATH_MAX];
11988 const __u64 *cookies;
11989 const char **syms;
11990 size_t cnt;
11991
11992 if (!OPTS_VALID(opts, bpf_uprobe_multi_opts))
11993 return libbpf_err_ptr(-EINVAL);
11994
11995 prog_fd = bpf_program__fd(prog);
11996 if (prog_fd < 0) {
11997 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
11998 prog->name);
11999 return libbpf_err_ptr(-EINVAL);
12000 }
12001
12002 syms = OPTS_GET(opts, syms, NULL);
12003 offsets = OPTS_GET(opts, offsets, NULL);
12004 ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL);
12005 cookies = OPTS_GET(opts, cookies, NULL);
12006 cnt = OPTS_GET(opts, cnt, 0);
12007
12008 /*
12009  * User can specify 2 mutually exclusive sets of inputs:
12010 *
12011 * 1) use only path/func_pattern/pid arguments
12012 *
12013 * 2) use path/pid with allowed combinations of:
12014 * syms/offsets/ref_ctr_offsets/cookies/cnt
12015 *
12016 * - syms and offsets are mutually exclusive
12017 * - ref_ctr_offsets and cookies are optional
12018 *
12019 * Any other usage results in error.
12020 */
12021
12022 if (!path)
12023 return libbpf_err_ptr(-EINVAL);
12024 if (!func_pattern && cnt == 0)
12025 return libbpf_err_ptr(-EINVAL);
12026
12027 if (func_pattern) {
12028 if (syms || offsets || ref_ctr_offsets || cookies || cnt)
12029 return libbpf_err_ptr(-EINVAL);
12030 } else {
12031 if (!!syms == !!offsets)
12032 return libbpf_err_ptr(-EINVAL);
12033 }
12034
12035 if (func_pattern) {
12036 if (!strchr(path, '/')) {
12037 err = resolve_full_path(path, full_path, sizeof(full_path));
12038 if (err) {
12039 pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
12040 prog->name, path, err);
12041 return libbpf_err_ptr(err);
12042 }
12043 path = full_path;
12044 }
12045
12046 err = elf_resolve_pattern_offsets(path, func_pattern,
12047 &resolved_offsets, &cnt);
12048 if (err < 0)
12049 return libbpf_err_ptr(err);
12050 offsets = resolved_offsets;
12051 } else if (syms) {
12052 err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets, STT_FUNC);
12053 if (err < 0)
12054 return libbpf_err_ptr(err);
12055 offsets = resolved_offsets;
12056 }
12057
12058 lopts.uprobe_multi.path = path;
12059 lopts.uprobe_multi.offsets = offsets;
12060 lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets;
12061 lopts.uprobe_multi.cookies = cookies;
12062 lopts.uprobe_multi.cnt = cnt;
12063 lopts.uprobe_multi.flags = OPTS_GET(opts, retprobe, false) ? BPF_F_UPROBE_MULTI_RETURN : 0;
12064
12065 if (pid == 0)
12066 pid = getpid();
12067 if (pid > 0)
12068 lopts.uprobe_multi.pid = pid;
12069
12070 link = calloc(1, sizeof(*link));
12071 if (!link) {
12072 err = -ENOMEM;
12073 goto error;
12074 }
12075 link->detach = &bpf_link__detach_fd;
12076
12077 link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &lopts);
12078 if (link_fd < 0) {
12079 err = -errno;
12080 pr_warn("prog '%s': failed to attach multi-uprobe: %s\n",
12081 prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
12082 goto error;
12083 }
12084 link->fd = link_fd;
12085 free(resolved_offsets);
12086 return link;
12087
12088 error:
12089 free(resolved_offsets);
12090 free(link);
12091 return libbpf_err_ptr(err);
12092 }
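/* Illustrative sketch (not part of this file) of how a caller might use
 * bpf_program__attach_uprobe_multi() with the two documented input styles.
 * The library name and symbols below are hypothetical.
 *
 *	LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
 *	const char *syms[] = { "malloc", "free" };
 *	struct bpf_link *link;
 *
 *	// style 1: glob pattern, all matching functions, any process (pid == -1)
 *	link = bpf_program__attach_uprobe_multi(prog, -1, "libc.so.6",
 *						"malloc*", &opts);
 *
 *	// style 2: explicit symbol list; func_pattern must be NULL,
 *	// syms and offsets are mutually exclusive
 *	opts.syms = syms;
 *	opts.cnt = 2;
 *	link = bpf_program__attach_uprobe_multi(prog, -1, "libc.so.6",
 *						NULL, &opts);
 *	// on error NULL is returned and errno is set
 */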
12093
12094 LIBBPF_API struct bpf_link *
12095 bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
12096 const char *binary_path, size_t func_offset,
12097 const struct bpf_uprobe_opts *opts)
12098 {
12099 const char *archive_path = NULL, *archive_sep = NULL;
12100 char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL;
12101 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
12102 enum probe_attach_mode attach_mode;
12103 char full_path[PATH_MAX];
12104 struct bpf_link *link;
12105 size_t ref_ctr_off;
12106 int pfd, err;
12107 bool retprobe, legacy;
12108 const char *func_name;
12109
12110 if (!OPTS_VALID(opts, bpf_uprobe_opts))
12111 return libbpf_err_ptr(-EINVAL);
12112
12113 attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
12114 retprobe = OPTS_GET(opts, retprobe, false);
12115 ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
12116 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
12117
12118 if (!binary_path)
12119 return libbpf_err_ptr(-EINVAL);
12120
12121 /* Check if "binary_path" refers to an archive. */
12122 archive_sep = strstr(binary_path, "!/");
12123 if (archive_sep) {
12124 full_path[0] = '\0';
12125 libbpf_strlcpy(full_path, binary_path,
12126 min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1)));
12127 archive_path = full_path;
12128 binary_path = archive_sep + 2;
12129 } else if (!strchr(binary_path, '/')) {
12130 err = resolve_full_path(binary_path, full_path, sizeof(full_path));
12131 if (err) {
12132 pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
12133 prog->name, binary_path, err);
12134 return libbpf_err_ptr(err);
12135 }
12136 binary_path = full_path;
12137 }
12138 func_name = OPTS_GET(opts, func_name, NULL);
12139 if (func_name) {
12140 long sym_off;
12141
12142 if (archive_path) {
12143 sym_off = elf_find_func_offset_from_archive(archive_path, binary_path,
12144 func_name);
12145 binary_path = archive_path;
12146 } else {
12147 sym_off = elf_find_func_offset_from_file(binary_path, func_name);
12148 }
12149 if (sym_off < 0)
12150 return libbpf_err_ptr(sym_off);
12151 func_offset += sym_off;
12152 }
12153
12154 legacy = determine_uprobe_perf_type() < 0;
12155 switch (attach_mode) {
12156 case PROBE_ATTACH_MODE_LEGACY:
12157 legacy = true;
12158 pe_opts.force_ioctl_attach = true;
12159 break;
12160 case PROBE_ATTACH_MODE_PERF:
12161 if (legacy)
12162 return libbpf_err_ptr(-ENOTSUP);
12163 pe_opts.force_ioctl_attach = true;
12164 break;
12165 case PROBE_ATTACH_MODE_LINK:
12166 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
12167 return libbpf_err_ptr(-ENOTSUP);
12168 break;
12169 case PROBE_ATTACH_MODE_DEFAULT:
12170 break;
12171 default:
12172 return libbpf_err_ptr(-EINVAL);
12173 }
12174
12175 if (!legacy) {
12176 pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
12177 func_offset, pid, ref_ctr_off);
12178 } else {
12179 char probe_name[MAX_EVENT_NAME_LEN];
12180
12181 if (ref_ctr_off)
12182 return libbpf_err_ptr(-EINVAL);
12183
12184 gen_probe_legacy_event_name(probe_name, sizeof(probe_name),
12185 strrchr(binary_path, '/') ? : binary_path,
12186 func_offset);
12187
12188 legacy_probe = strdup(probe_name);
12189 if (!legacy_probe)
12190 return libbpf_err_ptr(-ENOMEM);
12191
12192 pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe,
12193 binary_path, func_offset, pid);
12194 }
12195 if (pfd < 0) {
12196 err = -errno;
12197 pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
12198 prog->name, retprobe ? "uretprobe" : "uprobe",
12199 binary_path, func_offset,
12200 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
12201 goto err_out;
12202 }
12203
12204 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
12205 err = libbpf_get_error(link);
12206 if (err) {
12207 close(pfd);
12208 pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
12209 prog->name, retprobe ? "uretprobe" : "uprobe",
12210 binary_path, func_offset,
12211 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
12212 goto err_clean_legacy;
12213 }
12214 if (legacy) {
12215 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
12216
12217 perf_link->legacy_probe_name = legacy_probe;
12218 perf_link->legacy_is_kprobe = false;
12219 perf_link->legacy_is_retprobe = retprobe;
12220 }
12221 return link;
12222
12223 err_clean_legacy:
12224 if (legacy)
12225 remove_uprobe_event_legacy(legacy_probe, retprobe);
12226 err_out:
12227 free(legacy_probe);
12228 return libbpf_err_ptr(err);
12229 }
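/* Illustrative sketch (not part of this file): attaching via a symbol name
 * instead of a raw offset by using bpf_uprobe_opts. The binary path and
 * symbol name are hypothetical.
 *
 *	LIBBPF_OPTS(bpf_uprobe_opts, uopts,
 *		    .func_name = "readline",
 *		    .retprobe = false);
 *	struct bpf_link *link;
 *
 *	// pid == -1 means any process; func_offset 0 is added to the
 *	// resolved symbol offset
 *	link = bpf_program__attach_uprobe_opts(prog, -1, "/bin/bash", 0, &uopts);
 */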
12230
12231 /* Format of u[ret]probe section definition supporting auto-attach:
12232 * u[ret]probe/binary:function[+offset]
12233 *
12234 * binary can be an absolute/relative path or a filename; the latter is resolved to a
12235 * full binary path via bpf_program__attach_uprobe_opts.
12236 *
12237 * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be
12238 * specified (and auto-attach is not possible) or the above format is specified for
12239 * auto-attach.
12240 */
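/* For example (illustrative only; binaries and symbols are hypothetical):
 *
 *	SEC("uprobe//usr/lib/libc.so.6:malloc")    - absolute path plus symbol
 *	SEC("uprobe/libc.so.6:malloc+0x10")        - filename resolved to a full path, with offset
 *	SEC("uretprobe//usr/bin/bash:readline")    - return probe; offsets are not allowed here
 */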
12241 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12242 {
12243 DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
12244 char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off;
12245 int n, c, ret = -EINVAL;
12246 long offset = 0;
12247
12248 *link = NULL;
12249
12250 n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
12251 &probe_type, &binary_path, &func_name);
12252 switch (n) {
12253 case 1:
12254 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
12255 ret = 0;
12256 break;
12257 case 2:
12258 pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n",
12259 prog->name, prog->sec_name);
12260 break;
12261 case 3:
12262 /* check if the user specified `+offset`; if so, it must be
12263  * the last part of the string, so make sure sscanf read to EOL
12264 */
12265 func_off = strrchr(func_name, '+');
12266 if (func_off) {
12267 n = sscanf(func_off, "+%li%n", &offset, &c);
12268 if (n == 1 && *(func_off + c) == '\0')
12269 func_off[0] = '\0';
12270 else
12271 offset = 0;
12272 }
12273 opts.retprobe = strcmp(probe_type, "uretprobe") == 0 ||
12274 strcmp(probe_type, "uretprobe.s") == 0;
12275 if (opts.retprobe && offset != 0) {
12276 pr_warn("prog '%s': uretprobes do not support offset specification\n",
12277 prog->name);
12278 break;
12279 }
12280 opts.func_name = func_name;
12281 *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
12282 ret = libbpf_get_error(*link);
12283 break;
12284 default:
12285 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
12286 prog->sec_name);
12287 break;
12288 }
12289 free(probe_type);
12290 free(binary_path);
12291 free(func_name);
12292
12293 return ret;
12294 }
12295
12296 struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
12297 bool retprobe, pid_t pid,
12298 const char *binary_path,
12299 size_t func_offset)
12300 {
12301 DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe);
12302
12303 return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
12304 }
12305
12306 struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog,
12307 pid_t pid, const char *binary_path,
12308 const char *usdt_provider, const char *usdt_name,
12309 const struct bpf_usdt_opts *opts)
12310 {
12311 char resolved_path[512];
12312 struct bpf_object *obj = prog->obj;
12313 struct bpf_link *link;
12314 __u64 usdt_cookie;
12315 int err;
12316
12317 	if (!OPTS_VALID(opts, bpf_usdt_opts))
12318 return libbpf_err_ptr(-EINVAL);
12319
12320 if (bpf_program__fd(prog) < 0) {
12321 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
12322 prog->name);
12323 return libbpf_err_ptr(-EINVAL);
12324 }
12325
12326 if (!binary_path)
12327 return libbpf_err_ptr(-EINVAL);
12328
12329 if (!strchr(binary_path, '/')) {
12330 err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path));
12331 if (err) {
12332 pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
12333 prog->name, binary_path, err);
12334 return libbpf_err_ptr(err);
12335 }
12336 binary_path = resolved_path;
12337 }
12338
12339 /* USDT manager is instantiated lazily on first USDT attach. It will
12340 * be destroyed together with BPF object in bpf_object__close().
12341 */
12342 if (IS_ERR(obj->usdt_man))
12343 return libbpf_ptr(obj->usdt_man);
12344 if (!obj->usdt_man) {
12345 obj->usdt_man = usdt_manager_new(obj);
12346 if (IS_ERR(obj->usdt_man))
12347 return libbpf_ptr(obj->usdt_man);
12348 }
12349
12350 usdt_cookie = OPTS_GET(opts, usdt_cookie, 0);
12351 link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path,
12352 usdt_provider, usdt_name, usdt_cookie);
12353 err = libbpf_get_error(link);
12354 if (err)
12355 return libbpf_err_ptr(err);
12356 return link;
12357 }
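/* Illustrative sketch (not part of this file): attaching a SEC("usdt")
 * program to a USDT probe. The binary, provider, and probe names are
 * hypothetical; pid == -1 means any process.
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_usdt(prog, -1, "/usr/sbin/mydaemon",
 *					"myapp", "request__start", NULL);
 */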
12358
12359 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12360 {
12361 char *path = NULL, *provider = NULL, *name = NULL;
12362 const char *sec_name;
12363 int n, err;
12364
12365 sec_name = bpf_program__section_name(prog);
12366 if (strcmp(sec_name, "usdt") == 0) {
12367 /* no auto-attach for just SEC("usdt") */
12368 *link = NULL;
12369 return 0;
12370 }
12371
12372 n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name);
12373 if (n != 3) {
12374 pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n",
12375 sec_name);
12376 err = -EINVAL;
12377 } else {
12378 *link = bpf_program__attach_usdt(prog, -1 /* any process */, path,
12379 provider, name, NULL);
12380 err = libbpf_get_error(*link);
12381 }
12382 free(path);
12383 free(provider);
12384 free(name);
12385 return err;
12386 }
12387
12388 static int determine_tracepoint_id(const char *tp_category,
12389 const char *tp_name)
12390 {
12391 char file[PATH_MAX];
12392 int ret;
12393
12394 ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id",
12395 tracefs_path(), tp_category, tp_name);
12396 if (ret < 0)
12397 return -errno;
12398 if (ret >= sizeof(file)) {
12399 pr_debug("tracepoint %s/%s path is too long\n",
12400 tp_category, tp_name);
12401 return -E2BIG;
12402 }
12403 return parse_uint_from_file(file, "%d\n");
12404 }
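/* E.g., for tp_category "syscalls" and tp_name "sys_enter_openat" this reads
 * the numeric event ID from <tracefs>/events/syscalls/sys_enter_openat/id,
 * where <tracefs> is typically /sys/kernel/tracing (or the legacy
 * /sys/kernel/debug/tracing mount); that ID becomes attr.config for the
 * PERF_TYPE_TRACEPOINT event opened below.
 */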
12405
12406 static int perf_event_open_tracepoint(const char *tp_category,
12407 const char *tp_name)
12408 {
12409 const size_t attr_sz = sizeof(struct perf_event_attr);
12410 struct perf_event_attr attr;
12411 char errmsg[STRERR_BUFSIZE];
12412 int tp_id, pfd, err;
12413
12414 tp_id = determine_tracepoint_id(tp_category, tp_name);
12415 if (tp_id < 0) {
12416 pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
12417 tp_category, tp_name,
12418 libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
12419 return tp_id;
12420 }
12421
12422 memset(&attr, 0, attr_sz);
12423 attr.type = PERF_TYPE_TRACEPOINT;
12424 attr.size = attr_sz;
12425 attr.config = tp_id;
12426
12427 pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
12428 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
12429 if (pfd < 0) {
12430 err = -errno;
12431 pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
12432 tp_category, tp_name,
12433 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
12434 return err;
12435 }
12436 return pfd;
12437 }
12438
12439 struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog,
12440 const char *tp_category,
12441 const char *tp_name,
12442 const struct bpf_tracepoint_opts *opts)
12443 {
12444 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
12445 char errmsg[STRERR_BUFSIZE];
12446 struct bpf_link *link;
12447 int pfd, err;
12448
12449 if (!OPTS_VALID(opts, bpf_tracepoint_opts))
12450 return libbpf_err_ptr(-EINVAL);
12451
12452 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
12453
12454 pfd = perf_event_open_tracepoint(tp_category, tp_name);
12455 if (pfd < 0) {
12456 pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
12457 prog->name, tp_category, tp_name,
12458 libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
12459 return libbpf_err_ptr(pfd);
12460 }
12461 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
12462 err = libbpf_get_error(link);
12463 if (err) {
12464 close(pfd);
12465 pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
12466 prog->name, tp_category, tp_name,
12467 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
12468 return libbpf_err_ptr(err);
12469 }
12470 return link;
12471 }
12472
12473 struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog,
12474 const char *tp_category,
12475 const char *tp_name)
12476 {
12477 return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL);
12478 }
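/* Illustrative sketch (not part of this file):
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_tracepoint(prog, "syscalls", "sys_enter_openat");
 *	// on error NULL is returned and errno is set
 */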
12479
12480 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12481 {
12482 char *sec_name, *tp_cat, *tp_name;
12483
12484 *link = NULL;
12485
12486 /* no auto-attach for SEC("tp") or SEC("tracepoint") */
12487 if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0)
12488 return 0;
12489
12490 sec_name = strdup(prog->sec_name);
12491 if (!sec_name)
12492 return -ENOMEM;
12493
12494 /* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */
12495 if (str_has_pfx(prog->sec_name, "tp/"))
12496 tp_cat = sec_name + sizeof("tp/") - 1;
12497 else
12498 tp_cat = sec_name + sizeof("tracepoint/") - 1;
12499 tp_name = strchr(tp_cat, '/');
12500 if (!tp_name) {
12501 free(sec_name);
12502 return -EINVAL;
12503 }
12504 *tp_name = '\0';
12505 tp_name++;
12506
12507 *link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
12508 free(sec_name);
12509 return libbpf_get_error(*link);
12510 }
12511
12512 struct bpf_link *
12513 bpf_program__attach_raw_tracepoint_opts(const struct bpf_program *prog,
12514 const char *tp_name,
12515 struct bpf_raw_tracepoint_opts *opts)
12516 {
12517 LIBBPF_OPTS(bpf_raw_tp_opts, raw_opts);
12518 char errmsg[STRERR_BUFSIZE];
12519 struct bpf_link *link;
12520 int prog_fd, pfd;
12521
12522 if (!OPTS_VALID(opts, bpf_raw_tracepoint_opts))
12523 return libbpf_err_ptr(-EINVAL);
12524
12525 prog_fd = bpf_program__fd(prog);
12526 if (prog_fd < 0) {
12527 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12528 return libbpf_err_ptr(-EINVAL);
12529 }
12530
12531 link = calloc(1, sizeof(*link));
12532 if (!link)
12533 return libbpf_err_ptr(-ENOMEM);
12534 link->detach = &bpf_link__detach_fd;
12535
12536 raw_opts.tp_name = tp_name;
12537 raw_opts.cookie = OPTS_GET(opts, cookie, 0);
12538 pfd = bpf_raw_tracepoint_open_opts(prog_fd, &raw_opts);
12539 if (pfd < 0) {
12540 pfd = -errno;
12541 free(link);
12542 pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
12543 prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
12544 return libbpf_err_ptr(pfd);
12545 }
12546 link->fd = pfd;
12547 return link;
12548 }
12549
12550 struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog,
12551 const char *tp_name)
12552 {
12553 return bpf_program__attach_raw_tracepoint_opts(prog, tp_name, NULL);
12554 }
12555
12556 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12557 {
12558 static const char *const prefixes[] = {
12559 "raw_tp",
12560 "raw_tracepoint",
12561 "raw_tp.w",
12562 "raw_tracepoint.w",
12563 };
12564 size_t i;
12565 const char *tp_name = NULL;
12566
12567 *link = NULL;
12568
12569 for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
12570 size_t pfx_len;
12571
12572 if (!str_has_pfx(prog->sec_name, prefixes[i]))
12573 continue;
12574
12575 pfx_len = strlen(prefixes[i]);
12576 		/* no auto-attach case, e.g., SEC("raw_tp") */
12577 if (prog->sec_name[pfx_len] == '\0')
12578 return 0;
12579
12580 if (prog->sec_name[pfx_len] != '/')
12581 continue;
12582
12583 tp_name = prog->sec_name + pfx_len + 1;
12584 break;
12585 }
12586
12587 if (!tp_name) {
12588 pr_warn("prog '%s': invalid section name '%s'\n",
12589 prog->name, prog->sec_name);
12590 return -EINVAL;
12591 }
12592
12593 *link = bpf_program__attach_raw_tracepoint(prog, tp_name);
12594 return libbpf_get_error(*link);
12595 }
12596
12597 /* Common logic for all BPF program types that attach to a btf_id */
12598 static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog,
12599 const struct bpf_trace_opts *opts)
12600 {
12601 LIBBPF_OPTS(bpf_link_create_opts, link_opts);
12602 char errmsg[STRERR_BUFSIZE];
12603 struct bpf_link *link;
12604 int prog_fd, pfd;
12605
12606 if (!OPTS_VALID(opts, bpf_trace_opts))
12607 return libbpf_err_ptr(-EINVAL);
12608
12609 prog_fd = bpf_program__fd(prog);
12610 if (prog_fd < 0) {
12611 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12612 return libbpf_err_ptr(-EINVAL);
12613 }
12614
12615 link = calloc(1, sizeof(*link));
12616 if (!link)
12617 return libbpf_err_ptr(-ENOMEM);
12618 link->detach = &bpf_link__detach_fd;
12619
12620 /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */
12621 link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0);
12622 pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts);
12623 if (pfd < 0) {
12624 pfd = -errno;
12625 free(link);
12626 pr_warn("prog '%s': failed to attach: %s\n",
12627 prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
12628 return libbpf_err_ptr(pfd);
12629 }
12630 link->fd = pfd;
12631 return link;
12632 }
12633
12634 struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog)
12635 {
12636 return bpf_program__attach_btf_id(prog, NULL);
12637 }
12638
12639 struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog,
12640 const struct bpf_trace_opts *opts)
12641 {
12642 return bpf_program__attach_btf_id(prog, opts);
12643 }
12644
12645 struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog)
12646 {
12647 return bpf_program__attach_btf_id(prog, NULL);
12648 }
12649
12650 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12651 {
12652 *link = bpf_program__attach_trace(prog);
12653 return libbpf_get_error(*link);
12654 }
12655
12656 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12657 {
12658 *link = bpf_program__attach_lsm(prog);
12659 return libbpf_get_error(*link);
12660 }
12661
12662 static struct bpf_link *
12663 bpf_program_attach_fd(const struct bpf_program *prog,
12664 int target_fd, const char *target_name,
12665 const struct bpf_link_create_opts *opts)
12666 {
12667 enum bpf_attach_type attach_type;
12668 char errmsg[STRERR_BUFSIZE];
12669 struct bpf_link *link;
12670 int prog_fd, link_fd;
12671
12672 prog_fd = bpf_program__fd(prog);
12673 if (prog_fd < 0) {
12674 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12675 return libbpf_err_ptr(-EINVAL);
12676 }
12677
12678 link = calloc(1, sizeof(*link));
12679 if (!link)
12680 return libbpf_err_ptr(-ENOMEM);
12681 link->detach = &bpf_link__detach_fd;
12682
12683 attach_type = bpf_program__expected_attach_type(prog);
12684 link_fd = bpf_link_create(prog_fd, target_fd, attach_type, opts);
12685 if (link_fd < 0) {
12686 link_fd = -errno;
12687 free(link);
12688 pr_warn("prog '%s': failed to attach to %s: %s\n",
12689 prog->name, target_name,
12690 libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
12691 return libbpf_err_ptr(link_fd);
12692 }
12693 link->fd = link_fd;
12694 return link;
12695 }
12696
12697 struct bpf_link *
12698 bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd)
12699 {
12700 return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", NULL);
12701 }
12702
12703 struct bpf_link *
12704 bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd)
12705 {
12706 return bpf_program_attach_fd(prog, netns_fd, "netns", NULL);
12707 }
12708
12709 struct bpf_link *
12710 bpf_program__attach_sockmap(const struct bpf_program *prog, int map_fd)
12711 {
12712 return bpf_program_attach_fd(prog, map_fd, "sockmap", NULL);
12713 }
12714
12715 struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex)
12716 {
12717 /* target_fd/target_ifindex use the same field in LINK_CREATE */
12718 return bpf_program_attach_fd(prog, ifindex, "xdp", NULL);
12719 }
12720
12721 struct bpf_link *
12722 bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex,
12723 const struct bpf_tcx_opts *opts)
12724 {
12725 LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
12726 __u32 relative_id;
12727 int relative_fd;
12728
12729 if (!OPTS_VALID(opts, bpf_tcx_opts))
12730 return libbpf_err_ptr(-EINVAL);
12731
12732 relative_id = OPTS_GET(opts, relative_id, 0);
12733 relative_fd = OPTS_GET(opts, relative_fd, 0);
12734
12735 /* validate we don't have unexpected combinations of non-zero fields */
12736 if (!ifindex) {
12737 pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
12738 prog->name);
12739 return libbpf_err_ptr(-EINVAL);
12740 }
12741 if (relative_fd && relative_id) {
12742 pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
12743 prog->name);
12744 return libbpf_err_ptr(-EINVAL);
12745 }
12746
12747 link_create_opts.tcx.expected_revision = OPTS_GET(opts, expected_revision, 0);
12748 link_create_opts.tcx.relative_fd = relative_fd;
12749 link_create_opts.tcx.relative_id = relative_id;
12750 link_create_opts.flags = OPTS_GET(opts, flags, 0);
12751
12752 /* target_fd/target_ifindex use the same field in LINK_CREATE */
12753 return bpf_program_attach_fd(prog, ifindex, "tcx", &link_create_opts);
12754 }
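/* Illustrative sketch (not part of this file): attaching a tcx program
 * (e.g., one defined with SEC("tcx/ingress")) to a hypothetical "eth0"
 * device; if_nametoindex() is declared in <net/if.h>. Passing NULL opts
 * uses default ordering with no expected_revision check.
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_tcx(prog, if_nametoindex("eth0"), NULL);
 */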
12755
12756 struct bpf_link *
12757 bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex,
12758 const struct bpf_netkit_opts *opts)
12759 {
12760 LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
12761 __u32 relative_id;
12762 int relative_fd;
12763
12764 if (!OPTS_VALID(opts, bpf_netkit_opts))
12765 return libbpf_err_ptr(-EINVAL);
12766
12767 relative_id = OPTS_GET(opts, relative_id, 0);
12768 relative_fd = OPTS_GET(opts, relative_fd, 0);
12769
12770 /* validate we don't have unexpected combinations of non-zero fields */
12771 if (!ifindex) {
12772 pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
12773 prog->name);
12774 return libbpf_err_ptr(-EINVAL);
12775 }
12776 if (relative_fd && relative_id) {
12777 pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
12778 prog->name);
12779 return libbpf_err_ptr(-EINVAL);
12780 }
12781
12782 link_create_opts.netkit.expected_revision = OPTS_GET(opts, expected_revision, 0);
12783 link_create_opts.netkit.relative_fd = relative_fd;
12784 link_create_opts.netkit.relative_id = relative_id;
12785 link_create_opts.flags = OPTS_GET(opts, flags, 0);
12786
12787 return bpf_program_attach_fd(prog, ifindex, "netkit", &link_create_opts);
12788 }
12789
12790 struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
12791 int target_fd,
12792 const char *attach_func_name)
12793 {
12794 int btf_id;
12795
12796 if (!!target_fd != !!attach_func_name) {
12797 pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
12798 prog->name);
12799 return libbpf_err_ptr(-EINVAL);
12800 }
12801
12802 if (prog->type != BPF_PROG_TYPE_EXT) {
12803 pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace",
12804 prog->name);
12805 return libbpf_err_ptr(-EINVAL);
12806 }
12807
12808 if (target_fd) {
12809 LIBBPF_OPTS(bpf_link_create_opts, target_opts);
12810
12811 btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
12812 if (btf_id < 0)
12813 return libbpf_err_ptr(btf_id);
12814
12815 target_opts.target_btf_id = btf_id;
12816
12817 return bpf_program_attach_fd(prog, target_fd, "freplace",
12818 &target_opts);
12819 } else {
12820 /* no target, so use raw_tracepoint_open for compatibility
12821 * with old kernels
12822 */
12823 return bpf_program__attach_trace(prog);
12824 }
12825 }
12826
12827 struct bpf_link *
12828 bpf_program__attach_iter(const struct bpf_program *prog,
12829 const struct bpf_iter_attach_opts *opts)
12830 {
12831 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
12832 char errmsg[STRERR_BUFSIZE];
12833 struct bpf_link *link;
12834 int prog_fd, link_fd;
12835 __u32 target_fd = 0;
12836
12837 if (!OPTS_VALID(opts, bpf_iter_attach_opts))
12838 return libbpf_err_ptr(-EINVAL);
12839
12840 link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
12841 link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
12842
12843 prog_fd = bpf_program__fd(prog);
12844 if (prog_fd < 0) {
12845 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12846 return libbpf_err_ptr(-EINVAL);
12847 }
12848
12849 link = calloc(1, sizeof(*link));
12850 if (!link)
12851 return libbpf_err_ptr(-ENOMEM);
12852 link->detach = &bpf_link__detach_fd;
12853
12854 link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
12855 &link_create_opts);
12856 if (link_fd < 0) {
12857 link_fd = -errno;
12858 free(link);
12859 pr_warn("prog '%s': failed to attach to iterator: %s\n",
12860 prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
12861 return libbpf_err_ptr(link_fd);
12862 }
12863 link->fd = link_fd;
12864 return link;
12865 }
12866
12867 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12868 {
12869 *link = bpf_program__attach_iter(prog, NULL);
12870 return libbpf_get_error(*link);
12871 }
12872
12873 struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog,
12874 const struct bpf_netfilter_opts *opts)
12875 {
12876 LIBBPF_OPTS(bpf_link_create_opts, lopts);
12877 struct bpf_link *link;
12878 int prog_fd, link_fd;
12879
12880 if (!OPTS_VALID(opts, bpf_netfilter_opts))
12881 return libbpf_err_ptr(-EINVAL);
12882
12883 prog_fd = bpf_program__fd(prog);
12884 if (prog_fd < 0) {
12885 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12886 return libbpf_err_ptr(-EINVAL);
12887 }
12888
12889 link = calloc(1, sizeof(*link));
12890 if (!link)
12891 return libbpf_err_ptr(-ENOMEM);
12892
12893 link->detach = &bpf_link__detach_fd;
12894
12895 lopts.netfilter.pf = OPTS_GET(opts, pf, 0);
12896 lopts.netfilter.hooknum = OPTS_GET(opts, hooknum, 0);
12897 lopts.netfilter.priority = OPTS_GET(opts, priority, 0);
12898 lopts.netfilter.flags = OPTS_GET(opts, flags, 0);
12899
12900 link_fd = bpf_link_create(prog_fd, 0, BPF_NETFILTER, &lopts);
12901 if (link_fd < 0) {
12902 char errmsg[STRERR_BUFSIZE];
12903
12904 link_fd = -errno;
12905 free(link);
12906 pr_warn("prog '%s': failed to attach to netfilter: %s\n",
12907 prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
12908 return libbpf_err_ptr(link_fd);
12909 }
12910 link->fd = link_fd;
12911
12912 return link;
12913 }
12914
12915 struct bpf_link *bpf_program__attach(const struct bpf_program *prog)
12916 {
12917 struct bpf_link *link = NULL;
12918 int err;
12919
12920 if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
12921 return libbpf_err_ptr(-EOPNOTSUPP);
12922
12923 if (bpf_program__fd(prog) < 0) {
12924 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
12925 prog->name);
12926 return libbpf_err_ptr(-EINVAL);
12927 }
12928
12929 err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link);
12930 if (err)
12931 return libbpf_err_ptr(err);
12932
12933 /* When calling bpf_program__attach() explicitly, auto-attach support
12934 * is expected to work, so NULL returned link is considered an error.
12935 * This is different for skeleton's attach, see comment in
12936 * bpf_object__attach_skeleton().
12937 */
12938 if (!link)
12939 return libbpf_err_ptr(-EOPNOTSUPP);
12940
12941 return link;
12942 }
12943
12944 struct bpf_link_struct_ops {
12945 struct bpf_link link;
12946 int map_fd;
12947 };
12948
12949 static int bpf_link__detach_struct_ops(struct bpf_link *link)
12950 {
12951 struct bpf_link_struct_ops *st_link;
12952 __u32 zero = 0;
12953
12954 st_link = container_of(link, struct bpf_link_struct_ops, link);
12955
12956 if (st_link->map_fd < 0)
12957 /* w/o a real link */
12958 return bpf_map_delete_elem(link->fd, &zero);
12959
12960 return close(link->fd);
12961 }
12962
12963 struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
12964 {
12965 struct bpf_link_struct_ops *link;
12966 __u32 zero = 0;
12967 int err, fd;
12968
12969 if (!bpf_map__is_struct_ops(map)) {
12970 pr_warn("map '%s': can't attach non-struct_ops map\n", map->name);
12971 return libbpf_err_ptr(-EINVAL);
12972 }
12973
12974 if (map->fd < 0) {
12975 pr_warn("map '%s': can't attach BPF map without FD (was it created?)\n", map->name);
12976 return libbpf_err_ptr(-EINVAL);
12977 }
12978
12979 link = calloc(1, sizeof(*link));
12980 if (!link)
12981 return libbpf_err_ptr(-EINVAL);
12982
12983 /* kern_vdata should be prepared during the loading phase. */
12984 err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
12985 /* It can be EBUSY if the map has been used to create or
12986 * update a link before. We don't allow updating the value of
12987 * a struct_ops once it is set. That ensures that the value
12988 	 * never changes, so it is safe to skip EBUSY.
12989 */
12990 if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) {
12991 free(link);
12992 return libbpf_err_ptr(err);
12993 }
12994
12995 link->link.detach = bpf_link__detach_struct_ops;
12996
12997 if (!(map->def.map_flags & BPF_F_LINK)) {
12998 /* w/o a real link */
12999 link->link.fd = map->fd;
13000 link->map_fd = -1;
13001 return &link->link;
13002 }
13003
13004 fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL);
13005 if (fd < 0) {
13006 free(link);
13007 return libbpf_err_ptr(fd);
13008 }
13009
13010 link->link.fd = fd;
13011 link->map_fd = map->fd;
13012
13013 return &link->link;
13014 }
13015
13016 /*
13017  * Swap the backing struct_ops map of a link with a new struct_ops map.
13018 */
13019 int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map)
13020 {
13021 struct bpf_link_struct_ops *st_ops_link;
13022 __u32 zero = 0;
13023 int err;
13024
13025 if (!bpf_map__is_struct_ops(map))
13026 return -EINVAL;
13027
13028 if (map->fd < 0) {
13029 pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name);
13030 return -EINVAL;
13031 }
13032
13033 st_ops_link = container_of(link, struct bpf_link_struct_ops, link);
13034 /* Ensure the type of a link is correct */
13035 if (st_ops_link->map_fd < 0)
13036 return -EINVAL;
13037
13038 err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
13039 /* It can be EBUSY if the map has been used to create or
13040 * update a link before. We don't allow updating the value of
13041 * a struct_ops once it is set. That ensures that the value
13042 	 * never changes, so it is safe to skip EBUSY.
13043 */
13044 if (err && err != -EBUSY)
13045 return err;
13046
13047 err = bpf_link_update(link->fd, map->fd, NULL);
13048 if (err < 0)
13049 return err;
13050
13051 st_ops_link->map_fd = map->fd;
13052
13053 return 0;
13054 }
13055
13056 typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
13057 void *private_data);
13058
13059 static enum bpf_perf_event_ret
13060 perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
13061 void **copy_mem, size_t *copy_size,
13062 bpf_perf_event_print_t fn, void *private_data)
13063 {
13064 struct perf_event_mmap_page *header = mmap_mem;
13065 __u64 data_head = ring_buffer_read_head(header);
13066 __u64 data_tail = header->data_tail;
13067 void *base = ((__u8 *)header) + page_size;
13068 int ret = LIBBPF_PERF_EVENT_CONT;
13069 struct perf_event_header *ehdr;
13070 size_t ehdr_size;
13071
13072 while (data_head != data_tail) {
13073 ehdr = base + (data_tail & (mmap_size - 1));
13074 ehdr_size = ehdr->size;
13075
13076 if (((void *)ehdr) + ehdr_size > base + mmap_size) {
13077 void *copy_start = ehdr;
13078 size_t len_first = base + mmap_size - copy_start;
13079 size_t len_secnd = ehdr_size - len_first;
13080
13081 if (*copy_size < ehdr_size) {
13082 free(*copy_mem);
13083 *copy_mem = malloc(ehdr_size);
13084 if (!*copy_mem) {
13085 *copy_size = 0;
13086 ret = LIBBPF_PERF_EVENT_ERROR;
13087 break;
13088 }
13089 *copy_size = ehdr_size;
13090 }
13091
13092 memcpy(*copy_mem, copy_start, len_first);
13093 memcpy(*copy_mem + len_first, base, len_secnd);
13094 ehdr = *copy_mem;
13095 }
13096
13097 ret = fn(ehdr, private_data);
13098 data_tail += ehdr_size;
13099 if (ret != LIBBPF_PERF_EVENT_CONT)
13100 break;
13101 }
13102
13103 ring_buffer_write_tail(header, data_tail);
13104 return libbpf_err(ret);
13105 }
13106
13107 struct perf_buffer;
13108
13109 struct perf_buffer_params {
13110 struct perf_event_attr *attr;
13111 	/* if event_cb is specified, it takes precedence */
13112 perf_buffer_event_fn event_cb;
13113 /* sample_cb and lost_cb are higher-level common-case callbacks */
13114 perf_buffer_sample_fn sample_cb;
13115 perf_buffer_lost_fn lost_cb;
13116 void *ctx;
13117 int cpu_cnt;
13118 int *cpus;
13119 int *map_keys;
13120 };
13121
13122 struct perf_cpu_buf {
13123 struct perf_buffer *pb;
13124 void *base; /* mmap()'ed memory */
13125 void *buf; /* for reconstructing segmented data */
13126 size_t buf_size;
13127 int fd;
13128 int cpu;
13129 int map_key;
13130 };
13131
13132 struct perf_buffer {
13133 perf_buffer_event_fn event_cb;
13134 perf_buffer_sample_fn sample_cb;
13135 perf_buffer_lost_fn lost_cb;
13136 void *ctx; /* passed into callbacks */
13137
13138 size_t page_size;
13139 size_t mmap_size;
13140 struct perf_cpu_buf **cpu_bufs;
13141 struct epoll_event *events;
13142 int cpu_cnt; /* number of allocated CPU buffers */
13143 	int epoll_fd; /* epoll instance FD */
13144 int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
13145 };
13146
13147 static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
13148 struct perf_cpu_buf *cpu_buf)
13149 {
13150 if (!cpu_buf)
13151 return;
13152 if (cpu_buf->base &&
13153 munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
13154 pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
13155 if (cpu_buf->fd >= 0) {
13156 ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
13157 close(cpu_buf->fd);
13158 }
13159 free(cpu_buf->buf);
13160 free(cpu_buf);
13161 }
13162
13163 void perf_buffer__free(struct perf_buffer *pb)
13164 {
13165 int i;
13166
13167 if (IS_ERR_OR_NULL(pb))
13168 return;
13169 if (pb->cpu_bufs) {
13170 for (i = 0; i < pb->cpu_cnt; i++) {
13171 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
13172
13173 if (!cpu_buf)
13174 continue;
13175
13176 bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
13177 perf_buffer__free_cpu_buf(pb, cpu_buf);
13178 }
13179 free(pb->cpu_bufs);
13180 }
13181 if (pb->epoll_fd >= 0)
13182 close(pb->epoll_fd);
13183 free(pb->events);
13184 free(pb);
13185 }
13186
13187 static struct perf_cpu_buf *
13188 perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
13189 int cpu, int map_key)
13190 {
13191 struct perf_cpu_buf *cpu_buf;
13192 char msg[STRERR_BUFSIZE];
13193 int err;
13194
13195 cpu_buf = calloc(1, sizeof(*cpu_buf));
13196 if (!cpu_buf)
13197 return ERR_PTR(-ENOMEM);
13198
13199 cpu_buf->pb = pb;
13200 cpu_buf->cpu = cpu;
13201 cpu_buf->map_key = map_key;
13202
13203 cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
13204 -1, PERF_FLAG_FD_CLOEXEC);
13205 if (cpu_buf->fd < 0) {
13206 err = -errno;
13207 pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
13208 cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
13209 goto error;
13210 }
13211
13212 cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
13213 PROT_READ | PROT_WRITE, MAP_SHARED,
13214 cpu_buf->fd, 0);
13215 if (cpu_buf->base == MAP_FAILED) {
13216 cpu_buf->base = NULL;
13217 err = -errno;
13218 pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
13219 cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
13220 goto error;
13221 }
13222
13223 if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
13224 err = -errno;
13225 pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
13226 cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
13227 goto error;
13228 }
13229
13230 return cpu_buf;
13231
13232 error:
13233 perf_buffer__free_cpu_buf(pb, cpu_buf);
13234 return (struct perf_cpu_buf *)ERR_PTR(err);
13235 }
13236
13237 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
13238 struct perf_buffer_params *p);
13239
13240 struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
13241 perf_buffer_sample_fn sample_cb,
13242 perf_buffer_lost_fn lost_cb,
13243 void *ctx,
13244 const struct perf_buffer_opts *opts)
13245 {
13246 const size_t attr_sz = sizeof(struct perf_event_attr);
13247 struct perf_buffer_params p = {};
13248 struct perf_event_attr attr;
13249 __u32 sample_period;
13250
13251 if (!OPTS_VALID(opts, perf_buffer_opts))
13252 return libbpf_err_ptr(-EINVAL);
13253
13254 sample_period = OPTS_GET(opts, sample_period, 1);
13255 if (!sample_period)
13256 sample_period = 1;
13257
13258 memset(&attr, 0, attr_sz);
13259 attr.size = attr_sz;
13260 attr.config = PERF_COUNT_SW_BPF_OUTPUT;
13261 attr.type = PERF_TYPE_SOFTWARE;
13262 attr.sample_type = PERF_SAMPLE_RAW;
13263 attr.wakeup_events = sample_period;
13264
13265 p.attr = &attr;
13266 p.sample_cb = sample_cb;
13267 p.lost_cb = lost_cb;
13268 p.ctx = ctx;
13269
13270 return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
13271 }
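/* Illustrative sketch (not part of this file) of the common perf buffer
 * flow; map_fd is assumed to refer to a BPF_MAP_TYPE_PERF_EVENT_ARRAY map.
 *
 *	static void on_sample(void *ctx, int cpu, void *data, __u32 size)
 *	{
 *		// consume one raw sample emitted via bpf_perf_event_output()
 *	}
 *
 *	static void on_lost(void *ctx, int cpu, __u64 cnt)
 *	{
 *		// account for cnt lost samples on this CPU
 *	}
 *
 *	pb = perf_buffer__new(map_fd, 8, on_sample, on_lost, NULL, NULL);
 *	// 8 pages per CPU; page_cnt must be a power of two
 *	while (!stop)
 *		perf_buffer__poll(pb, 100);  // timeout in milliseconds
 *	perf_buffer__free(pb);
 */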
13272
13273 struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt,
13274 struct perf_event_attr *attr,
13275 perf_buffer_event_fn event_cb, void *ctx,
13276 const struct perf_buffer_raw_opts *opts)
13277 {
13278 struct perf_buffer_params p = {};
13279
13280 if (!attr)
13281 return libbpf_err_ptr(-EINVAL);
13282
13283 if (!OPTS_VALID(opts, perf_buffer_raw_opts))
13284 return libbpf_err_ptr(-EINVAL);
13285
13286 p.attr = attr;
13287 p.event_cb = event_cb;
13288 p.ctx = ctx;
13289 p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0);
13290 p.cpus = OPTS_GET(opts, cpus, NULL);
13291 p.map_keys = OPTS_GET(opts, map_keys, NULL);
13292
13293 return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
13294 }
13295
13296 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
13297 struct perf_buffer_params *p)
13298 {
13299 const char *online_cpus_file = "/sys/devices/system/cpu/online";
13300 struct bpf_map_info map;
13301 char msg[STRERR_BUFSIZE];
13302 struct perf_buffer *pb;
13303 bool *online = NULL;
13304 __u32 map_info_len;
13305 int err, i, j, n;
13306
13307 if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) {
13308 pr_warn("page count should be power of two, but is %zu\n",
13309 page_cnt);
13310 return ERR_PTR(-EINVAL);
13311 }
13312
13313 /* best-effort sanity checks */
13314 memset(&map, 0, sizeof(map));
13315 map_info_len = sizeof(map);
13316 err = bpf_map_get_info_by_fd(map_fd, &map, &map_info_len);
13317 if (err) {
13318 err = -errno;
13319 		/* if BPF_OBJ_GET_INFO_BY_FD is supported, it will return
13320 * -EBADFD, -EFAULT, or -E2BIG on real error
13321 */
13322 if (err != -EINVAL) {
13323 pr_warn("failed to get map info for map FD %d: %s\n",
13324 map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
13325 return ERR_PTR(err);
13326 }
13327 pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
13328 map_fd);
13329 } else {
13330 if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
13331 pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
13332 map.name);
13333 return ERR_PTR(-EINVAL);
13334 }
13335 }
13336
13337 pb = calloc(1, sizeof(*pb));
13338 if (!pb)
13339 return ERR_PTR(-ENOMEM);
13340
13341 pb->event_cb = p->event_cb;
13342 pb->sample_cb = p->sample_cb;
13343 pb->lost_cb = p->lost_cb;
13344 pb->ctx = p->ctx;
13345
13346 pb->page_size = getpagesize();
13347 pb->mmap_size = pb->page_size * page_cnt;
13348 pb->map_fd = map_fd;
13349
13350 pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
13351 if (pb->epoll_fd < 0) {
13352 err = -errno;
13353 pr_warn("failed to create epoll instance: %s\n",
13354 libbpf_strerror_r(err, msg, sizeof(msg)));
13355 goto error;
13356 }
13357
13358 if (p->cpu_cnt > 0) {
13359 pb->cpu_cnt = p->cpu_cnt;
13360 } else {
13361 pb->cpu_cnt = libbpf_num_possible_cpus();
13362 if (pb->cpu_cnt < 0) {
13363 err = pb->cpu_cnt;
13364 goto error;
13365 }
13366 if (map.max_entries && map.max_entries < pb->cpu_cnt)
13367 pb->cpu_cnt = map.max_entries;
13368 }
13369
13370 pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
13371 if (!pb->events) {
13372 err = -ENOMEM;
13373 pr_warn("failed to allocate events: out of memory\n");
13374 goto error;
13375 }
13376 pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
13377 if (!pb->cpu_bufs) {
13378 err = -ENOMEM;
13379 pr_warn("failed to allocate buffers: out of memory\n");
13380 goto error;
13381 }
13382
13383 err = parse_cpu_mask_file(online_cpus_file, &online, &n);
13384 if (err) {
13385 pr_warn("failed to get online CPU mask: %d\n", err);
13386 goto error;
13387 }
13388
13389 for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
13390 struct perf_cpu_buf *cpu_buf;
13391 int cpu, map_key;
13392
13393 cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
13394 map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
13395
13396 		/* in case the user didn't explicitly request particular CPUs to
13397 		 * be attached to, skip offline/not-present CPUs
13398 */
13399 if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
13400 continue;
13401
13402 cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
13403 if (IS_ERR(cpu_buf)) {
13404 err = PTR_ERR(cpu_buf);
13405 goto error;
13406 }
13407
13408 pb->cpu_bufs[j] = cpu_buf;
13409
13410 err = bpf_map_update_elem(pb->map_fd, &map_key,
13411 &cpu_buf->fd, 0);
13412 if (err) {
13413 err = -errno;
13414 pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
13415 cpu, map_key, cpu_buf->fd,
13416 libbpf_strerror_r(err, msg, sizeof(msg)));
13417 goto error;
13418 }
13419
13420 pb->events[j].events = EPOLLIN;
13421 pb->events[j].data.ptr = cpu_buf;
13422 if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
13423 &pb->events[j]) < 0) {
13424 err = -errno;
13425 pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
13426 cpu, cpu_buf->fd,
13427 libbpf_strerror_r(err, msg, sizeof(msg)));
13428 goto error;
13429 }
13430 j++;
13431 }
13432 pb->cpu_cnt = j;
13433 free(online);
13434
13435 return pb;
13436
13437 error:
13438 free(online);
13439 if (pb)
13440 perf_buffer__free(pb);
13441 return ERR_PTR(err);
13442 }
13443
13444 struct perf_sample_raw {
13445 struct perf_event_header header;
13446 uint32_t size;
13447 char data[];
13448 };
13449
13450 struct perf_sample_lost {
13451 struct perf_event_header header;
13452 uint64_t id;
13453 uint64_t lost;
13454 uint64_t sample_id;
13455 };
13456
13457 static enum bpf_perf_event_ret
13458 perf_buffer__process_record(struct perf_event_header *e, void *ctx)
13459 {
13460 struct perf_cpu_buf *cpu_buf = ctx;
13461 struct perf_buffer *pb = cpu_buf->pb;
13462 void *data = e;
13463
13464 /* user wants full control over parsing perf event */
13465 if (pb->event_cb)
13466 return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
13467
13468 switch (e->type) {
13469 case PERF_RECORD_SAMPLE: {
13470 struct perf_sample_raw *s = data;
13471
13472 if (pb->sample_cb)
13473 pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
13474 break;
13475 }
13476 case PERF_RECORD_LOST: {
13477 struct perf_sample_lost *s = data;
13478
13479 if (pb->lost_cb)
13480 pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
13481 break;
13482 }
13483 default:
13484 pr_warn("unknown perf sample type %d\n", e->type);
13485 return LIBBPF_PERF_EVENT_ERROR;
13486 }
13487 return LIBBPF_PERF_EVENT_CONT;
13488 }
13489
13490 static int perf_buffer__process_records(struct perf_buffer *pb,
13491 struct perf_cpu_buf *cpu_buf)
13492 {
13493 enum bpf_perf_event_ret ret;
13494
13495 ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size,
13496 pb->page_size, &cpu_buf->buf,
13497 &cpu_buf->buf_size,
13498 perf_buffer__process_record, cpu_buf);
13499 if (ret != LIBBPF_PERF_EVENT_CONT)
13500 return ret;
13501 return 0;
13502 }
13503
13504 int perf_buffer__epoll_fd(const struct perf_buffer *pb)
13505 {
13506 return pb->epoll_fd;
13507 }
13508
13509 int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
13510 {
13511 int i, cnt, err;
13512
13513 cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
13514 if (cnt < 0)
13515 return -errno;
13516
13517 for (i = 0; i < cnt; i++) {
13518 struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
13519
13520 err = perf_buffer__process_records(pb, cpu_buf);
13521 if (err) {
13522 pr_warn("error while processing records: %d\n", err);
13523 return libbpf_err(err);
13524 }
13525 }
13526 return cnt;
13527 }
13528
13529 /* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
13530 * manager.
13531 */
13532 size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
13533 {
13534 return pb->cpu_cnt;
13535 }
13536
13537 /*
13538 * Return perf_event FD of a ring buffer in *buf_idx* slot of
13539 * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
13540 * select()/poll()/epoll() Linux syscalls.
13541 */
13542 int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
13543 {
13544 struct perf_cpu_buf *cpu_buf;
13545
13546 if (buf_idx >= pb->cpu_cnt)
13547 return libbpf_err(-EINVAL);
13548
13549 cpu_buf = pb->cpu_bufs[buf_idx];
13550 if (!cpu_buf)
13551 return libbpf_err(-ENOENT);
13552
13553 return cpu_buf->fd;
13554 }
13555
13556 int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size)
13557 {
13558 struct perf_cpu_buf *cpu_buf;
13559
13560 if (buf_idx >= pb->cpu_cnt)
13561 return libbpf_err(-EINVAL);
13562
13563 cpu_buf = pb->cpu_bufs[buf_idx];
13564 if (!cpu_buf)
13565 return libbpf_err(-ENOENT);
13566
13567 *buf = cpu_buf->base;
13568 *buf_size = pb->mmap_size;
13569 return 0;
13570 }
13571
13572 /*
13573 * Consume data from perf ring buffer corresponding to slot *buf_idx* in
13574 * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
13575 * consume, do nothing and return success.
13576 * Returns:
13577 * - 0 on success;
13578 * - <0 on failure.
13579 */
13580 int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
13581 {
13582 struct perf_cpu_buf *cpu_buf;
13583
13584 if (buf_idx >= pb->cpu_cnt)
13585 return libbpf_err(-EINVAL);
13586
13587 cpu_buf = pb->cpu_bufs[buf_idx];
13588 if (!cpu_buf)
13589 return libbpf_err(-ENOENT);
13590
13591 return perf_buffer__process_records(pb, cpu_buf);
13592 }
13593
13594 int perf_buffer__consume(struct perf_buffer *pb)
13595 {
13596 int i, err;
13597
13598 for (i = 0; i < pb->cpu_cnt; i++) {
13599 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
13600
13601 if (!cpu_buf)
13602 continue;
13603
13604 err = perf_buffer__process_records(pb, cpu_buf);
13605 if (err) {
13606 pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
13607 return libbpf_err(err);
13608 }
13609 }
13610 return 0;
13611 }
13612
13613 int bpf_program__set_attach_target(struct bpf_program *prog,
13614 int attach_prog_fd,
13615 const char *attach_func_name)
13616 {
13617 int btf_obj_fd = 0, btf_id = 0, err;
13618
13619 if (!prog || attach_prog_fd < 0)
13620 return libbpf_err(-EINVAL);
13621
13622 if (prog->obj->loaded)
13623 return libbpf_err(-EINVAL);
13624
13625 if (attach_prog_fd && !attach_func_name) {
13626 /* remember attach_prog_fd and let bpf_program__load() find
13627 * BTF ID during the program load
13628 */
13629 prog->attach_prog_fd = attach_prog_fd;
13630 return 0;
13631 }
13632
13633 if (attach_prog_fd) {
13634 btf_id = libbpf_find_prog_btf_id(attach_func_name,
13635 attach_prog_fd);
13636 if (btf_id < 0)
13637 return libbpf_err(btf_id);
13638 } else {
13639 if (!attach_func_name)
13640 return libbpf_err(-EINVAL);
13641
13642 /* load btf_vmlinux, if not yet */
13643 err = bpf_object__load_vmlinux_btf(prog->obj, true);
13644 if (err)
13645 return libbpf_err(err);
13646 err = find_kernel_btf_id(prog->obj, attach_func_name,
13647 prog->expected_attach_type,
13648 &btf_obj_fd, &btf_id);
13649 if (err)
13650 return libbpf_err(err);
13651 }
13652
13653 prog->attach_btf_id = btf_id;
13654 prog->attach_btf_obj_fd = btf_obj_fd;
13655 prog->attach_prog_fd = attach_prog_fd;
13656 return 0;
13657 }
13658
13659 int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
13660 {
13661 int err = 0, n, len, start, end = -1;
13662 bool *tmp;
13663
13664 *mask = NULL;
13665 *mask_sz = 0;
13666
13667 /* Each sub string separated by ',' has format \d+-\d+ or \d+ */
13668 while (*s) {
13669 if (*s == ',' || *s == '\n') {
13670 s++;
13671 continue;
13672 }
13673 n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
13674 if (n <= 0 || n > 2) {
13675 pr_warn("Failed to get CPU range %s: %d\n", s, n);
13676 err = -EINVAL;
13677 goto cleanup;
13678 } else if (n == 1) {
13679 end = start;
13680 }
13681 if (start < 0 || start > end) {
13682 pr_warn("Invalid CPU range [%d,%d] in %s\n",
13683 start, end, s);
13684 err = -EINVAL;
13685 goto cleanup;
13686 }
13687 tmp = realloc(*mask, end + 1);
13688 if (!tmp) {
13689 err = -ENOMEM;
13690 goto cleanup;
13691 }
13692 *mask = tmp;
13693 memset(tmp + *mask_sz, 0, start - *mask_sz);
13694 memset(tmp + start, 1, end - start + 1);
13695 *mask_sz = end + 1;
13696 s += len;
13697 }
13698 if (!*mask_sz) {
13699 pr_warn("Empty CPU range\n");
13700 return -EINVAL;
13701 }
13702 return 0;
13703 cleanup:
13704 free(*mask);
13705 *mask = NULL;
13706 return err;
13707 }
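/* E.g., the string "0-3,5\n" produces *mask_sz == 6 with mask[0..3] and
 * mask[5] set and mask[4] cleared.
 */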
13708
13709 int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
13710 {
13711 int fd, err = 0, len;
13712 char buf[128];
13713
13714 fd = open(fcpu, O_RDONLY | O_CLOEXEC);
13715 if (fd < 0) {
13716 err = -errno;
13717 pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
13718 return err;
13719 }
13720 len = read(fd, buf, sizeof(buf));
13721 close(fd);
13722 if (len <= 0) {
13723 err = len ? -errno : -EINVAL;
13724 pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
13725 return err;
13726 }
13727 if (len >= sizeof(buf)) {
13728 pr_warn("CPU mask is too big in file %s\n", fcpu);
13729 return -E2BIG;
13730 }
13731 buf[len] = '\0';
13732
13733 return parse_cpu_mask_str(buf, mask, mask_sz);
13734 }
13735
13736 int libbpf_num_possible_cpus(void)
13737 {
13738 static const char *fcpu = "/sys/devices/system/cpu/possible";
13739 static int cpus;
13740 int err, n, i, tmp_cpus;
13741 bool *mask;
13742
13743 tmp_cpus = READ_ONCE(cpus);
13744 if (tmp_cpus > 0)
13745 return tmp_cpus;
13746
13747 err = parse_cpu_mask_file(fcpu, &mask, &n);
13748 if (err)
13749 return libbpf_err(err);
13750
13751 tmp_cpus = 0;
13752 for (i = 0; i < n; i++) {
13753 if (mask[i])
13754 tmp_cpus++;
13755 }
13756 free(mask);
13757
13758 WRITE_ONCE(cpus, tmp_cpus);
13759 return tmp_cpus;
13760 }
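/* Illustrative sketch (not part of this file): sizing a per-CPU value
 * buffer for a per-CPU map lookup; percpu_map_fd and key are hypothetical.
 *
 *	int ncpus = libbpf_num_possible_cpus();
 *	if (ncpus < 0)
 *		return ncpus;
 *	long values[ncpus];
 *	bpf_map_lookup_elem(percpu_map_fd, &key, values);
 */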
13761
13762 static int populate_skeleton_maps(const struct bpf_object *obj,
13763 struct bpf_map_skeleton *maps,
13764 size_t map_cnt, size_t map_skel_sz)
13765 {
13766 int i;
13767
13768 for (i = 0; i < map_cnt; i++) {
13769 struct bpf_map_skeleton *map_skel = (void *)maps + i * map_skel_sz;
13770 struct bpf_map **map = map_skel->map;
13771 const char *name = map_skel->name;
13772 void **mmaped = map_skel->mmaped;
13773
13774 *map = bpf_object__find_map_by_name(obj, name);
13775 if (!*map) {
13776 pr_warn("failed to find skeleton map '%s'\n", name);
13777 return -ESRCH;
13778 }
13779
13780 /* externs shouldn't be pre-setup from user code */
13781 if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
13782 *mmaped = (*map)->mmaped;
13783 }
13784 return 0;
13785 }
13786
static int populate_skeleton_progs(const struct bpf_object *obj,
				   struct bpf_prog_skeleton *progs,
				   size_t prog_cnt, size_t prog_skel_sz)
{
	int i;

	for (i = 0; i < prog_cnt; i++) {
		struct bpf_prog_skeleton *prog_skel = (void *)progs + i * prog_skel_sz;
		struct bpf_program **prog = prog_skel->prog;
		const char *name = prog_skel->name;

		*prog = bpf_object__find_program_by_name(obj, name);
		if (!*prog) {
			pr_warn("failed to find skeleton program '%s'\n", name);
			return -ESRCH;
		}
	}
	return 0;
}

int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
			      const struct bpf_object_open_opts *opts)
{
	struct bpf_object *obj;
	int err;

	obj = bpf_object_open(NULL, s->data, s->data_sz, s->name, opts);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		pr_warn("failed to initialize skeleton BPF object '%s': %d\n", s->name, err);
		return libbpf_err(err);
	}

	*s->obj = obj;
	err = populate_skeleton_maps(obj, s->maps, s->map_cnt, s->map_skel_sz);
	if (err) {
		pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err);
		return libbpf_err(err);
	}

	err = populate_skeleton_progs(obj, s->progs, s->prog_cnt, s->prog_skel_sz);
	if (err) {
		pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err);
		return libbpf_err(err);
	}

	return 0;
}

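/* Illustrative sketch (not part of the library): bpf_object__open_skeleton()
 * above is normally invoked from the <name>__open() helper emitted by
 * "bpftool gen skeleton". A hand-rolled equivalent, assuming a hypothetical
 * generated skeleton 'skel' whose data/obj/maps/progs references were already
 * filled in by the generated <name>__create_skeleton():
 *
 *	LIBBPF_OPTS(bpf_object_open_opts, opts,
 *		.btf_custom_path = "/path/to/custom.btf");
 *
 *	if (bpf_object__open_skeleton(skel->skeleton, &opts)) {
 *		// errno is set by libbpf_err(); handle the failure
 *	}
 */
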
int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
{
	int err, len, var_idx, i;
	const char *var_name;
	const struct bpf_map *map;
	struct btf *btf;
	__u32 map_type_id;
	const struct btf_type *map_type, *var_type;
	const struct bpf_var_skeleton *var_skel;
	struct btf_var_secinfo *var;

	if (!s->obj)
		return libbpf_err(-EINVAL);

	btf = bpf_object__btf(s->obj);
	if (!btf) {
		pr_warn("subskeletons require BTF at runtime (object %s)\n",
			bpf_object__name(s->obj));
		return libbpf_err(-errno);
	}

	err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt, s->map_skel_sz);
	if (err) {
		pr_warn("failed to populate subskeleton maps: %d\n", err);
		return libbpf_err(err);
	}

	err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt, s->prog_skel_sz);
	if (err) {
		pr_warn("failed to populate subskeleton progs: %d\n", err);
		return libbpf_err(err);
	}

	for (var_idx = 0; var_idx < s->var_cnt; var_idx++) {
		var_skel = (void *)s->vars + var_idx * s->var_skel_sz;
		map = *var_skel->map;
		map_type_id = bpf_map__btf_value_type_id(map);
		map_type = btf__type_by_id(btf, map_type_id);

		if (!btf_is_datasec(map_type)) {
			pr_warn("type for map '%1$s' is not a datasec: %2$s\n",
				bpf_map__name(map),
				__btf_kind_str(btf_kind(map_type)));
			return libbpf_err(-EINVAL);
		}

		len = btf_vlen(map_type);
		var = btf_var_secinfos(map_type);
		for (i = 0; i < len; i++, var++) {
			var_type = btf__type_by_id(btf, var->type);
			var_name = btf__name_by_offset(btf, var_type->name_off);
			if (strcmp(var_name, var_skel->name) == 0) {
				*var_skel->addr = map->mmaped + var->offset;
				break;
			}
		}
	}
	return 0;
}

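/* Illustrative sketch (not part of the library): subskeletons are normally
 * used through the <name>__open() helper emitted by "bpftool gen
 * subskeleton", which takes an already-open bpf_object and resolves the
 * addresses of shared global variables into the caller's view. Assuming a
 * hypothetical generated subskeleton 'shared_subskel' with a global
 * 'shared_counter' in .bss:
 *
 *	struct shared_subskel *sub = shared_subskel__open(obj);
 *
 *	if (sub) {
 *		sub->bss->shared_counter = 0;   // write into the shared var
 *		shared_subskel__destroy(sub);
 *	}
 */
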
void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s)
{
	if (!s)
		return;
	free(s->maps);
	free(s->progs);
	free(s->vars);
	free(s);
}

int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
{
	int i, err;

	err = bpf_object__load(*s->obj);
	if (err) {
		pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
		return libbpf_err(err);
	}

	for (i = 0; i < s->map_cnt; i++) {
		struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz;
		struct bpf_map *map = *map_skel->map;

		if (!map_skel->mmaped)
			continue;

		*map_skel->mmaped = map->mmaped;
	}

	return 0;
}

int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
{
	int i, err;

	for (i = 0; i < s->prog_cnt; i++) {
		struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz;
		struct bpf_program *prog = *prog_skel->prog;
		struct bpf_link **link = prog_skel->link;

		if (!prog->autoload || !prog->autoattach)
			continue;

		/* auto-attaching not supported for this program */
		if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
			continue;

		/* if user already set the link manually, don't attempt auto-attach */
		if (*link)
			continue;

		err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link);
		if (err) {
			pr_warn("prog '%s': failed to auto-attach: %d\n",
				bpf_program__name(prog), err);
			return libbpf_err(err);
		}

		/* It's possible that for some SEC() definitions auto-attach
		 * is supported only in some cases (e.g., when the definition
		 * completely specifies the target information), but not in
		 * others. SEC("uprobe") is one such case: if the user
		 * specified the target binary and function name, the BPF
		 * program can be auto-attached; if not, that shouldn't cause
		 * the skeleton's attach to fail, it should just be skipped.
		 * attach_fn signals such a case by returning 0 (no error)
		 * and setting link to NULL.
		 */
	}

	for (i = 0; i < s->map_cnt; i++) {
		struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz;
		struct bpf_map *map = *map_skel->map;
		struct bpf_link **link;

		if (!map->autocreate || !map->autoattach)
			continue;

		/* only struct_ops maps can be attached */
		if (!bpf_map__is_struct_ops(map))
			continue;

		/* skeleton was created with an earlier version of bpftool; notify the user */
		if (s->map_skel_sz < offsetofend(struct bpf_map_skeleton, link)) {
			pr_warn("map '%s': BPF skeleton version is old, skipping map auto-attachment...\n",
				bpf_map__name(map));
			continue;
		}

		link = map_skel->link;
		if (!link) {
			pr_warn("map '%s': BPF map skeleton link is uninitialized\n",
				bpf_map__name(map));
			continue;
		}

		if (*link)
			continue;

		*link = bpf_map__attach_struct_ops(map);
		if (!*link) {
			err = -errno;
			pr_warn("map '%s': failed to auto-attach: %d\n", bpf_map__name(map), err);
			return libbpf_err(err);
		}
	}

	return 0;
}

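/* Illustrative sketch (not part of the library): the typical lifecycle of a
 * skeleton generated by "bpftool gen skeleton", assuming a hypothetical
 * skeleton named 'mybpf'. The generated helpers wrap the
 * bpf_object__{open,load,attach,detach,destroy}_skeleton() functions above:
 *
 *	struct mybpf *skel = mybpf__open();
 *
 *	if (!skel)
 *		return -errno;
 *	if (mybpf__load(skel) || mybpf__attach(skel)) {
 *		mybpf__destroy(skel);	// also detaches and closes the object
 *		return -1;
 *	}
 *	// ... run workload, read maps via skel->maps.* ...
 *	mybpf__destroy(skel);
 */
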
void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
{
	int i;

	for (i = 0; i < s->prog_cnt; i++) {
		struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz;
		struct bpf_link **link = prog_skel->link;

		bpf_link__destroy(*link);
		*link = NULL;
	}

	if (s->map_skel_sz < sizeof(struct bpf_map_skeleton))
		return;

	for (i = 0; i < s->map_cnt; i++) {
		struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz;
		struct bpf_link **link = map_skel->link;

		if (link) {
			bpf_link__destroy(*link);
			*link = NULL;
		}
	}
}

void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
{
	if (!s)
		return;

	bpf_object__detach_skeleton(s);
	if (s->obj)
		bpf_object__close(*s->obj);
	free(s->maps);
	free(s->progs);
	free(s);
}
