/*
 * Copyright (c) 2015 PLUMgrid, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <linux/bpf.h>
#include <linux/bpf_common.h>
#include <linux/if_packet.h>
#include <linux/types.h>
#include <linux/perf_event.h>
#include <linux/pkt_cls.h>
#include <linux/rtnetlink.h>
#include <linux/sched.h>
#include <linux/unistd.h>
#include <linux/version.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <libgen.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
#include <unistd.h>
#include <linux/if_alg.h>

#include "libbpf.h"
#include "perf_reader.h"

// TODO: Remove this when CentOS 6 support is not needed anymore
#include "setns.h"

#include "bcc_libbpf_inc.h"

// TODO: remove these defines when linux-libc-dev exports them properly

#ifndef __NR_bpf
#if defined(__powerpc64__)
#define __NR_bpf 361
#elif defined(__s390x__)
#define __NR_bpf 351
#elif defined(__aarch64__)
#define __NR_bpf 280
#else
#define __NR_bpf 321
#endif
#endif

#ifndef SO_ATTACH_BPF
#define SO_ATTACH_BPF 50
#endif

#ifndef PERF_EVENT_IOC_SET_BPF
#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
#endif

#ifndef PERF_FLAG_FD_CLOEXEC
#define PERF_FLAG_FD_CLOEXEC (1UL << 3)
#endif

// TODO: Remove this when CentOS 6 support is not needed anymore
#ifndef AF_ALG
#define AF_ALG 38
#endif

#ifndef min
#define min(x, y) ((x) < (y) ? (x) : (y))
#endif

#define UNUSED(expr) do { (void)(expr); } while (0)

#define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32

#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC		0xcafe4a11
#endif

struct bpf_helper {
  char *name;
  char *required_version;
};

static struct bpf_helper helpers[] = {
  {"map_lookup_elem", "3.19"},
  {"map_update_elem", "3.19"},
  {"map_delete_elem", "3.19"},
  {"probe_read", "4.1"},
  {"ktime_get_ns", "4.1"},
  {"trace_printk", "4.1"},
  {"get_prandom_u32", "4.1"},
  {"get_smp_processor_id", "4.1"},
  {"skb_store_bytes", "4.1"},
  {"l3_csum_replace", "4.1"},
  {"l4_csum_replace", "4.1"},
  {"tail_call", "4.2"},
  {"clone_redirect", "4.2"},
  {"get_current_pid_tgid", "4.2"},
  {"get_current_uid_gid", "4.2"},
  {"get_current_comm", "4.2"},
  {"get_cgroup_classid", "4.3"},
  {"skb_vlan_push", "4.3"},
  {"skb_vlan_pop", "4.3"},
  {"skb_get_tunnel_key", "4.3"},
  {"skb_set_tunnel_key", "4.3"},
  {"perf_event_read", "4.3"},
  {"redirect", "4.4"},
  {"get_route_realm", "4.4"},
  {"perf_event_output", "4.4"},
  {"skb_load_bytes", "4.5"},
  {"get_stackid", "4.6"},
  {"csum_diff", "4.6"},
  {"skb_get_tunnel_opt", "4.6"},
  {"skb_set_tunnel_opt", "4.6"},
  {"skb_change_proto", "4.8"},
  {"skb_change_type", "4.8"},
  {"skb_under_cgroup", "4.8"},
  {"get_hash_recalc", "4.8"},
  {"get_current_task", "4.8"},
  {"probe_write_user", "4.8"},
  {"current_task_under_cgroup", "4.9"},
  {"skb_change_tail", "4.9"},
  {"skb_pull_data", "4.9"},
  {"csum_update", "4.9"},
  {"set_hash_invalid", "4.9"},
  {"get_numa_node_id", "4.10"},
  {"skb_change_head", "4.10"},
  {"xdp_adjust_head", "4.10"},
  {"probe_read_str", "4.11"},
  {"get_socket_cookie", "4.12"},
  {"get_socket_uid", "4.12"},
  {"set_hash", "4.13"},
  {"setsockopt", "4.13"},
  {"skb_adjust_room", "4.13"},
  {"redirect_map", "4.14"},
  {"sk_redirect_map", "4.14"},
  {"sock_map_update", "4.14"},
  {"xdp_adjust_meta", "4.15"},
  {"perf_event_read_value", "4.15"},
  {"perf_prog_read_value", "4.15"},
  {"getsockopt", "4.15"},
  {"override_return", "4.16"},
  {"sock_ops_cb_flags_set", "4.16"},
  {"msg_redirect_map", "4.17"},
  {"msg_apply_bytes", "4.17"},
  {"msg_cork_bytes", "4.17"},
  {"msg_pull_data", "4.17"},
  {"bind", "4.17"},
  {"xdp_adjust_tail", "4.18"},
  {"skb_get_xfrm_state", "4.18"},
  {"get_stack", "4.18"},
  {"skb_load_bytes_relative", "4.18"},
  {"fib_lookup", "4.18"},
  {"sock_hash_update", "4.18"},
  {"msg_redirect_hash", "4.18"},
  {"sk_redirect_hash", "4.18"},
  {"lwt_push_encap", "4.18"},
  {"lwt_seg6_store_bytes", "4.18"},
  {"lwt_seg6_adjust_srh", "4.18"},
  {"lwt_seg6_action", "4.18"},
  {"rc_repeat", "4.18"},
  {"rc_keydown", "4.18"},
  {"skb_cgroup_id", "4.18"},
  {"get_current_cgroup_id", "4.18"},
  {"get_local_storage", "4.19"},
  {"sk_select_reuseport", "4.19"},
  {"skb_ancestor_cgroup_id", "4.19"},
  {"sk_lookup_tcp", "4.20"},
  {"sk_lookup_udp", "4.20"},
  {"sk_release", "4.20"},
  {"map_push_elem", "4.20"},
  {"map_pop_elem", "4.20"},
196   {"map_peak_elem", "4.20"},
197   {"msg_push_data", "4.20"},
198   {"msg_pop_data", "5.0"},
199   {"rc_pointer_rel", "5.0"},
200   {"spin_lock", "5.1"},
201   {"spin_unlock", "5.1"},
202   {"sk_fullsock", "5.1"},
203   {"tcp_sock", "5.1"},
204   {"skb_ecn_set_ce", "5.1"},
205   {"get_listener_sock", "5.1"},
206   {"skc_lookup_tcp", "5.2"},
207   {"tcp_check_syncookie", "5.2"},
208   {"sysctl_get_name", "5.2"},
209   {"sysctl_get_current_value", "5.2"},
210   {"sysctl_get_new_value", "5.2"},
211   {"sysctl_set_new_value", "5.2"},
212   {"strtol", "5.2"},
213   {"strtoul", "5.2"},
214   {"sk_storage_get", "5.2"},
215   {"sk_storage_delete", "5.2"},
216   {"send_signal", "5.3"},
217   {"tcp_gen_syncookie", "5.3"},
218   {"skb_output", "5.5"},
219   {"probe_read_user", "5.5"},
220   {"probe_read_kernel", "5.5"},
221   {"probe_read_user_str", "5.5"},
222   {"probe_read_kernel_str", "5.5"},
223   {"tcp_send_ack", "5.5"},
224   {"send_signal_thread", "5.5"},
225   {"jiffies64", "5.5"},
226   {"read_branch_records", "5.6"},
227   {"get_ns_current_pid_tgid", "5.6"},
228   {"xdp_output", "5.6"},
229   {"get_netns_cookie", "5.6"},
230   {"get_current_ancestor_cgroup_id", "5.6"},
231   {"sk_assign", "5.6"},
232   {"ktime_get_boot_ns", "5.7"},
233   {"seq_printf", "5.7"},
234   {"seq_write", "5.7"},
235   {"sk_cgroup_id", "5.7"},
236   {"sk_ancestor_cgroup_id", "5.7"},
237   {"csum_level", "5.7"},
238   {"ringbuf_output", "5.8"},
239   {"ringbuf_reserve", "5.8"},
240   {"ringbuf_submit", "5.8"},
241   {"ringbuf_discard", "5.8"},
242   {"ringbuf_query", "5.8"},
243   {"skc_to_tcp6_sock", "5.9"},
244   {"skc_to_tcp_sock", "5.9"},
245   {"skc_to_tcp_timewait_sock", "5.9"},
246   {"skc_to_tcp_request_sock", "5.9"},
247   {"skc_to_udp6_sock", "5.9"},
248   {"get_task_stack", "5.9"},
249   {"load_hdr_opt", "5.10"},
250   {"store_hdr_opt", "5.10"},
251   {"reserve_hdr_opt", "5.10"},
252   {"inode_storage_get", "5.10"},
253   {"inode_storage_delete", "5.10"},
254   {"d_path", "5.10"},
255   {"copy_from_user", "5.10"},
256   {"snprintf_btf", "5.10"},
257   {"seq_printf_btf", "5.10"},
258   {"skb_cgroup_classid", "5.10"},
259   {"redirect_neigh", "5.10"},
260   {"per_cpu_ptr", "5.10"},
261   {"this_cpu_ptr", "5.10"},
262   {"redirect_peer", "5.10"},
263   {"task_storage_get", "5.11"},
264   {"task_storage_delete", "5.11"},
265   {"get_current_task_btf", "5.11"},
266   {"bprm_opts_set", "5.11"},
267   {"ktime_get_coarse_ns", "5.11"},
268   {"ima_inode_hash", "5.11"},
269   {"sock_from_file", "5.11"},
270   {"check_mtu", "5.12"},
271   {"for_each_map_elem", "5.13"},
272   {"snprintf", "5.13"},
273   {"sys_bpf", "5.14"},
274   {"btf_find_by_name_kind", "5.14"},
275   {"sys_close", "5.14"},
276   {"timer_init", "5.15"},
277   {"timer_set_callback", "5.15"},
278   {"timer_start", "5.15"},
279   {"timer_cancel", "5.15"},
280   {"get_func_ip", "5.15"},
281   {"get_attach_cookie", "5.15"},
282   {"task_pt_regs", "5.15"},
283   {"get_branch_snapshot", "5.16"},
284   {"trace_vprintk", "5.16"},
285   {"skc_to_unix_sock", "5.16"},
286   {"kallsyms_lookup_name", "5.16"},
287   {"find_vma", "5.17"},
288   {"loop", "5.17"},
289   {"strncmp", "5.17"},
290   {"get_func_arg", "5.17"},
291   {"get_func_ret", "5.17"},
292   {"get_func_ret", "5.17"},
293   {"get_retval", "5.18"},
294   {"set_retval", "5.18"},
295   {"xdp_get_buff_len", "5.18"},
296   {"xdp_load_bytes", "5.18"},
297   {"xdp_store_bytes", "5.18"},
298   {"copy_from_user_task", "5.18"},
299   {"skb_set_tstamp", "5.18"},
300   {"ima_file_hash", "5.18"},
301 };
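
// Note (editorial): the kernel assigns BPF helper IDs starting at 1, in the
// same order as this table, so bpf_print_hints() below can map the verifier's
// "invalid func #N" message to helpers[N - 1] and report which kernel version
// first provided the missing helper.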

static uint64_t ptr_to_u64(void *ptr)
{
  return (uint64_t) (unsigned long) ptr;
}

static int libbpf_bpf_map_create(struct bpf_create_map_attr *create_attr)
{
  LIBBPF_OPTS(bpf_map_create_opts, p);

  p.map_flags = create_attr->map_flags;
  p.numa_node = create_attr->numa_node;
  p.btf_fd = create_attr->btf_fd;
  p.btf_key_type_id = create_attr->btf_key_type_id;
  p.btf_value_type_id = create_attr->btf_value_type_id;
  p.map_ifindex = create_attr->map_ifindex;
  if (create_attr->map_type == BPF_MAP_TYPE_STRUCT_OPS)
    p.btf_vmlinux_value_type_id = create_attr->btf_vmlinux_value_type_id;
  else
    p.inner_map_fd = create_attr->inner_map_fd;

  return bpf_map_create(create_attr->map_type, create_attr->name, create_attr->key_size,
                        create_attr->value_size, create_attr->max_entries, &p);
}

int bcc_create_map_xattr(struct bpf_create_map_attr *attr, bool allow_rlimit)
{
  unsigned name_len = attr->name ? strlen(attr->name) : 0;
  char map_name[BPF_OBJ_NAME_LEN] = {};

  memcpy(map_name, attr->name, min(name_len, BPF_OBJ_NAME_LEN - 1));
  attr->name = map_name;
  int ret = libbpf_bpf_map_create(attr);

  if (ret < 0 && errno == EPERM) {
    if (!allow_rlimit)
      return ret;

    // see note below about the rationale for this retry
    struct rlimit rl = {};
    if (getrlimit(RLIMIT_MEMLOCK, &rl) == 0) {
      rl.rlim_max = RLIM_INFINITY;
      rl.rlim_cur = rl.rlim_max;
      if (setrlimit(RLIMIT_MEMLOCK, &rl) == 0)
        ret = libbpf_bpf_map_create(attr);
    }
  }

  // The kernel supports BTF if BTF loading was successful, but this map
  // type may not support pretty printing yet.
  if (ret < 0 && attr->btf_key_type_id && errno == 524 /* ENOTSUPP */) {
    attr->btf_fd = 0;
    attr->btf_key_type_id = 0;
    attr->btf_value_type_id = 0;
    ret = libbpf_bpf_map_create(attr);
  }

  if (ret < 0 && name_len && (errno == E2BIG || errno == EINVAL)) {
    map_name[0] = '\0';
    ret = libbpf_bpf_map_create(attr);
  }

  if (ret < 0 && errno == EPERM) {
    if (!allow_rlimit)
      return ret;

    // see note below about the rationale for this retry
    struct rlimit rl = {};
    if (getrlimit(RLIMIT_MEMLOCK, &rl) == 0) {
      rl.rlim_max = RLIM_INFINITY;
      rl.rlim_cur = rl.rlim_max;
      if (setrlimit(RLIMIT_MEMLOCK, &rl) == 0)
        ret = libbpf_bpf_map_create(attr);
    }
  }
  return ret;
}

int bcc_create_map(enum bpf_map_type map_type, const char *name,
                   int key_size, int value_size,
                   int max_entries, int map_flags)
{
  struct bpf_create_map_attr attr = {};

  attr.map_type = map_type;
  attr.name = name;
  attr.key_size = key_size;
  attr.value_size = value_size;
  attr.max_entries = max_entries;
  attr.map_flags = map_flags;
  return bcc_create_map_xattr(&attr, true);
}
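
/*
 * Illustrative usage (editorial sketch, not part of the original file):
 * create a BPF_MAP_TYPE_HASH map with u32 keys and u64 values. The map name
 * "example_map" and the sizes are placeholders chosen for this example.
 *
 *   int map_fd = bcc_create_map(BPF_MAP_TYPE_HASH, "example_map",
 *                               sizeof(uint32_t), sizeof(uint64_t),
 *                               1024, 0);
 *   if (map_fd < 0)
 *     perror("bcc_create_map");
 */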

int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags)
{
  return bpf_map_update_elem(fd, key, value, flags);
}

int bpf_lookup_elem(int fd, void *key, void *value)
{
  return bpf_map_lookup_elem(fd, key, value);
}

int bpf_delete_elem(int fd, void *key)
{
  return bpf_map_delete_elem(fd, key);
}

int bpf_lookup_and_delete(int fd, void *key, void *value)
{
  return bpf_map_lookup_and_delete_elem(fd, key, value);
}

int bpf_lookup_batch(int fd, __u32 *in_batch, __u32 *out_batch, void *keys,
                     void *values, __u32 *count)
{
  return bpf_map_lookup_batch(fd, in_batch, out_batch, keys, values, count,
                              NULL);
}

int bpf_delete_batch(int fd, void *keys, __u32 *count)
{
  return bpf_map_delete_batch(fd, keys, count, NULL);
}

int bpf_update_batch(int fd, void *keys, void *values, __u32 *count)
{
  return bpf_map_update_batch(fd, keys, values, count, NULL);
}

int bpf_lookup_and_delete_batch(int fd, __u32 *in_batch, __u32 *out_batch,
                                void *keys, void *values, __u32 *count)
{
  return bpf_map_lookup_and_delete_batch(fd, in_batch, out_batch, keys, values,
                                         count, NULL);
}
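
/*
 * Illustrative sketch (editorial): drain up to 64 entries from a hash map
 * with the batch wrapper above. "map_fd" and the u32/u64 key/value types are
 * assumptions for this example; passing NULL for in_batch starts from the
 * beginning, and count is updated to the number of entries processed. On
 * kernels without batch-op support the call fails and sets errno.
 *
 *   uint32_t keys[64];
 *   uint64_t values[64];
 *   __u32 out_batch, count = 64;
 *   if (bpf_lookup_and_delete_batch(map_fd, NULL, &out_batch,
 *                                   keys, values, &count) == 0)
 *     printf("drained %u entries\n", count);
 */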

int bpf_get_first_key(int fd, void *key, size_t key_size)
{
  int i, res;

  // Kernels 4.12 and above support passing NULL to BPF_MAP_GET_NEXT_KEY to
  // get the first key of the map. On older kernels, the call will fail.
  res = bpf_map_get_next_key(fd, 0, key);
  if (res < 0 && errno == EFAULT) {
    // Fall back to trying to find a non-existing key.
    static unsigned char try_values[3] = {0, 0xff, 0x55};
    for (i = 0; i < 3; i++) {
      memset(key, try_values[i], key_size);
      // We want to check the existence of the key but we don't know the size
      // of the map's value. So we pass an invalid pointer for the value,
      // expect the call to fail, and check if the error is ENOENT, which
      // indicates the key doesn't exist. If we used NULL for the invalid
      // pointer, it might trigger a page fault in the kernel and affect
      // performance. Hence we use ~0, which will fail and return fast.
      if (bpf_map_lookup_elem(fd, key, (void *)~0) >= 0)
        return -1;
      // ENOENT means the candidate key doesn't exist; getting the key "next"
      // after a non-existing key yields the map's first key.
      if (errno == ENOENT)
        return bpf_map_get_next_key(fd, (void*)&try_values[i], key);
    }
    return -1;
  } else {
    return res;
  }
}
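
/*
 * Illustrative sketch (editorial): iterate all keys of a map using
 * bpf_get_first_key()/bpf_get_next_key(). "map_fd" and the u32 key type are
 * assumptions for this example.
 *
 *   uint32_t key, next_key;
 *   int res = bpf_get_first_key(map_fd, &key, sizeof(key));
 *   while (res == 0) {
 *     // ... use key ...
 *     res = bpf_get_next_key(map_fd, &key, &next_key);
 *     key = next_key;
 *   }
 */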

int bpf_get_next_key(int fd, void *key, void *next_key)
{
  return bpf_map_get_next_key(fd, key, next_key);
}

static void bpf_print_hints(int ret, char *log)
{
  if (ret < 0)
    fprintf(stderr, "bpf: Failed to load program: %s\n", strerror(errno));
  if (log == NULL)
    return;
  else
    fprintf(stderr, "%s\n", log);

  if (ret >= 0)
    return;

  // The following error strings will need maintenance to match LLVM.

  // stack busting
  if (strstr(log, "invalid stack off=-") != NULL) {
    fprintf(stderr, "HINT: Looks like you exceeded the BPF stack limit. "
      "This can happen if you allocate too much local variable storage. "
      "For example, if you allocated a 1 Kbyte struct (maybe for "
      "BPF_PERF_OUTPUT), busting a max stack of 512 bytes.\n\n");
  }

  // didn't check NULL on map lookup
  if (strstr(log, "invalid mem access 'map_value_or_null'") != NULL) {
    fprintf(stderr, "HINT: The 'map_value_or_null' error can happen if "
      "you dereference a pointer value from a map lookup without first "
      "checking if that pointer is NULL.\n\n");
  }

  // lacking a bpf_probe_read
  if (strstr(log, "invalid mem access 'inv'") != NULL) {
    fprintf(stderr, "HINT: The invalid mem access 'inv' error can happen "
      "if you try to dereference memory without first using "
      "bpf_probe_read_kernel() to copy it to the BPF stack. Sometimes the "
      "bpf_probe_read_kernel() is automatic by the bcc rewriter, other times "
      "you'll need to be explicit.\n\n");
  }

  // referencing global/static variables or read only data
  if (strstr(log, "unknown opcode") != NULL) {
    fprintf(stderr, "HINT: The 'unknown opcode' can happen if you reference "
      "a global or static variable, or data in read-only section. For example,"
      " 'char *p = \"hello\"' will result in p referencing a read-only section,"
      " and 'char p[] = \"hello\"' will have \"hello\" stored on the stack.\n\n");
  }

  // helper function not found in kernel
  char *helper_str = strstr(log, "invalid func ");
  if (helper_str != NULL) {
    helper_str += strlen("invalid func ");
    char *str = strchr(helper_str, '#');
    if (str != NULL) {
      helper_str = str + 1;
    }
    unsigned int helper_id = atoi(helper_str);
    if (helper_id && helper_id < sizeof(helpers) / sizeof(struct bpf_helper)) {
      struct bpf_helper helper = helpers[helper_id - 1];
      fprintf(stderr, "HINT: bpf_%s missing (added in Linux %s).\n\n",
              helper.name, helper.required_version);
    }
  }
}
#define ROUND_UP(x, n) (((x) + (n) - 1u) & ~((n) - 1u))

int bpf_obj_get_info(int prog_map_fd, void *info, uint32_t *info_len)
{
  return bpf_obj_get_info_by_fd(prog_map_fd, info, info_len);
}

int bpf_prog_compute_tag(const struct bpf_insn *insns, int prog_len,
                         unsigned long long *ptag)
{
  struct sockaddr_alg alg = {
    .salg_family    = AF_ALG,
    .salg_type      = "hash",
    .salg_name      = "sha1",
  };
  int shafd = socket(AF_ALG, SOCK_SEQPACKET | SOCK_CLOEXEC, 0);
  if (shafd < 0) {
    fprintf(stderr, "sha1 socket not available %s\n", strerror(errno));
    return -1;
  }
  int ret = bind(shafd, (struct sockaddr *)&alg, sizeof(alg));
  if (ret < 0) {
    fprintf(stderr, "sha1 bind fail %s\n", strerror(errno));
    close(shafd);
    return ret;
  }
  int shafd2 = accept4(shafd, NULL, 0, SOCK_CLOEXEC);
  if (shafd2 < 0) {
    fprintf(stderr, "sha1 accept fail %s\n", strerror(errno));
    close(shafd);
    return -1;
  }
  struct bpf_insn prog[prog_len / 8];
  bool map_ld_seen = false;
  int i;
  for (i = 0; i < prog_len / 8; i++) {
    prog[i] = insns[i];
    if (insns[i].code == (BPF_LD | BPF_DW | BPF_IMM) &&
        insns[i].src_reg == BPF_PSEUDO_MAP_FD &&
        !map_ld_seen) {
      prog[i].imm = 0;
      map_ld_seen = true;
    } else if (insns[i].code == 0 && map_ld_seen) {
      prog[i].imm = 0;
      map_ld_seen = false;
    } else {
      map_ld_seen = false;
    }
  }
  ret = write(shafd2, prog, prog_len);
  if (ret != prog_len) {
    fprintf(stderr, "sha1 write fail %s\n", strerror(errno));
    close(shafd2);
    close(shafd);
    return -1;
  }

  union {
    unsigned char sha[20];
    unsigned long long tag;
  } u = {};
  ret = read(shafd2, u.sha, 20);
  if (ret != 20) {
    fprintf(stderr, "sha1 read fail %s\n", strerror(errno));
    close(shafd2);
    close(shafd);
    return -1;
  }
  *ptag = __builtin_bswap64(u.tag);
  close(shafd2);
  close(shafd);
  return 0;
}
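
// Note (editorial): the loop above blanks the immediate of each
// BPF_PSEUDO_MAP_FD load (and of the second half of that 16-byte wide
// instruction) before hashing, so the SHA-1 prefix computed here should match
// the tag the kernel reports for the loaded program regardless of which map
// fds were patched in. bpf_prog_get_tag() below reads the kernel-computed tag
// back from /proc/self/fdinfo, so the two can be compared.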

int bpf_prog_get_tag(int fd, unsigned long long *ptag)
{
  char fmt[64];
  snprintf(fmt, sizeof(fmt), "/proc/self/fdinfo/%d", fd);
  FILE *f = fopen(fmt, "re");
  if (!f) {
/*    fprintf(stderr, "failed to open fdinfo %s\n", strerror(errno));*/
    return -1;
  }
  fgets(fmt, sizeof(fmt), f); // pos
  fgets(fmt, sizeof(fmt), f); // flags
  fgets(fmt, sizeof(fmt), f); // mnt_id
  fgets(fmt, sizeof(fmt), f); // prog_type
  fgets(fmt, sizeof(fmt), f); // prog_jited
  fgets(fmt, sizeof(fmt), f); // prog_tag
  fclose(f);
  char *p = strchr(fmt, ':');
  if (!p) {
/*    fprintf(stderr, "broken fdinfo %s\n", fmt);*/
    return -2;
  }
  unsigned long long tag = 0;
  sscanf(p + 1, "%llx", &tag);
  *ptag = tag;
  return 0;
}

static int libbpf_bpf_prog_load(const struct bpf_load_program_attr *load_attr,
                                char *log_buf, size_t log_buf_sz)
{
  LIBBPF_OPTS(bpf_prog_load_opts, p);

  if (!load_attr || !log_buf != !log_buf_sz) {
    errno = EINVAL;
    return -EINVAL;
  }

  p.expected_attach_type = load_attr->expected_attach_type;
  switch (load_attr->prog_type) {
  case BPF_PROG_TYPE_STRUCT_OPS:
  case BPF_PROG_TYPE_LSM:
    p.attach_btf_id = load_attr->attach_btf_id;
    break;
  case BPF_PROG_TYPE_TRACING:
  case BPF_PROG_TYPE_EXT:
    p.attach_btf_id = load_attr->attach_btf_id;
    p.attach_prog_fd = load_attr->attach_prog_fd;
    break;
  default:
    p.prog_ifindex = load_attr->prog_ifindex;
    p.kern_version = load_attr->kern_version;
  }
  p.log_level = load_attr->log_level;
  p.log_buf = log_buf;
  p.log_size = log_buf_sz;
  p.prog_btf_fd = load_attr->prog_btf_fd;
  p.func_info_rec_size = load_attr->func_info_rec_size;
  p.func_info_cnt = load_attr->func_info_cnt;
  p.func_info = load_attr->func_info;
  p.line_info_rec_size = load_attr->line_info_rec_size;
  p.line_info_cnt = load_attr->line_info_cnt;
  p.line_info = load_attr->line_info;
  p.prog_flags = load_attr->prog_flags;

  return bpf_prog_load(load_attr->prog_type, load_attr->name, load_attr->license,
                       load_attr->insns, load_attr->insns_cnt, &p);
}

int bcc_prog_load_xattr(struct bpf_load_program_attr *attr, int prog_len,
                        char *log_buf, unsigned log_buf_size, bool allow_rlimit)
{
  unsigned name_len = attr->name ? strlen(attr->name) : 0;
  char *tmp_log_buf = NULL, *attr_log_buf = NULL;
  unsigned tmp_log_buf_size = 0, attr_log_buf_size = 0;
  int ret = 0, name_offset = 0, expected_attach_type = 0;
  char prog_name[BPF_OBJ_NAME_LEN] = {};

  unsigned insns_cnt = prog_len / sizeof(struct bpf_insn);
  attr->insns_cnt = insns_cnt;

  if (attr->log_level > 0) {
    if (log_buf_size > 0) {
      // Use the user-provided log buffer if available.
      log_buf[0] = 0;
      attr_log_buf = log_buf;
      attr_log_buf_size = log_buf_size;
    } else {
      // Create and use a temporary log buffer if the user didn't provide one.
      tmp_log_buf_size = LOG_BUF_SIZE;
      tmp_log_buf = malloc(tmp_log_buf_size);
      if (!tmp_log_buf) {
        fprintf(stderr, "bpf: Failed to allocate temporary log buffer: %s\n\n",
                strerror(errno));
        attr->log_level = 0;
      } else {
        tmp_log_buf[0] = 0;
        attr_log_buf = tmp_log_buf;
        attr_log_buf_size = tmp_log_buf_size;
      }
    }
  }

  if (name_len) {
    if (strncmp(attr->name, "kprobe__", 8) == 0)
      name_offset = 8;
    else if (strncmp(attr->name, "kretprobe__", 11) == 0)
      name_offset = 11;
    else if (strncmp(attr->name, "tracepoint__", 12) == 0)
      name_offset = 12;
    else if (strncmp(attr->name, "raw_tracepoint__", 16) == 0)
      name_offset = 16;
    else if (strncmp(attr->name, "kfunc__", 7) == 0) {
      name_offset = 7;
      expected_attach_type = BPF_TRACE_FENTRY;
    } else if (strncmp(attr->name, "kmod_ret__", 10) == 0) {
      name_offset = 10;
      expected_attach_type = BPF_MODIFY_RETURN;
    } else if (strncmp(attr->name, "kretfunc__", 10) == 0) {
      name_offset = 10;
      expected_attach_type = BPF_TRACE_FEXIT;
    } else if (strncmp(attr->name, "lsm__", 5) == 0) {
      name_offset = 5;
      expected_attach_type = BPF_LSM_MAC;
    } else if (strncmp(attr->name, "bpf_iter__", 10) == 0) {
      name_offset = 10;
      expected_attach_type = BPF_TRACE_ITER;
    }

    if (attr->prog_type == BPF_PROG_TYPE_TRACING ||
        attr->prog_type == BPF_PROG_TYPE_LSM) {
#ifdef MINIMAL_LIBBPF
      fprintf(stderr, "vmlinux BTF not supported in this build of libbpf\n");
      return -1;
#else
      ret = libbpf_find_vmlinux_btf_id(attr->name + name_offset,
                                       expected_attach_type);
      if (ret == -EINVAL) {
        fprintf(stderr, "bpf: vmlinux BTF is not found\n");
        return ret;
      } else if (ret < 0) {
        fprintf(stderr, "bpf: %s is not found in vmlinux BTF\n",
                attr->name + name_offset);
        return ret;
      }

      attr->attach_btf_id = ret;
      attr->expected_attach_type = expected_attach_type;
#endif
    }

    memcpy(prog_name, attr->name + name_offset,
           min(name_len - name_offset, BPF_OBJ_NAME_LEN - 1));
    attr->name = prog_name;
  }

  ret = libbpf_bpf_prog_load(attr, attr_log_buf, attr_log_buf_size);

  // func_info/line_info may not be supported in old kernels.
  if (ret < 0 && attr->func_info && errno == EINVAL) {
    attr->prog_btf_fd = 0;
    attr->func_info = NULL;
    attr->func_info_cnt = 0;
    attr->func_info_rec_size = 0;
    attr->line_info = NULL;
    attr->line_info_cnt = 0;
    attr->line_info_rec_size = 0;
    ret = libbpf_bpf_prog_load(attr, attr_log_buf, attr_log_buf_size);
  }

  // The BPF object name is not supported on older kernels.
  // If we failed due to this, clear the name and try again.
  if (ret < 0 && name_len && (errno == E2BIG || errno == EINVAL)) {
    prog_name[0] = '\0';
    ret = libbpf_bpf_prog_load(attr, attr_log_buf, attr_log_buf_size);
  }

  if (ret < 0 && errno == EPERM) {
    if (!allow_rlimit)
      return ret;

    // When EPERM is returned, two reasons are possible:
    //  1. user has no permissions for bpf()
    //  2. user has insufficient rlimit for locked memory
    // Unfortunately, there is no API to inspect the current usage of locked
    // memory for the user, so it is difficult to calculate accurately how
    // much memory to lock for this new program. As a hack, bump the limit to
    // unlimited. If the program load fails again, return the error.
    struct rlimit rl = {};
    if (getrlimit(RLIMIT_MEMLOCK, &rl) == 0) {
      rl.rlim_max = RLIM_INFINITY;
      rl.rlim_cur = rl.rlim_max;
      if (setrlimit(RLIMIT_MEMLOCK, &rl) == 0)
        ret = libbpf_bpf_prog_load(attr, attr_log_buf, attr_log_buf_size);
    }
  }

  if (ret < 0 && errno == E2BIG) {
    fprintf(stderr,
            "bpf: %s. Program %s too large (%u insns), at most %d insns\n\n",
            strerror(errno), attr->name, insns_cnt, BPF_MAXINSNS);
    return -1;
  }

  // The load has failed. Handle the log message.
  if (ret < 0) {
    // User has provided a log buffer.
    if (log_buf_size) {
      // If logging is not already enabled, enable it and do the syscall again.
      if (attr->log_level == 0) {
        attr->log_level = 1;
        ret = libbpf_bpf_prog_load(attr, log_buf, log_buf_size);
      }
      // Print the log message and return.
      bpf_print_hints(ret, log_buf);
      if (errno == ENOSPC)
        fprintf(stderr, "bpf: log_buf size may be insufficient\n");
      goto return_result;
    }

    // User did not provide a log buffer. We will try to increase the size of
    // our temporary log buffer to get the full error message.
    if (tmp_log_buf)
      free(tmp_log_buf);
    tmp_log_buf_size = LOG_BUF_SIZE;
    if (attr->log_level == 0)
      attr->log_level = 1;
    for (;;) {
      tmp_log_buf = malloc(tmp_log_buf_size);
      if (!tmp_log_buf) {
        fprintf(stderr, "bpf: Failed to allocate temporary log buffer: %s\n\n",
                strerror(errno));
        goto return_result;
      }
      tmp_log_buf[0] = 0;
      ret = libbpf_bpf_prog_load(attr, tmp_log_buf, tmp_log_buf_size);
      if (ret < 0 && errno == ENOSPC) {
        // The temporary buffer size is not enough. Double it and try again.
        free(tmp_log_buf);
        tmp_log_buf = NULL;
        tmp_log_buf_size <<= 1;
      } else {
        break;
      }
    }
  }

  // Print the log message if log_level is not 0, whether it was specified by
  // the user or set due to an error.
  if (attr->log_level > 0) {
    // Don't print if the user enabled logging and provided a log buffer,
    // but there is no error.
    if (log_buf && ret < 0)
      bpf_print_hints(ret, log_buf);
    else if (tmp_log_buf)
      bpf_print_hints(ret, tmp_log_buf);
  }

return_result:
  if (tmp_log_buf)
    free(tmp_log_buf);
  return ret;
}

int bcc_prog_load(enum bpf_prog_type prog_type, const char *name,
                  const struct bpf_insn *insns, int prog_len,
                  const char *license, unsigned kern_version,
                  int log_level, char *log_buf, unsigned log_buf_size)
{
  struct bpf_load_program_attr attr = {};

  attr.prog_type = prog_type;
  attr.name = name;
  attr.insns = insns;
  attr.license = license;
  if (prog_type != BPF_PROG_TYPE_TRACING && prog_type != BPF_PROG_TYPE_EXT)
    attr.kern_version = kern_version;
  attr.log_level = log_level;
  return bcc_prog_load_xattr(&attr, prog_len, log_buf, log_buf_size, true);
}
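
/*
 * Illustrative sketch (editorial): load a minimal "return 0" socket filter
 * through bcc_prog_load(). The raw struct bpf_insn encodings below use only
 * constants from linux/bpf.h; the program name "mini" is a placeholder.
 *
 *   struct bpf_insn insns[] = {
 *     { .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0 }, // r0 = 0
 *     { .code = BPF_JMP | BPF_EXIT },                                // return r0
 *   };
 *   int prog_fd = bcc_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "mini", insns,
 *                               sizeof(insns), "GPL", 0, 0, NULL, 0);
 */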

int bpf_open_raw_sock(const char *name)
{
  struct sockaddr_ll sll;
  int sock;

  sock = socket(PF_PACKET, SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC, htons(ETH_P_ALL));
  if (sock < 0) {
    fprintf(stderr, "cannot create raw socket\n");
    return -1;
  }

  /* Do not bind on empty interface names */
  if (!name || *name == '\0')
    return sock;

  memset(&sll, 0, sizeof(sll));
  sll.sll_family = AF_PACKET;
  sll.sll_ifindex = if_nametoindex(name);
  if (sll.sll_ifindex == 0) {
    fprintf(stderr, "bpf: Resolving device name to index: %s\n", strerror(errno));
    close(sock);
    return -1;
  }
  sll.sll_protocol = htons(ETH_P_ALL);
  if (bind(sock, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
    fprintf(stderr, "bind to %s: %s\n", name, strerror(errno));
    close(sock);
    return -1;
  }

  return sock;
}

int bpf_attach_socket(int sock, int prog) {
  return setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog, sizeof(prog));
}
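
/*
 * Illustrative usage (editorial): open a raw socket bound to "lo" and attach
 * a loaded socket-filter program to it. "prog_fd" is assumed to come from a
 * call such as the bcc_prog_load() sketch above.
 *
 *   int sock = bpf_open_raw_sock("lo");
 *   if (sock >= 0 && bpf_attach_socket(sock, prog_fd) < 0)
 *     perror("SO_ATTACH_BPF");
 */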

#define PMU_TYPE_FILE "/sys/bus/event_source/devices/%s/type"
static int bpf_find_probe_type(const char *event_type)
{
  int fd;
  int ret;
  char buf[PATH_MAX];

  ret = snprintf(buf, sizeof(buf), PMU_TYPE_FILE, event_type);
  if (ret < 0 || ret >= (int)sizeof(buf))
    return -1;

  fd = open(buf, O_RDONLY | O_CLOEXEC);
  if (fd < 0)
    return -1;
  ret = read(fd, buf, sizeof(buf));
  close(fd);
  if (ret < 0 || ret >= (int)sizeof(buf))
    return -1;
  errno = 0;
  ret = (int)strtol(buf, NULL, 10);
  return errno ? -1 : ret;
}

#define PMU_RETPROBE_FILE "/sys/bus/event_source/devices/%s/format/retprobe"
static int bpf_get_retprobe_bit(const char *event_type)
{
  int fd;
  int ret;
  char buf[PATH_MAX];

  ret = snprintf(buf, sizeof(buf), PMU_RETPROBE_FILE, event_type);
  if (ret < 0 || ret >= (int)sizeof(buf))
    return -1;

  fd = open(buf, O_RDONLY | O_CLOEXEC);
  if (fd < 0)
    return -1;
  ret = read(fd, buf, sizeof(buf));
  close(fd);
  if (ret < 0 || ret >= (int)sizeof(buf))
    return -1;
  if (strncmp(buf, "config:", strlen("config:")))
    return -1;
  errno = 0;
  ret = (int)strtol(buf + strlen("config:"), NULL, 10);
  return errno ? -1 : ret;
}
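
// For reference (editorial): on kernels with the perf_kprobe/perf_uprobe
// PMUs, /sys/bus/event_source/devices/kprobe/type holds the dynamic PMU type
// number that bpf_find_probe_type() parses, and .../format/retprobe holds a
// string such as "config:0", from which bpf_get_retprobe_bit() extracts the
// bit position that marks an event as a retprobe.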

/*
 * Kernel API with e12f03d ("perf/core: Implement the 'perf_kprobe' PMU") allows
 * creating [k,u]probe with perf_event_open, which makes it easier to clean up
 * the [k,u]probe. This function tries to create pfd with the perf_kprobe PMU.
 */
static int bpf_try_perf_event_open_with_probe(const char *name, uint64_t offs,
             int pid, const char *event_type, int is_return,
             uint64_t ref_ctr_offset)
{
  struct perf_event_attr attr = {};
  int type = bpf_find_probe_type(event_type);
  int is_return_bit = bpf_get_retprobe_bit(event_type);
  int cpu = 0;

  if (type < 0 || is_return_bit < 0)
    return -1;
  attr.sample_period = 1;
  attr.wakeup_events = 1;
  if (is_return)
    attr.config |= 1 << is_return_bit;
  attr.config |= (ref_ctr_offset << PERF_UPROBE_REF_CTR_OFFSET_SHIFT);

  /*
   * struct perf_event_attr in the latest perf_event.h has the following
   * extension to config1 and config2. To keep bcc compatible with
   * older perf_event.h, we use config1 and config2 here instead of
   * kprobe_func, uprobe_path, kprobe_addr, and probe_offset.
   *
   * union {
   *  __u64 bp_addr;
   *  __u64 kprobe_func;
   *  __u64 uprobe_path;
   *  __u64 config1;
   * };
   * union {
   *   __u64 bp_len;
   *   __u64 kprobe_addr;
   *   __u64 probe_offset;
   *   __u64 config2;
   * };
   */
  attr.config2 = offs;  /* config2 here is kprobe_addr or probe_offset */
  attr.size = sizeof(attr);
  attr.type = type;
  /* config1 here is kprobe_func or uprobe_path */
  attr.config1 = ptr_to_u64((void *)name);
  // PID filter is only possible for uprobe events.
  if (pid < 0)
    pid = -1;
  // The perf_event_open API doesn't allow both pid and cpu to be -1,
  // so only set cpu to -1 when PID is not -1.
  // Tracing events do not do CPU filtering in any case.
  if (pid != -1)
    cpu = -1;
  return syscall(__NR_perf_event_open, &attr, pid, cpu, -1 /* group_fd */,
                 PERF_FLAG_FD_CLOEXEC);
}

// When a valid Perf Event FD is provided through pfd, it will be used to
// enable and attach the BPF program to the event, and event_path will be
// ignored. Otherwise, event_path is expected to contain the path to the
// event in debugfs, and it will be used to open the Perf Event FD.
// In either case, if the attach partially fails (such as an issue with the
// ioctl operations), the **caller** needs to clean up the Perf Event FD,
// whether it was provided by the caller or opened here.
static int bpf_attach_tracing_event(int progfd, const char *event_path, int pid,
                                    int *pfd)
{
  int efd, cpu = 0;
  ssize_t bytes;
  char buf[PATH_MAX];
  struct perf_event_attr attr = {};
  // Caller did not provide a valid Perf Event FD. Create one with the debugfs
  // event path provided.
  if (*pfd < 0) {
    snprintf(buf, sizeof(buf), "%s/id", event_path);
    efd = open(buf, O_RDONLY | O_CLOEXEC, 0);
    if (efd < 0) {
      fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
      return -1;
    }

    bytes = read(efd, buf, sizeof(buf));
    if (bytes <= 0 || bytes >= (int)sizeof(buf)) {
      fprintf(stderr, "read(%s): %s\n", buf, strerror(errno));
      close(efd);
      return -1;
    }
    close(efd);
    buf[bytes] = '\0';
    attr.config = strtol(buf, NULL, 0);
    attr.type = PERF_TYPE_TRACEPOINT;
    attr.sample_period = 1;
    attr.wakeup_events = 1;
    // PID filter is only possible for uprobe events.
    if (pid < 0)
      pid = -1;
    // The perf_event_open API doesn't allow both pid and cpu to be -1,
    // so only set cpu to -1 when PID is not -1.
    // Tracing events do not do CPU filtering in any case.
    if (pid != -1)
      cpu = -1;
    *pfd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
    if (*pfd < 0) {
      fprintf(stderr, "perf_event_open(%s/id): %s\n", event_path, strerror(errno));
      return -1;
    }
  }

  if (ioctl(*pfd, PERF_EVENT_IOC_SET_BPF, progfd) < 0) {
    perror("ioctl(PERF_EVENT_IOC_SET_BPF)");
    return -1;
  }
  if (ioctl(*pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
    perror("ioctl(PERF_EVENT_IOC_ENABLE)");
    return -1;
  }

  return 0;
}

/* Creates an [uk]probe using debugfs.
 * On success, the path to the probe is placed in buf (which is assumed to be of size PATH_MAX).
 */
static int create_probe_event(char *buf, const char *ev_name,
                              enum bpf_probe_attach_type attach_type,
                              const char *config1, uint64_t offset,
                              const char *event_type, pid_t pid, int maxactive)
{
  int kfd = -1, res = -1;
  char ev_alias[256];
  bool is_kprobe = strncmp("kprobe", event_type, 6) == 0;
  bool use_debugfs = false;

  snprintf(buf, PATH_MAX, "/sys/kernel/tracing/%s_events", event_type);
  kfd = open(buf, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
  if (kfd < 0) {
    use_debugfs = true;
    snprintf(buf, PATH_MAX, "/sys/kernel/debug/tracing/%s_events", event_type);
    kfd = open(buf, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
    if (kfd < 0) {
      fprintf(stderr, "%s: open(%s): %s\n", __func__, buf,
              strerror(errno));
      return -1;
    }
  }

  res = snprintf(ev_alias, sizeof(ev_alias), "%s_bcc_%d", ev_name, getpid());
  if (res < 0 || res >= sizeof(ev_alias)) {
    fprintf(stderr, "Event name (%s) is too long for buffer\n", ev_name);
    close(kfd);
    goto error;
  }

  if (is_kprobe) {
    if (offset > 0 && attach_type == BPF_PROBE_ENTRY)
      snprintf(buf, PATH_MAX, "p:kprobes/%s %s+%"PRIu64,
               ev_alias, config1, offset);
    else if (maxactive > 0 && attach_type == BPF_PROBE_RETURN)
      snprintf(buf, PATH_MAX, "r%d:kprobes/%s %s",
               maxactive, ev_alias, config1);
    else
      snprintf(buf, PATH_MAX, "%c:kprobes/%s %s",
               attach_type == BPF_PROBE_ENTRY ? 'p' : 'r',
               ev_alias, config1);
  } else {
    res = snprintf(buf, PATH_MAX, "%c:%ss/%s %s:0x%lx",
                   attach_type == BPF_PROBE_ENTRY ? 'p' : 'r',
                   event_type, ev_alias, config1, (unsigned long)offset);
    if (res < 0 || res >= PATH_MAX) {
      fprintf(stderr, "Event alias (%s) too long for buffer\n", ev_alias);
      close(kfd);
      return -1;
    }
  }

  if (write(kfd, buf, strlen(buf)) < 0) {
    if (errno == ENOENT)
      fprintf(stderr, "cannot attach %s, probe entry may not exist\n", event_type);
    else
      fprintf(stderr, "cannot attach %s, %s\n", event_type, strerror(errno));
    close(kfd);
    goto error;
  }
  close(kfd);
  if (use_debugfs) {
    snprintf(buf, PATH_MAX, "/sys/kernel/debug/tracing/events/%ss/%s",
             event_type, ev_alias);
  } else {
    snprintf(buf, PATH_MAX, "/sys/kernel/tracing/events/%ss/%s",
             event_type, ev_alias);
  }
  return 0;
error:
  return -1;
}
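
/*
 * For reference (editorial): the strings written above take forms such as
 *   p:kprobes/p_do_sys_open_bcc_1234 do_sys_open        (kprobe entry)
 *   r4:kprobes/r_do_sys_open_bcc_1234 do_sys_open       (kretprobe, maxactive 4)
 *   p:uprobes/p_readline_bcc_1234 /bin/bash:0x8a200     (uprobe)
 * where the "_bcc_<pid>" suffix embeds this process's PID so that
 * bpf_detach_probe() below can find and remove only the events this process
 * created. (The function names, path, and offset are placeholders.)
 */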

// config1 could be either kprobe_func or uprobe_path,
// see bpf_try_perf_event_open_with_probe().
static int bpf_attach_probe(int progfd, enum bpf_probe_attach_type attach_type,
                            const char *ev_name, const char *config1, const char* event_type,
                            uint64_t offset, pid_t pid, int maxactive,
                            uint32_t ref_ctr_offset)
{
  int kfd, pfd = -1;
  char buf[PATH_MAX], fname[256];
  bool is_kprobe = strncmp("kprobe", event_type, 6) == 0;

  if (maxactive <= 0)
    // Try to create the [k,u]probe Perf Event with the perf_event_open API.
    pfd = bpf_try_perf_event_open_with_probe(config1, offset, pid, event_type,
                                             attach_type != BPF_PROBE_ENTRY,
                                             ref_ctr_offset);

  // If that failed, the kernel most likely doesn't support the perf_kprobe PMU
  // (e12f03d "perf/core: Implement the 'perf_kprobe' PMU") yet.
  // Try to create the event using debugfs.
  if (pfd < 0) {
    if (create_probe_event(buf, ev_name, attach_type, config1, offset,
                           event_type, pid, maxactive) < 0)
      goto error;

    // If we're using maxactive, we need to check that the event was created
    // under the expected name.  If debugfs doesn't support maxactive yet
    // (kernel < 4.12), the event is created under a different name; we need to
    // delete that event and start again without maxactive.
    if (is_kprobe && maxactive > 0 && attach_type == BPF_PROBE_RETURN) {
      if (snprintf(fname, sizeof(fname), "%s/id", buf) >= sizeof(fname)) {
        fprintf(stderr, "filename (%s) is too long for buffer\n", buf);
        goto error;
      }
      if (access(fname, F_OK) == -1) {
        // Deleting kprobe event with incorrect name.
        kfd = open("/sys/kernel/tracing/kprobe_events",
                   O_WRONLY | O_APPEND | O_CLOEXEC, 0);
        if (kfd < 0) {
          kfd = open("/sys/kernel/debug/tracing/kprobe_events",
                     O_WRONLY | O_APPEND | O_CLOEXEC, 0);
          if (kfd < 0) {
            fprintf(stderr, "open(/sys/kernel/debug/tracing/kprobe_events): %s\n",
                    strerror(errno));
            return -1;
          }
        }
        snprintf(fname, sizeof(fname), "-:kprobes/%s_0", ev_name);
        if (write(kfd, fname, strlen(fname)) < 0) {
          if (errno == ENOENT)
            fprintf(stderr, "cannot detach kprobe, probe entry may not exist\n");
          else
            fprintf(stderr, "cannot detach kprobe, %s\n", strerror(errno));
          close(kfd);
          goto error;
        }
        close(kfd);

        // Re-creating kprobe event without maxactive.
        if (create_probe_event(buf, ev_name, attach_type, config1,
                               offset, event_type, pid, 0) < 0)
          goto error;
      }
    }
  }
  // If perf_event_open succeeded, bpf_attach_tracing_event will use the
  // created Perf Event FD directly, and buf will be empty and unused.
  // Otherwise it will read the event ID from the path in buf, create the
  // Perf Event using that ID, and update the value of pfd.
  if (bpf_attach_tracing_event(progfd, buf, pid, &pfd) == 0)
    return pfd;

error:
  bpf_close_perf_event_fd(pfd);
  return -1;
}

int bpf_attach_kprobe(int progfd, enum bpf_probe_attach_type attach_type,
                      const char *ev_name, const char *fn_name,
                      uint64_t fn_offset, int maxactive)
{
  return bpf_attach_probe(progfd, attach_type,
                          ev_name, fn_name, "kprobe",
                          fn_offset, -1, maxactive, 0);
}

int bpf_attach_uprobe(int progfd, enum bpf_probe_attach_type attach_type,
                      const char *ev_name, const char *binary_path,
                      uint64_t offset, pid_t pid, uint32_t ref_ctr_offset)
{
  return bpf_attach_probe(progfd, attach_type,
                          ev_name, binary_path, "uprobe",
                          offset, pid, -1, ref_ctr_offset);
}
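
/*
 * Illustrative usage (editorial): attach prog_fd to the entry of
 * do_sys_open via a kprobe. The event name "p_do_sys_open" follows the
 * conventional bcc form and is a placeholder here.
 *
 *   int ev_fd = bpf_attach_kprobe(prog_fd, BPF_PROBE_ENTRY,
 *                                 "p_do_sys_open", "do_sys_open",
 *                                 0, 0);  // fn_offset = 0, maxactive = 0
 *   if (ev_fd < 0)
 *     fprintf(stderr, "kprobe attach failed\n");
 */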

static int bpf_detach_probe(const char *ev_name, const char *event_type)
{
  int kfd = -1, res;
  char buf[PATH_MAX];
  int found_event = 0;
  size_t bufsize = 0;
  char *cptr = NULL;
  FILE *fp;
  bool use_debugfs = false;

  /*
   * For [k,u]probes created with perf_event_open (on newer kernels), it is
   * not necessary to clean them up in [k,u]probe_events. We first look up
   * the %s_bcc_%d line in [k,u]probe_events. If the event is not found,
   * it is safe to skip the cleanup process (writing -:... to the file).
   */
  snprintf(buf, sizeof(buf), "/sys/kernel/tracing/%s_events", event_type);
  fp = fopen(buf, "re");
  if (!fp) {
    use_debugfs = true;
    snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
    fp = fopen(buf, "re");
    if (!fp) {
      fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
      goto error;
    }
  }

  res = snprintf(buf, sizeof(buf), "%ss/%s_bcc_%d", event_type, ev_name, getpid());
  if (res < 0 || res >= (int)sizeof(buf)) {
    fprintf(stderr, "snprintf(%s): %d\n", ev_name, res);
    goto error;
  }

  while (getline(&cptr, &bufsize, fp) != -1)
    if (strstr(cptr, buf) != NULL) {
      found_event = 1;
      break;
    }
  free(cptr);
  fclose(fp);
  fp = NULL;

  if (!found_event)
    return 0;

  if (use_debugfs) {
    snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
  } else {
    snprintf(buf, sizeof(buf), "/sys/kernel/tracing/%s_events", event_type);
  }
  kfd = open(buf, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
  if (kfd < 0) {
    fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
    goto error;
  }

  res = snprintf(buf, sizeof(buf), "-:%ss/%s_bcc_%d", event_type, ev_name, getpid());
  if (res < 0 || res >= (int)sizeof(buf)) {
    fprintf(stderr, "snprintf(%s): %d\n", ev_name, res);
    goto error;
  }
  if (write(kfd, buf, strlen(buf)) < 0) {
    fprintf(stderr, "write(%s): %s\n", buf, strerror(errno));
    goto error;
  }

  close(kfd);
  return 0;

error:
  if (kfd >= 0)
    close(kfd);
  if (fp)
    fclose(fp);
  return -1;
}

int bpf_detach_kprobe(const char *ev_name)
{
  return bpf_detach_probe(ev_name, "kprobe");
}

int bpf_detach_uprobe(const char *ev_name)
{
  return bpf_detach_probe(ev_name, "uprobe");
}

int bpf_attach_tracepoint(int progfd, const char *tp_category,
                          const char *tp_name)
{
  char buf[256];
  int pfd = -1;

  snprintf(buf, sizeof(buf), "/sys/kernel/tracing/events/%s/%s",
           tp_category, tp_name);
  if (bpf_attach_tracing_event(progfd, buf, -1 /* PID */, &pfd) == 0)
    return pfd;

  snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%s/%s",
           tp_category, tp_name);
  if (bpf_attach_tracing_event(progfd, buf, -1 /* PID */, &pfd) == 0)
    return pfd;

  bpf_close_perf_event_fd(pfd);
  return -1;
}
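
/*
 * Illustrative usage (editorial): attach prog_fd to the
 * syscalls:sys_enter_openat tracepoint. The category/name pair maps to
 * /sys/kernel/[debug/]tracing/events/syscalls/sys_enter_openat.
 *
 *   int ev_fd = bpf_attach_tracepoint(prog_fd, "syscalls",
 *                                     "sys_enter_openat");
 */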

int bpf_detach_tracepoint(const char *tp_category, const char *tp_name) {
  UNUSED(tp_category);
  UNUSED(tp_name);
  // Right now, there is nothing to do, but it's a good idea to encourage
  // callers to detach anything they attach.
  return 0;
}

int bpf_attach_raw_tracepoint(int progfd, const char *tp_name)
{
  int ret;

  ret = bpf_raw_tracepoint_open(tp_name, progfd);
  if (ret < 0)
    fprintf(stderr, "bpf_attach_raw_tracepoint (%s): %s\n", tp_name, strerror(errno));
  return ret;
}

#ifndef MINIMAL_LIBBPF
bool bpf_has_kernel_btf(void)
{
  struct btf *btf;
  int err;

  btf = btf__parse_raw("/sys/kernel/btf/vmlinux");
  err = libbpf_get_error(btf);
  if (err)
    return false;

  btf__free(btf);
  return true;
}

int kernel_struct_has_field(const char *struct_name, const char *field_name)
{
  const struct btf_type *btf_type;
  const struct btf_member *btf_member;
  struct btf *btf;
  int i, ret, btf_id;

  btf = btf__load_vmlinux_btf();
  ret = libbpf_get_error(btf);
  if (ret)
    return -1;

  btf_id = btf__find_by_name_kind(btf, struct_name, BTF_KIND_STRUCT);
  if (btf_id < 0) {
    ret = -1;
    goto cleanup;
  }

  btf_type = btf__type_by_id(btf, btf_id);
  btf_member = btf_members(btf_type);
  for (i = 0; i < btf_vlen(btf_type); i++, btf_member++) {
    if (!strcmp(btf__name_by_offset(btf, btf_member->name_off), field_name)) {
      ret = 1;
      goto cleanup;
    }
  }
  ret = 0;

cleanup:
  btf__free(btf);
  return ret;
}
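
/*
 * Illustrative usage (editorial): probe vmlinux BTF for the task_struct
 * "__state" field, which replaced "state" in Linux 5.14. The function returns
 * 1 if the field is present, 0 if absent, and -1 if vmlinux BTF is
 * unavailable.
 *
 *   if (kernel_struct_has_field("task_struct", "__state") == 1) {
 *     // read task->__state
 *   } else {
 *     // fall back to task->state
 *   }
 */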
#endif

int bpf_attach_kfunc(int prog_fd)
{
  int ret;

  ret = bpf_raw_tracepoint_open(NULL, prog_fd);
  if (ret < 0)
    fprintf(stderr, "bpf_attach_raw_tracepoint (kfunc): %s\n", strerror(errno));
  return ret;
}

int bpf_attach_lsm(int prog_fd)
{
  int ret;

  ret = bpf_raw_tracepoint_open(NULL, prog_fd);
  if (ret < 0)
    fprintf(stderr, "bpf_attach_raw_tracepoint (lsm): %s\n", strerror(errno));
  return ret;
}

void * bpf_open_perf_buffer(perf_reader_raw_cb raw_cb,
                            perf_reader_lost_cb lost_cb, void *cb_cookie,
                            int pid, int cpu, int page_cnt)
{
  struct bcc_perf_buffer_opts opts = {
    .pid = pid,
    .cpu = cpu,
    .wakeup_events = 1,
  };

  return bpf_open_perf_buffer_opts(raw_cb, lost_cb, cb_cookie, page_cnt, &opts);
}

void * bpf_open_perf_buffer_opts(perf_reader_raw_cb raw_cb,
                            perf_reader_lost_cb lost_cb, void *cb_cookie,
                            int page_cnt, struct bcc_perf_buffer_opts *opts)
{
  int pfd, pid = opts->pid, cpu = opts->cpu;
  struct perf_event_attr attr = {};
  struct perf_reader *reader = NULL;

  reader = perf_reader_new(raw_cb, lost_cb, cb_cookie, page_cnt);
  if (!reader)
    goto error;

  attr.config = 10; // PERF_COUNT_SW_BPF_OUTPUT
  attr.type = PERF_TYPE_SOFTWARE;
  attr.sample_type = PERF_SAMPLE_RAW;
  attr.sample_period = 1;
  attr.wakeup_events = opts->wakeup_events;
  pfd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1, PERF_FLAG_FD_CLOEXEC);
  if (pfd < 0) {
    fprintf(stderr, "perf_event_open: %s\n", strerror(errno));
    fprintf(stderr, "   (check your kernel for PERF_COUNT_SW_BPF_OUTPUT support, 4.4 or newer)\n");
    goto error;
  }
  perf_reader_set_fd(reader, pfd);

  if (perf_reader_mmap(reader) < 0)
    goto error;

  if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
    perror("ioctl(PERF_EVENT_IOC_ENABLE)");
    goto error;
  }

  return reader;

error:
  if (reader)
    perf_reader_free(reader);

  return NULL;
}
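
/*
 * Illustrative sketch (editorial): open an 8-page perf buffer on CPU 0 and
 * poll it. The callback signature follows perf_reader_raw_cb from
 * perf_reader.h; "handle_event" is a placeholder, and the perf_reader_poll()
 * call assumes the poll helper declared in perf_reader.h.
 *
 *   static void handle_event(void *cb_cookie, void *raw, int raw_size) {
 *     // consume raw_size bytes at raw
 *   }
 *   ...
 *   struct perf_reader *r = bpf_open_perf_buffer(handle_event, NULL, NULL,
 *                                                -1, 0, 8);  // pid, cpu, pages
 *   if (r)
 *     perf_reader_poll(1, &r, -1);
 */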

static int invalid_perf_config(uint32_t type, uint64_t config) {
  switch (type) {
  case PERF_TYPE_HARDWARE:
    if (config >= PERF_COUNT_HW_MAX) {
      fprintf(stderr, "HARDWARE perf event config out of range\n");
      goto is_invalid;
    }
    return 0;
  case PERF_TYPE_SOFTWARE:
    if (config >= PERF_COUNT_SW_MAX) {
      fprintf(stderr, "SOFTWARE perf event config out of range\n");
      goto is_invalid;
    } else if (config == 10 /* PERF_COUNT_SW_BPF_OUTPUT */) {
      fprintf(stderr, "Unable to open or attach perf event for BPF_OUTPUT\n");
      goto is_invalid;
    }
    return 0;
  case PERF_TYPE_HW_CACHE:
    if (((config >> 16) >= PERF_COUNT_HW_CACHE_RESULT_MAX) ||
        (((config >> 8) & 0xff) >= PERF_COUNT_HW_CACHE_OP_MAX) ||
        ((config & 0xff) >= PERF_COUNT_HW_CACHE_MAX)) {
      fprintf(stderr, "HW_CACHE perf event config out of range\n");
      goto is_invalid;
    }
    return 0;
  case PERF_TYPE_TRACEPOINT:
  case PERF_TYPE_BREAKPOINT:
    fprintf(stderr,
            "Unable to open or attach TRACEPOINT or BREAKPOINT events\n");
    goto is_invalid;
  default:
    return 0;
  }
is_invalid:
  fprintf(stderr, "Invalid perf event type %" PRIu32 " config %" PRIu64 "\n",
          type, config);
  return 1;
}
1556 
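/* Open (but do not attach a program to) a perf event whose counter BPF
 * programs can read with bpf_perf_event_read(). sample_period is set to
 * LONG_MAX so the event effectively never generates samples. */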
int bpf_open_perf_event(uint32_t type, uint64_t config, int pid, int cpu) {
  int fd;
  struct perf_event_attr attr = {};

  if (invalid_perf_config(type, config)) {
    return -1;
  }

  attr.sample_period = LONG_MAX;
  attr.type = type;
  attr.config = config;

  fd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1, PERF_FLAG_FD_CLOEXEC);
  if (fd < 0) {
    fprintf(stderr, "perf_event_open: %s\n", strerror(errno));
    return -1;
  }

  if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
    perror("ioctl(PERF_EVENT_IOC_ENABLE)");
    close(fd);
    return -1;
  }

  return fd;
}

int bpf_attach_xdp(const char *dev_name, int progfd, uint32_t flags) {
  int ifindex = if_nametoindex(dev_name);
  char err_buf[256];
  int ret = -1;

  if (ifindex == 0) {
    fprintf(stderr, "bpf: Resolving device name to index: %s\n", strerror(errno));
    return -1;
  }

  ret = bpf_set_link_xdp_fd(ifindex, progfd, flags);
  if (ret) {
    libbpf_strerror(ret, err_buf, sizeof(err_buf));
    fprintf(stderr, "bpf: Attaching prog to %s: %s\n", dev_name, err_buf);
    return -1;
  }

  return 0;
}

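/*
 * Usage sketch (illustrative, not part of the original file), assuming
 * prog_fd holds a loaded BPF_PROG_TYPE_XDP program and linux/if_link.h is
 * included for the flag constants:
 *
 *   bpf_attach_xdp("eth0", prog_fd, XDP_FLAGS_SKB_MODE);   force generic XDP
 *   bpf_attach_xdp("eth0", -1, 0);                         detach (fd of -1)
 */
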
/* Attach progfd to a perf event described by a caller-populated
 * struct perf_event_attr. Returns the perf event fd on success; the caller
 * owns it and should release it with bpf_close_perf_event_fd(). */
int bpf_attach_perf_event_raw(int progfd, void *perf_event_attr, pid_t pid,
                              int cpu, int group_fd, unsigned long extra_flags) {
  int fd = syscall(__NR_perf_event_open, perf_event_attr, pid, cpu, group_fd,
                   PERF_FLAG_FD_CLOEXEC | extra_flags);
  if (fd < 0) {
    perror("perf_event_open failed");
    return -1;
  }
  if (ioctl(fd, PERF_EVENT_IOC_SET_BPF, progfd) != 0) {
    perror("ioctl(PERF_EVENT_IOC_SET_BPF) failed");
    close(fd);
    return -1;
  }
  if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) != 0) {
    perror("ioctl(PERF_EVENT_IOC_ENABLE) failed");
    close(fd);
    return -1;
  }

  return fd;
}

int bpf_attach_perf_event(int progfd, uint32_t ev_type, uint32_t ev_config,
                          uint64_t sample_period, uint64_t sample_freq,
                          pid_t pid, int cpu, int group_fd) {
  if (invalid_perf_config(ev_type, ev_config)) {
    return -1;
  }
  if (!((sample_period > 0) ^ (sample_freq > 0))) {
    fprintf(stderr, "Exactly one of sample_period / sample_freq should be set\n");
    return -1;
  }

  struct perf_event_attr attr = {};
  attr.type = ev_type;
  attr.config = ev_config;
  if (pid > 0)
    attr.inherit = 1;
  if (sample_freq > 0) {
    attr.freq = 1;
    attr.sample_freq = sample_freq;
  } else {
    attr.sample_period = sample_period;
  }

  return bpf_attach_perf_event_raw(progfd, &attr, pid, cpu, group_fd, 0);
}

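/*
 * Usage sketch (illustrative, not part of the original file): attach prog_fd
 * as a CPU-clock profiler sampling at 49 Hz, for all processes on CPU 0
 * (callers typically loop over online CPUs, one event per CPU):
 *
 *   int fd = bpf_attach_perf_event(prog_fd, PERF_TYPE_SOFTWARE,
 *                                  PERF_COUNT_SW_CPU_CLOCK,
 *                                  0, 49,         sample_period, sample_freq
 *                                  -1, 0, -1);    pid, cpu, group_fd
 *   ...
 *   bpf_close_perf_event_fd(fd);
 */
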
int bpf_close_perf_event_fd(int fd) {
  int res, error = 0;
  if (fd >= 0) {
    res = ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
    if (res != 0) {
      perror("ioctl(PERF_EVENT_IOC_DISABLE) failed");
      error = res;
    }
    res = close(fd);
    if (res != 0) {
      perror("close perf event FD failed");
      error = (res && !error) ? res : error;
    }
  }
  return error;
}

/* Create a new ringbuf manager to manage the ringbuf associated with
 * map_fd, associating it with callback sample_cb. */
void * bpf_new_ringbuf(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx) {
    return ring_buffer__new(map_fd, sample_cb, ctx, NULL);
}

/* Free the ringbuf manager rb and all ring buffers associated with it. */
void bpf_free_ringbuf(struct ring_buffer *rb) {
    ring_buffer__free(rb);
}

/* Add a new ring buffer associated with map_fd to the ring buffer manager rb,
 * associating it with callback sample_cb. */
int bpf_add_ringbuf(struct ring_buffer *rb, int map_fd,
                    ring_buffer_sample_fn sample_cb, void *ctx) {
    return ring_buffer__add(rb, map_fd, sample_cb, ctx);
}

/* Poll for available data and consume it, if any. Returns the number of
 * records consumed, or a negative number if any callback returned an
 * error. */
int bpf_poll_ringbuf(struct ring_buffer *rb, int timeout_ms) {
    return ring_buffer__poll(rb, timeout_ms);
}

/* Consume available data _without_ polling. Useful when the lowest possible
 * latency matters more than the extra CPU cost. Returns the number of
 * records consumed, or a negative number if any callback returned an
 * error. */
int bpf_consume_ringbuf(struct ring_buffer *rb) {
    return ring_buffer__consume(rb);
}
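
/*
 * Usage sketch (illustrative, not part of the original file), assuming
 * map_fd refers to a BPF_MAP_TYPE_RINGBUF map and handle_sample is a
 * hypothetical callback matching libbpf's ring_buffer_sample_fn:
 *
 *   int handle_sample(void *ctx, void *data, size_t size);
 *
 *   struct ring_buffer *rb = bpf_new_ringbuf(map_fd, handle_sample, NULL);
 *   while (run)
 *     bpf_poll_ringbuf(rb, 100);    100 ms timeout
 *   bpf_free_ringbuf(rb);
 */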

/* BPF iterator programs: bcc_iter_attach() creates a link from a loaded
 * BPF_TRACE_ITER program, and bcc_iter_create() turns that link into a file
 * descriptor which can be read() to run the iterator. */
int bcc_iter_attach(int prog_fd, union bpf_iter_link_info *link_info,
                    uint32_t link_info_len)
{
    DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);

    link_create_opts.iter_info = link_info;
    link_create_opts.iter_info_len = link_info_len;
    return bpf_link_create(prog_fd, 0, BPF_TRACE_ITER, &link_create_opts);
}

int bcc_iter_create(int link_fd)
{
    return bpf_iter_create(link_fd);
}

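/* Create the parent directory of path (one level only, mode 0700),
 * tolerating an already-existing directory. Typically called before pinning
 * an object to a path on the BPF filesystem. */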
int bcc_make_parent_dir(const char *path) {
  int   err = 0;
  char *dname, *dir;

  dname = strdup(path);
  if (dname == NULL)
    return -ENOMEM;

  dir = dirname(dname);
  if (mkdir(dir, 0700) && errno != EEXIST)
    err = -errno;

  free(dname);
  if (err)
    fprintf(stderr, "failed to mkdir %s: %s\n", path, strerror(-err));

  return err;
}

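/* Check that the parent directory of path resides on a BPF filesystem, i.e.
 * that statfs() reports f_type == BPF_FS_MAGIC. Returns 0 on success and a
 * negative errno-style value otherwise. */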
int bcc_check_bpffs_path(const char *path) {
  struct statfs st_fs;
  char  *dname, *dir;
  int    err = 0;

  if (path == NULL)
    return -EINVAL;

  dname = strdup(path);
  if (dname == NULL)
    return -ENOMEM;

  dir = dirname(dname);
  if (statfs(dir, &st_fs)) {
    err = -errno;
    fprintf(stderr, "failed to statfs %s: %s\n", path, strerror(-err));
  }

  free(dname);
  if (!err && st_fs.f_type != BPF_FS_MAGIC) {
    err = -EINVAL;
    fprintf(stderr, "specified path %s is not on BPF FS\n", path);
  }

  return err;
}