1 /*
2 * Copyright (c) 2016 Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <fcntl.h>
18 #include <linux/elf.h>
19 #include <linux/perf_event.h>
20 #include <sys/epoll.h>
21 #include <unistd.h>
22 #include <cerrno>
23 #include <cinttypes>
24 #include <cstdint>
25 #include <cstring>
26 #include <iostream>
27 #include <memory>
28
29 #include "BPFTable.h"
30
31 #include "bcc_exception.h"
32 #include "bcc_syms.h"
33 #include "common.h"
34 #include "file_desc.h"
35 #include "libbpf.h"
36 #include "perf_reader.h"
37
38 namespace ebpf {
39
BPFTable(const TableDesc & desc)40 BPFTable::BPFTable(const TableDesc& desc) : BPFTableBase<void, void>(desc) {}
41
get_value(const std::string & key_str,std::string & value_str)42 StatusTuple BPFTable::get_value(const std::string& key_str,
43 std::string& value_str) {
44 char key[desc.key_size];
45 char value[desc.leaf_size];
46
47 StatusTuple r(0);
48
49 r = string_to_key(key_str, key);
50 if (r.code() != 0)
51 return r;
52
53 if (!lookup(key, value))
54 return StatusTuple(-1, "error getting value");
55
56 return leaf_to_string(value, value_str);
57 }
58
get_value(const std::string & key_str,std::vector<std::string> & value_str)59 StatusTuple BPFTable::get_value(const std::string& key_str,
60 std::vector<std::string>& value_str) {
61 size_t ncpus = get_possible_cpus().size();
62 char key[desc.key_size];
63 char value[desc.leaf_size * ncpus];
64
65 StatusTuple r(0);
66
67 r = string_to_key(key_str, key);
68 if (r.code() != 0)
69 return r;
70
71 if (!lookup(key, value))
72 return StatusTuple(-1, "error getting value");
73
74 value_str.resize(ncpus);
75
76 for (size_t i = 0; i < ncpus; i++) {
77 r = leaf_to_string(value + i * desc.leaf_size, value_str.at(i));
78 if (r.code() != 0)
79 return r;
80 }
81 return StatusTuple(0);
82 }
83
update_value(const std::string & key_str,const std::string & value_str)84 StatusTuple BPFTable::update_value(const std::string& key_str,
85 const std::string& value_str) {
86 char key[desc.key_size];
87 char value[desc.leaf_size];
88
89 StatusTuple r(0);
90
91 r = string_to_key(key_str, key);
92 if (r.code() != 0)
93 return r;
94
95 r = string_to_leaf(value_str, value);
96 if (r.code() != 0)
97 return r;
98
99 if (!update(key, value))
100 return StatusTuple(-1, "error updating element");
101
102 return StatusTuple(0);
103 }
104
update_value(const std::string & key_str,const std::vector<std::string> & value_str)105 StatusTuple BPFTable::update_value(const std::string& key_str,
106 const std::vector<std::string>& value_str) {
107 size_t ncpus = get_possible_cpus().size();
108 char key[desc.key_size];
109 char value[desc.leaf_size * ncpus];
110
111 StatusTuple r(0);
112
113 r = string_to_key(key_str, key);
114 if (r.code() != 0)
115 return r;
116
117 if (value_str.size() != ncpus)
118 return StatusTuple(-1, "bad value size");
119
120 for (size_t i = 0; i < ncpus; i++) {
121 r = string_to_leaf(value_str.at(i), value + i * desc.leaf_size);
122 if (r.code() != 0)
123 return r;
124 }
125
126 if (!update(key, value))
127 return StatusTuple(-1, "error updating element");
128
129 return StatusTuple(0);
130 }
131
remove_value(const std::string & key_str)132 StatusTuple BPFTable::remove_value(const std::string& key_str) {
133 char key[desc.key_size];
134
135 StatusTuple r(0);
136
137 r = string_to_key(key_str, key);
138 if (r.code() != 0)
139 return r;
140
141 if (!remove(key))
142 return StatusTuple(-1, "error removing element");
143
144 return StatusTuple(0);
145 }
146
clear_table_non_atomic()147 StatusTuple BPFTable::clear_table_non_atomic() {
148 if (desc.type == BPF_MAP_TYPE_HASH || desc.type == BPF_MAP_TYPE_PERCPU_HASH ||
149 desc.type == BPF_MAP_TYPE_LRU_HASH ||
150 desc.type == BPF_MAP_TYPE_PERCPU_HASH ||
151 desc.type == BPF_MAP_TYPE_HASH_OF_MAPS) {
152 // For hash maps, use the first() interface (which uses get_next_key) to
153 // iterate through the map and clear elements
154 auto key = std::unique_ptr<void, decltype(::free)*>(::malloc(desc.key_size),
155 ::free);
156
157 while (this->first(key.get()))
158 if (!this->remove(key.get())) {
159 return StatusTuple(-1,
160 "Failed to delete element when clearing table %s",
161 desc.name.c_str());
162 }
163 } else if (desc.type == BPF_MAP_TYPE_ARRAY ||
164 desc.type == BPF_MAP_TYPE_PERCPU_ARRAY) {
165 return StatusTuple(-1, "Array map %s do not support clearing elements",
166 desc.name.c_str());
167 } else if (desc.type == BPF_MAP_TYPE_PROG_ARRAY ||
168 desc.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
169 desc.type == BPF_MAP_TYPE_STACK_TRACE ||
170 desc.type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
171 // For Stack-trace and FD arrays, just iterate over all indices
172 for (size_t i = 0; i < desc.max_entries; i++) {
173 this->remove(&i);
174 }
175 } else {
176 return StatusTuple(-1, "Clearing for map type of %s not supported yet",
177 desc.name.c_str());
178 }
179
180 return StatusTuple(0);
181 }
182
get_table_offline(std::vector<std::pair<std::string,std::string>> & res)183 StatusTuple BPFTable::get_table_offline(
184 std::vector<std::pair<std::string, std::string>> &res) {
185 StatusTuple r(0);
186 int err;
187
188 auto key = std::unique_ptr<void, decltype(::free)*>(::malloc(desc.key_size),
189 ::free);
190 auto value = std::unique_ptr<void, decltype(::free)*>(::malloc(desc.leaf_size),
191 ::free);
192 std::string key_str;
193 std::string value_str;
194
195 if (desc.type == BPF_MAP_TYPE_ARRAY ||
196 desc.type == BPF_MAP_TYPE_PROG_ARRAY ||
197 desc.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
198 desc.type == BPF_MAP_TYPE_PERCPU_ARRAY ||
199 desc.type == BPF_MAP_TYPE_CGROUP_ARRAY ||
200 desc.type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
201 desc.type == BPF_MAP_TYPE_DEVMAP ||
202 desc.type == BPF_MAP_TYPE_CPUMAP ||
203 desc.type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
204 // For arrays, just iterate over all indices
205 for (size_t i = 0; i < desc.max_entries; i++) {
206 err = bpf_lookup_elem(desc.fd, &i, value.get());
207 if (err < 0 && errno == ENOENT) {
208 // Element is not present, skip it
209 continue;
210 } else if (err < 0) {
211 // Other error, abort
212 return StatusTuple(-1, "Error looking up value: %s", std::strerror(errno));
213 }
214
215 r = key_to_string(&i, key_str);
216 if (r.code() != 0)
217 return r;
218
219 r = leaf_to_string(value.get(), value_str);
220 if (r.code() != 0)
221 return r;
222 res.emplace_back(key_str, value_str);
223 }
224 } else {
225 res.clear();
226 // For other maps, try to use the first() and next() interfaces
227 if (!this->first(key.get()))
228 return StatusTuple(0);
229
230 while (true) {
231 if (!this->lookup(key.get(), value.get()))
232 break;
233 r = key_to_string(key.get(), key_str);
234 if (r.code() != 0)
235 return r;
236
237 r = leaf_to_string(value.get(), value_str);
238 if (r.code() != 0)
239 return r;
240 res.emplace_back(key_str, value_str);
241 if (!this->next(key.get(), key.get()))
242 break;
243 }
244 }
245
246 return StatusTuple(0);
247 }
248
get_possible_cpu_count()249 size_t BPFTable::get_possible_cpu_count() { return get_possible_cpus().size(); }
250
BPFStackTable(const TableDesc & desc,bool use_debug_file,bool check_debug_file_crc)251 BPFStackTable::BPFStackTable(const TableDesc& desc, bool use_debug_file,
252 bool check_debug_file_crc)
253 : BPFTableBase<int, stacktrace_t>(desc) {
254 if (desc.type != BPF_MAP_TYPE_STACK_TRACE)
255 throw std::invalid_argument("Table '" + desc.name +
256 "' is not a stack table");
257
258 symbol_option_ = {.use_debug_file = use_debug_file,
259 .check_debug_file_crc = check_debug_file_crc,
260 .use_symbol_type = (1 << STT_FUNC) | (1 << STT_GNU_IFUNC)};
261 }
262
BPFStackTable(BPFStackTable && that)263 BPFStackTable::BPFStackTable(BPFStackTable&& that)
264 : BPFTableBase<int, stacktrace_t>(that.desc),
265 symbol_option_(std::move(that.symbol_option_)),
266 pid_sym_(std::move(that.pid_sym_)) {
267 that.pid_sym_.clear();
268 }
269
~BPFStackTable()270 BPFStackTable::~BPFStackTable() {
271 for (auto it : pid_sym_)
272 bcc_free_symcache(it.second, it.first);
273 }
274
clear_table_non_atomic()275 void BPFStackTable::clear_table_non_atomic() {
276 for (int i = 0; size_t(i) < capacity(); i++) {
277 remove(&i);
278 }
279 }
280
get_stack_addr(int stack_id)281 std::vector<uintptr_t> BPFStackTable::get_stack_addr(int stack_id) {
282 std::vector<uintptr_t> res;
283 stacktrace_t stack;
284 if (stack_id < 0)
285 return res;
286 if (!lookup(&stack_id, &stack))
287 return res;
288 for (int i = 0; (i < BPF_MAX_STACK_DEPTH) && (stack.ip[i] != 0); i++)
289 res.push_back(stack.ip[i]);
290 return res;
291 }
292
get_stack_symbol(int stack_id,int pid)293 std::vector<std::string> BPFStackTable::get_stack_symbol(int stack_id,
294 int pid) {
295 auto addresses = get_stack_addr(stack_id);
296 std::vector<std::string> res;
297 if (addresses.empty())
298 return res;
299 res.reserve(addresses.size());
300
301 if (pid < 0)
302 pid = -1;
303 if (pid_sym_.find(pid) == pid_sym_.end())
304 pid_sym_[pid] = bcc_symcache_new(pid, &symbol_option_);
305 void* cache = pid_sym_[pid];
306
307 bcc_symbol symbol;
308 for (auto addr : addresses)
309 if (bcc_symcache_resolve(cache, addr, &symbol) != 0)
310 res.emplace_back("[UNKNOWN]");
311 else {
312 res.push_back(symbol.demangle_name);
313 bcc_symbol_free_demangle_name(&symbol);
314 }
315
316 return res;
317 }
318
BPFPerfBuffer(const TableDesc & desc)319 BPFPerfBuffer::BPFPerfBuffer(const TableDesc& desc)
320 : BPFTableBase<int, int>(desc), epfd_(-1) {
321 if (desc.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
322 throw std::invalid_argument("Table '" + desc.name +
323 "' is not a perf buffer");
324 }
325
open_on_cpu(perf_reader_raw_cb cb,perf_reader_lost_cb lost_cb,int cpu,void * cb_cookie,int page_cnt)326 StatusTuple BPFPerfBuffer::open_on_cpu(perf_reader_raw_cb cb,
327 perf_reader_lost_cb lost_cb, int cpu,
328 void* cb_cookie, int page_cnt) {
329 if (cpu_readers_.find(cpu) != cpu_readers_.end())
330 return StatusTuple(-1, "Perf buffer already open on CPU %d", cpu);
331
332 auto reader = static_cast<perf_reader*>(
333 bpf_open_perf_buffer(cb, lost_cb, cb_cookie, -1, cpu, page_cnt));
334 if (reader == nullptr)
335 return StatusTuple(-1, "Unable to construct perf reader");
336
337 int reader_fd = perf_reader_fd(reader);
338 if (!update(&cpu, &reader_fd)) {
339 perf_reader_free(static_cast<void*>(reader));
340 return StatusTuple(-1, "Unable to open perf buffer on CPU %d: %s", cpu,
341 std::strerror(errno));
342 }
343
344 struct epoll_event event = {};
345 event.events = EPOLLIN;
346 event.data.ptr = static_cast<void*>(reader);
347 if (epoll_ctl(epfd_, EPOLL_CTL_ADD, reader_fd, &event) != 0) {
348 perf_reader_free(static_cast<void*>(reader));
349 return StatusTuple(-1, "Unable to add perf_reader FD to epoll: %s",
350 std::strerror(errno));
351 }
352
353 cpu_readers_[cpu] = reader;
354 return StatusTuple(0);
355 }
356
open_all_cpu(perf_reader_raw_cb cb,perf_reader_lost_cb lost_cb,void * cb_cookie,int page_cnt)357 StatusTuple BPFPerfBuffer::open_all_cpu(perf_reader_raw_cb cb,
358 perf_reader_lost_cb lost_cb,
359 void* cb_cookie, int page_cnt) {
360 if (cpu_readers_.size() != 0 || epfd_ != -1)
361 return StatusTuple(-1, "Previously opened perf buffer not cleaned");
362
363 std::vector<int> cpus = get_online_cpus();
364 ep_events_.reset(new epoll_event[cpus.size()]);
365 epfd_ = epoll_create1(EPOLL_CLOEXEC);
366
367 for (int i : cpus) {
368 auto res = open_on_cpu(cb, lost_cb, i, cb_cookie, page_cnt);
369 if (res.code() != 0) {
370 TRY2(close_all_cpu());
371 return res;
372 }
373 }
374 return StatusTuple(0);
375 }
376
close_on_cpu(int cpu)377 StatusTuple BPFPerfBuffer::close_on_cpu(int cpu) {
378 auto it = cpu_readers_.find(cpu);
379 if (it == cpu_readers_.end())
380 return StatusTuple(0);
381 perf_reader_free(static_cast<void*>(it->second));
382 if (!remove(const_cast<int*>(&(it->first))))
383 return StatusTuple(-1, "Unable to close perf buffer on CPU %d", it->first);
384 cpu_readers_.erase(it);
385 return StatusTuple(0);
386 }
387
close_all_cpu()388 StatusTuple BPFPerfBuffer::close_all_cpu() {
389 std::string errors;
390 bool has_error = false;
391
392 if (epfd_ >= 0) {
393 int close_res = close(epfd_);
394 epfd_ = -1;
395 ep_events_.reset();
396 if (close_res != 0) {
397 has_error = true;
398 errors += std::string(std::strerror(errno)) + "\n";
399 }
400 }
401
402 std::vector<int> opened_cpus;
403 for (auto it : cpu_readers_)
404 opened_cpus.push_back(it.first);
405 for (int i : opened_cpus) {
406 auto res = close_on_cpu(i);
407 if (res.code() != 0) {
408 errors += "Failed to close CPU" + std::to_string(i) + " perf buffer: ";
409 errors += res.msg() + "\n";
410 has_error = true;
411 }
412 }
413
414 if (has_error)
415 return StatusTuple(-1, errors);
416 return StatusTuple(0);
417 }
418
poll(int timeout_ms)419 int BPFPerfBuffer::poll(int timeout_ms) {
420 if (epfd_ < 0)
421 return -1;
422 int cnt =
423 epoll_wait(epfd_, ep_events_.get(), cpu_readers_.size(), timeout_ms);
424 for (int i = 0; i < cnt; i++)
425 perf_reader_event_read(static_cast<perf_reader*>(ep_events_[i].data.ptr));
426 return cnt;
427 }
428
~BPFPerfBuffer()429 BPFPerfBuffer::~BPFPerfBuffer() {
430 auto res = close_all_cpu();
431 if (res.code() != 0)
432 std::cerr << "Failed to close all perf buffer on destruction: " << res.msg()
433 << std::endl;
434 }
435
BPFPerfEventArray(const TableDesc & desc)436 BPFPerfEventArray::BPFPerfEventArray(const TableDesc& desc)
437 : BPFTableBase<int, int>(desc) {
438 if (desc.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
439 throw std::invalid_argument("Table '" + desc.name +
440 "' is not a perf event array");
441 }
442
open_all_cpu(uint32_t type,uint64_t config)443 StatusTuple BPFPerfEventArray::open_all_cpu(uint32_t type, uint64_t config) {
444 if (cpu_fds_.size() != 0)
445 return StatusTuple(-1, "Previously opened perf event not cleaned");
446
447 std::vector<int> cpus = get_online_cpus();
448
449 for (int i : cpus) {
450 auto res = open_on_cpu(i, type, config);
451 if (res.code() != 0) {
452 TRY2(close_all_cpu());
453 return res;
454 }
455 }
456 return StatusTuple(0);
457 }
458
close_all_cpu()459 StatusTuple BPFPerfEventArray::close_all_cpu() {
460 std::string errors;
461 bool has_error = false;
462
463 std::vector<int> opened_cpus;
464 for (auto it : cpu_fds_)
465 opened_cpus.push_back(it.first);
466 for (int i : opened_cpus) {
467 auto res = close_on_cpu(i);
468 if (res.code() != 0) {
469 errors += "Failed to close CPU" + std::to_string(i) + " perf event: ";
470 errors += res.msg() + "\n";
471 has_error = true;
472 }
473 }
474
475 if (has_error)
476 return StatusTuple(-1, errors);
477 return StatusTuple(0);
478 }
479
open_on_cpu(int cpu,uint32_t type,uint64_t config)480 StatusTuple BPFPerfEventArray::open_on_cpu(int cpu, uint32_t type,
481 uint64_t config) {
482 if (cpu_fds_.find(cpu) != cpu_fds_.end())
483 return StatusTuple(-1, "Perf event already open on CPU %d", cpu);
484 int fd = bpf_open_perf_event(type, config, -1, cpu);
485 if (fd < 0) {
486 return StatusTuple(-1, "Error constructing perf event %" PRIu32 ":%" PRIu64,
487 type, config);
488 }
489 if (!update(&cpu, &fd)) {
490 bpf_close_perf_event_fd(fd);
491 return StatusTuple(-1, "Unable to open perf event on CPU %d: %s", cpu,
492 std::strerror(errno));
493 }
494 cpu_fds_[cpu] = fd;
495 return StatusTuple(0);
496 }
497
close_on_cpu(int cpu)498 StatusTuple BPFPerfEventArray::close_on_cpu(int cpu) {
499 auto it = cpu_fds_.find(cpu);
500 if (it == cpu_fds_.end()) {
501 return StatusTuple(0);
502 }
503 bpf_close_perf_event_fd(it->second);
504 cpu_fds_.erase(it);
505 return StatusTuple(0);
506 }
507
~BPFPerfEventArray()508 BPFPerfEventArray::~BPFPerfEventArray() {
509 auto res = close_all_cpu();
510 if (res.code() != 0) {
511 std::cerr << "Failed to close all perf buffer on destruction: " << res.msg()
512 << std::endl;
513 }
514 }
515
BPFProgTable(const TableDesc & desc)516 BPFProgTable::BPFProgTable(const TableDesc& desc)
517 : BPFTableBase<int, int>(desc) {
518 if (desc.type != BPF_MAP_TYPE_PROG_ARRAY)
519 throw std::invalid_argument("Table '" + desc.name +
520 "' is not a prog table");
521 }
522
update_value(const int & index,const int & prog_fd)523 StatusTuple BPFProgTable::update_value(const int& index, const int& prog_fd) {
524 if (!this->update(const_cast<int*>(&index), const_cast<int*>(&prog_fd)))
525 return StatusTuple(-1, "Error updating value: %s", std::strerror(errno));
526 return StatusTuple(0);
527 }
528
remove_value(const int & index)529 StatusTuple BPFProgTable::remove_value(const int& index) {
530 if (!this->remove(const_cast<int*>(&index)))
531 return StatusTuple(-1, "Error removing value: %s", std::strerror(errno));
532 return StatusTuple(0);
533 }
534
BPFCgroupArray(const TableDesc & desc)535 BPFCgroupArray::BPFCgroupArray(const TableDesc& desc)
536 : BPFTableBase<int, int>(desc) {
537 if (desc.type != BPF_MAP_TYPE_CGROUP_ARRAY)
538 throw std::invalid_argument("Table '" + desc.name +
539 "' is not a cgroup array");
540 }
541
update_value(const int & index,const int & cgroup2_fd)542 StatusTuple BPFCgroupArray::update_value(const int& index,
543 const int& cgroup2_fd) {
544 if (!this->update(const_cast<int*>(&index), const_cast<int*>(&cgroup2_fd)))
545 return StatusTuple(-1, "Error updating value: %s", std::strerror(errno));
546 return StatusTuple(0);
547 }
548
update_value(const int & index,const std::string & cgroup2_path)549 StatusTuple BPFCgroupArray::update_value(const int& index,
550 const std::string& cgroup2_path) {
551 FileDesc f(::open(cgroup2_path.c_str(), O_RDONLY | O_CLOEXEC));
552 if ((int)f < 0)
553 return StatusTuple(-1, "Unable to open %s", cgroup2_path.c_str());
554 TRY2(update_value(index, (int)f));
555 return StatusTuple(0);
556 }
557
remove_value(const int & index)558 StatusTuple BPFCgroupArray::remove_value(const int& index) {
559 if (!this->remove(const_cast<int*>(&index)))
560 return StatusTuple(-1, "Error removing value: %s", std::strerror(errno));
561 return StatusTuple(0);
562 }
563
BPFDevmapTable(const TableDesc & desc)564 BPFDevmapTable::BPFDevmapTable(const TableDesc& desc)
565 : BPFTableBase<int, int>(desc) {
566 if(desc.type != BPF_MAP_TYPE_DEVMAP)
567 throw std::invalid_argument("Table '" + desc.name +
568 "' is not a devmap table");
569 }
570
update_value(const int & index,const int & value)571 StatusTuple BPFDevmapTable::update_value(const int& index,
572 const int& value) {
573 if (!this->update(const_cast<int*>(&index), const_cast<int*>(&value)))
574 return StatusTuple(-1, "Error updating value: %s", std::strerror(errno));
575 return StatusTuple(0);
576 }
577
get_value(const int & index,int & value)578 StatusTuple BPFDevmapTable::get_value(const int& index,
579 int& value) {
580 if (!this->lookup(const_cast<int*>(&index), &value))
581 return StatusTuple(-1, "Error getting value: %s", std::strerror(errno));
582 return StatusTuple(0);
583 }
584
remove_value(const int & index)585 StatusTuple BPFDevmapTable::remove_value(const int& index) {
586 if (!this->remove(const_cast<int*>(&index)))
587 return StatusTuple(-1, "Error removing value: %s", std::strerror(errno));
588 return StatusTuple(0);
589 }
590
591 } // namespace ebpf
592