1 /*
2 * Copyright (c) 2016 GitHub, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <cxxabi.h>
18 #include <cstring>
19 #include <fcntl.h>
20 #include <linux/elf.h>
21 #include <string.h>
22 #include <sys/stat.h>
23 #include <sys/types.h>
24 #include <unistd.h>
25 #include <cstdio>
26
27 #include "bcc_elf.h"
28 #include "bcc_perf_map.h"
29 #include "bcc_proc.h"
30 #include "bcc_syms.h"
31 #include "common.h"
32 #include "vendor/tinyformat.hpp"
33
34 #include "syms.h"
35
getinode_()36 ino_t ProcStat::getinode_() {
37 struct stat s;
38 return (!stat(procfs_.c_str(), &s)) ? s.st_ino : -1;
39 }
40
is_stale()41 bool ProcStat::is_stale() {
42 ino_t cur_inode = getinode_();
43 return (cur_inode > 0) && (cur_inode != inode_);
44 }
45
ProcStat(int pid)46 ProcStat::ProcStat(int pid)
47 : procfs_(tfm::format("/proc/%d/exe", pid)), inode_(getinode_()) {}
48
_add_symbol(const char * symname,uint64_t addr,void * p)49 void KSyms::_add_symbol(const char *symname, uint64_t addr, void *p) {
50 KSyms *ks = static_cast<KSyms *>(p);
51 ks->syms_.emplace_back(symname, addr);
52 }
53
refresh()54 void KSyms::refresh() {
55 if (syms_.empty()) {
56 bcc_procutils_each_ksym(_add_symbol, this);
57 std::sort(syms_.begin(), syms_.end());
58 }
59 }
60
resolve_addr(uint64_t addr,struct bcc_symbol * sym,bool demangle)61 bool KSyms::resolve_addr(uint64_t addr, struct bcc_symbol *sym, bool demangle) {
62 refresh();
63
64 std::vector<Symbol>::iterator it;
65
66 if (syms_.empty())
67 goto unknown_symbol;
68
69 it = std::upper_bound(syms_.begin(), syms_.end(), Symbol("", addr));
70 if (it != syms_.begin()) {
71 it--;
72 sym->name = (*it).name.c_str();
73 if (demangle)
74 sym->demangle_name = sym->name;
75 sym->module = "kernel";
76 sym->offset = addr - (*it).addr;
77 return true;
78 }
79
80 unknown_symbol:
81 memset(sym, 0, sizeof(struct bcc_symbol));
82 return false;
83 }
84
resolve_name(const char * _unused,const char * name,uint64_t * addr)85 bool KSyms::resolve_name(const char *_unused, const char *name,
86 uint64_t *addr) {
87 refresh();
88
89 if (syms_.size() != symnames_.size()) {
90 symnames_.clear();
91 for (Symbol &sym : syms_) {
92 symnames_[sym.name] = sym.addr;
93 }
94 }
95
96 auto it = symnames_.find(name);
97 if (it == symnames_.end())
98 return false;
99
100 *addr = it->second;
101 return true;
102 }
103
ProcSyms(int pid,struct bcc_symbol_option * option)104 ProcSyms::ProcSyms(int pid, struct bcc_symbol_option *option)
105 : pid_(pid), procstat_(pid), mount_ns_instance_(new ProcMountNS(pid_)) {
106 if (option)
107 std::memcpy(&symbol_option_, option, sizeof(bcc_symbol_option));
108 else
109 symbol_option_ = {
110 .use_debug_file = 1,
111 .check_debug_file_crc = 1,
112 .use_symbol_type = (1 << STT_FUNC) | (1 << STT_GNU_IFUNC)
113 };
114 load_modules();
115 }
116
_add_load_sections(uint64_t v_addr,uint64_t mem_sz,uint64_t file_offset,void * payload)117 int ProcSyms::_add_load_sections(uint64_t v_addr, uint64_t mem_sz,
118 uint64_t file_offset, void *payload) {
119 auto module = static_cast<Module *>(payload);
120 module->ranges_.emplace_back(v_addr, v_addr + mem_sz, file_offset);
121 return 0;
122 }
123
load_exe()124 void ProcSyms::load_exe() {
125 std::string exe = ebpf::get_pid_exe(pid_);
126 Module module(exe.c_str(), mount_ns_instance_.get(), &symbol_option_);
127
128 if (module.type_ != ModuleType::EXEC)
129 return;
130
131 ProcMountNSGuard g(mount_ns_instance_.get());
132
133 bcc_elf_foreach_load_section(exe.c_str(), &_add_load_sections, &module);
134
135 if (!module.ranges_.empty())
136 modules_.emplace_back(std::move(module));
137 }
138
load_modules()139 void ProcSyms::load_modules() {
140 load_exe();
141 bcc_procutils_each_module(pid_, _add_module, this);
142 }
143
refresh()144 void ProcSyms::refresh() {
145 modules_.clear();
146 mount_ns_instance_.reset(new ProcMountNS(pid_));
147 load_modules();
148 procstat_.reset();
149 }
150
_add_module(const char * modname,uint64_t start,uint64_t end,uint64_t offset,bool check_mount_ns,void * payload)151 int ProcSyms::_add_module(const char *modname, uint64_t start, uint64_t end,
152 uint64_t offset, bool check_mount_ns, void *payload) {
153 ProcSyms *ps = static_cast<ProcSyms *>(payload);
154 auto it = std::find_if(
155 ps->modules_.begin(), ps->modules_.end(),
156 [=](const ProcSyms::Module &m) { return m.name_ == modname; });
157 if (it == ps->modules_.end()) {
158 auto module = Module(
159 modname, check_mount_ns ? ps->mount_ns_instance_.get() : nullptr,
160 &ps->symbol_option_);
161
162 // pid/maps doesn't account for file_offset of text within the ELF.
163 // It only gives the mmap offset. We need the real offset for symbol
164 // lookup.
165 if (module.type_ == ModuleType::SO) {
166 if (bcc_elf_get_text_scn_info(modname, &module.elf_so_addr_,
167 &module.elf_so_offset_) < 0) {
168 fprintf(stderr, "WARNING: Couldn't find .text section in %s\n", modname);
169 fprintf(stderr, "WARNING: BCC can't handle sym look ups for %s", modname);
170 }
171 }
172
173 if (!bcc_is_perf_map(modname) || module.type_ != ModuleType::UNKNOWN)
174 // Always add the module even if we can't read it, so that we could
175 // report correct module name. Unless it's a perf map that we only
176 // add readable ones.
177 it = ps->modules_.insert(ps->modules_.end(), std::move(module));
178 else
179 return 0;
180 }
181 it->ranges_.emplace_back(start, end, offset);
182 // perf-PID map is added last. We try both inside the Process's mount
183 // namespace + chroot, and in global /tmp. Make sure we only add one.
184 if (it->type_ == ModuleType::PERF_MAP)
185 return -1;
186
187 return 0;
188 }
189
resolve_addr(uint64_t addr,struct bcc_symbol * sym,bool demangle)190 bool ProcSyms::resolve_addr(uint64_t addr, struct bcc_symbol *sym,
191 bool demangle) {
192 if (procstat_.is_stale())
193 refresh();
194
195 memset(sym, 0, sizeof(struct bcc_symbol));
196
197 const char *original_module = nullptr;
198 uint64_t offset;
199 bool only_perf_map = false;
200 for (Module &mod : modules_) {
201 if (only_perf_map && (mod.type_ != ModuleType::PERF_MAP))
202 continue;
203 if (mod.contains(addr, offset)) {
204 if (mod.find_addr(offset, sym)) {
205 if (demangle) {
206 if (sym->name && (!strncmp(sym->name, "_Z", 2) || !strncmp(sym->name, "___Z", 4)))
207 sym->demangle_name =
208 abi::__cxa_demangle(sym->name, nullptr, nullptr, nullptr);
209 if (!sym->demangle_name)
210 sym->demangle_name = sym->name;
211 }
212 return true;
213 } else if (mod.type_ != ModuleType::PERF_MAP) {
214 // In this case, we found the address in the range of a module, but
215 // not able to find a symbol of that address in the module.
216 // Thus, we would try to find the address in perf map, and
217 // save the module's name in case we will need it later.
218 original_module = mod.name_.c_str();
219 only_perf_map = true;
220 }
221 }
222 }
223 // If we didn't find the symbol anywhere, the module name is probably
224 // set to be the perf map's name as it would be the last we tried.
225 // In this case, if we have found the address previously in a module,
226 // report the saved original module name instead.
227 if (original_module)
228 sym->module = original_module;
229 return false;
230 }
231
resolve_name(const char * module,const char * name,uint64_t * addr)232 bool ProcSyms::resolve_name(const char *module, const char *name,
233 uint64_t *addr) {
234 if (procstat_.is_stale())
235 refresh();
236
237 for (Module &mod : modules_) {
238 if (mod.name_ == module)
239 return mod.find_name(name, addr);
240 }
241 return false;
242 }
243
Module(const char * name,ProcMountNS * mount_ns,struct bcc_symbol_option * option)244 ProcSyms::Module::Module(const char *name, ProcMountNS *mount_ns,
245 struct bcc_symbol_option *option)
246 : name_(name),
247 loaded_(false),
248 mount_ns_(mount_ns),
249 symbol_option_(option),
250 type_(ModuleType::UNKNOWN) {
251 ProcMountNSGuard g(mount_ns_);
252 int elf_type = bcc_elf_get_type(name_.c_str());
253 // The Module is an ELF file
254 if (elf_type >= 0) {
255 if (elf_type == ET_EXEC)
256 type_ = ModuleType::EXEC;
257 else if (elf_type == ET_DYN)
258 type_ = ModuleType::SO;
259 return;
260 }
261 // Other symbol files
262 if (bcc_is_valid_perf_map(name_.c_str()) == 1)
263 type_ = ModuleType::PERF_MAP;
264 else if (bcc_elf_is_vdso(name_.c_str()) == 1)
265 type_ = ModuleType::VDSO;
266
267 // Will be stored later
268 elf_so_offset_ = 0;
269 elf_so_addr_ = 0;
270 }
271
_add_symbol(const char * symname,uint64_t start,uint64_t size,void * p)272 int ProcSyms::Module::_add_symbol(const char *symname, uint64_t start,
273 uint64_t size, void *p) {
274 Module *m = static_cast<Module *>(p);
275 auto res = m->symnames_.emplace(symname);
276 m->syms_.emplace_back(&*(res.first), start, size);
277 return 0;
278 }
279
load_sym_table()280 void ProcSyms::Module::load_sym_table() {
281 if (loaded_)
282 return;
283 loaded_ = true;
284
285 if (type_ == ModuleType::UNKNOWN)
286 return;
287
288 ProcMountNSGuard g(mount_ns_);
289
290 if (type_ == ModuleType::PERF_MAP)
291 bcc_perf_map_foreach_sym(name_.c_str(), _add_symbol, this);
292 if (type_ == ModuleType::EXEC || type_ == ModuleType::SO)
293 bcc_elf_foreach_sym(name_.c_str(), _add_symbol, symbol_option_, this);
294 if (type_ == ModuleType::VDSO)
295 bcc_elf_foreach_vdso_sym(_add_symbol, this);
296
297 std::sort(syms_.begin(), syms_.end());
298 }
299
contains(uint64_t addr,uint64_t & offset) const300 bool ProcSyms::Module::contains(uint64_t addr, uint64_t &offset) const {
301 for (const auto &range : ranges_) {
302 if (addr >= range.start && addr < range.end) {
303 if (type_ == ModuleType::SO || type_ == ModuleType::VDSO) {
304 // Offset within the mmap
305 offset = addr - range.start + range.file_offset;
306
307 // Offset within the ELF for SO symbol lookup
308 offset += (elf_so_addr_ - elf_so_offset_);
309 } else {
310 offset = addr;
311 }
312
313 return true;
314 }
315 }
316
317 return false;
318 }
319
find_name(const char * symname,uint64_t * addr)320 bool ProcSyms::Module::find_name(const char *symname, uint64_t *addr) {
321 load_sym_table();
322
323 for (Symbol &s : syms_) {
324 if (*(s.name) == symname) {
325 *addr = type_ == ModuleType::SO ? start() + s.start : s.start;
326 return true;
327 }
328 }
329 return false;
330 }
331
find_addr(uint64_t offset,struct bcc_symbol * sym)332 bool ProcSyms::Module::find_addr(uint64_t offset, struct bcc_symbol *sym) {
333 load_sym_table();
334
335 sym->module = name_.c_str();
336 sym->offset = offset;
337
338 auto it = std::upper_bound(syms_.begin(), syms_.end(), Symbol(nullptr, offset, 0));
339 if (it == syms_.begin())
340 return false;
341
342 // 'it' points to the symbol whose start address is strictly greater than
343 // the address we're looking for. Start stepping backwards as long as the
344 // current symbol is still below the desired address, and see if the end
345 // of the current symbol (start + size) is above the desired address. Once
346 // we have a matching symbol, return it. Note that simply looking at '--it'
347 // is not enough, because symbols can be nested. For example, we could be
348 // looking for offset 0x12 with the following symbols available:
349 // SYMBOL START SIZE END
350 // goo 0x0 0x6 0x0 + 0x6 = 0x6
351 // foo 0x6 0x10 0x6 + 0x10 = 0x16
352 // bar 0x8 0x4 0x8 + 0x4 = 0xc
353 // baz 0x16 0x10 0x16 + 0x10 = 0x26
354 // The upper_bound lookup will return baz, and then going one symbol back
355 // brings us to bar, which does not contain offset 0x12 and is nested inside
356 // foo. Going back one more symbol brings us to foo, which contains 0x12
357 // and is a match.
358 // However, we also don't want to walk through the entire symbol list for
359 // unknown / missing symbols. So we will break if we reach a function that
360 // doesn't cover the function immediately before 'it', which means it is
361 // not possibly a nested function containing the address we're looking for.
362 --it;
363 uint64_t limit = it->start;
364 for (; offset >= it->start; --it) {
365 if (offset < it->start + it->size) {
366 sym->name = it->name->c_str();
367 sym->offset = (offset - it->start);
368 return true;
369 }
370 if (limit > it->start + it->size)
371 break;
372 // But don't step beyond begin()!
373 if (it == syms_.begin())
374 break;
375 }
376
377 return false;
378 }
379
380 extern "C" {
381
bcc_symcache_new(int pid,struct bcc_symbol_option * option)382 void *bcc_symcache_new(int pid, struct bcc_symbol_option *option) {
383 if (pid < 0)
384 return static_cast<void *>(new KSyms());
385 return static_cast<void *>(new ProcSyms(pid, option));
386 }
387
bcc_free_symcache(void * symcache,int pid)388 void bcc_free_symcache(void *symcache, int pid) {
389 if (pid < 0)
390 delete static_cast<KSyms*>(symcache);
391 else
392 delete static_cast<ProcSyms*>(symcache);
393 }
394
bcc_symbol_free_demangle_name(struct bcc_symbol * sym)395 void bcc_symbol_free_demangle_name(struct bcc_symbol *sym) {
396 if (sym->demangle_name && (sym->demangle_name != sym->name))
397 free(const_cast<char*>(sym->demangle_name));
398 }
399
bcc_symcache_resolve(void * resolver,uint64_t addr,struct bcc_symbol * sym)400 int bcc_symcache_resolve(void *resolver, uint64_t addr,
401 struct bcc_symbol *sym) {
402 SymbolCache *cache = static_cast<SymbolCache *>(resolver);
403 return cache->resolve_addr(addr, sym) ? 0 : -1;
404 }
405
bcc_symcache_resolve_no_demangle(void * resolver,uint64_t addr,struct bcc_symbol * sym)406 int bcc_symcache_resolve_no_demangle(void *resolver, uint64_t addr,
407 struct bcc_symbol *sym) {
408 SymbolCache *cache = static_cast<SymbolCache *>(resolver);
409 return cache->resolve_addr(addr, sym, false) ? 0 : -1;
410 }
411
bcc_symcache_resolve_name(void * resolver,const char * module,const char * name,uint64_t * addr)412 int bcc_symcache_resolve_name(void *resolver, const char *module,
413 const char *name, uint64_t *addr) {
414 SymbolCache *cache = static_cast<SymbolCache *>(resolver);
415 return cache->resolve_name(module, name, addr) ? 0 : -1;
416 }
417
bcc_symcache_refresh(void * resolver)418 void bcc_symcache_refresh(void *resolver) {
419 SymbolCache *cache = static_cast<SymbolCache *>(resolver);
420 cache->refresh();
421 }
422
// Search state for _find_module(): `name` is the module to look for; `start`
// and `file_offset` receive the values of the first matching mapping.
struct mod_st {
  const char *name;
  uint64_t start;
  uint64_t file_offset;
};

// Module-iteration callback: when `modname` equals the name stored in the
// mod_st passed through `p`, records the mapping's start and offset and
// returns -1; returns 0 for every other module.
static int _find_module(const char *modname, uint64_t start, uint64_t end,
                        uint64_t offset, bool, void *p) {
  struct mod_st *wanted = static_cast<struct mod_st *>(p);
  if (strcmp(modname, wanted->name) != 0)
    return 0;

  wanted->start = start;
  wanted->file_offset = offset;
  return -1;
}
439
bcc_resolve_global_addr(int pid,const char * module,const uint64_t address,uint64_t * global)440 int bcc_resolve_global_addr(int pid, const char *module, const uint64_t address,
441 uint64_t *global) {
442 struct mod_st mod = {module, 0x0};
443 if (bcc_procutils_each_module(pid, _find_module, &mod) < 0 ||
444 mod.start == 0x0)
445 return -1;
446
447 *global = mod.start - mod.file_offset + address;
448 return 0;
449 }
450
_sym_cb_wrapper(const char * symname,uint64_t addr,uint64_t,void * payload)451 static int _sym_cb_wrapper(const char *symname, uint64_t addr, uint64_t,
452 void *payload) {
453 SYM_CB cb = (SYM_CB) payload;
454 return cb(symname, addr);
455 }
456
bcc_foreach_function_symbol(const char * module,SYM_CB cb)457 int bcc_foreach_function_symbol(const char *module, SYM_CB cb) {
458 if (module == 0 || cb == 0)
459 return -1;
460
461 static struct bcc_symbol_option default_option = {
462 .use_debug_file = 1,
463 .check_debug_file_crc = 1,
464 .use_symbol_type = (1 << STT_FUNC) | (1 << STT_GNU_IFUNC)
465 };
466
467 return bcc_elf_foreach_sym(
468 module, _sym_cb_wrapper, &default_option, (void *)cb);
469 }
470
_find_sym(const char * symname,uint64_t addr,uint64_t,void * payload)471 static int _find_sym(const char *symname, uint64_t addr, uint64_t,
472 void *payload) {
473 struct bcc_symbol *sym = (struct bcc_symbol *)payload;
474 if (!strcmp(sym->name, symname)) {
475 sym->offset = addr;
476 return -1;
477 }
478 return 0;
479 }
480
// Lookup state for _find_load(): `target_addr` is the virtual address to
// translate, `binary_addr` receives the corresponding file offset.
struct load_addr_t {
  uint64_t target_addr;
  uint64_t binary_addr;
};

// Load-section callback: when target_addr lies in [v_addr, v_addr + mem_sz),
// stores its file offset (target_addr - v_addr + file_offset) in binary_addr
// and returns -1; returns 0 for sections that do not contain the address.
int _find_load(uint64_t v_addr, uint64_t mem_sz, uint64_t file_offset,
               void *payload) {
  struct load_addr_t *lookup = static_cast<load_addr_t *>(payload);
  const uint64_t target = lookup->target_addr;

  const bool below_section = target < v_addr;
  const bool past_section = target >= (v_addr + mem_sz);
  if (below_section || past_section)
    return 0;

  lookup->binary_addr = target - v_addr + file_offset;
  return -1;
}
494
bcc_resolve_symname(const char * module,const char * symname,const uint64_t addr,int pid,struct bcc_symbol_option * option,struct bcc_symbol * sym)495 int bcc_resolve_symname(const char *module, const char *symname,
496 const uint64_t addr, int pid,
497 struct bcc_symbol_option *option,
498 struct bcc_symbol *sym) {
499 static struct bcc_symbol_option default_option = {
500 .use_debug_file = 1,
501 .check_debug_file_crc = 1,
502 .use_symbol_type = BCC_SYM_ALL_TYPES,
503 };
504
505 if (module == NULL)
506 return -1;
507
508 memset(sym, 0, sizeof(bcc_symbol));
509
510 if (strchr(module, '/')) {
511 sym->module = strdup(module);
512 } else {
513 sym->module = bcc_procutils_which_so(module, pid);
514 }
515 if (sym->module == NULL)
516 return -1;
517
518 ProcMountNSGuard g(pid);
519
520 sym->name = symname;
521 sym->offset = addr;
522 if (option == NULL)
523 option = &default_option;
524
525 if (sym->name && sym->offset == 0x0)
526 if (bcc_elf_foreach_sym(sym->module, _find_sym, option, sym) < 0)
527 goto invalid_module;
528 if (sym->offset == 0x0)
529 goto invalid_module;
530
531 // For executable (ET_EXEC) binaries, translate the virtual address
532 // to physical address in the binary file.
533 // For shared object binaries (ET_DYN), the address from symbol table should
534 // already be physical address in the binary file.
535 if (bcc_elf_get_type(sym->module) == ET_EXEC) {
536 struct load_addr_t addr = {
537 .target_addr = sym->offset,
538 .binary_addr = 0x0,
539 };
540 if (bcc_elf_foreach_load_section(sym->module, &_find_load, &addr) < 0)
541 goto invalid_module;
542 if (!addr.binary_addr)
543 goto invalid_module;
544 sym->offset = addr.binary_addr;
545 }
546 return 0;
547
548 invalid_module:
549 if (sym->module) {
550 ::free(const_cast<char*>(sym->module));
551 sym->module = NULL;
552 }
553 return -1;
554 }
555 }
556