// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This library provides Symbolize() function that symbolizes program
// counters to their corresponding symbol names on linux platforms.
// This library has a minimal implementation of an ELF symbol table
// reader (i.e. it doesn't depend on libelf, etc.).
//
// The algorithm used in Symbolize() is as follows.
//
//   1. Go through a list of maps in /proc/self/maps and find the map
//   containing the program counter.
//
//   2. Open the mapped file and find a regular symbol table inside.
//   Iterate over symbols in the symbol table and look for the symbol
//   containing the program counter.  If such a symbol is found,
//   obtain the symbol name, and demangle the symbol if possible.
//   If the symbol isn't found in the regular symbol table (binary is
//   stripped), try the same thing with a dynamic symbol table.
//
// Note that Symbolize() is originally implemented to be used in
// signal handlers, hence it doesn't use malloc() and other unsafe
// operations.  It should be both thread-safe and async-signal-safe.
//
// Implementation note:
//
// We don't use heaps but only use stacks.  We want to reduce the
// stack consumption so that the symbolizer can run on small stacks.
40// 41// Here are some numbers collected with GCC 4.1.0 on x86: 42// - sizeof(Elf32_Sym) = 16 43// - sizeof(Elf32_Shdr) = 40 44// - sizeof(Elf64_Sym) = 24 45// - sizeof(Elf64_Shdr) = 64 46// 47// This implementation is intended to be async-signal-safe but uses some 48// functions which are not guaranteed to be so, such as memchr() and 49// memmove(). We assume they are async-signal-safe. 50 51#include <dlfcn.h> 52#include <elf.h> 53#include <fcntl.h> 54#include <link.h> // For ElfW() macro. 55#include <sys/stat.h> 56#include <sys/types.h> 57#include <unistd.h> 58 59#include <algorithm> 60#include <array> 61#include <atomic> 62#include <cerrno> 63#include <cinttypes> 64#include <climits> 65#include <cstdint> 66#include <cstdio> 67#include <cstdlib> 68#include <cstring> 69 70#include "absl/base/casts.h" 71#include "absl/base/dynamic_annotations.h" 72#include "absl/base/internal/low_level_alloc.h" 73#include "absl/base/internal/raw_logging.h" 74#include "absl/base/internal/spinlock.h" 75#include "absl/base/port.h" 76#include "absl/debugging/internal/demangle.h" 77#include "absl/debugging/internal/vdso_support.h" 78#include "absl/strings/string_view.h" 79 80#if defined(__FreeBSD__) && !defined(ElfW) 81#define ElfW(x) __ElfN(x) 82#endif 83 84namespace absl { 85ABSL_NAMESPACE_BEGIN 86 87// Value of argv[0]. Used by MaybeInitializeObjFile(). 88static char *argv0_value = nullptr; 89 90void InitializeSymbolizer(const char *argv0) { 91#ifdef ABSL_HAVE_VDSO_SUPPORT 92 // We need to make sure VDSOSupport::Init() is called before any setuid or 93 // chroot calls, so InitializeSymbolizer() should be called very early in the 94 // life of a program. 
95 absl::debugging_internal::VDSOSupport::Init(); 96#endif 97 if (argv0_value != nullptr) { 98 free(argv0_value); 99 argv0_value = nullptr; 100 } 101 if (argv0 != nullptr && argv0[0] != '\0') { 102 argv0_value = strdup(argv0); 103 } 104} 105 106namespace debugging_internal { 107namespace { 108 109// Re-runs fn until it doesn't cause EINTR. 110#define NO_INTR(fn) \ 111 do { \ 112 } while ((fn) < 0 && errno == EINTR) 113 114// On Linux, ELF_ST_* are defined in <linux/elf.h>. To make this portable 115// we define our own ELF_ST_BIND and ELF_ST_TYPE if not available. 116#ifndef ELF_ST_BIND 117#define ELF_ST_BIND(info) (((unsigned char)(info)) >> 4) 118#endif 119 120#ifndef ELF_ST_TYPE 121#define ELF_ST_TYPE(info) (((unsigned char)(info)) & 0xF) 122#endif 123 124// Some platforms use a special .opd section to store function pointers. 125const char kOpdSectionName[] = ".opd"; 126 127#if (defined(__powerpc__) && !(_CALL_ELF > 1)) || defined(__ia64) 128// Use opd section for function descriptors on these platforms, the function 129// address is the first word of the descriptor. 130enum { kPlatformUsesOPDSections = 1 }; 131#else // not PPC or IA64 132enum { kPlatformUsesOPDSections = 0 }; 133#endif 134 135// This works for PowerPC & IA64 only. A function descriptor consist of two 136// pointers and the first one is the function's entry. 137const size_t kFunctionDescriptorSize = sizeof(void *) * 2; 138 139const int kMaxDecorators = 10; // Seems like a reasonable upper limit. 140 141struct InstalledSymbolDecorator { 142 SymbolDecorator fn; 143 void *arg; 144 int ticket; 145}; 146 147int g_num_decorators; 148InstalledSymbolDecorator g_decorators[kMaxDecorators]; 149 150struct FileMappingHint { 151 const void *start; 152 const void *end; 153 uint64_t offset; 154 const char *filename; 155}; 156 157// Protects g_decorators. 158// We are using SpinLock and not a Mutex here, because we may be called 159// from inside Mutex::Lock itself, and it prohibits recursive calls. 
160// This happens in e.g. base/stacktrace_syscall_unittest. 161// Moreover, we are using only TryLock(), if the decorator list 162// is being modified (is busy), we skip all decorators, and possibly 163// loose some info. Sorry, that's the best we could do. 164ABSL_CONST_INIT absl::base_internal::SpinLock g_decorators_mu( 165 absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY); 166 167const int kMaxFileMappingHints = 8; 168int g_num_file_mapping_hints; 169FileMappingHint g_file_mapping_hints[kMaxFileMappingHints]; 170// Protects g_file_mapping_hints. 171ABSL_CONST_INIT absl::base_internal::SpinLock g_file_mapping_mu( 172 absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY); 173 174// Async-signal-safe function to zero a buffer. 175// memset() is not guaranteed to be async-signal-safe. 176static void SafeMemZero(void* p, size_t size) { 177 unsigned char *c = static_cast<unsigned char *>(p); 178 while (size--) { 179 *c++ = 0; 180 } 181} 182 183struct ObjFile { 184 ObjFile() 185 : filename(nullptr), 186 start_addr(nullptr), 187 end_addr(nullptr), 188 offset(0), 189 fd(-1), 190 elf_type(-1) { 191 SafeMemZero(&elf_header, sizeof(elf_header)); 192 SafeMemZero(&phdr[0], sizeof(phdr)); 193 } 194 195 char *filename; 196 const void *start_addr; 197 const void *end_addr; 198 uint64_t offset; 199 200 // The following fields are initialized on the first access to the 201 // object file. 202 int fd; 203 int elf_type; 204 ElfW(Ehdr) elf_header; 205 206 // PT_LOAD program header describing executable code. 207 // Normally we expect just one, but SWIFT binaries have two. 208 // CUDA binaries have 3 (see cr/473913254 description). 209 std::array<ElfW(Phdr), 4> phdr; 210}; 211 212// Build 4-way associative cache for symbols. Within each cache line, symbols 213// are replaced in LRU order. 
214enum { 215 ASSOCIATIVITY = 4, 216}; 217struct SymbolCacheLine { 218 const void *pc[ASSOCIATIVITY]; 219 char *name[ASSOCIATIVITY]; 220 221 // age[i] is incremented when a line is accessed. it's reset to zero if the 222 // i'th entry is read. 223 uint32_t age[ASSOCIATIVITY]; 224}; 225 226// --------------------------------------------------------------- 227// An async-signal-safe arena for LowLevelAlloc 228static std::atomic<base_internal::LowLevelAlloc::Arena *> g_sig_safe_arena; 229 230static base_internal::LowLevelAlloc::Arena *SigSafeArena() { 231 return g_sig_safe_arena.load(std::memory_order_acquire); 232} 233 234static void InitSigSafeArena() { 235 if (SigSafeArena() == nullptr) { 236 base_internal::LowLevelAlloc::Arena *new_arena = 237 base_internal::LowLevelAlloc::NewArena( 238 base_internal::LowLevelAlloc::kAsyncSignalSafe); 239 base_internal::LowLevelAlloc::Arena *old_value = nullptr; 240 if (!g_sig_safe_arena.compare_exchange_strong(old_value, new_arena, 241 std::memory_order_release, 242 std::memory_order_relaxed)) { 243 // We lost a race to allocate an arena; deallocate. 244 base_internal::LowLevelAlloc::DeleteArena(new_arena); 245 } 246 } 247} 248 249// --------------------------------------------------------------- 250// An AddrMap is a vector of ObjFile, using SigSafeArena() for allocation. 
251 252class AddrMap { 253 public: 254 AddrMap() : size_(0), allocated_(0), obj_(nullptr) {} 255 ~AddrMap() { base_internal::LowLevelAlloc::Free(obj_); } 256 size_t Size() const { return size_; } 257 ObjFile *At(size_t i) { return &obj_[i]; } 258 ObjFile *Add(); 259 void Clear(); 260 261 private: 262 size_t size_; // count of valid elements (<= allocated_) 263 size_t allocated_; // count of allocated elements 264 ObjFile *obj_; // array of allocated_ elements 265 AddrMap(const AddrMap &) = delete; 266 AddrMap &operator=(const AddrMap &) = delete; 267}; 268 269void AddrMap::Clear() { 270 for (size_t i = 0; i != size_; i++) { 271 At(i)->~ObjFile(); 272 } 273 size_ = 0; 274} 275 276ObjFile *AddrMap::Add() { 277 if (size_ == allocated_) { 278 size_t new_allocated = allocated_ * 2 + 50; 279 ObjFile *new_obj_ = 280 static_cast<ObjFile *>(base_internal::LowLevelAlloc::AllocWithArena( 281 new_allocated * sizeof(*new_obj_), SigSafeArena())); 282 if (obj_) { 283 memcpy(new_obj_, obj_, allocated_ * sizeof(*new_obj_)); 284 base_internal::LowLevelAlloc::Free(obj_); 285 } 286 obj_ = new_obj_; 287 allocated_ = new_allocated; 288 } 289 return new (&obj_[size_++]) ObjFile; 290} 291 292// --------------------------------------------------------------- 293 294enum FindSymbolResult { SYMBOL_NOT_FOUND = 1, SYMBOL_TRUNCATED, SYMBOL_FOUND }; 295 296class Symbolizer { 297 public: 298 Symbolizer(); 299 ~Symbolizer(); 300 const char *GetSymbol(const void *const pc); 301 302 private: 303 char *CopyString(const char *s) { 304 size_t len = strlen(s); 305 char *dst = static_cast<char *>( 306 base_internal::LowLevelAlloc::AllocWithArena(len + 1, SigSafeArena())); 307 ABSL_RAW_CHECK(dst != nullptr, "out of memory"); 308 memcpy(dst, s, len + 1); 309 return dst; 310 } 311 ObjFile *FindObjFile(const void *const start, 312 size_t size) ABSL_ATTRIBUTE_NOINLINE; 313 static bool RegisterObjFile(const char *filename, 314 const void *const start_addr, 315 const void *const end_addr, uint64_t offset, 316 
void *arg); 317 SymbolCacheLine *GetCacheLine(const void *const pc); 318 const char *FindSymbolInCache(const void *const pc); 319 const char *InsertSymbolInCache(const void *const pc, const char *name); 320 void AgeSymbols(SymbolCacheLine *line); 321 void ClearAddrMap(); 322 FindSymbolResult GetSymbolFromObjectFile(const ObjFile &obj, 323 const void *const pc, 324 const ptrdiff_t relocation, 325 char *out, size_t out_size, 326 char *tmp_buf, size_t tmp_buf_size); 327 const char *GetUncachedSymbol(const void *pc); 328 329 enum { 330 SYMBOL_BUF_SIZE = 3072, 331 TMP_BUF_SIZE = 1024, 332 SYMBOL_CACHE_LINES = 128, 333 }; 334 335 AddrMap addr_map_; 336 337 bool ok_; 338 bool addr_map_read_; 339 340 char symbol_buf_[SYMBOL_BUF_SIZE]; 341 342 // tmp_buf_ will be used to store arrays of ElfW(Shdr) and ElfW(Sym) 343 // so we ensure that tmp_buf_ is properly aligned to store either. 344 alignas(16) char tmp_buf_[TMP_BUF_SIZE]; 345 static_assert(alignof(ElfW(Shdr)) <= 16, 346 "alignment of tmp buf too small for Shdr"); 347 static_assert(alignof(ElfW(Sym)) <= 16, 348 "alignment of tmp buf too small for Sym"); 349 350 SymbolCacheLine symbol_cache_[SYMBOL_CACHE_LINES]; 351}; 352 353static std::atomic<Symbolizer *> g_cached_symbolizer; 354 355} // namespace 356 357static size_t SymbolizerSize() { 358#if defined(__wasm__) || defined(__asmjs__) 359 auto pagesize = static_cast<size_t>(getpagesize()); 360#else 361 auto pagesize = static_cast<size_t>(sysconf(_SC_PAGESIZE)); 362#endif 363 return ((sizeof(Symbolizer) - 1) / pagesize + 1) * pagesize; 364} 365 366// Return (and set null) g_cached_symbolized_state if it is not null. 367// Otherwise return a new symbolizer. 
368static Symbolizer *AllocateSymbolizer() { 369 InitSigSafeArena(); 370 Symbolizer *symbolizer = 371 g_cached_symbolizer.exchange(nullptr, std::memory_order_acquire); 372 if (symbolizer != nullptr) { 373 return symbolizer; 374 } 375 return new (base_internal::LowLevelAlloc::AllocWithArena( 376 SymbolizerSize(), SigSafeArena())) Symbolizer(); 377} 378 379// Set g_cached_symbolize_state to s if it is null, otherwise 380// delete s. 381static void FreeSymbolizer(Symbolizer *s) { 382 Symbolizer *old_cached_symbolizer = nullptr; 383 if (!g_cached_symbolizer.compare_exchange_strong(old_cached_symbolizer, s, 384 std::memory_order_release, 385 std::memory_order_relaxed)) { 386 s->~Symbolizer(); 387 base_internal::LowLevelAlloc::Free(s); 388 } 389} 390 391Symbolizer::Symbolizer() : ok_(true), addr_map_read_(false) { 392 for (SymbolCacheLine &symbol_cache_line : symbol_cache_) { 393 for (size_t j = 0; j < ABSL_ARRAYSIZE(symbol_cache_line.name); ++j) { 394 symbol_cache_line.pc[j] = nullptr; 395 symbol_cache_line.name[j] = nullptr; 396 symbol_cache_line.age[j] = 0; 397 } 398 } 399} 400 401Symbolizer::~Symbolizer() { 402 for (SymbolCacheLine &symbol_cache_line : symbol_cache_) { 403 for (char *s : symbol_cache_line.name) { 404 base_internal::LowLevelAlloc::Free(s); 405 } 406 } 407 ClearAddrMap(); 408} 409 410// We don't use assert() since it's not guaranteed to be 411// async-signal-safe. Instead we define a minimal assertion 412// macro. So far, we don't need pretty printing for __FILE__, etc. 413#define SAFE_ASSERT(expr) ((expr) ? static_cast<void>(0) : abort()) 414 415// Read up to "count" bytes from file descriptor "fd" into the buffer 416// starting at "buf" while handling short reads and EINTR. On 417// success, return the number of bytes read. Otherwise, return -1. 
418static ssize_t ReadPersistent(int fd, void *buf, size_t count) { 419 SAFE_ASSERT(fd >= 0); 420 SAFE_ASSERT(count <= SSIZE_MAX); 421 char *buf0 = reinterpret_cast<char *>(buf); 422 size_t num_bytes = 0; 423 while (num_bytes < count) { 424 ssize_t len; 425 NO_INTR(len = read(fd, buf0 + num_bytes, count - num_bytes)); 426 if (len < 0) { // There was an error other than EINTR. 427 ABSL_RAW_LOG(WARNING, "read failed: errno=%d", errno); 428 return -1; 429 } 430 if (len == 0) { // Reached EOF. 431 break; 432 } 433 num_bytes += static_cast<size_t>(len); 434 } 435 SAFE_ASSERT(num_bytes <= count); 436 return static_cast<ssize_t>(num_bytes); 437} 438 439// Read up to "count" bytes from "offset" in the file pointed by file 440// descriptor "fd" into the buffer starting at "buf". On success, 441// return the number of bytes read. Otherwise, return -1. 442static ssize_t ReadFromOffset(const int fd, void *buf, const size_t count, 443 const off_t offset) { 444 off_t off = lseek(fd, offset, SEEK_SET); 445 if (off == (off_t)-1) { 446 ABSL_RAW_LOG(WARNING, "lseek(%d, %jd, SEEK_SET) failed: errno=%d", fd, 447 static_cast<intmax_t>(offset), errno); 448 return -1; 449 } 450 return ReadPersistent(fd, buf, count); 451} 452 453// Try reading exactly "count" bytes from "offset" bytes in a file 454// pointed by "fd" into the buffer starting at "buf" while handling 455// short reads and EINTR. On success, return true. Otherwise, return 456// false. 457static bool ReadFromOffsetExact(const int fd, void *buf, const size_t count, 458 const off_t offset) { 459 ssize_t len = ReadFromOffset(fd, buf, count, offset); 460 return len >= 0 && static_cast<size_t>(len) == count; 461} 462 463// Returns elf_header.e_type if the file pointed by fd is an ELF binary. 
464static int FileGetElfType(const int fd) { 465 ElfW(Ehdr) elf_header; 466 if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { 467 return -1; 468 } 469 if (memcmp(elf_header.e_ident, ELFMAG, SELFMAG) != 0) { 470 return -1; 471 } 472 return elf_header.e_type; 473} 474 475// Read the section headers in the given ELF binary, and if a section 476// of the specified type is found, set the output to this section header 477// and return true. Otherwise, return false. 478// To keep stack consumption low, we would like this function to not get 479// inlined. 480static ABSL_ATTRIBUTE_NOINLINE bool GetSectionHeaderByType( 481 const int fd, ElfW(Half) sh_num, const off_t sh_offset, ElfW(Word) type, 482 ElfW(Shdr) * out, char *tmp_buf, size_t tmp_buf_size) { 483 ElfW(Shdr) *buf = reinterpret_cast<ElfW(Shdr) *>(tmp_buf); 484 const size_t buf_entries = tmp_buf_size / sizeof(buf[0]); 485 const size_t buf_bytes = buf_entries * sizeof(buf[0]); 486 487 for (size_t i = 0; static_cast<int>(i) < sh_num;) { 488 const size_t num_bytes_left = 489 (static_cast<size_t>(sh_num) - i) * sizeof(buf[0]); 490 const size_t num_bytes_to_read = 491 (buf_bytes > num_bytes_left) ? 
num_bytes_left : buf_bytes; 492 const off_t offset = sh_offset + static_cast<off_t>(i * sizeof(buf[0])); 493 const ssize_t len = ReadFromOffset(fd, buf, num_bytes_to_read, offset); 494 if (len < 0) { 495 ABSL_RAW_LOG( 496 WARNING, 497 "Reading %zu bytes from offset %ju returned %zd which is negative.", 498 num_bytes_to_read, static_cast<intmax_t>(offset), len); 499 return false; 500 } 501 if (static_cast<size_t>(len) % sizeof(buf[0]) != 0) { 502 ABSL_RAW_LOG( 503 WARNING, 504 "Reading %zu bytes from offset %jd returned %zd which is not a " 505 "multiple of %zu.", 506 num_bytes_to_read, static_cast<intmax_t>(offset), len, 507 sizeof(buf[0])); 508 return false; 509 } 510 const size_t num_headers_in_buf = static_cast<size_t>(len) / sizeof(buf[0]); 511 SAFE_ASSERT(num_headers_in_buf <= buf_entries); 512 for (size_t j = 0; j < num_headers_in_buf; ++j) { 513 if (buf[j].sh_type == type) { 514 *out = buf[j]; 515 return true; 516 } 517 } 518 i += num_headers_in_buf; 519 } 520 return false; 521} 522 523// There is no particular reason to limit section name to 63 characters, 524// but there has (as yet) been no need for anything longer either. 525const int kMaxSectionNameLen = 64; 526 527bool ForEachSection(int fd, 528 const std::function<bool(absl::string_view name, 529 const ElfW(Shdr) &)> &callback) { 530 ElfW(Ehdr) elf_header; 531 if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { 532 return false; 533 } 534 535 // Technically it can be larger, but in practice this never happens. 
536 if (elf_header.e_shentsize != sizeof(ElfW(Shdr))) { 537 return false; 538 } 539 540 ElfW(Shdr) shstrtab; 541 off_t shstrtab_offset = static_cast<off_t>(elf_header.e_shoff) + 542 elf_header.e_shentsize * elf_header.e_shstrndx; 543 if (!ReadFromOffsetExact(fd, &shstrtab, sizeof(shstrtab), shstrtab_offset)) { 544 return false; 545 } 546 547 for (int i = 0; i < elf_header.e_shnum; ++i) { 548 ElfW(Shdr) out; 549 off_t section_header_offset = 550 static_cast<off_t>(elf_header.e_shoff) + elf_header.e_shentsize * i; 551 if (!ReadFromOffsetExact(fd, &out, sizeof(out), section_header_offset)) { 552 return false; 553 } 554 off_t name_offset = static_cast<off_t>(shstrtab.sh_offset) + out.sh_name; 555 char header_name[kMaxSectionNameLen]; 556 ssize_t n_read = 557 ReadFromOffset(fd, &header_name, kMaxSectionNameLen, name_offset); 558 if (n_read < 0) { 559 return false; 560 } else if (n_read > kMaxSectionNameLen) { 561 // Long read? 562 return false; 563 } 564 565 absl::string_view name(header_name, 566 strnlen(header_name, static_cast<size_t>(n_read))); 567 if (!callback(name, out)) { 568 break; 569 } 570 } 571 return true; 572} 573 574// name_len should include terminating '\0'. 575bool GetSectionHeaderByName(int fd, const char *name, size_t name_len, 576 ElfW(Shdr) * out) { 577 char header_name[kMaxSectionNameLen]; 578 if (sizeof(header_name) < name_len) { 579 ABSL_RAW_LOG(WARNING, 580 "Section name '%s' is too long (%zu); " 581 "section will not be found (even if present).", 582 name, name_len); 583 // No point in even trying. 584 return false; 585 } 586 587 ElfW(Ehdr) elf_header; 588 if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { 589 return false; 590 } 591 592 // Technically it can be larger, but in practice this never happens. 
593 if (elf_header.e_shentsize != sizeof(ElfW(Shdr))) { 594 return false; 595 } 596 597 ElfW(Shdr) shstrtab; 598 off_t shstrtab_offset = static_cast<off_t>(elf_header.e_shoff) + 599 elf_header.e_shentsize * elf_header.e_shstrndx; 600 if (!ReadFromOffsetExact(fd, &shstrtab, sizeof(shstrtab), shstrtab_offset)) { 601 return false; 602 } 603 604 for (int i = 0; i < elf_header.e_shnum; ++i) { 605 off_t section_header_offset = 606 static_cast<off_t>(elf_header.e_shoff) + elf_header.e_shentsize * i; 607 if (!ReadFromOffsetExact(fd, out, sizeof(*out), section_header_offset)) { 608 return false; 609 } 610 off_t name_offset = static_cast<off_t>(shstrtab.sh_offset) + out->sh_name; 611 ssize_t n_read = ReadFromOffset(fd, &header_name, name_len, name_offset); 612 if (n_read < 0) { 613 return false; 614 } else if (static_cast<size_t>(n_read) != name_len) { 615 // Short read -- name could be at end of file. 616 continue; 617 } 618 if (memcmp(header_name, name, name_len) == 0) { 619 return true; 620 } 621 } 622 return false; 623} 624 625// Compare symbols at in the same address. 626// Return true if we should pick symbol1. 627static bool ShouldPickFirstSymbol(const ElfW(Sym) & symbol1, 628 const ElfW(Sym) & symbol2) { 629 // If one of the symbols is weak and the other is not, pick the one 630 // this is not a weak symbol. 631 char bind1 = ELF_ST_BIND(symbol1.st_info); 632 char bind2 = ELF_ST_BIND(symbol1.st_info); 633 if (bind1 == STB_WEAK && bind2 != STB_WEAK) return false; 634 if (bind2 == STB_WEAK && bind1 != STB_WEAK) return true; 635 636 // If one of the symbols has zero size and the other is not, pick the 637 // one that has non-zero size. 638 if (symbol1.st_size != 0 && symbol2.st_size == 0) { 639 return true; 640 } 641 if (symbol1.st_size == 0 && symbol2.st_size != 0) { 642 return false; 643 } 644 645 // If one of the symbols has no type and the other is not, pick the 646 // one that has a type. 
647 char type1 = ELF_ST_TYPE(symbol1.st_info); 648 char type2 = ELF_ST_TYPE(symbol1.st_info); 649 if (type1 != STT_NOTYPE && type2 == STT_NOTYPE) { 650 return true; 651 } 652 if (type1 == STT_NOTYPE && type2 != STT_NOTYPE) { 653 return false; 654 } 655 656 // Pick the first one, if we still cannot decide. 657 return true; 658} 659 660// Return true if an address is inside a section. 661static bool InSection(const void *address, ptrdiff_t relocation, 662 const ElfW(Shdr) * section) { 663 const char *start = reinterpret_cast<const char *>( 664 section->sh_addr + static_cast<ElfW(Addr)>(relocation)); 665 size_t size = static_cast<size_t>(section->sh_size); 666 return start <= address && address < (start + size); 667} 668 669static const char *ComputeOffset(const char *base, ptrdiff_t offset) { 670 // Note: cast to intptr_t to avoid undefined behavior when base evaluates to 671 // zero and offset is non-zero. 672 return reinterpret_cast<const char *>(reinterpret_cast<intptr_t>(base) + 673 offset); 674} 675 676// Read a symbol table and look for the symbol containing the 677// pc. Iterate over symbols in a symbol table and look for the symbol 678// containing "pc". If the symbol is found, and its name fits in 679// out_size, the name is written into out and SYMBOL_FOUND is returned. 680// If the name does not fit, truncated name is written into out, 681// and SYMBOL_TRUNCATED is returned. Out is NUL-terminated. 682// If the symbol is not found, SYMBOL_NOT_FOUND is returned; 683// To keep stack consumption low, we would like this function to not get 684// inlined. 685static ABSL_ATTRIBUTE_NOINLINE FindSymbolResult FindSymbol( 686 const void *const pc, const int fd, char *out, size_t out_size, 687 ptrdiff_t relocation, const ElfW(Shdr) * strtab, const ElfW(Shdr) * symtab, 688 const ElfW(Shdr) * opd, char *tmp_buf, size_t tmp_buf_size) { 689 if (symtab == nullptr) { 690 return SYMBOL_NOT_FOUND; 691 } 692 693 // Read multiple symbols at once to save read() calls. 
694 ElfW(Sym) *buf = reinterpret_cast<ElfW(Sym) *>(tmp_buf); 695 const size_t buf_entries = tmp_buf_size / sizeof(buf[0]); 696 697 const size_t num_symbols = symtab->sh_size / symtab->sh_entsize; 698 699 // On platforms using an .opd section (PowerPC & IA64), a function symbol 700 // has the address of a function descriptor, which contains the real 701 // starting address. However, we do not always want to use the real 702 // starting address because we sometimes want to symbolize a function 703 // pointer into the .opd section, e.g. FindSymbol(&foo,...). 704 const bool pc_in_opd = kPlatformUsesOPDSections && opd != nullptr && 705 InSection(pc, relocation, opd); 706 const bool deref_function_descriptor_pointer = 707 kPlatformUsesOPDSections && opd != nullptr && !pc_in_opd; 708 709 ElfW(Sym) best_match; 710 SafeMemZero(&best_match, sizeof(best_match)); 711 bool found_match = false; 712 for (size_t i = 0; i < num_symbols;) { 713 off_t offset = 714 static_cast<off_t>(symtab->sh_offset + i * symtab->sh_entsize); 715 const size_t num_remaining_symbols = num_symbols - i; 716 const size_t entries_in_chunk = 717 std::min(num_remaining_symbols, buf_entries); 718 const size_t bytes_in_chunk = entries_in_chunk * sizeof(buf[0]); 719 const ssize_t len = ReadFromOffset(fd, buf, bytes_in_chunk, offset); 720 SAFE_ASSERT(len >= 0); 721 SAFE_ASSERT(static_cast<size_t>(len) % sizeof(buf[0]) == 0); 722 const size_t num_symbols_in_buf = static_cast<size_t>(len) / sizeof(buf[0]); 723 SAFE_ASSERT(num_symbols_in_buf <= entries_in_chunk); 724 for (size_t j = 0; j < num_symbols_in_buf; ++j) { 725 const ElfW(Sym) &symbol = buf[j]; 726 727 // For a DSO, a symbol address is relocated by the loading address. 728 // We keep the original address for opd redirection below. 
729 const char *const original_start_address = 730 reinterpret_cast<const char *>(symbol.st_value); 731 const char *start_address = 732 ComputeOffset(original_start_address, relocation); 733 734#ifdef __arm__ 735 // ARM functions are always aligned to multiples of two bytes; the 736 // lowest-order bit in start_address is ignored by the CPU and indicates 737 // whether the function contains ARM (0) or Thumb (1) code. We don't care 738 // about what encoding is being used; we just want the real start address 739 // of the function. 740 start_address = reinterpret_cast<const char *>( 741 reinterpret_cast<uintptr_t>(start_address) & ~1u); 742#endif 743 744 if (deref_function_descriptor_pointer && 745 InSection(original_start_address, /*relocation=*/0, opd)) { 746 // The opd section is mapped into memory. Just dereference 747 // start_address to get the first double word, which points to the 748 // function entry. 749 start_address = *reinterpret_cast<const char *const *>(start_address); 750 } 751 752 // If pc is inside the .opd section, it points to a function descriptor. 753 const size_t size = pc_in_opd ? kFunctionDescriptorSize : symbol.st_size; 754 const void *const end_address = 755 ComputeOffset(start_address, static_cast<ptrdiff_t>(size)); 756 if (symbol.st_value != 0 && // Skip null value symbols. 757 symbol.st_shndx != 0 && // Skip undefined symbols. 758#ifdef STT_TLS 759 ELF_ST_TYPE(symbol.st_info) != STT_TLS && // Skip thread-local data. 760#endif // STT_TLS 761 ((start_address <= pc && pc < end_address) || 762 (start_address == pc && pc == end_address))) { 763 if (!found_match || ShouldPickFirstSymbol(symbol, best_match)) { 764 found_match = true; 765 best_match = symbol; 766 } 767 } 768 } 769 i += num_symbols_in_buf; 770 } 771 772 if (found_match) { 773 const off_t off = 774 static_cast<off_t>(strtab->sh_offset) + best_match.st_name; 775 const ssize_t n_read = ReadFromOffset(fd, out, out_size, off); 776 if (n_read <= 0) { 777 // This should never happen. 
778 ABSL_RAW_LOG(WARNING, 779 "Unable to read from fd %d at offset %lld: n_read = %zd", fd, 780 static_cast<long long>(off), n_read); 781 return SYMBOL_NOT_FOUND; 782 } 783 ABSL_RAW_CHECK(static_cast<size_t>(n_read) <= out_size, 784 "ReadFromOffset read too much data."); 785 786 // strtab->sh_offset points into .strtab-like section that contains 787 // NUL-terminated strings: '\0foo\0barbaz\0...". 788 // 789 // sh_offset+st_name points to the start of symbol name, but we don't know 790 // how long the symbol is, so we try to read as much as we have space for, 791 // and usually over-read (i.e. there is a NUL somewhere before n_read). 792 if (memchr(out, '\0', static_cast<size_t>(n_read)) == nullptr) { 793 // Either out_size was too small (n_read == out_size and no NUL), or 794 // we tried to read past the EOF (n_read < out_size) and .strtab is 795 // corrupt (missing terminating NUL; should never happen for valid ELF). 796 out[n_read - 1] = '\0'; 797 return SYMBOL_TRUNCATED; 798 } 799 return SYMBOL_FOUND; 800 } 801 802 return SYMBOL_NOT_FOUND; 803} 804 805// Get the symbol name of "pc" from the file pointed by "fd". Process 806// both regular and dynamic symbol tables if necessary. 807// See FindSymbol() comment for description of return value. 808FindSymbolResult Symbolizer::GetSymbolFromObjectFile( 809 const ObjFile &obj, const void *const pc, const ptrdiff_t relocation, 810 char *out, size_t out_size, char *tmp_buf, size_t tmp_buf_size) { 811 ElfW(Shdr) symtab; 812 ElfW(Shdr) strtab; 813 ElfW(Shdr) opd; 814 ElfW(Shdr) *opd_ptr = nullptr; 815 816 // On platforms using an .opd sections for function descriptor, read 817 // the section header. The .opd section is in data segment and should be 818 // loaded but we check that it is mapped just to be extra careful. 
819 if (kPlatformUsesOPDSections) { 820 if (GetSectionHeaderByName(obj.fd, kOpdSectionName, 821 sizeof(kOpdSectionName) - 1, &opd) && 822 FindObjFile(reinterpret_cast<const char *>(opd.sh_addr) + relocation, 823 opd.sh_size) != nullptr) { 824 opd_ptr = &opd; 825 } else { 826 return SYMBOL_NOT_FOUND; 827 } 828 } 829 830 // Consult a regular symbol table, then fall back to the dynamic symbol table. 831 for (const auto symbol_table_type : {SHT_SYMTAB, SHT_DYNSYM}) { 832 if (!GetSectionHeaderByType(obj.fd, obj.elf_header.e_shnum, 833 static_cast<off_t>(obj.elf_header.e_shoff), 834 static_cast<ElfW(Word)>(symbol_table_type), 835 &symtab, tmp_buf, tmp_buf_size)) { 836 continue; 837 } 838 if (!ReadFromOffsetExact( 839 obj.fd, &strtab, sizeof(strtab), 840 static_cast<off_t>(obj.elf_header.e_shoff + 841 symtab.sh_link * sizeof(symtab)))) { 842 continue; 843 } 844 const FindSymbolResult rc = 845 FindSymbol(pc, obj.fd, out, out_size, relocation, &strtab, &symtab, 846 opd_ptr, tmp_buf, tmp_buf_size); 847 if (rc != SYMBOL_NOT_FOUND) { 848 return rc; 849 } 850 } 851 852 return SYMBOL_NOT_FOUND; 853} 854 855namespace { 856// Thin wrapper around a file descriptor so that the file descriptor 857// gets closed for sure. 858class FileDescriptor { 859 public: 860 explicit FileDescriptor(int fd) : fd_(fd) {} 861 FileDescriptor(const FileDescriptor &) = delete; 862 FileDescriptor &operator=(const FileDescriptor &) = delete; 863 864 ~FileDescriptor() { 865 if (fd_ >= 0) { 866 close(fd_); 867 } 868 } 869 870 int get() const { return fd_; } 871 872 private: 873 const int fd_; 874}; 875 876// Helper class for reading lines from file. 877// 878// Note: we don't use ProcMapsIterator since the object is big (it has 879// a 5k array member) and uses async-unsafe functions such as sscanf() 880// and snprintf(). 
881class LineReader { 882 public: 883 explicit LineReader(int fd, char *buf, size_t buf_len) 884 : fd_(fd), 885 buf_len_(buf_len), 886 buf_(buf), 887 bol_(buf), 888 eol_(buf), 889 eod_(buf) {} 890 891 LineReader(const LineReader &) = delete; 892 LineReader &operator=(const LineReader &) = delete; 893 894 // Read '\n'-terminated line from file. On success, modify "bol" 895 // and "eol", then return true. Otherwise, return false. 896 // 897 // Note: if the last line doesn't end with '\n', the line will be 898 // dropped. It's an intentional behavior to make the code simple. 899 bool ReadLine(const char **bol, const char **eol) { 900 if (BufferIsEmpty()) { // First time. 901 const ssize_t num_bytes = ReadPersistent(fd_, buf_, buf_len_); 902 if (num_bytes <= 0) { // EOF or error. 903 return false; 904 } 905 eod_ = buf_ + num_bytes; 906 bol_ = buf_; 907 } else { 908 bol_ = eol_ + 1; // Advance to the next line in the buffer. 909 SAFE_ASSERT(bol_ <= eod_); // "bol_" can point to "eod_". 910 if (!HasCompleteLine()) { 911 const auto incomplete_line_length = static_cast<size_t>(eod_ - bol_); 912 // Move the trailing incomplete line to the beginning. 913 memmove(buf_, bol_, incomplete_line_length); 914 // Read text from file and append it. 915 char *const append_pos = buf_ + incomplete_line_length; 916 const size_t capacity_left = buf_len_ - incomplete_line_length; 917 const ssize_t num_bytes = 918 ReadPersistent(fd_, append_pos, capacity_left); 919 if (num_bytes <= 0) { // EOF or error. 920 return false; 921 } 922 eod_ = append_pos + num_bytes; 923 bol_ = buf_; 924 } 925 } 926 eol_ = FindLineFeed(); 927 if (eol_ == nullptr) { // '\n' not found. Malformed line. 928 return false; 929 } 930 *eol_ = '\0'; // Replace '\n' with '\0'. 
931 932 *bol = bol_; 933 *eol = eol_; 934 return true; 935 } 936 937 private: 938 char *FindLineFeed() const { 939 return reinterpret_cast<char *>( 940 memchr(bol_, '\n', static_cast<size_t>(eod_ - bol_))); 941 } 942 943 bool BufferIsEmpty() const { return buf_ == eod_; } 944 945 bool HasCompleteLine() const { 946 return !BufferIsEmpty() && FindLineFeed() != nullptr; 947 } 948 949 const int fd_; 950 const size_t buf_len_; 951 char *const buf_; 952 char *bol_; 953 char *eol_; 954 const char *eod_; // End of data in "buf_". 955}; 956} // namespace 957 958// Place the hex number read from "start" into "*hex". The pointer to 959// the first non-hex character or "end" is returned. 960static const char *GetHex(const char *start, const char *end, 961 uint64_t *const value) { 962 uint64_t hex = 0; 963 const char *p; 964 for (p = start; p < end; ++p) { 965 int ch = *p; 966 if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') || 967 (ch >= 'a' && ch <= 'f')) { 968 hex = (hex << 4) | 969 static_cast<uint64_t>(ch < 'A' ? ch - '0' : (ch & 0xF) + 9); 970 } else { // Encountered the first non-hex character. 971 break; 972 } 973 } 974 SAFE_ASSERT(p <= end); 975 *value = hex; 976 return p; 977} 978 979static const char *GetHex(const char *start, const char *end, 980 const void **const addr) { 981 uint64_t hex = 0; 982 const char *p = GetHex(start, end, &hex); 983 *addr = reinterpret_cast<void *>(hex); 984 return p; 985} 986 987// Normally we are only interested in "r?x" maps. 988// On the PowerPC, function pointers point to descriptors in the .opd 989// section. The descriptors themselves are not executable code, so 990// we need to relax the check below to "r??". 991static bool ShouldUseMapping(const char *const flags) { 992 return flags[0] == 'r' && (kPlatformUsesOPDSections || flags[2] == 'x'); 993} 994 995// Read /proc/self/maps and run "callback" for each mmapped file found. If 996// "callback" returns false, stop scanning and return true. 
Else continue 997// scanning /proc/self/maps. Return true if no parse error is found. 998static ABSL_ATTRIBUTE_NOINLINE bool ReadAddrMap( 999 bool (*callback)(const char *filename, const void *const start_addr, 1000 const void *const end_addr, uint64_t offset, void *arg), 1001 void *arg, void *tmp_buf, size_t tmp_buf_size) { 1002 // Use /proc/self/task/<pid>/maps instead of /proc/self/maps. The latter 1003 // requires kernel to stop all threads, and is significantly slower when there 1004 // are 1000s of threads. 1005 char maps_path[80]; 1006 snprintf(maps_path, sizeof(maps_path), "/proc/self/task/%d/maps", getpid()); 1007 1008 int maps_fd; 1009 NO_INTR(maps_fd = open(maps_path, O_RDONLY)); 1010 FileDescriptor wrapped_maps_fd(maps_fd); 1011 if (wrapped_maps_fd.get() < 0) { 1012 ABSL_RAW_LOG(WARNING, "%s: errno=%d", maps_path, errno); 1013 return false; 1014 } 1015 1016 // Iterate over maps and look for the map containing the pc. Then 1017 // look into the symbol tables inside. 1018 LineReader reader(wrapped_maps_fd.get(), static_cast<char *>(tmp_buf), 1019 tmp_buf_size); 1020 while (true) { 1021 const char *cursor; 1022 const char *eol; 1023 if (!reader.ReadLine(&cursor, &eol)) { // EOF or malformed line. 1024 break; 1025 } 1026 1027 const char *line = cursor; 1028 const void *start_address; 1029 // Start parsing line in /proc/self/maps. Here is an example: 1030 // 1031 // 08048000-0804c000 r-xp 00000000 08:01 2142121 /bin/cat 1032 // 1033 // We want start address (08048000), end address (0804c000), flags 1034 // (r-xp) and file name (/bin/cat). 1035 1036 // Read start address. 1037 cursor = GetHex(cursor, eol, &start_address); 1038 if (cursor == eol || *cursor != '-') { 1039 ABSL_RAW_LOG(WARNING, "Corrupt /proc/self/maps line: %s", line); 1040 return false; 1041 } 1042 ++cursor; // Skip '-'. 1043 1044 // Read end address. 
1045 const void *end_address; 1046 cursor = GetHex(cursor, eol, &end_address); 1047 if (cursor == eol || *cursor != ' ') { 1048 ABSL_RAW_LOG(WARNING, "Corrupt /proc/self/maps line: %s", line); 1049 return false; 1050 } 1051 ++cursor; // Skip ' '. 1052 1053 // Read flags. Skip flags until we encounter a space or eol. 1054 const char *const flags_start = cursor; 1055 while (cursor < eol && *cursor != ' ') { 1056 ++cursor; 1057 } 1058 // We expect at least four letters for flags (ex. "r-xp"). 1059 if (cursor == eol || cursor < flags_start + 4) { 1060 ABSL_RAW_LOG(WARNING, "Corrupt /proc/self/maps: %s", line); 1061 return false; 1062 } 1063 1064 // Check flags. 1065 if (!ShouldUseMapping(flags_start)) { 1066 continue; // We skip this map. 1067 } 1068 ++cursor; // Skip ' '. 1069 1070 // Read file offset. 1071 uint64_t offset; 1072 cursor = GetHex(cursor, eol, &offset); 1073 ++cursor; // Skip ' '. 1074 1075 // Skip to file name. "cursor" now points to dev. We need to skip at least 1076 // two spaces for dev and inode. 1077 int num_spaces = 0; 1078 while (cursor < eol) { 1079 if (*cursor == ' ') { 1080 ++num_spaces; 1081 } else if (num_spaces >= 2) { 1082 // The first non-space character after skipping two spaces 1083 // is the beginning of the file name. 1084 break; 1085 } 1086 ++cursor; 1087 } 1088 1089 // Check whether this entry corresponds to our hint table for the true 1090 // filename. 1091 bool hinted = 1092 GetFileMappingHint(&start_address, &end_address, &offset, &cursor); 1093 if (!hinted && (cursor == eol || cursor[0] == '[')) { 1094 // not an object file, typically [vdso] or [vsyscall] 1095 continue; 1096 } 1097 if (!callback(cursor, start_address, end_address, offset, arg)) break; 1098 } 1099 return true; 1100} 1101 1102// Find the objfile mapped in address region containing [addr, addr + len). 
1103ObjFile *Symbolizer::FindObjFile(const void *const addr, size_t len) { 1104 for (int i = 0; i < 2; ++i) { 1105 if (!ok_) return nullptr; 1106 1107 // Read /proc/self/maps if necessary 1108 if (!addr_map_read_) { 1109 addr_map_read_ = true; 1110 if (!ReadAddrMap(RegisterObjFile, this, tmp_buf_, TMP_BUF_SIZE)) { 1111 ok_ = false; 1112 return nullptr; 1113 } 1114 } 1115 1116 size_t lo = 0; 1117 size_t hi = addr_map_.Size(); 1118 while (lo < hi) { 1119 size_t mid = (lo + hi) / 2; 1120 if (addr < addr_map_.At(mid)->end_addr) { 1121 hi = mid; 1122 } else { 1123 lo = mid + 1; 1124 } 1125 } 1126 if (lo != addr_map_.Size()) { 1127 ObjFile *obj = addr_map_.At(lo); 1128 SAFE_ASSERT(obj->end_addr > addr); 1129 if (addr >= obj->start_addr && 1130 reinterpret_cast<const char *>(addr) + len <= obj->end_addr) 1131 return obj; 1132 } 1133 1134 // The address mapping may have changed since it was last read. Retry. 1135 ClearAddrMap(); 1136 } 1137 return nullptr; 1138} 1139 1140void Symbolizer::ClearAddrMap() { 1141 for (size_t i = 0; i != addr_map_.Size(); i++) { 1142 ObjFile *o = addr_map_.At(i); 1143 base_internal::LowLevelAlloc::Free(o->filename); 1144 if (o->fd >= 0) { 1145 close(o->fd); 1146 } 1147 } 1148 addr_map_.Clear(); 1149 addr_map_read_ = false; 1150} 1151 1152// Callback for ReadAddrMap to register objfiles in an in-memory table. 1153bool Symbolizer::RegisterObjFile(const char *filename, 1154 const void *const start_addr, 1155 const void *const end_addr, uint64_t offset, 1156 void *arg) { 1157 Symbolizer *impl = static_cast<Symbolizer *>(arg); 1158 1159 // Files are supposed to be added in the increasing address order. Make 1160 // sure that's the case. 
1161 size_t addr_map_size = impl->addr_map_.Size(); 1162 if (addr_map_size != 0) { 1163 ObjFile *old = impl->addr_map_.At(addr_map_size - 1); 1164 if (old->end_addr > end_addr) { 1165 ABSL_RAW_LOG(ERROR, 1166 "Unsorted addr map entry: 0x%" PRIxPTR ": %s <-> 0x%" PRIxPTR 1167 ": %s", 1168 reinterpret_cast<uintptr_t>(end_addr), filename, 1169 reinterpret_cast<uintptr_t>(old->end_addr), old->filename); 1170 return true; 1171 } else if (old->end_addr == end_addr) { 1172 // The same entry appears twice. This sometimes happens for [vdso]. 1173 if (old->start_addr != start_addr || 1174 strcmp(old->filename, filename) != 0) { 1175 ABSL_RAW_LOG(ERROR, 1176 "Duplicate addr 0x%" PRIxPTR ": %s <-> 0x%" PRIxPTR ": %s", 1177 reinterpret_cast<uintptr_t>(end_addr), filename, 1178 reinterpret_cast<uintptr_t>(old->end_addr), old->filename); 1179 } 1180 return true; 1181 } else if (old->end_addr == start_addr && 1182 reinterpret_cast<uintptr_t>(old->start_addr) - old->offset == 1183 reinterpret_cast<uintptr_t>(start_addr) - offset && 1184 strcmp(old->filename, filename) == 0) { 1185 // Two contiguous map entries that span a contiguous region of the file, 1186 // perhaps because some part of the file was mlock()ed. Combine them. 1187 old->end_addr = end_addr; 1188 return true; 1189 } 1190 } 1191 ObjFile *obj = impl->addr_map_.Add(); 1192 obj->filename = impl->CopyString(filename); 1193 obj->start_addr = start_addr; 1194 obj->end_addr = end_addr; 1195 obj->offset = offset; 1196 obj->elf_type = -1; // filled on demand 1197 obj->fd = -1; // opened on demand 1198 return true; 1199} 1200 1201// This function wraps the Demangle function to provide an interface 1202// where the input symbol is demangled in-place. 1203// To keep stack consumption low, we would like this function to not 1204// get inlined. 
1205static ABSL_ATTRIBUTE_NOINLINE void DemangleInplace(char *out, size_t out_size, 1206 char *tmp_buf, 1207 size_t tmp_buf_size) { 1208 if (Demangle(out, tmp_buf, tmp_buf_size)) { 1209 // Demangling succeeded. Copy to out if the space allows. 1210 size_t len = strlen(tmp_buf); 1211 if (len + 1 <= out_size) { // +1 for '\0'. 1212 SAFE_ASSERT(len < tmp_buf_size); 1213 memmove(out, tmp_buf, len + 1); 1214 } 1215 } 1216} 1217 1218SymbolCacheLine *Symbolizer::GetCacheLine(const void *const pc) { 1219 uintptr_t pc0 = reinterpret_cast<uintptr_t>(pc); 1220 pc0 >>= 3; // drop the low 3 bits 1221 1222 // Shuffle bits. 1223 pc0 ^= (pc0 >> 6) ^ (pc0 >> 12) ^ (pc0 >> 18); 1224 return &symbol_cache_[pc0 % SYMBOL_CACHE_LINES]; 1225} 1226 1227void Symbolizer::AgeSymbols(SymbolCacheLine *line) { 1228 for (uint32_t &age : line->age) { 1229 ++age; 1230 } 1231} 1232 1233const char *Symbolizer::FindSymbolInCache(const void *const pc) { 1234 if (pc == nullptr) return nullptr; 1235 1236 SymbolCacheLine *line = GetCacheLine(pc); 1237 for (size_t i = 0; i < ABSL_ARRAYSIZE(line->pc); ++i) { 1238 if (line->pc[i] == pc) { 1239 AgeSymbols(line); 1240 line->age[i] = 0; 1241 return line->name[i]; 1242 } 1243 } 1244 return nullptr; 1245} 1246 1247const char *Symbolizer::InsertSymbolInCache(const void *const pc, 1248 const char *name) { 1249 SAFE_ASSERT(pc != nullptr); 1250 1251 SymbolCacheLine *line = GetCacheLine(pc); 1252 uint32_t max_age = 0; 1253 size_t oldest_index = 0; 1254 bool found_oldest_index = false; 1255 for (size_t i = 0; i < ABSL_ARRAYSIZE(line->pc); ++i) { 1256 if (line->pc[i] == nullptr) { 1257 AgeSymbols(line); 1258 line->pc[i] = pc; 1259 line->name[i] = CopyString(name); 1260 line->age[i] = 0; 1261 return line->name[i]; 1262 } 1263 if (line->age[i] >= max_age) { 1264 max_age = line->age[i]; 1265 oldest_index = i; 1266 found_oldest_index = true; 1267 } 1268 } 1269 1270 AgeSymbols(line); 1271 ABSL_RAW_CHECK(found_oldest_index, "Corrupt cache"); 1272 
base_internal::LowLevelAlloc::Free(line->name[oldest_index]); 1273 line->pc[oldest_index] = pc; 1274 line->name[oldest_index] = CopyString(name); 1275 line->age[oldest_index] = 0; 1276 return line->name[oldest_index]; 1277} 1278 1279static void MaybeOpenFdFromSelfExe(ObjFile *obj) { 1280 if (memcmp(obj->start_addr, ELFMAG, SELFMAG) != 0) { 1281 return; 1282 } 1283 int fd = open("/proc/self/exe", O_RDONLY); 1284 if (fd == -1) { 1285 return; 1286 } 1287 // Verify that contents of /proc/self/exe matches in-memory image of 1288 // the binary. This can fail if the "deleted" binary is in fact not 1289 // the main executable, or for binaries that have the first PT_LOAD 1290 // segment smaller than 4K. We do it in four steps so that the 1291 // buffer is smaller and we don't consume too much stack space. 1292 const char *mem = reinterpret_cast<const char *>(obj->start_addr); 1293 for (int i = 0; i < 4; ++i) { 1294 char buf[1024]; 1295 ssize_t n = read(fd, buf, sizeof(buf)); 1296 if (n != sizeof(buf) || memcmp(buf, mem, sizeof(buf)) != 0) { 1297 close(fd); 1298 return; 1299 } 1300 mem += sizeof(buf); 1301 } 1302 obj->fd = fd; 1303} 1304 1305static bool MaybeInitializeObjFile(ObjFile *obj) { 1306 if (obj->fd < 0) { 1307 obj->fd = open(obj->filename, O_RDONLY); 1308 1309 if (obj->fd < 0) { 1310 // Getting /proc/self/exe here means that we were hinted. 1311 if (strcmp(obj->filename, "/proc/self/exe") == 0) { 1312 // /proc/self/exe may be inaccessible (due to setuid, etc.), so try 1313 // accessing the binary via argv0. 
1314 if (argv0_value != nullptr) { 1315 obj->fd = open(argv0_value, O_RDONLY); 1316 } 1317 } else { 1318 MaybeOpenFdFromSelfExe(obj); 1319 } 1320 } 1321 1322 if (obj->fd < 0) { 1323 ABSL_RAW_LOG(WARNING, "%s: open failed: errno=%d", obj->filename, errno); 1324 return false; 1325 } 1326 obj->elf_type = FileGetElfType(obj->fd); 1327 if (obj->elf_type < 0) { 1328 ABSL_RAW_LOG(WARNING, "%s: wrong elf type: %d", obj->filename, 1329 obj->elf_type); 1330 return false; 1331 } 1332 1333 if (!ReadFromOffsetExact(obj->fd, &obj->elf_header, sizeof(obj->elf_header), 1334 0)) { 1335 ABSL_RAW_LOG(WARNING, "%s: failed to read elf header", obj->filename); 1336 return false; 1337 } 1338 const int phnum = obj->elf_header.e_phnum; 1339 const int phentsize = obj->elf_header.e_phentsize; 1340 auto phoff = static_cast<off_t>(obj->elf_header.e_phoff); 1341 size_t num_interesting_load_segments = 0; 1342 for (int j = 0; j < phnum; j++) { 1343 ElfW(Phdr) phdr; 1344 if (!ReadFromOffsetExact(obj->fd, &phdr, sizeof(phdr), phoff)) { 1345 ABSL_RAW_LOG(WARNING, "%s: failed to read program header %d", 1346 obj->filename, j); 1347 return false; 1348 } 1349 phoff += phentsize; 1350 1351#if defined(__powerpc__) && !(_CALL_ELF > 1) 1352 // On the PowerPC ELF v1 ABI, function pointers actually point to function 1353 // descriptors. These descriptors are stored in an .opd section, which is 1354 // mapped read-only. We thus need to look at all readable segments, not 1355 // just the executable ones. 1356 constexpr int interesting = PF_R; 1357#else 1358 constexpr int interesting = PF_X | PF_R; 1359#endif 1360 1361 if (phdr.p_type != PT_LOAD 1362 || (phdr.p_flags & interesting) != interesting) { 1363 // Not a LOAD segment, not executable code, and not a function 1364 // descriptor. 
1365 continue; 1366 } 1367 if (num_interesting_load_segments < obj->phdr.size()) { 1368 memcpy(&obj->phdr[num_interesting_load_segments++], &phdr, sizeof(phdr)); 1369 } else { 1370 ABSL_RAW_LOG( 1371 WARNING, "%s: too many interesting LOAD segments: %zu >= %zu", 1372 obj->filename, num_interesting_load_segments, obj->phdr.size()); 1373 break; 1374 } 1375 } 1376 if (num_interesting_load_segments == 0) { 1377 // This object has no interesting LOAD segments. That's unexpected. 1378 ABSL_RAW_LOG(WARNING, "%s: no interesting LOAD segments", obj->filename); 1379 return false; 1380 } 1381 } 1382 return true; 1383} 1384 1385// The implementation of our symbolization routine. If it 1386// successfully finds the symbol containing "pc" and obtains the 1387// symbol name, returns pointer to that symbol. Otherwise, returns nullptr. 1388// If any symbol decorators have been installed via InstallSymbolDecorator(), 1389// they are called here as well. 1390// To keep stack consumption low, we would like this function to not 1391// get inlined. 1392const char *Symbolizer::GetUncachedSymbol(const void *pc) { 1393 ObjFile *const obj = FindObjFile(pc, 1); 1394 ptrdiff_t relocation = 0; 1395 int fd = -1; 1396 if (obj != nullptr) { 1397 if (MaybeInitializeObjFile(obj)) { 1398 const size_t start_addr = reinterpret_cast<size_t>(obj->start_addr); 1399 if (obj->elf_type == ET_DYN && start_addr >= obj->offset) { 1400 // This object was relocated. 1401 // 1402 // For obj->offset > 0, adjust the relocation since a mapping at offset 1403 // X in the file will have a start address of [true relocation]+X. 1404 relocation = static_cast<ptrdiff_t>(start_addr - obj->offset); 1405 1406 // Note: some binaries have multiple LOAD segments that can contain 1407 // function pointers. We must find the right one. 1408 ElfW(Phdr) *phdr = nullptr; 1409 for (size_t j = 0; j < obj->phdr.size(); j++) { 1410 ElfW(Phdr) &p = obj->phdr[j]; 1411 if (p.p_type != PT_LOAD) { 1412 // We only expect PT_LOADs. 
This must be PT_NULL that we didn't 1413 // write over (i.e. we exhausted all interesting PT_LOADs). 1414 ABSL_RAW_CHECK(p.p_type == PT_NULL, "unexpected p_type"); 1415 break; 1416 } 1417 if (pc < reinterpret_cast<void *>(start_addr + p.p_vaddr + p.p_memsz)) { 1418 phdr = &p; 1419 break; 1420 } 1421 } 1422 if (phdr == nullptr) { 1423 // That's unexpected. Hope for the best. 1424 ABSL_RAW_LOG( 1425 WARNING, 1426 "%s: unable to find LOAD segment for pc: %p, start_addr: %zx", 1427 obj->filename, pc, start_addr); 1428 } else { 1429 // Adjust relocation in case phdr.p_vaddr != 0. 1430 // This happens for binaries linked with `lld --rosegment`, and for 1431 // binaries linked with BFD `ld -z separate-code`. 1432 relocation -= phdr->p_vaddr - phdr->p_offset; 1433 } 1434 } 1435 1436 fd = obj->fd; 1437 if (GetSymbolFromObjectFile(*obj, pc, relocation, symbol_buf_, 1438 sizeof(symbol_buf_), tmp_buf_, 1439 sizeof(tmp_buf_)) == SYMBOL_FOUND) { 1440 // Only try to demangle the symbol name if it fit into symbol_buf_. 1441 DemangleInplace(symbol_buf_, sizeof(symbol_buf_), tmp_buf_, 1442 sizeof(tmp_buf_)); 1443 } 1444 } 1445 } else { 1446#if ABSL_HAVE_VDSO_SUPPORT 1447 VDSOSupport vdso; 1448 if (vdso.IsPresent()) { 1449 VDSOSupport::SymbolInfo symbol_info; 1450 if (vdso.LookupSymbolByAddress(pc, &symbol_info)) { 1451 // All VDSO symbols are known to be short. 
1452 size_t len = strlen(symbol_info.name); 1453 ABSL_RAW_CHECK(len + 1 < sizeof(symbol_buf_), 1454 "VDSO symbol unexpectedly long"); 1455 memcpy(symbol_buf_, symbol_info.name, len + 1); 1456 } 1457 } 1458#endif 1459 } 1460 1461 if (g_decorators_mu.TryLock()) { 1462 if (g_num_decorators > 0) { 1463 SymbolDecoratorArgs decorator_args = { 1464 pc, relocation, fd, symbol_buf_, sizeof(symbol_buf_), 1465 tmp_buf_, sizeof(tmp_buf_), nullptr}; 1466 for (int i = 0; i < g_num_decorators; ++i) { 1467 decorator_args.arg = g_decorators[i].arg; 1468 g_decorators[i].fn(&decorator_args); 1469 } 1470 } 1471 g_decorators_mu.Unlock(); 1472 } 1473 if (symbol_buf_[0] == '\0') { 1474 return nullptr; 1475 } 1476 symbol_buf_[sizeof(symbol_buf_) - 1] = '\0'; // Paranoia. 1477 return InsertSymbolInCache(pc, symbol_buf_); 1478} 1479 1480const char *Symbolizer::GetSymbol(const void *pc) { 1481 const char *entry = FindSymbolInCache(pc); 1482 if (entry != nullptr) { 1483 return entry; 1484 } 1485 symbol_buf_[0] = '\0'; 1486 1487#ifdef __hppa__ 1488 { 1489 // In some contexts (e.g., return addresses), PA-RISC uses the lowest two 1490 // bits of the address to indicate the privilege level. Clear those bits 1491 // before trying to symbolize. 1492 const auto pc_bits = reinterpret_cast<uintptr_t>(pc); 1493 const auto address = pc_bits & ~0x3; 1494 entry = GetUncachedSymbol(reinterpret_cast<const void *>(address)); 1495 if (entry != nullptr) { 1496 return entry; 1497 } 1498 1499 // In some contexts, PA-RISC also uses bit 1 of the address to indicate that 1500 // this is a cross-DSO function pointer. Such function pointers actually 1501 // point to a procedure label, a struct whose first 32-bit (pointer) element 1502 // actually points to the function text. With no symbol found for this 1503 // address so far, try interpreting it as a cross-DSO function pointer and 1504 // see how that goes. 
1505 if (pc_bits & 0x2) { 1506 return GetUncachedSymbol(*reinterpret_cast<const void *const *>(address)); 1507 } 1508 1509 return nullptr; 1510 } 1511#else 1512 return GetUncachedSymbol(pc); 1513#endif 1514} 1515 1516bool RemoveAllSymbolDecorators(void) { 1517 if (!g_decorators_mu.TryLock()) { 1518 // Someone else is using decorators. Get out. 1519 return false; 1520 } 1521 g_num_decorators = 0; 1522 g_decorators_mu.Unlock(); 1523 return true; 1524} 1525 1526bool RemoveSymbolDecorator(int ticket) { 1527 if (!g_decorators_mu.TryLock()) { 1528 // Someone else is using decorators. Get out. 1529 return false; 1530 } 1531 for (int i = 0; i < g_num_decorators; ++i) { 1532 if (g_decorators[i].ticket == ticket) { 1533 while (i < g_num_decorators - 1) { 1534 g_decorators[i] = g_decorators[i + 1]; 1535 ++i; 1536 } 1537 g_num_decorators = i; 1538 break; 1539 } 1540 } 1541 g_decorators_mu.Unlock(); 1542 return true; // Decorator is known to be removed. 1543} 1544 1545int InstallSymbolDecorator(SymbolDecorator decorator, void *arg) { 1546 static int ticket = 0; 1547 1548 if (!g_decorators_mu.TryLock()) { 1549 // Someone else is using decorators. Get out. 1550 return -2; 1551 } 1552 int ret = ticket; 1553 if (g_num_decorators >= kMaxDecorators) { 1554 ret = -1; 1555 } else { 1556 g_decorators[g_num_decorators] = {decorator, arg, ticket++}; 1557 ++g_num_decorators; 1558 } 1559 g_decorators_mu.Unlock(); 1560 return ret; 1561} 1562 1563bool RegisterFileMappingHint(const void *start, const void *end, uint64_t offset, 1564 const char *filename) { 1565 SAFE_ASSERT(start <= end); 1566 SAFE_ASSERT(filename != nullptr); 1567 1568 InitSigSafeArena(); 1569 1570 if (!g_file_mapping_mu.TryLock()) { 1571 return false; 1572 } 1573 1574 bool ret = true; 1575 if (g_num_file_mapping_hints >= kMaxFileMappingHints) { 1576 ret = false; 1577 } else { 1578 // TODO(ckennelly): Move this into a string copy routine. 
1579 size_t len = strlen(filename); 1580 char *dst = static_cast<char *>( 1581 base_internal::LowLevelAlloc::AllocWithArena(len + 1, SigSafeArena())); 1582 ABSL_RAW_CHECK(dst != nullptr, "out of memory"); 1583 memcpy(dst, filename, len + 1); 1584 1585 auto &hint = g_file_mapping_hints[g_num_file_mapping_hints++]; 1586 hint.start = start; 1587 hint.end = end; 1588 hint.offset = offset; 1589 hint.filename = dst; 1590 } 1591 1592 g_file_mapping_mu.Unlock(); 1593 return ret; 1594} 1595 1596bool GetFileMappingHint(const void **start, const void **end, uint64_t *offset, 1597 const char **filename) { 1598 if (!g_file_mapping_mu.TryLock()) { 1599 return false; 1600 } 1601 bool found = false; 1602 for (int i = 0; i < g_num_file_mapping_hints; i++) { 1603 if (g_file_mapping_hints[i].start <= *start && 1604 *end <= g_file_mapping_hints[i].end) { 1605 // We assume that the start_address for the mapping is the base 1606 // address of the ELF section, but when [start_address,end_address) is 1607 // not strictly equal to [hint.start, hint.end), that assumption is 1608 // invalid. 1609 // 1610 // This uses the hint's start address (even though hint.start is not 1611 // necessarily equal to start_address) to ensure the correct 1612 // relocation is computed later. 1613 *start = g_file_mapping_hints[i].start; 1614 *end = g_file_mapping_hints[i].end; 1615 *offset = g_file_mapping_hints[i].offset; 1616 *filename = g_file_mapping_hints[i].filename; 1617 found = true; 1618 break; 1619 } 1620 } 1621 g_file_mapping_mu.Unlock(); 1622 return found; 1623} 1624 1625} // namespace debugging_internal 1626 1627bool Symbolize(const void *pc, char *out, int out_size) { 1628 // Symbolization is very slow under tsan. 
1629 ABSL_ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN(); 1630 SAFE_ASSERT(out_size >= 0); 1631 debugging_internal::Symbolizer *s = debugging_internal::AllocateSymbolizer(); 1632 const char *name = s->GetSymbol(pc); 1633 bool ok = false; 1634 if (name != nullptr && out_size > 0) { 1635 strncpy(out, name, static_cast<size_t>(out_size)); 1636 ok = true; 1637 if (out[static_cast<size_t>(out_size) - 1] != '\0') { 1638 // strncpy() does not '\0' terminate when it truncates. Do so, with 1639 // trailing ellipsis. 1640 static constexpr char kEllipsis[] = "..."; 1641 size_t ellipsis_size = 1642 std::min(strlen(kEllipsis), static_cast<size_t>(out_size) - 1); 1643 memcpy(out + static_cast<size_t>(out_size) - ellipsis_size - 1, kEllipsis, 1644 ellipsis_size); 1645 out[static_cast<size_t>(out_size) - 1] = '\0'; 1646 } 1647 } 1648 debugging_internal::FreeSymbolizer(s); 1649 ABSL_ANNOTATE_IGNORE_READS_AND_WRITES_END(); 1650 return ok; 1651} 1652 1653ABSL_NAMESPACE_END 1654} // namespace absl 1655 1656extern "C" bool AbslInternalGetFileMappingHint(const void **start, 1657 const void **end, uint64_t *offset, 1658 const char **filename) { 1659 return absl::debugging_internal::GetFileMappingHint(start, end, offset, 1660 filename); 1661} 1662