// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This library provides Symbolize() function that symbolizes program
// counters to their corresponding symbol names on linux platforms.
// This library has a minimal implementation of an ELF symbol table
// reader (i.e. it doesn't depend on libelf, etc.).
//
// The algorithm used in Symbolize() is as follows.
//
//   1. Go through a list of maps in /proc/self/maps and find the map
//   containing the program counter.
//
//   2. Open the mapped file and find a regular symbol table inside.
//   Iterate over symbols in the symbol table and look for the symbol
//   containing the program counter.  If such a symbol is found,
//   obtain the symbol name, and demangle the symbol if possible.
//   If the symbol isn't found in the regular symbol table (binary is
//   stripped), try the same thing with a dynamic symbol table.
//
// Note that Symbolize() is originally implemented to be used in
// signal handlers, hence it doesn't use malloc() and other unsafe
// operations.  It should be both thread-safe and async-signal-safe.
//
// Implementation note:
//
// We don't use the heap but only use the stack.  We want to reduce the
// stack consumption so that the symbolizer can run on small stacks.
40// 41// Here are some numbers collected with GCC 4.1.0 on x86: 42// - sizeof(Elf32_Sym) = 16 43// - sizeof(Elf32_Shdr) = 40 44// - sizeof(Elf64_Sym) = 24 45// - sizeof(Elf64_Shdr) = 64 46// 47// This implementation is intended to be async-signal-safe but uses some 48// functions which are not guaranteed to be so, such as memchr() and 49// memmove(). We assume they are async-signal-safe. 50 51#include <dlfcn.h> 52#include <elf.h> 53#include <fcntl.h> 54#include <link.h> // For ElfW() macro. 55#include <sys/stat.h> 56#include <sys/types.h> 57#include <unistd.h> 58 59#include <algorithm> 60#include <array> 61#include <atomic> 62#include <cerrno> 63#include <cinttypes> 64#include <climits> 65#include <cstdint> 66#include <cstdio> 67#include <cstdlib> 68#include <cstring> 69 70#include "absl/base/casts.h" 71#include "absl/base/dynamic_annotations.h" 72#include "absl/base/internal/low_level_alloc.h" 73#include "absl/base/internal/raw_logging.h" 74#include "absl/base/internal/spinlock.h" 75#include "absl/base/port.h" 76#include "absl/debugging/internal/demangle.h" 77#include "absl/debugging/internal/vdso_support.h" 78#include "absl/strings/string_view.h" 79 80#if defined(__FreeBSD__) && !defined(ElfW) 81#define ElfW(x) __ElfN(x) 82#endif 83 84namespace absl { 85ABSL_NAMESPACE_BEGIN 86 87// Value of argv[0]. Used by MaybeInitializeObjFile(). 88static char *argv0_value = nullptr; 89 90void InitializeSymbolizer(const char *argv0) { 91#ifdef ABSL_HAVE_VDSO_SUPPORT 92 // We need to make sure VDSOSupport::Init() is called before any setuid or 93 // chroot calls, so InitializeSymbolizer() should be called very early in the 94 // life of a program. 
95 absl::debugging_internal::VDSOSupport::Init(); 96#endif 97 if (argv0_value != nullptr) { 98 free(argv0_value); 99 argv0_value = nullptr; 100 } 101 if (argv0 != nullptr && argv0[0] != '\0') { 102 argv0_value = strdup(argv0); 103 } 104} 105 106namespace debugging_internal { 107namespace { 108 109// Re-runs fn until it doesn't cause EINTR. 110#define NO_INTR(fn) \ 111 do { \ 112 } while ((fn) < 0 && errno == EINTR) 113 114// On Linux, ELF_ST_* are defined in <linux/elf.h>. To make this portable 115// we define our own ELF_ST_BIND and ELF_ST_TYPE if not available. 116#ifndef ELF_ST_BIND 117#define ELF_ST_BIND(info) (((unsigned char)(info)) >> 4) 118#endif 119 120#ifndef ELF_ST_TYPE 121#define ELF_ST_TYPE(info) (((unsigned char)(info)) & 0xF) 122#endif 123 124// Some platforms use a special .opd section to store function pointers. 125const char kOpdSectionName[] = ".opd"; 126 127#if (defined(__powerpc__) && !(_CALL_ELF > 1)) || defined(__ia64) 128// Use opd section for function descriptors on these platforms, the function 129// address is the first word of the descriptor. 130enum { kPlatformUsesOPDSections = 1 }; 131#else // not PPC or IA64 132enum { kPlatformUsesOPDSections = 0 }; 133#endif 134 135// This works for PowerPC & IA64 only. A function descriptor consist of two 136// pointers and the first one is the function's entry. 137const size_t kFunctionDescriptorSize = sizeof(void *) * 2; 138 139const int kMaxDecorators = 10; // Seems like a reasonable upper limit. 140 141struct InstalledSymbolDecorator { 142 SymbolDecorator fn; 143 void *arg; 144 int ticket; 145}; 146 147int g_num_decorators; 148InstalledSymbolDecorator g_decorators[kMaxDecorators]; 149 150struct FileMappingHint { 151 const void *start; 152 const void *end; 153 uint64_t offset; 154 const char *filename; 155}; 156 157// Protects g_decorators. 158// We are using SpinLock and not a Mutex here, because we may be called 159// from inside Mutex::Lock itself, and it prohibits recursive calls. 
160// This happens in e.g. base/stacktrace_syscall_unittest. 161// Moreover, we are using only TryLock(), if the decorator list 162// is being modified (is busy), we skip all decorators, and possibly 163// loose some info. Sorry, that's the best we could do. 164ABSL_CONST_INIT absl::base_internal::SpinLock g_decorators_mu( 165 absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY); 166 167const int kMaxFileMappingHints = 8; 168int g_num_file_mapping_hints; 169FileMappingHint g_file_mapping_hints[kMaxFileMappingHints]; 170// Protects g_file_mapping_hints. 171ABSL_CONST_INIT absl::base_internal::SpinLock g_file_mapping_mu( 172 absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY); 173 174// Async-signal-safe function to zero a buffer. 175// memset() is not guaranteed to be async-signal-safe. 176static void SafeMemZero(void* p, size_t size) { 177 unsigned char *c = static_cast<unsigned char *>(p); 178 while (size--) { 179 *c++ = 0; 180 } 181} 182 183struct ObjFile { 184 ObjFile() 185 : filename(nullptr), 186 start_addr(nullptr), 187 end_addr(nullptr), 188 offset(0), 189 fd(-1), 190 elf_type(-1) { 191 SafeMemZero(&elf_header, sizeof(elf_header)); 192 SafeMemZero(&phdr[0], sizeof(phdr)); 193 } 194 195 char *filename; 196 const void *start_addr; 197 const void *end_addr; 198 uint64_t offset; 199 200 // The following fields are initialized on the first access to the 201 // object file. 202 int fd; 203 int elf_type; 204 ElfW(Ehdr) elf_header; 205 206 // PT_LOAD program header describing executable code. 207 // Normally we expect just one, but SWIFT binaries have two. 208 // CUDA binaries have 3 (see cr/473913254 description). 209 std::array<ElfW(Phdr), 4> phdr; 210}; 211 212// Build 4-way associative cache for symbols. Within each cache line, symbols 213// are replaced in LRU order. 
214enum { 215 ASSOCIATIVITY = 4, 216}; 217struct SymbolCacheLine { 218 const void *pc[ASSOCIATIVITY]; 219 char *name[ASSOCIATIVITY]; 220 221 // age[i] is incremented when a line is accessed. it's reset to zero if the 222 // i'th entry is read. 223 uint32_t age[ASSOCIATIVITY]; 224}; 225 226// --------------------------------------------------------------- 227// An async-signal-safe arena for LowLevelAlloc 228static std::atomic<base_internal::LowLevelAlloc::Arena *> g_sig_safe_arena; 229 230static base_internal::LowLevelAlloc::Arena *SigSafeArena() { 231 return g_sig_safe_arena.load(std::memory_order_acquire); 232} 233 234static void InitSigSafeArena() { 235 if (SigSafeArena() == nullptr) { 236 base_internal::LowLevelAlloc::Arena *new_arena = 237 base_internal::LowLevelAlloc::NewArena( 238 base_internal::LowLevelAlloc::kAsyncSignalSafe); 239 base_internal::LowLevelAlloc::Arena *old_value = nullptr; 240 if (!g_sig_safe_arena.compare_exchange_strong(old_value, new_arena, 241 std::memory_order_release, 242 std::memory_order_relaxed)) { 243 // We lost a race to allocate an arena; deallocate. 244 base_internal::LowLevelAlloc::DeleteArena(new_arena); 245 } 246 } 247} 248 249// --------------------------------------------------------------- 250// An AddrMap is a vector of ObjFile, using SigSafeArena() for allocation. 
251 252class AddrMap { 253 public: 254 AddrMap() : size_(0), allocated_(0), obj_(nullptr) {} 255 ~AddrMap() { base_internal::LowLevelAlloc::Free(obj_); } 256 size_t Size() const { return size_; } 257 ObjFile *At(size_t i) { return &obj_[i]; } 258 ObjFile *Add(); 259 void Clear(); 260 261 private: 262 size_t size_; // count of valid elements (<= allocated_) 263 size_t allocated_; // count of allocated elements 264 ObjFile *obj_; // array of allocated_ elements 265 AddrMap(const AddrMap &) = delete; 266 AddrMap &operator=(const AddrMap &) = delete; 267}; 268 269void AddrMap::Clear() { 270 for (size_t i = 0; i != size_; i++) { 271 At(i)->~ObjFile(); 272 } 273 size_ = 0; 274} 275 276ObjFile *AddrMap::Add() { 277 if (size_ == allocated_) { 278 size_t new_allocated = allocated_ * 2 + 50; 279 ObjFile *new_obj_ = 280 static_cast<ObjFile *>(base_internal::LowLevelAlloc::AllocWithArena( 281 new_allocated * sizeof(*new_obj_), SigSafeArena())); 282 if (obj_) { 283 memcpy(new_obj_, obj_, allocated_ * sizeof(*new_obj_)); 284 base_internal::LowLevelAlloc::Free(obj_); 285 } 286 obj_ = new_obj_; 287 allocated_ = new_allocated; 288 } 289 return new (&obj_[size_++]) ObjFile; 290} 291 292// --------------------------------------------------------------- 293 294enum FindSymbolResult { SYMBOL_NOT_FOUND = 1, SYMBOL_TRUNCATED, SYMBOL_FOUND }; 295 296class Symbolizer { 297 public: 298 Symbolizer(); 299 ~Symbolizer(); 300 const char *GetSymbol(const void *const pc); 301 302 private: 303 char *CopyString(const char *s) { 304 size_t len = strlen(s); 305 char *dst = static_cast<char *>( 306 base_internal::LowLevelAlloc::AllocWithArena(len + 1, SigSafeArena())); 307 ABSL_RAW_CHECK(dst != nullptr, "out of memory"); 308 memcpy(dst, s, len + 1); 309 return dst; 310 } 311 ObjFile *FindObjFile(const void *const start, 312 size_t size) ABSL_ATTRIBUTE_NOINLINE; 313 static bool RegisterObjFile(const char *filename, 314 const void *const start_addr, 315 const void *const end_addr, uint64_t offset, 316 
void *arg); 317 SymbolCacheLine *GetCacheLine(const void *const pc); 318 const char *FindSymbolInCache(const void *const pc); 319 const char *InsertSymbolInCache(const void *const pc, const char *name); 320 void AgeSymbols(SymbolCacheLine *line); 321 void ClearAddrMap(); 322 FindSymbolResult GetSymbolFromObjectFile(const ObjFile &obj, 323 const void *const pc, 324 const ptrdiff_t relocation, 325 char *out, size_t out_size, 326 char *tmp_buf, size_t tmp_buf_size); 327 const char *GetUncachedSymbol(const void *pc); 328 329 enum { 330 SYMBOL_BUF_SIZE = 3072, 331 TMP_BUF_SIZE = 1024, 332 SYMBOL_CACHE_LINES = 128, 333 }; 334 335 AddrMap addr_map_; 336 337 bool ok_; 338 bool addr_map_read_; 339 340 char symbol_buf_[SYMBOL_BUF_SIZE]; 341 342 // tmp_buf_ will be used to store arrays of ElfW(Shdr) and ElfW(Sym) 343 // so we ensure that tmp_buf_ is properly aligned to store either. 344 alignas(16) char tmp_buf_[TMP_BUF_SIZE]; 345 static_assert(alignof(ElfW(Shdr)) <= 16, 346 "alignment of tmp buf too small for Shdr"); 347 static_assert(alignof(ElfW(Sym)) <= 16, 348 "alignment of tmp buf too small for Sym"); 349 350 SymbolCacheLine symbol_cache_[SYMBOL_CACHE_LINES]; 351}; 352 353static std::atomic<Symbolizer *> g_cached_symbolizer; 354 355} // namespace 356 357static size_t SymbolizerSize() { 358#if defined(__wasm__) || defined(__asmjs__) 359 auto pagesize = static_cast<size_t>(getpagesize()); 360#else 361 auto pagesize = static_cast<size_t>(sysconf(_SC_PAGESIZE)); 362#endif 363 return ((sizeof(Symbolizer) - 1) / pagesize + 1) * pagesize; 364} 365 366// Return (and set null) g_cached_symbolized_state if it is not null. 367// Otherwise return a new symbolizer. 
368static Symbolizer *AllocateSymbolizer() { 369 InitSigSafeArena(); 370 Symbolizer *symbolizer = 371 g_cached_symbolizer.exchange(nullptr, std::memory_order_acquire); 372 if (symbolizer != nullptr) { 373 return symbolizer; 374 } 375 return new (base_internal::LowLevelAlloc::AllocWithArena( 376 SymbolizerSize(), SigSafeArena())) Symbolizer(); 377} 378 379// Set g_cached_symbolize_state to s if it is null, otherwise 380// delete s. 381static void FreeSymbolizer(Symbolizer *s) { 382 Symbolizer *old_cached_symbolizer = nullptr; 383 if (!g_cached_symbolizer.compare_exchange_strong(old_cached_symbolizer, s, 384 std::memory_order_release, 385 std::memory_order_relaxed)) { 386 s->~Symbolizer(); 387 base_internal::LowLevelAlloc::Free(s); 388 } 389} 390 391Symbolizer::Symbolizer() : ok_(true), addr_map_read_(false) { 392 for (SymbolCacheLine &symbol_cache_line : symbol_cache_) { 393 for (size_t j = 0; j < ABSL_ARRAYSIZE(symbol_cache_line.name); ++j) { 394 symbol_cache_line.pc[j] = nullptr; 395 symbol_cache_line.name[j] = nullptr; 396 symbol_cache_line.age[j] = 0; 397 } 398 } 399} 400 401Symbolizer::~Symbolizer() { 402 for (SymbolCacheLine &symbol_cache_line : symbol_cache_) { 403 for (char *s : symbol_cache_line.name) { 404 base_internal::LowLevelAlloc::Free(s); 405 } 406 } 407 ClearAddrMap(); 408} 409 410// We don't use assert() since it's not guaranteed to be 411// async-signal-safe. Instead we define a minimal assertion 412// macro. So far, we don't need pretty printing for __FILE__, etc. 413#define SAFE_ASSERT(expr) ((expr) ? static_cast<void>(0) : abort()) 414 415// Read up to "count" bytes from file descriptor "fd" into the buffer 416// starting at "buf" while handling short reads and EINTR. On 417// success, return the number of bytes read. Otherwise, return -1. 
418static ssize_t ReadPersistent(int fd, void *buf, size_t count) { 419 SAFE_ASSERT(fd >= 0); 420 SAFE_ASSERT(count <= SSIZE_MAX); 421 char *buf0 = reinterpret_cast<char *>(buf); 422 size_t num_bytes = 0; 423 while (num_bytes < count) { 424 ssize_t len; 425 NO_INTR(len = read(fd, buf0 + num_bytes, count - num_bytes)); 426 if (len < 0) { // There was an error other than EINTR. 427 ABSL_RAW_LOG(WARNING, "read failed: errno=%d", errno); 428 return -1; 429 } 430 if (len == 0) { // Reached EOF. 431 break; 432 } 433 num_bytes += static_cast<size_t>(len); 434 } 435 SAFE_ASSERT(num_bytes <= count); 436 return static_cast<ssize_t>(num_bytes); 437} 438 439// Read up to "count" bytes from "offset" in the file pointed by file 440// descriptor "fd" into the buffer starting at "buf". On success, 441// return the number of bytes read. Otherwise, return -1. 442static ssize_t ReadFromOffset(const int fd, void *buf, const size_t count, 443 const off_t offset) { 444 off_t off = lseek(fd, offset, SEEK_SET); 445 if (off == (off_t)-1) { 446 ABSL_RAW_LOG(WARNING, "lseek(%d, %jd, SEEK_SET) failed: errno=%d", fd, 447 static_cast<intmax_t>(offset), errno); 448 return -1; 449 } 450 return ReadPersistent(fd, buf, count); 451} 452 453// Try reading exactly "count" bytes from "offset" bytes in a file 454// pointed by "fd" into the buffer starting at "buf" while handling 455// short reads and EINTR. On success, return true. Otherwise, return 456// false. 457static bool ReadFromOffsetExact(const int fd, void *buf, const size_t count, 458 const off_t offset) { 459 ssize_t len = ReadFromOffset(fd, buf, count, offset); 460 return len >= 0 && static_cast<size_t>(len) == count; 461} 462 463// Returns elf_header.e_type if the file pointed by fd is an ELF binary. 
464static int FileGetElfType(const int fd) { 465 ElfW(Ehdr) elf_header; 466 if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { 467 return -1; 468 } 469 if (memcmp(elf_header.e_ident, ELFMAG, SELFMAG) != 0) { 470 return -1; 471 } 472 return elf_header.e_type; 473} 474 475// Read the section headers in the given ELF binary, and if a section 476// of the specified type is found, set the output to this section header 477// and return true. Otherwise, return false. 478// To keep stack consumption low, we would like this function to not get 479// inlined. 480static ABSL_ATTRIBUTE_NOINLINE bool GetSectionHeaderByType( 481 const int fd, ElfW(Half) sh_num, const off_t sh_offset, ElfW(Word) type, 482 ElfW(Shdr) * out, char *tmp_buf, size_t tmp_buf_size) { 483 ElfW(Shdr) *buf = reinterpret_cast<ElfW(Shdr) *>(tmp_buf); 484 const size_t buf_entries = tmp_buf_size / sizeof(buf[0]); 485 const size_t buf_bytes = buf_entries * sizeof(buf[0]); 486 487 for (size_t i = 0; static_cast<int>(i) < sh_num;) { 488 const size_t num_bytes_left = 489 (static_cast<size_t>(sh_num) - i) * sizeof(buf[0]); 490 const size_t num_bytes_to_read = 491 (buf_bytes > num_bytes_left) ? 
num_bytes_left : buf_bytes; 492 const off_t offset = sh_offset + static_cast<off_t>(i * sizeof(buf[0])); 493 const ssize_t len = ReadFromOffset(fd, buf, num_bytes_to_read, offset); 494 if (len < 0) { 495 ABSL_RAW_LOG( 496 WARNING, 497 "Reading %zu bytes from offset %ju returned %zd which is negative.", 498 num_bytes_to_read, static_cast<intmax_t>(offset), len); 499 return false; 500 } 501 if (static_cast<size_t>(len) % sizeof(buf[0]) != 0) { 502 ABSL_RAW_LOG( 503 WARNING, 504 "Reading %zu bytes from offset %jd returned %zd which is not a " 505 "multiple of %zu.", 506 num_bytes_to_read, static_cast<intmax_t>(offset), len, 507 sizeof(buf[0])); 508 return false; 509 } 510 const size_t num_headers_in_buf = static_cast<size_t>(len) / sizeof(buf[0]); 511 SAFE_ASSERT(num_headers_in_buf <= buf_entries); 512 for (size_t j = 0; j < num_headers_in_buf; ++j) { 513 if (buf[j].sh_type == type) { 514 *out = buf[j]; 515 return true; 516 } 517 } 518 i += num_headers_in_buf; 519 } 520 return false; 521} 522 523// There is no particular reason to limit section name to 63 characters, 524// but there has (as yet) been no need for anything longer either. 
525const int kMaxSectionNameLen = 64; 526 527bool ForEachSection(int fd, 528 const std::function<bool(absl::string_view name, 529 const ElfW(Shdr) &)> &callback) { 530 ElfW(Ehdr) elf_header; 531 if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { 532 return false; 533 } 534 535 ElfW(Shdr) shstrtab; 536 off_t shstrtab_offset = static_cast<off_t>(elf_header.e_shoff) + 537 elf_header.e_shentsize * elf_header.e_shstrndx; 538 if (!ReadFromOffsetExact(fd, &shstrtab, sizeof(shstrtab), shstrtab_offset)) { 539 return false; 540 } 541 542 for (int i = 0; i < elf_header.e_shnum; ++i) { 543 ElfW(Shdr) out; 544 off_t section_header_offset = 545 static_cast<off_t>(elf_header.e_shoff) + elf_header.e_shentsize * i; 546 if (!ReadFromOffsetExact(fd, &out, sizeof(out), section_header_offset)) { 547 return false; 548 } 549 off_t name_offset = static_cast<off_t>(shstrtab.sh_offset) + out.sh_name; 550 char header_name[kMaxSectionNameLen]; 551 ssize_t n_read = 552 ReadFromOffset(fd, &header_name, kMaxSectionNameLen, name_offset); 553 if (n_read < 0) { 554 return false; 555 } else if (n_read > kMaxSectionNameLen) { 556 // Long read? 557 return false; 558 } 559 560 absl::string_view name(header_name, 561 strnlen(header_name, static_cast<size_t>(n_read))); 562 if (!callback(name, out)) { 563 break; 564 } 565 } 566 return true; 567} 568 569// name_len should include terminating '\0'. 570bool GetSectionHeaderByName(int fd, const char *name, size_t name_len, 571 ElfW(Shdr) * out) { 572 char header_name[kMaxSectionNameLen]; 573 if (sizeof(header_name) < name_len) { 574 ABSL_RAW_LOG(WARNING, 575 "Section name '%s' is too long (%zu); " 576 "section will not be found (even if present).", 577 name, name_len); 578 // No point in even trying. 
579 return false; 580 } 581 582 ElfW(Ehdr) elf_header; 583 if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { 584 return false; 585 } 586 587 ElfW(Shdr) shstrtab; 588 off_t shstrtab_offset = static_cast<off_t>(elf_header.e_shoff) + 589 elf_header.e_shentsize * elf_header.e_shstrndx; 590 if (!ReadFromOffsetExact(fd, &shstrtab, sizeof(shstrtab), shstrtab_offset)) { 591 return false; 592 } 593 594 for (int i = 0; i < elf_header.e_shnum; ++i) { 595 off_t section_header_offset = 596 static_cast<off_t>(elf_header.e_shoff) + elf_header.e_shentsize * i; 597 if (!ReadFromOffsetExact(fd, out, sizeof(*out), section_header_offset)) { 598 return false; 599 } 600 off_t name_offset = static_cast<off_t>(shstrtab.sh_offset) + out->sh_name; 601 ssize_t n_read = ReadFromOffset(fd, &header_name, name_len, name_offset); 602 if (n_read < 0) { 603 return false; 604 } else if (static_cast<size_t>(n_read) != name_len) { 605 // Short read -- name could be at end of file. 606 continue; 607 } 608 if (memcmp(header_name, name, name_len) == 0) { 609 return true; 610 } 611 } 612 return false; 613} 614 615// Compare symbols at in the same address. 616// Return true if we should pick symbol1. 617static bool ShouldPickFirstSymbol(const ElfW(Sym) & symbol1, 618 const ElfW(Sym) & symbol2) { 619 // If one of the symbols is weak and the other is not, pick the one 620 // this is not a weak symbol. 621 char bind1 = ELF_ST_BIND(symbol1.st_info); 622 char bind2 = ELF_ST_BIND(symbol1.st_info); 623 if (bind1 == STB_WEAK && bind2 != STB_WEAK) return false; 624 if (bind2 == STB_WEAK && bind1 != STB_WEAK) return true; 625 626 // If one of the symbols has zero size and the other is not, pick the 627 // one that has non-zero size. 628 if (symbol1.st_size != 0 && symbol2.st_size == 0) { 629 return true; 630 } 631 if (symbol1.st_size == 0 && symbol2.st_size != 0) { 632 return false; 633 } 634 635 // If one of the symbols has no type and the other is not, pick the 636 // one that has a type. 
637 char type1 = ELF_ST_TYPE(symbol1.st_info); 638 char type2 = ELF_ST_TYPE(symbol1.st_info); 639 if (type1 != STT_NOTYPE && type2 == STT_NOTYPE) { 640 return true; 641 } 642 if (type1 == STT_NOTYPE && type2 != STT_NOTYPE) { 643 return false; 644 } 645 646 // Pick the first one, if we still cannot decide. 647 return true; 648} 649 650// Return true if an address is inside a section. 651static bool InSection(const void *address, const ElfW(Shdr) * section) { 652 const char *start = reinterpret_cast<const char *>(section->sh_addr); 653 size_t size = static_cast<size_t>(section->sh_size); 654 return start <= address && address < (start + size); 655} 656 657static const char *ComputeOffset(const char *base, ptrdiff_t offset) { 658 // Note: cast to intptr_t to avoid undefined behavior when base evaluates to 659 // zero and offset is non-zero. 660 return reinterpret_cast<const char *>(reinterpret_cast<intptr_t>(base) + 661 offset); 662} 663 664// Read a symbol table and look for the symbol containing the 665// pc. Iterate over symbols in a symbol table and look for the symbol 666// containing "pc". If the symbol is found, and its name fits in 667// out_size, the name is written into out and SYMBOL_FOUND is returned. 668// If the name does not fit, truncated name is written into out, 669// and SYMBOL_TRUNCATED is returned. Out is NUL-terminated. 670// If the symbol is not found, SYMBOL_NOT_FOUND is returned; 671// To keep stack consumption low, we would like this function to not get 672// inlined. 673static ABSL_ATTRIBUTE_NOINLINE FindSymbolResult FindSymbol( 674 const void *const pc, const int fd, char *out, size_t out_size, 675 ptrdiff_t relocation, const ElfW(Shdr) * strtab, const ElfW(Shdr) * symtab, 676 const ElfW(Shdr) * opd, char *tmp_buf, size_t tmp_buf_size) { 677 if (symtab == nullptr) { 678 return SYMBOL_NOT_FOUND; 679 } 680 681 // Read multiple symbols at once to save read() calls. 
682 ElfW(Sym) *buf = reinterpret_cast<ElfW(Sym) *>(tmp_buf); 683 const size_t buf_entries = tmp_buf_size / sizeof(buf[0]); 684 685 const size_t num_symbols = symtab->sh_size / symtab->sh_entsize; 686 687 // On platforms using an .opd section (PowerPC & IA64), a function symbol 688 // has the address of a function descriptor, which contains the real 689 // starting address. However, we do not always want to use the real 690 // starting address because we sometimes want to symbolize a function 691 // pointer into the .opd section, e.g. FindSymbol(&foo,...). 692 const bool pc_in_opd = 693 kPlatformUsesOPDSections && opd != nullptr && InSection(pc, opd); 694 const bool deref_function_descriptor_pointer = 695 kPlatformUsesOPDSections && opd != nullptr && !pc_in_opd; 696 697 ElfW(Sym) best_match; 698 SafeMemZero(&best_match, sizeof(best_match)); 699 bool found_match = false; 700 for (size_t i = 0; i < num_symbols;) { 701 off_t offset = 702 static_cast<off_t>(symtab->sh_offset + i * symtab->sh_entsize); 703 const size_t num_remaining_symbols = num_symbols - i; 704 const size_t entries_in_chunk = 705 std::min(num_remaining_symbols, buf_entries); 706 const size_t bytes_in_chunk = entries_in_chunk * sizeof(buf[0]); 707 const ssize_t len = ReadFromOffset(fd, buf, bytes_in_chunk, offset); 708 SAFE_ASSERT(len >= 0); 709 SAFE_ASSERT(static_cast<size_t>(len) % sizeof(buf[0]) == 0); 710 const size_t num_symbols_in_buf = static_cast<size_t>(len) / sizeof(buf[0]); 711 SAFE_ASSERT(num_symbols_in_buf <= entries_in_chunk); 712 for (size_t j = 0; j < num_symbols_in_buf; ++j) { 713 const ElfW(Sym) &symbol = buf[j]; 714 715 // For a DSO, a symbol address is relocated by the loading address. 716 // We keep the original address for opd redirection below. 
717 const char *const original_start_address = 718 reinterpret_cast<const char *>(symbol.st_value); 719 const char *start_address = 720 ComputeOffset(original_start_address, relocation); 721 722#ifdef __arm__ 723 // ARM functions are always aligned to multiples of two bytes; the 724 // lowest-order bit in start_address is ignored by the CPU and indicates 725 // whether the function contains ARM (0) or Thumb (1) code. We don't care 726 // about what encoding is being used; we just want the real start address 727 // of the function. 728 start_address = reinterpret_cast<const char *>( 729 reinterpret_cast<uintptr_t>(start_address) & ~1u); 730#endif 731 732 if (deref_function_descriptor_pointer && 733 InSection(original_start_address, opd)) { 734 // The opd section is mapped into memory. Just dereference 735 // start_address to get the first double word, which points to the 736 // function entry. 737 start_address = *reinterpret_cast<const char *const *>(start_address); 738 } 739 740 // If pc is inside the .opd section, it points to a function descriptor. 741 const size_t size = pc_in_opd ? kFunctionDescriptorSize : symbol.st_size; 742 const void *const end_address = 743 ComputeOffset(start_address, static_cast<ptrdiff_t>(size)); 744 if (symbol.st_value != 0 && // Skip null value symbols. 745 symbol.st_shndx != 0 && // Skip undefined symbols. 746#ifdef STT_TLS 747 ELF_ST_TYPE(symbol.st_info) != STT_TLS && // Skip thread-local data. 748#endif // STT_TLS 749 ((start_address <= pc && pc < end_address) || 750 (start_address == pc && pc == end_address))) { 751 if (!found_match || ShouldPickFirstSymbol(symbol, best_match)) { 752 found_match = true; 753 best_match = symbol; 754 } 755 } 756 } 757 i += num_symbols_in_buf; 758 } 759 760 if (found_match) { 761 const off_t off = 762 static_cast<off_t>(strtab->sh_offset) + best_match.st_name; 763 const ssize_t n_read = ReadFromOffset(fd, out, out_size, off); 764 if (n_read <= 0) { 765 // This should never happen. 
766 ABSL_RAW_LOG(WARNING, 767 "Unable to read from fd %d at offset %lld: n_read = %zd", fd, 768 static_cast<long long>(off), n_read); 769 return SYMBOL_NOT_FOUND; 770 } 771 ABSL_RAW_CHECK(static_cast<size_t>(n_read) <= out_size, 772 "ReadFromOffset read too much data."); 773 774 // strtab->sh_offset points into .strtab-like section that contains 775 // NUL-terminated strings: '\0foo\0barbaz\0...". 776 // 777 // sh_offset+st_name points to the start of symbol name, but we don't know 778 // how long the symbol is, so we try to read as much as we have space for, 779 // and usually over-read (i.e. there is a NUL somewhere before n_read). 780 if (memchr(out, '\0', static_cast<size_t>(n_read)) == nullptr) { 781 // Either out_size was too small (n_read == out_size and no NUL), or 782 // we tried to read past the EOF (n_read < out_size) and .strtab is 783 // corrupt (missing terminating NUL; should never happen for valid ELF). 784 out[n_read - 1] = '\0'; 785 return SYMBOL_TRUNCATED; 786 } 787 return SYMBOL_FOUND; 788 } 789 790 return SYMBOL_NOT_FOUND; 791} 792 793// Get the symbol name of "pc" from the file pointed by "fd". Process 794// both regular and dynamic symbol tables if necessary. 795// See FindSymbol() comment for description of return value. 796FindSymbolResult Symbolizer::GetSymbolFromObjectFile( 797 const ObjFile &obj, const void *const pc, const ptrdiff_t relocation, 798 char *out, size_t out_size, char *tmp_buf, size_t tmp_buf_size) { 799 ElfW(Shdr) symtab; 800 ElfW(Shdr) strtab; 801 ElfW(Shdr) opd; 802 ElfW(Shdr) *opd_ptr = nullptr; 803 804 // On platforms using an .opd sections for function descriptor, read 805 // the section header. The .opd section is in data segment and should be 806 // loaded but we check that it is mapped just to be extra careful. 
807 if (kPlatformUsesOPDSections) { 808 if (GetSectionHeaderByName(obj.fd, kOpdSectionName, 809 sizeof(kOpdSectionName) - 1, &opd) && 810 FindObjFile(reinterpret_cast<const char *>(opd.sh_addr) + relocation, 811 opd.sh_size) != nullptr) { 812 opd_ptr = &opd; 813 } else { 814 return SYMBOL_NOT_FOUND; 815 } 816 } 817 818 // Consult a regular symbol table, then fall back to the dynamic symbol table. 819 for (const auto symbol_table_type : {SHT_SYMTAB, SHT_DYNSYM}) { 820 if (!GetSectionHeaderByType(obj.fd, obj.elf_header.e_shnum, 821 static_cast<off_t>(obj.elf_header.e_shoff), 822 static_cast<ElfW(Word)>(symbol_table_type), 823 &symtab, tmp_buf, tmp_buf_size)) { 824 continue; 825 } 826 if (!ReadFromOffsetExact( 827 obj.fd, &strtab, sizeof(strtab), 828 static_cast<off_t>(obj.elf_header.e_shoff + 829 symtab.sh_link * sizeof(symtab)))) { 830 continue; 831 } 832 const FindSymbolResult rc = 833 FindSymbol(pc, obj.fd, out, out_size, relocation, &strtab, &symtab, 834 opd_ptr, tmp_buf, tmp_buf_size); 835 if (rc != SYMBOL_NOT_FOUND) { 836 return rc; 837 } 838 } 839 840 return SYMBOL_NOT_FOUND; 841} 842 843namespace { 844// Thin wrapper around a file descriptor so that the file descriptor 845// gets closed for sure. 846class FileDescriptor { 847 public: 848 explicit FileDescriptor(int fd) : fd_(fd) {} 849 FileDescriptor(const FileDescriptor &) = delete; 850 FileDescriptor &operator=(const FileDescriptor &) = delete; 851 852 ~FileDescriptor() { 853 if (fd_ >= 0) { 854 close(fd_); 855 } 856 } 857 858 int get() const { return fd_; } 859 860 private: 861 const int fd_; 862}; 863 864// Helper class for reading lines from file. 865// 866// Note: we don't use ProcMapsIterator since the object is big (it has 867// a 5k array member) and uses async-unsafe functions such as sscanf() 868// and snprintf(). 
869class LineReader { 870 public: 871 explicit LineReader(int fd, char *buf, size_t buf_len) 872 : fd_(fd), 873 buf_len_(buf_len), 874 buf_(buf), 875 bol_(buf), 876 eol_(buf), 877 eod_(buf) {} 878 879 LineReader(const LineReader &) = delete; 880 LineReader &operator=(const LineReader &) = delete; 881 882 // Read '\n'-terminated line from file. On success, modify "bol" 883 // and "eol", then return true. Otherwise, return false. 884 // 885 // Note: if the last line doesn't end with '\n', the line will be 886 // dropped. It's an intentional behavior to make the code simple. 887 bool ReadLine(const char **bol, const char **eol) { 888 if (BufferIsEmpty()) { // First time. 889 const ssize_t num_bytes = ReadPersistent(fd_, buf_, buf_len_); 890 if (num_bytes <= 0) { // EOF or error. 891 return false; 892 } 893 eod_ = buf_ + num_bytes; 894 bol_ = buf_; 895 } else { 896 bol_ = eol_ + 1; // Advance to the next line in the buffer. 897 SAFE_ASSERT(bol_ <= eod_); // "bol_" can point to "eod_". 898 if (!HasCompleteLine()) { 899 const auto incomplete_line_length = static_cast<size_t>(eod_ - bol_); 900 // Move the trailing incomplete line to the beginning. 901 memmove(buf_, bol_, incomplete_line_length); 902 // Read text from file and append it. 903 char *const append_pos = buf_ + incomplete_line_length; 904 const size_t capacity_left = buf_len_ - incomplete_line_length; 905 const ssize_t num_bytes = 906 ReadPersistent(fd_, append_pos, capacity_left); 907 if (num_bytes <= 0) { // EOF or error. 908 return false; 909 } 910 eod_ = append_pos + num_bytes; 911 bol_ = buf_; 912 } 913 } 914 eol_ = FindLineFeed(); 915 if (eol_ == nullptr) { // '\n' not found. Malformed line. 916 return false; 917 } 918 *eol_ = '\0'; // Replace '\n' with '\0'. 
919 920 *bol = bol_; 921 *eol = eol_; 922 return true; 923 } 924 925 private: 926 char *FindLineFeed() const { 927 return reinterpret_cast<char *>( 928 memchr(bol_, '\n', static_cast<size_t>(eod_ - bol_))); 929 } 930 931 bool BufferIsEmpty() const { return buf_ == eod_; } 932 933 bool HasCompleteLine() const { 934 return !BufferIsEmpty() && FindLineFeed() != nullptr; 935 } 936 937 const int fd_; 938 const size_t buf_len_; 939 char *const buf_; 940 char *bol_; 941 char *eol_; 942 const char *eod_; // End of data in "buf_". 943}; 944} // namespace 945 946// Place the hex number read from "start" into "*hex". The pointer to 947// the first non-hex character or "end" is returned. 948static const char *GetHex(const char *start, const char *end, 949 uint64_t *const value) { 950 uint64_t hex = 0; 951 const char *p; 952 for (p = start; p < end; ++p) { 953 int ch = *p; 954 if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') || 955 (ch >= 'a' && ch <= 'f')) { 956 hex = (hex << 4) | 957 static_cast<uint64_t>(ch < 'A' ? ch - '0' : (ch & 0xF) + 9); 958 } else { // Encountered the first non-hex character. 959 break; 960 } 961 } 962 SAFE_ASSERT(p <= end); 963 *value = hex; 964 return p; 965} 966 967static const char *GetHex(const char *start, const char *end, 968 const void **const addr) { 969 uint64_t hex = 0; 970 const char *p = GetHex(start, end, &hex); 971 *addr = reinterpret_cast<void *>(hex); 972 return p; 973} 974 975// Normally we are only interested in "r?x" maps. 976// On the PowerPC, function pointers point to descriptors in the .opd 977// section. The descriptors themselves are not executable code, so 978// we need to relax the check below to "r??". 979static bool ShouldUseMapping(const char *const flags) { 980 return flags[0] == 'r' && (kPlatformUsesOPDSections || flags[2] == 'x'); 981} 982 983// Read /proc/self/maps and run "callback" for each mmapped file found. If 984// "callback" returns false, stop scanning and return true. 
Else continue 985// scanning /proc/self/maps. Return true if no parse error is found. 986static ABSL_ATTRIBUTE_NOINLINE bool ReadAddrMap( 987 bool (*callback)(const char *filename, const void *const start_addr, 988 const void *const end_addr, uint64_t offset, void *arg), 989 void *arg, void *tmp_buf, size_t tmp_buf_size) { 990 // Use /proc/self/task/<pid>/maps instead of /proc/self/maps. The latter 991 // requires kernel to stop all threads, and is significantly slower when there 992 // are 1000s of threads. 993 char maps_path[80]; 994 snprintf(maps_path, sizeof(maps_path), "/proc/self/task/%d/maps", getpid()); 995 996 int maps_fd; 997 NO_INTR(maps_fd = open(maps_path, O_RDONLY)); 998 FileDescriptor wrapped_maps_fd(maps_fd); 999 if (wrapped_maps_fd.get() < 0) { 1000 ABSL_RAW_LOG(WARNING, "%s: errno=%d", maps_path, errno); 1001 return false; 1002 } 1003 1004 // Iterate over maps and look for the map containing the pc. Then 1005 // look into the symbol tables inside. 1006 LineReader reader(wrapped_maps_fd.get(), static_cast<char *>(tmp_buf), 1007 tmp_buf_size); 1008 while (true) { 1009 const char *cursor; 1010 const char *eol; 1011 if (!reader.ReadLine(&cursor, &eol)) { // EOF or malformed line. 1012 break; 1013 } 1014 1015 const char *line = cursor; 1016 const void *start_address; 1017 // Start parsing line in /proc/self/maps. Here is an example: 1018 // 1019 // 08048000-0804c000 r-xp 00000000 08:01 2142121 /bin/cat 1020 // 1021 // We want start address (08048000), end address (0804c000), flags 1022 // (r-xp) and file name (/bin/cat). 1023 1024 // Read start address. 1025 cursor = GetHex(cursor, eol, &start_address); 1026 if (cursor == eol || *cursor != '-') { 1027 ABSL_RAW_LOG(WARNING, "Corrupt /proc/self/maps line: %s", line); 1028 return false; 1029 } 1030 ++cursor; // Skip '-'. 1031 1032 // Read end address. 
1033 const void *end_address; 1034 cursor = GetHex(cursor, eol, &end_address); 1035 if (cursor == eol || *cursor != ' ') { 1036 ABSL_RAW_LOG(WARNING, "Corrupt /proc/self/maps line: %s", line); 1037 return false; 1038 } 1039 ++cursor; // Skip ' '. 1040 1041 // Read flags. Skip flags until we encounter a space or eol. 1042 const char *const flags_start = cursor; 1043 while (cursor < eol && *cursor != ' ') { 1044 ++cursor; 1045 } 1046 // We expect at least four letters for flags (ex. "r-xp"). 1047 if (cursor == eol || cursor < flags_start + 4) { 1048 ABSL_RAW_LOG(WARNING, "Corrupt /proc/self/maps: %s", line); 1049 return false; 1050 } 1051 1052 // Check flags. 1053 if (!ShouldUseMapping(flags_start)) { 1054 continue; // We skip this map. 1055 } 1056 ++cursor; // Skip ' '. 1057 1058 // Read file offset. 1059 uint64_t offset; 1060 cursor = GetHex(cursor, eol, &offset); 1061 ++cursor; // Skip ' '. 1062 1063 // Skip to file name. "cursor" now points to dev. We need to skip at least 1064 // two spaces for dev and inode. 1065 int num_spaces = 0; 1066 while (cursor < eol) { 1067 if (*cursor == ' ') { 1068 ++num_spaces; 1069 } else if (num_spaces >= 2) { 1070 // The first non-space character after skipping two spaces 1071 // is the beginning of the file name. 1072 break; 1073 } 1074 ++cursor; 1075 } 1076 1077 // Check whether this entry corresponds to our hint table for the true 1078 // filename. 1079 bool hinted = 1080 GetFileMappingHint(&start_address, &end_address, &offset, &cursor); 1081 if (!hinted && (cursor == eol || cursor[0] == '[')) { 1082 // not an object file, typically [vdso] or [vsyscall] 1083 continue; 1084 } 1085 if (!callback(cursor, start_address, end_address, offset, arg)) break; 1086 } 1087 return true; 1088} 1089 1090// Find the objfile mapped in address region containing [addr, addr + len). 
1091ObjFile *Symbolizer::FindObjFile(const void *const addr, size_t len) { 1092 for (int i = 0; i < 2; ++i) { 1093 if (!ok_) return nullptr; 1094 1095 // Read /proc/self/maps if necessary 1096 if (!addr_map_read_) { 1097 addr_map_read_ = true; 1098 if (!ReadAddrMap(RegisterObjFile, this, tmp_buf_, TMP_BUF_SIZE)) { 1099 ok_ = false; 1100 return nullptr; 1101 } 1102 } 1103 1104 size_t lo = 0; 1105 size_t hi = addr_map_.Size(); 1106 while (lo < hi) { 1107 size_t mid = (lo + hi) / 2; 1108 if (addr < addr_map_.At(mid)->end_addr) { 1109 hi = mid; 1110 } else { 1111 lo = mid + 1; 1112 } 1113 } 1114 if (lo != addr_map_.Size()) { 1115 ObjFile *obj = addr_map_.At(lo); 1116 SAFE_ASSERT(obj->end_addr > addr); 1117 if (addr >= obj->start_addr && 1118 reinterpret_cast<const char *>(addr) + len <= obj->end_addr) 1119 return obj; 1120 } 1121 1122 // The address mapping may have changed since it was last read. Retry. 1123 ClearAddrMap(); 1124 } 1125 return nullptr; 1126} 1127 1128void Symbolizer::ClearAddrMap() { 1129 for (size_t i = 0; i != addr_map_.Size(); i++) { 1130 ObjFile *o = addr_map_.At(i); 1131 base_internal::LowLevelAlloc::Free(o->filename); 1132 if (o->fd >= 0) { 1133 close(o->fd); 1134 } 1135 } 1136 addr_map_.Clear(); 1137 addr_map_read_ = false; 1138} 1139 1140// Callback for ReadAddrMap to register objfiles in an in-memory table. 1141bool Symbolizer::RegisterObjFile(const char *filename, 1142 const void *const start_addr, 1143 const void *const end_addr, uint64_t offset, 1144 void *arg) { 1145 Symbolizer *impl = static_cast<Symbolizer *>(arg); 1146 1147 // Files are supposed to be added in the increasing address order. Make 1148 // sure that's the case. 
1149 size_t addr_map_size = impl->addr_map_.Size(); 1150 if (addr_map_size != 0) { 1151 ObjFile *old = impl->addr_map_.At(addr_map_size - 1); 1152 if (old->end_addr > end_addr) { 1153 ABSL_RAW_LOG(ERROR, 1154 "Unsorted addr map entry: 0x%" PRIxPTR ": %s <-> 0x%" PRIxPTR 1155 ": %s", 1156 reinterpret_cast<uintptr_t>(end_addr), filename, 1157 reinterpret_cast<uintptr_t>(old->end_addr), old->filename); 1158 return true; 1159 } else if (old->end_addr == end_addr) { 1160 // The same entry appears twice. This sometimes happens for [vdso]. 1161 if (old->start_addr != start_addr || 1162 strcmp(old->filename, filename) != 0) { 1163 ABSL_RAW_LOG(ERROR, 1164 "Duplicate addr 0x%" PRIxPTR ": %s <-> 0x%" PRIxPTR ": %s", 1165 reinterpret_cast<uintptr_t>(end_addr), filename, 1166 reinterpret_cast<uintptr_t>(old->end_addr), old->filename); 1167 } 1168 return true; 1169 } else if (old->end_addr == start_addr && 1170 reinterpret_cast<uintptr_t>(old->start_addr) - old->offset == 1171 reinterpret_cast<uintptr_t>(start_addr) - offset && 1172 strcmp(old->filename, filename) == 0) { 1173 // Two contiguous map entries that span a contiguous region of the file, 1174 // perhaps because some part of the file was mlock()ed. Combine them. 1175 old->end_addr = end_addr; 1176 return true; 1177 } 1178 } 1179 ObjFile *obj = impl->addr_map_.Add(); 1180 obj->filename = impl->CopyString(filename); 1181 obj->start_addr = start_addr; 1182 obj->end_addr = end_addr; 1183 obj->offset = offset; 1184 obj->elf_type = -1; // filled on demand 1185 obj->fd = -1; // opened on demand 1186 return true; 1187} 1188 1189// This function wraps the Demangle function to provide an interface 1190// where the input symbol is demangled in-place. 1191// To keep stack consumption low, we would like this function to not 1192// get inlined. 
1193static ABSL_ATTRIBUTE_NOINLINE void DemangleInplace(char *out, size_t out_size, 1194 char *tmp_buf, 1195 size_t tmp_buf_size) { 1196 if (Demangle(out, tmp_buf, tmp_buf_size)) { 1197 // Demangling succeeded. Copy to out if the space allows. 1198 size_t len = strlen(tmp_buf); 1199 if (len + 1 <= out_size) { // +1 for '\0'. 1200 SAFE_ASSERT(len < tmp_buf_size); 1201 memmove(out, tmp_buf, len + 1); 1202 } 1203 } 1204} 1205 1206SymbolCacheLine *Symbolizer::GetCacheLine(const void *const pc) { 1207 uintptr_t pc0 = reinterpret_cast<uintptr_t>(pc); 1208 pc0 >>= 3; // drop the low 3 bits 1209 1210 // Shuffle bits. 1211 pc0 ^= (pc0 >> 6) ^ (pc0 >> 12) ^ (pc0 >> 18); 1212 return &symbol_cache_[pc0 % SYMBOL_CACHE_LINES]; 1213} 1214 1215void Symbolizer::AgeSymbols(SymbolCacheLine *line) { 1216 for (uint32_t &age : line->age) { 1217 ++age; 1218 } 1219} 1220 1221const char *Symbolizer::FindSymbolInCache(const void *const pc) { 1222 if (pc == nullptr) return nullptr; 1223 1224 SymbolCacheLine *line = GetCacheLine(pc); 1225 for (size_t i = 0; i < ABSL_ARRAYSIZE(line->pc); ++i) { 1226 if (line->pc[i] == pc) { 1227 AgeSymbols(line); 1228 line->age[i] = 0; 1229 return line->name[i]; 1230 } 1231 } 1232 return nullptr; 1233} 1234 1235const char *Symbolizer::InsertSymbolInCache(const void *const pc, 1236 const char *name) { 1237 SAFE_ASSERT(pc != nullptr); 1238 1239 SymbolCacheLine *line = GetCacheLine(pc); 1240 uint32_t max_age = 0; 1241 size_t oldest_index = 0; 1242 bool found_oldest_index = false; 1243 for (size_t i = 0; i < ABSL_ARRAYSIZE(line->pc); ++i) { 1244 if (line->pc[i] == nullptr) { 1245 AgeSymbols(line); 1246 line->pc[i] = pc; 1247 line->name[i] = CopyString(name); 1248 line->age[i] = 0; 1249 return line->name[i]; 1250 } 1251 if (line->age[i] >= max_age) { 1252 max_age = line->age[i]; 1253 oldest_index = i; 1254 found_oldest_index = true; 1255 } 1256 } 1257 1258 AgeSymbols(line); 1259 ABSL_RAW_CHECK(found_oldest_index, "Corrupt cache"); 1260 
base_internal::LowLevelAlloc::Free(line->name[oldest_index]); 1261 line->pc[oldest_index] = pc; 1262 line->name[oldest_index] = CopyString(name); 1263 line->age[oldest_index] = 0; 1264 return line->name[oldest_index]; 1265} 1266 1267static void MaybeOpenFdFromSelfExe(ObjFile *obj) { 1268 if (memcmp(obj->start_addr, ELFMAG, SELFMAG) != 0) { 1269 return; 1270 } 1271 int fd = open("/proc/self/exe", O_RDONLY); 1272 if (fd == -1) { 1273 return; 1274 } 1275 // Verify that contents of /proc/self/exe matches in-memory image of 1276 // the binary. This can fail if the "deleted" binary is in fact not 1277 // the main executable, or for binaries that have the first PT_LOAD 1278 // segment smaller than 4K. We do it in four steps so that the 1279 // buffer is smaller and we don't consume too much stack space. 1280 const char *mem = reinterpret_cast<const char *>(obj->start_addr); 1281 for (int i = 0; i < 4; ++i) { 1282 char buf[1024]; 1283 ssize_t n = read(fd, buf, sizeof(buf)); 1284 if (n != sizeof(buf) || memcmp(buf, mem, sizeof(buf)) != 0) { 1285 close(fd); 1286 return; 1287 } 1288 mem += sizeof(buf); 1289 } 1290 obj->fd = fd; 1291} 1292 1293static bool MaybeInitializeObjFile(ObjFile *obj) { 1294 if (obj->fd < 0) { 1295 obj->fd = open(obj->filename, O_RDONLY); 1296 1297 if (obj->fd < 0) { 1298 // Getting /proc/self/exe here means that we were hinted. 1299 if (strcmp(obj->filename, "/proc/self/exe") == 0) { 1300 // /proc/self/exe may be inaccessible (due to setuid, etc.), so try 1301 // accessing the binary via argv0. 
1302 if (argv0_value != nullptr) { 1303 obj->fd = open(argv0_value, O_RDONLY); 1304 } 1305 } else { 1306 MaybeOpenFdFromSelfExe(obj); 1307 } 1308 } 1309 1310 if (obj->fd < 0) { 1311 ABSL_RAW_LOG(WARNING, "%s: open failed: errno=%d", obj->filename, errno); 1312 return false; 1313 } 1314 obj->elf_type = FileGetElfType(obj->fd); 1315 if (obj->elf_type < 0) { 1316 ABSL_RAW_LOG(WARNING, "%s: wrong elf type: %d", obj->filename, 1317 obj->elf_type); 1318 return false; 1319 } 1320 1321 if (!ReadFromOffsetExact(obj->fd, &obj->elf_header, sizeof(obj->elf_header), 1322 0)) { 1323 ABSL_RAW_LOG(WARNING, "%s: failed to read elf header", obj->filename); 1324 return false; 1325 } 1326 const int phnum = obj->elf_header.e_phnum; 1327 const int phentsize = obj->elf_header.e_phentsize; 1328 auto phoff = static_cast<off_t>(obj->elf_header.e_phoff); 1329 size_t num_executable_load_segments = 0; 1330 for (int j = 0; j < phnum; j++) { 1331 ElfW(Phdr) phdr; 1332 if (!ReadFromOffsetExact(obj->fd, &phdr, sizeof(phdr), phoff)) { 1333 ABSL_RAW_LOG(WARNING, "%s: failed to read program header %d", 1334 obj->filename, j); 1335 return false; 1336 } 1337 phoff += phentsize; 1338 constexpr int rx = PF_X | PF_R; 1339 if (phdr.p_type != PT_LOAD || (phdr.p_flags & rx) != rx) { 1340 // Not a LOAD segment, or not executable code. 1341 continue; 1342 } 1343 if (num_executable_load_segments < obj->phdr.size()) { 1344 memcpy(&obj->phdr[num_executable_load_segments++], &phdr, sizeof(phdr)); 1345 } else { 1346 ABSL_RAW_LOG( 1347 WARNING, "%s: too many executable LOAD segments: %zu >= %zu", 1348 obj->filename, num_executable_load_segments, obj->phdr.size()); 1349 break; 1350 } 1351 } 1352 if (num_executable_load_segments == 0) { 1353 // This object has no "r-x" LOAD segments. That's unexpected. 1354 ABSL_RAW_LOG(WARNING, "%s: no executable LOAD segments", obj->filename); 1355 return false; 1356 } 1357 } 1358 return true; 1359} 1360 1361// The implementation of our symbolization routine. 
If it 1362// successfully finds the symbol containing "pc" and obtains the 1363// symbol name, returns pointer to that symbol. Otherwise, returns nullptr. 1364// If any symbol decorators have been installed via InstallSymbolDecorator(), 1365// they are called here as well. 1366// To keep stack consumption low, we would like this function to not 1367// get inlined. 1368const char *Symbolizer::GetUncachedSymbol(const void *pc) { 1369 ObjFile *const obj = FindObjFile(pc, 1); 1370 ptrdiff_t relocation = 0; 1371 int fd = -1; 1372 if (obj != nullptr) { 1373 if (MaybeInitializeObjFile(obj)) { 1374 const size_t start_addr = reinterpret_cast<size_t>(obj->start_addr); 1375 if (obj->elf_type == ET_DYN && start_addr >= obj->offset) { 1376 // This object was relocated. 1377 // 1378 // For obj->offset > 0, adjust the relocation since a mapping at offset 1379 // X in the file will have a start address of [true relocation]+X. 1380 relocation = static_cast<ptrdiff_t>(start_addr - obj->offset); 1381 1382 // Note: some binaries have multiple "rx" LOAD segments. We must 1383 // find the right one. 1384 ElfW(Phdr) *phdr = nullptr; 1385 for (size_t j = 0; j < obj->phdr.size(); j++) { 1386 ElfW(Phdr) &p = obj->phdr[j]; 1387 if (p.p_type != PT_LOAD) { 1388 // We only expect PT_LOADs. This must be PT_NULL that we didn't 1389 // write over (i.e. we exhausted all interesting PT_LOADs). 1390 ABSL_RAW_CHECK(p.p_type == PT_NULL, "unexpected p_type"); 1391 break; 1392 } 1393 if (pc < reinterpret_cast<void *>(start_addr + p.p_memsz)) { 1394 phdr = &p; 1395 break; 1396 } 1397 } 1398 if (phdr == nullptr) { 1399 // That's unexpected. Hope for the best. 1400 ABSL_RAW_LOG( 1401 WARNING, 1402 "%s: unable to find LOAD segment for pc: %p, start_addr: %zx", 1403 obj->filename, pc, start_addr); 1404 } else { 1405 // Adjust relocation in case phdr.p_vaddr != 0. 1406 // This happens for binaries linked with `lld --rosegment`, and for 1407 // binaries linked with BFD `ld -z separate-code`. 
1408 relocation -= phdr->p_vaddr - phdr->p_offset; 1409 } 1410 } 1411 1412 fd = obj->fd; 1413 if (GetSymbolFromObjectFile(*obj, pc, relocation, symbol_buf_, 1414 sizeof(symbol_buf_), tmp_buf_, 1415 sizeof(tmp_buf_)) == SYMBOL_FOUND) { 1416 // Only try to demangle the symbol name if it fit into symbol_buf_. 1417 DemangleInplace(symbol_buf_, sizeof(symbol_buf_), tmp_buf_, 1418 sizeof(tmp_buf_)); 1419 } 1420 } 1421 } else { 1422#if ABSL_HAVE_VDSO_SUPPORT 1423 VDSOSupport vdso; 1424 if (vdso.IsPresent()) { 1425 VDSOSupport::SymbolInfo symbol_info; 1426 if (vdso.LookupSymbolByAddress(pc, &symbol_info)) { 1427 // All VDSO symbols are known to be short. 1428 size_t len = strlen(symbol_info.name); 1429 ABSL_RAW_CHECK(len + 1 < sizeof(symbol_buf_), 1430 "VDSO symbol unexpectedly long"); 1431 memcpy(symbol_buf_, symbol_info.name, len + 1); 1432 } 1433 } 1434#endif 1435 } 1436 1437 if (g_decorators_mu.TryLock()) { 1438 if (g_num_decorators > 0) { 1439 SymbolDecoratorArgs decorator_args = { 1440 pc, relocation, fd, symbol_buf_, sizeof(symbol_buf_), 1441 tmp_buf_, sizeof(tmp_buf_), nullptr}; 1442 for (int i = 0; i < g_num_decorators; ++i) { 1443 decorator_args.arg = g_decorators[i].arg; 1444 g_decorators[i].fn(&decorator_args); 1445 } 1446 } 1447 g_decorators_mu.Unlock(); 1448 } 1449 if (symbol_buf_[0] == '\0') { 1450 return nullptr; 1451 } 1452 symbol_buf_[sizeof(symbol_buf_) - 1] = '\0'; // Paranoia. 1453 return InsertSymbolInCache(pc, symbol_buf_); 1454} 1455 1456const char *Symbolizer::GetSymbol(const void *pc) { 1457 const char *entry = FindSymbolInCache(pc); 1458 if (entry != nullptr) { 1459 return entry; 1460 } 1461 symbol_buf_[0] = '\0'; 1462 1463#ifdef __hppa__ 1464 { 1465 // In some contexts (e.g., return addresses), PA-RISC uses the lowest two 1466 // bits of the address to indicate the privilege level. Clear those bits 1467 // before trying to symbolize. 
1468 const auto pc_bits = reinterpret_cast<uintptr_t>(pc); 1469 const auto address = pc_bits & ~0x3; 1470 entry = GetUncachedSymbol(reinterpret_cast<const void *>(address)); 1471 if (entry != nullptr) { 1472 return entry; 1473 } 1474 1475 // In some contexts, PA-RISC also uses bit 1 of the address to indicate that 1476 // this is a cross-DSO function pointer. Such function pointers actually 1477 // point to a procedure label, a struct whose first 32-bit (pointer) element 1478 // actually points to the function text. With no symbol found for this 1479 // address so far, try interpreting it as a cross-DSO function pointer and 1480 // see how that goes. 1481 if (pc_bits & 0x2) { 1482 return GetUncachedSymbol(*reinterpret_cast<const void *const *>(address)); 1483 } 1484 1485 return nullptr; 1486 } 1487#else 1488 return GetUncachedSymbol(pc); 1489#endif 1490} 1491 1492bool RemoveAllSymbolDecorators(void) { 1493 if (!g_decorators_mu.TryLock()) { 1494 // Someone else is using decorators. Get out. 1495 return false; 1496 } 1497 g_num_decorators = 0; 1498 g_decorators_mu.Unlock(); 1499 return true; 1500} 1501 1502bool RemoveSymbolDecorator(int ticket) { 1503 if (!g_decorators_mu.TryLock()) { 1504 // Someone else is using decorators. Get out. 1505 return false; 1506 } 1507 for (int i = 0; i < g_num_decorators; ++i) { 1508 if (g_decorators[i].ticket == ticket) { 1509 while (i < g_num_decorators - 1) { 1510 g_decorators[i] = g_decorators[i + 1]; 1511 ++i; 1512 } 1513 g_num_decorators = i; 1514 break; 1515 } 1516 } 1517 g_decorators_mu.Unlock(); 1518 return true; // Decorator is known to be removed. 1519} 1520 1521int InstallSymbolDecorator(SymbolDecorator decorator, void *arg) { 1522 static int ticket = 0; 1523 1524 if (!g_decorators_mu.TryLock()) { 1525 // Someone else is using decorators. Get out. 
1526 return -2; 1527 } 1528 int ret = ticket; 1529 if (g_num_decorators >= kMaxDecorators) { 1530 ret = -1; 1531 } else { 1532 g_decorators[g_num_decorators] = {decorator, arg, ticket++}; 1533 ++g_num_decorators; 1534 } 1535 g_decorators_mu.Unlock(); 1536 return ret; 1537} 1538 1539bool RegisterFileMappingHint(const void *start, const void *end, uint64_t offset, 1540 const char *filename) { 1541 SAFE_ASSERT(start <= end); 1542 SAFE_ASSERT(filename != nullptr); 1543 1544 InitSigSafeArena(); 1545 1546 if (!g_file_mapping_mu.TryLock()) { 1547 return false; 1548 } 1549 1550 bool ret = true; 1551 if (g_num_file_mapping_hints >= kMaxFileMappingHints) { 1552 ret = false; 1553 } else { 1554 // TODO(ckennelly): Move this into a string copy routine. 1555 size_t len = strlen(filename); 1556 char *dst = static_cast<char *>( 1557 base_internal::LowLevelAlloc::AllocWithArena(len + 1, SigSafeArena())); 1558 ABSL_RAW_CHECK(dst != nullptr, "out of memory"); 1559 memcpy(dst, filename, len + 1); 1560 1561 auto &hint = g_file_mapping_hints[g_num_file_mapping_hints++]; 1562 hint.start = start; 1563 hint.end = end; 1564 hint.offset = offset; 1565 hint.filename = dst; 1566 } 1567 1568 g_file_mapping_mu.Unlock(); 1569 return ret; 1570} 1571 1572bool GetFileMappingHint(const void **start, const void **end, uint64_t *offset, 1573 const char **filename) { 1574 if (!g_file_mapping_mu.TryLock()) { 1575 return false; 1576 } 1577 bool found = false; 1578 for (int i = 0; i < g_num_file_mapping_hints; i++) { 1579 if (g_file_mapping_hints[i].start <= *start && 1580 *end <= g_file_mapping_hints[i].end) { 1581 // We assume that the start_address for the mapping is the base 1582 // address of the ELF section, but when [start_address,end_address) is 1583 // not strictly equal to [hint.start, hint.end), that assumption is 1584 // invalid. 
1585 // 1586 // This uses the hint's start address (even though hint.start is not 1587 // necessarily equal to start_address) to ensure the correct 1588 // relocation is computed later. 1589 *start = g_file_mapping_hints[i].start; 1590 *end = g_file_mapping_hints[i].end; 1591 *offset = g_file_mapping_hints[i].offset; 1592 *filename = g_file_mapping_hints[i].filename; 1593 found = true; 1594 break; 1595 } 1596 } 1597 g_file_mapping_mu.Unlock(); 1598 return found; 1599} 1600 1601} // namespace debugging_internal 1602 1603bool Symbolize(const void *pc, char *out, int out_size) { 1604 // Symbolization is very slow under tsan. 1605 ABSL_ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN(); 1606 SAFE_ASSERT(out_size >= 0); 1607 debugging_internal::Symbolizer *s = debugging_internal::AllocateSymbolizer(); 1608 const char *name = s->GetSymbol(pc); 1609 bool ok = false; 1610 if (name != nullptr && out_size > 0) { 1611 strncpy(out, name, static_cast<size_t>(out_size)); 1612 ok = true; 1613 if (out[static_cast<size_t>(out_size) - 1] != '\0') { 1614 // strncpy() does not '\0' terminate when it truncates. Do so, with 1615 // trailing ellipsis. 1616 static constexpr char kEllipsis[] = "..."; 1617 size_t ellipsis_size = 1618 std::min(strlen(kEllipsis), static_cast<size_t>(out_size) - 1); 1619 memcpy(out + static_cast<size_t>(out_size) - ellipsis_size - 1, kEllipsis, 1620 ellipsis_size); 1621 out[static_cast<size_t>(out_size) - 1] = '\0'; 1622 } 1623 } 1624 debugging_internal::FreeSymbolizer(s); 1625 ABSL_ANNOTATE_IGNORE_READS_AND_WRITES_END(); 1626 return ok; 1627} 1628 1629ABSL_NAMESPACE_END 1630} // namespace absl 1631 1632extern "C" bool AbslInternalGetFileMappingHint(const void **start, 1633 const void **end, uint64_t *offset, 1634 const char **filename) { 1635 return absl::debugging_internal::GetFileMappingHint(start, end, offset, 1636 filename); 1637} 1638