1 // Copyright (c) 2011 Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 // Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
31
32 // dump_symbols.cc: implement google_breakpad::WriteSymbolFile:
33 // Find all the debugging info in a file and dump it as a Breakpad symbol file.
34
35 #include "common/linux/dump_symbols.h"
36
37 #include <assert.h>
38 #include <elf.h>
39 #include <errno.h>
40 #include <fcntl.h>
41 #include <link.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <sys/mman.h>
46 #include <sys/stat.h>
47 #include <unistd.h>
48
49 #include <iostream>
50 #include <set>
51 #include <string>
52 #include <utility>
53 #include <vector>
54
55 #include "common/dwarf/bytereader-inl.h"
56 #include "common/dwarf/dwarf2diehandler.h"
57 #include "common/dwarf_cfi_to_module.h"
58 #include "common/dwarf_cu_to_module.h"
59 #include "common/dwarf_line_to_module.h"
60 #include "common/linux/crc32.h"
61 #include "common/linux/eintr_wrapper.h"
62 #include "common/linux/elfutils.h"
63 #include "common/linux/elfutils-inl.h"
64 #include "common/linux/elf_symbols_to_module.h"
65 #include "common/linux/file_id.h"
66 #include "common/module.h"
67 #include "common/scoped_ptr.h"
68 #ifndef NO_STABS_SUPPORT
69 #include "common/stabs_reader.h"
70 #include "common/stabs_to_module.h"
71 #endif
72 #include "common/using_std_string.h"
73
74 // This namespace contains helper functions.
75 namespace {
76
77 using google_breakpad::DumpOptions;
78 using google_breakpad::DwarfCFIToModule;
79 using google_breakpad::DwarfCUToModule;
80 using google_breakpad::DwarfLineToModule;
81 using google_breakpad::ElfClass;
82 using google_breakpad::ElfClass32;
83 using google_breakpad::ElfClass64;
84 using google_breakpad::FindElfSectionByName;
85 using google_breakpad::GetOffset;
86 using google_breakpad::IsValidElf;
87 using google_breakpad::Module;
88 #ifndef NO_STABS_SUPPORT
89 using google_breakpad::StabsToModule;
90 #endif
91 using google_breakpad::scoped_ptr;
92
93 // Define AARCH64 ELF architecture if host machine does not include this define.
94 #ifndef EM_AARCH64
95 #define EM_AARCH64 183
96 #endif
97
//
// FDWrapper
//
// Scoped owner of a file descriptor: the descriptor is closed when the
// wrapper is destroyed, unless ownership was given up with release().
//
class FDWrapper {
 public:
  // Take ownership of FD. A value of -1 means "no descriptor", and the
  // destructor then does nothing.
  explicit FDWrapper(int fd) :
    fd_(fd) {}

  ~FDWrapper() {
    if (fd_ != -1)
      close(fd_);
  }

  // Return the wrapped descriptor; the wrapper keeps ownership.
  int get() const {
    return fd_;
  }

  // Give up ownership: return the descriptor and leave the wrapper
  // holding -1, so the destructor will not close it.
  int release() {
    int fd = fd_;
    fd_ = -1;
    return fd;
  }

 private:
  // Not copyable: a copy would close the same descriptor a second time.
  // Declared but intentionally left undefined.
  FDWrapper(const FDWrapper&);
  FDWrapper& operator=(const FDWrapper&);

  int fd_;
};
122
//
// MmapWrapper
//
// Scoped owner of a memory mapping: the region registered with set() is
// unmapped when the wrapper is destroyed, unless release() was called.
//
class MmapWrapper {
 public:
  // Start out owning nothing. All members are initialized so the wrapper
  // never holds indeterminate values.
  MmapWrapper() : is_set_(false), base_(NULL), size_(0) {}

  ~MmapWrapper() {
    if (is_set_ && base_ != NULL) {
      assert(size_ > 0);
      munmap(base_, size_);
    }
  }

  // Take ownership of the MAPPED_SIZE bytes mapped at MAPPED_ADDRESS.
  // Any region registered earlier is simply forgotten, not unmapped.
  void set(void *mapped_address, size_t mapped_size) {
    is_set_ = true;
    base_ = mapped_address;
    size_ = mapped_size;
  }

  // Give up ownership of the current region without unmapping it.
  // A region must have been registered with set() (asserted).
  void release() {
    assert(is_set_);
    is_set_ = false;
    base_ = NULL;
    size_ = 0;
  }

 private:
  // Not copyable: a copy would unmap the same region a second time.
  // Declared but intentionally left undefined.
  MmapWrapper(const MmapWrapper&);
  MmapWrapper& operator=(const MmapWrapper&);

  bool is_set_;   // True while a region is owned.
  void* base_;    // Start of the owned region, or NULL.
  size_t size_;   // Size in bytes of the owned region, or 0.
};
154
// Return the preferred loading address of the binary described by the
// NHEADER program headers starting at PROGRAM_HEADERS.
//
// The result is the virtual address of the first PT_LOAD segment, or zero
// if there is no such segment. For non-PIC executables (e_type ==
// ET_EXEC) that is the load address, since ELF requires the segments to
// be sorted by load address. For PIC executables and dynamic libraries
// (e_type == ET_DYN) this address will normally be zero.
template<typename ElfClass>
typename ElfClass::Addr GetLoadingAddress(
    const typename ElfClass::Phdr* program_headers,
    int nheader) {
  typedef typename ElfClass::Phdr Phdr;

  const Phdr* segment = program_headers;
  for (int remaining = nheader; remaining > 0; --remaining, ++segment) {
    if (segment->p_type == PT_LOAD)
      return segment->p_vaddr;
  }
  return 0;
}
174
175 #ifndef NO_STABS_SUPPORT
176 template<typename ElfClass>
LoadStabs(const typename ElfClass::Ehdr * elf_header,const typename ElfClass::Shdr * stab_section,const typename ElfClass::Shdr * stabstr_section,const bool big_endian,Module * module)177 bool LoadStabs(const typename ElfClass::Ehdr* elf_header,
178 const typename ElfClass::Shdr* stab_section,
179 const typename ElfClass::Shdr* stabstr_section,
180 const bool big_endian,
181 Module* module) {
182 // A callback object to handle data from the STABS reader.
183 StabsToModule handler(module);
184 // Find the addresses of the STABS data, and create a STABS reader object.
185 // On Linux, STABS entries always have 32-bit values, regardless of the
186 // address size of the architecture whose code they're describing, and
187 // the strings are always "unitized".
188 const uint8_t* stabs =
189 GetOffset<ElfClass, uint8_t>(elf_header, stab_section->sh_offset);
190 const uint8_t* stabstr =
191 GetOffset<ElfClass, uint8_t>(elf_header, stabstr_section->sh_offset);
192 google_breakpad::StabsReader reader(stabs, stab_section->sh_size,
193 stabstr, stabstr_section->sh_size,
194 big_endian, 4, true, &handler);
195 // Read the STABS data, and do post-processing.
196 if (!reader.Process())
197 return false;
198 handler.Finalize();
199 return true;
200 }
201 #endif // NO_STABS_SUPPORT
202
203 // A line-to-module loader that accepts line number info parsed by
204 // dwarf2reader::LineInfo and populates a Module and a line vector
205 // with the results.
206 class DumperLineToModule: public DwarfCUToModule::LineToModuleHandler {
207 public:
208 // Create a line-to-module converter using BYTE_READER.
DumperLineToModule(dwarf2reader::ByteReader * byte_reader)209 explicit DumperLineToModule(dwarf2reader::ByteReader *byte_reader)
210 : byte_reader_(byte_reader) { }
StartCompilationUnit(const string & compilation_dir)211 void StartCompilationUnit(const string& compilation_dir) {
212 compilation_dir_ = compilation_dir;
213 }
ReadProgram(const char * program,uint64 length,Module * module,std::vector<Module::Line> * lines)214 void ReadProgram(const char* program, uint64 length,
215 Module* module, std::vector<Module::Line>* lines) {
216 DwarfLineToModule handler(module, compilation_dir_, lines);
217 dwarf2reader::LineInfo parser(program, length, byte_reader_, &handler);
218 parser.Start();
219 }
220 private:
221 string compilation_dir_;
222 dwarf2reader::ByteReader *byte_reader_;
223 };
224
225 template<typename ElfClass>
LoadDwarf(const string & dwarf_filename,const typename ElfClass::Ehdr * elf_header,const bool big_endian,bool handle_inter_cu_refs,Module * module)226 bool LoadDwarf(const string& dwarf_filename,
227 const typename ElfClass::Ehdr* elf_header,
228 const bool big_endian,
229 bool handle_inter_cu_refs,
230 Module* module) {
231 typedef typename ElfClass::Shdr Shdr;
232
233 const dwarf2reader::Endianness endianness = big_endian ?
234 dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE;
235 dwarf2reader::ByteReader byte_reader(endianness);
236
237 // Construct a context for this file.
238 DwarfCUToModule::FileContext file_context(dwarf_filename,
239 module,
240 handle_inter_cu_refs);
241
242 // Build a map of the ELF file's sections.
243 const Shdr* sections =
244 GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
245 int num_sections = elf_header->e_shnum;
246 const Shdr* section_names = sections + elf_header->e_shstrndx;
247 for (int i = 0; i < num_sections; i++) {
248 const Shdr* section = §ions[i];
249 string name = GetOffset<ElfClass, char>(elf_header,
250 section_names->sh_offset) +
251 section->sh_name;
252 const char* contents = GetOffset<ElfClass, char>(elf_header,
253 section->sh_offset);
254 file_context.AddSectionToSectionMap(name, contents, section->sh_size);
255 }
256
257 // Parse all the compilation units in the .debug_info section.
258 DumperLineToModule line_to_module(&byte_reader);
259 dwarf2reader::SectionMap::const_iterator debug_info_entry =
260 file_context.section_map().find(".debug_info");
261 assert(debug_info_entry != file_context.section_map().end());
262 const std::pair<const char*, uint64>& debug_info_section =
263 debug_info_entry->second;
264 // This should never have been called if the file doesn't have a
265 // .debug_info section.
266 assert(debug_info_section.first);
267 uint64 debug_info_length = debug_info_section.second;
268 for (uint64 offset = 0; offset < debug_info_length;) {
269 // Make a handler for the root DIE that populates MODULE with the
270 // data that was found.
271 DwarfCUToModule::WarningReporter reporter(dwarf_filename, offset);
272 DwarfCUToModule root_handler(&file_context, &line_to_module, &reporter);
273 // Make a Dwarf2Handler that drives the DIEHandler.
274 dwarf2reader::DIEDispatcher die_dispatcher(&root_handler);
275 // Make a DWARF parser for the compilation unit at OFFSET.
276 dwarf2reader::CompilationUnit reader(file_context.section_map(),
277 offset,
278 &byte_reader,
279 &die_dispatcher);
280 // Process the entire compilation unit; get the offset of the next.
281 offset += reader.Start();
282 }
283 return true;
284 }
285
286 // Fill REGISTER_NAMES with the register names appropriate to the
287 // machine architecture given in HEADER, indexed by the register
288 // numbers used in DWARF call frame information. Return true on
289 // success, or false if HEADER's machine architecture is not
290 // supported.
291 template<typename ElfClass>
DwarfCFIRegisterNames(const typename ElfClass::Ehdr * elf_header,std::vector<string> * register_names)292 bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header,
293 std::vector<string>* register_names) {
294 switch (elf_header->e_machine) {
295 case EM_386:
296 *register_names = DwarfCFIToModule::RegisterNames::I386();
297 return true;
298 case EM_ARM:
299 *register_names = DwarfCFIToModule::RegisterNames::ARM();
300 return true;
301 case EM_AARCH64:
302 *register_names = DwarfCFIToModule::RegisterNames::ARM64();
303 return true;
304 case EM_MIPS:
305 *register_names = DwarfCFIToModule::RegisterNames::MIPS();
306 return true;
307 case EM_X86_64:
308 *register_names = DwarfCFIToModule::RegisterNames::X86_64();
309 return true;
310 default:
311 return false;
312 }
313 }
314
315 template<typename ElfClass>
LoadDwarfCFI(const string & dwarf_filename,const typename ElfClass::Ehdr * elf_header,const char * section_name,const typename ElfClass::Shdr * section,const bool eh_frame,const typename ElfClass::Shdr * got_section,const typename ElfClass::Shdr * text_section,const bool big_endian,Module * module)316 bool LoadDwarfCFI(const string& dwarf_filename,
317 const typename ElfClass::Ehdr* elf_header,
318 const char* section_name,
319 const typename ElfClass::Shdr* section,
320 const bool eh_frame,
321 const typename ElfClass::Shdr* got_section,
322 const typename ElfClass::Shdr* text_section,
323 const bool big_endian,
324 Module* module) {
325 // Find the appropriate set of register names for this file's
326 // architecture.
327 std::vector<string> register_names;
328 if (!DwarfCFIRegisterNames<ElfClass>(elf_header, ®ister_names)) {
329 fprintf(stderr, "%s: unrecognized ELF machine architecture '%d';"
330 " cannot convert DWARF call frame information\n",
331 dwarf_filename.c_str(), elf_header->e_machine);
332 return false;
333 }
334
335 const dwarf2reader::Endianness endianness = big_endian ?
336 dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE;
337
338 // Find the call frame information and its size.
339 const char* cfi =
340 GetOffset<ElfClass, char>(elf_header, section->sh_offset);
341 size_t cfi_size = section->sh_size;
342
343 // Plug together the parser, handler, and their entourages.
344 DwarfCFIToModule::Reporter module_reporter(dwarf_filename, section_name);
345 DwarfCFIToModule handler(module, register_names, &module_reporter);
346 dwarf2reader::ByteReader byte_reader(endianness);
347
348 byte_reader.SetAddressSize(ElfClass::kAddrSize);
349
350 // Provide the base addresses for .eh_frame encoded pointers, if
351 // possible.
352 byte_reader.SetCFIDataBase(section->sh_addr, cfi);
353 if (got_section)
354 byte_reader.SetDataBase(got_section->sh_addr);
355 if (text_section)
356 byte_reader.SetTextBase(text_section->sh_addr);
357
358 dwarf2reader::CallFrameInfo::Reporter dwarf_reporter(dwarf_filename,
359 section_name);
360 dwarf2reader::CallFrameInfo parser(cfi, cfi_size,
361 &byte_reader, &handler, &dwarf_reporter,
362 eh_frame);
363 parser.Start();
364 return true;
365 }
366
LoadELF(const string & obj_file,MmapWrapper * map_wrapper,void ** elf_header)367 bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper,
368 void** elf_header) {
369 int obj_fd = open(obj_file.c_str(), O_RDONLY);
370 if (obj_fd < 0) {
371 fprintf(stderr, "Failed to open ELF file '%s': %s\n",
372 obj_file.c_str(), strerror(errno));
373 return false;
374 }
375 FDWrapper obj_fd_wrapper(obj_fd);
376 struct stat st;
377 if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) {
378 fprintf(stderr, "Unable to fstat ELF file '%s': %s\n",
379 obj_file.c_str(), strerror(errno));
380 return false;
381 }
382 void* obj_base = mmap(NULL, st.st_size,
383 PROT_READ | PROT_WRITE, MAP_PRIVATE, obj_fd, 0);
384 if (obj_base == MAP_FAILED) {
385 fprintf(stderr, "Failed to mmap ELF file '%s': %s\n",
386 obj_file.c_str(), strerror(errno));
387 return false;
388 }
389 map_wrapper->set(obj_base, st.st_size);
390 *elf_header = obj_base;
391 if (!IsValidElf(*elf_header)) {
392 fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
393 return false;
394 }
395 return true;
396 }
397
// Determine the byte order recorded in ELF_HEADER. On success, set
// *BIG_ENDIAN accordingly and return true. If the data encoding is
// neither ELFDATA2LSB nor ELFDATA2MSB, print a diagnostic, leave
// *BIG_ENDIAN untouched, and return false.
template<typename ElfClass>
bool ElfEndianness(const typename ElfClass::Ehdr* elf_header,
                   bool* big_endian) {
  switch (elf_header->e_ident[EI_DATA]) {
    case ELFDATA2LSB:
      *big_endian = false;
      return true;
    case ELFDATA2MSB:
      *big_endian = true;
      return true;
    default:
      fprintf(stderr, "bad data encoding in ELF header: %d\n",
              elf_header->e_ident[EI_DATA]);
      return false;
  }
}
415
// Resolve |right_path| to an absolute path with realpath() and report
// whether it is identical to |left_abspath|, which the caller has already
// resolved. Returns false if |right_path| cannot be resolved.
bool IsSameFile(const char* left_abspath, const string& right_path) {
  char resolved[PATH_MAX];
  const char* right_abspath = realpath(right_path.c_str(), resolved);
  return right_abspath != NULL && strcmp(left_abspath, right_abspath) == 0;
}
424
425 // Read the .gnu_debuglink and get the debug file name. If anything goes
426 // wrong, return an empty string.
ReadDebugLink(const char * debuglink,const size_t debuglink_size,const bool big_endian,const string & obj_file,const std::vector<string> & debug_dirs)427 string ReadDebugLink(const char* debuglink,
428 const size_t debuglink_size,
429 const bool big_endian,
430 const string& obj_file,
431 const std::vector<string>& debug_dirs) {
432 size_t debuglink_len = strlen(debuglink) + 5; // Include '\0' + CRC32.
433 debuglink_len = 4 * ((debuglink_len + 3) / 4); // Round up to 4 bytes.
434
435 // Sanity check.
436 if (debuglink_len != debuglink_size) {
437 fprintf(stderr, "Mismatched .gnu_debuglink string / section size: "
438 "%zx %zx\n", debuglink_len, debuglink_size);
439 return string();
440 }
441
442 char obj_file_abspath[PATH_MAX];
443 if (!realpath(obj_file.c_str(), obj_file_abspath)) {
444 fprintf(stderr, "Cannot resolve absolute path for %s\n", obj_file.c_str());
445 return string();
446 }
447
448 std::vector<string> searched_paths;
449 string debuglink_path;
450 std::vector<string>::const_iterator it;
451 for (it = debug_dirs.begin(); it < debug_dirs.end(); ++it) {
452 const string& debug_dir = *it;
453 debuglink_path = debug_dir + "/" + debuglink;
454
455 // There is the annoying case of /path/to/foo.so having foo.so as the
456 // debug link file name. Thus this may end up opening /path/to/foo.so again,
457 // and there is a small chance of the two files having the same CRC.
458 if (IsSameFile(obj_file_abspath, debuglink_path))
459 continue;
460
461 searched_paths.push_back(debug_dir);
462 int debuglink_fd = open(debuglink_path.c_str(), O_RDONLY);
463 if (debuglink_fd < 0)
464 continue;
465
466 FDWrapper debuglink_fd_wrapper(debuglink_fd);
467
468 // The CRC is the last 4 bytes in |debuglink|.
469 const dwarf2reader::Endianness endianness = big_endian ?
470 dwarf2reader::ENDIANNESS_BIG : dwarf2reader::ENDIANNESS_LITTLE;
471 dwarf2reader::ByteReader byte_reader(endianness);
472 uint32_t expected_crc =
473 byte_reader.ReadFourBytes(&debuglink[debuglink_size - 4]);
474
475 uint32_t actual_crc = 0;
476 while (true) {
477 const size_t kReadSize = 4096;
478 char buf[kReadSize];
479 ssize_t bytes_read = HANDLE_EINTR(read(debuglink_fd, &buf, kReadSize));
480 if (bytes_read < 0) {
481 fprintf(stderr, "Error reading debug ELF file %s.\n",
482 debuglink_path.c_str());
483 return string();
484 }
485 if (bytes_read == 0)
486 break;
487 actual_crc = google_breakpad::UpdateCrc32(actual_crc, buf, bytes_read);
488 }
489 if (actual_crc != expected_crc) {
490 fprintf(stderr, "Error reading debug ELF file - CRC32 mismatch: %s\n",
491 debuglink_path.c_str());
492 continue;
493 }
494
495 // Found debug file.
496 return debuglink_path;
497 }
498
499 // Not found case.
500 fprintf(stderr, "Failed to find debug ELF file for '%s' after trying:\n",
501 obj_file.c_str());
502 for (it = searched_paths.begin(); it < searched_paths.end(); ++it) {
503 const string& debug_dir = *it;
504 fprintf(stderr, " %s/%s\n", debug_dir.c_str(), debuglink);
505 }
506 return string();
507 }
508
//
// LoadSymbolsInfo
//
// Holds the state between the two calls to LoadSymbols() in case it's necessary
// to follow the .gnu_debuglink section and load debug information from a
// different file.
//
template<typename ElfClass>
class LoadSymbolsInfo {
 public:
  typedef typename ElfClass::Addr Addr;

  // DBG_DIRS is held by reference, so it must outlive this object.
  explicit LoadSymbolsInfo(const std::vector<string>& dbg_dirs) :
    debug_dirs_(dbg_dirs),
    has_loading_addr_(false),
    loading_addr_() {}

  // Keeps track of which sections have been loaded so sections don't
  // accidentally get loaded twice from two different files. Prints a
  // message if SECTION was already recorded.
  void LoadedSection(const string &section) {
    if (!loaded_sections_.insert(section).second) {
      fprintf(stderr, "Section %s has already been loaded.\n",
              section.c_str());
    }
  }

  // The ELF file and linked debug file are expected to have the same preferred
  // loading address. The first call records ADDR and FILENAME; every later
  // call checks ADDR against the recorded address, and on a mismatch
  // prints a diagnostic and asserts.
  void set_loading_addr(Addr addr, const string &filename) {
    if (!has_loading_addr_) {
      loading_addr_ = addr;
      loaded_file_ = filename;
      // Mark the address as recorded; without this the comparison below
      // could never run.
      has_loading_addr_ = true;
      return;
    }

    if (addr != loading_addr_) {
      fprintf(stderr,
              "ELF file '%s' and debug ELF file '%s' "
              "have different load addresses.\n",
              loaded_file_.c_str(), filename.c_str());
      assert(false);
    }
  }

  // Setters and getters
  const std::vector<string>& debug_dirs() const {
    return debug_dirs_;
  }

  string debuglink_file() const {
    return debuglink_file_;
  }
  void set_debuglink_file(string file) {
    debuglink_file_ = file;
  }

 private:
  const std::vector<string>& debug_dirs_;  // Directories in which to
                                           // search for the debug ELF file.

  string debuglink_file_;  // Full path to the debug ELF file.

  bool has_loading_addr_;  // Indicate if LOADING_ADDR_ is valid.

  Addr loading_addr_;  // Saves the preferred loading address from the
                       // first call to LoadSymbols().

  string loaded_file_;  // Name of the file loaded from the first call to
                        // LoadSymbols().

  std::set<string> loaded_sections_;  // Tracks the Loaded ELF sections
                                      // between calls to LoadSymbols().
};
583
584 template<typename ElfClass>
LoadSymbols(const string & obj_file,const bool big_endian,const typename ElfClass::Ehdr * elf_header,const bool read_gnu_debug_link,LoadSymbolsInfo<ElfClass> * info,const DumpOptions & options,Module * module)585 bool LoadSymbols(const string& obj_file,
586 const bool big_endian,
587 const typename ElfClass::Ehdr* elf_header,
588 const bool read_gnu_debug_link,
589 LoadSymbolsInfo<ElfClass>* info,
590 const DumpOptions& options,
591 Module* module) {
592 typedef typename ElfClass::Addr Addr;
593 typedef typename ElfClass::Phdr Phdr;
594 typedef typename ElfClass::Shdr Shdr;
595 typedef typename ElfClass::Word Word;
596
597 Addr loading_addr = GetLoadingAddress<ElfClass>(
598 GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff),
599 elf_header->e_phnum);
600 module->SetLoadAddress(loading_addr);
601 info->set_loading_addr(loading_addr, obj_file);
602
603 Word debug_section_type =
604 elf_header->e_machine == EM_MIPS ? SHT_MIPS_DWARF : SHT_PROGBITS;
605 const Shdr* sections =
606 GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
607 const Shdr* section_names = sections + elf_header->e_shstrndx;
608 const char* names =
609 GetOffset<ElfClass, char>(elf_header, section_names->sh_offset);
610 const char *names_end = names + section_names->sh_size;
611 bool found_debug_info_section = false;
612 bool found_usable_info = false;
613
614 if (options.symbol_data != ONLY_CFI) {
615 #ifndef NO_STABS_SUPPORT
616 // Look for STABS debugging information, and load it if present.
617 const Shdr* stab_section =
618 FindElfSectionByName<ElfClass>(".stab", SHT_PROGBITS,
619 sections, names, names_end,
620 elf_header->e_shnum);
621 if (stab_section) {
622 const Shdr* stabstr_section = stab_section->sh_link + sections;
623 if (stabstr_section) {
624 found_debug_info_section = true;
625 found_usable_info = true;
626 info->LoadedSection(".stab");
627 if (!LoadStabs<ElfClass>(elf_header, stab_section, stabstr_section,
628 big_endian, module)) {
629 fprintf(stderr, "%s: \".stab\" section found, but failed to load"
630 " STABS debugging information\n", obj_file.c_str());
631 }
632 }
633 }
634 #endif // NO_STABS_SUPPORT
635
636 // Look for DWARF debugging information, and load it if present.
637 const Shdr* dwarf_section =
638 FindElfSectionByName<ElfClass>(".debug_info", debug_section_type,
639 sections, names, names_end,
640 elf_header->e_shnum);
641 if (dwarf_section) {
642 found_debug_info_section = true;
643 found_usable_info = true;
644 info->LoadedSection(".debug_info");
645 if (!LoadDwarf<ElfClass>(obj_file, elf_header, big_endian,
646 options.handle_inter_cu_refs, module)) {
647 fprintf(stderr, "%s: \".debug_info\" section found, but failed to load "
648 "DWARF debugging information\n", obj_file.c_str());
649 }
650 }
651
652 // See if there are export symbols available.
653 const Shdr* dynsym_section =
654 FindElfSectionByName<ElfClass>(".dynsym", SHT_DYNSYM,
655 sections, names, names_end,
656 elf_header->e_shnum);
657 const Shdr* dynstr_section =
658 FindElfSectionByName<ElfClass>(".dynstr", SHT_STRTAB,
659 sections, names, names_end,
660 elf_header->e_shnum);
661 if (dynsym_section && dynstr_section) {
662 info->LoadedSection(".dynsym");
663
664 const uint8_t* dynsyms =
665 GetOffset<ElfClass, uint8_t>(elf_header,
666 dynsym_section->sh_offset);
667 const uint8_t* dynstrs =
668 GetOffset<ElfClass, uint8_t>(elf_header,
669 dynstr_section->sh_offset);
670 bool result =
671 ELFSymbolsToModule(dynsyms,
672 dynsym_section->sh_size,
673 dynstrs,
674 dynstr_section->sh_size,
675 big_endian,
676 ElfClass::kAddrSize,
677 module);
678 found_usable_info = found_usable_info || result;
679 }
680 }
681
682 if (options.symbol_data != NO_CFI) {
683 // Dwarf Call Frame Information (CFI) is actually independent from
684 // the other DWARF debugging information, and can be used alone.
685 const Shdr* dwarf_cfi_section =
686 FindElfSectionByName<ElfClass>(".debug_frame", debug_section_type,
687 sections, names, names_end,
688 elf_header->e_shnum);
689 if (dwarf_cfi_section) {
690 // Ignore the return value of this function; even without call frame
691 // information, the other debugging information could be perfectly
692 // useful.
693 info->LoadedSection(".debug_frame");
694 bool result =
695 LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame",
696 dwarf_cfi_section, false, 0, 0, big_endian,
697 module);
698 found_usable_info = found_usable_info || result;
699 }
700
701 // Linux C++ exception handling information can also provide
702 // unwinding data.
703 const Shdr* eh_frame_section =
704 FindElfSectionByName<ElfClass>(".eh_frame", SHT_PROGBITS,
705 sections, names, names_end,
706 elf_header->e_shnum);
707 if (eh_frame_section) {
708 // Pointers in .eh_frame data may be relative to the base addresses of
709 // certain sections. Provide those sections if present.
710 const Shdr* got_section =
711 FindElfSectionByName<ElfClass>(".got", SHT_PROGBITS,
712 sections, names, names_end,
713 elf_header->e_shnum);
714 const Shdr* text_section =
715 FindElfSectionByName<ElfClass>(".text", SHT_PROGBITS,
716 sections, names, names_end,
717 elf_header->e_shnum);
718 info->LoadedSection(".eh_frame");
719 // As above, ignore the return value of this function.
720 bool result =
721 LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".eh_frame",
722 eh_frame_section, true,
723 got_section, text_section, big_endian, module);
724 found_usable_info = found_usable_info || result;
725 }
726 }
727
728 if (!found_debug_info_section) {
729 fprintf(stderr, "%s: file contains no debugging information"
730 " (no \".stab\" or \".debug_info\" sections)\n",
731 obj_file.c_str());
732
733 // Failed, but maybe there's a .gnu_debuglink section?
734 if (read_gnu_debug_link) {
735 const Shdr* gnu_debuglink_section
736 = FindElfSectionByName<ElfClass>(".gnu_debuglink", SHT_PROGBITS,
737 sections, names,
738 names_end, elf_header->e_shnum);
739 if (gnu_debuglink_section) {
740 if (!info->debug_dirs().empty()) {
741 const char* debuglink_contents =
742 GetOffset<ElfClass, char>(elf_header,
743 gnu_debuglink_section->sh_offset);
744 string debuglink_file =
745 ReadDebugLink(debuglink_contents,
746 gnu_debuglink_section->sh_size,
747 big_endian,
748 obj_file,
749 info->debug_dirs());
750 info->set_debuglink_file(debuglink_file);
751 } else {
752 fprintf(stderr, ".gnu_debuglink section found in '%s', "
753 "but no debug path specified.\n", obj_file.c_str());
754 }
755 } else {
756 fprintf(stderr, "%s does not contain a .gnu_debuglink section.\n",
757 obj_file.c_str());
758 }
759 } else {
760 // Return true if some usable information was found, since the caller
761 // doesn't want to use .gnu_debuglink.
762 return found_usable_info;
763 }
764
765 // No debug info was found, let the user try again with .gnu_debuglink
766 // if present.
767 return false;
768 }
769
770 return true;
771 }
772
// Return the name Breakpad symbol files use for the machine architecture
// recorded in ELF_HEADER, or NULL if the architecture is not one of the
// known ones.
template<typename ElfClass>
const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) {
  typedef typename ElfClass::Half Half;

  // Each known ELF machine value paired with its Breakpad name.
  static const struct {
    int machine;
    const char* name;
  } kKnownArchitectures[] = {
    { EM_386,     "x86"     },
    { EM_ARM,     "arm"     },
    { EM_AARCH64, "arm64"   },
    { EM_MIPS,    "mips"    },
    { EM_PPC64,   "ppc64"   },
    { EM_PPC,     "ppc"     },
    { EM_S390,    "s390"    },
    { EM_SPARC,   "sparc"   },
    { EM_SPARCV9, "sparcv9" },
    { EM_X86_64,  "x86_64"  },
  };
  const size_t kKnownCount =
      sizeof(kKnownArchitectures) / sizeof(kKnownArchitectures[0]);

  const Half arch = elf_header->e_machine;
  for (size_t i = 0; i < kKnownCount; ++i) {
    if (arch == kKnownArchitectures[i].machine)
      return kKnownArchitectures[i].name;
  }
  return NULL;
}
793
794 // Format the Elf file identifier in IDENTIFIER as a UUID with the
795 // dashes removed.
FormatIdentifier(unsigned char identifier[16])796 string FormatIdentifier(unsigned char identifier[16]) {
797 char identifier_str[40];
798 google_breakpad::FileID::ConvertIdentifierToString(
799 identifier,
800 identifier_str,
801 sizeof(identifier_str));
802 string id_no_dash;
803 for (int i = 0; identifier_str[i] != '\0'; ++i)
804 if (identifier_str[i] != '-')
805 id_no_dash += identifier_str[i];
806 // Add an extra "0" by the end. PDB files on Windows have an 'age'
807 // number appended to the end of the file identifier; this isn't
808 // really used or necessary on other platforms, but be consistent.
809 id_no_dash += '0';
810 return id_no_dash;
811 }
812
// Return the non-directory portion of FILENAME: the portion after the
// last slash, or the whole filename if there are no slashes.
string BaseFileName(const string &filename) {
  // basename() may take a non-const pointer and modify what it is given,
  // so hand it a private, NUL-terminated copy. Owning the copy in a
  // vector avoids an unchecked strdup() and a manual free().
  std::vector<char> scratch(filename.c_str(),
                            filename.c_str() + filename.size() + 1);
  // Copy the result into a string while SCRATCH is still alive.
  string base = basename(&scratch[0]);
  return base;
}
822
823 template<typename ElfClass>
SanitizeDebugFile(const typename ElfClass::Ehdr * debug_elf_header,const string & debuglink_file,const string & obj_filename,const char * obj_file_architecture,const bool obj_file_is_big_endian)824 bool SanitizeDebugFile(const typename ElfClass::Ehdr* debug_elf_header,
825 const string& debuglink_file,
826 const string& obj_filename,
827 const char* obj_file_architecture,
828 const bool obj_file_is_big_endian) {
829 const char* debug_architecture =
830 ElfArchitecture<ElfClass>(debug_elf_header);
831 if (!debug_architecture) {
832 fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
833 debuglink_file.c_str(), debug_elf_header->e_machine);
834 return false;
835 }
836 if (strcmp(obj_file_architecture, debug_architecture)) {
837 fprintf(stderr, "%s with ELF machine architecture %s does not match "
838 "%s with ELF architecture %s\n",
839 debuglink_file.c_str(), debug_architecture,
840 obj_filename.c_str(), obj_file_architecture);
841 return false;
842 }
843 bool debug_big_endian;
844 if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian))
845 return false;
846 if (debug_big_endian != obj_file_is_big_endian) {
847 fprintf(stderr, "%s and %s does not match in endianness\n",
848 obj_filename.c_str(), debuglink_file.c_str());
849 return false;
850 }
851 return true;
852 }
853
854 template<typename ElfClass>
ReadSymbolDataElfClass(const typename ElfClass::Ehdr * elf_header,const string & obj_filename,const std::vector<string> & debug_dirs,const DumpOptions & options,Module ** out_module)855 bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header,
856 const string& obj_filename,
857 const std::vector<string>& debug_dirs,
858 const DumpOptions& options,
859 Module** out_module) {
860 typedef typename ElfClass::Ehdr Ehdr;
861 typedef typename ElfClass::Shdr Shdr;
862
863 *out_module = NULL;
864
865 unsigned char identifier[16];
866 if (!google_breakpad::FileID::ElfFileIdentifierFromMappedFile(elf_header,
867 identifier)) {
868 fprintf(stderr, "%s: unable to generate file identifier\n",
869 obj_filename.c_str());
870 return false;
871 }
872
873 const char *architecture = ElfArchitecture<ElfClass>(elf_header);
874 if (!architecture) {
875 fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
876 obj_filename.c_str(), elf_header->e_machine);
877 return false;
878 }
879
880 // Figure out what endianness this file is.
881 bool big_endian;
882 if (!ElfEndianness<ElfClass>(elf_header, &big_endian))
883 return false;
884
885 string name = BaseFileName(obj_filename);
886 string os = "Linux";
887 string id = FormatIdentifier(identifier);
888
889 LoadSymbolsInfo<ElfClass> info(debug_dirs);
890 scoped_ptr<Module> module(new Module(name, os, architecture, id));
891 if (!LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header,
892 !debug_dirs.empty(), &info,
893 options, module.get())) {
894 const string debuglink_file = info.debuglink_file();
895 if (debuglink_file.empty())
896 return false;
897
898 // Load debuglink ELF file.
899 fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str());
900 MmapWrapper debug_map_wrapper;
901 Ehdr* debug_elf_header = NULL;
902 if (!LoadELF(debuglink_file, &debug_map_wrapper,
903 reinterpret_cast<void**>(&debug_elf_header)) ||
904 !SanitizeDebugFile<ElfClass>(debug_elf_header, debuglink_file,
905 obj_filename, architecture, big_endian)) {
906 return false;
907 }
908
909 if (!LoadSymbols<ElfClass>(debuglink_file, big_endian,
910 debug_elf_header, false, &info,
911 options, module.get())) {
912 return false;
913 }
914 }
915
916 *out_module = module.release();
917 return true;
918 }
919
920 } // namespace
921
922 namespace google_breakpad {
923
924 // Not explicitly exported, but not static so it can be used in unit tests.
ReadSymbolDataInternal(const uint8_t * obj_file,const string & obj_filename,const std::vector<string> & debug_dirs,const DumpOptions & options,Module ** module)925 bool ReadSymbolDataInternal(const uint8_t* obj_file,
926 const string& obj_filename,
927 const std::vector<string>& debug_dirs,
928 const DumpOptions& options,
929 Module** module) {
930 if (!IsValidElf(obj_file)) {
931 fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str());
932 return false;
933 }
934
935 int elfclass = ElfClass(obj_file);
936 if (elfclass == ELFCLASS32) {
937 return ReadSymbolDataElfClass<ElfClass32>(
938 reinterpret_cast<const Elf32_Ehdr*>(obj_file), obj_filename, debug_dirs,
939 options, module);
940 }
941 if (elfclass == ELFCLASS64) {
942 return ReadSymbolDataElfClass<ElfClass64>(
943 reinterpret_cast<const Elf64_Ehdr*>(obj_file), obj_filename, debug_dirs,
944 options, module);
945 }
946
947 return false;
948 }
949
WriteSymbolFile(const string & obj_file,const std::vector<string> & debug_dirs,const DumpOptions & options,std::ostream & sym_stream)950 bool WriteSymbolFile(const string &obj_file,
951 const std::vector<string>& debug_dirs,
952 const DumpOptions& options,
953 std::ostream &sym_stream) {
954 Module* module;
955 if (!ReadSymbolData(obj_file, debug_dirs, options, &module))
956 return false;
957
958 bool result = module->Write(sym_stream, options.symbol_data);
959 delete module;
960 return result;
961 }
962
ReadSymbolData(const string & obj_file,const std::vector<string> & debug_dirs,const DumpOptions & options,Module ** module)963 bool ReadSymbolData(const string& obj_file,
964 const std::vector<string>& debug_dirs,
965 const DumpOptions& options,
966 Module** module) {
967 MmapWrapper map_wrapper;
968 void* elf_header = NULL;
969 if (!LoadELF(obj_file, &map_wrapper, &elf_header))
970 return false;
971
972 return ReadSymbolDataInternal(reinterpret_cast<uint8_t*>(elf_header),
973 obj_file, debug_dirs, options, module);
974 }
975
976 } // namespace google_breakpad
977