// -*- mode: C++ -*-

// Copyright (c) 2010 Google Inc. All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

// This file contains definitions related to the DWARF2/3 reader and
// its handler interfaces.
35 // The DWARF2/3 specification can be found at 36 // http://dwarf.freestandards.org and should be considered required 37 // reading if you wish to modify the implementation. 38 // Only a cursory attempt is made to explain terminology that is 39 // used here, as it is much better explained in the standard documents 40 #ifndef COMMON_DWARF_DWARF2READER_H__ 41 #define COMMON_DWARF_DWARF2READER_H__ 42 43 #include <stdint.h> 44 45 #include <list> 46 #include <map> 47 #include <string> 48 #include <utility> 49 #include <vector> 50 #include <memory> 51 52 #include "common/dwarf/bytereader.h" 53 #include "common/dwarf/dwarf2enums.h" 54 #include "common/dwarf/types.h" 55 #include "common/using_std_string.h" 56 #include "common/dwarf/elf_reader.h" 57 58 namespace dwarf2reader { 59 struct LineStateMachine; 60 class Dwarf2Handler; 61 class LineInfoHandler; 62 class DwpReader; 63 64 // This maps from a string naming a section to a pair containing a 65 // the data for the section, and the size of the section. 66 typedef std::map<string, std::pair<const uint8_t *, uint64_t> > SectionMap; 67 typedef std::list<std::pair<enum DwarfAttribute, enum DwarfForm> > 68 AttributeList; 69 typedef AttributeList::iterator AttributeIterator; 70 typedef AttributeList::const_iterator ConstAttributeIterator; 71 72 struct LineInfoHeader { 73 uint64_t total_length; 74 uint16_t version; 75 uint64_t prologue_length; 76 uint8_t min_insn_length; // insn stands for instructin 77 bool default_is_stmt; // stmt stands for statement 78 int8_t line_base; 79 uint8_t line_range; 80 uint8_t opcode_base; 81 // Use a pointer so that signalsafe_addr2line is able to use this structure 82 // without heap allocation problem. 83 std::vector<unsigned char> *std_opcode_lengths; 84 }; 85 86 class LineInfo { 87 public: 88 89 // Initializes a .debug_line reader. Buffer and buffer length point 90 // to the beginning and length of the line information to read. 
91 // Reader is a ByteReader class that has the endianness set 92 // properly. 93 LineInfo(const uint8_t *buffer_, uint64_t buffer_length, 94 ByteReader* reader, LineInfoHandler* handler); 95 ~LineInfo()96 virtual ~LineInfo() { 97 if (header_.std_opcode_lengths) { 98 delete header_.std_opcode_lengths; 99 } 100 } 101 102 // Start processing line info, and calling callbacks in the handler. 103 // Consumes the line number information for a single compilation unit. 104 // Returns the number of bytes processed. 105 uint64_t Start(); 106 107 // Process a single line info opcode at START using the state 108 // machine at LSM. Return true if we should define a line using the 109 // current state of the line state machine. Place the length of the 110 // opcode in LEN. 111 // If LSM_PASSES_PC is non-NULL, this function also checks if the lsm 112 // passes the address of PC. In other words, LSM_PASSES_PC will be 113 // set to true, if the following condition is met. 114 // 115 // lsm's old address < PC <= lsm's new address 116 static bool ProcessOneOpcode(ByteReader* reader, 117 LineInfoHandler* handler, 118 const struct LineInfoHeader &header, 119 const uint8_t *start, 120 struct LineStateMachine* lsm, 121 size_t* len, 122 uintptr pc, 123 bool *lsm_passes_pc); 124 125 private: 126 // Reads the DWARF2/3 header for this line info. 127 void ReadHeader(); 128 129 // Reads the DWARF2/3 line information 130 void ReadLines(); 131 132 // The associated handler to call processing functions in 133 LineInfoHandler* handler_; 134 135 // The associated ByteReader that handles endianness issues for us 136 ByteReader* reader_; 137 138 // A DWARF2/3 line info header. This is not the same size as 139 // in the actual file, as the one in the file may have a 32 bit or 140 // 64 bit lengths 141 142 struct LineInfoHeader header_; 143 144 // buffer is the buffer for our line info, starting at exactly where 145 // the line info to read is. 
after_header is the place right after 146 // the end of the line information header. 147 const uint8_t *buffer_; 148 #ifndef NDEBUG 149 uint64_t buffer_length_; 150 #endif 151 const uint8_t *after_header_; 152 }; 153 154 // This class is the main interface between the line info reader and 155 // the client. The virtual functions inside this get called for 156 // interesting events that happen during line info reading. The 157 // default implementation does nothing 158 159 class LineInfoHandler { 160 public: LineInfoHandler()161 LineInfoHandler() { } 162 ~LineInfoHandler()163 virtual ~LineInfoHandler() { } 164 165 // Called when we define a directory. NAME is the directory name, 166 // DIR_NUM is the directory number DefineDir(const string & name,uint32_t dir_num)167 virtual void DefineDir(const string& name, uint32_t dir_num) { } 168 169 // Called when we define a filename. NAME is the filename, FILE_NUM 170 // is the file number which is -1 if the file index is the next 171 // index after the last numbered index (this happens when files are 172 // dynamically defined by the line program), DIR_NUM is the 173 // directory index for the directory name of this file, MOD_TIME is 174 // the modification time of the file, and LENGTH is the length of 175 // the file DefineFile(const string & name,int32_t file_num,uint32_t dir_num,uint64_t mod_time,uint64_t length)176 virtual void DefineFile(const string& name, int32_t file_num, 177 uint32_t dir_num, uint64_t mod_time, 178 uint64_t length) { } 179 180 // Called when the line info reader has a new line, address pair 181 // ready for us. ADDRESS is the address of the code, LENGTH is the 182 // length of its machine code in bytes, FILE_NUM is the file number 183 // containing the code, LINE_NUM is the line number in that file for 184 // the code, and COLUMN_NUM is the column number the code starts at, 185 // if we know it (0 otherwise). 
AddLine(uint64_t address,uint64_t length,uint32_t file_num,uint32_t line_num,uint32_t column_num)186 virtual void AddLine(uint64_t address, uint64_t length, 187 uint32_t file_num, uint32_t line_num, uint32_t column_num) { } 188 }; 189 190 class RangeListHandler { 191 public: RangeListHandler()192 RangeListHandler() { } 193 ~RangeListHandler()194 virtual ~RangeListHandler() { } 195 196 // Add a range. AddRange(uint64_t begin,uint64_t end)197 virtual void AddRange(uint64_t begin, uint64_t end) { }; 198 199 // A new base address must be set for computing the ranges' addresses. SetBaseAddress(uint64_t base_address)200 virtual void SetBaseAddress(uint64_t base_address) { }; 201 202 // Finish processing the range list. Finish()203 virtual void Finish() { }; 204 }; 205 206 class RangeListReader { 207 public: 208 RangeListReader(const uint8_t *buffer, uint64_t size, ByteReader *reader, 209 RangeListHandler *handler); 210 211 bool ReadRangeList(uint64_t offset); 212 213 private: 214 const uint8_t *buffer_; 215 uint64_t size_; 216 ByteReader* reader_; 217 RangeListHandler *handler_; 218 }; 219 220 // This class is the main interface between the reader and the 221 // client. The virtual functions inside this get called for 222 // interesting events that happen during DWARF2 reading. 223 // The default implementation skips everything. 224 class Dwarf2Handler { 225 public: Dwarf2Handler()226 Dwarf2Handler() { } 227 ~Dwarf2Handler()228 virtual ~Dwarf2Handler() { } 229 230 // Start to process a compilation unit at OFFSET from the beginning of the 231 // .debug_info section. Return false if you would like to skip this 232 // compilation unit. 
StartCompilationUnit(uint64_t offset,uint8_t address_size,uint8_t offset_size,uint64_t cu_length,uint8_t dwarf_version)233 virtual bool StartCompilationUnit(uint64_t offset, uint8_t address_size, 234 uint8_t offset_size, uint64_t cu_length, 235 uint8_t dwarf_version) { return false; } 236 237 // When processing a skeleton compilation unit, resulting from a split 238 // DWARF compilation, once the skeleton debug info has been read, 239 // the reader will call this function to ask the client if it needs 240 // the full debug info from the .dwo or .dwp file. Return true if 241 // you need it, or false to skip processing the split debug info. NeedSplitDebugInfo()242 virtual bool NeedSplitDebugInfo() { return true; } 243 244 // Start to process a split compilation unit at OFFSET from the beginning of 245 // the debug_info section in the .dwp/.dwo file. Return false if you would 246 // like to skip this compilation unit. StartSplitCompilationUnit(uint64_t offset,uint64_t cu_length)247 virtual bool StartSplitCompilationUnit(uint64_t offset, 248 uint64_t cu_length) { return false; } 249 250 // Start to process a DIE at OFFSET from the beginning of the .debug_info 251 // section. Return false if you would like to skip this DIE. StartDIE(uint64_t offset,enum DwarfTag tag)252 virtual bool StartDIE(uint64_t offset, enum DwarfTag tag) { return false; } 253 254 // Called when we have an attribute with unsigned data to give to our 255 // handler. The attribute is for the DIE at OFFSET from the beginning of the 256 // .debug_info section. Its name is ATTR, its form is FORM, and its value is 257 // DATA. ProcessAttributeUnsigned(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,uint64_t data)258 virtual void ProcessAttributeUnsigned(uint64_t offset, 259 enum DwarfAttribute attr, 260 enum DwarfForm form, 261 uint64_t data) { } 262 263 // Called when we have an attribute with signed data to give to our handler. 
264 // The attribute is for the DIE at OFFSET from the beginning of the 265 // .debug_info section. Its name is ATTR, its form is FORM, and its value is 266 // DATA. ProcessAttributeSigned(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,int64_t data)267 virtual void ProcessAttributeSigned(uint64_t offset, 268 enum DwarfAttribute attr, 269 enum DwarfForm form, 270 int64_t data) { } 271 272 // Called when we have an attribute whose value is a reference to 273 // another DIE. The attribute belongs to the DIE at OFFSET from the 274 // beginning of the .debug_info section. Its name is ATTR, its form 275 // is FORM, and the offset of the DIE being referred to from the 276 // beginning of the .debug_info section is DATA. ProcessAttributeReference(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,uint64_t data)277 virtual void ProcessAttributeReference(uint64_t offset, 278 enum DwarfAttribute attr, 279 enum DwarfForm form, 280 uint64_t data) { } 281 282 // Called when we have an attribute with a buffer of data to give to our 283 // handler. The attribute is for the DIE at OFFSET from the beginning of the 284 // .debug_info section. Its name is ATTR, its form is FORM, DATA points to 285 // the buffer's contents, and its length in bytes is LENGTH. The buffer is 286 // owned by the caller, not the callee, and may not persist for very long. 287 // If you want the data to be available later, it needs to be copied. ProcessAttributeBuffer(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,const uint8_t * data,uint64_t len)288 virtual void ProcessAttributeBuffer(uint64_t offset, 289 enum DwarfAttribute attr, 290 enum DwarfForm form, 291 const uint8_t *data, 292 uint64_t len) { } 293 294 // Called when we have an attribute with string data to give to our handler. 295 // The attribute is for the DIE at OFFSET from the beginning of the 296 // .debug_info section. Its name is ATTR, its form is FORM, and its value is 297 // DATA. 
ProcessAttributeString(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,const string & data)298 virtual void ProcessAttributeString(uint64_t offset, 299 enum DwarfAttribute attr, 300 enum DwarfForm form, 301 const string& data) { } 302 303 // Called when we have an attribute whose value is the 64-bit signature 304 // of a type unit in the .debug_types section. OFFSET is the offset of 305 // the DIE whose attribute we're reporting. ATTR and FORM are the 306 // attribute's name and form. SIGNATURE is the type unit's signature. ProcessAttributeSignature(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,uint64_t signature)307 virtual void ProcessAttributeSignature(uint64_t offset, 308 enum DwarfAttribute attr, 309 enum DwarfForm form, 310 uint64_t signature) { } 311 312 // Called when finished processing the DIE at OFFSET. 313 // Because DWARF2/3 specifies a tree of DIEs, you may get starts 314 // before ends of the previous DIE, as we process children before 315 // ending the parent. EndDIE(uint64_t offset)316 virtual void EndDIE(uint64_t offset) { } 317 318 }; 319 320 // The base of DWARF2/3 debug info is a DIE (Debugging Information 321 // Entry. 322 // DWARF groups DIE's into a tree and calls the root of this tree a 323 // "compilation unit". Most of the time, there is one compilation 324 // unit in the .debug_info section for each file that had debug info 325 // generated. 326 // Each DIE consists of 327 328 // 1. a tag specifying a thing that is being described (ie 329 // DW_TAG_subprogram for functions, DW_TAG_variable for variables, etc 330 // 2. attributes (such as DW_AT_location for location in memory, 331 // DW_AT_name for name), and data for each attribute. 332 // 3. A flag saying whether the DIE has children or not 333 334 // In order to gain some amount of compression, the format of 335 // each DIE (tag name, attributes and data forms for the attributes) 336 // are stored in a separate table called the "abbreviation table". 
337 // This is done because a large number of DIEs have the exact same tag 338 // and list of attributes, but different data for those attributes. 339 // As a result, the .debug_info section is just a stream of data, and 340 // requires reading of the .debug_abbrev section to say what the data 341 // means. 342 343 // As a warning to the user, it should be noted that the reason for 344 // using absolute offsets from the beginning of .debug_info is that 345 // DWARF2/3 supports referencing DIE's from other DIE's by their offset 346 // from either the current compilation unit start, *or* the beginning 347 // of the .debug_info section. This means it is possible to reference 348 // a DIE in one compilation unit from a DIE in another compilation 349 // unit. This style of reference is usually used to eliminate 350 // duplicated information that occurs across compilation 351 // units, such as base types, etc. GCC 3.4+ support this with 352 // -feliminate-dwarf2-dups. Other toolchains will sometimes do 353 // duplicate elimination in the linker. 354 355 class CompilationUnit { 356 public: 357 358 // Initialize a compilation unit. This requires a map of sections, 359 // the offset of this compilation unit in the .debug_info section, a 360 // ByteReader, and a Dwarf2Handler class to call callbacks in. 361 CompilationUnit(const string& path, const SectionMap& sections, 362 uint64_t offset, ByteReader* reader, Dwarf2Handler* handler); ~CompilationUnit()363 virtual ~CompilationUnit() { 364 if (abbrevs_) delete abbrevs_; 365 } 366 367 // Initialize a compilation unit from a .dwo or .dwp file. 368 // In this case, we need the .debug_addr section from the 369 // executable file that contains the corresponding skeleton 370 // compilation unit. We also inherit the Dwarf2Handler from 371 // the executable file, and call it as if we were still 372 // processing the original compilation unit. 
373 void SetSplitDwarf(const uint8_t* addr_buffer, uint64_t addr_buffer_length, 374 uint64_t addr_base, uint64_t ranges_base, uint64_t dwo_id); 375 376 // Begin reading a Dwarf2 compilation unit, and calling the 377 // callbacks in the Dwarf2Handler 378 379 // Return the full length of the compilation unit, including 380 // headers. This plus the starting offset passed to the constructor 381 // is the offset of the end of the compilation unit --- and the 382 // start of the next compilation unit, if there is one. 383 uint64_t Start(); 384 385 private: 386 387 // This struct represents a single DWARF2/3 abbreviation 388 // The abbreviation tells how to read a DWARF2/3 DIE, and consist of a 389 // tag and a list of attributes, as well as the data form of each attribute. 390 struct Abbrev { 391 uint64_t number; 392 enum DwarfTag tag; 393 bool has_children; 394 AttributeList attributes; 395 }; 396 397 // A DWARF2/3 compilation unit header. This is not the same size as 398 // in the actual file, as the one in the file may have a 32 bit or 399 // 64 bit length. 400 struct CompilationUnitHeader { 401 uint64_t length; 402 uint16_t version; 403 uint64_t abbrev_offset; 404 uint8_t address_size; 405 } header_; 406 407 // Reads the DWARF2/3 header for this compilation unit. 408 void ReadHeader(); 409 410 // Reads the DWARF2/3 abbreviations for this compilation unit 411 void ReadAbbrevs(); 412 413 // Processes a single DIE for this compilation unit and return a new 414 // pointer just past the end of it 415 const uint8_t *ProcessDIE(uint64_t dieoffset, 416 const uint8_t *start, 417 const Abbrev& abbrev); 418 419 // Processes a single attribute and return a new pointer just past the 420 // end of it 421 const uint8_t *ProcessAttribute(uint64_t dieoffset, 422 const uint8_t *start, 423 enum DwarfAttribute attr, 424 enum DwarfForm form); 425 426 // Called when we have an attribute with unsigned data to give to 427 // our handler. 
The attribute is for the DIE at OFFSET from the 428 // beginning of compilation unit, has a name of ATTR, a form of 429 // FORM, and the actual data of the attribute is in DATA. 430 // If we see a DW_AT_GNU_dwo_id attribute, save the value so that 431 // we can find the debug info in a .dwo or .dwp file. ProcessAttributeUnsigned(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,uint64_t data)432 void ProcessAttributeUnsigned(uint64_t offset, 433 enum DwarfAttribute attr, 434 enum DwarfForm form, 435 uint64_t data) { 436 if (attr == DW_AT_GNU_dwo_id) { 437 dwo_id_ = data; 438 } 439 else if (attr == DW_AT_GNU_addr_base) { 440 addr_base_ = data; 441 } 442 else if (attr == DW_AT_GNU_ranges_base) { 443 ranges_base_ = data; 444 } 445 // TODO(yunlian): When we add DW_AT_ranges_base from DWARF-5, 446 // that base will apply to DW_AT_ranges attributes in the 447 // skeleton CU as well as in the .dwo/.dwp files. 448 else if (attr == DW_AT_ranges && is_split_dwarf_) { 449 data += ranges_base_; 450 } 451 handler_->ProcessAttributeUnsigned(offset, attr, form, data); 452 } 453 454 // Called when we have an attribute with signed data to give to 455 // our handler. The attribute is for the DIE at OFFSET from the 456 // beginning of compilation unit, has a name of ATTR, a form of 457 // FORM, and the actual data of the attribute is in DATA. ProcessAttributeSigned(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,int64_t data)458 void ProcessAttributeSigned(uint64_t offset, 459 enum DwarfAttribute attr, 460 enum DwarfForm form, 461 int64_t data) { 462 handler_->ProcessAttributeSigned(offset, attr, form, data); 463 } 464 465 // Called when we have an attribute with a buffer of data to give to 466 // our handler. The attribute is for the DIE at OFFSET from the 467 // beginning of compilation unit, has a name of ATTR, a form of 468 // FORM, and the actual data of the attribute is in DATA, and the 469 // length of the buffer is LENGTH. 
ProcessAttributeBuffer(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,const uint8_t * data,uint64_t len)470 void ProcessAttributeBuffer(uint64_t offset, 471 enum DwarfAttribute attr, 472 enum DwarfForm form, 473 const uint8_t* data, 474 uint64_t len) { 475 handler_->ProcessAttributeBuffer(offset, attr, form, data, len); 476 } 477 478 // Called when we have an attribute with string data to give to 479 // our handler. The attribute is for the DIE at OFFSET from the 480 // beginning of compilation unit, has a name of ATTR, a form of 481 // FORM, and the actual data of the attribute is in DATA. 482 // If we see a DW_AT_GNU_dwo_name attribute, save the value so 483 // that we can find the debug info in a .dwo or .dwp file. ProcessAttributeString(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,const char * data)484 void ProcessAttributeString(uint64_t offset, 485 enum DwarfAttribute attr, 486 enum DwarfForm form, 487 const char* data) { 488 if (attr == DW_AT_GNU_dwo_name) 489 dwo_name_ = data; 490 handler_->ProcessAttributeString(offset, attr, form, data); 491 } 492 493 // Processes all DIEs for this compilation unit 494 void ProcessDIEs(); 495 496 // Skips the die with attributes specified in ABBREV starting at 497 // START, and return the new place to position the stream to. 498 const uint8_t *SkipDIE(const uint8_t *start, const Abbrev& abbrev); 499 500 // Skips the attribute starting at START, with FORM, and return the 501 // new place to position the stream to. 502 const uint8_t *SkipAttribute(const uint8_t *start, enum DwarfForm form); 503 504 // Process the actual debug information in a split DWARF file. 505 void ProcessSplitDwarf(); 506 507 // Read the debug sections from a .dwo file. 508 void ReadDebugSectionsFromDwo(ElfReader* elf_reader, 509 SectionMap* sections); 510 511 // Path of the file containing the debug information. 
512 const string path_; 513 514 // Offset from section start is the offset of this compilation unit 515 // from the beginning of the .debug_info section. 516 uint64_t offset_from_section_start_; 517 518 // buffer is the buffer for our CU, starting at .debug_info + offset 519 // passed in from constructor. 520 // after_header points to right after the compilation unit header. 521 const uint8_t *buffer_; 522 uint64_t buffer_length_; 523 const uint8_t *after_header_; 524 525 // The associated ByteReader that handles endianness issues for us 526 ByteReader* reader_; 527 528 // The map of sections in our file to buffers containing their data 529 const SectionMap& sections_; 530 531 // The associated handler to call processing functions in 532 Dwarf2Handler* handler_; 533 534 // Set of DWARF2/3 abbreviations for this compilation unit. Indexed 535 // by abbreviation number, which means that abbrevs_[0] is not 536 // valid. 537 std::vector<Abbrev>* abbrevs_; 538 539 // String section buffer and length, if we have a string section. 540 // This is here to avoid doing a section lookup for strings in 541 // ProcessAttribute, which is in the hot path for DWARF2 reading. 542 const uint8_t *string_buffer_; 543 uint64_t string_buffer_length_; 544 545 // String offsets section buffer and length, if we have a string offsets 546 // section (.debug_str_offsets or .debug_str_offsets.dwo). 547 const uint8_t* str_offsets_buffer_; 548 uint64_t str_offsets_buffer_length_; 549 550 // Address section buffer and length, if we have an address section 551 // (.debug_addr). 552 const uint8_t* addr_buffer_; 553 uint64_t addr_buffer_length_; 554 555 // Flag indicating whether this compilation unit is part of a .dwo 556 // or .dwp file. If true, we are reading this unit because a 557 // skeleton compilation unit in an executable file had a 558 // DW_AT_GNU_dwo_name or DW_AT_GNU_dwo_id attribute. 
559 // In a .dwo file, we expect the string offsets section to 560 // have a ".dwo" suffix, and we will use the ".debug_addr" section 561 // associated with the skeleton compilation unit. 562 bool is_split_dwarf_; 563 564 // The value of the DW_AT_GNU_dwo_id attribute, if any. 565 uint64_t dwo_id_; 566 567 // The value of the DW_AT_GNU_dwo_name attribute, if any. 568 const char* dwo_name_; 569 570 // If this is a split DWARF CU, the value of the DW_AT_GNU_dwo_id attribute 571 // from the skeleton CU. 572 uint64_t skeleton_dwo_id_; 573 574 // The value of the DW_AT_GNU_ranges_base attribute, if any. 575 uint64_t ranges_base_; 576 577 // The value of the DW_AT_GNU_addr_base attribute, if any. 578 uint64_t addr_base_; 579 580 // True if we have already looked for a .dwp file. 581 bool have_checked_for_dwp_; 582 583 // Path to the .dwp file. 584 string dwp_path_; 585 586 // ByteReader for the DWP file. 587 std::unique_ptr<ByteReader> dwp_byte_reader_; 588 589 // DWP reader. 590 std::unique_ptr<DwpReader> dwp_reader_; 591 }; 592 593 // A Reader for a .dwp file. Supports the fetching of DWARF debug 594 // info for a given dwo_id. 595 // 596 // There are two versions of .dwp files. In both versions, the 597 // .dwp file is an ELF file containing only debug sections. 598 // In Version 1, the file contains many copies of each debug 599 // section, one for each .dwo file that is packaged in the .dwp 600 // file, and the .debug_cu_index section maps from the dwo_id 601 // to a set of section indexes. In Version 2, the file contains 602 // one of each debug section, and the .debug_cu_index section 603 // maps from the dwo_id to a set of offsets and lengths that 604 // identify each .dwo file's contribution to the larger sections. 605 606 class DwpReader { 607 public: 608 DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader); 609 610 ~DwpReader(); 611 612 // Read the CU index and initialize data members. 
613 void Initialize(); 614 615 // Read the debug sections for the given dwo_id. 616 void ReadDebugSectionsForCU(uint64_t dwo_id, SectionMap* sections); 617 618 private: 619 // Search a v1 hash table for "dwo_id". Returns the slot index 620 // where the dwo_id was found, or -1 if it was not found. 621 int LookupCU(uint64_t dwo_id); 622 623 // Search a v2 hash table for "dwo_id". Returns the row index 624 // in the offsets and sizes tables, or 0 if it was not found. 625 uint32_t LookupCUv2(uint64_t dwo_id); 626 627 // The ELF reader for the .dwp file. 628 ElfReader* elf_reader_; 629 630 // The ByteReader for the .dwp file. 631 const ByteReader& byte_reader_; 632 633 // Pointer to the .debug_cu_index section. 634 const char* cu_index_; 635 636 // Size of the .debug_cu_index section. 637 size_t cu_index_size_; 638 639 // Pointer to the .debug_str.dwo section. 640 const char* string_buffer_; 641 642 // Size of the .debug_str.dwo section. 643 size_t string_buffer_size_; 644 645 // Version of the .dwp file. We support versions 1 and 2 currently. 646 int version_; 647 648 // Number of columns in the section tables (version 2). 649 unsigned int ncolumns_; 650 651 // Number of units in the section tables (version 2). 652 unsigned int nunits_; 653 654 // Number of slots in the hash table. 655 unsigned int nslots_; 656 657 // Pointer to the beginning of the hash table. 658 const char* phash_; 659 660 // Pointer to the beginning of the index table. 661 const char* pindex_; 662 663 // Pointer to the beginning of the section index pool (version 1). 664 const char* shndx_pool_; 665 666 // Pointer to the beginning of the section offset table (version 2). 667 const char* offset_table_; 668 669 // Pointer to the beginning of the section size table (version 2). 670 const char* size_table_; 671 672 // Contents of the sections of interest (version 2). 
673 const char* abbrev_data_; 674 size_t abbrev_size_; 675 const char* info_data_; 676 size_t info_size_; 677 const char* str_offsets_data_; 678 size_t str_offsets_size_; 679 }; 680 681 // This class is a reader for DWARF's Call Frame Information. CFI 682 // describes how to unwind stack frames --- even for functions that do 683 // not follow fixed conventions for saving registers, whose frame size 684 // varies as they execute, etc. 685 // 686 // CFI describes, at each machine instruction, how to compute the 687 // stack frame's base address, how to find the return address, and 688 // where to find the saved values of the caller's registers (if the 689 // callee has stashed them somewhere to free up the registers for its 690 // own use). 691 // 692 // For example, suppose we have a function whose machine code looks 693 // like this (imagine an assembly language that looks like C, for a 694 // machine with 32-bit registers, and a stack that grows towards lower 695 // addresses): 696 // 697 // func: ; entry point; return address at sp 698 // func+0: sp = sp - 16 ; allocate space for stack frame 699 // func+1: sp[12] = r0 ; save r0 at sp+12 700 // ... ; other code, not frame-related 701 // func+10: sp -= 4; *sp = x ; push some x on the stack 702 // ... ; other code, not frame-related 703 // func+20: r0 = sp[16] ; restore saved r0 704 // func+21: sp += 20 ; pop whole stack frame 705 // func+22: pc = *sp; sp += 4 ; pop return address and jump to it 706 // 707 // DWARF CFI is (a very compressed representation of) a table with a 708 // row for each machine instruction address and a column for each 709 // register showing how to restore it, if possible. 710 // 711 // A special column named "CFA", for "Canonical Frame Address", tells how 712 // to compute the base address of the frame; registers' entries may 713 // refer to the CFA in describing where the registers are saved. 714 // 715 // Another special column, named "RA", represents the return address. 
716 // 717 // For example, here is a complete (uncompressed) table describing the 718 // function above: 719 // 720 // insn cfa r0 r1 ... ra 721 // ======================================= 722 // func+0: sp cfa[0] 723 // func+1: sp+16 cfa[0] 724 // func+2: sp+16 cfa[-4] cfa[0] 725 // func+11: sp+20 cfa[-4] cfa[0] 726 // func+21: sp+20 cfa[0] 727 // func+22: sp cfa[0] 728 // 729 // Some things to note here: 730 // 731 // - Each row describes the state of affairs *before* executing the 732 // instruction at the given address. Thus, the row for func+0 733 // describes the state before we allocate the stack frame. In the 734 // next row, the formula for computing the CFA has changed, 735 // reflecting that allocation. 736 // 737 // - The other entries are written in terms of the CFA; this allows 738 // them to remain unchanged as the stack pointer gets bumped around. 739 // For example, the rule for recovering the return address (the "ra" 740 // column) remains unchanged throughout the function, even as the 741 // stack pointer takes on three different offsets from the return 742 // address. 743 // 744 // - Although we haven't shown it, most calling conventions designate 745 // "callee-saves" and "caller-saves" registers. The callee must 746 // preserve the values of callee-saves registers; if it uses them, 747 // it must save their original values somewhere, and restore them 748 // before it returns. In contrast, the callee is free to trash 749 // caller-saves registers; if the callee uses these, it will 750 // probably not bother to save them anywhere, and the CFI will 751 // probably mark their values as "unrecoverable". 752 // 753 // (However, since the caller cannot assume the callee was going to 754 // save them, caller-saves registers are probably dead in the caller 755 // anyway, so compilers usually don't generate CFA for caller-saves 756 // registers.) 
//
// - Exactly where the CFA points is a matter of convention that
//   depends on the architecture and ABI in use. In the example, the
//   CFA is the value the stack pointer had upon entry to the
//   function, pointing at the saved return address. But on the x86,
//   the call frame information generated by GCC follows the
//   convention that the CFA is the address *after* the saved return
//   address.
//
//   But by definition, the CFA remains constant throughout the
//   lifetime of the frame. This makes it a useful value for other
//   columns to refer to. It also gives debuggers a useful handle
//   for identifying a frame.
//
// If you look at the table above, you'll notice that a given entry is
// often the same as the one immediately above it: most instructions
// change only one or two aspects of the stack frame, if they affect
// it at all. The DWARF format takes advantage of this fact, and
// reduces the size of the data by mentioning only the addresses and
// columns at which changes take place. So for the above, DWARF CFI
// data would only actually mention the following:
//
//     insn      cfa    r0      r1 ...  ra
//     =======================================
//     func+0:   sp                     cfa[0]
//     func+1:   sp+16
//     func+2:          cfa[-4]
//     func+11:  sp+20
//     func+21:         r0
//     func+22:  sp
//
// In fact, this is the way the parser reports CFI to the consumer: as
// a series of statements of the form, "At address X, column Y changed
// to Z," and related conventions for describing the initial state.
//
// Naturally, it would be impractical to have to scan the entire
// program's CFI, noting changes as we go, just to recover the
// unwinding rules in effect at one particular instruction.
To avoid 795 // this, CFI data is grouped into "entries", each of which covers a 796 // specified range of addresses and begins with a complete statement 797 // of the rules for all recoverable registers at that starting 798 // address. Each entry typically covers a single function. 799 // 800 // Thus, to compute the contents of a given row of the table --- that 801 // is, rules for recovering the CFA, RA, and registers at a given 802 // instruction --- the consumer should find the entry that covers that 803 // instruction's address, start with the initial state supplied at the 804 // beginning of the entry, and work forward until it has processed all 805 // the changes up to and including those for the present instruction. 806 // 807 // There are seven kinds of rules that can appear in an entry of the 808 // table: 809 // 810 // - "undefined": The given register is not preserved by the callee; 811 // its value cannot be recovered. 812 // 813 // - "same value": This register has the same value it did in the callee. 814 // 815 // - offset(N): The register is saved at offset N from the CFA. 816 // 817 // - val_offset(N): The value the register had in the caller is the 818 // CFA plus offset N. (This is usually only useful for describing 819 // the stack pointer.) 820 // 821 // - register(R): The register's value was saved in another register R. 822 // 823 // - expression(E): Evaluating the DWARF expression E using the 824 // current frame's registers' values yields the address at which the 825 // register was saved. 826 // 827 // - val_expression(E): Evaluating the DWARF expression E using the 828 // current frame's registers' values yields the value the register 829 // had in the caller. 830 831 class CallFrameInfo { 832 public: 833 // The different kinds of entries one finds in CFI. Used internally, 834 // and for error reporting. 
835 enum EntryKind { kUnknown, kCIE, kFDE, kTerminator }; 836 837 // The handler class to which the parser hands the parsed call frame 838 // information. Defined below. 839 class Handler; 840 841 // A reporter class, which CallFrameInfo uses to report errors 842 // encountered while parsing call frame information. Defined below. 843 class Reporter; 844 845 // Create a DWARF CFI parser. BUFFER points to the contents of the 846 // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes. 847 // REPORTER is an error reporter the parser should use to report 848 // problems. READER is a ByteReader instance that has the endianness and 849 // address size set properly. Report the data we find to HANDLER. 850 // 851 // This class can also parse Linux C++ exception handling data, as found 852 // in '.eh_frame' sections. This data is a variant of DWARF CFI that is 853 // placed in loadable segments so that it is present in the program's 854 // address space, and is interpreted by the C++ runtime to search the 855 // call stack for a handler interested in the exception being thrown, 856 // actually pop the frames, and find cleanup code to run. 857 // 858 // There are two differences between the call frame information described 859 // in the DWARF standard and the exception handling data Linux places in 860 // the .eh_frame section: 861 // 862 // - Exception handling data uses uses a different format for call frame 863 // information entry headers. The distinguished CIE id, the way FDEs 864 // refer to their CIEs, and the way the end of the series of entries is 865 // determined are all slightly different. 866 // 867 // If the constructor's EH_FRAME argument is true, then the 868 // CallFrameInfo parses the entry headers as Linux C++ exception 869 // handling data. If EH_FRAME is false or omitted, the CallFrameInfo 870 // parses standard DWARF call frame information. 
871 // 872 // - Linux C++ exception handling data uses CIE augmentation strings 873 // beginning with 'z' to specify the presence of additional data after 874 // the CIE and FDE headers and special encodings used for addresses in 875 // frame description entries. 876 // 877 // CallFrameInfo can handle 'z' augmentations in either DWARF CFI or 878 // exception handling data if you have supplied READER with the base 879 // addresses needed to interpret the pointer encodings that 'z' 880 // augmentations can specify. See the ByteReader interface for details 881 // about the base addresses. See the CallFrameInfo::Handler interface 882 // for details about the additional information one might find in 883 // 'z'-augmented data. 884 // 885 // Thus: 886 // 887 // - If you are parsing standard DWARF CFI, as found in a .debug_frame 888 // section, you should pass false for the EH_FRAME argument, or omit 889 // it, and you need not worry about providing READER with the 890 // additional base addresses. 891 // 892 // - If you want to parse Linux C++ exception handling data from a 893 // .eh_frame section, you should pass EH_FRAME as true, and call 894 // READER's Set*Base member functions before calling our Start method. 895 // 896 // - If you want to parse DWARF CFI that uses the 'z' augmentations 897 // (although I don't think any toolchain ever emits such data), you 898 // could pass false for EH_FRAME, but call READER's Set*Base members. 
899 // 900 // The extensions the Linux C++ ABI makes to DWARF for exception 901 // handling are described here, rather poorly: 902 // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html 903 // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html 904 // 905 // The mechanics of C++ exception handling, personality routines, 906 // and language-specific data areas are described here, rather nicely: 907 // http://www.codesourcery.com/public/cxx-abi/abi-eh.html 908 CallFrameInfo(const uint8_t *buffer, size_t buffer_length, 909 ByteReader *reader, Handler *handler, Reporter *reporter, 910 bool eh_frame = false) buffer_(buffer)911 : buffer_(buffer), buffer_length_(buffer_length), 912 reader_(reader), handler_(handler), reporter_(reporter), 913 eh_frame_(eh_frame) { } 914 ~CallFrameInfo()915 ~CallFrameInfo() { } 916 917 // Parse the entries in BUFFER, reporting what we find to HANDLER. 918 // Return true if we reach the end of the section successfully, or 919 // false if we encounter an error. 920 bool Start(); 921 922 // Return the textual name of KIND. For error reporting. 923 static const char *KindName(EntryKind kind); 924 925 private: 926 927 struct CIE; 928 929 // A CFI entry, either an FDE or a CIE. 930 struct Entry { 931 // The starting offset of the entry in the section, for error 932 // reporting. 933 size_t offset; 934 935 // The start of this entry in the buffer. 936 const uint8_t *start; 937 938 // Which kind of entry this is. 939 // 940 // We want to be able to use this for error reporting even while we're 941 // in the midst of parsing. Error reporting code may assume that kind, 942 // offset, and start fields are valid, although kind may be kUnknown. 943 EntryKind kind; 944 945 // The end of this entry's common prologue (initial length and id), and 946 // the start of this entry's kind-specific fields. 
947 const uint8_t *fields; 948 949 // The start of this entry's instructions. 950 const uint8_t *instructions; 951 952 // The address past the entry's last byte in the buffer. (Note that 953 // since offset points to the entry's initial length field, and the 954 // length field is the number of bytes after that field, this is not 955 // simply buffer_ + offset + length.) 956 const uint8_t *end; 957 958 // For both DWARF CFI and .eh_frame sections, this is the CIE id in a 959 // CIE, and the offset of the associated CIE in an FDE. 960 uint64_t id; 961 962 // The CIE that applies to this entry, if we've parsed it. If this is a 963 // CIE, then this field points to this structure. 964 CIE *cie; 965 }; 966 967 // A common information entry (CIE). 968 struct CIE: public Entry { 969 uint8_t version; // CFI data version number 970 string augmentation; // vendor format extension markers 971 uint64_t code_alignment_factor; // scale for code address adjustments 972 int data_alignment_factor; // scale for stack pointer adjustments 973 unsigned return_address_register; // which register holds the return addr 974 975 // True if this CIE includes Linux C++ ABI 'z' augmentation data. 976 bool has_z_augmentation; 977 978 // Parsed 'z' augmentation data. These are meaningful only if 979 // has_z_augmentation is true. 980 bool has_z_lsda; // The 'z' augmentation included 'L'. 981 bool has_z_personality; // The 'z' augmentation included 'P'. 982 bool has_z_signal_frame; // The 'z' augmentation included 'S'. 983 984 // If has_z_lsda is true, this is the encoding to be used for language- 985 // specific data area pointers in FDEs. 986 DwarfPointerEncoding lsda_encoding; 987 988 // If has_z_personality is true, this is the encoding used for the 989 // personality routine pointer in the augmentation data. 
990 DwarfPointerEncoding personality_encoding; 991 992 // If has_z_personality is true, this is the address of the personality 993 // routine --- or, if personality_encoding & DW_EH_PE_indirect, the 994 // address where the personality routine's address is stored. 995 uint64_t personality_address; 996 997 // This is the encoding used for addresses in the FDE header and 998 // in DW_CFA_set_loc instructions. This is always valid, whether 999 // or not we saw a 'z' augmentation string; its default value is 1000 // DW_EH_PE_absptr, which is what normal DWARF CFI uses. 1001 DwarfPointerEncoding pointer_encoding; 1002 1003 // These were only introduced in DWARF4, so will not be set in older 1004 // versions. 1005 uint8_t address_size; 1006 uint8_t segment_size; 1007 }; 1008 1009 // A frame description entry (FDE). 1010 struct FDE: public Entry { 1011 uint64_t address; // start address of described code 1012 uint64_t size; // size of described code, in bytes 1013 1014 // If cie->has_z_lsda is true, then this is the language-specific data 1015 // area's address --- or its address's address, if cie->lsda_encoding 1016 // has the DW_EH_PE_indirect bit set. 1017 uint64_t lsda_address; 1018 }; 1019 1020 // Internal use. 1021 class Rule; 1022 class UndefinedRule; 1023 class SameValueRule; 1024 class OffsetRule; 1025 class ValOffsetRule; 1026 class RegisterRule; 1027 class ExpressionRule; 1028 class ValExpressionRule; 1029 class RuleMap; 1030 class State; 1031 1032 // Parse the initial length and id of a CFI entry, either a CIE, an FDE, 1033 // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the 1034 // data to parse. On success, populate ENTRY as appropriate, and return 1035 // true. On failure, report the problem, and return false. Even if we 1036 // return false, set ENTRY->end to the first byte after the entry if we 1037 // were able to figure that out, or NULL if we weren't. 
1038 bool ReadEntryPrologue(const uint8_t *cursor, Entry *entry); 1039 1040 // Parse the fields of a CIE after the entry prologue, including any 'z' 1041 // augmentation data. Assume that the 'Entry' fields of CIE are 1042 // populated; use CIE->fields and CIE->end as the start and limit for 1043 // parsing. On success, populate the rest of *CIE, and return true; on 1044 // failure, report the problem and return false. 1045 bool ReadCIEFields(CIE *cie); 1046 1047 // Parse the fields of an FDE after the entry prologue, including any 'z' 1048 // augmentation data. Assume that the 'Entry' fields of *FDE are 1049 // initialized; use FDE->fields and FDE->end as the start and limit for 1050 // parsing. Assume that FDE->cie is fully initialized. On success, 1051 // populate the rest of *FDE, and return true; on failure, report the 1052 // problem and return false. 1053 bool ReadFDEFields(FDE *fde); 1054 1055 // Report that ENTRY is incomplete, and return false. This is just a 1056 // trivial wrapper for invoking reporter_->Incomplete; it provides a 1057 // little brevity. 1058 bool ReportIncomplete(Entry *entry); 1059 1060 // Return true if ENCODING has the DW_EH_PE_indirect bit set. IsIndirectEncoding(DwarfPointerEncoding encoding)1061 static bool IsIndirectEncoding(DwarfPointerEncoding encoding) { 1062 return encoding & DW_EH_PE_indirect; 1063 } 1064 1065 // The contents of the DWARF .debug_info section we're parsing. 1066 const uint8_t *buffer_; 1067 size_t buffer_length_; 1068 1069 // For reading multi-byte values with the appropriate endianness. 1070 ByteReader *reader_; 1071 1072 // The handler to which we should report the data we find. 1073 Handler *handler_; 1074 1075 // For reporting problems in the info we're parsing. 1076 Reporter *reporter_; 1077 1078 // True if we are processing .eh_frame-format data. 1079 bool eh_frame_; 1080 }; 1081 1082 // The handler class for CallFrameInfo. 
The a CFI parser calls the 1083 // member functions of a handler object to report the data it finds. 1084 class CallFrameInfo::Handler { 1085 public: 1086 // The pseudo-register number for the canonical frame address. 1087 enum { kCFARegister = -1 }; 1088 Handler()1089 Handler() { } ~Handler()1090 virtual ~Handler() { } 1091 1092 // The parser has found CFI for the machine code at ADDRESS, 1093 // extending for LENGTH bytes. OFFSET is the offset of the frame 1094 // description entry in the section, for use in error messages. 1095 // VERSION is the version number of the CFI format. AUGMENTATION is 1096 // a string describing any producer-specific extensions present in 1097 // the data. RETURN_ADDRESS is the number of the register that holds 1098 // the address to which the function should return. 1099 // 1100 // Entry should return true to process this CFI, or false to skip to 1101 // the next entry. 1102 // 1103 // The parser invokes Entry for each Frame Description Entry (FDE) 1104 // it finds. The parser doesn't report Common Information Entries 1105 // to the handler explicitly; instead, if the handler elects to 1106 // process a given FDE, the parser reiterates the appropriate CIE's 1107 // contents at the beginning of the FDE's rules. 1108 virtual bool Entry(size_t offset, uint64_t address, uint64_t length, 1109 uint8_t version, const string &augmentation, 1110 unsigned return_address) = 0; 1111 1112 // When the Entry function returns true, the parser calls these 1113 // handler functions repeatedly to describe the rules for recovering 1114 // registers at each instruction in the given range of machine code. 1115 // Immediately after a call to Entry, the handler should assume that 1116 // the rule for each callee-saves register is "unchanged" --- that 1117 // is, that the register still has the value it had in the caller. 1118 // 1119 // If a *Rule function returns true, we continue processing this entry's 1120 // instructions. 
If a *Rule function returns false, we stop evaluating 1121 // instructions, and skip to the next entry. Either way, we call End 1122 // before going on to the next entry. 1123 // 1124 // In all of these functions, if the REG parameter is kCFARegister, then 1125 // the rule describes how to find the canonical frame address. 1126 // kCFARegister may be passed as a BASE_REGISTER argument, meaning that 1127 // the canonical frame address should be used as the base address for the 1128 // computation. All other REG values will be positive. 1129 1130 // At ADDRESS, register REG's value is not recoverable. 1131 virtual bool UndefinedRule(uint64_t address, int reg) = 0; 1132 1133 // At ADDRESS, register REG's value is the same as that it had in 1134 // the caller. 1135 virtual bool SameValueRule(uint64_t address, int reg) = 0; 1136 1137 // At ADDRESS, register REG has been saved at offset OFFSET from 1138 // BASE_REGISTER. 1139 virtual bool OffsetRule(uint64_t address, int reg, 1140 int base_register, long offset) = 0; 1141 1142 // At ADDRESS, the caller's value of register REG is the current 1143 // value of BASE_REGISTER plus OFFSET. (This rule doesn't provide an 1144 // address at which the register's value is saved.) 1145 virtual bool ValOffsetRule(uint64_t address, int reg, 1146 int base_register, long offset) = 0; 1147 1148 // At ADDRESS, register REG has been saved in BASE_REGISTER. This differs 1149 // from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that 1150 // BASE_REGISTER is the "home" for REG's saved value: if you want to 1151 // assign to a variable whose home is REG in the calling frame, you 1152 // should put the value in BASE_REGISTER. 1153 virtual bool RegisterRule(uint64_t address, int reg, int base_register) = 0; 1154 1155 // At ADDRESS, the DWARF expression EXPRESSION yields the address at 1156 // which REG was saved. 
1157 virtual bool ExpressionRule(uint64_t address, int reg, 1158 const string &expression) = 0; 1159 1160 // At ADDRESS, the DWARF expression EXPRESSION yields the caller's 1161 // value for REG. (This rule doesn't provide an address at which the 1162 // register's value is saved.) 1163 virtual bool ValExpressionRule(uint64_t address, int reg, 1164 const string &expression) = 0; 1165 1166 // Indicate that the rules for the address range reported by the 1167 // last call to Entry are complete. End should return true if 1168 // everything is okay, or false if an error has occurred and parsing 1169 // should stop. 1170 virtual bool End() = 0; 1171 1172 // Handler functions for Linux C++ exception handling data. These are 1173 // only called if the data includes 'z' augmentation strings. 1174 1175 // The Linux C++ ABI uses an extension of the DWARF CFI format to 1176 // walk the stack to propagate exceptions from the throw to the 1177 // appropriate catch, and do the appropriate cleanups along the way. 1178 // CFI entries used for exception handling have two additional data 1179 // associated with them: 1180 // 1181 // - The "language-specific data area" describes which exception 1182 // types the function has 'catch' clauses for, and indicates how 1183 // to go about re-entering the function at the appropriate catch 1184 // clause. If the exception is not caught, it describes the 1185 // destructors that must run before the frame is popped. 1186 // 1187 // - The "personality routine" is responsible for interpreting the 1188 // language-specific data area's contents, and deciding whether 1189 // the exception should continue to propagate down the stack, 1190 // perhaps after doing some cleanup for this frame, or whether the 1191 // exception will be caught here. 1192 // 1193 // In principle, the language-specific data area is opaque to 1194 // everybody but the personality routine. 
In practice, these values 1195 // may be useful or interesting to readers with extra context, and 1196 // we have to at least skip them anyway, so we might as well report 1197 // them to the handler. 1198 1199 // This entry's exception handling personality routine's address is 1200 // ADDRESS. If INDIRECT is true, then ADDRESS is the address at 1201 // which the routine's address is stored. The default definition for 1202 // this handler function simply returns true, allowing parsing of 1203 // the entry to continue. PersonalityRoutine(uint64_t address,bool indirect)1204 virtual bool PersonalityRoutine(uint64_t address, bool indirect) { 1205 return true; 1206 } 1207 1208 // This entry's language-specific data area (LSDA) is located at 1209 // ADDRESS. If INDIRECT is true, then ADDRESS is the address at 1210 // which the area's address is stored. The default definition for 1211 // this handler function simply returns true, allowing parsing of 1212 // the entry to continue. LanguageSpecificDataArea(uint64_t address,bool indirect)1213 virtual bool LanguageSpecificDataArea(uint64_t address, bool indirect) { 1214 return true; 1215 } 1216 1217 // This entry describes a signal trampoline --- this frame is the 1218 // caller of a signal handler. The default definition for this 1219 // handler function simply returns true, allowing parsing of the 1220 // entry to continue. 1221 // 1222 // The best description of the rationale for and meaning of signal 1223 // trampoline CFI entries seems to be in the GCC bug database: 1224 // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208 SignalHandler()1225 virtual bool SignalHandler() { return true; } 1226 }; 1227 1228 // The CallFrameInfo class makes calls on an instance of this class to 1229 // report errors or warn about problems in the data it is parsing. The 1230 // default definitions of these methods print a message to stderr, but 1231 // you can make a derived class that overrides them. 
1232 class CallFrameInfo::Reporter { 1233 public: 1234 // Create an error reporter which attributes troubles to the section 1235 // named SECTION in FILENAME. 1236 // 1237 // Normally SECTION would be .debug_frame, but the Mac puts CFI data 1238 // in a Mach-O section named __debug_frame. If we support 1239 // Linux-style exception handling data, we could be reading an 1240 // .eh_frame section. 1241 Reporter(const string &filename, 1242 const string §ion = ".debug_frame") filename_(filename)1243 : filename_(filename), section_(section) { } ~Reporter()1244 virtual ~Reporter() { } 1245 1246 // The CFI entry at OFFSET ends too early to be well-formed. KIND 1247 // indicates what kind of entry it is; KIND can be kUnknown if we 1248 // haven't parsed enough of the entry to tell yet. 1249 virtual void Incomplete(uint64_t offset, CallFrameInfo::EntryKind kind); 1250 1251 // The .eh_frame data has a four-byte zero at OFFSET where the next 1252 // entry's length would be; this is a terminator. However, the buffer 1253 // length as given to the CallFrameInfo constructor says there should be 1254 // more data. 1255 virtual void EarlyEHTerminator(uint64_t offset); 1256 1257 // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the 1258 // section is not that large. 1259 virtual void CIEPointerOutOfRange(uint64_t offset, uint64_t cie_offset); 1260 1261 // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry 1262 // there is not a CIE. 1263 virtual void BadCIEId(uint64_t offset, uint64_t cie_offset); 1264 1265 // The FDE at OFFSET refers to a CIE with an address size we don't know how 1266 // to handle. 1267 virtual void UnexpectedAddressSize(uint64_t offset, uint8_t address_size); 1268 1269 // The FDE at OFFSET refers to a CIE with an segment descriptor size we 1270 // don't know how to handle. 
1271 virtual void UnexpectedSegmentSize(uint64_t offset, uint8_t segment_size); 1272 1273 // The FDE at OFFSET refers to a CIE with version number VERSION, 1274 // which we don't recognize. We cannot parse DWARF CFI if it uses 1275 // a version number we don't recognize. 1276 virtual void UnrecognizedVersion(uint64_t offset, int version); 1277 1278 // The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION, 1279 // which we don't recognize. We cannot parse DWARF CFI if it uses 1280 // augmentations we don't recognize. 1281 virtual void UnrecognizedAugmentation(uint64_t offset, 1282 const string &augmentation); 1283 1284 // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not 1285 // a valid encoding. 1286 virtual void InvalidPointerEncoding(uint64_t offset, uint8_t encoding); 1287 1288 // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends 1289 // on a base address which has not been supplied. 1290 virtual void UnusablePointerEncoding(uint64_t offset, uint8_t encoding); 1291 1292 // The CIE at OFFSET contains a DW_CFA_restore instruction at 1293 // INSN_OFFSET, which may not appear in a CIE. 1294 virtual void RestoreInCIE(uint64_t offset, uint64_t insn_offset); 1295 1296 // The entry at OFFSET, of kind KIND, has an unrecognized 1297 // instruction at INSN_OFFSET. 1298 virtual void BadInstruction(uint64_t offset, CallFrameInfo::EntryKind kind, 1299 uint64_t insn_offset); 1300 1301 // The instruction at INSN_OFFSET in the entry at OFFSET, of kind 1302 // KIND, establishes a rule that cites the CFA, but we have not 1303 // established a CFA rule yet. 1304 virtual void NoCFARule(uint64_t offset, CallFrameInfo::EntryKind kind, 1305 uint64_t insn_offset); 1306 1307 // The instruction at INSN_OFFSET in the entry at OFFSET, of kind 1308 // KIND, is a DW_CFA_restore_state instruction, but the stack of 1309 // saved states is empty. 
1310 virtual void EmptyStateStack(uint64_t offset, CallFrameInfo::EntryKind kind, 1311 uint64_t insn_offset); 1312 1313 // The DW_CFA_remember_state instruction at INSN_OFFSET in the entry 1314 // at OFFSET, of kind KIND, would restore a state that has no CFA 1315 // rule, whereas the current state does have a CFA rule. This is 1316 // bogus input, which the CallFrameInfo::Handler interface doesn't 1317 // (and shouldn't) have any way to report. 1318 virtual void ClearingCFARule(uint64_t offset, CallFrameInfo::EntryKind kind, 1319 uint64_t insn_offset); 1320 1321 protected: 1322 // The name of the file whose CFI we're reading. 1323 string filename_; 1324 1325 // The name of the CFI section in that file. 1326 string section_; 1327 }; 1328 1329 } // namespace dwarf2reader 1330 1331 #endif // UTIL_DEBUGINFO_DWARF2READER_H__ 1332