// -*- mode: C++ -*-

// Copyright (c) 2010 Google Inc. All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

// This file contains definitions related to the DWARF2/3 reader and
// its handler interfaces.
35 // The DWARF2/3 specification can be found at 36 // http://dwarf.freestandards.org and should be considered required 37 // reading if you wish to modify the implementation. 38 // Only a cursory attempt is made to explain terminology that is 39 // used here, as it is much better explained in the standard documents 40 #ifndef COMMON_DWARF_DWARF2READER_H__ 41 #define COMMON_DWARF_DWARF2READER_H__ 42 43 #include <list> 44 #include <map> 45 #include <string> 46 #include <utility> 47 #include <vector> 48 49 #include "common/dwarf/bytereader.h" 50 #include "common/dwarf/dwarf2enums.h" 51 #include "common/dwarf/types.h" 52 #include "common/using_std_string.h" 53 54 namespace dwarf2reader { 55 struct LineStateMachine; 56 class Dwarf2Handler; 57 class LineInfoHandler; 58 59 // This maps from a string naming a section to a pair containing a 60 // the data for the section, and the size of the section. 61 typedef std::map<string, std::pair<const char*, uint64> > SectionMap; 62 typedef std::list<std::pair<enum DwarfAttribute, enum DwarfForm> > 63 AttributeList; 64 typedef AttributeList::iterator AttributeIterator; 65 typedef AttributeList::const_iterator ConstAttributeIterator; 66 67 struct LineInfoHeader { 68 uint64 total_length; 69 uint16 version; 70 uint64 prologue_length; 71 uint8 min_insn_length; // insn stands for instructin 72 bool default_is_stmt; // stmt stands for statement 73 int8 line_base; 74 uint8 line_range; 75 uint8 opcode_base; 76 // Use a pointer so that signalsafe_addr2line is able to use this structure 77 // without heap allocation problem. 78 std::vector<unsigned char> *std_opcode_lengths; 79 }; 80 81 class LineInfo { 82 public: 83 84 // Initializes a .debug_line reader. Buffer and buffer length point 85 // to the beginning and length of the line information to read. 86 // Reader is a ByteReader class that has the endianness set 87 // properly. 
88 LineInfo(const char* buffer_, uint64 buffer_length, 89 ByteReader* reader, LineInfoHandler* handler); 90 ~LineInfo()91 virtual ~LineInfo() { 92 if (header_.std_opcode_lengths) { 93 delete header_.std_opcode_lengths; 94 } 95 } 96 97 // Start processing line info, and calling callbacks in the handler. 98 // Consumes the line number information for a single compilation unit. 99 // Returns the number of bytes processed. 100 uint64 Start(); 101 102 // Process a single line info opcode at START using the state 103 // machine at LSM. Return true if we should define a line using the 104 // current state of the line state machine. Place the length of the 105 // opcode in LEN. 106 // If LSM_PASSES_PC is non-NULL, this function also checks if the lsm 107 // passes the address of PC. In other words, LSM_PASSES_PC will be 108 // set to true, if the following condition is met. 109 // 110 // lsm's old address < PC <= lsm's new address 111 static bool ProcessOneOpcode(ByteReader* reader, 112 LineInfoHandler* handler, 113 const struct LineInfoHeader &header, 114 const char* start, 115 struct LineStateMachine* lsm, 116 size_t* len, 117 uintptr pc, 118 bool *lsm_passes_pc); 119 120 private: 121 // Reads the DWARF2/3 header for this line info. 122 void ReadHeader(); 123 124 // Reads the DWARF2/3 line information 125 void ReadLines(); 126 127 // The associated handler to call processing functions in 128 LineInfoHandler* handler_; 129 130 // The associated ByteReader that handles endianness issues for us 131 ByteReader* reader_; 132 133 // A DWARF2/3 line info header. This is not the same size as 134 // in the actual file, as the one in the file may have a 32 bit or 135 // 64 bit lengths 136 137 struct LineInfoHeader header_; 138 139 // buffer is the buffer for our line info, starting at exactly where 140 // the line info to read is. after_header is the place right after 141 // the end of the line information header. 
142 const char* buffer_; 143 uint64 buffer_length_; 144 const char* after_header_; 145 }; 146 147 // This class is the main interface between the line info reader and 148 // the client. The virtual functions inside this get called for 149 // interesting events that happen during line info reading. The 150 // default implementation does nothing 151 152 class LineInfoHandler { 153 public: LineInfoHandler()154 LineInfoHandler() { } 155 ~LineInfoHandler()156 virtual ~LineInfoHandler() { } 157 158 // Called when we define a directory. NAME is the directory name, 159 // DIR_NUM is the directory number DefineDir(const string & name,uint32 dir_num)160 virtual void DefineDir(const string& name, uint32 dir_num) { } 161 162 // Called when we define a filename. NAME is the filename, FILE_NUM 163 // is the file number which is -1 if the file index is the next 164 // index after the last numbered index (this happens when files are 165 // dynamically defined by the line program), DIR_NUM is the 166 // directory index for the directory name of this file, MOD_TIME is 167 // the modification time of the file, and LENGTH is the length of 168 // the file DefineFile(const string & name,int32 file_num,uint32 dir_num,uint64 mod_time,uint64 length)169 virtual void DefineFile(const string& name, int32 file_num, 170 uint32 dir_num, uint64 mod_time, 171 uint64 length) { } 172 173 // Called when the line info reader has a new line, address pair 174 // ready for us. ADDRESS is the address of the code, LENGTH is the 175 // length of its machine code in bytes, FILE_NUM is the file number 176 // containing the code, LINE_NUM is the line number in that file for 177 // the code, and COLUMN_NUM is the column number the code starts at, 178 // if we know it (0 otherwise). 
AddLine(uint64 address,uint64 length,uint32 file_num,uint32 line_num,uint32 column_num)179 virtual void AddLine(uint64 address, uint64 length, 180 uint32 file_num, uint32 line_num, uint32 column_num) { } 181 }; 182 183 // The base of DWARF2/3 debug info is a DIE (Debugging Information 184 // Entry. 185 // DWARF groups DIE's into a tree and calls the root of this tree a 186 // "compilation unit". Most of the time, there is one compilation 187 // unit in the .debug_info section for each file that had debug info 188 // generated. 189 // Each DIE consists of 190 191 // 1. a tag specifying a thing that is being described (ie 192 // DW_TAG_subprogram for functions, DW_TAG_variable for variables, etc 193 // 2. attributes (such as DW_AT_location for location in memory, 194 // DW_AT_name for name), and data for each attribute. 195 // 3. A flag saying whether the DIE has children or not 196 197 // In order to gain some amount of compression, the format of 198 // each DIE (tag name, attributes and data forms for the attributes) 199 // are stored in a separate table called the "abbreviation table". 200 // This is done because a large number of DIEs have the exact same tag 201 // and list of attributes, but different data for those attributes. 202 // As a result, the .debug_info section is just a stream of data, and 203 // requires reading of the .debug_abbrev section to say what the data 204 // means. 205 206 // As a warning to the user, it should be noted that the reason for 207 // using absolute offsets from the beginning of .debug_info is that 208 // DWARF2/3 supports referencing DIE's from other DIE's by their offset 209 // from either the current compilation unit start, *or* the beginning 210 // of the .debug_info section. This means it is possible to reference 211 // a DIE in one compilation unit from a DIE in another compilation 212 // unit. 
This style of reference is usually used to eliminate 213 // duplicated information that occurs across compilation 214 // units, such as base types, etc. GCC 3.4+ support this with 215 // -feliminate-dwarf2-dups. Other toolchains will sometimes do 216 // duplicate elimination in the linker. 217 218 class CompilationUnit { 219 public: 220 221 // Initialize a compilation unit. This requires a map of sections, 222 // the offset of this compilation unit in the .debug_info section, a 223 // ByteReader, and a Dwarf2Handler class to call callbacks in. 224 CompilationUnit(const SectionMap& sections, uint64 offset, 225 ByteReader* reader, Dwarf2Handler* handler); ~CompilationUnit()226 virtual ~CompilationUnit() { 227 if (abbrevs_) delete abbrevs_; 228 } 229 230 // Begin reading a Dwarf2 compilation unit, and calling the 231 // callbacks in the Dwarf2Handler 232 233 // Return the full length of the compilation unit, including 234 // headers. This plus the starting offset passed to the constructor 235 // is the offset of the end of the compilation unit --- and the 236 // start of the next compilation unit, if there is one. 237 uint64 Start(); 238 239 private: 240 241 // This struct represents a single DWARF2/3 abbreviation 242 // The abbreviation tells how to read a DWARF2/3 DIE, and consist of a 243 // tag and a list of attributes, as well as the data form of each attribute. 244 struct Abbrev { 245 uint64 number; 246 enum DwarfTag tag; 247 bool has_children; 248 AttributeList attributes; 249 }; 250 251 // A DWARF2/3 compilation unit header. This is not the same size as 252 // in the actual file, as the one in the file may have a 32 bit or 253 // 64 bit length. 254 struct CompilationUnitHeader { 255 uint64 length; 256 uint16 version; 257 uint64 abbrev_offset; 258 uint8 address_size; 259 } header_; 260 261 // Reads the DWARF2/3 header for this compilation unit. 
262 void ReadHeader(); 263 264 // Reads the DWARF2/3 abbreviations for this compilation unit 265 void ReadAbbrevs(); 266 267 // Processes a single DIE for this compilation unit and return a new 268 // pointer just past the end of it 269 const char* ProcessDIE(uint64 dieoffset, 270 const char* start, 271 const Abbrev& abbrev); 272 273 // Processes a single attribute and return a new pointer just past the 274 // end of it 275 const char* ProcessAttribute(uint64 dieoffset, 276 const char* start, 277 enum DwarfAttribute attr, 278 enum DwarfForm form); 279 280 // Processes all DIEs for this compilation unit 281 void ProcessDIEs(); 282 283 // Skips the die with attributes specified in ABBREV starting at 284 // START, and return the new place to position the stream to. 285 const char* SkipDIE(const char* start, 286 const Abbrev& abbrev); 287 288 // Skips the attribute starting at START, with FORM, and return the 289 // new place to position the stream to. 290 const char* SkipAttribute(const char* start, 291 enum DwarfForm form); 292 293 // Offset from section start is the offset of this compilation unit 294 // from the beginning of the .debug_info section. 295 uint64 offset_from_section_start_; 296 297 // buffer is the buffer for our CU, starting at .debug_info + offset 298 // passed in from constructor. 299 // after_header points to right after the compilation unit header. 300 const char* buffer_; 301 uint64 buffer_length_; 302 const char* after_header_; 303 304 // The associated ByteReader that handles endianness issues for us 305 ByteReader* reader_; 306 307 // The map of sections in our file to buffers containing their data 308 const SectionMap& sections_; 309 310 // The associated handler to call processing functions in 311 Dwarf2Handler* handler_; 312 313 // Set of DWARF2/3 abbreviations for this compilation unit. Indexed 314 // by abbreviation number, which means that abbrevs_[0] is not 315 // valid. 
316 std::vector<Abbrev>* abbrevs_; 317 318 // String section buffer and length, if we have a string section. 319 // This is here to avoid doing a section lookup for strings in 320 // ProcessAttribute, which is in the hot path for DWARF2 reading. 321 const char* string_buffer_; 322 uint64 string_buffer_length_; 323 }; 324 325 // This class is the main interface between the reader and the 326 // client. The virtual functions inside this get called for 327 // interesting events that happen during DWARF2 reading. 328 // The default implementation skips everything. 329 330 class Dwarf2Handler { 331 public: Dwarf2Handler()332 Dwarf2Handler() { } 333 ~Dwarf2Handler()334 virtual ~Dwarf2Handler() { } 335 336 // Start to process a compilation unit at OFFSET from the beginning of the 337 // .debug_info section. Return false if you would like to skip this 338 // compilation unit. StartCompilationUnit(uint64 offset,uint8 address_size,uint8 offset_size,uint64 cu_length,uint8 dwarf_version)339 virtual bool StartCompilationUnit(uint64 offset, uint8 address_size, 340 uint8 offset_size, uint64 cu_length, 341 uint8 dwarf_version) { return false; } 342 343 // Start to process a DIE at OFFSET from the beginning of the .debug_info 344 // section. Return false if you would like to skip this DIE. StartDIE(uint64 offset,enum DwarfTag tag)345 virtual bool StartDIE(uint64 offset, enum DwarfTag tag) { return false; } 346 347 // Called when we have an attribute with unsigned data to give to our 348 // handler. The attribute is for the DIE at OFFSET from the beginning of the 349 // .debug_info section. Its name is ATTR, its form is FORM, and its value is 350 // DATA. ProcessAttributeUnsigned(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,uint64 data)351 virtual void ProcessAttributeUnsigned(uint64 offset, 352 enum DwarfAttribute attr, 353 enum DwarfForm form, 354 uint64 data) { } 355 356 // Called when we have an attribute with signed data to give to our handler. 
357 // The attribute is for the DIE at OFFSET from the beginning of the 358 // .debug_info section. Its name is ATTR, its form is FORM, and its value is 359 // DATA. ProcessAttributeSigned(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,int64 data)360 virtual void ProcessAttributeSigned(uint64 offset, 361 enum DwarfAttribute attr, 362 enum DwarfForm form, 363 int64 data) { } 364 365 // Called when we have an attribute whose value is a reference to 366 // another DIE. The attribute belongs to the DIE at OFFSET from the 367 // beginning of the .debug_info section. Its name is ATTR, its form 368 // is FORM, and the offset of the DIE being referred to from the 369 // beginning of the .debug_info section is DATA. ProcessAttributeReference(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,uint64 data)370 virtual void ProcessAttributeReference(uint64 offset, 371 enum DwarfAttribute attr, 372 enum DwarfForm form, 373 uint64 data) { } 374 375 // Called when we have an attribute with a buffer of data to give to our 376 // handler. The attribute is for the DIE at OFFSET from the beginning of the 377 // .debug_info section. Its name is ATTR, its form is FORM, DATA points to 378 // the buffer's contents, and its length in bytes is LENGTH. The buffer is 379 // owned by the caller, not the callee, and may not persist for very long. 380 // If you want the data to be available later, it needs to be copied. ProcessAttributeBuffer(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,const char * data,uint64 len)381 virtual void ProcessAttributeBuffer(uint64 offset, 382 enum DwarfAttribute attr, 383 enum DwarfForm form, 384 const char* data, 385 uint64 len) { } 386 387 // Called when we have an attribute with string data to give to our handler. 388 // The attribute is for the DIE at OFFSET from the beginning of the 389 // .debug_info section. Its name is ATTR, its form is FORM, and its value is 390 // DATA. 
ProcessAttributeString(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,const string & data)391 virtual void ProcessAttributeString(uint64 offset, 392 enum DwarfAttribute attr, 393 enum DwarfForm form, 394 const string& data) { } 395 396 // Called when we have an attribute whose value is the 64-bit signature 397 // of a type unit in the .debug_types section. OFFSET is the offset of 398 // the DIE whose attribute we're reporting. ATTR and FORM are the 399 // attribute's name and form. SIGNATURE is the type unit's signature. ProcessAttributeSignature(uint64 offset,enum DwarfAttribute attr,enum DwarfForm form,uint64 signature)400 virtual void ProcessAttributeSignature(uint64 offset, 401 enum DwarfAttribute attr, 402 enum DwarfForm form, 403 uint64 signature) { } 404 405 // Called when finished processing the DIE at OFFSET. 406 // Because DWARF2/3 specifies a tree of DIEs, you may get starts 407 // before ends of the previous DIE, as we process children before 408 // ending the parent. EndDIE(uint64 offset)409 virtual void EndDIE(uint64 offset) { } 410 411 }; 412 413 // This class is a reader for DWARF's Call Frame Information. CFI 414 // describes how to unwind stack frames --- even for functions that do 415 // not follow fixed conventions for saving registers, whose frame size 416 // varies as they execute, etc. 417 // 418 // CFI describes, at each machine instruction, how to compute the 419 // stack frame's base address, how to find the return address, and 420 // where to find the saved values of the caller's registers (if the 421 // callee has stashed them somewhere to free up the registers for its 422 // own use). 
//
// For example, suppose we have a function whose machine code looks
// like this (imagine an assembly language that looks like C, for a
// machine with 32-bit registers, and a stack that grows towards lower
// addresses):
//
// func:                                ; entry point; return address at sp
// func+0:      sp = sp - 16            ; allocate space for stack frame
// func+1:      sp[12] = r0             ; save r0 at sp+12
// ...                                  ; other code, not frame-related
// func+10:     sp -= 4; *sp = x        ; push some x on the stack
// ...                                  ; other code, not frame-related
// func+20:     r0 = sp[16]             ; restore saved r0
// func+21:     sp += 20                ; pop whole stack frame
// func+22:     pc = *sp; sp += 4       ; pop return address and jump to it
//
// DWARF CFI is (a very compressed representation of) a table with a
// row for each machine instruction address and a column for each
// register showing how to restore it, if possible.
//
// A special column named "CFA", for "Canonical Frame Address", tells how
// to compute the base address of the frame; registers' entries may
// refer to the CFA in describing where the registers are saved.
//
// Another special column, named "RA", represents the return address.
//
// For example, here is a complete (uncompressed) table describing the
// function above:
//
//     insn      cfa    r0      r1 ...  ra
//     =======================================
//     func+0:   sp                     cfa[0]
//     func+1:   sp+16                  cfa[0]
//     func+2:   sp+16  cfa[-4]         cfa[0]
//     func+11:  sp+20  cfa[-4]         cfa[0]
//     func+21:  sp+20                  cfa[0]
//     func+22:  sp                     cfa[0]
//
// Some things to note here:
//
// - Each row describes the state of affairs *before* executing the
//   instruction at the given address.  Thus, the row for func+0
//   describes the state before we allocate the stack frame.  In the
//   next row, the formula for computing the CFA has changed,
//   reflecting that allocation.
//
// - The other entries are written in terms of the CFA; this allows
//   them to remain unchanged as the stack pointer gets bumped around.
//   For example, the rule for recovering the return address (the "ra"
//   column) remains unchanged throughout the function, even as the
//   stack pointer takes on three different offsets from the return
//   address.
//
// - Although we haven't shown it, most calling conventions designate
//   "callee-saves" and "caller-saves" registers. The callee must
//   preserve the values of callee-saves registers; if it uses them,
//   it must save their original values somewhere, and restore them
//   before it returns. In contrast, the callee is free to trash
//   caller-saves registers; if the callee uses these, it will
//   probably not bother to save them anywhere, and the CFI will
//   probably mark their values as "unrecoverable".
//
//   (However, since the caller cannot assume the callee was going to
//   save them, caller-saves registers are probably dead in the caller
//   anyway, so compilers usually don't generate CFI for caller-saves
//   registers.)
//
// - Exactly where the CFA points is a matter of convention that
//   depends on the architecture and ABI in use. In the example, the
//   CFA is the value the stack pointer had upon entry to the
//   function, pointing at the saved return address. But on the x86,
//   the call frame information generated by GCC follows the
//   convention that the CFA is the address *after* the saved return
//   address.
//
//   But by definition, the CFA remains constant throughout the
//   lifetime of the frame. This makes it a useful value for other
//   columns to refer to. It also gives debuggers a useful handle
//   for identifying a frame.
//
// If you look at the table above, you'll notice that a given entry is
// often the same as the one immediately above it: most instructions
// change only one or two aspects of the stack frame, if they affect
// it at all. The DWARF format takes advantage of this fact, and
// reduces the size of the data by mentioning only the addresses and
// columns at which changes take place. So for the above, DWARF CFI
// data would only actually mention the following:
//
//     insn      cfa    r0      r1 ...  ra
//     =======================================
//     func+0:   sp                     cfa[0]
//     func+1:   sp+16
//     func+2:          cfa[-4]
//     func+11:  sp+20
//     func+21:         r0
//     func+22:  sp
//
// In fact, this is the way the parser reports CFI to the consumer: as
// a series of statements of the form, "At address X, column Y changed
// to Z," and related conventions for describing the initial state.
//
// Naturally, it would be impractical to have to scan the entire
// program's CFI, noting changes as we go, just to recover the
// unwinding rules in effect at one particular instruction. To avoid
// this, CFI data is grouped into "entries", each of which covers a
// specified range of addresses and begins with a complete statement
// of the rules for all recoverable registers at that starting
// address. Each entry typically covers a single function.
//
// Thus, to compute the contents of a given row of the table --- that
// is, rules for recovering the CFA, RA, and registers at a given
// instruction --- the consumer should find the entry that covers that
// instruction's address, start with the initial state supplied at the
// beginning of the entry, and work forward until it has processed all
// the changes up to and including those for the present instruction.
538 // 539 // There are seven kinds of rules that can appear in an entry of the 540 // table: 541 // 542 // - "undefined": The given register is not preserved by the callee; 543 // its value cannot be recovered. 544 // 545 // - "same value": This register has the same value it did in the callee. 546 // 547 // - offset(N): The register is saved at offset N from the CFA. 548 // 549 // - val_offset(N): The value the register had in the caller is the 550 // CFA plus offset N. (This is usually only useful for describing 551 // the stack pointer.) 552 // 553 // - register(R): The register's value was saved in another register R. 554 // 555 // - expression(E): Evaluating the DWARF expression E using the 556 // current frame's registers' values yields the address at which the 557 // register was saved. 558 // 559 // - val_expression(E): Evaluating the DWARF expression E using the 560 // current frame's registers' values yields the value the register 561 // had in the caller. 562 563 class CallFrameInfo { 564 public: 565 // The different kinds of entries one finds in CFI. Used internally, 566 // and for error reporting. 567 enum EntryKind { kUnknown, kCIE, kFDE, kTerminator }; 568 569 // The handler class to which the parser hands the parsed call frame 570 // information. Defined below. 571 class Handler; 572 573 // A reporter class, which CallFrameInfo uses to report errors 574 // encountered while parsing call frame information. Defined below. 575 class Reporter; 576 577 // Create a DWARF CFI parser. BUFFER points to the contents of the 578 // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes. 579 // REPORTER is an error reporter the parser should use to report 580 // problems. READER is a ByteReader instance that has the endianness and 581 // address size set properly. Report the data we find to HANDLER. 582 // 583 // This class can also parse Linux C++ exception handling data, as found 584 // in '.eh_frame' sections. 
This data is a variant of DWARF CFI that is 585 // placed in loadable segments so that it is present in the program's 586 // address space, and is interpreted by the C++ runtime to search the 587 // call stack for a handler interested in the exception being thrown, 588 // actually pop the frames, and find cleanup code to run. 589 // 590 // There are two differences between the call frame information described 591 // in the DWARF standard and the exception handling data Linux places in 592 // the .eh_frame section: 593 // 594 // - Exception handling data uses uses a different format for call frame 595 // information entry headers. The distinguished CIE id, the way FDEs 596 // refer to their CIEs, and the way the end of the series of entries is 597 // determined are all slightly different. 598 // 599 // If the constructor's EH_FRAME argument is true, then the 600 // CallFrameInfo parses the entry headers as Linux C++ exception 601 // handling data. If EH_FRAME is false or omitted, the CallFrameInfo 602 // parses standard DWARF call frame information. 603 // 604 // - Linux C++ exception handling data uses CIE augmentation strings 605 // beginning with 'z' to specify the presence of additional data after 606 // the CIE and FDE headers and special encodings used for addresses in 607 // frame description entries. 608 // 609 // CallFrameInfo can handle 'z' augmentations in either DWARF CFI or 610 // exception handling data if you have supplied READER with the base 611 // addresses needed to interpret the pointer encodings that 'z' 612 // augmentations can specify. See the ByteReader interface for details 613 // about the base addresses. See the CallFrameInfo::Handler interface 614 // for details about the additional information one might find in 615 // 'z'-augmented data. 
616 // 617 // Thus: 618 // 619 // - If you are parsing standard DWARF CFI, as found in a .debug_frame 620 // section, you should pass false for the EH_FRAME argument, or omit 621 // it, and you need not worry about providing READER with the 622 // additional base addresses. 623 // 624 // - If you want to parse Linux C++ exception handling data from a 625 // .eh_frame section, you should pass EH_FRAME as true, and call 626 // READER's Set*Base member functions before calling our Start method. 627 // 628 // - If you want to parse DWARF CFI that uses the 'z' augmentations 629 // (although I don't think any toolchain ever emits such data), you 630 // could pass false for EH_FRAME, but call READER's Set*Base members. 631 // 632 // The extensions the Linux C++ ABI makes to DWARF for exception 633 // handling are described here, rather poorly: 634 // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html 635 // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html 636 // 637 // The mechanics of C++ exception handling, personality routines, 638 // and language-specific data areas are described here, rather nicely: 639 // http://www.codesourcery.com/public/cxx-abi/abi-eh.html 640 CallFrameInfo(const char *buffer, size_t buffer_length, 641 ByteReader *reader, Handler *handler, Reporter *reporter, 642 bool eh_frame = false) buffer_(buffer)643 : buffer_(buffer), buffer_length_(buffer_length), 644 reader_(reader), handler_(handler), reporter_(reporter), 645 eh_frame_(eh_frame) { } 646 ~CallFrameInfo()647 ~CallFrameInfo() { } 648 649 // Parse the entries in BUFFER, reporting what we find to HANDLER. 650 // Return true if we reach the end of the section successfully, or 651 // false if we encounter an error. 652 bool Start(); 653 654 // Return the textual name of KIND. For error reporting. 
655 static const char *KindName(EntryKind kind); 656 657 private: 658 659 struct CIE; 660 661 // A CFI entry, either an FDE or a CIE. 662 struct Entry { 663 // The starting offset of the entry in the section, for error 664 // reporting. 665 size_t offset; 666 667 // The start of this entry in the buffer. 668 const char *start; 669 670 // Which kind of entry this is. 671 // 672 // We want to be able to use this for error reporting even while we're 673 // in the midst of parsing. Error reporting code may assume that kind, 674 // offset, and start fields are valid, although kind may be kUnknown. 675 EntryKind kind; 676 677 // The end of this entry's common prologue (initial length and id), and 678 // the start of this entry's kind-specific fields. 679 const char *fields; 680 681 // The start of this entry's instructions. 682 const char *instructions; 683 684 // The address past the entry's last byte in the buffer. (Note that 685 // since offset points to the entry's initial length field, and the 686 // length field is the number of bytes after that field, this is not 687 // simply buffer_ + offset + length.) 688 const char *end; 689 690 // For both DWARF CFI and .eh_frame sections, this is the CIE id in a 691 // CIE, and the offset of the associated CIE in an FDE. 692 uint64 id; 693 694 // The CIE that applies to this entry, if we've parsed it. If this is a 695 // CIE, then this field points to this structure. 696 CIE *cie; 697 }; 698 699 // A common information entry (CIE). 700 struct CIE: public Entry { 701 uint8 version; // CFI data version number 702 string augmentation; // vendor format extension markers 703 uint64 code_alignment_factor; // scale for code address adjustments 704 int data_alignment_factor; // scale for stack pointer adjustments 705 unsigned return_address_register; // which register holds the return addr 706 707 // True if this CIE includes Linux C++ ABI 'z' augmentation data. 708 bool has_z_augmentation; 709 710 // Parsed 'z' augmentation data. 
These are meaningful only if 711 // has_z_augmentation is true. 712 bool has_z_lsda; // The 'z' augmentation included 'L'. 713 bool has_z_personality; // The 'z' augmentation included 'P'. 714 bool has_z_signal_frame; // The 'z' augmentation included 'S'. 715 716 // If has_z_lsda is true, this is the encoding to be used for language- 717 // specific data area pointers in FDEs. 718 DwarfPointerEncoding lsda_encoding; 719 720 // If has_z_personality is true, this is the encoding used for the 721 // personality routine pointer in the augmentation data. 722 DwarfPointerEncoding personality_encoding; 723 724 // If has_z_personality is true, this is the address of the personality 725 // routine --- or, if personality_encoding & DW_EH_PE_indirect, the 726 // address where the personality routine's address is stored. 727 uint64 personality_address; 728 729 // This is the encoding used for addresses in the FDE header and 730 // in DW_CFA_set_loc instructions. This is always valid, whether 731 // or not we saw a 'z' augmentation string; its default value is 732 // DW_EH_PE_absptr, which is what normal DWARF CFI uses. 733 DwarfPointerEncoding pointer_encoding; 734 }; 735 736 // A frame description entry (FDE). 737 struct FDE: public Entry { 738 uint64 address; // start address of described code 739 uint64 size; // size of described code, in bytes 740 741 // If cie->has_z_lsda is true, then this is the language-specific data 742 // area's address --- or its address's address, if cie->lsda_encoding 743 // has the DW_EH_PE_indirect bit set. 744 uint64 lsda_address; 745 }; 746 747 // Internal use. 748 class Rule; 749 class UndefinedRule; 750 class SameValueRule; 751 class OffsetRule; 752 class ValOffsetRule; 753 class RegisterRule; 754 class ExpressionRule; 755 class ValExpressionRule; 756 class RuleMap; 757 class State; 758 759 // Parse the initial length and id of a CFI entry, either a CIE, an FDE, 760 // or a .eh_frame end-of-data mark. 
CURSOR points to the beginning of the 761 // data to parse. On success, populate ENTRY as appropriate, and return 762 // true. On failure, report the problem, and return false. Even if we 763 // return false, set ENTRY->end to the first byte after the entry if we 764 // were able to figure that out, or NULL if we weren't. 765 bool ReadEntryPrologue(const char *cursor, Entry *entry); 766 767 // Parse the fields of a CIE after the entry prologue, including any 'z' 768 // augmentation data. Assume that the 'Entry' fields of CIE are 769 // populated; use CIE->fields and CIE->end as the start and limit for 770 // parsing. On success, populate the rest of *CIE, and return true; on 771 // failure, report the problem and return false. 772 bool ReadCIEFields(CIE *cie); 773 774 // Parse the fields of an FDE after the entry prologue, including any 'z' 775 // augmentation data. Assume that the 'Entry' fields of *FDE are 776 // initialized; use FDE->fields and FDE->end as the start and limit for 777 // parsing. Assume that FDE->cie is fully initialized. On success, 778 // populate the rest of *FDE, and return true; on failure, report the 779 // problem and return false. 780 bool ReadFDEFields(FDE *fde); 781 782 // Report that ENTRY is incomplete, and return false. This is just a 783 // trivial wrapper for invoking reporter_->Incomplete; it provides a 784 // little brevity. 785 bool ReportIncomplete(Entry *entry); 786 787 // Return true if ENCODING has the DW_EH_PE_indirect bit set. IsIndirectEncoding(DwarfPointerEncoding encoding)788 static bool IsIndirectEncoding(DwarfPointerEncoding encoding) { 789 return encoding & DW_EH_PE_indirect; 790 } 791 792 // The contents of the DWARF .debug_info section we're parsing. 793 const char *buffer_; 794 size_t buffer_length_; 795 796 // For reading multi-byte values with the appropriate endianness. 797 ByteReader *reader_; 798 799 // The handler to which we should report the data we find. 
800 Handler *handler_; 801 802 // For reporting problems in the info we're parsing. 803 Reporter *reporter_; 804 805 // True if we are processing .eh_frame-format data. 806 bool eh_frame_; 807 }; 808 809 // The handler class for CallFrameInfo. The a CFI parser calls the 810 // member functions of a handler object to report the data it finds. 811 class CallFrameInfo::Handler { 812 public: 813 // The pseudo-register number for the canonical frame address. 814 enum { kCFARegister = -1 }; 815 Handler()816 Handler() { } ~Handler()817 virtual ~Handler() { } 818 819 // The parser has found CFI for the machine code at ADDRESS, 820 // extending for LENGTH bytes. OFFSET is the offset of the frame 821 // description entry in the section, for use in error messages. 822 // VERSION is the version number of the CFI format. AUGMENTATION is 823 // a string describing any producer-specific extensions present in 824 // the data. RETURN_ADDRESS is the number of the register that holds 825 // the address to which the function should return. 826 // 827 // Entry should return true to process this CFI, or false to skip to 828 // the next entry. 829 // 830 // The parser invokes Entry for each Frame Description Entry (FDE) 831 // it finds. The parser doesn't report Common Information Entries 832 // to the handler explicitly; instead, if the handler elects to 833 // process a given FDE, the parser reiterates the appropriate CIE's 834 // contents at the beginning of the FDE's rules. 835 virtual bool Entry(size_t offset, uint64 address, uint64 length, 836 uint8 version, const string &augmentation, 837 unsigned return_address) = 0; 838 839 // When the Entry function returns true, the parser calls these 840 // handler functions repeatedly to describe the rules for recovering 841 // registers at each instruction in the given range of machine code. 
842 // Immediately after a call to Entry, the handler should assume that 843 // the rule for each callee-saves register is "unchanged" --- that 844 // is, that the register still has the value it had in the caller. 845 // 846 // If a *Rule function returns true, we continue processing this entry's 847 // instructions. If a *Rule function returns false, we stop evaluating 848 // instructions, and skip to the next entry. Either way, we call End 849 // before going on to the next entry. 850 // 851 // In all of these functions, if the REG parameter is kCFARegister, then 852 // the rule describes how to find the canonical frame address. 853 // kCFARegister may be passed as a BASE_REGISTER argument, meaning that 854 // the canonical frame address should be used as the base address for the 855 // computation. All other REG values will be positive. 856 857 // At ADDRESS, register REG's value is not recoverable. 858 virtual bool UndefinedRule(uint64 address, int reg) = 0; 859 860 // At ADDRESS, register REG's value is the same as that it had in 861 // the caller. 862 virtual bool SameValueRule(uint64 address, int reg) = 0; 863 864 // At ADDRESS, register REG has been saved at offset OFFSET from 865 // BASE_REGISTER. 866 virtual bool OffsetRule(uint64 address, int reg, 867 int base_register, long offset) = 0; 868 869 // At ADDRESS, the caller's value of register REG is the current 870 // value of BASE_REGISTER plus OFFSET. (This rule doesn't provide an 871 // address at which the register's value is saved.) 872 virtual bool ValOffsetRule(uint64 address, int reg, 873 int base_register, long offset) = 0; 874 875 // At ADDRESS, register REG has been saved in BASE_REGISTER. This differs 876 // from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that 877 // BASE_REGISTER is the "home" for REG's saved value: if you want to 878 // assign to a variable whose home is REG in the calling frame, you 879 // should put the value in BASE_REGISTER. 
880 virtual bool RegisterRule(uint64 address, int reg, int base_register) = 0; 881 882 // At ADDRESS, the DWARF expression EXPRESSION yields the address at 883 // which REG was saved. 884 virtual bool ExpressionRule(uint64 address, int reg, 885 const string &expression) = 0; 886 887 // At ADDRESS, the DWARF expression EXPRESSION yields the caller's 888 // value for REG. (This rule doesn't provide an address at which the 889 // register's value is saved.) 890 virtual bool ValExpressionRule(uint64 address, int reg, 891 const string &expression) = 0; 892 893 // Indicate that the rules for the address range reported by the 894 // last call to Entry are complete. End should return true if 895 // everything is okay, or false if an error has occurred and parsing 896 // should stop. 897 virtual bool End() = 0; 898 899 // Handler functions for Linux C++ exception handling data. These are 900 // only called if the data includes 'z' augmentation strings. 901 902 // The Linux C++ ABI uses an extension of the DWARF CFI format to 903 // walk the stack to propagate exceptions from the throw to the 904 // appropriate catch, and do the appropriate cleanups along the way. 905 // CFI entries used for exception handling have two additional data 906 // associated with them: 907 // 908 // - The "language-specific data area" describes which exception 909 // types the function has 'catch' clauses for, and indicates how 910 // to go about re-entering the function at the appropriate catch 911 // clause. If the exception is not caught, it describes the 912 // destructors that must run before the frame is popped. 913 // 914 // - The "personality routine" is responsible for interpreting the 915 // language-specific data area's contents, and deciding whether 916 // the exception should continue to propagate down the stack, 917 // perhaps after doing some cleanup for this frame, or whether the 918 // exception will be caught here. 
919 // 920 // In principle, the language-specific data area is opaque to 921 // everybody but the personality routine. In practice, these values 922 // may be useful or interesting to readers with extra context, and 923 // we have to at least skip them anyway, so we might as well report 924 // them to the handler. 925 926 // This entry's exception handling personality routine's address is 927 // ADDRESS. If INDIRECT is true, then ADDRESS is the address at 928 // which the routine's address is stored. The default definition for 929 // this handler function simply returns true, allowing parsing of 930 // the entry to continue. PersonalityRoutine(uint64 address,bool indirect)931 virtual bool PersonalityRoutine(uint64 address, bool indirect) { 932 return true; 933 } 934 935 // This entry's language-specific data area (LSDA) is located at 936 // ADDRESS. If INDIRECT is true, then ADDRESS is the address at 937 // which the area's address is stored. The default definition for 938 // this handler function simply returns true, allowing parsing of 939 // the entry to continue. LanguageSpecificDataArea(uint64 address,bool indirect)940 virtual bool LanguageSpecificDataArea(uint64 address, bool indirect) { 941 return true; 942 } 943 944 // This entry describes a signal trampoline --- this frame is the 945 // caller of a signal handler. The default definition for this 946 // handler function simply returns true, allowing parsing of the 947 // entry to continue. 948 // 949 // The best description of the rationale for and meaning of signal 950 // trampoline CFI entries seems to be in the GCC bug database: 951 // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208 SignalHandler()952 virtual bool SignalHandler() { return true; } 953 }; 954 955 // The CallFrameInfo class makes calls on an instance of this class to 956 // report errors or warn about problems in the data it is parsing. 
The 957 // default definitions of these methods print a message to stderr, but 958 // you can make a derived class that overrides them. 959 class CallFrameInfo::Reporter { 960 public: 961 // Create an error reporter which attributes troubles to the section 962 // named SECTION in FILENAME. 963 // 964 // Normally SECTION would be .debug_frame, but the Mac puts CFI data 965 // in a Mach-O section named __debug_frame. If we support 966 // Linux-style exception handling data, we could be reading an 967 // .eh_frame section. 968 Reporter(const string &filename, 969 const string §ion = ".debug_frame") filename_(filename)970 : filename_(filename), section_(section) { } ~Reporter()971 virtual ~Reporter() { } 972 973 // The CFI entry at OFFSET ends too early to be well-formed. KIND 974 // indicates what kind of entry it is; KIND can be kUnknown if we 975 // haven't parsed enough of the entry to tell yet. 976 virtual void Incomplete(uint64 offset, CallFrameInfo::EntryKind kind); 977 978 // The .eh_frame data has a four-byte zero at OFFSET where the next 979 // entry's length would be; this is a terminator. However, the buffer 980 // length as given to the CallFrameInfo constructor says there should be 981 // more data. 982 virtual void EarlyEHTerminator(uint64 offset); 983 984 // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the 985 // section is not that large. 986 virtual void CIEPointerOutOfRange(uint64 offset, uint64 cie_offset); 987 988 // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry 989 // there is not a CIE. 990 virtual void BadCIEId(uint64 offset, uint64 cie_offset); 991 992 // The FDE at OFFSET refers to a CIE with version number VERSION, 993 // which we don't recognize. We cannot parse DWARF CFI if it uses 994 // a version number we don't recognize. 995 virtual void UnrecognizedVersion(uint64 offset, int version); 996 997 // The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION, 998 // which we don't recognize. 
We cannot parse DWARF CFI if it uses 999 // augmentations we don't recognize. 1000 virtual void UnrecognizedAugmentation(uint64 offset, 1001 const string &augmentation); 1002 1003 // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not 1004 // a valid encoding. 1005 virtual void InvalidPointerEncoding(uint64 offset, uint8 encoding); 1006 1007 // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends 1008 // on a base address which has not been supplied. 1009 virtual void UnusablePointerEncoding(uint64 offset, uint8 encoding); 1010 1011 // The CIE at OFFSET contains a DW_CFA_restore instruction at 1012 // INSN_OFFSET, which may not appear in a CIE. 1013 virtual void RestoreInCIE(uint64 offset, uint64 insn_offset); 1014 1015 // The entry at OFFSET, of kind KIND, has an unrecognized 1016 // instruction at INSN_OFFSET. 1017 virtual void BadInstruction(uint64 offset, CallFrameInfo::EntryKind kind, 1018 uint64 insn_offset); 1019 1020 // The instruction at INSN_OFFSET in the entry at OFFSET, of kind 1021 // KIND, establishes a rule that cites the CFA, but we have not 1022 // established a CFA rule yet. 1023 virtual void NoCFARule(uint64 offset, CallFrameInfo::EntryKind kind, 1024 uint64 insn_offset); 1025 1026 // The instruction at INSN_OFFSET in the entry at OFFSET, of kind 1027 // KIND, is a DW_CFA_restore_state instruction, but the stack of 1028 // saved states is empty. 1029 virtual void EmptyStateStack(uint64 offset, CallFrameInfo::EntryKind kind, 1030 uint64 insn_offset); 1031 1032 // The DW_CFA_remember_state instruction at INSN_OFFSET in the entry 1033 // at OFFSET, of kind KIND, would restore a state that has no CFA 1034 // rule, whereas the current state does have a CFA rule. This is 1035 // bogus input, which the CallFrameInfo::Handler interface doesn't 1036 // (and shouldn't) have any way to report. 
1037 virtual void ClearingCFARule(uint64 offset, CallFrameInfo::EntryKind kind, 1038 uint64 insn_offset); 1039 1040 protected: 1041 // The name of the file whose CFI we're reading. 1042 string filename_; 1043 1044 // The name of the CFI section in that file. 1045 string section_; 1046 }; 1047 1048 } // namespace dwarf2reader 1049 1050 #endif // UTIL_DEBUGINFO_DWARF2READER_H__ 1051