1 2 /*--------------------------------------------------------------------*/ 3 /*--- Format-neutral storage of and querying of info acquired from ---*/ 4 /*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info. ---*/ 5 /*--- priv_storage.h ---*/ 6 /*--------------------------------------------------------------------*/ 7 8 /* 9 This file is part of Valgrind, a dynamic binary instrumentation 10 framework. 11 12 Copyright (C) 2000-2012 Julian Seward 13 jseward@acm.org 14 15 This program is free software; you can redistribute it and/or 16 modify it under the terms of the GNU General Public License as 17 published by the Free Software Foundation; either version 2 of the 18 License, or (at your option) any later version. 19 20 This program is distributed in the hope that it will be useful, but 21 WITHOUT ANY WARRANTY; without even the implied warranty of 22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 23 General Public License for more details. 24 25 You should have received a copy of the GNU General Public License 26 along with this program; if not, write to the Free Software 27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 28 02111-1307, USA. 29 30 The GNU General Public License is contained in the file COPYING. 31 */ 32 /* 33 Stabs reader greatly improved by Nick Nethercote, Apr 02. 34 This module was also extensively hacked on by Jeremy Fitzhardinge 35 and Tom Hughes. 36 */ 37 /* See comment at top of debuginfo.c for explanation of 38 the _svma / _avma / _image / _bias naming scheme. 39 */ 40 /* Note this is not freestanding; needs pub_core_xarray.h and 41 priv_tytypes.h to be included before it. */ 42 43 #ifndef __PRIV_STORAGE_H 44 #define __PRIV_STORAGE_H 45 46 /* --------------------- SYMBOLS --------------------- */ 47 48 /* A structure to hold an ELF/MachO symbol (very crudely). Usually 49 the symbol only has one name, which is stored in ::pri_name, and 50 ::sec_names is NULL. If there are other names, these are stored in 51 ::sec_names, which is a NULL terminated vector holding the names. 52 The vector is allocated in VG_AR_DINFO, the names themselves live 53 in DebugInfo::strchunks. 54 55 From the point of view of ELF, the primary vs secondary distinction 56 is artificial: they are all just names associated with the address, 57 none of which has higher precedence than any other. However, from 58 the point of view of mapping an address to a name to display to the 59 user, we need to choose one "preferred" name, and so that might as 60 well be installed as the pri_name, whilst all others can live in 61 sec_names[]. This has the convenient side effect that, in the 62 common case where there is only one name for the address, 63 sec_names[] does not need to be allocated. 64 */ 65 typedef 66 struct { 67 Addr addr; /* lowest address of entity */ 68 Addr tocptr; /* ppc64-linux only: value that R2 should have */ 69 UChar* pri_name; /* primary name, never NULL */ 70 UChar** sec_names; /* NULL, or a NULL term'd array of other names */ 71 // XXX: this could be shrunk (on 32-bit platforms) by using 30 72 // bits for the size and 1 bit each for isText and isIFunc. If you 73 // do this, make sure that all assignments to the latter two use 74 // 0 or 1 (or True or False), and that a positive number larger 75 // than 1 is never used to represent True. 76 UInt size; /* size in bytes */ 77 Bool isText; 78 Bool isIFunc; /* symbol is an indirect function? */ 79 } 80 DiSym; 81 82 /* --------------------- SRCLOCS --------------------- */ 83 84 /* Line count at which overflow happens, due to line numbers being 85 stored as shorts in `struct nlist' in a.out.h. */ 86 #define LINENO_OVERFLOW (1 << (sizeof(short) * 8)) 87 88 #define LINENO_BITS 20 89 #define LOC_SIZE_BITS (32 - LINENO_BITS) 90 #define MAX_LINENO ((1 << LINENO_BITS) - 1) 91 92 /* Unlikely to have any lines with instruction ranges > 4096 bytes */ 93 #define MAX_LOC_SIZE ((1 << LOC_SIZE_BITS) - 1) 94 95 /* Number used to detect line number overflows; if one line is 96 60000-odd smaller than the previous, it was probably an overflow. 97 */ 98 #define OVERFLOW_DIFFERENCE (LINENO_OVERFLOW - 5000) 99 100 /* A structure to hold addr-to-source info for a single line. There 101 can be a lot of these, hence the dense packing. */ 102 typedef 103 struct { 104 /* Word 1 */ 105 Addr addr; /* lowest address for this line */ 106 /* Word 2 */ 107 UShort size:LOC_SIZE_BITS; /* # bytes; we catch overflows of this */ 108 UInt lineno:LINENO_BITS; /* source line number, or zero */ 109 /* Word 3 */ 110 UChar* filename; /* source filename */ 111 /* Word 4 */ 112 UChar* dirname; /* source directory name */ 113 } 114 DiLoc; 115 116 /* --------------------- CF INFO --------------------- */ 117 118 /* DiCfSI: a structure to summarise DWARF2/3 CFA info for the code 119 address range [base .. base+len-1]. 120 121 On x86 and amd64 ("IA"), if you know ({e,r}sp, {e,r}bp, {e,r}ip) at 122 some point and {e,r}ip is in the range [base .. base+len-1], it 123 tells you how to calculate ({e,r}sp, {e,r}bp) for the caller of the 124 current frame and also ra, the return address of the current frame. 125 126 First off, calculate CFA, the Canonical Frame Address, thusly: 127 128 cfa = case cfa_how of 129 CFIC_IA_SPREL -> {e,r}sp + cfa_off 130 CFIC_IA_BPREL -> {e,r}bp + cfa_off 131 CFIR_IA_EXPR -> expr whose index is in cfa_off 132 133 Once that is done, the previous frame's {e,r}sp/{e,r}bp values and 134 this frame's {e,r}ra value can be calculated like this: 135 136 old_{e,r}sp/{e,r}bp/ra 137 = case {e,r}sp/{e,r}bp/ra_how of 138 CFIR_UNKNOWN -> we don't know, sorry 139 CFIR_SAME -> same as it was before (sp/fp only) 140 CFIR_CFAREL -> cfa + sp/bp/ra_off 141 CFIR_MEMCFAREL -> *( cfa + sp/bp/ra_off ) 142 CFIR_EXPR -> expr whose index is in sp/bp/ra_off 143 144 On ARM it's pretty much the same, except we have more registers to 145 keep track of: 146 147 cfa = case cfa_how of 148 CFIC_R13REL -> r13 + cfa_off 149 CFIC_R12REL -> r12 + cfa_off 150 CFIC_R11REL -> r11 + cfa_off 151 CFIC_R7REL -> r7 + cfa_off 152 CFIR_EXPR -> expr whose index is in cfa_off 153 154 old_r14/r13/r12/r11/r7/ra 155 = case r14/r13/r12/r11/r7/ra_how of 156 CFIR_UNKNOWN -> we don't know, sorry 157 CFIR_SAME -> same as it was before (r14/r13/r12/r11/r7 only) 158 CFIR_CFAREL -> cfa + r14/r13/r12/r11/r7/ra_off 159 CFIR_MEMCFAREL -> *( cfa + r14/r13/r12/r11/r7/ra_off ) 160 CFIR_EXPR -> expr whose index is in r14/r13/r12/r11/r7/ra_off 161 162 On s390x we have a similar logic as x86 or amd64. We need the stack pointer 163 (r15), the frame pointer r11 (like BP) and together with the instruction 164 address in the PSW we can calculate the previous values: 165 cfa = case cfa_how of 166 CFIC_IA_SPREL -> r15 + cfa_off 167 CFIC_IA_BPREL -> r11 + cfa_off 168 CFIR_IA_EXPR -> expr whose index is in cfa_off 169 170 old_sp/fp/ra 171 = case sp/fp/ra_how of 172 CFIR_UNKNOWN -> we don't know, sorry 173 CFIR_SAME -> same as it was before (sp/fp only) 174 CFIR_CFAREL -> cfa + sp/fp/ra_off 175 CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off ) 176 CFIR_EXPR -> expr whose index is in sp/fp/ra_off 177 */ 178 179 #define CFIC_IA_SPREL ((UChar)1) 180 #define CFIC_IA_BPREL ((UChar)2) 181 #define CFIC_IA_EXPR ((UChar)3) 182 #define CFIC_ARM_R13REL ((UChar)4) 183 #define CFIC_ARM_R12REL ((UChar)5) 184 #define CFIC_ARM_R11REL ((UChar)6) 185 #define CFIC_ARM_R7REL ((UChar)7) 186 #define CFIC_EXPR ((UChar)8) /* all targets */ 187 188 #define CFIR_UNKNOWN ((UChar)64) 189 #define CFIR_SAME ((UChar)65) 190 #define CFIR_CFAREL ((UChar)66) 191 #define CFIR_MEMCFAREL ((UChar)67) 192 #define CFIR_EXPR ((UChar)68) 193 194 #if defined(VGA_x86) || defined(VGA_amd64) 195 typedef 196 struct { 197 Addr base; 198 UInt len; 199 UChar cfa_how; /* a CFIC_IA value */ 200 UChar ra_how; /* a CFIR_ value */ 201 UChar sp_how; /* a CFIR_ value */ 202 UChar bp_how; /* a CFIR_ value */ 203 Int cfa_off; 204 Int ra_off; 205 Int sp_off; 206 Int bp_off; 207 } 208 DiCfSI; 209 #elif defined(VGA_arm) 210 typedef 211 struct { 212 Addr base; 213 UInt len; 214 UChar cfa_how; /* a CFIC_ value */ 215 UChar ra_how; /* a CFIR_ value */ 216 UChar r14_how; /* a CFIR_ value */ 217 UChar r13_how; /* a CFIR_ value */ 218 UChar r12_how; /* a CFIR_ value */ 219 UChar r11_how; /* a CFIR_ value */ 220 UChar r7_how; /* a CFIR_ value */ 221 Int cfa_off; 222 Int ra_off; 223 Int r14_off; 224 Int r13_off; 225 Int r12_off; 226 Int r11_off; 227 Int r7_off; 228 } 229 DiCfSI; 230 #elif defined(VGA_ppc32) || defined(VGA_ppc64) 231 /* Just have a struct with the common fields in, so that code that 232 processes the common fields doesn't have to be ifdef'd against 233 VGP_/VGA_ symbols. These are not used in any way on ppc32/64-linux 234 at the moment. */ 235 typedef 236 struct { 237 Addr base; 238 UInt len; 239 UChar cfa_how; /* a CFIC_ value */ 240 UChar ra_how; /* a CFIR_ value */ 241 Int cfa_off; 242 Int ra_off; 243 } 244 DiCfSI; 245 #elif defined(VGA_s390x) 246 typedef 247 struct { 248 Addr base; 249 UInt len; 250 UChar cfa_how; /* a CFIC_ value */ 251 UChar sp_how; /* a CFIR_ value */ 252 UChar ra_how; /* a CFIR_ value */ 253 UChar fp_how; /* a CFIR_ value */ 254 Int cfa_off; 255 Int sp_off; 256 Int ra_off; 257 Int fp_off; 258 } 259 DiCfSI; 260 #elif defined(VGA_mips32) 261 typedef 262 struct { 263 Addr base; 264 UInt len; 265 UChar cfa_how; /* a CFIC_ value */ 266 UChar ra_how; /* a CFIR_ value */ 267 UChar sp_how; /* a CFIR_ value */ 268 UChar fp_how; /* a CFIR_ value */ 269 Int cfa_off; 270 Int ra_off; 271 Int sp_off; 272 Int fp_off; 273 } 274 DiCfSI; 275 #else 276 # error "Unknown arch" 277 #endif 278 279 280 typedef 281 enum { 282 Cop_Add=0x321, 283 Cop_Sub, 284 Cop_And, 285 Cop_Mul, 286 Cop_Shl, 287 Cop_Shr, 288 Cop_Eq, 289 Cop_Ge, 290 Cop_Gt, 291 Cop_Le, 292 Cop_Lt, 293 Cop_Ne 294 } 295 CfiOp; 296 297 typedef 298 enum { 299 Creg_IA_SP=0x213, 300 Creg_IA_BP, 301 Creg_IA_IP, 302 Creg_ARM_R13, 303 Creg_ARM_R12, 304 Creg_ARM_R15, 305 Creg_ARM_R14, 306 Creg_S390_R14, 307 Creg_MIPS_RA 308 } 309 CfiReg; 310 311 typedef 312 enum { 313 Cex_Undef=0x123, 314 Cex_Deref, 315 Cex_Const, 316 Cex_Binop, 317 Cex_CfiReg, 318 Cex_DwReg 319 } 320 CfiExprTag; 321 322 typedef 323 struct { 324 CfiExprTag tag; 325 union { 326 struct { 327 } Undef; 328 struct { 329 Int ixAddr; 330 } Deref; 331 struct { 332 UWord con; 333 } Const; 334 struct { 335 CfiOp op; 336 Int ixL; 337 Int ixR; 338 } Binop; 339 struct { 340 CfiReg reg; 341 } CfiReg; 342 struct { 343 Int reg; 344 } DwReg; 345 } 346 Cex; 347 } 348 CfiExpr; 349 350 extern Int ML_(CfiExpr_Undef) ( XArray* dst ); 351 extern Int ML_(CfiExpr_Deref) ( XArray* dst, Int ixAddr ); 352 extern Int ML_(CfiExpr_Const) ( XArray* dst, UWord con ); 353 extern Int ML_(CfiExpr_Binop) ( XArray* dst, CfiOp op, Int ixL, Int ixR ); 354 extern Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg ); 355 extern Int ML_(CfiExpr_DwReg) ( XArray* dst, Int reg ); 356 357 extern void ML_(ppCfiExpr)( XArray* src, Int ix ); 358 359 /* ---------------- FPO INFO (Windows PE) -------------- */ 360 361 /* for apps using Wine: MSVC++ PDB FramePointerOmitted: somewhat like 362 a primitive CFI */ 363 typedef 364 struct _FPO_DATA { /* 16 bytes */ 365 UInt ulOffStart; /* offset of 1st byte of function code */ 366 UInt cbProcSize; /* # bytes in function */ 367 UInt cdwLocals; /* # bytes/4 in locals */ 368 UShort cdwParams; /* # bytes/4 in params */ 369 UChar cbProlog; /* # bytes in prolog */ 370 UChar cbRegs :3; /* # regs saved */ 371 UChar fHasSEH:1; /* Structured Exception Handling */ 372 UChar fUseBP :1; /* EBP has been used */ 373 UChar reserved:1; 374 UChar cbFrame:2; /* frame type */ 375 } 376 FPO_DATA; 377 378 #define PDB_FRAME_FPO 0 379 #define PDB_FRAME_TRAP 1 380 #define PDB_FRAME_TSS 2 381 382 /* --------------------- VARIABLES --------------------- */ 383 384 typedef 385 struct { 386 Addr aMin; 387 Addr aMax; 388 XArray* /* of DiVariable */ vars; 389 } 390 DiAddrRange; 391 392 typedef 393 struct { 394 UChar* name; /* in DebugInfo.strchunks */ 395 UWord typeR; /* a cuOff */ 396 GExpr* gexpr; /* on DebugInfo.gexprs list */ 397 GExpr* fbGX; /* SHARED. */ 398 UChar* fileName; /* where declared; may be NULL. in 399 DebugInfo.strchunks */ 400 Int lineNo; /* where declared; may be zero. */ 401 } 402 DiVariable; 403 404 Word 405 ML_(cmp_for_DiAddrRange_range) ( const void* keyV, const void* elemV ); 406 407 /* --------------------- DEBUGINFO --------------------- */ 408 409 /* This is the top-level data type. It's a structure which contains 410 information pertaining to one mapped ELF object. This type is 411 exported only abstractly - in pub_tool_debuginfo.h. */ 412 413 /* First though, here's an auxiliary data structure. It is only ever 414 used as part of a struct _DebugInfo. We use it to record 415 observations about mappings and permission changes to the 416 associated file, so as to decide when to read debug info. It's 417 essentially an ultra-trivial finite state machine which, when it 418 reaches an accept state, signals that we should now read debug info 419 from the object into the associated struct _DebugInfo. The accept 420 state is arrived at when have_rx_map and have_rw_map both become 421 true. The initial state is one in which we have no observations, 422 so have_rx_map and have_rw_map are both false. 423 424 This all started as a rather ad-hoc solution, but was further 425 expanded to handle weird object layouts, e.g. more than one rw 426 or rx mapping for one binary. 427 428 The normal sequence of events is one of 429 430 start --> r-x mapping --> rw- mapping --> accept 431 start --> rw- mapping --> r-x mapping --> accept 432 433 that is, take the first r-x and rw- mapping we see, and we're done. 434 435 On MacOSX 10.7, 32-bit, there appears to be a new variant: 436 437 start --> r-- mapping --> rw- mapping 438 --> upgrade r-- mapping to r-x mapping --> accept 439 440 where the upgrade is done by a call to vm_protect. Hence we 441 need to also track this possibility. 442 */ 443 444 struct _DebugInfoMapping 445 { 446 Addr avma; /* these fields record the file offset, length */ 447 SizeT size; /* and map address of each mapping */ 448 OffT foff; 449 Bool rx, rw, ro; /* memory access flags for this mapping */ 450 }; 451 452 struct _DebugInfoFSM 453 { 454 UChar* filename; /* in mallocville (VG_AR_DINFO) */ 455 XArray* maps; /* XArray of _DebugInfoMapping structs */ 456 Bool have_rx_map; /* did we see a r?x mapping yet for the file? */ 457 Bool have_rw_map; /* did we see a rw? mapping yet for the file? */ 458 Bool have_ro_map; /* did we see a r-- mapping yet for the file? */ 459 }; 460 461 462 /* To do with the string table in struct _DebugInfo (::strchunks) */ 463 #define SEGINFO_STRCHUNKSIZE (64*1024) 464 465 466 /* We may encounter more than one .eh_frame section in an object -- 467 unusual but apparently allowed by ELF. See 468 http://sourceware.org/bugzilla/show_bug.cgi?id=12675 469 */ 470 #define N_EHFRAME_SECTS 2 471 472 473 /* So, the main structure for holding debug info for one object. */ 474 475 struct _DebugInfo { 476 477 /* Admin stuff */ 478 479 struct _DebugInfo* next; /* list of DebugInfos */ 480 Bool mark; /* marked for deletion? */ 481 482 /* An abstract handle, which can be used by entities outside of 483 m_debuginfo to (in an abstract datatype sense) refer to this 484 struct _DebugInfo. A .handle of zero is invalid; valid handles 485 are 1 and above. The same handle is never issued twice (in any 486 given run of Valgrind), so a handle becomes invalid when the 487 associated struct _DebugInfo is discarded, and remains invalid 488 forever thereafter. The .handle field is set as soon as this 489 structure is allocated. */ 490 ULong handle; 491 492 /* Used for debugging only - indicate what stuff to dump whilst 493 reading stuff into the seginfo. Are computed as early in the 494 lifetime of the DebugInfo as possible -- at the point when it is 495 created. Use these when deciding what to spew out; do not use 496 the global VG_(clo_blah) flags. */ 497 498 Bool trace_symtab; /* symbols, our style */ 499 Bool trace_cfi; /* dwarf frame unwind, our style */ 500 Bool ddump_syms; /* mimic /usr/bin/readelf --syms */ 501 Bool ddump_line; /* mimic /usr/bin/readelf --debug-dump=line */ 502 Bool ddump_frames; /* mimic /usr/bin/readelf --debug-dump=frames */ 503 504 /* The "decide when it is time to read debuginfo" state machine. 505 This structure must get filled in before we can start reading 506 anything from the ELF/MachO file. This structure is filled in 507 by VG_(di_notify_mmap) and its immediate helpers. */ 508 struct _DebugInfoFSM fsm; 509 510 /* Once the ::fsm has reached an accept state -- typically, when 511 both a rw? and r?x mapping for .filename have been observed -- 512 we can go on to read the symbol tables and debug info. 513 .have_dinfo changes from False to True when the debug info has 514 been completely read in and postprocessed (canonicalised) and is 515 now suitable for querying. */ 516 /* If have_dinfo is False, then all fields below this point are 517 invalid and should not be consulted. */ 518 Bool have_dinfo; /* initially False */ 519 520 /* All the rest of the fields in this structure are filled in once 521 we have committed to reading the symbols and debug info (that 522 is, at the point where .have_dinfo is set to True). */ 523 524 /* The file's soname. */ 525 UChar* soname; 526 527 /* Description of some important mapped segments. The presence or 528 absence of the mapping is denoted by the _present field, since 529 in some obscure circumstances (to do with data/sdata/bss) it is 530 possible for the mapping to be present but have zero size. 531 Certainly text_ is mandatory on all platforms; not sure about 532 the rest though. 533 534 -------------------------------------------------------- 535 536 Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that 537 538 either (size of all rx maps == 0 && cfsi == NULL) (the degenerate case) 539 540 or the normal case, which is the AND of the following: 541 (0) size of at least one rx mapping > 0 542 (1) no two DebugInfos with some rx mapping of size > 0 543 have overlapping rx mappings 544 (2) [cfsi_minavma,cfsi_maxavma] does not extend beyond 545 [avma,+size) of one rx mapping; that is, the former 546 is a subrange or equal to the latter. 547 (3) all DiCfSI in the cfsi array all have ranges that fall within 548 [avma,+size) of that rx mapping. 549 (4) all DiCfSI in the cfsi array are non-overlapping 550 551 The cumulative effect of these restrictions is to ensure that 552 all the DiCfSI records in the entire system are non overlapping. 553 Hence any address falls into either exactly one DiCfSI record, 554 or none. Hence it is safe to cache the results of searches for 555 DiCfSI records. This is the whole point of these restrictions. 556 The caching of DiCfSI searches is done in VG_(use_CF_info). The 557 cache is flushed after any change to debugInfo_list. DiCfSI 558 searches are cached because they are central to stack unwinding 559 on amd64-linux. 560 561 Where are these invariants imposed and checked? 562 563 They are checked after a successful read of debuginfo into 564 a DebugInfo*, in check_CFSI_related_invariants. 565 566 (1) is not really imposed anywhere. We simply assume that the 567 kernel will not map the text segments from two different objects 568 into the same space. Sounds reasonable. 569 570 (2) follows from (4) and (3). It is ensured by canonicaliseCFI. 571 (3) is ensured by ML_(addDiCfSI). 572 (4) is ensured by canonicaliseCFI. 573 574 -------------------------------------------------------- 575 576 Comment_on_DEBUG_SVMA_and_DEBUG_BIAS_fields: 577 578 The _debug_{svma,bias} fields were added as part of a fix to 579 #185816. The problem encompassed in that bug report was that it 580 wasn't correct to use apply the bias values deduced for a 581 primary object to its associated debuginfo object, because the 582 debuginfo object (or the primary) could have been prelinked to a 583 different SVMA. Hence debuginfo and primary objects need to 584 have their own biases. 585 586 ------ JRS: (referring to r9329): ------ 587 Let me see if I understand the workings correctly. Initially 588 the _debug_ values are set to the same values as the "normal" 589 ones, as there's a bunch of bits of code like this (in 590 readelf.c) 591 592 di->text_svma = svma; 593 ... 594 di->text_bias = rx_bias; 595 di->text_debug_svma = svma; 596 di->text_debug_bias = rx_bias; 597 598 If a debuginfo object subsequently shows up then the 599 _debug_svma/bias are set for the debuginfo object. Result is 600 that if there's no debuginfo object then the values are the same 601 as the primary-object values, and if there is a debuginfo object 602 then they will (or at least may) be different. 603 604 Then when we need to actually bias something, we'll have to 605 decide whether to use the primary bias or the debuginfo bias. 606 And the strategy is to use the primary bias for ELF symbols but 607 the debuginfo bias for anything pulled out of Dwarf. 608 609 ------ THH: ------ 610 Correct - the debug_svma and bias values apply to any address 611 read from the debug data regardless of where that debug data is 612 stored and the other values are used for addresses from other 613 places (primarily the symbol table). 614 615 ------ JRS: ------ 616 Ok; so this was my only area of concern. Are there any 617 corner-case scenarios where this wouldn't be right? It sounds 618 like we're assuming the ELF symbols come from the primary object 619 and, if there is a debug object, then all the Dwarf comes from 620 there. But what if (eg) both symbols and Dwarf come from the 621 debug object? Is that even possible or allowable? 622 623 ------ THH: ------ 624 You may have a point... 625 626 The current logic is to try and take any one set of data from 627 either the base object or the debug object. There are four sets 628 of data we consider: 629 630 - Symbol Table 631 - Stabs 632 - DWARF1 633 - DWARF2 634 635 If we see the primary section for a given set in the base object 636 then we ignore all sections relating to that set in the debug 637 object. 638 639 Now in principle if we saw a secondary section (like debug_line 640 say) in the base object, but not the main section (debug_info in 641 this case) then we would take debug_info from the debug object 642 but would use the debug_line from the base object unless we saw 643 a replacement copy in the debug object. That's probably unlikely 644 however. 645 646 A bigger issue might be, as you say, the symbol table as we will 647 pick that up from the debug object if it isn't in the base. The 648 dynamic symbol table will always have to be in the base object 649 though so we will have to be careful when processing symbols to 650 know which table we are reading in that case. 651 652 What we probably need to do is tell read_elf_symtab which object 653 the symbols it is being asked to read came from. 654 655 (A followup patch to deal with this was committed in r9469). 656 */ 657 /* .text */ 658 Bool text_present; 659 Addr text_avma; 660 Addr text_svma; 661 SizeT text_size; 662 PtrdiffT text_bias; 663 Addr text_debug_svma; 664 PtrdiffT text_debug_bias; 665 /* .data */ 666 Bool data_present; 667 Addr data_svma; 668 Addr data_avma; 669 SizeT data_size; 670 PtrdiffT data_bias; 671 Addr data_debug_svma; 672 PtrdiffT data_debug_bias; 673 /* .sdata */ 674 Bool sdata_present; 675 Addr sdata_svma; 676 Addr sdata_avma; 677 SizeT sdata_size; 678 PtrdiffT sdata_bias; 679 Addr sdata_debug_svma; 680 PtrdiffT sdata_debug_bias; 681 /* .rodata */ 682 Bool rodata_present; 683 Addr rodata_svma; 684 Addr rodata_avma; 685 SizeT rodata_size; 686 PtrdiffT rodata_bias; 687 Addr rodata_debug_svma; 688 PtrdiffT rodata_debug_bias; 689 /* .bss */ 690 Bool bss_present; 691 Addr bss_svma; 692 Addr bss_avma; 693 SizeT bss_size; 694 PtrdiffT bss_bias; 695 Addr bss_debug_svma; 696 PtrdiffT bss_debug_bias; 697 /* .sbss */ 698 Bool sbss_present; 699 Addr sbss_svma; 700 Addr sbss_avma; 701 SizeT sbss_size; 702 PtrdiffT sbss_bias; 703 Addr sbss_debug_svma; 704 PtrdiffT sbss_debug_bias; 705 /* .plt */ 706 Bool plt_present; 707 Addr plt_avma; 708 SizeT plt_size; 709 /* .got */ 710 Bool got_present; 711 Addr got_avma; 712 SizeT got_size; 713 /* .got.plt */ 714 Bool gotplt_present; 715 Addr gotplt_avma; 716 SizeT gotplt_size; 717 /* .opd -- needed on ppc64-linux for finding symbols */ 718 Bool opd_present; 719 Addr opd_avma; 720 SizeT opd_size; 721 /* .ehframe -- needed on amd64-linux for stack unwinding. We might 722 see more than one, hence the arrays. */ 723 UInt n_ehframe; /* 0 .. N_EHFRAME_SECTS */ 724 Addr ehframe_avma[N_EHFRAME_SECTS]; 725 SizeT ehframe_size[N_EHFRAME_SECTS]; 726 727 /* Sorted tables of stuff we snarfed from the file. This is the 728 eventual product of reading the debug info. All this stuff 729 lives in VG_AR_DINFO. */ 730 731 /* An expandable array of symbols. */ 732 DiSym* symtab; 733 UWord symtab_used; 734 UWord symtab_size; 735 /* An expandable array of locations. */ 736 DiLoc* loctab; 737 UWord loctab_used; 738 UWord loctab_size; 739 /* An expandable array of CFI summary info records. Also includes 740 summary address bounds, showing the min and max address covered 741 by any of the records, as an aid to fast searching. And, if the 742 records require any expression nodes, they are stored in 743 cfsi_exprs. */ 744 DiCfSI* cfsi; 745 UWord cfsi_used; 746 UWord cfsi_size; 747 Addr cfsi_minavma; 748 Addr cfsi_maxavma; 749 XArray* cfsi_exprs; /* XArray of CfiExpr */ 750 751 /* Optimized code under Wine x86: MSVC++ PDB FramePointerOmitted 752 data. Non-expandable array, hence .size == .used. */ 753 FPO_DATA* fpo; 754 UWord fpo_size; 755 Addr fpo_minavma; 756 Addr fpo_maxavma; 757 Addr fpo_base_avma; 758 759 /* Expandable arrays of characters -- the string table. Pointers 760 into this are stable (the arrays are not reallocated). */ 761 struct strchunk { 762 UInt strtab_used; 763 struct strchunk* next; 764 UChar strtab[SEGINFO_STRCHUNKSIZE]; 765 } *strchunks; 766 767 /* Variable scope information, as harvested from Dwarf3 files. 768 769 In short it's an 770 771 array of (array of PC address ranges and variables) 772 773 The outer array indexes over scopes, with Entry 0 containing 774 information on variables which exist for any value of the program 775 counter (PC) -- that is, the outermost scope. Entries 1, 2, 3, 776 etc contain information on increasinly deeply nested variables. 777 778 Each inner array is an array of (an address range, and a set 779 of variables that are in scope over that address range). 780 781 The address ranges may not overlap. 782 783 Since Entry 0 in the outer array holds information on variables 784 that exist for any value of the PC (that is, global vars), it 785 follows that Entry 0's inner array can only have one address 786 range pair, one that covers the entire address space. 787 */ 788 XArray* /* of OSet of DiAddrRange */varinfo; 789 790 /* These are arrays of the relevant typed objects, held here 791 partially for the purposes of visiting each object exactly once 792 when we need to delete them. */ 793 794 /* An array of TyEnts. These are needed to make sense of any types 795 in the .varinfo. Also, when deleting this DebugInfo, we must 796 first traverse this array and throw away malloc'd stuff hanging 797 off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */ 798 XArray* /* of TyEnt */ admin_tyents; 799 800 /* An array of guarded DWARF3 expressions. */ 801 XArray* admin_gexprs; 802 803 /* Cached last rx mapping matched and returned by ML_(find_rx_mapping). 804 This helps performance a lot during ML_(addLineInfo) etc., which can 805 easily be invoked hundreds of thousands of times. */ 806 struct _DebugInfoMapping* last_rx_map; 807 }; 808 809 /* --------------------- functions --------------------- */ 810 811 /* ------ Adding ------ */ 812 813 /* Add a symbol to si's symbol table. The contents of 'sym' are 814 copied. It is assumed (and checked) that 'sym' only contains one 815 name, so there is no auxiliary ::sec_names vector to duplicate. 816 IOW, the copy is a shallow copy, and there are assertions in place 817 to ensure that's OK. */ 818 extern void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym ); 819 820 /* Add a line-number record to a DebugInfo. */ 821 extern 822 void ML_(addLineInfo) ( struct _DebugInfo* di, 823 UChar* filename, 824 UChar* dirname, /* NULL is allowable */ 825 Addr this, Addr next, Int lineno, Int entry); 826 827 /* Add a CFI summary record. The supplied DiCfSI is copied. */ 828 extern void ML_(addDiCfSI) ( struct _DebugInfo* di, DiCfSI* cfsi ); 829 830 /* Add a string to the string table of a DebugInfo. If len==-1, 831 ML_(addStr) will itself measure the length of the string. */ 832 extern UChar* ML_(addStr) ( struct _DebugInfo* di, UChar* str, Int len ); 833 834 extern void ML_(addVar)( struct _DebugInfo* di, 835 Int level, 836 Addr aMin, 837 Addr aMax, 838 UChar* name, 839 UWord typeR, /* a cuOff */ 840 GExpr* gexpr, 841 GExpr* fbGX, /* SHARED. */ 842 UChar* fileName, /* where decl'd - may be NULL */ 843 Int lineNo, /* where decl'd - may be zero */ 844 Bool show ); 845 846 /* Canonicalise the tables held by 'di', in preparation for use. Call 847 this after finishing adding entries to these tables. */ 848 extern void ML_(canonicaliseTables) ( struct _DebugInfo* di ); 849 850 /* Canonicalise the call-frame-info table held by 'di', in preparation 851 for use. This is called by ML_(canonicaliseTables) but can also be 852 called on it's own to sort just this table. */ 853 extern void ML_(canonicaliseCFI) ( struct _DebugInfo* di ); 854 855 /* ------ Searching ------ */ 856 857 /* Find a symbol-table index containing the specified pointer, or -1 858 if not found. Binary search. */ 859 extern Word ML_(search_one_symtab) ( struct _DebugInfo* di, Addr ptr, 860 Bool match_anywhere_in_sym, 861 Bool findText ); 862 863 /* Find a location-table index containing the specified pointer, or -1 864 if not found. Binary search. */ 865 extern Word ML_(search_one_loctab) ( struct _DebugInfo* di, Addr ptr ); 866 867 /* Find a CFI-table index containing the specified pointer, or -1 if 868 not found. Binary search. */ 869 extern Word ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr ); 870 871 /* Find a FPO-table index containing the specified pointer, or -1 872 if not found. Binary search. */ 873 extern Word ML_(search_one_fpotab) ( struct _DebugInfo* di, Addr ptr ); 874 875 /* Helper function for the most often needed searching for an rx 876 mapping containing the specified address range. The range must 877 fall entirely within the mapping to be considered to be within it. 878 Asserts if lo > hi; caller must ensure this doesn't happen. */ 879 extern struct _DebugInfoMapping* ML_(find_rx_mapping) ( struct _DebugInfo* di, 880 Addr lo, Addr hi ); 881 882 /* ------ Misc ------ */ 883 884 /* Show a non-fatal debug info reading error. Use vg_panic if 885 terminal. 'serious' errors are always shown, not 'serious' ones 886 are shown only at verbosity level 2 and above. */ 887 extern 888 void ML_(symerr) ( struct _DebugInfo* di, Bool serious, HChar* msg ); 889 890 /* Print a symbol. */ 891 extern void ML_(ppSym) ( Int idx, DiSym* sym ); 892 893 /* Print a call-frame-info summary. */ 894 extern void ML_(ppDiCfSI) ( XArray* /* of CfiExpr */ exprs, DiCfSI* si ); 895 896 897 #define TRACE_SYMTAB(format, args...) \ 898 if (di->trace_symtab) { VG_(printf)(format, ## args); } 899 900 901 #endif /* ndef __PRIV_STORAGE_H */ 902 903 /*--------------------------------------------------------------------*/ 904 /*--- end ---*/ 905 /*--------------------------------------------------------------------*/ 906