/*--------------------------------------------------------------------*/
/*--- Format-neutral storage of and querying of info acquired from ---*/
/*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info.                    ---*/
/*---                                               priv_storage.h ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2017 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/
/*
   Stabs reader greatly improved by Nick Nethercote, Apr 02.
   This module was also extensively hacked on by Jeremy Fitzhardinge
   and Tom Hughes.
*/
/* See comment at top of debuginfo.c for explanation of
   the _svma / _avma / _image / _bias naming scheme.
*/
/* Note this is not freestanding; needs pub_core_xarray.h and
   priv_tytypes.h to be included before it. */

#ifndef __PRIV_STORAGE_H
#define __PRIV_STORAGE_H

#include "pub_core_basics.h"   // Addr
#include "pub_core_xarray.h"   // XArray
#include "pub_core_deduppoolalloc.h" // DedupPoolAlloc
#include "priv_d3basics.h"     // GExpr et al.
50 #include "priv_image.h" // DiCursor 51 52 /* --------------------- SYMBOLS --------------------- */ 53 54 /* A structure to hold an ELF/MachO symbol (very crudely). Usually 55 the symbol only has one name, which is stored in ::pri_name, and 56 ::sec_names is NULL. If there are other names, these are stored in 57 ::sec_names, which is a NULL terminated vector holding the names. 58 The vector is allocated in VG_AR_DINFO, the names themselves live 59 in DebugInfo::strpool. 60 61 From the point of view of ELF, the primary vs secondary distinction 62 is artificial: they are all just names associated with the address, 63 none of which has higher precedence than any other. However, from 64 the point of view of mapping an address to a name to display to the 65 user, we need to choose one "preferred" name, and so that might as 66 well be installed as the pri_name, whilst all others can live in 67 sec_names[]. This has the convenient side effect that, in the 68 common case where there is only one name for the address, 69 sec_names[] does not need to be allocated. 70 */ 71 typedef 72 struct { 73 SymAVMAs avmas; /* Symbol Actual VMAs: lowest address of entity, 74 + platform specific fields, to access with 75 the macros defined in pub_core_debuginfo.h */ 76 const HChar* pri_name; /* primary name, never NULL */ 77 const HChar** sec_names; /* NULL, or a NULL term'd array of other names */ 78 // XXX: DiSym could be shrunk (on 32-bit platforms to exactly 16 79 // bytes, on 64-bit platforms the first 3 pointers already add 80 // up to 24 bytes, so size plus bits will extend to 32 bytes 81 // anyway) by using 29 bits for the size and 1 bit each for 82 // isText, isIFunc and isGlobal. If you do this, make sure that 83 // all assignments to the latter two use 0 or 1 (or True or 84 // False), and that a positive number larger than 1 is never 85 // used to represent True. 86 UInt size; /* size in bytes */ 87 Bool isText; 88 Bool isIFunc; /* symbol is an indirect function? 
*/ 89 Bool isGlobal; /* Is this symbol globally visible? */ 90 } 91 DiSym; 92 93 /* --------------------- SRCLOCS --------------------- */ 94 95 /* Line count at which overflow happens, due to line numbers being 96 stored as shorts in `struct nlist' in a.out.h. */ 97 #define LINENO_OVERFLOW (1 << (sizeof(short) * 8)) 98 99 #define LINENO_BITS 20 100 #define LOC_SIZE_BITS (32 - LINENO_BITS) 101 #define MAX_LINENO ((1 << LINENO_BITS) - 1) 102 103 /* Unlikely to have any lines with instruction ranges > 4096 bytes */ 104 #define MAX_LOC_SIZE ((1 << LOC_SIZE_BITS) - 1) 105 106 /* Number used to detect line number overflows; if one line is 107 60000-odd smaller than the previous, it was probably an overflow. 108 */ 109 #define OVERFLOW_DIFFERENCE (LINENO_OVERFLOW - 5000) 110 111 /* Filename and Dirname pair. FnDn are stored in di->fndnpool 112 and are allocated using VG_(allocFixedEltDedupPA). 113 The filename/dirname strings are themselves stored in di->strpool. */ 114 typedef 115 struct { 116 const HChar* filename; /* source filename */ 117 const HChar* dirname; /* source directory name */ 118 } FnDn; 119 120 /* A structure to hold addr-to-source info for a single line. There 121 can be a lot of these, hence the dense packing. */ 122 typedef 123 struct { 124 /* Word 1 */ 125 Addr addr; /* lowest address for this line */ 126 /* Word 2 */ 127 UShort size:LOC_SIZE_BITS; /* # bytes; we catch overflows of this */ 128 UInt lineno:LINENO_BITS; /* source line number, or zero */ 129 } 130 DiLoc; 131 132 #define LEVEL_BITS (32 - LINENO_BITS) 133 #define MAX_LEVEL ((1 << LEVEL_BITS) - 1) 134 135 /* A structure to hold addr-to-inlined fn info. There 136 can be a lot of these, hence the dense packing. 137 Only caller source filename and lineno are stored. 138 Handling dirname should be done using fndn_ix technique 139 similar to ML_(addLineInfo). 
*/ 140 typedef 141 struct { 142 /* Word 1 */ 143 Addr addr_lo; /* lowest address for inlined fn */ 144 /* Word 2 */ 145 Addr addr_hi; /* highest address following the inlined fn */ 146 /* Word 3 */ 147 const HChar* inlinedfn; /* inlined function name */ 148 /* Word 4 and 5 */ 149 UInt fndn_ix; /* index in di->fndnpool of caller source 150 dirname/filename */ 151 UInt lineno:LINENO_BITS; /* caller line number */ 152 UShort level:LEVEL_BITS; /* level of inlining */ 153 } 154 DiInlLoc; 155 156 /* --------------------- CF INFO --------------------- */ 157 158 /* DiCfSI: a structure to summarise DWARF2/3 CFA info for the code 159 address range [base .. base+len-1]. 160 161 On x86 and amd64 ("IA"), if you know ({e,r}sp, {e,r}bp, {e,r}ip) at 162 some point and {e,r}ip is in the range [base .. base+len-1], it 163 tells you how to calculate ({e,r}sp, {e,r}bp) for the caller of the 164 current frame and also ra, the return address of the current frame. 165 166 First off, calculate CFA, the Canonical Frame Address, thusly: 167 168 cfa = case cfa_how of 169 CFIC_IA_SPREL -> {e,r}sp + cfa_off 170 CFIC_IA_BPREL -> {e,r}bp + cfa_off 171 CFIC_EXPR -> expr whose index is in cfa_off 172 173 Once that is done, the previous frame's {e,r}sp/{e,r}bp values and 174 this frame's {e,r}ra value can be calculated like this: 175 176 old_{e,r}sp/{e,r}bp/ra 177 = case {e,r}sp/{e,r}bp/ra_how of 178 CFIR_UNKNOWN -> we don't know, sorry 179 CFIR_SAME -> same as it was before (sp/fp only) 180 CFIR_CFAREL -> cfa + sp/bp/ra_off 181 CFIR_MEMCFAREL -> *( cfa + sp/bp/ra_off ) 182 CFIR_EXPR -> expr whose index is in sp/bp/ra_off 183 184 On ARM it's pretty much the same, except we have more registers to 185 keep track of: 186 187 cfa = case cfa_how of 188 CFIC_ARM_R13REL -> r13 + cfa_off 189 CFIC_ARM_R12REL -> r12 + cfa_off 190 CFIC_ARM_R11REL -> r11 + cfa_off 191 CFIC_ARM_R7REL -> r7 + cfa_off 192 CFIR_EXPR -> expr whose index is in cfa_off 193 194 old_r14/r13/r12/r11/r7/ra 195 = case 
r14/r13/r12/r11/r7/ra_how of 196 CFIR_UNKNOWN -> we don't know, sorry 197 CFIR_SAME -> same as it was before (r14/r13/r12/r11/r7 only) 198 CFIR_CFAREL -> cfa + r14/r13/r12/r11/r7/ra_off 199 CFIR_MEMCFAREL -> *( cfa + r14/r13/r12/r11/r7/ra_off ) 200 CFIR_EXPR -> expr whose index is in r14/r13/r12/r11/r7/ra_off 201 202 On ARM64: 203 204 cfa = case cfa_how of 205 CFIC_ARM64_SPREL -> sp + cfa_off 206 CFIC_ARM64_X29REL -> x29 + cfa_off 207 CFIC_EXPR -> expr whose index is in cfa_off 208 209 old_sp/x30/x29/ra 210 = case sp/x30/x29/ra_how of 211 CFIR_UNKNOWN -> we don't know, sorry 212 CFIR_SAME -> same as it was before 213 CFIR_CFAREL -> cfa + sp/x30/x29/ra_how 214 CFIR_MEMCFAREL -> *( cfa + sp/x30/x29/ra_how ) 215 CFIR_EXPR -> expr whose index is in sp/x30/x29/ra_off 216 217 On s390x we have a similar logic as x86 or amd64. We need the stack pointer 218 (r15), the frame pointer r11 (like BP) and together with the instruction 219 address in the PSW we can calculate the previous values: 220 cfa = case cfa_how of 221 CFIC_IA_SPREL -> r15 + cfa_off 222 CFIC_IA_BPREL -> r11 + cfa_off 223 CFIC_EXPR -> expr whose index is in cfa_off 224 225 old_sp/fp/ra 226 = case sp/fp/ra_how of 227 CFIR_UNKNOWN -> we don't know, sorry 228 CFIR_SAME -> same as it was before (sp/fp only) 229 CFIR_CFAREL -> cfa + sp/fp/ra_off 230 CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off ) 231 CFIR_EXPR -> expr whose index is in sp/fp/ra_off 232 */ 233 234 #define CFIC_IA_SPREL ((UChar)1) 235 #define CFIC_IA_BPREL ((UChar)2) 236 #define CFIC_ARM_R13REL ((UChar)3) 237 #define CFIC_ARM_R12REL ((UChar)4) 238 #define CFIC_ARM_R11REL ((UChar)5) 239 #define CFIC_ARM_R7REL ((UChar)6) 240 #define CFIC_ARM64_SPREL ((UChar)7) 241 #define CFIC_ARM64_X29REL ((UChar)8) 242 #define CFIC_EXPR ((UChar)9) /* all targets */ 243 244 #define CFIR_UNKNOWN ((UChar)64) 245 #define CFIR_SAME ((UChar)65) 246 #define CFIR_CFAREL ((UChar)66) 247 #define CFIR_MEMCFAREL ((UChar)67) 248 #define CFIR_EXPR ((UChar)68) 249 250 /* Definition of 
the DiCfSI_m DiCfSI machine dependent part. 251 These are highly duplicated, and are stored in a pool. */ 252 #if defined(VGA_x86) || defined(VGA_amd64) 253 typedef 254 struct { 255 UChar cfa_how; /* a CFIC_IA value */ 256 UChar ra_how; /* a CFIR_ value */ 257 UChar sp_how; /* a CFIR_ value */ 258 UChar bp_how; /* a CFIR_ value */ 259 Int cfa_off; 260 Int ra_off; 261 Int sp_off; 262 Int bp_off; 263 } 264 DiCfSI_m; 265 #elif defined(VGA_arm) 266 typedef 267 struct { 268 UChar cfa_how; /* a CFIC_ value */ 269 UChar ra_how; /* a CFIR_ value */ 270 UChar r14_how; /* a CFIR_ value */ 271 UChar r13_how; /* a CFIR_ value */ 272 UChar r12_how; /* a CFIR_ value */ 273 UChar r11_how; /* a CFIR_ value */ 274 UChar r7_how; /* a CFIR_ value */ 275 Int cfa_off; 276 Int ra_off; 277 Int r14_off; 278 Int r13_off; 279 Int r12_off; 280 Int r11_off; 281 Int r7_off; 282 // If you add additional fields, don't forget to update the 283 // initialisation of this in readexidx.c accordingly. 284 } 285 DiCfSI_m; 286 #elif defined(VGA_arm64) 287 typedef 288 struct { 289 UChar cfa_how; /* a CFIC_ value */ 290 UChar ra_how; /* a CFIR_ value */ 291 UChar sp_how; /* a CFIR_ value */ /*dw31=SP*/ 292 UChar x30_how; /* a CFIR_ value */ /*dw30=LR*/ 293 UChar x29_how; /* a CFIR_ value */ /*dw29=FP*/ 294 Int cfa_off; 295 Int ra_off; 296 Int sp_off; 297 Int x30_off; 298 Int x29_off; 299 } 300 DiCfSI_m; 301 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) 302 /* Just have a struct with the common fields in, so that code that 303 processes the common fields doesn't have to be ifdef'd against 304 VGP_/VGA_ symbols. These are not used in any way on ppc32/64-linux 305 at the moment. 
*/ 306 typedef 307 struct { 308 UChar cfa_how; /* a CFIC_ value */ 309 UChar ra_how; /* a CFIR_ value */ 310 Int cfa_off; 311 Int ra_off; 312 } 313 DiCfSI_m; 314 #elif defined(VGA_s390x) 315 typedef 316 struct { 317 UChar cfa_how; /* a CFIC_ value */ 318 UChar sp_how; /* a CFIR_ value */ 319 UChar ra_how; /* a CFIR_ value */ 320 UChar fp_how; /* a CFIR_ value */ 321 Int cfa_off; 322 Int sp_off; 323 Int ra_off; 324 Int fp_off; 325 } 326 DiCfSI_m; 327 #elif defined(VGA_mips32) || defined(VGA_mips64) 328 typedef 329 struct { 330 UChar cfa_how; /* a CFIC_ value */ 331 UChar ra_how; /* a CFIR_ value */ 332 UChar sp_how; /* a CFIR_ value */ 333 UChar fp_how; /* a CFIR_ value */ 334 Int cfa_off; 335 Int ra_off; 336 Int sp_off; 337 Int fp_off; 338 } 339 DiCfSI_m; 340 #else 341 # error "Unknown arch" 342 #endif 343 344 typedef 345 struct { 346 Addr base; 347 UInt len; 348 UInt cfsi_m_ix; 349 } 350 DiCfSI; 351 352 typedef 353 enum { 354 Cunop_Abs=0x231, 355 Cunop_Neg, 356 Cunop_Not 357 } 358 CfiUnop; 359 360 typedef 361 enum { 362 Cbinop_Add=0x321, 363 Cbinop_Sub, 364 Cbinop_And, 365 Cbinop_Mul, 366 Cbinop_Shl, 367 Cbinop_Shr, 368 Cbinop_Eq, 369 Cbinop_Ge, 370 Cbinop_Gt, 371 Cbinop_Le, 372 Cbinop_Lt, 373 Cbinop_Ne 374 } 375 CfiBinop; 376 377 typedef 378 enum { 379 Creg_INVALID=0x213, 380 Creg_IA_SP, 381 Creg_IA_BP, 382 Creg_IA_IP, 383 Creg_ARM_R13, 384 Creg_ARM_R12, 385 Creg_ARM_R15, 386 Creg_ARM_R14, 387 Creg_ARM_R7, 388 Creg_ARM64_X30, 389 Creg_S390_IA, 390 Creg_S390_SP, 391 Creg_S390_FP, 392 Creg_S390_LR, 393 Creg_MIPS_RA 394 } 395 CfiReg; 396 397 typedef 398 enum { 399 Cex_Undef=0x123, 400 Cex_Deref, 401 Cex_Const, 402 Cex_Unop, 403 Cex_Binop, 404 Cex_CfiReg, 405 Cex_DwReg 406 } 407 CfiExprTag; 408 409 typedef 410 struct { 411 CfiExprTag tag; 412 union { 413 struct { 414 } Undef; 415 struct { 416 Int ixAddr; 417 } Deref; 418 struct { 419 UWord con; 420 } Const; 421 struct { 422 CfiUnop op; 423 Int ix; 424 } Unop; 425 struct { 426 CfiBinop op; 427 Int ixL; 428 Int ixR; 
429 } Binop; 430 struct { 431 CfiReg reg; 432 } CfiReg; 433 struct { 434 Int reg; 435 } DwReg; 436 } 437 Cex; 438 } 439 CfiExpr; 440 441 extern Int ML_(CfiExpr_Undef) ( XArray* dst ); 442 extern Int ML_(CfiExpr_Deref) ( XArray* dst, Int ixAddr ); 443 extern Int ML_(CfiExpr_Const) ( XArray* dst, UWord con ); 444 extern Int ML_(CfiExpr_Unop) ( XArray* dst, CfiUnop op, Int ix ); 445 extern Int ML_(CfiExpr_Binop) ( XArray* dst, CfiBinop op, Int ixL, Int ixR ); 446 extern Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg ); 447 extern Int ML_(CfiExpr_DwReg) ( XArray* dst, Int reg ); 448 449 extern void ML_(ppCfiExpr)( const XArray* src, Int ix ); 450 451 /* ---------------- FPO INFO (Windows PE) -------------- */ 452 453 /* for apps using Wine: MSVC++ PDB FramePointerOmitted: somewhat like 454 a primitive CFI */ 455 typedef 456 struct _FPO_DATA { /* 16 bytes */ 457 UInt ulOffStart; /* offset of 1st byte of function code */ 458 UInt cbProcSize; /* # bytes in function */ 459 UInt cdwLocals; /* # bytes/4 in locals */ 460 UShort cdwParams; /* # bytes/4 in params */ 461 UChar cbProlog; /* # bytes in prolog */ 462 UChar cbRegs :3; /* # regs saved */ 463 UChar fHasSEH:1; /* Structured Exception Handling */ 464 UChar fUseBP :1; /* EBP has been used */ 465 UChar reserved:1; 466 UChar cbFrame:2; /* frame type */ 467 } 468 FPO_DATA; 469 470 #define PDB_FRAME_FPO 0 471 #define PDB_FRAME_TRAP 1 472 #define PDB_FRAME_TSS 2 473 474 /* --------------------- VARIABLES --------------------- */ 475 476 typedef 477 struct { 478 Addr aMin; 479 Addr aMax; 480 XArray* /* of DiVariable */ vars; 481 } 482 DiAddrRange; 483 484 typedef 485 struct { 486 const HChar* name; /* in DebugInfo.strpool */ 487 UWord typeR; /* a cuOff */ 488 const GExpr* gexpr; /* on DebugInfo.gexprs list */ 489 const GExpr* fbGX; /* SHARED. */ 490 UInt fndn_ix; /* where declared; may be zero. index 491 in DebugInfo.fndnpool */ 492 Int lineNo; /* where declared; may be zero. 
*/ 493 } 494 DiVariable; 495 496 Word 497 ML_(cmp_for_DiAddrRange_range) ( const void* keyV, const void* elemV ); 498 499 /* --------------------- DEBUGINFO --------------------- */ 500 501 /* This is the top-level data type. It's a structure which contains 502 information pertaining to one mapped ELF object. This type is 503 exported only abstractly - in pub_tool_debuginfo.h. */ 504 505 /* First though, here's an auxiliary data structure. It is only ever 506 used as part of a struct _DebugInfo. We use it to record 507 observations about mappings and permission changes to the 508 associated file, so as to decide when to read debug info. It's 509 essentially an ultra-trivial finite state machine which, when it 510 reaches an accept state, signals that we should now read debug info 511 from the object into the associated struct _DebugInfo. The accept 512 state is arrived at when have_rx_map and have_rw_map both become 513 true. The initial state is one in which we have no observations, 514 so have_rx_map and have_rw_map are both false. 515 516 This all started as a rather ad-hoc solution, but was further 517 expanded to handle weird object layouts, e.g. more than one rw 518 or rx mapping for one binary. 519 520 The normal sequence of events is one of 521 522 start --> r-x mapping --> rw- mapping --> accept 523 start --> rw- mapping --> r-x mapping --> accept 524 525 that is, take the first r-x and rw- mapping we see, and we're done. 526 527 On MacOSX >= 10.7, 32-bit, there appears to be a new variant: 528 529 start --> r-- mapping --> rw- mapping 530 --> upgrade r-- mapping to r-x mapping --> accept 531 532 where the upgrade is done by a call to mach_vm_protect (OSX 10.7) 533 or kernelrpc_mach_vm_protect_trap (OSX 10.9 and possibly 10.8). 534 Hence we need to also track this possibility. 535 536 From perusal of dyld sources, it appears that this scheme could 537 also be used 64 bit libraries, although that doesn't seem to happen 538 in practice. 
dyld uses this scheme when the text section requires 539 relocation, which only appears to be the case for 32 bit objects. 540 */ 541 542 typedef struct 543 { 544 Addr avma; /* these fields record the file offset, length */ 545 SizeT size; /* and map address of each mapping */ 546 OffT foff; 547 Bool rx, rw, ro; /* memory access flags for this mapping */ 548 } DebugInfoMapping; 549 550 struct _DebugInfoFSM 551 { 552 HChar* filename; /* in mallocville (VG_AR_DINFO) */ 553 HChar* dbgname; /* in mallocville (VG_AR_DINFO) */ 554 XArray* maps; /* XArray of DebugInfoMapping structs */ 555 Bool have_rx_map; /* did we see a r?x mapping yet for the file? */ 556 Bool have_rw_map; /* did we see a rw? mapping yet for the file? */ 557 Bool have_ro_map; /* did we see a r-- mapping yet for the file? */ 558 }; 559 560 561 /* To do with the string table in struct _DebugInfo (::strpool) */ 562 #define SEGINFO_STRPOOLSIZE (64*1024) 563 564 565 /* We may encounter more than one .eh_frame section in an object -- 566 unusual but apparently allowed by ELF. See 567 http://sourceware.org/bugzilla/show_bug.cgi?id=12675 568 */ 569 #define N_EHFRAME_SECTS 2 570 571 572 /* So, the main structure for holding debug info for one object. */ 573 574 struct _DebugInfo { 575 576 /* Admin stuff */ 577 578 struct _DebugInfo* next; /* list of DebugInfos */ 579 Bool mark; /* marked for deletion? */ 580 581 /* An abstract handle, which can be used by entities outside of 582 m_debuginfo to (in an abstract datatype sense) refer to this 583 struct _DebugInfo. A .handle of zero is invalid; valid handles 584 are 1 and above. The same handle is never issued twice (in any 585 given run of Valgrind), so a handle becomes invalid when the 586 associated struct _DebugInfo is discarded, and remains invalid 587 forever thereafter. The .handle field is set as soon as this 588 structure is allocated. 
*/ 589 ULong handle; 590 591 /* Used for debugging only - indicate what stuff to dump whilst 592 reading stuff into the seginfo. Are computed as early in the 593 lifetime of the DebugInfo as possible -- at the point when it is 594 created. Use these when deciding what to spew out; do not use 595 the global VG_(clo_blah) flags. */ 596 597 Bool trace_symtab; /* symbols, our style */ 598 Bool trace_cfi; /* dwarf frame unwind, our style */ 599 Bool ddump_syms; /* mimic /usr/bin/readelf --syms */ 600 Bool ddump_line; /* mimic /usr/bin/readelf --debug-dump=line */ 601 Bool ddump_frames; /* mimic /usr/bin/readelf --debug-dump=frames */ 602 603 /* The "decide when it is time to read debuginfo" state machine. 604 This structure must get filled in before we can start reading 605 anything from the ELF/MachO file. This structure is filled in 606 by VG_(di_notify_mmap) and its immediate helpers. */ 607 struct _DebugInfoFSM fsm; 608 609 /* Once the ::fsm has reached an accept state -- typically, when 610 both a rw? and r?x mapping for .filename have been observed -- 611 we can go on to read the symbol tables and debug info. 612 .have_dinfo changes from False to True when the debug info has 613 been completely read in and postprocessed (canonicalised) and is 614 now suitable for querying. */ 615 /* If have_dinfo is False, then all fields below this point are 616 invalid and should not be consulted. */ 617 Bool have_dinfo; /* initially False */ 618 619 /* All the rest of the fields in this structure are filled in once 620 we have committed to reading the symbols and debug info (that 621 is, at the point where .have_dinfo is set to True). */ 622 623 /* The file's soname. */ 624 HChar* soname; 625 626 /* Description of some important mapped segments. The presence or 627 absence of the mapping is denoted by the _present field, since 628 in some obscure circumstances (to do with data/sdata/bss) it is 629 possible for the mapping to be present but have zero size. 
630 Certainly text_ is mandatory on all platforms; not sure about 631 the rest though. 632 633 -------------------------------------------------------- 634 635 Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that 636 637 either (size of all rx maps == 0 && cfsi == NULL) (the degenerate case) 638 639 or the normal case, which is the AND of the following: 640 (0) size of at least one rx mapping > 0 641 (1) no two DebugInfos with some rx mapping of size > 0 642 have overlapping rx mappings 643 (2) [cfsi_minavma,cfsi_maxavma] does not extend beyond 644 [avma,+size) of one rx mapping; that is, the former 645 is a subrange or equal to the latter. 646 (3) all DiCfSI in the cfsi array all have ranges that fall within 647 [avma,+size) of that rx mapping. 648 (4) all DiCfSI in the cfsi array are non-overlapping 649 650 The cumulative effect of these restrictions is to ensure that 651 all the DiCfSI records in the entire system are non overlapping. 652 Hence any address falls into either exactly one DiCfSI record, 653 or none. Hence it is safe to cache the results of searches for 654 DiCfSI records. This is the whole point of these restrictions. 655 The caching of DiCfSI searches is done in VG_(use_CF_info). The 656 cache is flushed after any change to debugInfo_list. DiCfSI 657 searches are cached because they are central to stack unwinding 658 on amd64-linux. 659 660 Where are these invariants imposed and checked? 661 662 They are checked after a successful read of debuginfo into 663 a DebugInfo*, in check_CFSI_related_invariants. 664 665 (1) is not really imposed anywhere. We simply assume that the 666 kernel will not map the text segments from two different objects 667 into the same space. Sounds reasonable. 668 669 (2) follows from (4) and (3). It is ensured by canonicaliseCFI. 670 (3) is ensured by ML_(addDiCfSI). 671 (4) is ensured by canonicaliseCFI. 
672 673 -------------------------------------------------------- 674 675 Comment_on_DEBUG_SVMA_and_DEBUG_BIAS_fields: 676 677 The _debug_{svma,bias} fields were added as part of a fix to 678 #185816. The problem encompassed in that bug report was that it 679 wasn't correct to use apply the bias values deduced for a 680 primary object to its associated debuginfo object, because the 681 debuginfo object (or the primary) could have been prelinked to a 682 different SVMA. Hence debuginfo and primary objects need to 683 have their own biases. 684 685 ------ JRS: (referring to r9329): ------ 686 Let me see if I understand the workings correctly. Initially 687 the _debug_ values are set to the same values as the "normal" 688 ones, as there's a bunch of bits of code like this (in 689 readelf.c) 690 691 di->text_svma = svma; 692 ... 693 di->text_bias = rx_bias; 694 di->text_debug_svma = svma; 695 di->text_debug_bias = rx_bias; 696 697 If a debuginfo object subsequently shows up then the 698 _debug_svma/bias are set for the debuginfo object. Result is 699 that if there's no debuginfo object then the values are the same 700 as the primary-object values, and if there is a debuginfo object 701 then they will (or at least may) be different. 702 703 Then when we need to actually bias something, we'll have to 704 decide whether to use the primary bias or the debuginfo bias. 705 And the strategy is to use the primary bias for ELF symbols but 706 the debuginfo bias for anything pulled out of Dwarf. 707 708 ------ THH: ------ 709 Correct - the debug_svma and bias values apply to any address 710 read from the debug data regardless of where that debug data is 711 stored and the other values are used for addresses from other 712 places (primarily the symbol table). 713 714 ------ JRS: ------ 715 Ok; so this was my only area of concern. Are there any 716 corner-case scenarios where this wouldn't be right? 
It sounds 717 like we're assuming the ELF symbols come from the primary object 718 and, if there is a debug object, then all the Dwarf comes from 719 there. But what if (eg) both symbols and Dwarf come from the 720 debug object? Is that even possible or allowable? 721 722 ------ THH: ------ 723 You may have a point... 724 725 The current logic is to try and take any one set of data from 726 either the base object or the debug object. There are four sets 727 of data we consider: 728 729 - Symbol Table 730 - Stabs 731 - DWARF1 732 - DWARF2 733 734 If we see the primary section for a given set in the base object 735 then we ignore all sections relating to that set in the debug 736 object. 737 738 Now in principle if we saw a secondary section (like debug_line 739 say) in the base object, but not the main section (debug_info in 740 this case) then we would take debug_info from the debug object 741 but would use the debug_line from the base object unless we saw 742 a replacement copy in the debug object. That's probably unlikely 743 however. 744 745 A bigger issue might be, as you say, the symbol table as we will 746 pick that up from the debug object if it isn't in the base. The 747 dynamic symbol table will always have to be in the base object 748 though so we will have to be careful when processing symbols to 749 know which table we are reading in that case. 750 751 What we probably need to do is tell read_elf_symtab which object 752 the symbols it is being asked to read came from. 753 754 (A followup patch to deal with this was committed in r9469). 
755 */ 756 /* .text */ 757 Bool text_present; 758 Addr text_avma; 759 Addr text_svma; 760 SizeT text_size; 761 PtrdiffT text_bias; 762 Addr text_debug_svma; 763 PtrdiffT text_debug_bias; 764 /* .data */ 765 Bool data_present; 766 Addr data_svma; 767 Addr data_avma; 768 SizeT data_size; 769 PtrdiffT data_bias; 770 Addr data_debug_svma; 771 PtrdiffT data_debug_bias; 772 /* .sdata */ 773 Bool sdata_present; 774 Addr sdata_svma; 775 Addr sdata_avma; 776 SizeT sdata_size; 777 PtrdiffT sdata_bias; 778 Addr sdata_debug_svma; 779 PtrdiffT sdata_debug_bias; 780 /* .rodata */ 781 Bool rodata_present; 782 Addr rodata_svma; 783 Addr rodata_avma; 784 SizeT rodata_size; 785 PtrdiffT rodata_bias; 786 Addr rodata_debug_svma; 787 PtrdiffT rodata_debug_bias; 788 /* .bss */ 789 Bool bss_present; 790 Addr bss_svma; 791 Addr bss_avma; 792 SizeT bss_size; 793 PtrdiffT bss_bias; 794 Addr bss_debug_svma; 795 PtrdiffT bss_debug_bias; 796 /* .sbss */ 797 Bool sbss_present; 798 Addr sbss_svma; 799 Addr sbss_avma; 800 SizeT sbss_size; 801 PtrdiffT sbss_bias; 802 Addr sbss_debug_svma; 803 PtrdiffT sbss_debug_bias; 804 /* .ARM.exidx -- sometimes present on arm32, containing unwind info. */ 805 Bool exidx_present; 806 Addr exidx_avma; 807 Addr exidx_svma; 808 SizeT exidx_size; 809 PtrdiffT exidx_bias; 810 /* .ARM.extab -- sometimes present on arm32, containing unwind info. */ 811 Bool extab_present; 812 Addr extab_avma; 813 Addr extab_svma; 814 SizeT extab_size; 815 PtrdiffT extab_bias; 816 /* .plt */ 817 Bool plt_present; 818 Addr plt_avma; 819 SizeT plt_size; 820 /* .got */ 821 Bool got_present; 822 Addr got_avma; 823 SizeT got_size; 824 /* .got.plt */ 825 Bool gotplt_present; 826 Addr gotplt_avma; 827 SizeT gotplt_size; 828 /* .opd -- needed on ppc64be-linux for finding symbols */ 829 Bool opd_present; 830 Addr opd_avma; 831 SizeT opd_size; 832 /* .ehframe -- needed on amd64-linux for stack unwinding. We might 833 see more than one, hence the arrays. */ 834 UInt n_ehframe; /* 0 .. 
N_EHFRAME_SECTS */ 835 Addr ehframe_avma[N_EHFRAME_SECTS]; 836 SizeT ehframe_size[N_EHFRAME_SECTS]; 837 838 /* Sorted tables of stuff we snarfed from the file. This is the 839 eventual product of reading the debug info. All this stuff 840 lives in VG_AR_DINFO. */ 841 842 /* An expandable array of symbols. */ 843 DiSym* symtab; 844 UWord symtab_used; 845 UWord symtab_size; 846 /* Two expandable arrays, storing locations and their filename/dirname. */ 847 DiLoc* loctab; 848 UInt sizeof_fndn_ix; /* Similar use as sizeof_cfsi_m_ix below. */ 849 void* loctab_fndn_ix; /* loctab[i] filename/dirname is identified by 850 loctab_fnindex_ix[i] (an index in di->fndnpool) 851 0 means filename/dirname unknown. 852 The void* is an UChar* or UShort* or UInt* 853 depending on sizeof_fndn_ix. */ 854 UWord loctab_used; 855 UWord loctab_size; 856 /* An expandable array of inlined fn info. 857 maxinl_codesz is the biggest inlined piece of code 858 in inltab (i.e. the max of 'addr_hi - addr_lo'. */ 859 DiInlLoc* inltab; 860 UWord inltab_used; 861 UWord inltab_size; 862 SizeT maxinl_codesz; 863 864 /* A set of expandable arrays to store CFI summary info records. 865 The machine specific information (i.e. the DiCfSI_m struct) 866 are stored in cfsi_m_pool, as these are highly duplicated. 867 The DiCfSI_m are allocated in cfsi_m_pool and identified using 868 a (we hope) small integer : often one byte is enough, sometimes 869 2 bytes are needed. 870 871 cfsi_base contains the bases of the code address ranges. 872 cfsi_size is the size of the cfsi_base array. 873 The elements cfsi_base[0] till cfsi_base[cfsi_used-1] are used. 874 Following elements are not used (yet). 875 876 For each base in cfsi_base, an index into cfsi_m_pool is stored 877 in cfsi_m_ix array. The size of cfsi_m_ix is equal to 878 cfsi_size*sizeof_cfsi_m_ix. The used portion of cfsi_m_ix is 879 cfsi_m_ix[0] till cfsi_m_ix[(cfsi_used-1)*sizeof_cfsi_m_ix]. 
880 881 cfsi_base[i] gives the base address of a code range covered by 882 some CF Info. The corresponding CF Info is identified by an index 883 in cfsi_m_pool. The DiCfSI_m index in cfsi_m_pool corresponding to 884 cfsi_base[i] is given 885 by ((UChar*) cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 1 886 by ((UShort*)cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 2 887 by ((UInt*) cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 4. 888 889 The end of the code range starting at cfsi_base[i] is given by 890 cfsi_base[i+1]-1 (or cfsi_maxavma for cfsi_base[cfsi_used-1]). 891 Some code ranges between cfsi_minavma and cfsi_maxavma might not 892 be covered by cfi information. Such not covered ranges are stored by 893 a base in cfsi_base and a corresponding 0 index in cfsi_m_ix. 894 895 A variable size representation has been chosen for the elements of 896 cfsi_m_ix as in many case, one byte is good enough. For big 897 objects, 2 bytes are needed. No object has yet been found where 898 4 bytes are needed (but the code is ready to handle this case). 899 Not covered ranges ('cfi holes') are stored explicitly in 900 cfsi_base/cfsi_m_ix as this is more memory efficient than storing 901 a length for each covered range : on x86 or amd64, we typically have 902 a hole every 8 covered ranges. On arm64, we have very few holes 903 (1 every 50 or 100 ranges). 904 905 The cfsi information is read and prepared in the cfsi_rd array. 906 Once all the information has been read, the cfsi_base and cfsi_m_ix 907 arrays will be filled in from cfsi_rd. cfsi_rd will then be freed. 908 This is all done by ML_(finish_CFSI_arrays). 909 910 Also includes summary address bounds, showing the min and max address 911 covered by any of the records, as an aid to fast searching. And, if the 912 records require any expression nodes, they are stored in 913 cfsi_exprs. */ 914 Addr* cfsi_base; 915 UInt sizeof_cfsi_m_ix; /* size in byte of indexes stored in cfsi_m_ix. */ 916 void* cfsi_m_ix; /* Each index occupies sizeof_cfsi_m_ix bytes. 
917 The void* is a UChar* or UShort* or UInt* 918 depending on sizeof_cfsi_m_ix. */ 919 920 DiCfSI* cfsi_rd; /* Only used during reading, NULL once info is read. */ 921 922 UWord cfsi_used; 923 UWord cfsi_size; 924 925 DedupPoolAlloc *cfsi_m_pool; 926 Addr cfsi_minavma; 927 Addr cfsi_maxavma; 928 XArray* cfsi_exprs; /* XArray of CfiExpr */ 929 930 /* Optimized code under Wine x86: MSVC++ PDB FramePointerOmitted 931 data. Non-expandable array, hence .size == .used. */ 932 FPO_DATA* fpo; 933 UWord fpo_size; 934 Addr fpo_minavma; 935 Addr fpo_maxavma; 936 Addr fpo_base_avma; 937 938 /* Pool of strings -- the string table. Pointers 939 into this are stable (the memory is not reallocated). */ 940 DedupPoolAlloc *strpool; 941 942 /* Pool of FnDn -- filename and dirname. 943 Elements in the pool are allocated using VG_(allocFixedEltDedupPA). */ 944 DedupPoolAlloc *fndnpool; 945 946 /* Variable scope information, as harvested from Dwarf3 files. 947 948 In short it's an 949 950 array of (array of PC address ranges and variables) 951 952 The outer array indexes over scopes, with Entry 0 containing 953 information on variables which exist for any value of the program 954 counter (PC) -- that is, the outermost scope. Entries 1, 2, 3, 955 etc contain information on increasingly deeply nested variables. 956 957 Each inner array is an array of (an address range, and a set 958 of variables that are in scope over that address range). 959 960 The address ranges may not overlap. 961 962 Since Entry 0 in the outer array holds information on variables 963 that exist for any value of the PC (that is, global vars), it 964 follows that Entry 0's inner array can only have one address 965 range pair, one that covers the entire address space. 966 */ 967 XArray* /* of OSet of DiAddrRange */varinfo; 968 969 /* These are arrays of the relevant typed objects, held here 970 partially for the purposes of visiting each object exactly once 971 when we need to delete them.
*/ 972 973 /* An array of TyEnts. These are needed to make sense of any types 974 in the .varinfo. Also, when deleting this DebugInfo, we must 975 first traverse this array and throw away malloc'd stuff hanging 976 off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */ 977 XArray* /* of TyEnt */ admin_tyents; 978 979 /* An array of guarded DWARF3 expressions. */ 980 XArray* admin_gexprs; 981 982 /* Cached last rx mapping matched and returned by ML_(find_rx_mapping). 983 This helps performance a lot during ML_(addLineInfo) etc., which can 984 easily be invoked hundreds of thousands of times. */ 985 DebugInfoMapping* last_rx_map; 986 }; 987 988 /* --------------------- functions --------------------- */ 989 990 /* ------ Adding ------ */ 991 992 /* Add a symbol to si's symbol table. The contents of 'sym' are 993 copied. It is assumed (and checked) that 'sym' only contains one 994 name, so there is no auxiliary ::sec_names vector to duplicate. 995 IOW, the copy is a shallow copy, and there are assertions in place 996 to ensure that's OK. */ 997 extern void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym ); 998 999 /* Add a filename/dirname pair to a DebugInfo and returns the index 1000 in the fndnpool fixed pool. */ 1001 extern UInt ML_(addFnDn) (struct _DebugInfo* di, 1002 const HChar* filename, 1003 const HChar* dirname); /* NULL is allowable */ 1004 1005 /* Returns the filename of the fndn pair identified by fndn_ix. 1006 Returns "???" if fndn_ix is 0. */ 1007 extern const HChar* ML_(fndn_ix2filename) (const DebugInfo* di, 1008 UInt fndn_ix); 1009 1010 /* Returns the dirname of the fndn pair identified by fndn_ix. 1011 Returns "" if fndn_ix is 0 or fndn->dirname is NULL. */ 1012 extern const HChar* ML_(fndn_ix2dirname) (const DebugInfo* di, 1013 UInt fndn_ix); 1014 1015 /* Returns the fndn_ix for the LineInfo locno in di->loctab. 1016 0 if filename/dirname are unknown. 
*/ 1017 extern UInt ML_(fndn_ix) (const DebugInfo* di, Word locno); 1018 1019 /* Add a line-number record to a DebugInfo. 1020 fndn_ix is an index in di->fndnpool, allocated using ML_(addFnDn). 1021 Give a 0 index for a unknown filename/dirname pair. */ 1022 extern 1023 void ML_(addLineInfo) ( struct _DebugInfo* di, 1024 UInt fndn_ix, 1025 Addr this, Addr next, Int lineno, Int entry); 1026 1027 /* Add a call inlined record to a DebugInfo. 1028 A call to the below means that inlinedfn code has been 1029 inlined, resulting in code from [addr_lo, addr_hi[. 1030 Note that addr_hi is excluded, i.e. is not part of the inlined code. 1031 fndn_ix and lineno identifies the location of the call that caused 1032 this inlining. 1033 fndn_ix is an index in di->fndnpool, allocated using ML_(addFnDn). 1034 Give a 0 index for an unknown filename/dirname pair. 1035 In case of nested inlining, a small level indicates the call 1036 is closer to main that a call with a higher level. */ 1037 extern 1038 void ML_(addInlInfo) ( struct _DebugInfo* di, 1039 Addr addr_lo, Addr addr_hi, 1040 const HChar* inlinedfn, 1041 UInt fndn_ix, 1042 Int lineno, UShort level); 1043 1044 /* Add a CFI summary record. The supplied DiCfSI_m is copied. */ 1045 extern void ML_(addDiCfSI) ( struct _DebugInfo* di, 1046 Addr base, UInt len, DiCfSI_m* cfsi_m ); 1047 1048 /* Given a position in the di->cfsi_base/cfsi_m_ix arrays, return 1049 the corresponding cfsi_m*. Return NULL if the position corresponds 1050 to a cfsi hole. */ 1051 DiCfSI_m* ML_(get_cfsi_m) (const DebugInfo* di, UInt pos); 1052 1053 /* Add a string to the string table of a DebugInfo. If len==-1, 1054 ML_(addStr) will itself measure the length of the string. */ 1055 extern const HChar* ML_(addStr) ( DebugInfo* di, const HChar* str, Int len ); 1056 1057 /* Add a string to the string table of a DebugInfo, by copying the 1058 string from the given DiCursor. Measures the length of the string 1059 itself. 
*/ 1060 extern const HChar* ML_(addStrFromCursor)( DebugInfo* di, DiCursor c ); 1061 1062 extern void ML_(addVar)( struct _DebugInfo* di, 1063 Int level, 1064 Addr aMin, 1065 Addr aMax, 1066 const HChar* name, 1067 UWord typeR, /* a cuOff */ 1068 const GExpr* gexpr, 1069 const GExpr* fbGX, /* SHARED. */ 1070 UInt fndn_ix, /* where decl'd - may be zero */ 1071 Int lineNo, /* where decl'd - may be zero */ 1072 Bool show ); 1073 /* Note: fndn_ix identifies a filename/dirname pair similarly to 1074 ML_(addInlInfo) and ML_(addLineInfo). */ 1075 1076 /* Canonicalise the tables held by 'di', in preparation for use. Call 1077 this after finishing adding entries to these tables. */ 1078 extern void ML_(canonicaliseTables) ( struct _DebugInfo* di ); 1079 1080 /* Canonicalise the call-frame-info table held by 'di', in preparation 1081 for use. This is called by ML_(canonicaliseTables) but can also be 1082 called on it's own to sort just this table. */ 1083 extern void ML_(canonicaliseCFI) ( struct _DebugInfo* di ); 1084 1085 /* ML_(finish_CFSI_arrays) fills in the cfsi_base and cfsi_m_ix arrays 1086 from cfsi_rd array. cfsi_rd is then freed. */ 1087 extern void ML_(finish_CFSI_arrays) ( struct _DebugInfo* di ); 1088 1089 /* ------ Searching ------ */ 1090 1091 /* Find a symbol-table index containing the specified pointer, or -1 1092 if not found. Binary search. */ 1093 extern Word ML_(search_one_symtab) ( const DebugInfo* di, Addr ptr, 1094 Bool findText ); 1095 1096 /* Find a location-table index containing the specified pointer, or -1 1097 if not found. Binary search. */ 1098 extern Word ML_(search_one_loctab) ( const DebugInfo* di, Addr ptr ); 1099 1100 /* Find a CFI-table index containing the specified pointer, or -1 if 1101 not found. Binary search. */ 1102 extern Word ML_(search_one_cfitab) ( const DebugInfo* di, Addr ptr ); 1103 1104 /* Find a FPO-table index containing the specified pointer, or -1 1105 if not found. Binary search. 
*/ 1106 extern Word ML_(search_one_fpotab) ( const DebugInfo* di, Addr ptr ); 1107 1108 /* Helper function for the most often needed searching for an rx 1109 mapping containing the specified address range. The range must 1110 fall entirely within the mapping to be considered to be within it. 1111 Asserts if lo > hi; caller must ensure this doesn't happen. */ 1112 extern DebugInfoMapping* ML_(find_rx_mapping) ( DebugInfo* di, 1113 Addr lo, Addr hi ); 1114 1115 /* ------ Misc ------ */ 1116 1117 /* Show a non-fatal debug info reading error. Use VG_(core_panic) for 1118 fatal errors. 'serious' errors are always shown, not 'serious' ones 1119 are shown only at verbosity level 2 and above. */ 1120 extern 1121 void ML_(symerr) ( const DebugInfo* di, Bool serious, const HChar* msg ); 1122 1123 /* Print a symbol. */ 1124 extern void ML_(ppSym) ( Int idx, const DiSym* sym ); 1125 1126 /* Print a call-frame-info summary. */ 1127 extern void ML_(ppDiCfSI) ( const XArray* /* of CfiExpr */ exprs, 1128 Addr base, UInt len, 1129 const DiCfSI_m* si_m ); 1130 1131 1132 #define TRACE_SYMTAB_ENABLED (di->trace_symtab) 1133 #define TRACE_SYMTAB(format, args...) \ 1134 if (TRACE_SYMTAB_ENABLED) { VG_(printf)(format, ## args); } 1135 1136 1137 #endif /* ndef __PRIV_STORAGE_H */ 1138 1139 /*--------------------------------------------------------------------*/ 1140 /*--- end ---*/ 1141 /*--------------------------------------------------------------------*/ 1142