• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- Format-neutral storage of and querying of info acquired from ---*/
4 /*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info.                    ---*/
5 /*---                                               priv_storage.h ---*/
6 /*--------------------------------------------------------------------*/
7 
8 /*
9    This file is part of Valgrind, a dynamic binary instrumentation
10    framework.
11 
12    Copyright (C) 2000-2015 Julian Seward
13       jseward@acm.org
14 
15    This program is free software; you can redistribute it and/or
16    modify it under the terms of the GNU General Public License as
17    published by the Free Software Foundation; either version 2 of the
18    License, or (at your option) any later version.
19 
20    This program is distributed in the hope that it will be useful, but
21    WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    General Public License for more details.
24 
25    You should have received a copy of the GNU General Public License
26    along with this program; if not, write to the Free Software
27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28    02111-1307, USA.
29 
30    The GNU General Public License is contained in the file COPYING.
31 */
32 /*
33    Stabs reader greatly improved by Nick Nethercote, Apr 02.
34    This module was also extensively hacked on by Jeremy Fitzhardinge
35    and Tom Hughes.
36 */
37 /* See comment at top of debuginfo.c for explanation of
38    the _svma / _avma / _image / _bias naming scheme.
39 */
40 /* Note this is not freestanding; needs pub_core_xarray.h and
41    priv_tytypes.h to be included before it. */
42 
43 #ifndef __PRIV_STORAGE_H
44 #define __PRIV_STORAGE_H
45 
46 #include "pub_core_basics.h"   // Addr
47 #include "pub_core_xarray.h"   // XArray
48 #include "pub_core_deduppoolalloc.h" // DedupPoolAlloc
49 #include "priv_d3basics.h"     // GExpr et al.
50 #include "priv_image.h"        // DiCursor
51 
52 /* --------------------- SYMBOLS --------------------- */
53 
54 /* A structure to hold an ELF/MachO symbol (very crudely).  Usually
55    the symbol only has one name, which is stored in ::pri_name, and
56    ::sec_names is NULL.  If there are other names, these are stored in
57    ::sec_names, which is a NULL terminated vector holding the names.
58    The vector is allocated in VG_AR_DINFO, the names themselves live
59    in DebugInfo::strpool.
60 
61    From the point of view of ELF, the primary vs secondary distinction
62    is artificial: they are all just names associated with the address,
63    none of which has higher precedence than any other.  However, from
64    the point of view of mapping an address to a name to display to the
65    user, we need to choose one "preferred" name, and so that might as
66    well be installed as the pri_name, whilst all others can live in
67    sec_names[].  This has the convenient side effect that, in the
68    common case where there is only one name for the address,
69    sec_names[] does not need to be allocated.
70 */
71 typedef
72    struct {
73       SymAVMAs avmas;    /* Symbol Actual VMAs: lowest address of entity,
74                             + platform specific fields, to access with
75                             the macros defined in pub_core_debuginfo.h */
76       const HChar*  pri_name;  /* primary name, never NULL */
77       const HChar** sec_names; /* NULL, or a NULL term'd array of other names */
78       // XXX: this could be shrunk (on 32-bit platforms) by using 30
79       // bits for the size and 1 bit each for isText and isIFunc.  If you
80       // do this, make sure that all assignments to the latter two use
81       // 0 or 1 (or True or False), and that a positive number larger
82       // than 1 is never used to represent True.
83       UInt    size;    /* size in bytes */
84       Bool    isText;
85       Bool    isIFunc; /* symbol is an indirect function? */
86    }
87    DiSym;
88 
89 /* --------------------- SRCLOCS --------------------- */
90 
/* Line count at which overflow happens, because line numbers are
   stored as shorts in `struct nlist' in a.out.h. */
#define LINENO_OVERFLOW (1 << (sizeof(short) * 8))

/* A line number is given LINENO_BITS bits; LOC_SIZE_BITS is what is
   left over out of 32 bits and is used for the size of a line's
   address range. */
#define LINENO_BITS     20
#define LOC_SIZE_BITS   (32 - LINENO_BITS)
#define MAX_LINENO      ((1 << LINENO_BITS) - 1)

/* Largest representable range size.  We are unlikely to have any
   lines with instruction ranges > 4096 bytes. */
#define MAX_LOC_SIZE    ((1 << LOC_SIZE_BITS) - 1)

/* Number used to detect line number overflows; if one line is
   60000-odd smaller than the previous, it was probably an overflow. */
#define OVERFLOW_DIFFERENCE     (LINENO_OVERFLOW - 5000)
106 
107 /* Filename and Dirname pair. FnDn are stored in di->fndnpool
108    and are allocated using VG_(allocFixedEltDedupPA).
109    The filename/dirname strings are themselves stored in di->strpool. */
110 typedef
111    struct {
112       const HChar* filename;     /* source filename */
113       const HChar* dirname;      /* source directory name */
114    } FnDn;
115 
116 /* A structure to hold addr-to-source info for a single line.  There
117   can be a lot of these, hence the dense packing. */
118 typedef
119    struct {
120       /* Word 1 */
121       Addr   addr;               /* lowest address for this line */
122       /* Word 2 */
123       UShort size:LOC_SIZE_BITS; /* # bytes; we catch overflows of this */
124       UInt   lineno:LINENO_BITS; /* source line number, or zero */
125    }
126    DiLoc;
127 
/* Bits left for an inlining level once LINENO_BITS out of 32 are
   used for the line number, and the largest level that fits. */
#define LEVEL_BITS  (32 - LINENO_BITS)
#define MAX_LEVEL   ((1 << LEVEL_BITS) - 1)
130 
131 /* A structure to hold addr-to-inlined fn info.  There
132    can be a lot of these, hence the dense packing.
133    Only caller source filename and lineno are stored.
134    Handling dirname should be done using fndn_ix technique
135    similar to  ML_(addLineInfo). */
136 typedef
137    struct {
138       /* Word 1 */
139       Addr   addr_lo;            /* lowest address for inlined fn */
140       /* Word 2 */
141       Addr   addr_hi;            /* highest address following the inlined fn */
142       /* Word 3 */
143       const HChar* inlinedfn;    /* inlined function name */
144       /* Word 4 and 5 */
145       UInt   fndn_ix;            /* index in di->fndnpool of caller source
146                                     dirname/filename */
147       UInt   lineno:LINENO_BITS; /* caller line number */
148       UShort level:LEVEL_BITS;   /* level of inlining */
149    }
150    DiInlLoc;
151 
152 /* --------------------- CF INFO --------------------- */
153 
154 /* DiCfSI: a structure to summarise DWARF2/3 CFA info for the code
155    address range [base .. base+len-1].
156 
157    On x86 and amd64 ("IA"), if you know ({e,r}sp, {e,r}bp, {e,r}ip) at
158    some point and {e,r}ip is in the range [base .. base+len-1], it
159    tells you how to calculate ({e,r}sp, {e,r}bp) for the caller of the
160    current frame and also ra, the return address of the current frame.
161 
162    First off, calculate CFA, the Canonical Frame Address, thusly:
163 
164      cfa = case cfa_how of
165               CFIC_IA_SPREL -> {e,r}sp + cfa_off
166               CFIC_IA_BPREL -> {e,r}bp + cfa_off
167               CFIC_EXPR     -> expr whose index is in cfa_off
168 
169    Once that is done, the previous frame's {e,r}sp/{e,r}bp values and
170    this frame's {e,r}ra value can be calculated like this:
171 
172      old_{e,r}sp/{e,r}bp/ra
173          = case {e,r}sp/{e,r}bp/ra_how of
174               CFIR_UNKNOWN   -> we don't know, sorry
175               CFIR_SAME      -> same as it was before (sp/fp only)
176               CFIR_CFAREL    -> cfa + sp/bp/ra_off
177               CFIR_MEMCFAREL -> *( cfa + sp/bp/ra_off )
178               CFIR_EXPR      -> expr whose index is in sp/bp/ra_off
179 
180    On ARM it's pretty much the same, except we have more registers to
181    keep track of:
182 
183      cfa = case cfa_how of
184               CFIC_ARM_R13REL -> r13 + cfa_off
185               CFIC_ARM_R12REL -> r12 + cfa_off
186               CFIC_ARM_R11REL -> r11 + cfa_off
187               CFIC_ARM_R7REL  -> r7  + cfa_off
188               CFIC_EXPR       -> expr whose index is in cfa_off
189 
190      old_r14/r13/r12/r11/r7/ra
191          = case r14/r13/r12/r11/r7/ra_how of
192               CFIR_UNKNOWN   -> we don't know, sorry
193               CFIR_SAME      -> same as it was before (r14/r13/r12/r11/r7 only)
194               CFIR_CFAREL    -> cfa + r14/r13/r12/r11/r7/ra_off
195               CFIR_MEMCFAREL -> *( cfa + r14/r13/r12/r11/r7/ra_off )
196               CFIR_EXPR      -> expr whose index is in r14/r13/r12/r11/r7/ra_off
197 
198    On ARM64:
199 
200      cfa = case cfa_how of
201               CFIC_ARM64_SPREL  -> sp + cfa_off
202               CFIC_ARM64_X29REL -> x29 + cfa_off
203               CFIC_EXPR         -> expr whose index is in cfa_off
204 
205      old_sp/x30/x29/ra
206          = case sp/x30/x29/ra_how of
207               CFIR_UNKNOWN   -> we don't know, sorry
208               CFIR_SAME      -> same as it was before
209               CFIR_CFAREL    -> cfa + sp/x30/x29/ra_off
210               CFIR_MEMCFAREL -> *( cfa + sp/x30/x29/ra_off )
211               CFIR_EXPR      -> expr whose index is in sp/x30/x29/ra_off
212 
213    On s390x we have a similar logic as x86 or amd64. We need the stack pointer
214    (r15), the frame pointer r11 (like BP) and together with the instruction
215    address in the PSW we can calculate the previous values:
216      cfa = case cfa_how of
217               CFIC_IA_SPREL -> r15 + cfa_off
218               CFIC_IA_BPREL -> r11 + cfa_off
219               CFIC_EXPR     -> expr whose index is in cfa_off
220 
221      old_sp/fp/ra
222          = case sp/fp/ra_how of
223               CFIR_UNKNOWN   -> we don't know, sorry
224               CFIR_SAME      -> same as it was before (sp/fp only)
225               CFIR_CFAREL    -> cfa + sp/fp/ra_off
226               CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off )
227               CFIR_EXPR      -> expr whose index is in sp/fp/ra_off
228 */
229 
/* CFIC_ values: stored in a cfa_how field, they select how the CFA
   (Canonical Frame Address) is computed. */
#define CFIC_IA_SPREL     ((UChar)1)
#define CFIC_IA_BPREL     ((UChar)2)
#define CFIC_ARM_R13REL   ((UChar)3)
#define CFIC_ARM_R12REL   ((UChar)4)
#define CFIC_ARM_R11REL   ((UChar)5)
#define CFIC_ARM_R7REL    ((UChar)6)
#define CFIC_ARM64_SPREL  ((UChar)7)
#define CFIC_ARM64_X29REL ((UChar)8)
#define CFIC_EXPR         ((UChar)9)  /* all targets */

/* CFIR_ values: stored in the per-register <reg>_how fields, they
   select how that register's previous value is recovered. */
#define CFIR_UNKNOWN      ((UChar)64)
#define CFIR_SAME         ((UChar)65)
#define CFIR_CFAREL       ((UChar)66)
#define CFIR_MEMCFAREL    ((UChar)67)
#define CFIR_EXPR         ((UChar)68)
245 
246 /* Definition of the DiCfSI_m DiCfSI machine dependent part.
247    These are highly duplicated, and are stored in a pool. */
248 #if defined(VGA_x86) || defined(VGA_amd64)
249 typedef
250    struct {
251       UChar cfa_how; /* a CFIC_IA value */
252       UChar ra_how;  /* a CFIR_ value */
253       UChar sp_how;  /* a CFIR_ value */
254       UChar bp_how;  /* a CFIR_ value */
255       Int   cfa_off;
256       Int   ra_off;
257       Int   sp_off;
258       Int   bp_off;
259    }
260    DiCfSI_m;
261 #elif defined(VGA_arm)
262 typedef
263    struct {
264       UChar cfa_how; /* a CFIC_ value */
265       UChar ra_how;  /* a CFIR_ value */
266       UChar r14_how; /* a CFIR_ value */
267       UChar r13_how; /* a CFIR_ value */
268       UChar r12_how; /* a CFIR_ value */
269       UChar r11_how; /* a CFIR_ value */
270       UChar r7_how;  /* a CFIR_ value */
271       Int   cfa_off;
272       Int   ra_off;
273       Int   r14_off;
274       Int   r13_off;
275       Int   r12_off;
276       Int   r11_off;
277       Int   r7_off;
278       // If you add additional fields, don't forget to update the
279       // initialisation of this in readexidx.c accordingly.
280    }
281    DiCfSI_m;
282 #elif defined(VGA_arm64)
283 typedef
284    struct {
285       UChar cfa_how; /* a CFIC_ value */
286       UChar ra_how;  /* a CFIR_ value */
287       UChar sp_how;  /* a CFIR_ value */ /*dw31=SP*/
288       UChar x30_how; /* a CFIR_ value */ /*dw30=LR*/
289       UChar x29_how; /* a CFIR_ value */ /*dw29=FP*/
290       Int   cfa_off;
291       Int   ra_off;
292       Int   sp_off;
293       Int   x30_off;
294       Int   x29_off;
295    }
296    DiCfSI_m;
297 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
298 /* Just have a struct with the common fields in, so that code that
299    processes the common fields doesn't have to be ifdef'd against
300    VGP_/VGA_ symbols.  These are not used in any way on ppc32/64-linux
301    at the moment. */
302 typedef
303    struct {
304       UChar cfa_how; /* a CFIC_ value */
305       UChar ra_how;  /* a CFIR_ value */
306       Int   cfa_off;
307       Int   ra_off;
308    }
309    DiCfSI_m;
310 #elif defined(VGA_s390x)
311 typedef
312    struct {
313       UChar cfa_how; /* a CFIC_ value */
314       UChar sp_how;  /* a CFIR_ value */
315       UChar ra_how;  /* a CFIR_ value */
316       UChar fp_how;  /* a CFIR_ value */
317       Int   cfa_off;
318       Int   sp_off;
319       Int   ra_off;
320       Int   fp_off;
321    }
322    DiCfSI_m;
323 #elif defined(VGA_mips32) || defined(VGA_mips64)
324 typedef
325    struct {
326       UChar cfa_how; /* a CFIC_ value */
327       UChar ra_how;  /* a CFIR_ value */
328       UChar sp_how;  /* a CFIR_ value */
329       UChar fp_how;  /* a CFIR_ value */
330       Int   cfa_off;
331       Int   ra_off;
332       Int   sp_off;
333       Int   fp_off;
334    }
335    DiCfSI_m;
336 #elif defined(VGA_tilegx)
337 typedef
338    struct {
339       UChar cfa_how; /* a CFIC_IA value */
340       UChar ra_how;  /* a CFIR_ value */
341       UChar sp_how;  /* a CFIR_ value */
342       UChar fp_how;  /* a CFIR_ value */
343       Int   cfa_off;
344       Int   ra_off;
345       Int   sp_off;
346       Int   fp_off;
347    }
348    DiCfSI_m;
349 #else
350 #  error "Unknown arch"
351 #endif
352 
353 typedef
354    struct {
355       Addr  base;
356       UInt  len;
357       UInt  cfsi_m_ix;
358    }
359    DiCfSI;
360 
/* CfiUnop: the unary operators that a CfiExpr Unop node can carry.
   Numbering starts at 0x231 and continues consecutively. */
typedef enum {
   Cunop_Abs = 0x231,
   Cunop_Neg,
   Cunop_Not
} CfiUnop;
368 
/* CfiBinop: the binary operators that a CfiExpr Binop node can carry.
   Numbering starts at 0x321 and continues consecutively. */
typedef enum {
   Cbinop_Add = 0x321,
   Cbinop_Sub,
   Cbinop_And,
   Cbinop_Mul,
   Cbinop_Shl,
   Cbinop_Shr,
   Cbinop_Eq,
   Cbinop_Ge,
   Cbinop_Gt,
   Cbinop_Le,
   Cbinop_Lt,
   Cbinop_Ne
} CfiBinop;
385 
/* CfiReg: the registers that a CfiExpr CfiReg node can name, grouped
   by architecture prefix.  Numbering starts at 0x213 (Creg_INVALID)
   and continues consecutively. */
typedef enum {
   Creg_INVALID = 0x213,
   /* x86 / amd64 */
   Creg_IA_SP,
   Creg_IA_BP,
   Creg_IA_IP,
   /* arm */
   Creg_ARM_R13,
   Creg_ARM_R12,
   Creg_ARM_R15,
   Creg_ARM_R14,
   Creg_ARM_R7,
   /* arm64 */
   Creg_ARM64_X30,
   /* s390 */
   Creg_S390_IA,
   Creg_S390_SP,
   Creg_S390_FP,
   Creg_S390_LR,
   /* mips */
   Creg_MIPS_RA,
   /* tilegx */
   Creg_TILEGX_IP,
   Creg_TILEGX_SP,
   Creg_TILEGX_BP,
   Creg_TILEGX_LR
} CfiReg;
409 
/* CfiExprTag: the tag stored in CfiExpr.tag.  There is one tag per
   member of the CfiExpr.Cex union.  Numbering starts at 0x123 and
   continues consecutively. */
typedef enum {
   Cex_Undef = 0x123,
   Cex_Deref,
   Cex_Const,
   Cex_Unop,
   Cex_Binop,
   Cex_CfiReg,
   Cex_DwReg
} CfiExprTag;
421 
422 typedef
423    struct {
424       CfiExprTag tag;
425       union {
426          struct {
427          } Undef;
428          struct {
429             Int ixAddr;
430          } Deref;
431          struct {
432             UWord con;
433          } Const;
434          struct {
435             CfiUnop op;
436             Int ix;
437          } Unop;
438          struct {
439             CfiBinop op;
440             Int ixL;
441             Int ixR;
442          } Binop;
443          struct {
444             CfiReg reg;
445          } CfiReg;
446          struct {
447             Int reg;
448          } DwReg;
449       }
450       Cex;
451    }
452    CfiExpr;
453 
454 extern Int ML_(CfiExpr_Undef) ( XArray* dst );
455 extern Int ML_(CfiExpr_Deref) ( XArray* dst, Int ixAddr );
456 extern Int ML_(CfiExpr_Const) ( XArray* dst, UWord con );
457 extern Int ML_(CfiExpr_Unop)  ( XArray* dst, CfiUnop op, Int ix );
458 extern Int ML_(CfiExpr_Binop) ( XArray* dst, CfiBinop op, Int ixL, Int ixR );
459 extern Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg );
460 extern Int ML_(CfiExpr_DwReg) ( XArray* dst, Int reg );
461 
462 extern void ML_(ppCfiExpr)( const XArray* src, Int ix );
463 
464 /* ---------------- FPO INFO (Windows PE) -------------- */
465 
466 /* for apps using Wine: MSVC++ PDB FramePointerOmitted: somewhat like
467    a primitive CFI */
468 typedef
469    struct _FPO_DATA {  /* 16 bytes */
470       UInt   ulOffStart; /* offset of 1st byte of function code */
471       UInt   cbProcSize; /* # bytes in function */
472       UInt   cdwLocals;  /* # bytes/4 in locals */
473       UShort cdwParams;  /* # bytes/4 in params */
474       UChar  cbProlog;   /* # bytes in prolog */
475       UChar  cbRegs :3;  /* # regs saved */
476       UChar  fHasSEH:1;  /* Structured Exception Handling */
477       UChar  fUseBP :1;  /* EBP has been used */
478       UChar  reserved:1;
479       UChar  cbFrame:2;  /* frame type */
480    }
481    FPO_DATA;
482 
/* PDB frame types.
   NOTE(review): presumably the values held in FPO_DATA.cbFrame --
   confirm against the PDB reader. */
#define PDB_FRAME_FPO  0
#define PDB_FRAME_TRAP 1
#define PDB_FRAME_TSS  2
486 
487 /* --------------------- VARIABLES --------------------- */
488 
489 typedef
490    struct {
491       Addr    aMin;
492       Addr    aMax;
493       XArray* /* of DiVariable */ vars;
494    }
495    DiAddrRange;
496 
497 typedef
498    struct {
499       const  HChar* name;  /* in DebugInfo.strpool */
500       UWord  typeR; /* a cuOff */
501       const GExpr* gexpr; /* on DebugInfo.gexprs list */
502       const GExpr* fbGX;  /* SHARED. */
503       UInt   fndn_ix; /* where declared; may be zero. index
504                          in DebugInfo.fndnpool */
505       Int    lineNo;   /* where declared; may be zero. */
506    }
507    DiVariable;
508 
509 Word
510 ML_(cmp_for_DiAddrRange_range) ( const void* keyV, const void* elemV );
511 
512 /* --------------------- DEBUGINFO --------------------- */
513 
514 /* This is the top-level data type.  It's a structure which contains
515    information pertaining to one mapped ELF object.  This type is
516    exported only abstractly - in pub_tool_debuginfo.h. */
517 
518 /* First though, here's an auxiliary data structure.  It is only ever
519    used as part of a struct _DebugInfo.  We use it to record
520    observations about mappings and permission changes to the
521    associated file, so as to decide when to read debug info.  It's
522    essentially an ultra-trivial finite state machine which, when it
523    reaches an accept state, signals that we should now read debug info
524    from the object into the associated struct _DebugInfo.  The accept
525    state is arrived at when have_rx_map and have_rw_map both become
526    true.  The initial state is one in which we have no observations,
527    so have_rx_map and have_rw_map are both false.
528 
529    This all started as a rather ad-hoc solution, but was further
530    expanded to handle weird object layouts, e.g. more than one rw
531    or rx mapping for one binary.
532 
533    The normal sequence of events is one of
534 
535    start  -->  r-x mapping  -->  rw- mapping  -->  accept
536    start  -->  rw- mapping  -->  r-x mapping  -->  accept
537 
538    that is, take the first r-x and rw- mapping we see, and we're done.
539 
540    On MacOSX >= 10.7, 32-bit, there appears to be a new variant:
541 
542    start  -->  r-- mapping  -->  rw- mapping
543           -->  upgrade r-- mapping to r-x mapping  -->  accept
544 
545    where the upgrade is done by a call to mach_vm_protect (OSX 10.7)
546    or kernelrpc_mach_vm_protect_trap (OSX 10.9 and possibly 10.8).
547    Hence we need to also track this possibility.
548 
549    From perusal of dyld sources, it appears that this scheme could
550    also be used for 64 bit libraries, although that doesn't seem to happen
551    in practice.  dyld uses this scheme when the text section requires
552    relocation, which only appears to be the case for 32 bit objects.
553 */
554 
555 typedef struct
556 {
557    Addr  avma; /* these fields record the file offset, length */
558    SizeT size; /* and map address of each mapping             */
559    OffT  foff;
560    Bool  rx, rw, ro;  /* memory access flags for this mapping */
561 } DebugInfoMapping;
562 
563 struct _DebugInfoFSM
564 {
565    HChar*  filename;  /* in mallocville (VG_AR_DINFO)               */
566    HChar*  dbgname;   /* in mallocville (VG_AR_DINFO)               */
567    XArray* maps;      /* XArray of DebugInfoMapping structs         */
568    Bool  have_rx_map; /* did we see a r?x mapping yet for the file? */
569    Bool  have_rw_map; /* did we see a rw? mapping yet for the file? */
570    Bool  have_ro_map; /* did we see a r-- mapping yet for the file? */
571 };
572 
573 
/* To do with the string table in struct _DebugInfo (::strpool).
   The value is 64*1024 == 65536. */
#define SEGINFO_STRPOOLSIZE (64*1024)
576 
577 
/* We may encounter more than one .eh_frame section in an object --
   unusual but apparently allowed by ELF.  See
   http://sourceware.org/bugzilla/show_bug.cgi?id=12675
   This is the maximum number of such sections that is recorded.
*/
#define N_EHFRAME_SECTS 2
583 
584 
585 /* So, the main structure for holding debug info for one object. */
586 
587 struct _DebugInfo {
588 
589    /* Admin stuff */
590 
591    struct _DebugInfo* next;   /* list of DebugInfos */
592    Bool               mark;   /* marked for deletion? */
593 
594    /* An abstract handle, which can be used by entities outside of
595       m_debuginfo to (in an abstract datatype sense) refer to this
596       struct _DebugInfo.  A .handle of zero is invalid; valid handles
597       are 1 and above.  The same handle is never issued twice (in any
598       given run of Valgrind), so a handle becomes invalid when the
599       associated struct _DebugInfo is discarded, and remains invalid
600       forever thereafter.  The .handle field is set as soon as this
601       structure is allocated. */
602    ULong handle;
603 
604    /* Used for debugging only - indicate what stuff to dump whilst
605       reading stuff into the seginfo.  Are computed as early in the
606       lifetime of the DebugInfo as possible -- at the point when it is
607       created.  Use these when deciding what to spew out; do not use
608       the global VG_(clo_blah) flags. */
609 
610    Bool trace_symtab; /* symbols, our style */
611    Bool trace_cfi;    /* dwarf frame unwind, our style */
612    Bool ddump_syms;   /* mimic /usr/bin/readelf --syms */
613    Bool ddump_line;   /* mimic /usr/bin/readelf --debug-dump=line */
614    Bool ddump_frames; /* mimic /usr/bin/readelf --debug-dump=frames */
615 
616    /* The "decide when it is time to read debuginfo" state machine.
617       This structure must get filled in before we can start reading
618       anything from the ELF/MachO file.  This structure is filled in
619       by VG_(di_notify_mmap) and its immediate helpers. */
620    struct _DebugInfoFSM fsm;
621 
622    /* Once the ::fsm has reached an accept state -- typically, when
623       both a rw? and r?x mapping for .filename have been observed --
624       we can go on to read the symbol tables and debug info.
625       .have_dinfo changes from False to True when the debug info has
626       been completely read in and postprocessed (canonicalised) and is
627       now suitable for querying. */
628    /* If have_dinfo is False, then all fields below this point are
629       invalid and should not be consulted. */
630    Bool  have_dinfo; /* initially False */
631 
632    /* All the rest of the fields in this structure are filled in once
633       we have committed to reading the symbols and debug info (that
634       is, at the point where .have_dinfo is set to True). */
635 
636    /* The file's soname. */
637    HChar* soname;
638 
639    /* Description of some important mapped segments.  The presence or
640       absence of the mapping is denoted by the _present field, since
641       in some obscure circumstances (to do with data/sdata/bss) it is
642       possible for the mapping to be present but have zero size.
643       Certainly text_ is mandatory on all platforms; not sure about
644       the rest though.
645 
646       --------------------------------------------------------
647 
648       Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that
649 
650       either (size of all rx maps == 0 && cfsi == NULL) (the degenerate case)
651 
652       or the normal case, which is the AND of the following:
653       (0) size of at least one rx mapping > 0
654       (1) no two DebugInfos with some rx mapping of size > 0
655           have overlapping rx mappings
656       (2) [cfsi_minavma,cfsi_maxavma] does not extend beyond
657           [avma,+size) of one rx mapping; that is, the former
658           is a subrange or equal to the latter.
659       (3) all DiCfSI in the cfsi array all have ranges that fall within
660           [avma,+size) of that rx mapping.
661       (4) all DiCfSI in the cfsi array are non-overlapping
662 
663       The cumulative effect of these restrictions is to ensure that
664       all the DiCfSI records in the entire system are non overlapping.
665       Hence any address falls into either exactly one DiCfSI record,
666       or none.  Hence it is safe to cache the results of searches for
667       DiCfSI records.  This is the whole point of these restrictions.
668       The caching of DiCfSI searches is done in VG_(use_CF_info).  The
669       cache is flushed after any change to debugInfo_list.  DiCfSI
670       searches are cached because they are central to stack unwinding
671       on amd64-linux.
672 
673       Where are these invariants imposed and checked?
674 
675       They are checked after a successful read of debuginfo into
676       a DebugInfo*, in check_CFSI_related_invariants.
677 
678       (1) is not really imposed anywhere.  We simply assume that the
679       kernel will not map the text segments from two different objects
680       into the same space.  Sounds reasonable.
681 
682       (2) follows from (4) and (3).  It is ensured by canonicaliseCFI.
683       (3) is ensured by ML_(addDiCfSI).
684       (4) is ensured by canonicaliseCFI.
685 
686       --------------------------------------------------------
687 
688       Comment_on_DEBUG_SVMA_and_DEBUG_BIAS_fields:
689 
690       The _debug_{svma,bias} fields were added as part of a fix to
691       #185816.  The problem encompassed in that bug report was that it
692       wasn't correct to use apply the bias values deduced for a
693       primary object to its associated debuginfo object, because the
694       debuginfo object (or the primary) could have been prelinked to a
695       different SVMA.  Hence debuginfo and primary objects need to
696       have their own biases.
697 
698       ------ JRS: (referring to r9329): ------
699       Let me see if I understand the workings correctly.  Initially
700       the _debug_ values are set to the same values as the "normal"
701       ones, as there's a bunch of bits of code like this (in
702       readelf.c)
703 
704          di->text_svma = svma;
705          ...
706          di->text_bias = rx_bias;
707          di->text_debug_svma = svma;
708          di->text_debug_bias = rx_bias;
709 
710       If a debuginfo object subsequently shows up then the
711       _debug_svma/bias are set for the debuginfo object.  Result is
712       that if there's no debuginfo object then the values are the same
713       as the primary-object values, and if there is a debuginfo object
714       then they will (or at least may) be different.
715 
716       Then when we need to actually bias something, we'll have to
717       decide whether to use the primary bias or the debuginfo bias.
718       And the strategy is to use the primary bias for ELF symbols but
719       the debuginfo bias for anything pulled out of Dwarf.
720 
721       ------ THH: ------
722       Correct - the debug_svma and bias values apply to any address
723       read from the debug data regardless of where that debug data is
724       stored and the other values are used for addresses from other
725       places (primarily the symbol table).
726 
727       ------ JRS: ------
728       Ok; so this was my only area of concern.  Are there any
729       corner-case scenarios where this wouldn't be right?  It sounds
730       like we're assuming the ELF symbols come from the primary object
731       and, if there is a debug object, then all the Dwarf comes from
732       there.  But what if (eg) both symbols and Dwarf come from the
733       debug object?  Is that even possible or allowable?
734 
735       ------ THH: ------
736       You may have a point...
737 
738       The current logic is to try and take any one set of data from
739       either the base object or the debug object. There are four sets
740       of data we consider:
741 
742          - Symbol Table
743          - Stabs
744          - DWARF1
745          - DWARF2
746 
747       If we see the primary section for a given set in the base object
748       then we ignore all sections relating to that set in the debug
749       object.
750 
751       Now in principle if we saw a secondary section (like debug_line
752       say) in the base object, but not the main section (debug_info in
753       this case) then we would take debug_info from the debug object
754       but would use the debug_line from the base object unless we saw
755       a replacement copy in the debug object. That's probably unlikely
756       however.
757 
758       A bigger issue might be, as you say, the symbol table as we will
759       pick that up from the debug object if it isn't in the base. The
760       dynamic symbol table will always have to be in the base object
761       though so we will have to be careful when processing symbols to
762       know which table we are reading in that case.
763 
764       What we probably need to do is tell read_elf_symtab which object
765       the symbols it is being asked to read came from.
766 
767       (A followup patch to deal with this was committed in r9469).
768    */
769    /* .text */
770    Bool     text_present;
771    Addr     text_avma;
772    Addr     text_svma;
773    SizeT    text_size;
774    PtrdiffT text_bias;
775    Addr     text_debug_svma;
776    PtrdiffT text_debug_bias;
777    /* .data */
778    Bool     data_present;
779    Addr     data_svma;
780    Addr     data_avma;
781    SizeT    data_size;
782    PtrdiffT data_bias;
783    Addr     data_debug_svma;
784    PtrdiffT data_debug_bias;
785    /* .sdata */
786    Bool     sdata_present;
787    Addr     sdata_svma;
788    Addr     sdata_avma;
789    SizeT    sdata_size;
790    PtrdiffT sdata_bias;
791    Addr     sdata_debug_svma;
792    PtrdiffT sdata_debug_bias;
793    /* .rodata */
794    Bool     rodata_present;
795    Addr     rodata_svma;
796    Addr     rodata_avma;
797    SizeT    rodata_size;
798    PtrdiffT rodata_bias;
799    Addr     rodata_debug_svma;
800    PtrdiffT rodata_debug_bias;
801    /* .bss */
802    Bool     bss_present;
803    Addr     bss_svma;
804    Addr     bss_avma;
805    SizeT    bss_size;
806    PtrdiffT bss_bias;
807    Addr     bss_debug_svma;
808    PtrdiffT bss_debug_bias;
809    /* .sbss */
810    Bool     sbss_present;
811    Addr     sbss_svma;
812    Addr     sbss_avma;
813    SizeT    sbss_size;
814    PtrdiffT sbss_bias;
815    Addr     sbss_debug_svma;
816    PtrdiffT sbss_debug_bias;
817    /* .ARM.exidx -- sometimes present on arm32, containing unwind info. */
818    Bool     exidx_present;
819    Addr     exidx_avma;
820    Addr     exidx_svma;
821    SizeT    exidx_size;
822    PtrdiffT exidx_bias;
823    /* .ARM.extab -- sometimes present on arm32, containing unwind info. */
824    Bool     extab_present;
825    Addr     extab_avma;
826    Addr     extab_svma;
827    SizeT    extab_size;
828    PtrdiffT extab_bias;
829    /* .plt */
830    Bool   plt_present;
831    Addr	  plt_avma;
832    SizeT  plt_size;
833    /* .got */
834    Bool   got_present;
835    Addr   got_avma;
836    SizeT  got_size;
837    /* .got.plt */
838    Bool   gotplt_present;
839    Addr   gotplt_avma;
840    SizeT  gotplt_size;
841    /* .opd -- needed on ppc64be-linux for finding symbols */
842    Bool   opd_present;
843    Addr   opd_avma;
844    SizeT  opd_size;
845    /* .ehframe -- needed on amd64-linux for stack unwinding.  We might
846       see more than one, hence the arrays. */
847    UInt   n_ehframe;  /* 0 .. N_EHFRAME_SECTS */
848    Addr   ehframe_avma[N_EHFRAME_SECTS];
849    SizeT  ehframe_size[N_EHFRAME_SECTS];
850 
851    /* Sorted tables of stuff we snarfed from the file.  This is the
852       eventual product of reading the debug info.  All this stuff
853       lives in VG_AR_DINFO. */
854 
855    /* An expandable array of symbols. */
856    DiSym*  symtab;
857    UWord   symtab_used;
858    UWord   symtab_size;
859    /* Two expandable arrays, storing locations and their filename/dirname. */
860    DiLoc*  loctab;
861    UInt    sizeof_fndn_ix;  /* Similar use as sizeof_cfsi_m_ix below. */
862    void*   loctab_fndn_ix;  /* loctab[i] filename/dirname is identified by
863                                loctab_fndn_ix[i] (an index in di->fndnpool)
864                                0 means filename/dirname unknown.
865                                The void* is an UChar* or UShort* or UInt*
866                                depending on sizeof_fndn_ix. */
867    UWord   loctab_used;
868    UWord   loctab_size;
869    /* An expandable array of inlined fn info.
870       maxinl_codesz is the biggest inlined piece of code
871       in inltab (i.e. the max of 'addr_hi - addr_lo'). */
872    DiInlLoc* inltab;
873    UWord   inltab_used;
874    UWord   inltab_size;
875    SizeT   maxinl_codesz;
876 
877    /* A set of expandable arrays to store CFI summary info records.
878       The machine specific information (i.e. the DiCfSI_m struct)
879       are stored in cfsi_m_pool, as these are highly duplicated.
880       The DiCfSI_m are allocated in cfsi_m_pool and identified using
881       a (we hope) small integer : often one byte is enough, sometimes
882       2 bytes are needed.
883 
884       cfsi_base contains the bases of the code address ranges.
885       cfsi_size is the size of the cfsi_base array.
886       The elements cfsi_base[0] till cfsi_base[cfsi_used-1] are used.
887       Following elements are not used (yet).
888 
889       For each base in cfsi_base, an index into cfsi_m_pool is stored
890       in cfsi_m_ix array. The size of cfsi_m_ix is equal to
891       cfsi_size*sizeof_cfsi_m_ix. The used portion of cfsi_m_ix is
892       cfsi_m_ix[0] till cfsi_m_ix[(cfsi_used-1)*sizeof_cfsi_m_ix].
893 
894       cfsi_base[i] gives the base address of a code range covered by
895       some CF Info. The corresponding CF Info is identified by an index
896       in cfsi_m_pool. The DiCfSI_m index in cfsi_m_pool corresponding to
897       cfsi_base[i] is given
898         by ((UChar*) cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 1
899         by ((UShort*)cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 2
900         by ((UInt*)  cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 4.
901 
902       The end of the code range starting at cfsi_base[i] is given by
903       cfsi_base[i+1]-1 (or cfsi_maxavma for  cfsi_base[cfsi_used-1]).
904       Some code ranges between cfsi_minavma and cfsi_maxavma might not
905       be covered by cfi information. Such not covered ranges are stored by
906       a base in cfsi_base and a corresponding 0 index in cfsi_m_ix.
907 
908       A variable size representation has been chosen for the elements of
909       cfsi_m_ix as, in many cases, one byte is good enough. For big
910       objects, 2 bytes are needed. No object has yet been found where
911       4 bytes are needed (but the code is ready to handle this case).
912       Not covered ranges ('cfi holes') are stored explicitly in
913       cfsi_base/cfsi_m_ix as this is more memory efficient than storing
914       a length for each covered range : on x86 or amd64, we typically have
915       a hole every 8 covered ranges. On arm64, we have very few holes
916       (1 every 50 or 100 ranges).
917 
918       The cfsi information is read and prepared in the cfsi_rd array.
919       Once all the information has been read, the cfsi_base and cfsi_m_ix
920       arrays will be filled in from cfsi_rd. cfsi_rd will then be freed.
921       This is all done by ML_(finish_CFSI_arrays).
922 
923       Also includes summary address bounds, showing the min and max address
924       covered by any of the records, as an aid to fast searching.  And, if the
925       records require any expression nodes, they are stored in
926       cfsi_exprs. */
927    Addr* cfsi_base;
928    UInt  sizeof_cfsi_m_ix; /* size in byte of indexes stored in cfsi_m_ix. */
929    void* cfsi_m_ix; /* Each index occupies sizeof_cfsi_m_ix bytes.
930                        The void* is an UChar* or UShort* or UInt*
931                        depending on sizeof_cfsi_m_ix.  */
932 
933    DiCfSI* cfsi_rd; /* Only used during reading, NULL once info is read. */
934 
935    UWord   cfsi_used;
936    UWord   cfsi_size;
937 
938    DedupPoolAlloc *cfsi_m_pool;
939    Addr    cfsi_minavma;
940    Addr    cfsi_maxavma;
941    XArray* cfsi_exprs; /* XArray of CfiExpr */
942 
943    /* Optimized code under Wine x86: MSVC++ PDB FramePointerOmitted
944       data.  Non-expandable array, hence .size == .used. */
945    FPO_DATA* fpo;
946    UWord     fpo_size;
947    Addr      fpo_minavma;
948    Addr      fpo_maxavma;
949    Addr      fpo_base_avma;
950 
951    /* Pool of strings -- the string table.  Pointers
952       into this are stable (the memory is not reallocated). */
953    DedupPoolAlloc *strpool;
954 
955    /* Pool of FnDn -- filename and dirname.
956       Elements in the pool are allocated using VG_(allocFixedEltDedupPA). */
957    DedupPoolAlloc *fndnpool;
958 
959    /* Variable scope information, as harvested from Dwarf3 files.
960 
961       In short it's an
962 
963          array of (array of PC address ranges and variables)
964 
965       The outer array indexes over scopes, with Entry 0 containing
966       information on variables which exist for any value of the program
967       counter (PC) -- that is, the outermost scope.  Entries 1, 2, 3,
968       etc contain information on increasingly deeply nested variables.
969 
970       Each inner array is an array of (an address range, and a set
971       of variables that are in scope over that address range).
972 
973       The address ranges may not overlap.
974 
975       Since Entry 0 in the outer array holds information on variables
976       that exist for any value of the PC (that is, global vars), it
977       follows that Entry 0's inner array can only have one address
978       range pair, one that covers the entire address space.
979    */
980    XArray* /* of OSet of DiAddrRange */varinfo;
981 
982    /* These are arrays of the relevant typed objects, held here
983       partially for the purposes of visiting each object exactly once
984       when we need to delete them. */
985 
986    /* An array of TyEnts.  These are needed to make sense of any types
987       in the .varinfo.  Also, when deleting this DebugInfo, we must
988       first traverse this array and throw away malloc'd stuff hanging
989       off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */
990    XArray* /* of TyEnt */ admin_tyents;
991 
992    /* An array of guarded DWARF3 expressions. */
993    XArray* admin_gexprs;
994 
995    /* Cached last rx mapping matched and returned by ML_(find_rx_mapping).
996       This helps performance a lot during ML_(addLineInfo) etc., which can
997       easily be invoked hundreds of thousands of times. */
998    DebugInfoMapping* last_rx_map;
999 };
1000 
1001 /* --------------------- functions --------------------- */
1002 
1003 /* ------ Adding ------ */
1004 
1005 /* Add a symbol to di's symbol table.  The contents of 'sym' are
1006    copied.  It is assumed (and checked) that 'sym' only contains one
1007    name, so there is no auxiliary ::sec_names vector to duplicate.
1008    In other words, the copy is a shallow copy, and there are
1009    assertions in place to ensure that's OK. */
1010 extern void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym );
1011 
1012 /* Add a filename/dirname pair to a DebugInfo and return its index
1013    in the fndnpool fixed pool.  That index is the 'fndn_ix' value
        expected by ML_(addLineInfo), ML_(addInlInfo) and ML_(addVar). */
1014 extern UInt ML_(addFnDn) (struct _DebugInfo* di,
1015                           const HChar* filename,
1016                           const HChar* dirname);  /* dirname may be NULL */
1017 
1018 /* Returns the filename of the fndn pair identified by fndn_ix
        (an index in di->fndnpool, as allocated by ML_(addFnDn)).
1019    Returns "???" if fndn_ix is 0, i.e. the pair is unknown. */
1020 extern const HChar* ML_(fndn_ix2filename) (const DebugInfo* di,
1021                                            UInt fndn_ix);
1022 
1023 /* Returns the dirname of the fndn pair identified by fndn_ix
        (an index in di->fndnpool, as allocated by ML_(addFnDn)).
1024    Returns "" if fndn_ix is 0 or fndn->dirname is NULL. */
1025 extern const HChar* ML_(fndn_ix2dirname) (const DebugInfo* di,
1026                                           UInt fndn_ix);
1027 
1028 /* Returns the fndn_ix for the LineInfo locno in di->loctab.
1029    Returns 0 if the filename/dirname are unknown. */
1030 extern UInt ML_(fndn_ix) (const DebugInfo* di, Word locno);
1031 
1032 /* Add a line-number record to a DebugInfo.
1033    fndn_ix is an index in di->fndnpool, allocated using ML_(addFnDn).
1034    Give a 0 index for an unknown filename/dirname pair.
        NOTE(review): 'this' and 'next' presumably bound the address
        range covered by the record, and the meaning of 'entry' is not
        documented here -- confirm against the definition. */
1035 extern
1036 void ML_(addLineInfo) ( struct _DebugInfo* di,
1037                         UInt fndn_ix,
1038                         Addr this, Addr next, Int lineno, Int entry);
1039 
1040 /* Add an inlined-call record to a DebugInfo.
1041    A call to the below means that the code of inlinedfn has been
1042    inlined, resulting in code in the half-open range [addr_lo, addr_hi).
1043    Note that addr_hi is excluded, i.e. is not part of the inlined code.
1044    fndn_ix and lineno identify the location of the call that caused
1045    this inlining.
1046    fndn_ix is an index in di->fndnpool, allocated using ML_(addFnDn).
1047    Give a 0 index for an unknown filename/dirname pair.
1048    In case of nested inlining, a small level indicates the call
1049    is closer to main than a call with a higher level. */
1050 extern
1051 void ML_(addInlInfo) ( struct _DebugInfo* di,
1052                        Addr addr_lo, Addr addr_hi,
1053                        const HChar* inlinedfn,
1054                        UInt fndn_ix,
1055                        Int lineno, UShort level);
1056 
1057 /* Add a CFI summary record.  The supplied DiCfSI_m is copied.
        NOTE(review): 'base' and 'len' presumably give the start address
        and length of the code range the record covers -- confirm. */
1058 extern void ML_(addDiCfSI) ( struct _DebugInfo* di,
1059                              Addr base, UInt len, DiCfSI_m* cfsi_m );
1060 
1061 /* Given a position in the di->cfsi_base/cfsi_m_ix arrays, return
1062    the corresponding DiCfSI_m*. Return NULL if the position corresponds
1063    to a cfsi hole (a code range not covered by CFI information). */
1064 DiCfSI_m* ML_(get_cfsi_m) (const DebugInfo* di, UInt pos);
1065 
1066 /* Add a string to the string table of a DebugInfo.  If len==-1,
1067    ML_(addStr) will itself measure the length of the string.
        NOTE(review): the returned pointer presumably refers to the copy
        held in di->strpool -- confirm against the definition. */
1068 extern const HChar* ML_(addStr) ( DebugInfo* di, const HChar* str, Int len );
1069 
1070 /* Add a string to the string table of a DebugInfo, by copying the
1071    string from the given DiCursor.  The function measures the length
1072    of the string by itself; the caller does not supply it. */
1073 extern const HChar* ML_(addStrFromCursor)( DebugInfo* di, DiCursor c );
1074 
/* Add a variable record to a DebugInfo.
   NOTE(review): 'level' presumably selects the scope entry of
   di->varinfo (0 being the outermost scope) and aMin/aMax presumably
   bound the address range over which the variable is in scope --
   confirm against the definition. */
1075 extern void ML_(addVar)( struct _DebugInfo* di,
1076                          Int    level,
1077                          Addr   aMin,
1078                          Addr   aMax,
1079                          const  HChar* name,
1080                          UWord  typeR, /* a cuOff */
1081                          const GExpr* gexpr,
1082                          const GExpr* fbGX, /* SHARED. */
1083                          UInt   fndn_ix, /* where decl'd - may be zero */
1084                          Int    lineNo, /* where decl'd - may be zero */
1085                          Bool   show );
1086 /* Note: fndn_ix identifies a filename/dirname pair in the same way
1087    as for ML_(addInlInfo) and ML_(addLineInfo). */
1088 
1089 /* Canonicalise the tables held by 'di', in preparation for use.  Call
1090    this after finishing adding entries to these tables.  This also
        takes care of the call-frame-info table, by calling
        ML_(canonicaliseCFI). */
1091 extern void ML_(canonicaliseTables) ( struct _DebugInfo* di );
1092 
1093 /* Canonicalise the call-frame-info table held by 'di', in preparation
1094    for use. This is called by ML_(canonicaliseTables) but can also be
1095    called on its own to sort just this table. */
1096 extern void ML_(canonicaliseCFI) ( struct _DebugInfo* di );
1097 
1098 /* ML_(finish_CFSI_arrays) fills in the cfsi_base and cfsi_m_ix arrays
1099    of 'di' from its cfsi_rd array. cfsi_rd is then freed. */
1100 extern void ML_(finish_CFSI_arrays) ( struct _DebugInfo* di );
1101 
1102 /* ------ Searching ------ */
1103 
1104 /* Find a symbol-table index containing the specified pointer, or -1
1105    if not found.  Binary search.
        NOTE(review): match_anywhere_in_sym presumably lets 'ptr' fall
        anywhere inside a symbol rather than exactly at its start, and
        findText presumably restricts the search to text symbols --
        confirm against the definition. */
1106 extern Word ML_(search_one_symtab) ( const DebugInfo* di, Addr ptr,
1107                                      Bool match_anywhere_in_sym,
1108                                      Bool findText );
1109 
1110 /* Find the index of a location-table entry containing the specified
1111    pointer.  Returns -1 if not found.  Binary search.  */
1112 extern Word ML_(search_one_loctab) ( const DebugInfo* di, Addr ptr );
1113 
1114 /* Find the index of a CFI-table entry containing the specified
1115    pointer.  Returns -1 if not found.  Binary search.  */
1116 extern Word ML_(search_one_cfitab) ( const DebugInfo* di, Addr ptr );
1117 
1118 /* Find the index of an FPO-table entry containing the specified
1119    pointer.  Returns -1 if not found.  Binary search.  */
1120 extern Word ML_(search_one_fpotab) ( const DebugInfo* di, Addr ptr );
1121 
1122 /* Helper function for the most often needed searching for an rx
1123    mapping containing the specified address range.  The range must
1124    fall entirely within the mapping to be considered to be within it.
1125    Asserts if lo > hi; caller must ensure this doesn't happen.
        The last mapping matched and returned is cached in
        di->last_rx_map to speed up repeated lookups. */
1126 extern DebugInfoMapping* ML_(find_rx_mapping) ( DebugInfo* di,
1127                                                 Addr lo, Addr hi );
1128 
1129 /* ------ Misc ------ */
1130 
1131 /* Show a non-fatal debug info reading error.  Use VG_(core_panic) for
1132    fatal errors.  'serious' errors are always shown; non-serious ones
1133    are shown only at verbosity level 2 and above. */
1134 extern
1135 void ML_(symerr) ( const DebugInfo* di, Bool serious, const HChar* msg );
1136 
1137 /* Print a symbol.
        NOTE(review): 'idx' is presumably the symbol's index in the
        symbol table, shown alongside it -- confirm. */
1138 extern void ML_(ppSym) ( Int idx, const DiSym* sym );
1139 
1140 /* Print a call-frame-info summary.
        NOTE(review): 'exprs' is presumably the array of expression
        nodes that 'si_m' may refer to (cf. di->cfsi_exprs), and
        base/len the code range being described -- confirm. */
1141 extern void ML_(ppDiCfSI) ( const XArray* /* of CfiExpr */ exprs,
1142                             Addr base, UInt len,
1143                             const DiCfSI_m* si_m );
1144 
1145 
/* Tracing of symbol-table reading.  Both macros expect a pointer named
   'di', with a 'trace_symtab' field, to be in scope at the point of use.

   TRACE_SYMTAB_ENABLED evaluates to di->trace_symtab.

   TRACE_SYMTAB(format, args...) forwards its arguments to VG_(printf)
   when tracing is enabled; otherwise none of them are evaluated.  The
   expansion is wrapped in do { } while (0) so that it behaves as a
   single statement: in 'if (c) TRACE_SYMTAB(...); else ...' the 'else'
   pairs with the caller's 'if' rather than with the one hidden inside
   the macro.  Each use must be terminated with a semicolon. */
#define TRACE_SYMTAB_ENABLED (di->trace_symtab)
#define TRACE_SYMTAB(format, args...) \
   do { \
      if (TRACE_SYMTAB_ENABLED) { VG_(printf)(format, ## args); } \
   } while (0)
1149 
1150 
1151 #endif /* ndef __PRIV_STORAGE_H */
1152 
1153 /*--------------------------------------------------------------------*/
1154 /*--- end                                                          ---*/
1155 /*--------------------------------------------------------------------*/
1156