• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- Format-neutral storage of and querying of info acquired from ---*/
4 /*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info.                    ---*/
5 /*---                                               priv_storage.h ---*/
6 /*--------------------------------------------------------------------*/
7 
8 /*
9    This file is part of Valgrind, a dynamic binary instrumentation
10    framework.
11 
12    Copyright (C) 2000-2017 Julian Seward
13       jseward@acm.org
14 
15    This program is free software; you can redistribute it and/or
16    modify it under the terms of the GNU General Public License as
17    published by the Free Software Foundation; either version 2 of the
18    License, or (at your option) any later version.
19 
20    This program is distributed in the hope that it will be useful, but
21    WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    General Public License for more details.
24 
25    You should have received a copy of the GNU General Public License
26    along with this program; if not, write to the Free Software
27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28    02111-1307, USA.
29 
30    The GNU General Public License is contained in the file COPYING.
31 */
32 /*
33    Stabs reader greatly improved by Nick Nethercote, Apr 02.
34    This module was also extensively hacked on by Jeremy Fitzhardinge
35    and Tom Hughes.
36 */
37 /* See comment at top of debuginfo.c for explanation of
38    the _svma / _avma / _image / _bias naming scheme.
39 */
40 /* Note this is not freestanding; needs pub_core_xarray.h and
41    priv_tytypes.h to be included before it. */
42 
43 #ifndef __PRIV_STORAGE_H
44 #define __PRIV_STORAGE_H
45 
46 #include "pub_core_basics.h"   // Addr
47 #include "pub_core_xarray.h"   // XArray
48 #include "pub_core_deduppoolalloc.h" // DedupPoolAlloc
49 #include "priv_d3basics.h"     // GExpr et al.
50 #include "priv_image.h"        // DiCursor
51 
52 /* --------------------- SYMBOLS --------------------- */
53 
54 /* A structure to hold an ELF/MachO symbol (very crudely).  Usually
55    the symbol only has one name, which is stored in ::pri_name, and
56    ::sec_names is NULL.  If there are other names, these are stored in
57    ::sec_names, which is a NULL terminated vector holding the names.
58    The vector is allocated in VG_AR_DINFO, the names themselves live
59    in DebugInfo::strpool.
60 
61    From the point of view of ELF, the primary vs secondary distinction
62    is artificial: they are all just names associated with the address,
63    none of which has higher precedence than any other.  However, from
64    the point of view of mapping an address to a name to display to the
65    user, we need to choose one "preferred" name, and so that might as
66    well be installed as the pri_name, whilst all others can live in
67    sec_names[].  This has the convenient side effect that, in the
68    common case where there is only one name for the address,
69    sec_names[] does not need to be allocated.
70 */
71 typedef
72    struct {
73       SymAVMAs avmas;    /* Symbol Actual VMAs: lowest address of entity,
74                             + platform specific fields, to access with
75                             the macros defined in pub_core_debuginfo.h */
76       const HChar*  pri_name;  /* primary name, never NULL */
77       const HChar** sec_names; /* NULL, or a NULL term'd array of other names */
78       // XXX: DiSym could be shrunk (on 32-bit platforms to exactly 16
79       // bytes, on 64-bit platforms the first 3 pointers already add
80       // up to 24 bytes, so size plus bits will extend to 32 bytes
81       // anyway) by using 29 bits for the size and 1 bit each for
82       // isText, isIFunc and isGlobal.  If you do this, make sure that
83       // all assignments to the latter two use 0 or 1 (or True or
84       // False), and that a positive number larger than 1 is never
85       // used to represent True.
86       UInt    size;    /* size in bytes */
87       Bool    isText;
88       Bool    isIFunc; /* symbol is an indirect function? */
89       Bool    isGlobal; /* Is this symbol globally visible? */
90    }
91    DiSym;
92 
93 /* --------------------- SRCLOCS --------------------- */
94 
/* Line count at which overflow happens, due to line numbers being
   stored as shorts in `struct nlist' in a.out.h. */
#define LINENO_OVERFLOW (1 << (8 * sizeof(short)))

/* Number used to detect line number overflows; if one line is
   60000-odd smaller than the previous, it was probably an overflow. */
#define OVERFLOW_DIFFERENCE (LINENO_OVERFLOW - 5000)

/* A 32-bit word is shared between a line number (LINENO_BITS wide)
   and an instruction-range size (the remaining LOC_SIZE_BITS). */
#define LINENO_BITS    20
#define LOC_SIZE_BITS  (32 - LINENO_BITS)

/* Largest line number that fits in LINENO_BITS. */
#define MAX_LINENO     ((1 << LINENO_BITS) - 1)

/* Largest size that fits in LOC_SIZE_BITS (4095).  Unlikely to have
   any lines with instruction ranges bigger than that. */
#define MAX_LOC_SIZE   ((1 << LOC_SIZE_BITS) - 1)
110 
111 /* Filename and Dirname pair. FnDn are stored in di->fndnpool
112    and are allocated using VG_(allocFixedEltDedupPA).
113    The filename/dirname strings are themselves stored in di->strpool. */
114 typedef
115    struct {
116       const HChar* filename;     /* source filename */
117       const HChar* dirname;      /* source directory name */
118    } FnDn;
119 
120 /* A structure to hold addr-to-source info for a single line.  There
121   can be a lot of these, hence the dense packing. */
122 typedef
123    struct {
124       /* Word 1 */
125       Addr   addr;               /* lowest address for this line */
126       /* Word 2 */
127       UShort size:LOC_SIZE_BITS; /* # bytes; we catch overflows of this */
128       UInt   lineno:LINENO_BITS; /* source line number, or zero */
129    }
130    DiLoc;
131 
/* Width of an inlining-level bit-field: whatever is left of 32 bits
   once LINENO_BITS are taken by a line number. */
#define LEVEL_BITS (32 - LINENO_BITS)

/* Largest inlining level that fits in LEVEL_BITS. */
#define MAX_LEVEL  ((1 << LEVEL_BITS) - 1)
134 
135 /* A structure to hold addr-to-inlined fn info.  There
136    can be a lot of these, hence the dense packing.
137    Only caller source filename and lineno are stored.
138    Handling dirname should be done using fndn_ix technique
139    similar to  ML_(addLineInfo). */
140 typedef
141    struct {
142       /* Word 1 */
143       Addr   addr_lo;            /* lowest address for inlined fn */
144       /* Word 2 */
145       Addr   addr_hi;            /* highest address following the inlined fn */
146       /* Word 3 */
147       const HChar* inlinedfn;    /* inlined function name */
148       /* Word 4 and 5 */
149       UInt   fndn_ix;            /* index in di->fndnpool of caller source
150                                     dirname/filename */
151       UInt   lineno:LINENO_BITS; /* caller line number */
152       UShort level:LEVEL_BITS;   /* level of inlining */
153    }
154    DiInlLoc;
155 
156 /* --------------------- CF INFO --------------------- */
157 
158 /* DiCfSI: a structure to summarise DWARF2/3 CFA info for the code
159    address range [base .. base+len-1].
160 
161    On x86 and amd64 ("IA"), if you know ({e,r}sp, {e,r}bp, {e,r}ip) at
162    some point and {e,r}ip is in the range [base .. base+len-1], it
163    tells you how to calculate ({e,r}sp, {e,r}bp) for the caller of the
164    current frame and also ra, the return address of the current frame.
165 
166    First off, calculate CFA, the Canonical Frame Address, thusly:
167 
168      cfa = case cfa_how of
169               CFIC_IA_SPREL -> {e,r}sp + cfa_off
170               CFIC_IA_BPREL -> {e,r}bp + cfa_off
171               CFIC_EXPR     -> expr whose index is in cfa_off
172 
173    Once that is done, the previous frame's {e,r}sp/{e,r}bp values and
174    this frame's {e,r}ra value can be calculated like this:
175 
176      old_{e,r}sp/{e,r}bp/ra
177          = case {e,r}sp/{e,r}bp/ra_how of
178               CFIR_UNKNOWN   -> we don't know, sorry
179               CFIR_SAME      -> same as it was before (sp/fp only)
180               CFIR_CFAREL    -> cfa + sp/bp/ra_off
181               CFIR_MEMCFAREL -> *( cfa + sp/bp/ra_off )
182               CFIR_EXPR      -> expr whose index is in sp/bp/ra_off
183 
184    On ARM it's pretty much the same, except we have more registers to
185    keep track of:
186 
187      cfa = case cfa_how of
188               CFIC_ARM_R13REL -> r13 + cfa_off
189               CFIC_ARM_R12REL -> r12 + cfa_off
190               CFIC_ARM_R11REL -> r11 + cfa_off
191               CFIC_ARM_R7REL  -> r7  + cfa_off
192               CFIC_EXPR       -> expr whose index is in cfa_off
193 
194      old_r14/r13/r12/r11/r7/ra
195          = case r14/r13/r12/r11/r7/ra_how of
196               CFIR_UNKNOWN   -> we don't know, sorry
197               CFIR_SAME      -> same as it was before (r14/r13/r12/r11/r7 only)
198               CFIR_CFAREL    -> cfa + r14/r13/r12/r11/r7/ra_off
199               CFIR_MEMCFAREL -> *( cfa + r14/r13/r12/r11/r7/ra_off )
200               CFIR_EXPR      -> expr whose index is in r14/r13/r12/r11/r7/ra_off
201 
202    On ARM64:
203 
204      cfa = case cfa_how of
205               CFIC_ARM64_SPREL  -> sp + cfa_off
206               CFIC_ARM64_X29REL -> x29 + cfa_off
207               CFIC_EXPR         -> expr whose index is in cfa_off
208 
209      old_sp/x30/x29/ra
210          = case sp/x30/x29/ra_how of
211               CFIR_UNKNOWN   -> we don't know, sorry
212               CFIR_SAME      -> same as it was before
213               CFIR_CFAREL    -> cfa + sp/x30/x29/ra_off
214               CFIR_MEMCFAREL -> *( cfa + sp/x30/x29/ra_off )
215               CFIR_EXPR      -> expr whose index is in sp/x30/x29/ra_off
216 
217    On s390x we have a similar logic as x86 or amd64. We need the stack pointer
218    (r15), the frame pointer r11 (like BP) and together with the instruction
219    address in the PSW we can calculate the previous values:
220      cfa = case cfa_how of
221               CFIC_IA_SPREL -> r15 + cfa_off
222               CFIC_IA_BPREL -> r11 + cfa_off
223               CFIC_EXPR     -> expr whose index is in cfa_off
224 
225      old_sp/fp/ra
226          = case sp/fp/ra_how of
227               CFIR_UNKNOWN   -> we don't know, sorry
228               CFIR_SAME      -> same as it was before (sp/fp only)
229               CFIR_CFAREL    -> cfa + sp/fp/ra_off
230               CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off )
231               CFIR_EXPR      -> expr whose index is in sp/fp/ra_off
232 */
233 
/* CFIC_ values: how to compute the CFA (stored in a cfa_how field). */

/* x86 / amd64 ("IA"); also used by s390x. */
#define CFIC_IA_SPREL      ((UChar)1)
#define CFIC_IA_BPREL      ((UChar)2)

/* arm */
#define CFIC_ARM_R13REL    ((UChar)3)
#define CFIC_ARM_R12REL    ((UChar)4)
#define CFIC_ARM_R11REL    ((UChar)5)
#define CFIC_ARM_R7REL     ((UChar)6)

/* arm64 */
#define CFIC_ARM64_SPREL   ((UChar)7)
#define CFIC_ARM64_X29REL  ((UChar)8)

/* all targets */
#define CFIC_EXPR          ((UChar)9)

/* CFIR_ values: how to recover a register of the calling frame
   (stored in the various <reg>_how fields). */
#define CFIR_UNKNOWN       ((UChar)64)
#define CFIR_SAME          ((UChar)65)
#define CFIR_CFAREL        ((UChar)66)
#define CFIR_MEMCFAREL     ((UChar)67)
#define CFIR_EXPR          ((UChar)68)
249 
250 /* Definition of the DiCfSI_m DiCfSI machine dependent part.
251    These are highly duplicated, and are stored in a pool. */
252 #if defined(VGA_x86) || defined(VGA_amd64)
253 typedef
254    struct {
255       UChar cfa_how; /* a CFIC_IA value */
256       UChar ra_how;  /* a CFIR_ value */
257       UChar sp_how;  /* a CFIR_ value */
258       UChar bp_how;  /* a CFIR_ value */
259       Int   cfa_off;
260       Int   ra_off;
261       Int   sp_off;
262       Int   bp_off;
263    }
264    DiCfSI_m;
265 #elif defined(VGA_arm)
266 typedef
267    struct {
268       UChar cfa_how; /* a CFIC_ value */
269       UChar ra_how;  /* a CFIR_ value */
270       UChar r14_how; /* a CFIR_ value */
271       UChar r13_how; /* a CFIR_ value */
272       UChar r12_how; /* a CFIR_ value */
273       UChar r11_how; /* a CFIR_ value */
274       UChar r7_how;  /* a CFIR_ value */
275       Int   cfa_off;
276       Int   ra_off;
277       Int   r14_off;
278       Int   r13_off;
279       Int   r12_off;
280       Int   r11_off;
281       Int   r7_off;
282       // If you add additional fields, don't forget to update the
283       // initialisation of this in readexidx.c accordingly.
284    }
285    DiCfSI_m;
286 #elif defined(VGA_arm64)
287 typedef
288    struct {
289       UChar cfa_how; /* a CFIC_ value */
290       UChar ra_how;  /* a CFIR_ value */
291       UChar sp_how;  /* a CFIR_ value */ /*dw31=SP*/
292       UChar x30_how; /* a CFIR_ value */ /*dw30=LR*/
293       UChar x29_how; /* a CFIR_ value */ /*dw29=FP*/
294       Int   cfa_off;
295       Int   ra_off;
296       Int   sp_off;
297       Int   x30_off;
298       Int   x29_off;
299    }
300    DiCfSI_m;
301 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
302 /* Just have a struct with the common fields in, so that code that
303    processes the common fields doesn't have to be ifdef'd against
304    VGP_/VGA_ symbols.  These are not used in any way on ppc32/64-linux
305    at the moment. */
306 typedef
307    struct {
308       UChar cfa_how; /* a CFIC_ value */
309       UChar ra_how;  /* a CFIR_ value */
310       Int   cfa_off;
311       Int   ra_off;
312    }
313    DiCfSI_m;
314 #elif defined(VGA_s390x)
315 typedef
316    struct {
317       UChar cfa_how; /* a CFIC_ value */
318       UChar sp_how;  /* a CFIR_ value */
319       UChar ra_how;  /* a CFIR_ value */
320       UChar fp_how;  /* a CFIR_ value */
321       Int   cfa_off;
322       Int   sp_off;
323       Int   ra_off;
324       Int   fp_off;
325    }
326    DiCfSI_m;
327 #elif defined(VGA_mips32) || defined(VGA_mips64)
328 typedef
329    struct {
330       UChar cfa_how; /* a CFIC_ value */
331       UChar ra_how;  /* a CFIR_ value */
332       UChar sp_how;  /* a CFIR_ value */
333       UChar fp_how;  /* a CFIR_ value */
334       Int   cfa_off;
335       Int   ra_off;
336       Int   sp_off;
337       Int   fp_off;
338    }
339    DiCfSI_m;
340 #else
341 #  error "Unknown arch"
342 #endif
343 
344 typedef
345    struct {
346       Addr  base;
347       UInt  len;
348       UInt  cfsi_m_ix;
349    }
350    DiCfSI;
351 
/* Unary operators usable in a CfiExpr.  Numbering starts at 0x231 and
   runs consecutively. */
typedef enum {
   Cunop_Abs = 0x231,
   Cunop_Neg = 0x232,
   Cunop_Not = 0x233
} CfiUnop;
359 
/* Binary operators usable in a CfiExpr.  Numbering starts at 0x321
   and runs consecutively. */
typedef enum {
   Cbinop_Add = 0x321,
   Cbinop_Sub = 0x322,
   Cbinop_And = 0x323,
   Cbinop_Mul = 0x324,
   Cbinop_Shl = 0x325,
   Cbinop_Shr = 0x326,
   Cbinop_Eq  = 0x327,
   Cbinop_Ge  = 0x328,
   Cbinop_Gt  = 0x329,
   Cbinop_Le  = 0x32A,
   Cbinop_Lt  = 0x32B,
   Cbinop_Ne  = 0x32C
} CfiBinop;
376 
/* Registers that a CfiExpr may refer to.  Numbering starts at 0x213
   and runs consecutively. */
typedef enum {
   Creg_INVALID   = 0x213,
   /* x86 / amd64 */
   Creg_IA_SP     = 0x214,
   Creg_IA_BP     = 0x215,
   Creg_IA_IP     = 0x216,
   /* arm */
   Creg_ARM_R13   = 0x217,
   Creg_ARM_R12   = 0x218,
   Creg_ARM_R15   = 0x219,
   Creg_ARM_R14   = 0x21A,
   Creg_ARM_R7    = 0x21B,
   /* arm64 */
   Creg_ARM64_X30 = 0x21C,
   /* s390 */
   Creg_S390_IA   = 0x21D,
   Creg_S390_SP   = 0x21E,
   Creg_S390_FP   = 0x21F,
   Creg_S390_LR   = 0x220,
   /* mips */
   Creg_MIPS_RA   = 0x221
} CfiReg;
396 
/* Discriminator for the variants of a CfiExpr.  Numbering starts at
   0x123 and runs consecutively. */
typedef enum {
   Cex_Undef  = 0x123,
   Cex_Deref  = 0x124,
   Cex_Const  = 0x125,
   Cex_Unop   = 0x126,
   Cex_Binop  = 0x127,
   Cex_CfiReg = 0x128,
   Cex_DwReg  = 0x129
} CfiExprTag;
408 
409 typedef
410    struct {
411       CfiExprTag tag;
412       union {
413          struct {
414          } Undef;
415          struct {
416             Int ixAddr;
417          } Deref;
418          struct {
419             UWord con;
420          } Const;
421          struct {
422             CfiUnop op;
423             Int ix;
424          } Unop;
425          struct {
426             CfiBinop op;
427             Int ixL;
428             Int ixR;
429          } Binop;
430          struct {
431             CfiReg reg;
432          } CfiReg;
433          struct {
434             Int reg;
435          } DwReg;
436       }
437       Cex;
438    }
439    CfiExpr;
440 
441 extern Int ML_(CfiExpr_Undef) ( XArray* dst );
442 extern Int ML_(CfiExpr_Deref) ( XArray* dst, Int ixAddr );
443 extern Int ML_(CfiExpr_Const) ( XArray* dst, UWord con );
444 extern Int ML_(CfiExpr_Unop)  ( XArray* dst, CfiUnop op, Int ix );
445 extern Int ML_(CfiExpr_Binop) ( XArray* dst, CfiBinop op, Int ixL, Int ixR );
446 extern Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg );
447 extern Int ML_(CfiExpr_DwReg) ( XArray* dst, Int reg );
448 
449 extern void ML_(ppCfiExpr)( const XArray* src, Int ix );
450 
451 /* ---------------- FPO INFO (Windows PE) -------------- */
452 
453 /* for apps using Wine: MSVC++ PDB FramePointerOmitted: somewhat like
454    a primitive CFI */
455 typedef
456    struct _FPO_DATA {  /* 16 bytes */
457       UInt   ulOffStart; /* offset of 1st byte of function code */
458       UInt   cbProcSize; /* # bytes in function */
459       UInt   cdwLocals;  /* # bytes/4 in locals */
460       UShort cdwParams;  /* # bytes/4 in params */
461       UChar  cbProlog;   /* # bytes in prolog */
462       UChar  cbRegs :3;  /* # regs saved */
463       UChar  fHasSEH:1;  /* Structured Exception Handling */
464       UChar  fUseBP :1;  /* EBP has been used */
465       UChar  reserved:1;
466       UChar  cbFrame:2;  /* frame type */
467    }
468    FPO_DATA;
469 
/* PDB frame kinds; presumably the values taken by the frame-type
   field of FPO_DATA (confirm against the PDB reader). */
#define PDB_FRAME_FPO   0
#define PDB_FRAME_TRAP  1
#define PDB_FRAME_TSS   2
473 
474 /* --------------------- VARIABLES --------------------- */
475 
476 typedef
477    struct {
478       Addr    aMin;
479       Addr    aMax;
480       XArray* /* of DiVariable */ vars;
481    }
482    DiAddrRange;
483 
484 typedef
485    struct {
486       const  HChar* name;  /* in DebugInfo.strpool */
487       UWord  typeR; /* a cuOff */
488       const GExpr* gexpr; /* on DebugInfo.gexprs list */
489       const GExpr* fbGX;  /* SHARED. */
490       UInt   fndn_ix; /* where declared; may be zero. index
491                          in DebugInfo.fndnpool */
492       Int    lineNo;   /* where declared; may be zero. */
493    }
494    DiVariable;
495 
496 Word
497 ML_(cmp_for_DiAddrRange_range) ( const void* keyV, const void* elemV );
498 
499 /* --------------------- DEBUGINFO --------------------- */
500 
501 /* This is the top-level data type.  It's a structure which contains
502    information pertaining to one mapped ELF object.  This type is
503    exported only abstractly - in pub_tool_debuginfo.h. */
504 
505 /* First though, here's an auxiliary data structure.  It is only ever
506    used as part of a struct _DebugInfo.  We use it to record
507    observations about mappings and permission changes to the
508    associated file, so as to decide when to read debug info.  It's
509    essentially an ultra-trivial finite state machine which, when it
510    reaches an accept state, signals that we should now read debug info
511    from the object into the associated struct _DebugInfo.  The accept
512    state is arrived at when have_rx_map and have_rw_map both become
513    true.  The initial state is one in which we have no observations,
514    so have_rx_map and have_rw_map are both false.
515 
516    This all started as a rather ad-hoc solution, but was further
517    expanded to handle weird object layouts, e.g. more than one rw
518    or rx mapping for one binary.
519 
520    The normal sequence of events is one of
521 
522    start  -->  r-x mapping  -->  rw- mapping  -->  accept
523    start  -->  rw- mapping  -->  r-x mapping  -->  accept
524 
525    that is, take the first r-x and rw- mapping we see, and we're done.
526 
527    On MacOSX >= 10.7, 32-bit, there appears to be a new variant:
528 
529    start  -->  r-- mapping  -->  rw- mapping
530           -->  upgrade r-- mapping to r-x mapping  -->  accept
531 
532    where the upgrade is done by a call to mach_vm_protect (OSX 10.7)
533    or kernelrpc_mach_vm_protect_trap (OSX 10.9 and possibly 10.8).
534    Hence we need to also track this possibility.
535 
536    From perusal of dyld sources, it appears that this scheme could
537    also be used for 64 bit libraries, although that doesn't seem to happen
538    in practice.  dyld uses this scheme when the text section requires
539    relocation, which only appears to be the case for 32 bit objects.
540 */
541 
542 typedef struct
543 {
544    Addr  avma; /* these fields record the file offset, length */
545    SizeT size; /* and map address of each mapping             */
546    OffT  foff;
547    Bool  rx, rw, ro;  /* memory access flags for this mapping */
548 } DebugInfoMapping;
549 
550 struct _DebugInfoFSM
551 {
552    HChar*  filename;  /* in mallocville (VG_AR_DINFO)               */
553    HChar*  dbgname;   /* in mallocville (VG_AR_DINFO)               */
554    XArray* maps;      /* XArray of DebugInfoMapping structs         */
555    Bool  have_rx_map; /* did we see a r?x mapping yet for the file? */
556    Bool  have_rw_map; /* did we see a rw? mapping yet for the file? */
557    Bool  have_ro_map; /* did we see a r-- mapping yet for the file? */
558 };
559 
560 
/* Size constant (64 KiB) to do with the string table in struct
   _DebugInfo (::strpool). */
#define SEGINFO_STRPOOLSIZE (64 * 1024)
563 
564 
/* Number of .eh_frame sections we are prepared to record per object.
   More than one is unusual but apparently allowed by ELF.  See
   http://sourceware.org/bugzilla/show_bug.cgi?id=12675
*/
#define N_EHFRAME_SECTS 2
570 
571 
572 /* So, the main structure for holding debug info for one object. */
573 
574 struct _DebugInfo {
575 
576    /* Admin stuff */
577 
578    struct _DebugInfo* next;   /* list of DebugInfos */
579    Bool               mark;   /* marked for deletion? */
580 
581    /* An abstract handle, which can be used by entities outside of
582       m_debuginfo to (in an abstract datatype sense) refer to this
583       struct _DebugInfo.  A .handle of zero is invalid; valid handles
584       are 1 and above.  The same handle is never issued twice (in any
585       given run of Valgrind), so a handle becomes invalid when the
586       associated struct _DebugInfo is discarded, and remains invalid
587       forever thereafter.  The .handle field is set as soon as this
588       structure is allocated. */
589    ULong handle;
590 
591    /* Used for debugging only - indicate what stuff to dump whilst
592       reading stuff into the seginfo.  Are computed as early in the
593       lifetime of the DebugInfo as possible -- at the point when it is
594       created.  Use these when deciding what to spew out; do not use
595       the global VG_(clo_blah) flags. */
596 
597    Bool trace_symtab; /* symbols, our style */
598    Bool trace_cfi;    /* dwarf frame unwind, our style */
599    Bool ddump_syms;   /* mimic /usr/bin/readelf --syms */
600    Bool ddump_line;   /* mimic /usr/bin/readelf --debug-dump=line */
601    Bool ddump_frames; /* mimic /usr/bin/readelf --debug-dump=frames */
602 
603    /* The "decide when it is time to read debuginfo" state machine.
604       This structure must get filled in before we can start reading
605       anything from the ELF/MachO file.  This structure is filled in
606       by VG_(di_notify_mmap) and its immediate helpers. */
607    struct _DebugInfoFSM fsm;
608 
609    /* Once the ::fsm has reached an accept state -- typically, when
610       both a rw? and r?x mapping for .filename have been observed --
611       we can go on to read the symbol tables and debug info.
612       .have_dinfo changes from False to True when the debug info has
613       been completely read in and postprocessed (canonicalised) and is
614       now suitable for querying. */
615    /* If have_dinfo is False, then all fields below this point are
616       invalid and should not be consulted. */
617    Bool  have_dinfo; /* initially False */
618 
619    /* All the rest of the fields in this structure are filled in once
620       we have committed to reading the symbols and debug info (that
621       is, at the point where .have_dinfo is set to True). */
622 
623    /* The file's soname. */
624    HChar* soname;
625 
626    /* Description of some important mapped segments.  The presence or
627       absence of the mapping is denoted by the _present field, since
628       in some obscure circumstances (to do with data/sdata/bss) it is
629       possible for the mapping to be present but have zero size.
630       Certainly text_ is mandatory on all platforms; not sure about
631       the rest though.
632 
633       --------------------------------------------------------
634 
635       Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that
636 
637       either (size of all rx maps == 0 && cfsi == NULL) (the degenerate case)
638 
639       or the normal case, which is the AND of the following:
640       (0) size of at least one rx mapping > 0
641       (1) no two DebugInfos with some rx mapping of size > 0
642           have overlapping rx mappings
643       (2) [cfsi_minavma,cfsi_maxavma] does not extend beyond
644           [avma,+size) of one rx mapping; that is, the former
645           is a subrange or equal to the latter.
646       (3) all DiCfSI in the cfsi array all have ranges that fall within
647           [avma,+size) of that rx mapping.
648       (4) all DiCfSI in the cfsi array are non-overlapping
649 
650       The cumulative effect of these restrictions is to ensure that
651       all the DiCfSI records in the entire system are non overlapping.
652       Hence any address falls into either exactly one DiCfSI record,
653       or none.  Hence it is safe to cache the results of searches for
654       DiCfSI records.  This is the whole point of these restrictions.
655       The caching of DiCfSI searches is done in VG_(use_CF_info).  The
656       cache is flushed after any change to debugInfo_list.  DiCfSI
657       searches are cached because they are central to stack unwinding
658       on amd64-linux.
659 
660       Where are these invariants imposed and checked?
661 
662       They are checked after a successful read of debuginfo into
663       a DebugInfo*, in check_CFSI_related_invariants.
664 
665       (1) is not really imposed anywhere.  We simply assume that the
666       kernel will not map the text segments from two different objects
667       into the same space.  Sounds reasonable.
668 
669       (2) follows from (4) and (3).  It is ensured by canonicaliseCFI.
670       (3) is ensured by ML_(addDiCfSI).
671       (4) is ensured by canonicaliseCFI.
672 
673       --------------------------------------------------------
674 
675       Comment_on_DEBUG_SVMA_and_DEBUG_BIAS_fields:
676 
677       The _debug_{svma,bias} fields were added as part of a fix to
678       #185816.  The problem encompassed in that bug report was that it
679       wasn't correct to apply the bias values deduced for a
680       primary object to its associated debuginfo object, because the
681       debuginfo object (or the primary) could have been prelinked to a
682       different SVMA.  Hence debuginfo and primary objects need to
683       have their own biases.
684 
685       ------ JRS: (referring to r9329): ------
686       Let me see if I understand the workings correctly.  Initially
687       the _debug_ values are set to the same values as the "normal"
688       ones, as there's a bunch of bits of code like this (in
689       readelf.c)
690 
691          di->text_svma = svma;
692          ...
693          di->text_bias = rx_bias;
694          di->text_debug_svma = svma;
695          di->text_debug_bias = rx_bias;
696 
697       If a debuginfo object subsequently shows up then the
698       _debug_svma/bias are set for the debuginfo object.  Result is
699       that if there's no debuginfo object then the values are the same
700       as the primary-object values, and if there is a debuginfo object
701       then they will (or at least may) be different.
702 
703       Then when we need to actually bias something, we'll have to
704       decide whether to use the primary bias or the debuginfo bias.
705       And the strategy is to use the primary bias for ELF symbols but
706       the debuginfo bias for anything pulled out of Dwarf.
707 
708       ------ THH: ------
709       Correct - the debug_svma and bias values apply to any address
710       read from the debug data regardless of where that debug data is
711       stored and the other values are used for addresses from other
712       places (primarily the symbol table).
713 
714       ------ JRS: ------
715       Ok; so this was my only area of concern.  Are there any
716       corner-case scenarios where this wouldn't be right?  It sounds
717       like we're assuming the ELF symbols come from the primary object
718       and, if there is a debug object, then all the Dwarf comes from
719       there.  But what if (eg) both symbols and Dwarf come from the
720       debug object?  Is that even possible or allowable?
721 
722       ------ THH: ------
723       You may have a point...
724 
725       The current logic is to try and take any one set of data from
726       either the base object or the debug object. There are four sets
727       of data we consider:
728 
729          - Symbol Table
730          - Stabs
731          - DWARF1
732          - DWARF2
733 
734       If we see the primary section for a given set in the base object
735       then we ignore all sections relating to that set in the debug
736       object.
737 
738       Now in principle if we saw a secondary section (like debug_line
739       say) in the base object, but not the main section (debug_info in
740       this case) then we would take debug_info from the debug object
741       but would use the debug_line from the base object unless we saw
742       a replacement copy in the debug object. That's probably unlikely
743       however.
744 
745       A bigger issue might be, as you say, the symbol table as we will
746       pick that up from the debug object if it isn't in the base. The
747       dynamic symbol table will always have to be in the base object
748       though so we will have to be careful when processing symbols to
749       know which table we are reading in that case.
750 
751       What we probably need to do is tell read_elf_symtab which object
752       the symbols it is being asked to read came from.
753 
754       (A followup patch to deal with this was committed in r9469).
755    */
756    /* .text */
757    Bool     text_present;
758    Addr     text_avma;
759    Addr     text_svma;
760    SizeT    text_size;
761    PtrdiffT text_bias;
762    Addr     text_debug_svma;
763    PtrdiffT text_debug_bias;
764    /* .data */
765    Bool     data_present;
766    Addr     data_svma;
767    Addr     data_avma;
768    SizeT    data_size;
769    PtrdiffT data_bias;
770    Addr     data_debug_svma;
771    PtrdiffT data_debug_bias;
772    /* .sdata */
773    Bool     sdata_present;
774    Addr     sdata_svma;
775    Addr     sdata_avma;
776    SizeT    sdata_size;
777    PtrdiffT sdata_bias;
778    Addr     sdata_debug_svma;
779    PtrdiffT sdata_debug_bias;
780    /* .rodata */
781    Bool     rodata_present;
782    Addr     rodata_svma;
783    Addr     rodata_avma;
784    SizeT    rodata_size;
785    PtrdiffT rodata_bias;
786    Addr     rodata_debug_svma;
787    PtrdiffT rodata_debug_bias;
788    /* .bss */
789    Bool     bss_present;
790    Addr     bss_svma;
791    Addr     bss_avma;
792    SizeT    bss_size;
793    PtrdiffT bss_bias;
794    Addr     bss_debug_svma;
795    PtrdiffT bss_debug_bias;
796    /* .sbss */
797    Bool     sbss_present;
798    Addr     sbss_svma;
799    Addr     sbss_avma;
800    SizeT    sbss_size;
801    PtrdiffT sbss_bias;
802    Addr     sbss_debug_svma;
803    PtrdiffT sbss_debug_bias;
804    /* .ARM.exidx -- sometimes present on arm32, containing unwind info. */
805    Bool     exidx_present;
806    Addr     exidx_avma;
807    Addr     exidx_svma;
808    SizeT    exidx_size;
809    PtrdiffT exidx_bias;
810    /* .ARM.extab -- sometimes present on arm32, containing unwind info. */
811    Bool     extab_present;
812    Addr     extab_avma;
813    Addr     extab_svma;
814    SizeT    extab_size;
815    PtrdiffT extab_bias;
816    /* .plt */
817    Bool   plt_present;
818    Addr	  plt_avma;
819    SizeT  plt_size;
820    /* .got */
821    Bool   got_present;
822    Addr   got_avma;
823    SizeT  got_size;
824    /* .got.plt */
825    Bool   gotplt_present;
826    Addr   gotplt_avma;
827    SizeT  gotplt_size;
828    /* .opd -- needed on ppc64be-linux for finding symbols */
829    Bool   opd_present;
830    Addr   opd_avma;
831    SizeT  opd_size;
832    /* .eh_frame -- needed on amd64-linux for stack unwinding.  We might
833       see more than one, hence the arrays. */
834    UInt   n_ehframe;  /* 0 .. N_EHFRAME_SECTS */
835    Addr   ehframe_avma[N_EHFRAME_SECTS];
836    SizeT  ehframe_size[N_EHFRAME_SECTS];
837 
838    /* Sorted tables of stuff we snarfed from the file.  This is the
839       eventual product of reading the debug info.  All this stuff
840       lives in VG_AR_DINFO. */
841 
842    /* An expandable array of symbols. */
843    DiSym*  symtab;
844    UWord   symtab_used;
845    UWord   symtab_size;
846    /* Two expandable arrays, storing locations and their filename/dirname. */
847    DiLoc*  loctab;
848    UInt    sizeof_fndn_ix;  /* Similar use as sizeof_cfsi_m_ix below. */
849    void*   loctab_fndn_ix;  /* loctab[i] filename/dirname is identified by
850                                loctab_fndn_ix[i] (an index in di->fndnpool)
851                                0 means filename/dirname unknown.
852                                The void* is an UChar* or UShort* or UInt*
853                                depending on sizeof_fndn_ix. */
854    UWord   loctab_used;
855    UWord   loctab_size;
856    /* An expandable array of inlined fn info.
857       maxinl_codesz is the biggest inlined piece of code
858       in inltab (i.e. the max of 'addr_hi - addr_lo'). */
859    DiInlLoc* inltab;
860    UWord   inltab_used;
861    UWord   inltab_size;
862    SizeT   maxinl_codesz;
863 
864    /* A set of expandable arrays to store CFI summary info records.
865       The machine specific information (i.e. the DiCfSI_m struct)
866       are stored in cfsi_m_pool, as these are highly duplicated.
867       The DiCfSI_m are allocated in cfsi_m_pool and identified using
868       a (we hope) small integer : often one byte is enough, sometimes
869       2 bytes are needed.
870 
871       cfsi_base contains the bases of the code address ranges.
872       cfsi_size is the size of the cfsi_base array.
873       The elements cfsi_base[0] till cfsi_base[cfsi_used-1] are used.
874       Following elements are not used (yet).
875 
876       For each base in cfsi_base, an index into cfsi_m_pool is stored
877       in cfsi_m_ix array. The size of cfsi_m_ix is equal to
878       cfsi_size*sizeof_cfsi_m_ix. The used portion of cfsi_m_ix is
879       cfsi_m_ix[0] till cfsi_m_ix[(cfsi_used-1)*sizeof_cfsi_m_ix].
880 
881       cfsi_base[i] gives the base address of a code range covered by
882       some CF Info. The corresponding CF Info is identified by an index
883       in cfsi_m_pool. The DiCfSI_m index in cfsi_m_pool corresponding to
884       cfsi_base[i] is given
885         by ((UChar*) cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 1
886         by ((UShort*)cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 2
887         by ((UInt*)  cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 4.
888 
889       The end of the code range starting at cfsi_base[i] is given by
890       cfsi_base[i+1]-1 (or cfsi_maxavma for  cfsi_base[cfsi_used-1]).
891       Some code ranges between cfsi_minavma and cfsi_maxavma might not
892       be covered by cfi information. Such not covered ranges are stored by
893       a base in cfsi_base and a corresponding 0 index in cfsi_m_ix.
894 
895       A variable size representation has been chosen for the elements of
896       cfsi_m_ix as in many cases one byte is good enough. For big
897       objects, 2 bytes are needed. No object has yet been found where
898       4 bytes are needed (but the code is ready to handle this case).
899       Not covered ranges ('cfi holes') are stored explicitly in
900       cfsi_base/cfsi_m_ix as this is more memory efficient than storing
901       a length for each covered range : on x86 or amd64, we typically have
902       a hole every 8 covered ranges. On arm64, we have very few holes
903       (1 every 50 or 100 ranges).
904 
905       The cfsi information is read and prepared in the cfsi_rd array.
906       Once all the information has been read, the cfsi_base and cfsi_m_ix
907       arrays will be filled in from cfsi_rd. cfsi_rd will then be freed.
908       This is all done by ML_(finish_CFSI_arrays).
909 
910       Also includes summary address bounds, showing the min and max address
911       covered by any of the records, as an aid to fast searching.  And, if the
912       records require any expression nodes, they are stored in
913       cfsi_exprs. */
914    Addr* cfsi_base;
915    UInt  sizeof_cfsi_m_ix; /* size in byte of indexes stored in cfsi_m_ix. */
916    void* cfsi_m_ix; /* Each index occupies sizeof_cfsi_m_ix bytes.
917                        The void* is an UChar* or UShort* or UInt*
918                        depending on sizeof_cfsi_m_ix.  */
919 
920    DiCfSI* cfsi_rd; /* Only used during reading, NULL once info is read. */
921 
922    UWord   cfsi_used;
923    UWord   cfsi_size;
924 
925    DedupPoolAlloc *cfsi_m_pool;
926    Addr    cfsi_minavma;
927    Addr    cfsi_maxavma;
928    XArray* cfsi_exprs; /* XArray of CfiExpr */
929 
930    /* Optimized code under Wine x86: MSVC++ PDB FramePointerOmitted
931       data.  Non-expandable array, hence .size == .used. */
932    FPO_DATA* fpo;
933    UWord     fpo_size;
934    Addr      fpo_minavma;
935    Addr      fpo_maxavma;
936    Addr      fpo_base_avma;
937 
938    /* Pool of strings -- the string table.  Pointers
939       into this are stable (the memory is not reallocated). */
940    DedupPoolAlloc *strpool;
941 
942    /* Pool of FnDn -- filename and dirname.
943       Elements in the pool are allocated using VG_(allocFixedEltDedupPA). */
944    DedupPoolAlloc *fndnpool;
945 
946    /* Variable scope information, as harvested from Dwarf3 files.
947 
948       In short it's an
949 
950          array of (array of PC address ranges and variables)
951 
952       The outer array indexes over scopes, with Entry 0 containing
953       information on variables which exist for any value of the program
954       counter (PC) -- that is, the outermost scope.  Entries 1, 2, 3,
955       etc contain information on increasingly deeply nested variables.
956 
957       Each inner array is an array of (an address range, and a set
958       of variables that are in scope over that address range).
959 
960       The address ranges may not overlap.
961 
962       Since Entry 0 in the outer array holds information on variables
963       that exist for any value of the PC (that is, global vars), it
964       follows that Entry 0's inner array can only have one address
965       range pair, one that covers the entire address space.
966    */
967    XArray* /* of OSet of DiAddrRange */varinfo;
968 
969    /* These are arrays of the relevant typed objects, held here
970       partially for the purposes of visiting each object exactly once
971       when we need to delete them. */
972 
973    /* An array of TyEnts.  These are needed to make sense of any types
974       in the .varinfo.  Also, when deleting this DebugInfo, we must
975       first traverse this array and throw away malloc'd stuff hanging
976       off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */
977    XArray* /* of TyEnt */ admin_tyents;
978 
979    /* An array of guarded DWARF3 expressions. */
980    XArray* admin_gexprs;
981 
982    /* Cached last rx mapping matched and returned by ML_(find_rx_mapping).
983       This helps performance a lot during ML_(addLineInfo) etc., which can
984       easily be invoked hundreds of thousands of times. */
985    DebugInfoMapping* last_rx_map;
986 };
987 
988 /* --------------------- functions --------------------- */
989 
990 /* ------ Adding ------ */
991 
992 /* Add a symbol to di's symbol table.  The contents of 'sym' are
993    copied.  It is assumed (and checked) that 'sym' contains only one
994    name, so there is no auxiliary ::sec_names vector to duplicate.
995    In other words, the copy is a shallow copy, and there are
996    assertions in place to ensure that is OK. */
997 extern void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym );
998 
999 /* Add a filename/dirname pair to a DebugInfo and return its index
1000    in the fndnpool fixed pool. */
1001 extern UInt ML_(addFnDn) (struct _DebugInfo* di,
1002                           const HChar* filename,
1003                           const HChar* dirname);  /* NULL is allowable */
1004 
1005 /* Returns the filename of the fndn pair identified by fndn_ix (an
1006    index in di->fndnpool).  Returns "???" if fndn_ix is 0. */
1007 extern const HChar* ML_(fndn_ix2filename) (const DebugInfo* di,
1008                                            UInt fndn_ix);
1009 
1010 /* Returns the dirname of the fndn pair identified by fndn_ix (an
1011    index in di->fndnpool), or "" if fndn_ix is 0 or the dirname is NULL. */
1012 extern const HChar* ML_(fndn_ix2dirname) (const DebugInfo* di,
1013                                           UInt fndn_ix);
1014 
1015 /* Returns the fndn_ix for the LineInfo locno in di->loctab.
1016    Returns 0 if the filename/dirname are unknown. */
1017 extern UInt ML_(fndn_ix) (const DebugInfo* di, Word locno);
1018 
1019 /* Add a line-number record to a DebugInfo.
1020    fndn_ix is an index in di->fndnpool, allocated using ML_(addFnDn).
1021    Give a 0 index for an unknown filename/dirname pair. */
1022 extern
1023 void ML_(addLineInfo) ( struct _DebugInfo* di,
1024                         UInt fndn_ix,
1025                         Addr this, Addr next, Int lineno, Int entry);
1026 
1027 /* Add an inlined-call record to a DebugInfo.
1028    A call to the below means that inlinedfn code has been
1029    inlined, resulting in code from [addr_lo, addr_hi[.
1030    Note that addr_hi is excluded, i.e. is not part of the inlined code.
1031    fndn_ix and lineno identify the location of the call that caused
1032    this inlining.
1033    fndn_ix is an index in di->fndnpool, allocated using ML_(addFnDn).
1034    Give a 0 index for an unknown filename/dirname pair.
1035    In case of nested inlining, a smaller level indicates the call
1036    is closer to main than a call with a higher level. */
1037 extern
1038 void ML_(addInlInfo) ( struct _DebugInfo* di,
1039                        Addr addr_lo, Addr addr_hi,
1040                        const HChar* inlinedfn,
1041                        UInt fndn_ix,
1042                        Int lineno, UShort level);
1043 
1044 /* Add a CFI summary record to di.  The supplied DiCfSI_m is copied. */
1045 extern void ML_(addDiCfSI) ( struct _DebugInfo* di,
1046                              Addr base, UInt len, DiCfSI_m* cfsi_m );
1047 
1048 /* Given a position in the di->cfsi_base/cfsi_m_ix arrays, return
1049    the corresponding DiCfSI_m*.  Return NULL if the position is a
1050    cfsi hole (an uncovered range, stored as index 0 in cfsi_m_ix). */
1051 DiCfSI_m* ML_(get_cfsi_m) (const DebugInfo* di, UInt pos);
1052 
1053 /* Add a string to di's string table (di->strpool).  If len == -1,
1054    ML_(addStr) will itself measure the length of the string. */
1055 extern const HChar* ML_(addStr) ( DebugInfo* di, const HChar* str, Int len );
1056 
1057 /* Add a string to di's string table (di->strpool), by copying the
1058    string from the given DiCursor 'c'.  Measures the length of the
1059    string itself. */
1060 extern const HChar* ML_(addStrFromCursor)( DebugInfo* di, DiCursor c );
1061 
1062 extern void ML_(addVar)( struct _DebugInfo* di,
1063                          Int    level,
1064                          Addr   aMin,
1065                          Addr   aMax,
1066                          const  HChar* name,
1067                          UWord  typeR, /* a cuOff */
1068                          const GExpr* gexpr,
1069                          const GExpr* fbGX, /* SHARED. */
1070                          UInt   fndn_ix, /* where declared - may be zero */
1071                          Int    lineNo, /* where declared - may be zero */
1072                          Bool   show );
1073 /* Note: fndn_ix is an index in di->fndnpool, as for ML_(addInlInfo)
1074    and ML_(addLineInfo); 0 means the filename/dirname pair is unknown. */
1075 
1076 /* Canonicalise the tables held by 'di', in preparation for use.  Call
1077    this once all entries have been added to these tables. */
1078 extern void ML_(canonicaliseTables) ( struct _DebugInfo* di );
1079 
1080 /* Canonicalise the call-frame-info table held by 'di', in preparation
1081    for use. This is called by ML_(canonicaliseTables) but can also be
1082    called on its own to sort just this table. */
1083 extern void ML_(canonicaliseCFI) ( struct _DebugInfo* di );
1084 
1085 /* ML_(finish_CFSI_arrays) fills in the cfsi_base and cfsi_m_ix arrays
1086    from the cfsi_rd array.  cfsi_rd is then freed. */
1087 extern void ML_(finish_CFSI_arrays) ( struct _DebugInfo* di );
1088 
1089 /* ------ Searching ------ */
1090 
1091 /* Find a symbol-table index containing 'ptr', or -1 if not found.  Binary
1092    search.  'findText' presumably selects text vs data symbols (confirm). */
1093 extern Word ML_(search_one_symtab) ( const DebugInfo* di, Addr ptr,
1094                                      Bool findText );
1095 
1096 /* Find a location-table index containing the address 'ptr', or -1
1097    if not found.  Binary search.  */
1098 extern Word ML_(search_one_loctab) ( const DebugInfo* di, Addr ptr );
1099 
1100 /* Find a CFI-table index containing the address 'ptr', or -1 if
1101    not found.  Binary search.  */
1102 extern Word ML_(search_one_cfitab) ( const DebugInfo* di, Addr ptr );
1103 
1104 /* Find an FPO-table index containing the address 'ptr', or -1
1105    if not found.  Binary search.  */
1106 extern Word ML_(search_one_fpotab) ( const DebugInfo* di, Addr ptr );
1107 
1108 /* Helper for the most common search: find an rx mapping containing
1109    the address range lo .. hi.  The range must fall entirely within
1110    the mapping.  The last match is cached in di->last_rx_map.
1111    Asserts if lo > hi; caller must ensure this doesn't happen. */
1112 extern DebugInfoMapping* ML_(find_rx_mapping) ( DebugInfo* di,
1113                                                 Addr lo, Addr hi );
1114 
1115 /* ------ Misc ------ */
1116 
1117 /* Show a non-fatal debug info reading error.  Use VG_(core_panic) for
1118    fatal errors.  'serious' errors are always shown; non-'serious' ones
1119    are shown only at verbosity level 2 and above. */
1120 extern
1121 void ML_(symerr) ( const DebugInfo* di, Bool serious, const HChar* msg );
1122 
1123 /* Print a symbol.  'idx' is presumably its symbol-table index (confirm). */
1124 extern void ML_(ppSym) ( Int idx, const DiSym* sym );
1125 
1126 /* Print a call-frame-info summary.  'exprs' is presumably di->cfsi_exprs. */
1127 extern void ML_(ppDiCfSI) ( const XArray* /* of CfiExpr */ exprs,
1128                             Addr base, UInt len,
1129                             const DiCfSI_m* si_m );
1130 
1131 
/* Both macros expect a variable named 'di' with a 'trace_symtab'
   member to be in scope at the point of use.

   TRACE_SYMTAB_ENABLED evaluates to di->trace_symtab.

   TRACE_SYMTAB(format, args...) calls VG_(printf)(format, args...) only
   when TRACE_SYMTAB_ENABLED is true.  The body is wrapped in
   do { ... } while (0) so that the macro expands to exactly one
   statement: a bare 'if' would either capture a following 'else' or
   fail to compile when used as the body of an unbraced if/else.
   Invocations must be terminated with a semicolon. */
#define TRACE_SYMTAB_ENABLED (di->trace_symtab)
#define TRACE_SYMTAB(format, args...) \
   do { \
      if (TRACE_SYMTAB_ENABLED) { VG_(printf)(format, ## args); } \
   } while (0)
1135 
1136 
1137 #endif /* ndef __PRIV_STORAGE_H */
1138 
1139 /*--------------------------------------------------------------------*/
1140 /*--- end                                                          ---*/
1141 /*--------------------------------------------------------------------*/
1142