• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- Format-neutral storage of and querying of info acquired from ---*/
4 /*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info.                    ---*/
5 /*---                                               priv_storage.h ---*/
6 /*--------------------------------------------------------------------*/
7 
8 /*
9    This file is part of Valgrind, a dynamic binary instrumentation
10    framework.
11 
12    Copyright (C) 2000-2013 Julian Seward
13       jseward@acm.org
14 
15    This program is free software; you can redistribute it and/or
16    modify it under the terms of the GNU General Public License as
17    published by the Free Software Foundation; either version 2 of the
18    License, or (at your option) any later version.
19 
20    This program is distributed in the hope that it will be useful, but
21    WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    General Public License for more details.
24 
25    You should have received a copy of the GNU General Public License
26    along with this program; if not, write to the Free Software
27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28    02111-1307, USA.
29 
30    The GNU General Public License is contained in the file COPYING.
31 */
32 /*
33    Stabs reader greatly improved by Nick Nethercote, Apr 02.
34    This module was also extensively hacked on by Jeremy Fitzhardinge
35    and Tom Hughes.
36 */
37 /* See comment at top of debuginfo.c for explanation of
38    the _svma / _avma / _image / _bias naming scheme.
39 */
40 /* Note this is not freestanding; needs pub_core_xarray.h and
41    priv_tytypes.h to be included before it. */
42 
43 #ifndef __PRIV_STORAGE_H
44 #define __PRIV_STORAGE_H
45 
46 #include "pub_core_basics.h"   // Addr
47 #include "pub_core_xarray.h"   // XArray
48 #include "priv_d3basics.h"     // GExpr et al.
49 #include "priv_image.h"        // DiCursor
50 
51 /* --------------------- SYMBOLS --------------------- */
52 
53 /* A structure to hold an ELF/MachO symbol (very crudely).  Usually
54    the symbol only has one name, which is stored in ::pri_name, and
55    ::sec_names is NULL.  If there are other names, these are stored in
56    ::sec_names, which is a NULL terminated vector holding the names.
57    The vector is allocated in VG_AR_DINFO, the names themselves live
58    in DebugInfo::strchunks.
59 
60    From the point of view of ELF, the primary vs secondary distinction
61    is artificial: they are all just names associated with the address,
62    none of which has higher precedence than any other.  However, from
63    the point of view of mapping an address to a name to display to the
64    user, we need to choose one "preferred" name, and so that might as
65    well be installed as the pri_name, whilst all others can live in
66    sec_names[].  This has the convenient side effect that, in the
67    common case where there is only one name for the address,
68    sec_names[] does not need to be allocated.
69 */
70 typedef
71    struct {
72       Addr    addr;    /* lowest address of entity */
73       Addr    tocptr;  /* ppc64-linux only: value that R2 should have */
74       HChar*  pri_name;  /* primary name, never NULL */
75       HChar** sec_names; /* NULL, or a NULL term'd array of other names */
76       // XXX: this could be shrunk (on 32-bit platforms) by using 30
77       // bits for the size and 1 bit each for isText and isIFunc.  If you
78       // do this, make sure that all assignments to the latter two use
79       // 0 or 1 (or True or False), and that a positive number larger
80       // than 1 is never used to represent True.
81       UInt    size;    /* size in bytes */
82       Bool    isText;
83       Bool    isIFunc; /* symbol is an indirect function? */
84    }
85    DiSym;
86 
87 /* --------------------- SRCLOCS --------------------- */
88 
/* A DiLoc packs a source line number and a code-range size into one
   32-bit word: LINENO_BITS bits for the line number, and whatever is
   left (LOC_SIZE_BITS) for the size. */
#define LINENO_BITS          20
#define LOC_SIZE_BITS        (32 - LINENO_BITS)

/* Largest line number representable in LINENO_BITS bits. */
#define MAX_LINENO           ((1 << LINENO_BITS) - 1)

/* Largest size representable in LOC_SIZE_BITS bits.  It is unlikely
   that any line has an instruction range of more than 4096 bytes. */
#define MAX_LOC_SIZE         ((1 << LOC_SIZE_BITS) - 1)

/* Line count at which overflow happens, because line numbers are
   stored as shorts in `struct nlist' in a.out.h. */
#define LINENO_OVERFLOW      (1 << (sizeof(short) * 8))

/* Threshold used to detect line number overflows: if one line is
   60000-odd smaller than the previous one, it was probably an
   overflow. */
#define OVERFLOW_DIFFERENCE  (LINENO_OVERFLOW - 5000)
104 
105 /* A structure to hold addr-to-source info for a single line.  There
106   can be a lot of these, hence the dense packing. */
107 typedef
108    struct {
109       /* Word 1 */
110       Addr   addr;               /* lowest address for this line */
111       /* Word 2 */
112       UShort size:LOC_SIZE_BITS; /* # bytes; we catch overflows of this */
113       UInt   lineno:LINENO_BITS; /* source line number, or zero */
114       /* Word 3 */
115       const HChar* filename;     /* source filename */
116       /* Word 4 */
117       const HChar* dirname;      /* source directory name */
118    }
119    DiLoc;
120 
121 /* --------------------- CF INFO --------------------- */
122 
123 /* DiCfSI: a structure to summarise DWARF2/3 CFA info for the code
124    address range [base .. base+len-1].
125 
126    On x86 and amd64 ("IA"), if you know ({e,r}sp, {e,r}bp, {e,r}ip) at
127    some point and {e,r}ip is in the range [base .. base+len-1], it
128    tells you how to calculate ({e,r}sp, {e,r}bp) for the caller of the
129    current frame and also ra, the return address of the current frame.
130 
131    First off, calculate CFA, the Canonical Frame Address, thusly:
132 
133      cfa = case cfa_how of
134               CFIC_IA_SPREL -> {e,r}sp + cfa_off
135               CFIC_IA_BPREL -> {e,r}bp + cfa_off
136               CFIC_EXPR     -> expr whose index is in cfa_off
137 
138    Once that is done, the previous frame's {e,r}sp/{e,r}bp values and
139    this frame's {e,r}ra value can be calculated like this:
140 
141      old_{e,r}sp/{e,r}bp/ra
142          = case {e,r}sp/{e,r}bp/ra_how of
143               CFIR_UNKNOWN   -> we don't know, sorry
144               CFIR_SAME      -> same as it was before (sp/fp only)
145               CFIR_CFAREL    -> cfa + sp/bp/ra_off
146               CFIR_MEMCFAREL -> *( cfa + sp/bp/ra_off )
147               CFIR_EXPR      -> expr whose index is in sp/bp/ra_off
148 
149    On ARM it's pretty much the same, except we have more registers to
150    keep track of:
151 
152      cfa = case cfa_how of
153               CFIC_ARM_R13REL -> r13 + cfa_off
154               CFIC_ARM_R12REL -> r12 + cfa_off
155               CFIC_ARM_R11REL -> r11 + cfa_off
156               CFIC_ARM_R7REL  -> r7  + cfa_off
157               CFIC_EXPR       -> expr whose index is in cfa_off
158 
159      old_r14/r13/r12/r11/r7/ra
160          = case r14/r13/r12/r11/r7/ra_how of
161               CFIR_UNKNOWN   -> we don't know, sorry
162               CFIR_SAME      -> same as it was before (r14/r13/r12/r11/r7 only)
163               CFIR_CFAREL    -> cfa + r14/r13/r12/r11/r7/ra_off
164               CFIR_MEMCFAREL -> *( cfa + r14/r13/r12/r11/r7/ra_off )
165               CFIR_EXPR      -> expr whose index is in r14/r13/r12/r11/r7/ra_off
166 
167    On ARM64:
168 
169      cfa = case cfa_how of
170               CFIC_ARM64_SPREL  -> sp + cfa_off
171               CFIC_ARM64_X29REL -> x29 + cfa_off
172               CFIC_EXPR         -> expr whose index is in cfa_off
173 
174      old_sp/x30/x29/ra
175          = case sp/x30/x29/ra_how of
176               CFIR_UNKNOWN   -> we don't know, sorry
177               CFIR_SAME      -> same as it was before
178               CFIR_CFAREL    -> cfa + sp/x30/x29/ra_off
179               CFIR_MEMCFAREL -> *( cfa + sp/x30/x29/ra_off )
180               CFIR_EXPR      -> expr whose index is in sp/x30/x29/ra_off
181 
182    On s390x we have a similar logic as x86 or amd64. We need the stack pointer
183    (r15), the frame pointer r11 (like BP) and together with the instruction
184    address in the PSW we can calculate the previous values:
185      cfa = case cfa_how of
186               CFIC_IA_SPREL -> r15 + cfa_off
187               CFIC_IA_BPREL -> r11 + cfa_off
188               CFIC_EXPR     -> expr whose index is in cfa_off
189 
190      old_sp/fp/ra
191          = case sp/fp/ra_how of
192               CFIR_UNKNOWN   -> we don't know, sorry
193               CFIR_SAME      -> same as it was before (sp/fp only)
194               CFIR_CFAREL    -> cfa + sp/fp/ra_off
195               CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off )
196               CFIR_EXPR      -> expr whose index is in sp/fp/ra_off
197 */
198 
/* CFIC_*: values for a DiCfSI's cfa_how field, i.e. how the CFA is
   to be computed. */
#define CFIC_IA_SPREL       ((UChar)1)
#define CFIC_IA_BPREL       ((UChar)2)
#define CFIC_ARM_R13REL     ((UChar)3)
#define CFIC_ARM_R12REL     ((UChar)4)
#define CFIC_ARM_R11REL     ((UChar)5)
#define CFIC_ARM_R7REL      ((UChar)6)
#define CFIC_ARM64_SPREL    ((UChar)7)
#define CFIC_ARM64_X29REL   ((UChar)8)
#define CFIC_EXPR           ((UChar)9)   /* all targets */

/* CFIR_*: values for a DiCfSI's per-register xx_how fields, i.e. how
   a caller's register value is to be recovered.  The numbering is
   disjoint from the CFIC_ values above. */
#define CFIR_UNKNOWN        ((UChar)64)
#define CFIR_SAME           ((UChar)65)
#define CFIR_CFAREL         ((UChar)66)
#define CFIR_MEMCFAREL      ((UChar)67)
#define CFIR_EXPR           ((UChar)68)
214 
215 #if defined(VGA_x86) || defined(VGA_amd64)
216 typedef
217    struct {
218       Addr  base;
219       UInt  len;
220       UChar cfa_how; /* a CFIC_IA value */
221       UChar ra_how;  /* a CFIR_ value */
222       UChar sp_how;  /* a CFIR_ value */
223       UChar bp_how;  /* a CFIR_ value */
224       Int   cfa_off;
225       Int   ra_off;
226       Int   sp_off;
227       Int   bp_off;
228    }
229    DiCfSI;
230 #elif defined(VGA_arm)
231 typedef
232    struct {
233       Addr  base;
234       UInt  len;
235       UChar cfa_how; /* a CFIC_ value */
236       UChar ra_how;  /* a CFIR_ value */
237       UChar r14_how; /* a CFIR_ value */
238       UChar r13_how; /* a CFIR_ value */
239       UChar r12_how; /* a CFIR_ value */
240       UChar r11_how; /* a CFIR_ value */
241       UChar r7_how;  /* a CFIR_ value */
242       Int   cfa_off;
243       Int   ra_off;
244       Int   r14_off;
245       Int   r13_off;
246       Int   r12_off;
247       Int   r11_off;
248       Int   r7_off;
249    }
250    DiCfSI;
251 #elif defined(VGA_arm64)
252 typedef
253    struct {
254       Addr  base;
255       UInt  len;
256       UChar cfa_how; /* a CFIC_ value */
257       UChar ra_how;  /* a CFIR_ value */
258       UChar sp_how;  /* a CFIR_ value */ /*dw31=SP*/
259       UChar x30_how; /* a CFIR_ value */ /*dw30=LR*/
260       UChar x29_how; /* a CFIR_ value */ /*dw29=FP*/
261       Int   cfa_off;
262       Int   ra_off;
263       Int   sp_off;
264       Int   x30_off;
265       Int   x29_off;
266    }
267    DiCfSI;
268 #elif defined(VGA_ppc32) || defined(VGA_ppc64)
269 /* Just have a struct with the common fields in, so that code that
270    processes the common fields doesn't have to be ifdef'd against
271    VGP_/VGA_ symbols.  These are not used in any way on ppc32/64-linux
272    at the moment. */
273 typedef
274    struct {
275       Addr  base;
276       UInt  len;
277       UChar cfa_how; /* a CFIC_ value */
278       UChar ra_how;  /* a CFIR_ value */
279       Int   cfa_off;
280       Int   ra_off;
281    }
282    DiCfSI;
283 #elif defined(VGA_s390x)
284 typedef
285    struct {
286       Addr  base;
287       UInt  len;
288       UChar cfa_how; /* a CFIC_ value */
289       UChar sp_how;  /* a CFIR_ value */
290       UChar ra_how;  /* a CFIR_ value */
291       UChar fp_how;  /* a CFIR_ value */
292       Int   cfa_off;
293       Int   sp_off;
294       Int   ra_off;
295       Int   fp_off;
296    }
297    DiCfSI;
298 #elif defined(VGA_mips32) || defined(VGA_mips64)
299 typedef
300    struct {
301       Addr  base;
302       UInt  len;
303       UChar cfa_how; /* a CFIC_ value */
304       UChar ra_how;  /* a CFIR_ value */
305       UChar sp_how;  /* a CFIR_ value */
306       UChar fp_how;  /* a CFIR_ value */
307       Int   cfa_off;
308       Int   ra_off;
309       Int   sp_off;
310       Int   fp_off;
311    }
312    DiCfSI;
313 #else
314 #  error "Unknown arch"
315 #endif
316 
317 
/* Unary operator codes for CfiExpr nodes.  Only the first enumerator
   is pinned; the others follow consecutively. */
typedef enum {
   Cunop_Abs = 0x231,
   Cunop_Neg,
   Cunop_Not
} CfiUnop;
325 
/* Binary operator codes for CfiExpr nodes.  Only the first enumerator
   is pinned; the others follow consecutively. */
typedef enum {
   Cbinop_Add = 0x321,
   Cbinop_Sub,
   Cbinop_And,
   Cbinop_Mul,
   Cbinop_Shl,
   Cbinop_Shr,
   Cbinop_Eq,
   Cbinop_Ge,
   Cbinop_Gt,
   Cbinop_Le,
   Cbinop_Lt,
   Cbinop_Ne
} CfiBinop;
342 
/* Machine registers that a CfiExpr node of kind Cex_CfiReg can name.
   Only the first enumerator is pinned; the others follow
   consecutively. */
typedef enum {
   Creg_IA_SP = 0x213,
   Creg_IA_BP,
   Creg_IA_IP,
   Creg_ARM_R13,
   Creg_ARM_R12,
   Creg_ARM_R15,
   Creg_ARM_R14,
   Creg_ARM64_X30,
   Creg_S390_R14,
   Creg_MIPS_RA
} CfiReg;
357 
/* Discriminator for CfiExpr: says which member of the Cex union is
   live.  Only the first enumerator is pinned; the others follow
   consecutively. */
typedef enum {
   Cex_Undef = 0x123,
   Cex_Deref,
   Cex_Const,
   Cex_Unop,
   Cex_Binop,
   Cex_CfiReg,
   Cex_DwReg
} CfiExprTag;
369 
370 typedef
371    struct {
372       CfiExprTag tag;
373       union {
374          struct {
375          } Undef;
376          struct {
377             Int ixAddr;
378          } Deref;
379          struct {
380             UWord con;
381          } Const;
382          struct {
383             CfiUnop op;
384             Int ix;
385          } Unop;
386          struct {
387             CfiBinop op;
388             Int ixL;
389             Int ixR;
390          } Binop;
391          struct {
392             CfiReg reg;
393          } CfiReg;
394          struct {
395             Int reg;
396          } DwReg;
397       }
398       Cex;
399    }
400    CfiExpr;
401 
402 extern Int ML_(CfiExpr_Undef) ( XArray* dst );
403 extern Int ML_(CfiExpr_Deref) ( XArray* dst, Int ixAddr );
404 extern Int ML_(CfiExpr_Const) ( XArray* dst, UWord con );
405 extern Int ML_(CfiExpr_Unop)  ( XArray* dst, CfiUnop op, Int ix );
406 extern Int ML_(CfiExpr_Binop) ( XArray* dst, CfiBinop op, Int ixL, Int ixR );
407 extern Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg );
408 extern Int ML_(CfiExpr_DwReg) ( XArray* dst, Int reg );
409 
410 extern void ML_(ppCfiExpr)( XArray* src, Int ix );
411 
412 /* ---------------- FPO INFO (Windows PE) -------------- */
413 
414 /* for apps using Wine: MSVC++ PDB FramePointerOmitted: somewhat like
415    a primitive CFI */
416 typedef
417    struct _FPO_DATA {  /* 16 bytes */
418       UInt   ulOffStart; /* offset of 1st byte of function code */
419       UInt   cbProcSize; /* # bytes in function */
420       UInt   cdwLocals;  /* # bytes/4 in locals */
421       UShort cdwParams;  /* # bytes/4 in params */
422       UChar  cbProlog;   /* # bytes in prolog */
423       UChar  cbRegs :3;  /* # regs saved */
424       UChar  fHasSEH:1;  /* Structured Exception Handling */
425       UChar  fUseBP :1;  /* EBP has been used */
426       UChar  reserved:1;
427       UChar  cbFrame:2;  /* frame type */
428    }
429    FPO_DATA;
430 
/* PDB frame type codes.
   NOTE(review): presumably the values taken by FPO_DATA.cbFrame --
   confirm against the PDB reader. */
#define PDB_FRAME_FPO   0
#define PDB_FRAME_TRAP  1
#define PDB_FRAME_TSS   2
434 
435 /* --------------------- VARIABLES --------------------- */
436 
437 typedef
438    struct {
439       Addr    aMin;
440       Addr    aMax;
441       XArray* /* of DiVariable */ vars;
442    }
443    DiAddrRange;
444 
445 typedef
446    struct {
447       HChar* name;  /* in DebugInfo.strchunks */
448       UWord  typeR; /* a cuOff */
449       GExpr* gexpr; /* on DebugInfo.gexprs list */
450       GExpr* fbGX;  /* SHARED. */
451       HChar* fileName; /* where declared; may be NULL. in
452                           DebugInfo.strchunks */
453       Int    lineNo;   /* where declared; may be zero. */
454    }
455    DiVariable;
456 
457 Word
458 ML_(cmp_for_DiAddrRange_range) ( const void* keyV, const void* elemV );
459 
460 /* --------------------- DEBUGINFO --------------------- */
461 
462 /* This is the top-level data type.  It's a structure which contains
463    information pertaining to one mapped ELF object.  This type is
464    exported only abstractly - in pub_tool_debuginfo.h. */
465 
466 /* First though, here's an auxiliary data structure.  It is only ever
467    used as part of a struct _DebugInfo.  We use it to record
468    observations about mappings and permission changes to the
469    associated file, so as to decide when to read debug info.  It's
470    essentially an ultra-trivial finite state machine which, when it
471    reaches an accept state, signals that we should now read debug info
472    from the object into the associated struct _DebugInfo.  The accept
473    state is arrived at when have_rx_map and have_rw_map both become
474    true.  The initial state is one in which we have no observations,
475    so have_rx_map and have_rw_map are both false.
476 
477    This all started as a rather ad-hoc solution, but was further
478    expanded to handle weird object layouts, e.g. more than one rw
479    or rx mapping for one binary.
480 
481    The normal sequence of events is one of
482 
483    start  -->  r-x mapping  -->  rw- mapping  -->  accept
484    start  -->  rw- mapping  -->  r-x mapping  -->  accept
485 
486    that is, take the first r-x and rw- mapping we see, and we're done.
487 
488    On MacOSX 10.7, 32-bit, there appears to be a new variant:
489 
490    start  -->  r-- mapping  -->  rw- mapping
491           -->  upgrade r-- mapping to r-x mapping  -->  accept
492 
493    where the upgrade is done by a call to vm_protect.  Hence we
494    need to also track this possibility.
495 */
496 
497 struct _DebugInfoMapping
498 {
499    Addr  avma; /* these fields record the file offset, length */
500    SizeT size; /* and map address of each mapping             */
501    OffT  foff;
502    Bool  rx, rw, ro;  /* memory access flags for this mapping */
503 };
504 
505 struct _DebugInfoFSM
506 {
507    HChar*  filename;  /* in mallocville (VG_AR_DINFO)               */
508    XArray* maps;      /* XArray of _DebugInfoMapping structs        */
509    Bool  have_rx_map; /* did we see a r?x mapping yet for the file? */
510    Bool  have_rw_map; /* did we see a rw? mapping yet for the file? */
511    Bool  have_ro_map; /* did we see a r-- mapping yet for the file? */
512 };
513 
514 
/* Size, in bytes, of one chunk of the string table in struct
   _DebugInfo (::strchunks). */
#define SEGINFO_STRCHUNKSIZE  (64 * 1024)


/* Maximum number of .eh_frame sections recorded per object.  More
   than one may be encountered -- unusual, but apparently allowed by
   ELF.  See
   http://sourceware.org/bugzilla/show_bug.cgi?id=12675
*/
#define N_EHFRAME_SECTS  2
524 
525 
526 /* So, the main structure for holding debug info for one object. */
527 
528 struct _DebugInfo {
529 
530    /* Admin stuff */
531 
532    struct _DebugInfo* next;   /* list of DebugInfos */
533    Bool               mark;   /* marked for deletion? */
534 
535    /* An abstract handle, which can be used by entities outside of
536       m_debuginfo to (in an abstract datatype sense) refer to this
537       struct _DebugInfo.  A .handle of zero is invalid; valid handles
538       are 1 and above.  The same handle is never issued twice (in any
539       given run of Valgrind), so a handle becomes invalid when the
540       associated struct _DebugInfo is discarded, and remains invalid
541       forever thereafter.  The .handle field is set as soon as this
542       structure is allocated. */
543    ULong handle;
544 
545    /* Used for debugging only - indicate what stuff to dump whilst
546       reading stuff into the seginfo.  Are computed as early in the
547       lifetime of the DebugInfo as possible -- at the point when it is
548       created.  Use these when deciding what to spew out; do not use
549       the global VG_(clo_blah) flags. */
550 
551    Bool trace_symtab; /* symbols, our style */
552    Bool trace_cfi;    /* dwarf frame unwind, our style */
553    Bool ddump_syms;   /* mimic /usr/bin/readelf --syms */
554    Bool ddump_line;   /* mimic /usr/bin/readelf --debug-dump=line */
555    Bool ddump_frames; /* mimic /usr/bin/readelf --debug-dump=frames */
556 
557    /* The "decide when it is time to read debuginfo" state machine.
558       This structure must get filled in before we can start reading
559       anything from the ELF/MachO file.  This structure is filled in
560       by VG_(di_notify_mmap) and its immediate helpers. */
561    struct _DebugInfoFSM fsm;
562 
563    /* Once the ::fsm has reached an accept state -- typically, when
564       both a rw? and r?x mapping for .filename have been observed --
565       we can go on to read the symbol tables and debug info.
566       .have_dinfo changes from False to True when the debug info has
567       been completely read in and postprocessed (canonicalised) and is
568       now suitable for querying. */
569    /* If have_dinfo is False, then all fields below this point are
570       invalid and should not be consulted. */
571    Bool  have_dinfo; /* initially False */
572 
573    /* All the rest of the fields in this structure are filled in once
574       we have committed to reading the symbols and debug info (that
575       is, at the point where .have_dinfo is set to True). */
576 
577    /* The file's soname. */
578    HChar* soname;
579 
580    /* Description of some important mapped segments.  The presence or
581       absence of the mapping is denoted by the _present field, since
582       in some obscure circumstances (to do with data/sdata/bss) it is
583       possible for the mapping to be present but have zero size.
584       Certainly text_ is mandatory on all platforms; not sure about
585       the rest though.
586 
587       --------------------------------------------------------
588 
589       Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that
590 
591       either (size of all rx maps == 0 && cfsi == NULL) (the degenerate case)
592 
593       or the normal case, which is the AND of the following:
594       (0) size of at least one rx mapping > 0
595       (1) no two DebugInfos with some rx mapping of size > 0
596           have overlapping rx mappings
597       (2) [cfsi_minavma,cfsi_maxavma] does not extend beyond
598           [avma,+size) of one rx mapping; that is, the former
599           is a subrange or equal to the latter.
600       (3) all DiCfSI in the cfsi array all have ranges that fall within
601           [avma,+size) of that rx mapping.
602       (4) all DiCfSI in the cfsi array are non-overlapping
603 
604       The cumulative effect of these restrictions is to ensure that
605       all the DiCfSI records in the entire system are non overlapping.
606       Hence any address falls into either exactly one DiCfSI record,
607       or none.  Hence it is safe to cache the results of searches for
608       DiCfSI records.  This is the whole point of these restrictions.
609       The caching of DiCfSI searches is done in VG_(use_CF_info).  The
610       cache is flushed after any change to debugInfo_list.  DiCfSI
611       searches are cached because they are central to stack unwinding
612       on amd64-linux.
613 
614       Where are these invariants imposed and checked?
615 
616       They are checked after a successful read of debuginfo into
617       a DebugInfo*, in check_CFSI_related_invariants.
618 
619       (1) is not really imposed anywhere.  We simply assume that the
620       kernel will not map the text segments from two different objects
621       into the same space.  Sounds reasonable.
622 
623       (2) follows from (4) and (3).  It is ensured by canonicaliseCFI.
624       (3) is ensured by ML_(addDiCfSI).
625       (4) is ensured by canonicaliseCFI.
626 
627       --------------------------------------------------------
628 
629       Comment_on_DEBUG_SVMA_and_DEBUG_BIAS_fields:
630 
631       The _debug_{svma,bias} fields were added as part of a fix to
632       #185816.  The problem encompassed in that bug report was that it
633       wasn't correct to use apply the bias values deduced for a
634       primary object to its associated debuginfo object, because the
635       debuginfo object (or the primary) could have been prelinked to a
636       different SVMA.  Hence debuginfo and primary objects need to
637       have their own biases.
638 
639       ------ JRS: (referring to r9329): ------
640       Let me see if I understand the workings correctly.  Initially
641       the _debug_ values are set to the same values as the "normal"
642       ones, as there's a bunch of bits of code like this (in
643       readelf.c)
644 
645          di->text_svma = svma;
646          ...
647          di->text_bias = rx_bias;
648          di->text_debug_svma = svma;
649          di->text_debug_bias = rx_bias;
650 
651       If a debuginfo object subsequently shows up then the
652       _debug_svma/bias are set for the debuginfo object.  Result is
653       that if there's no debuginfo object then the values are the same
654       as the primary-object values, and if there is a debuginfo object
655       then they will (or at least may) be different.
656 
657       Then when we need to actually bias something, we'll have to
658       decide whether to use the primary bias or the debuginfo bias.
659       And the strategy is to use the primary bias for ELF symbols but
660       the debuginfo bias for anything pulled out of Dwarf.
661 
662       ------ THH: ------
663       Correct - the debug_svma and bias values apply to any address
664       read from the debug data regardless of where that debug data is
665       stored and the other values are used for addresses from other
666       places (primarily the symbol table).
667 
668       ------ JRS: ------
669       Ok; so this was my only area of concern.  Are there any
670       corner-case scenarios where this wouldn't be right?  It sounds
671       like we're assuming the ELF symbols come from the primary object
672       and, if there is a debug object, then all the Dwarf comes from
673       there.  But what if (eg) both symbols and Dwarf come from the
674       debug object?  Is that even possible or allowable?
675 
676       ------ THH: ------
677       You may have a point...
678 
679       The current logic is to try and take any one set of data from
680       either the base object or the debug object. There are four sets
681       of data we consider:
682 
683          - Symbol Table
684          - Stabs
685          - DWARF1
686          - DWARF2
687 
688       If we see the primary section for a given set in the base object
689       then we ignore all sections relating to that set in the debug
690       object.
691 
692       Now in principle if we saw a secondary section (like debug_line
693       say) in the base object, but not the main section (debug_info in
694       this case) then we would take debug_info from the debug object
695       but would use the debug_line from the base object unless we saw
696       a replacement copy in the debug object. That's probably unlikely
697       however.
698 
699       A bigger issue might be, as you say, the symbol table as we will
700       pick that up from the debug object if it isn't in the base. The
701       dynamic symbol table will always have to be in the base object
702       though so we will have to be careful when processing symbols to
703       know which table we are reading in that case.
704 
705       What we probably need to do is tell read_elf_symtab which object
706       the symbols it is being asked to read came from.
707 
708       (A followup patch to deal with this was committed in r9469).
709    */
710    /* .text */
711    Bool     text_present;
712    Addr     text_avma;
713    Addr     text_svma;
714    SizeT    text_size;
715    PtrdiffT text_bias;
716    Addr     text_debug_svma;
717    PtrdiffT text_debug_bias;
718    /* .data */
719    Bool     data_present;
720    Addr     data_svma;
721    Addr     data_avma;
722    SizeT    data_size;
723    PtrdiffT data_bias;
724    Addr     data_debug_svma;
725    PtrdiffT data_debug_bias;
726    /* .sdata */
727    Bool     sdata_present;
728    Addr     sdata_svma;
729    Addr     sdata_avma;
730    SizeT    sdata_size;
731    PtrdiffT sdata_bias;
732    Addr     sdata_debug_svma;
733    PtrdiffT sdata_debug_bias;
734    /* .rodata */
735    Bool     rodata_present;
736    Addr     rodata_svma;
737    Addr     rodata_avma;
738    SizeT    rodata_size;
739    PtrdiffT rodata_bias;
740    Addr     rodata_debug_svma;
741    PtrdiffT rodata_debug_bias;
742    /* .bss */
743    Bool     bss_present;
744    Addr     bss_svma;
745    Addr     bss_avma;
746    SizeT    bss_size;
747    PtrdiffT bss_bias;
748    Addr     bss_debug_svma;
749    PtrdiffT bss_debug_bias;
750    /* .sbss */
751    Bool     sbss_present;
752    Addr     sbss_svma;
753    Addr     sbss_avma;
754    SizeT    sbss_size;
755    PtrdiffT sbss_bias;
756    Addr     sbss_debug_svma;
757    PtrdiffT sbss_debug_bias;
758    /* .plt */
759    Bool   plt_present;
760    Addr	  plt_avma;
761    SizeT  plt_size;
762    /* .got */
763    Bool   got_present;
764    Addr   got_avma;
765    SizeT  got_size;
766    /* .got.plt */
767    Bool   gotplt_present;
768    Addr   gotplt_avma;
769    SizeT  gotplt_size;
770    /* .opd -- needed on ppc64-linux for finding symbols */
771    Bool   opd_present;
772    Addr   opd_avma;
773    SizeT  opd_size;
774    /* .ehframe -- needed on amd64-linux for stack unwinding.  We might
775       see more than one, hence the arrays. */
776    UInt   n_ehframe;  /* 0 .. N_EHFRAME_SECTS */
777    Addr   ehframe_avma[N_EHFRAME_SECTS];
778    SizeT  ehframe_size[N_EHFRAME_SECTS];
779 
780    /* Sorted tables of stuff we snarfed from the file.  This is the
781       eventual product of reading the debug info.  All this stuff
782       lives in VG_AR_DINFO. */
783 
784    /* An expandable array of symbols. */
785    DiSym*  symtab;
786    UWord   symtab_used;
787    UWord   symtab_size;
788    /* An expandable array of locations. */
789    DiLoc*  loctab;
790    UWord   loctab_used;
791    UWord   loctab_size;
792    /* An expandable array of CFI summary info records.  Also includes
793       summary address bounds, showing the min and max address covered
794       by any of the records, as an aid to fast searching.  And, if the
795       records require any expression nodes, they are stored in
796       cfsi_exprs. */
797    DiCfSI* cfsi;
798    UWord   cfsi_used;
799    UWord   cfsi_size;
800    Addr    cfsi_minavma;
801    Addr    cfsi_maxavma;
802    XArray* cfsi_exprs; /* XArray of CfiExpr */
803 
804    /* Optimized code under Wine x86: MSVC++ PDB FramePointerOmitted
805       data.  Non-expandable array, hence .size == .used. */
806    FPO_DATA* fpo;
807    UWord     fpo_size;
808    Addr      fpo_minavma;
809    Addr      fpo_maxavma;
810    Addr      fpo_base_avma;
811 
812    /* Expandable arrays of characters -- the string table.  Pointers
813       into this are stable (the arrays are not reallocated). */
814    struct strchunk {
815       UInt   strtab_used;
816       struct strchunk* next;
817       HChar  strtab[SEGINFO_STRCHUNKSIZE];
818    } *strchunks;
819 
820    /* Variable scope information, as harvested from Dwarf3 files.
821 
822       In short it's an
823 
824          array of (array of PC address ranges and variables)
825 
826       The outer array indexes over scopes, with Entry 0 containing
827       information on variables which exist for any value of the program
828       counter (PC) -- that is, the outermost scope.  Entries 1, 2, 3,
829       etc contain information on increasingly deeply nested variables.
830 
831       Each inner array is an array of (an address range, and a set
832       of variables that are in scope over that address range).
833 
834       The address ranges may not overlap.
835 
836       Since Entry 0 in the outer array holds information on variables
837       that exist for any value of the PC (that is, global vars), it
838       follows that Entry 0's inner array can only have one address
839       range pair, one that covers the entire address space.
840    */
841    XArray* /* of OSet of DiAddrRange */varinfo;
842 
843    /* These are arrays of the relevant typed objects, held here
844       partially for the purposes of visiting each object exactly once
845       when we need to delete them. */
846 
847    /* An array of TyEnts.  These are needed to make sense of any types
848       in the .varinfo.  Also, when deleting this DebugInfo, we must
849       first traverse this array and throw away malloc'd stuff hanging
850       off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */
851    XArray* /* of TyEnt */ admin_tyents;
852 
853    /* An array of guarded DWARF3 expressions. */
854    XArray* admin_gexprs;
855 
856    /* Cached last rx mapping matched and returned by ML_(find_rx_mapping).
857       This helps performance a lot during ML_(addLineInfo) etc., which can
858       easily be invoked hundreds of thousands of times. */
859    struct _DebugInfoMapping* last_rx_map;
860 };
861 
862 /* --------------------- functions --------------------- */
863 
864 /* ------ Adding ------ */
865 
866 /* Add a symbol to si's symbol table.  The contents of 'sym' are
867    copied.  It is assumed (and checked) that 'sym' only contains one
868    name, so there is no auxiliary ::sec_names vector to duplicate.
869    IOW, the copy is a shallow copy, and there are assertions in place
870    to ensure that's OK. */
871 extern void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym );
872 
873 /* Add a line-number record to a DebugInfo. */
874 extern
875 void ML_(addLineInfo) ( struct _DebugInfo* di,
876                         const HChar* filename,
877                         const HChar* dirname,  /* NULL is allowable */
878                         Addr this, Addr next, Int lineno, Int entry);
879 
880 /* Add a CFI summary record.  The supplied DiCfSI is copied. */
881 extern void ML_(addDiCfSI) ( struct _DebugInfo* di, DiCfSI* cfsi );
882 
883 /* Add a string to the string table of a DebugInfo.  If len==-1,
884    ML_(addStr) will itself measure the length of the string. */
885 extern HChar* ML_(addStr) ( struct _DebugInfo* di, const HChar* str, Int len );
886 
887 /* Add a string to the string table of a DebugInfo, by copying the
888    string from the given DiCursor.  Measures the length of the string
889    itself. */
890 extern HChar* ML_(addStrFromCursor)( struct _DebugInfo* di, DiCursor c );
891 /* Add a variable to a DebugInfo (presumably into di->varinfo -- TODO confirm). */
892 extern void ML_(addVar)( struct _DebugInfo* di,
893                          Int    level, /* presumably scope nesting level -- TODO confirm */
894                          Addr   aMin, /* presumably start of the in-scope address range -- TODO confirm */
895                          Addr   aMax, /* presumably end of the in-scope address range -- TODO confirm */
896                          HChar* name, /* presumably the variable's name -- TODO confirm */
897                          UWord  typeR, /* a cuOff */
898                          GExpr* gexpr,
899                          GExpr* fbGX, /* SHARED. */
900                          HChar* fileName, /* where decl'd - may be NULL */
901                          Int    lineNo, /* where decl'd - may be zero */
902                          Bool   show );
903 
904 /* Canonicalise the tables held by 'di', in preparation for use.  Call
905    this after finishing adding entries to these tables. */
906 extern void ML_(canonicaliseTables) ( struct _DebugInfo* di );
907 
908 /* Canonicalise the call-frame-info table held by 'di', in preparation
909    for use. This is called by ML_(canonicaliseTables) but can also be
910    called on its own to sort just this table. */
911 extern void ML_(canonicaliseCFI) ( struct _DebugInfo* di );
912 
913 /* ------ Searching ------ */
914 
915 /* Find a symbol-table index containing the specified pointer, or -1
916    if not found.  Binary search.  */
917 extern Word ML_(search_one_symtab) ( struct _DebugInfo* di, Addr ptr,
918                                      Bool match_anywhere_in_sym,
919                                      Bool findText );
920 
921 /* Find a location-table index containing the specified pointer, or -1
922    if not found.  Binary search.  */
923 extern Word ML_(search_one_loctab) ( struct _DebugInfo* di, Addr ptr );
924 
925 /* Find a CFI-table index containing the specified pointer, or -1 if
926    not found.  Binary search.  */
927 extern Word ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr );
928 
929 /* Find a FPO-table index containing the specified pointer, or -1
930    if not found.  Binary search.  */
931 extern Word ML_(search_one_fpotab) ( struct _DebugInfo* di, Addr ptr );
932 
933 /* Helper function for the most often needed searching for an rx
934    mapping containing the specified address range.  The range must
935    fall entirely within the mapping to be considered to be within it.
936    Asserts if lo > hi; caller must ensure this doesn't happen. */
937 extern struct _DebugInfoMapping* ML_(find_rx_mapping) ( struct _DebugInfo* di,
938                                                         Addr lo, Addr hi );
939 
940 /* ------ Misc ------ */
941 
942 /* Show a non-fatal debug info reading error.  Use vg_panic if
943    terminal.  'serious' errors are always shown; non-'serious' ones
944    are shown only at verbosity level 2 and above. */
945 extern
946 void ML_(symerr) ( struct _DebugInfo* di, Bool serious, const HChar* msg );
947 
948 /* Print a symbol. */
949 extern void ML_(ppSym) ( Int idx, DiSym* sym );
950 
951 /* Print a call-frame-info summary. */
952 extern void ML_(ppDiCfSI) ( XArray* /* of CfiExpr */ exprs, DiCfSI* si );
953 
954 /* Trace printing, gated on di->trace_symtab; 'di' must be in scope at the use site. */
955 #define TRACE_SYMTAB_ENABLED (di->trace_symtab)
956 #define TRACE_SYMTAB(format, args...) \
957    if (TRACE_SYMTAB_ENABLED) { VG_(printf)(format, ## args); } /* NOTE(review): bare 'if', not do/while(0); unsafe directly before an 'else' */
958 
959 
960 #endif /* ndef __PRIV_STORAGE_H */
961 
962 /*--------------------------------------------------------------------*/
963 /*--- end                                                          ---*/
964 /*--------------------------------------------------------------------*/
965