• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- Format-neutral storage of and querying of info acquired from ---*/
4 /*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info.                    ---*/
5 /*---                                               priv_storage.h ---*/
6 /*--------------------------------------------------------------------*/
7 
8 /*
9    This file is part of Valgrind, a dynamic binary instrumentation
10    framework.
11 
12    Copyright (C) 2000-2012 Julian Seward
13       jseward@acm.org
14 
15    This program is free software; you can redistribute it and/or
16    modify it under the terms of the GNU General Public License as
17    published by the Free Software Foundation; either version 2 of the
18    License, or (at your option) any later version.
19 
20    This program is distributed in the hope that it will be useful, but
21    WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    General Public License for more details.
24 
25    You should have received a copy of the GNU General Public License
26    along with this program; if not, write to the Free Software
27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28    02111-1307, USA.
29 
30    The GNU General Public License is contained in the file COPYING.
31 */
32 /*
33    Stabs reader greatly improved by Nick Nethercote, Apr 02.
34    This module was also extensively hacked on by Jeremy Fitzhardinge
35    and Tom Hughes.
36 */
37 /* See comment at top of debuginfo.c for explanation of
38    the _svma / _avma / _image / _bias naming scheme.
39 */
40 /* Note this is not freestanding; needs pub_core_xarray.h and
41    priv_tytypes.h to be included before it. */
42 
43 #ifndef __PRIV_STORAGE_H
44 #define __PRIV_STORAGE_H
45 
46 /* --------------------- SYMBOLS --------------------- */
47 
48 /* A structure to hold an ELF/MachO symbol (very crudely).  Usually
49    the symbol only has one name, which is stored in ::pri_name, and
50    ::sec_names is NULL.  If there are other names, these are stored in
51    ::sec_names, which is a NULL terminated vector holding the names.
52    The vector is allocated in VG_AR_DINFO, the names themselves live
53    in DebugInfo::strchunks.
54 
55    From the point of view of ELF, the primary vs secondary distinction
56    is artificial: they are all just names associated with the address,
57    none of which has higher precedence than any other.  However, from
58    the point of view of mapping an address to a name to display to the
59    user, we need to choose one "preferred" name, and so that might as
60    well be installed as the pri_name, whilst all others can live in
61    sec_names[].  This has the convenient side effect that, in the
62    common case where there is only one name for the address,
63    sec_names[] does not need to be allocated.
64 */
65 typedef
66    struct {
67       Addr    addr;    /* lowest address of entity */
68       Addr    tocptr;  /* ppc64-linux only: value that R2 should have */
69       UChar*  pri_name;  /* primary name, never NULL; string lives in DebugInfo::strchunks */
70       UChar** sec_names; /* NULL, or a NULL term'd array of other names (vector in VG_AR_DINFO) */
71       // XXX: this could be shrunk (on 32-bit platforms) by using 30
72       // bits for the size and 1 bit each for isText and isIFunc.  If you
73       // do this, make sure that all assignments to the latter two use
74       // 0 or 1 (or True or False), and that a positive number larger
75       // than 1 is never used to represent True.
76       UInt    size;    /* size in bytes */
77       Bool    isText;  /* NOTE(review): presumably True for a text (code) symbol -- confirm */
78       Bool    isIFunc; /* symbol is an indirect function? */
79    }
80    DiSym;
81 
82 /* --------------------- SRCLOCS --------------------- */
83 
84 /* Line count at which overflow happens, due to line numbers being
85    stored as shorts in `struct nlist' in a.out.h. */
86 #define LINENO_OVERFLOW (1 << (sizeof(short) * 8))
87 
88 #define LINENO_BITS     20                        /* width of the DiLoc.lineno bit-field */
89 #define LOC_SIZE_BITS  (32 - LINENO_BITS)         /* width of the DiLoc.size bit-field (12) */
90 #define MAX_LINENO     ((1 << LINENO_BITS) - 1)   /* largest value that fits in LINENO_BITS bits */
91 
92 /* Unlikely to have any lines with instruction ranges > 4095 bytes, the largest size that fits in LOC_SIZE_BITS bits */
93 #define MAX_LOC_SIZE   ((1 << LOC_SIZE_BITS) - 1)
94 
95 /* Number used to detect line number overflows; if one line is
96    60000-odd smaller than the previous, it was probably an overflow.
97  */
98 #define OVERFLOW_DIFFERENCE     (LINENO_OVERFLOW - 5000)
99 
100 /* A structure to hold addr-to-source info for a single line.  There
101   can be a lot of these, hence the dense packing. */
102 typedef
103    struct {
104       /* Word 1 */
105       Addr   addr;               /* lowest address for this line */
106       /* Word 2: LOC_SIZE_BITS + LINENO_BITS == 32, so both bit-fields are meant to share one 32-bit word */
107       UShort size:LOC_SIZE_BITS; /* # bytes; we catch overflows of this */
108       UInt   lineno:LINENO_BITS; /* source line number, or zero */
109       /* Word 3 */
110       UChar*  filename;          /* source filename */
111       /* Word 4 */
112       UChar*  dirname;           /* source directory name */
113    }
114    DiLoc;
115 
116 /* --------------------- CF INFO --------------------- */
117 
118 /* DiCfSI: a structure to summarise DWARF2/3 CFA info for the code
119    address range [base .. base+len-1].
120 
121    On x86 and amd64 ("IA"), if you know ({e,r}sp, {e,r}bp, {e,r}ip) at
122    some point and {e,r}ip is in the range [base .. base+len-1], it
123    tells you how to calculate ({e,r}sp, {e,r}bp) for the caller of the
124    current frame and also ra, the return address of the current frame.
125 
126    First off, calculate CFA, the Canonical Frame Address, thusly:
127 
128      cfa = case cfa_how of
129               CFIC_IA_SPREL -> {e,r}sp + cfa_off
130               CFIC_IA_BPREL -> {e,r}bp + cfa_off
131               CFIC_IA_EXPR  -> expr whose index is in cfa_off
132 
133    Once that is done, the previous frame's {e,r}sp/{e,r}bp values and
134    this frame's {e,r}ra value can be calculated like this:
135 
136      old_{e,r}sp/{e,r}bp/ra
137          = case {e,r}sp/{e,r}bp/ra_how of
138               CFIR_UNKNOWN   -> we don't know, sorry
139               CFIR_SAME      -> same as it was before (sp/fp only)
140               CFIR_CFAREL    -> cfa + sp/bp/ra_off
141               CFIR_MEMCFAREL -> *( cfa + sp/bp/ra_off )
142               CFIR_EXPR      -> expr whose index is in sp/bp/ra_off
143 
144    On ARM it's pretty much the same, except we have more registers to
145    keep track of:
146 
147      cfa = case cfa_how of
148               CFIC_ARM_R13REL -> r13 + cfa_off
149               CFIC_ARM_R12REL -> r12 + cfa_off
150               CFIC_ARM_R11REL -> r11 + cfa_off
151               CFIC_ARM_R7REL  -> r7  + cfa_off
152               CFIC_EXPR       -> expr whose index is in cfa_off
153 
154      old_r14/r13/r12/r11/r7/ra
155          = case r14/r13/r12/r11/r7/ra_how of
156               CFIR_UNKNOWN   -> we don't know, sorry
157               CFIR_SAME      -> same as it was before (r14/r13/r12/r11/r7 only)
158               CFIR_CFAREL    -> cfa + r14/r13/r12/r11/r7/ra_off
159               CFIR_MEMCFAREL -> *( cfa + r14/r13/r12/r11/r7/ra_off )
160               CFIR_EXPR      -> expr whose index is in r14/r13/r12/r11/r7/ra_off
161 
162    On s390x we have a similar logic as x86 or amd64. We need the stack pointer
163    (r15), the frame pointer r11 (like BP) and together with the instruction
164    address in the PSW we can calculate the previous values:
165      cfa = case cfa_how of
166               CFIC_IA_SPREL -> r15 + cfa_off
167               CFIC_IA_BPREL -> r11 + cfa_off
168               CFIC_IA_EXPR  -> expr whose index is in cfa_off
169 
170      old_sp/fp/ra
171          = case sp/fp/ra_how of
172               CFIR_UNKNOWN   -> we don't know, sorry
173               CFIR_SAME      -> same as it was before (sp/fp only)
174               CFIR_CFAREL    -> cfa + sp/fp/ra_off
175               CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off )
176               CFIR_EXPR      -> expr whose index is in sp/fp/ra_off
177 */
178 
179 #define CFIC_IA_SPREL     ((UChar)1)  /* cfa = {e,r}sp (r15 on s390x) + cfa_off */
180 #define CFIC_IA_BPREL     ((UChar)2)  /* cfa = {e,r}bp (r11 on s390x) + cfa_off */
181 #define CFIC_IA_EXPR      ((UChar)3)  /* cfa = expr whose index is in cfa_off */
182 #define CFIC_ARM_R13REL   ((UChar)4)  /* cfa = r13 + cfa_off */
183 #define CFIC_ARM_R12REL   ((UChar)5)  /* cfa = r12 + cfa_off */
184 #define CFIC_ARM_R11REL   ((UChar)6)  /* cfa = r11 + cfa_off */
185 #define CFIC_ARM_R7REL    ((UChar)7)  /* cfa = r7 + cfa_off */
186 #define CFIC_EXPR         ((UChar)8)  /* all targets */
187 
188 #define CFIR_UNKNOWN      ((UChar)64) /* previous value of the register is not known */
189 #define CFIR_SAME         ((UChar)65) /* same as it was before */
190 #define CFIR_CFAREL       ((UChar)66) /* cfa + <reg>_off */
191 #define CFIR_MEMCFAREL    ((UChar)67) /* *( cfa + <reg>_off ) */
192 #define CFIR_EXPR         ((UChar)68) /* expr whose index is in <reg>_off */
193 
194 #if defined(VGA_x86) || defined(VGA_amd64)
195 typedef
196    struct {
197       Addr  base;    /* record covers code addresses [base .. base+len-1] */
198       UInt  len;
199       UChar cfa_how; /* a CFIC_IA value */
200       UChar ra_how;  /* a CFIR_ value */
201       UChar sp_how;  /* a CFIR_ value */
202       UChar bp_how;  /* a CFIR_ value */
203       Int   cfa_off; /* operand for cfa_how: an offset, or an expr index */
204       Int   ra_off;  /* operand for ra_how */
205       Int   sp_off;  /* operand for sp_how */
206       Int   bp_off;  /* operand for bp_how */
207    }
208    DiCfSI;
209 #elif defined(VGA_arm)
210 typedef
211    struct {
212       Addr  base;    /* record covers code addresses [base .. base+len-1] */
213       UInt  len;
214       UChar cfa_how; /* a CFIC_ value */
215       UChar ra_how;  /* a CFIR_ value */
216       UChar r14_how; /* a CFIR_ value */
217       UChar r13_how; /* a CFIR_ value */
218       UChar r12_how; /* a CFIR_ value */
219       UChar r11_how; /* a CFIR_ value */
220       UChar r7_how;  /* a CFIR_ value */
221       Int   cfa_off; /* operand for cfa_how: an offset, or an expr index */
222       Int   ra_off;
223       Int   r14_off;
224       Int   r13_off;
225       Int   r12_off;
226       Int   r11_off;
227       Int   r7_off;
228    }
229    DiCfSI;
230 #elif defined(VGA_ppc32) || defined(VGA_ppc64)
231 /* Just have a struct with the common fields in, so that code that
232    processes the common fields doesn't have to be ifdef'd against
233    VGP_/VGA_ symbols.  These are not used in any way on ppc32/64-linux
234    at the moment. */
235 typedef
236    struct {
237       Addr  base;    /* record covers code addresses [base .. base+len-1] */
238       UInt  len;
239       UChar cfa_how; /* a CFIC_ value */
240       UChar ra_how;  /* a CFIR_ value */
241       Int   cfa_off;
242       Int   ra_off;
243    }
244    DiCfSI;
245 #elif defined(VGA_s390x)
246 typedef
247    struct {
248       Addr  base;    /* record covers code addresses [base .. base+len-1] */
249       UInt  len;
250       UChar cfa_how; /* a CFIC_ value */
251       UChar sp_how;  /* a CFIR_ value */
252       UChar ra_how;  /* a CFIR_ value */
253       UChar fp_how;  /* a CFIR_ value */
254       Int   cfa_off; /* operand for cfa_how: an offset, or an expr index */
255       Int   sp_off;
256       Int   ra_off;
257       Int   fp_off;
258    }
259    DiCfSI;
260 #elif defined(VGA_mips32)
261 typedef
262    struct {
263       Addr  base;    /* record covers code addresses [base .. base+len-1] */
264       UInt  len;
265       UChar cfa_how; /* a CFIC_ value */
266       UChar ra_how;  /* a CFIR_ value */
267       UChar sp_how;  /* a CFIR_ value */
268       UChar fp_how;  /* a CFIR_ value */
269       Int   cfa_off; /* operand for cfa_how: an offset, or an expr index */
270       Int   ra_off;
271       Int   sp_off;
272       Int   fp_off;
273    }
274    DiCfSI;
275 #else
276 #  error "Unknown arch"
277 #endif
278 
279 
280 typedef
281    enum {
282       Cop_Add=0x321, /* numbering starts at an arbitrary non-zero base */
283       Cop_Sub,
284       Cop_And,
285       Cop_Mul,
286       Cop_Shl,
287       Cop_Shr,
288       Cop_Eq,
289       Cop_Ge,
290       Cop_Gt,
291       Cop_Le,
292       Cop_Lt,
293       Cop_Ne
294    }
295    CfiOp; /* operator stored in the Binop variant of CfiExpr */
296 
297 typedef
298    enum {
299       Creg_IA_SP=0x213, /* numbering starts at an arbitrary non-zero base */
300       Creg_IA_BP,
301       Creg_IA_IP,
302       Creg_ARM_R13,
303       Creg_ARM_R12,
304       Creg_ARM_R15,
305       Creg_ARM_R14,
306       Creg_S390_R14,
307       Creg_MIPS_RA
308    }
309    CfiReg; /* register stored in the CfiReg variant of CfiExpr */
310 
311 typedef
312    enum {
313       Cex_Undef=0x123, /* numbering starts at an arbitrary non-zero base */
314       Cex_Deref,
315       Cex_Const,
316       Cex_Binop,
317       Cex_CfiReg,
318       Cex_DwReg
319    }
320    CfiExprTag; /* type of CfiExpr.tag; one value per member of the CfiExpr.Cex union */
321 
322 typedef
323    struct {
324       CfiExprTag tag; /* NOTE(review): presumably selects which Cex member is valid -- confirm */
325       union {
326          struct {
327          } Undef;
328          struct {
329             Int ixAddr; /* NOTE(review): presumably index of the address sub-expression in the same XArray -- confirm */
330          } Deref;
331          struct {
332             UWord con;
333          } Const;
334          struct {
335             CfiOp op;
336             Int ixL; /* NOTE(review): presumably index of the left operand expression -- confirm */
337             Int ixR; /* NOTE(review): presumably index of the right operand expression -- confirm */
338          } Binop;
339          struct {
340             CfiReg reg;
341          } CfiReg;
342          struct {
343             Int reg; /* NOTE(review): presumably a DWARF register number -- confirm */
344          } DwReg;
345       }
346       Cex;
347    }
348    CfiExpr;
349 
350 extern Int ML_(CfiExpr_Undef) ( XArray* dst ); /* NOTE(review): these six presumably add a CfiExpr node to dst and return its index -- confirm */
351 extern Int ML_(CfiExpr_Deref) ( XArray* dst, Int ixAddr );
352 extern Int ML_(CfiExpr_Const) ( XArray* dst, UWord con );
353 extern Int ML_(CfiExpr_Binop) ( XArray* dst, CfiOp op, Int ixL, Int ixR );
354 extern Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg );
355 extern Int ML_(CfiExpr_DwReg) ( XArray* dst, Int reg );
356 
357 extern void ML_(ppCfiExpr)( XArray* src, Int ix ); /* NOTE(review): presumably prints the expression at index ix of src -- confirm */
358 
359 /* ---------------- FPO INFO (Windows PE) -------------- */
360 
361 /* for apps using Wine: MSVC++ PDB FramePointerOmitted: somewhat like
362    a primitive CFI */
363 typedef
364    struct _FPO_DATA {  /* 16 bytes */
365       UInt   ulOffStart; /* offset of 1st byte of function code */
366       UInt   cbProcSize; /* # bytes in function */
367       UInt   cdwLocals;  /* # bytes/4 in locals */
368       UShort cdwParams;  /* # bytes/4 in params */
369       UChar  cbProlog;   /* # bytes in prolog */
370       UChar  cbRegs :3;  /* # regs saved */
371       UChar  fHasSEH:1;  /* Structured Exception Handling */
372       UChar  fUseBP :1;  /* EBP has been used */
373       UChar  reserved:1;
374       UChar  cbFrame:2;  /* frame type; NOTE(review): presumably one of the PDB_FRAME_ values -- confirm */
375    }
376    FPO_DATA;
377 
378 #define PDB_FRAME_FPO  0
379 #define PDB_FRAME_TRAP 1
380 #define PDB_FRAME_TSS  2
381 
382 /* --------------------- VARIABLES --------------------- */
383 
384 typedef
385    struct {
386       Addr    aMin; /* low end of the address range */
387       Addr    aMax; /* high end; NOTE(review): presumably inclusive -- confirm */
388       XArray* /* of DiVariable */ vars; /* variables in scope over this address range */
389    }
390    DiAddrRange;
391 
392 typedef
393    struct {
394       UChar* name;  /* in DebugInfo.strchunks */
395       UWord  typeR; /* a cuOff */
396       GExpr* gexpr; /* on DebugInfo.gexprs list */
397       GExpr* fbGX;  /* SHARED. */
398       UChar* fileName; /* where declared; may be NULL. in
399                           DebugInfo.strchunks */
400       Int    lineNo;   /* where declared; may be zero. */
401    }
402    DiVariable; /* element type of the DiAddrRange.vars array */
403 
404 Word
405 ML_(cmp_for_DiAddrRange_range) ( const void* keyV, const void* elemV ); /* NOTE(review): presumably a comparison callback for looking up a DiAddrRange by address -- confirm */
406 
407 /* --------------------- DEBUGINFO --------------------- */
408 
409 /* This is the top-level data type.  It's a structure which contains
410    information pertaining to one mapped ELF object.  This type is
411    exported only abstractly - in pub_tool_debuginfo.h. */
412 
413 /* First though, here's an auxiliary data structure.  It is only ever
414    used as part of a struct _DebugInfo.  We use it to record
415    observations about mappings and permission changes to the
416    associated file, so as to decide when to read debug info.  It's
417    essentially an ultra-trivial finite state machine which, when it
418    reaches an accept state, signals that we should now read debug info
419    from the object into the associated struct _DebugInfo.  The accept
420    state is arrived at when have_rx_map and have_rw_map both become
421    true.  The initial state is one in which we have no observations,
422    so have_rx_map and have_rw_map are both false.
423 
424    This all started as a rather ad-hoc solution, but was further
425    expanded to handle weird object layouts, e.g. more than one rw
426    or rx mapping for one binary.
427 
428    The normal sequence of events is one of
429 
430    start  -->  r-x mapping  -->  rw- mapping  -->  accept
431    start  -->  rw- mapping  -->  r-x mapping  -->  accept
432 
433    that is, take the first r-x and rw- mapping we see, and we're done.
434 
435    On MacOSX 10.7, 32-bit, there appears to be a new variant:
436 
437    start  -->  r-- mapping  -->  rw- mapping
438           -->  upgrade r-- mapping to r-x mapping  -->  accept
439 
440    where the upgrade is done by a call to vm_protect.  Hence we
441    need to also track this possibility.
442 */
443 
444 struct _DebugInfoMapping
445 {
446    Addr  avma; /* map address of this mapping                 */
447    SizeT size; /* length of this mapping                      */
448    OffT  foff; /* file offset of this mapping                 */
449    Bool  rx, rw, ro;  /* memory access flags for this mapping */
450 };
451 
452 struct _DebugInfoFSM /* observations that decide when debug info gets read */
453 {
454    UChar*  filename;  /* in mallocville (VG_AR_DINFO)               */
455    XArray* maps;      /* XArray of _DebugInfoMapping structs        */
456    Bool  have_rx_map; /* did we see a r?x mapping yet for the file? */
457    Bool  have_rw_map; /* did we see a rw? mapping yet for the file? */
458    Bool  have_ro_map; /* did we see a r-- mapping yet for the file? (may later be upgraded to r-x) */
459 };
460 
461 
462 /* To do with the string table in struct _DebugInfo (::strchunks) */
463 #define SEGINFO_STRCHUNKSIZE (64*1024) /* size of the strtab[] array in each struct strchunk */
464 
465 
466 /* We may encounter more than one .eh_frame section in an object --
467    unusual but apparently allowed by ELF.  See
468    http://sourceware.org/bugzilla/show_bug.cgi?id=12675
469 */
470 #define N_EHFRAME_SECTS 2 /* length of the ehframe_avma[] and ehframe_size[] arrays */
471 
472 
473 /* So, the main structure for holding debug info for one object. */
474 
475 struct _DebugInfo {
476 
477    /* Admin stuff */
478 
479    struct _DebugInfo* next;   /* list of DebugInfos */
480    Bool               mark;   /* marked for deletion? */
481 
482    /* An abstract handle, which can be used by entities outside of
483       m_debuginfo to (in an abstract datatype sense) refer to this
484       struct _DebugInfo.  A .handle of zero is invalid; valid handles
485       are 1 and above.  The same handle is never issued twice (in any
486       given run of Valgrind), so a handle becomes invalid when the
487       associated struct _DebugInfo is discarded, and remains invalid
488       forever thereafter.  The .handle field is set as soon as this
489       structure is allocated. */
490    ULong handle;
491 
492    /* Used for debugging only - indicate what stuff to dump whilst
493       reading stuff into the seginfo.  Are computed as early in the
494       lifetime of the DebugInfo as possible -- at the point when it is
495       created.  Use these when deciding what to spew out; do not use
496       the global VG_(clo_blah) flags. */
497 
498    Bool trace_symtab; /* symbols, our style */
499    Bool trace_cfi;    /* dwarf frame unwind, our style */
500    Bool ddump_syms;   /* mimic /usr/bin/readelf --syms */
501    Bool ddump_line;   /* mimic /usr/bin/readelf --debug-dump=line */
502    Bool ddump_frames; /* mimic /usr/bin/readelf --debug-dump=frames */
503 
504    /* The "decide when it is time to read debuginfo" state machine.
505       This structure must get filled in before we can start reading
506       anything from the ELF/MachO file.  This structure is filled in
507       by VG_(di_notify_mmap) and its immediate helpers. */
508    struct _DebugInfoFSM fsm;
509 
510    /* Once the ::fsm has reached an accept state -- typically, when
511       both a rw? and r?x mapping for .filename have been observed --
512       we can go on to read the symbol tables and debug info.
513       .have_dinfo changes from False to True when the debug info has
514       been completely read in and postprocessed (canonicalised) and is
515       now suitable for querying. */
516    /* If have_dinfo is False, then all fields below this point are
517       invalid and should not be consulted. */
518    Bool  have_dinfo; /* initially False */
519 
520    /* All the rest of the fields in this structure are filled in once
521       we have committed to reading the symbols and debug info (that
522       is, at the point where .have_dinfo is set to True). */
523 
524    /* The file's soname. */
525    UChar* soname;
526 
527    /* Description of some important mapped segments.  The presence or
528       absence of the mapping is denoted by the _present field, since
529       in some obscure circumstances (to do with data/sdata/bss) it is
530       possible for the mapping to be present but have zero size.
531       Certainly text_ is mandatory on all platforms; not sure about
532       the rest though.
533 
534       --------------------------------------------------------
535 
536       Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that
537 
538       either (size of all rx maps == 0 && cfsi == NULL) (the degenerate case)
539 
540       or the normal case, which is the AND of the following:
541       (0) size of at least one rx mapping > 0
542       (1) no two DebugInfos with some rx mapping of size > 0
543           have overlapping rx mappings
544       (2) [cfsi_minavma,cfsi_maxavma] does not extend beyond
545           [avma,+size) of one rx mapping; that is, the former
546           is a subrange or equal to the latter.
547       (3) all DiCfSI in the cfsi array all have ranges that fall within
548           [avma,+size) of that rx mapping.
549       (4) all DiCfSI in the cfsi array are non-overlapping
550 
551       The cumulative effect of these restrictions is to ensure that
552       all the DiCfSI records in the entire system are non overlapping.
553       Hence any address falls into either exactly one DiCfSI record,
554       or none.  Hence it is safe to cache the results of searches for
555       DiCfSI records.  This is the whole point of these restrictions.
556       The caching of DiCfSI searches is done in VG_(use_CF_info).  The
557       cache is flushed after any change to debugInfo_list.  DiCfSI
558       searches are cached because they are central to stack unwinding
559       on amd64-linux.
560 
561       Where are these invariants imposed and checked?
562 
563       They are checked after a successful read of debuginfo into
564       a DebugInfo*, in check_CFSI_related_invariants.
565 
566       (1) is not really imposed anywhere.  We simply assume that the
567       kernel will not map the text segments from two different objects
568       into the same space.  Sounds reasonable.
569 
570       (2) follows from (4) and (3).  It is ensured by canonicaliseCFI.
571       (3) is ensured by ML_(addDiCfSI).
572       (4) is ensured by canonicaliseCFI.
573 
574       --------------------------------------------------------
575 
576       Comment_on_DEBUG_SVMA_and_DEBUG_BIAS_fields:
577 
578       The _debug_{svma,bias} fields were added as part of a fix to
579       #185816.  The problem encompassed in that bug report was that it
580       wasn't correct to apply the bias values deduced for a
581       primary object to its associated debuginfo object, because the
582       debuginfo object (or the primary) could have been prelinked to a
583       different SVMA.  Hence debuginfo and primary objects need to
584       have their own biases.
585 
586       ------ JRS: (referring to r9329): ------
587       Let me see if I understand the workings correctly.  Initially
588       the _debug_ values are set to the same values as the "normal"
589       ones, as there's a bunch of bits of code like this (in
590       readelf.c)
591 
592          di->text_svma = svma;
593          ...
594          di->text_bias = rx_bias;
595          di->text_debug_svma = svma;
596          di->text_debug_bias = rx_bias;
597 
598       If a debuginfo object subsequently shows up then the
599       _debug_svma/bias are set for the debuginfo object.  Result is
600       that if there's no debuginfo object then the values are the same
601       as the primary-object values, and if there is a debuginfo object
602       then they will (or at least may) be different.
603 
604       Then when we need to actually bias something, we'll have to
605       decide whether to use the primary bias or the debuginfo bias.
606       And the strategy is to use the primary bias for ELF symbols but
607       the debuginfo bias for anything pulled out of Dwarf.
608 
609       ------ THH: ------
610       Correct - the debug_svma and bias values apply to any address
611       read from the debug data regardless of where that debug data is
612       stored and the other values are used for addresses from other
613       places (primarily the symbol table).
614 
615       ------ JRS: ------
616       Ok; so this was my only area of concern.  Are there any
617       corner-case scenarios where this wouldn't be right?  It sounds
618       like we're assuming the ELF symbols come from the primary object
619       and, if there is a debug object, then all the Dwarf comes from
620       there.  But what if (eg) both symbols and Dwarf come from the
621       debug object?  Is that even possible or allowable?
622 
623       ------ THH: ------
624       You may have a point...
625 
626       The current logic is to try and take any one set of data from
627       either the base object or the debug object. There are four sets
628       of data we consider:
629 
630          - Symbol Table
631          - Stabs
632          - DWARF1
633          - DWARF2
634 
635       If we see the primary section for a given set in the base object
636       then we ignore all sections relating to that set in the debug
637       object.
638 
639       Now in principle if we saw a secondary section (like debug_line
640       say) in the base object, but not the main section (debug_info in
641       this case) then we would take debug_info from the debug object
642       but would use the debug_line from the base object unless we saw
643       a replacement copy in the debug object. That's probably unlikely
644       however.
645 
646       A bigger issue might be, as you say, the symbol table as we will
647       pick that up from the debug object if it isn't in the base. The
648       dynamic symbol table will always have to be in the base object
649       though so we will have to be careful when processing symbols to
650       know which table we are reading in that case.
651 
652       What we probably need to do is tell read_elf_symtab which object
653       the symbols it is being asked to read came from.
654 
655       (A followup patch to deal with this was committed in r9469).
656    */
657    /* .text */
658    Bool     text_present;
659    Addr     text_avma;
660    Addr     text_svma;
661    SizeT    text_size;
662    PtrdiffT text_bias;
663    Addr     text_debug_svma;
664    PtrdiffT text_debug_bias;
665    /* .data */
666    Bool     data_present;
667    Addr     data_svma;
668    Addr     data_avma;
669    SizeT    data_size;
670    PtrdiffT data_bias;
671    Addr     data_debug_svma;
672    PtrdiffT data_debug_bias;
673    /* .sdata */
674    Bool     sdata_present;
675    Addr     sdata_svma;
676    Addr     sdata_avma;
677    SizeT    sdata_size;
678    PtrdiffT sdata_bias;
679    Addr     sdata_debug_svma;
680    PtrdiffT sdata_debug_bias;
681    /* .rodata */
682    Bool     rodata_present;
683    Addr     rodata_svma;
684    Addr     rodata_avma;
685    SizeT    rodata_size;
686    PtrdiffT rodata_bias;
687    Addr     rodata_debug_svma;
688    PtrdiffT rodata_debug_bias;
689    /* .bss */
690    Bool     bss_present;
691    Addr     bss_svma;
692    Addr     bss_avma;
693    SizeT    bss_size;
694    PtrdiffT bss_bias;
695    Addr     bss_debug_svma;
696    PtrdiffT bss_debug_bias;
697    /* .sbss */
698    Bool     sbss_present;
699    Addr     sbss_svma;
700    Addr     sbss_avma;
701    SizeT    sbss_size;
702    PtrdiffT sbss_bias;
703    Addr     sbss_debug_svma;
704    PtrdiffT sbss_debug_bias;
705    /* .plt */
706    Bool   plt_present;
707    Addr	  plt_avma;
708    SizeT  plt_size;
709    /* .got */
710    Bool   got_present;
711    Addr   got_avma;
712    SizeT  got_size;
713    /* .got.plt */
714    Bool   gotplt_present;
715    Addr   gotplt_avma;
716    SizeT  gotplt_size;
717    /* .opd -- needed on ppc64-linux for finding symbols */
718    Bool   opd_present;
719    Addr   opd_avma;
720    SizeT  opd_size;
721    /* .ehframe -- needed on amd64-linux for stack unwinding.  We might
722       see more than one, hence the arrays. */
723    UInt   n_ehframe;  /* 0 .. N_EHFRAME_SECTS */
724    Addr   ehframe_avma[N_EHFRAME_SECTS];
725    SizeT  ehframe_size[N_EHFRAME_SECTS];
726 
727    /* Sorted tables of stuff we snarfed from the file.  This is the
728       eventual product of reading the debug info.  All this stuff
729       lives in VG_AR_DINFO. */
730 
731    /* An expandable array of symbols. */
732    DiSym*  symtab;
733    UWord   symtab_used;
734    UWord   symtab_size;
735    /* An expandable array of locations. */
736    DiLoc*  loctab;
737    UWord   loctab_used;
738    UWord   loctab_size;
739    /* An expandable array of CFI summary info records.  Also includes
740       summary address bounds, showing the min and max address covered
741       by any of the records, as an aid to fast searching.  And, if the
742       records require any expression nodes, they are stored in
743       cfsi_exprs. */
744    DiCfSI* cfsi;
745    UWord   cfsi_used;
746    UWord   cfsi_size;
747    Addr    cfsi_minavma;
748    Addr    cfsi_maxavma;
749    XArray* cfsi_exprs; /* XArray of CfiExpr */
750 
751    /* Optimized code under Wine x86: MSVC++ PDB FramePointerOmitted
752       data.  Non-expandable array, hence .size == .used. */
753    FPO_DATA* fpo;
754    UWord     fpo_size;
755    Addr      fpo_minavma;
756    Addr      fpo_maxavma;
757    Addr      fpo_base_avma;
758 
759    /* Expandable arrays of characters -- the string table.  Pointers
760       into this are stable (the arrays are not reallocated). */
761    struct strchunk {
762       UInt   strtab_used;
763       struct strchunk* next;
764       UChar  strtab[SEGINFO_STRCHUNKSIZE];
765    } *strchunks;
766 
767    /* Variable scope information, as harvested from Dwarf3 files.
768 
769       In short it's an
770 
771          array of (array of PC address ranges and variables)
772 
773       The outer array indexes over scopes, with Entry 0 containing
774       information on variables which exist for any value of the program
775       counter (PC) -- that is, the outermost scope.  Entries 1, 2, 3,
776       etc contain information on increasingly deeply nested variables.
777 
778       Each inner array is an array of (an address range, and a set
779       of variables that are in scope over that address range).
780 
781       The address ranges may not overlap.
782 
783       Since Entry 0 in the outer array holds information on variables
784       that exist for any value of the PC (that is, global vars), it
785       follows that Entry 0's inner array can only have one address
786       range pair, one that covers the entire address space.
787    */
788    XArray* /* of OSet of DiAddrRange */varinfo;
789 
790    /* These are arrays of the relevant typed objects, held here
791       partially for the purposes of visiting each object exactly once
792       when we need to delete them. */
793 
794    /* An array of TyEnts.  These are needed to make sense of any types
795       in the .varinfo.  Also, when deleting this DebugInfo, we must
796       first traverse this array and throw away malloc'd stuff hanging
797       off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */
798    XArray* /* of TyEnt */ admin_tyents;
799 
800    /* An array of guarded DWARF3 expressions. */
801    XArray* admin_gexprs; /* NOTE(review): presumably the "DebugInfo.gexprs list" mentioned in DiVariable -- confirm */
802 
803    /* Cached last rx mapping matched and returned by ML_(find_rx_mapping).
804       This helps performance a lot during ML_(addLineInfo) etc., which can
805       easily be invoked hundreds of thousands of times. */
806    struct _DebugInfoMapping* last_rx_map;
807 };
808 
809 /* --------------------- functions --------------------- */
810 
811 /* ------ Adding ------ */
812 
813 /* Add a symbol to di's symbol table.  The contents of 'sym' are
814    copied.  It is assumed (and checked) that 'sym' only contains one
815    name, so there is no auxiliary ::sec_names vector to duplicate.
816    IOW, the copy is a shallow copy, and there are assertions in place
817    to ensure that's OK. */
818 extern void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym );
819 
820 /* Add a line-number record to 'di'.  Hot path: can be called very often. */
821 extern
822 void ML_(addLineInfo) ( struct _DebugInfo* di,
823                         UChar*   filename,
824                         UChar*   dirname,  /* NULL is allowable */
825                         Addr this, Addr next, Int lineno, Int entry); /* this/next: presumably addr bounds -- TODO confirm */
826 
827 /* Add CFI summary record 'cfsi' to 'di'.  The supplied DiCfSI is copied. */
828 extern void ML_(addDiCfSI) ( struct _DebugInfo* di, DiCfSI* cfsi );
829 
830 /* Add 'str' to the string table of 'di'.  If len==-1, ML_(addStr) measures
831    the string itself.  NOTE(review): presumably returns the stored copy. */
832 extern UChar* ML_(addStr) ( struct _DebugInfo* di, UChar* str, Int len );
833 /* Add a variable to 'di'.  NOTE(review): presumably into di->varinfo. */
834 extern void ML_(addVar)( struct _DebugInfo* di,
835                          Int    level, /* presumably .varinfo scope index, 0 = outermost -- TODO confirm */
836                          Addr   aMin,
837                          Addr   aMax, /* aMin..aMax: presumably the range the var is in scope over -- TODO confirm */
838                          UChar* name,
839                          UWord  typeR, /* a cuOff */
840                          GExpr* gexpr,
841                          GExpr* fbGX, /* SHARED. */
842                          UChar* fileName, /* where decl'd - may be NULL */
843                          Int    lineNo, /* where decl'd - may be zero */
844                          Bool   show );
845 
846 /* Canonicalise the tables held by 'di', in preparation for use.  Call this
847    after all entries have been added.  Also calls ML_(canonicaliseCFI). */
848 extern void ML_(canonicaliseTables) ( struct _DebugInfo* di );
849 
850 /* Canonicalise the call-frame-info table held by 'di', in preparation
851    for use. This is called by ML_(canonicaliseTables) but can also be
852    called on its own to sort just this table. */
853 extern void ML_(canonicaliseCFI) ( struct _DebugInfo* di );
854 
855 /* ------ Searching ------ */
856 
857 /* Find the index of the symbol-table entry containing 'ptr', or -1 if
858    not found.  Binary search.  TODO(review): document both Bool flags. */
859 extern Word ML_(search_one_symtab) ( struct _DebugInfo* di, Addr ptr,
860                                      Bool match_anywhere_in_sym,
861                                      Bool findText );
862 
863 /* Find the index of the location-table entry containing 'ptr', or -1
864    if there is no such entry.  Binary search.  */
865 extern Word ML_(search_one_loctab) ( struct _DebugInfo* di, Addr ptr );
866 
867 /* Find the index of the CFI-table entry containing 'ptr', or -1 if
868    there is no such entry.  Binary search.  */
869 extern Word ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr );
870 
871 /* Find the index of the FPO-table entry containing 'ptr', or -1
872    if there is no such entry.  Binary search.  */
873 extern Word ML_(search_one_fpotab) ( struct _DebugInfo* di, Addr ptr );
874 
875 /* Find the rx mapping containing the address range given by 'lo' and
876    'hi'; the range must fall entirely within the mapping to count.  The
877    last match is cached in di->last_rx_map to speed up heavy callers.
878    Asserts if lo > hi; caller must ensure this doesn't happen. */
879 extern struct _DebugInfoMapping* ML_(find_rx_mapping) ( struct _DebugInfo* di,
880                                                         Addr lo, Addr hi );
881 
882 /* ------ Misc ------ */
883 
884 /* Show a non-fatal debug info reading error.  Use vg_panic if
885    terminal.  'serious' errors are always shown; non-'serious' ones
886    are shown only at verbosity level 2 and above. */
887 extern
888 void ML_(symerr) ( struct _DebugInfo* di, Bool serious, HChar* msg );
889 
890 /* Print a symbol.  NOTE(review): 'idx' is presumably sym's table index. */
891 extern void ML_(ppSym) ( Int idx, DiSym* sym );
892 
893 /* Print CFI summary 'si'; 'exprs' is presumably what it indexes into. */
894 extern void ML_(ppDiCfSI) ( XArray* /* of CfiExpr */ exprs, DiCfSI* si );
895 
896 
897 #define TRACE_SYMTAB(format, args...) \
898    if (di->trace_symtab) { VG_(printf)(format, ## args); }
899 
900 
901 #endif /* ndef __PRIV_STORAGE_H */
902 
903 /*--------------------------------------------------------------------*/
904 /*--- end                                                          ---*/
905 /*--------------------------------------------------------------------*/
906