• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- Format-neutral storage of and querying of info acquired from ---*/
4 /*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info.                    ---*/
5 /*---                                               priv_storage.h ---*/
6 /*--------------------------------------------------------------------*/
7 
8 /*
9    This file is part of Valgrind, a dynamic binary instrumentation
10    framework.
11 
12    Copyright (C) 2000-2011 Julian Seward
13       jseward@acm.org
14 
15    This program is free software; you can redistribute it and/or
16    modify it under the terms of the GNU General Public License as
17    published by the Free Software Foundation; either version 2 of the
18    License, or (at your option) any later version.
19 
20    This program is distributed in the hope that it will be useful, but
21    WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    General Public License for more details.
24 
25    You should have received a copy of the GNU General Public License
26    along with this program; if not, write to the Free Software
27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28    02111-1307, USA.
29 
30    The GNU General Public License is contained in the file COPYING.
31 */
32 /*
33    Stabs reader greatly improved by Nick Nethercote, Apr 02.
34    This module was also extensively hacked on by Jeremy Fitzhardinge
35    and Tom Hughes.
36 */
37 /* See comment at top of debuginfo.c for explanation of
38    the _svma / _avma / _image / _bias naming scheme.
39 */
40 /* Note this is not freestanding; needs pub_core_xarray.h and
41    priv_tytypes.h to be included before it. */
42 
43 #ifndef __PRIV_STORAGE_H
44 #define __PRIV_STORAGE_H
45 
46 /* --------------------- SYMBOLS --------------------- */
47 
48 /* A structure to hold an ELF/MachO symbol (very crudely).  Usually
49    the symbol only has one name, which is stored in ::pri_name, and
50    ::sec_names is NULL.  If there are other names, these are stored in
51    ::sec_names, which is a NULL terminated vector holding the names.
52    The vector is allocated in VG_AR_DINFO, the names themselves live
53    in DebugInfo::strchunks.
54 
55    From the point of view of ELF, the primary vs secondary distinction
56    is artificial: they are all just names associated with the address,
57    none of which has higher precedence than any other.  However, from
58    the point of view of mapping an address to a name to display to the
59    user, we need to choose one "preferred" name, and so that might as
60    well be installed as the pri_name, whilst all others can live in
61    sec_names[].  This has the convenient side effect that, in the
62    common case where there is only one name for the address,
63    sec_names[] does not need to be allocated.
64 */
65 typedef
66    struct { /* one symbol-table entry; see the comment above for pri_name vs sec_names */
67       Addr    addr;    /* lowest address of entity */
68       Addr    tocptr;  /* ppc64-linux only: value that R2 should have */
69       UChar*  pri_name;  /* primary (preferred for display) name, never NULL */
70       UChar** sec_names; /* NULL, or a NULL-terminated array of the other names */
71       // XXX: this could be shrunk (on 32-bit platforms) by using 30
72       // bits for the size and 1 bit each for isText and isIFunc.  If you
73       // do this, make sure that all assignments to the latter two use
74       // 0 or 1 (or True or False), and that a positive number larger
75       // than 1 is never used to represent True.
76       UInt    size;    /* size in bytes */
77       Bool    isText;  /* NOTE(review): presumably True for a text (code) symbol -- confirm */
78       Bool    isIFunc; /* symbol is an indirect function? */
79    }
80    DiSym;
81 
82 /* --------------------- SRCLOCS --------------------- */
83 
84 /* Line count at which overflow happens, due to line numbers being
85    stored as shorts in `struct nlist' in a.out.h: 1 << (bits in a short, at 8 bits per byte). */
86 #define LINENO_OVERFLOW (1 << (sizeof(short) * 8))
87 
88 #define LINENO_BITS     20 /* width of the DiLoc.lineno bit-field */
89 #define LOC_SIZE_BITS  (32 - LINENO_BITS) /* width of the DiLoc.size bit-field */
90 #define MAX_LINENO     ((1 << LINENO_BITS) - 1) /* largest line number DiLoc.lineno can hold */
91 
92 /* Largest size DiLoc.size can hold.  Unlikely to have any lines with instruction ranges > 4096 bytes */
93 #define MAX_LOC_SIZE   ((1 << LOC_SIZE_BITS) - 1)
94 
95 /* Number used to detect line number overflows; if one line is
96    60000-odd smaller than the previous, it was probably an overflow.
97  */
98 #define OVERFLOW_DIFFERENCE     (LINENO_OVERFLOW - 5000)
99 
100 /* A structure to hold addr-to-source info for a single line.  There
101   can be a lot of these, hence the dense packing: the size and lineno bit-fields are sized (LOC_SIZE_BITS + LINENO_BITS == 32) to fit one 32-bit word. */
102 typedef
103    struct {
104       /* Word 1 */
105       Addr   addr;               /* lowest address for this line */
106       /* Word 2: two bit-fields totalling 32 bits */
107       UShort size:LOC_SIZE_BITS; /* # bytes, at most MAX_LOC_SIZE; we catch overflows of this */
108       UInt   lineno:LINENO_BITS; /* source line number (at most MAX_LINENO), or zero */
109       /* Word 3 */
110       UChar*  filename;          /* source filename */
111       /* Word 4 */
112       UChar*  dirname;           /* source directory name; NOTE(review): ML_(addLineInfo) accepts a NULL dirname, so presumably may be NULL -- confirm */
113    }
114    DiLoc;
115 
116 /* --------------------- CF INFO --------------------- */
117 
118 /* DiCfSI: a structure to summarise DWARF2/3 CFA info for the code
119    address range [base .. base+len-1].
120 
121    On x86 and amd64 ("IA"), if you know ({e,r}sp, {e,r}bp, {e,r}ip) at
122    some point and {e,r}ip is in the range [base .. base+len-1], it
123    tells you how to calculate ({e,r}sp, {e,r}bp) for the caller of the
124    current frame and also ra, the return address of the current frame.
125 
126    First off, calculate CFA, the Canonical Frame Address, thusly:
127 
128      cfa = case cfa_how of
129               CFIC_IA_SPREL -> {e,r}sp + cfa_off
130               CFIC_IA_BPREL -> {e,r}bp + cfa_off
131               CFIC_IA_EXPR  -> expr whose index is in cfa_off
132 
133    Once that is done, the previous frame's {e,r}sp/{e,r}bp values and
134    this frame's {e,r}ra value can be calculated like this:
135 
136      old_{e,r}sp/{e,r}bp/ra
137          = case {e,r}sp/{e,r}bp/ra_how of
138               CFIR_UNKNOWN   -> we don't know, sorry
139               CFIR_SAME      -> same as it was before (sp/fp only)
140               CFIR_CFAREL    -> cfa + sp/bp/ra_off
141               CFIR_MEMCFAREL -> *( cfa + sp/bp/ra_off )
142               CFIR_EXPR      -> expr whose index is in sp/bp/ra_off
143 
144    On ARM it's pretty much the same, except we have more registers to
145    keep track of:
146 
147      cfa = case cfa_how of
148               CFIC_ARM_R13REL -> r13 + cfa_off
149               CFIC_ARM_R12REL -> r12 + cfa_off
150               CFIC_ARM_R11REL -> r11 + cfa_off
151               CFIC_ARM_R7REL  -> r7  + cfa_off
152               CFIC_EXPR       -> expr whose index is in cfa_off
153 
154      old_r14/r13/r12/r11/r7/ra
155          = case r14/r13/r12/r11/r7/ra_how of
156               CFIR_UNKNOWN   -> we don't know, sorry
157               CFIR_SAME      -> same as it was before (r14/r13/r12/r11/r7 only)
158               CFIR_CFAREL    -> cfa + r14/r13/r12/r11/r7/ra_off
159               CFIR_MEMCFAREL -> *( cfa + r14/r13/r12/r11/r7/ra_off )
160               CFIR_EXPR      -> expr whose index is in r14/r13/r12/r11/r7/ra_off
161 
162    On s390x we have a similar logic as x86 or amd64. We need the stack pointer
163    (r15), the frame pointer r11 (like BP) and together with the instruction
164    address in the PSW we can calculate the previous values:
165      cfa = case cfa_how of
166               CFIC_IA_SPREL -> r15 + cfa_off
167               CFIC_IA_BPREL -> r11 + cfa_off
168               CFIC_IA_EXPR  -> expr whose index is in cfa_off
169 
170      old_sp/fp/ra
171          = case sp/fp/ra_how of
172               CFIR_UNKNOWN   -> we don't know, sorry
173               CFIR_SAME      -> same as it was before (sp/fp only)
174               CFIR_CFAREL    -> cfa + sp/fp/ra_off
175               CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off )
176               CFIR_EXPR      -> expr whose index is in sp/fp/ra_off
177 */
178 
179 #define CFIC_IA_SPREL     ((UChar)1)  /* cfa = {e,r}sp (s390x: r15) + cfa_off */
180 #define CFIC_IA_BPREL     ((UChar)2)  /* cfa = {e,r}bp (s390x: r11) + cfa_off */
181 #define CFIC_IA_EXPR      ((UChar)3)  /* cfa = expr whose index is in cfa_off (presumably; confirm) */
182 #define CFIC_ARM_R13REL   ((UChar)4)  /* cfa = r13 + cfa_off */
183 #define CFIC_ARM_R12REL   ((UChar)5)  /* cfa = r12 + cfa_off */
184 #define CFIC_ARM_R11REL   ((UChar)6)  /* cfa = r11 + cfa_off */
185 #define CFIC_ARM_R7REL    ((UChar)7)  /* cfa = r7 + cfa_off */
186 #define CFIC_EXPR         ((UChar)8)  /* all targets */
187 
188 #define CFIR_UNKNOWN      ((UChar)64) /* we don't know the old value */
189 #define CFIR_SAME         ((UChar)65) /* same as it was before */
190 #define CFIR_CFAREL       ((UChar)66) /* cfa + <reg>_off */
191 #define CFIR_MEMCFAREL    ((UChar)67) /* *( cfa + <reg>_off ) */
192 #define CFIR_EXPR         ((UChar)68) /* expr whose index is in <reg>_off */
193 
194 #if defined(VGA_x86) || defined(VGA_amd64)
195 typedef
196    struct {
197       Addr  base;    /* first code address covered */
198       UInt  len;     /* record covers [base .. base+len-1] */
199       UChar cfa_how; /* a CFIC_IA value */
200       UChar ra_how;  /* a CFIR_ value */
201       UChar sp_how;  /* a CFIR_ value */
202       UChar bp_how;  /* a CFIR_ value */
203       Int   cfa_off; /* operand for cfa_how: an offset, or an expr index */
204       Int   ra_off;  /* operand for ra_how */
205       Int   sp_off;  /* operand for sp_how */
206       Int   bp_off;  /* operand for bp_how */
207    }
208    DiCfSI;
209 #elif defined(VGA_arm)
210 typedef
211    struct {
212       Addr  base;    /* first code address covered */
213       UInt  len;     /* record covers [base .. base+len-1] */
214       UChar cfa_how; /* a CFIC_ value */
215       UChar ra_how;  /* a CFIR_ value */
216       UChar r14_how; /* a CFIR_ value */
217       UChar r13_how; /* a CFIR_ value */
218       UChar r12_how; /* a CFIR_ value */
219       UChar r11_how; /* a CFIR_ value */
220       UChar r7_how;  /* a CFIR_ value */
221       Int   cfa_off; /* operand for cfa_how: an offset, or an expr index */
222       Int   ra_off;  /* operand for ra_how */
223       Int   r14_off; /* operand for r14_how */
224       Int   r13_off; /* operand for r13_how */
225       Int   r12_off; /* operand for r12_how */
226       Int   r11_off; /* operand for r11_how */
227       Int   r7_off;  /* operand for r7_how */
228    }
229    DiCfSI;
230 #elif defined(VGA_ppc32) || defined(VGA_ppc64)
231 /* Just have a struct with the common fields in, so that code that
232    processes the common fields doesn't have to be ifdef'd against
233    VGP_/VGA_ symbols.  These are not used in any way on ppc32/64-linux
234    at the moment. */
235 typedef
236    struct {
237       Addr  base;    /* first code address covered */
238       UInt  len;     /* record covers [base .. base+len-1] */
239       UChar cfa_how; /* a CFIC_ value */
240       UChar ra_how;  /* a CFIR_ value */
241       Int   cfa_off; /* operand for cfa_how */
242       Int   ra_off;  /* operand for ra_how */
243    }
244    DiCfSI;
245 #elif defined(VGA_s390x)
246 typedef
247    struct {
248       Addr  base;    /* first code address covered */
249       UInt  len;     /* record covers [base .. base+len-1] */
250       UChar cfa_how; /* a CFIC_ value */
251       UChar sp_how;  /* a CFIR_ value */
252       UChar ra_how;  /* a CFIR_ value */
253       UChar fp_how;  /* a CFIR_ value */
254       Int   cfa_off; /* operand for cfa_how: an offset, or an expr index */
255       Int   sp_off;  /* operand for sp_how */
256       Int   ra_off;  /* operand for ra_how */
257       Int   fp_off;  /* operand for fp_how */
258    }
259    DiCfSI;
260 #else
261 #  error "Unknown arch"
262 #endif
263 
264 
265 typedef
266    enum { /* operator of a Binop expression node (type of CfiExpr's Cex.Binop.op) */
267       Cop_Add=0x321, /* the following enumerators count up from this value */
268       Cop_Sub,
269       Cop_And,
270       Cop_Mul,
271       Cop_Shl,
272       Cop_Shr,
273       Cop_Eq,
274       Cop_Ge,
275       Cop_Gt,
276       Cop_Le,
277       Cop_Lt,
278       Cop_Ne
279    }
280    CfiOp;
281 
282 typedef
283    enum { /* register named by a CfiReg expression node (type of CfiExpr's Cex.CfiReg.reg) */
284       Creg_IA_SP=0x213, /* the following enumerators count up from this value, in listed order */
285       Creg_IA_BP,
286       Creg_IA_IP,
287       Creg_ARM_R13,
288       Creg_ARM_R12,
289       Creg_ARM_R15,
290       Creg_ARM_R14,
291       Creg_S390_R14
292    }
293    CfiReg;
294 
295 typedef
296    enum { /* type of CfiExpr.tag; one enumerator per member of the CfiExpr.Cex union */
297       Cex_Undef=0x123, /* the following enumerators count up from this value */
298       Cex_Deref,
299       Cex_Const,
300       Cex_Binop,
301       Cex_CfiReg,
302       Cex_DwReg
303    }
304    CfiExprTag;
305 
306 typedef
307    struct { /* one expression node: a tag plus a per-kind payload */
308       CfiExprTag tag; /* presumably selects the like-named member of Cex (Cex_X <-> Cex.X) -- confirm */
309       union {
310          struct {
311          } Undef; /* no payload; note an empty struct is a GNU C extension */
312          struct {
313             Int ixAddr; /* NOTE(review): presumably index of the address sub-expression in the owning XArray -- confirm */
314          } Deref;
315          struct {
316             UWord con; /* the constant value */
317          } Const;
318          struct {
319             CfiOp op; /* which binary operator */
320             Int ixL; /* NOTE(review): presumably index of the left operand expression -- confirm */
321             Int ixR; /* NOTE(review): presumably index of the right operand expression -- confirm */
322          } Binop;
323          struct {
324             CfiReg reg; /* which register */
325          } CfiReg;
326          struct {
327             Int reg; /* NOTE(review): presumably a DWARF register number -- confirm */
328          } DwReg;
329       }
330       Cex;
331    }
332    CfiExpr;
333 /* One constructor per CfiExprTag.  NOTE(review): presumably each appends a node to 'dst' and returns its index -- confirm against the definitions. */
334 extern Int ML_(CfiExpr_Undef) ( XArray* dst );
335 extern Int ML_(CfiExpr_Deref) ( XArray* dst, Int ixAddr );
336 extern Int ML_(CfiExpr_Const) ( XArray* dst, UWord con );
337 extern Int ML_(CfiExpr_Binop) ( XArray* dst, CfiOp op, Int ixL, Int ixR );
338 extern Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg );
339 extern Int ML_(CfiExpr_DwReg) ( XArray* dst, Int reg );
340 /* NOTE(review): presumably prints the expression at index 'ix' of 'src' -- inferred from the name and signature; confirm. */
341 extern void ML_(ppCfiExpr)( XArray* src, Int ix );
342 
343 /* ---------------- FPO INFO (Windows PE) -------------- */
344 
345 /* for apps using Wine: MSVC++ PDB FramePointerOmitted: somewhat like
346    a primitive CFI */
347 typedef
348    struct _FPO_DATA {  /* 16 bytes */
349       UInt   ulOffStart; /* offset of 1st byte of function code */
350       UInt   cbProcSize; /* # bytes in function */
351       UInt   cdwLocals;  /* # bytes/4 in locals */
352       UShort cdwParams;  /* # bytes/4 in params */
353       UChar  cbProlog;   /* # bytes in prolog */
354       UChar  cbRegs :3;  /* # regs saved (3-bit field) */
355       UChar  fHasSEH:1;  /* Structured Exception Handling */
356       UChar  fUseBP :1;  /* EBP has been used */
357       UChar  reserved:1; /* the five bit-fields total 8 bits */
358       UChar  cbFrame:2;  /* frame type; NOTE(review): presumably a PDB_FRAME_ value -- confirm */
359    }
360    FPO_DATA;
361 
362 #define PDB_FRAME_FPO  0 /* NOTE(review): PDB_FRAME_* look like the values of FPO_DATA.cbFrame -- confirm */
363 #define PDB_FRAME_TRAP 1
364 #define PDB_FRAME_TSS  2
365 
366 /* --------------------- VARIABLES --------------------- */
367 
368 typedef
369    struct { /* an address range plus the variables in scope over it */
370       Addr    aMin; /* NOTE(review): presumably lowest address of the range, inclusive -- confirm */
371       Addr    aMax; /* NOTE(review): presumably highest address of the range, inclusive -- confirm */
372       XArray* /* of DiVariable */ vars;
373    }
374    DiAddrRange;
375 
376 typedef
377    struct { /* one variable; the element type of DiAddrRange.vars */
378       UChar* name;  /* variable's name; in DebugInfo.strchunks */
379       UWord  typeR; /* a cuOff */
380       GExpr* gexpr; /* on DebugInfo.gexprs list; NOTE(review): struct _DebugInfo declares admin_gexprs, not gexprs -- stale name? */
381       GExpr* fbGX;  /* SHARED.  NOTE(review): presumably the frame-base expression -- confirm */
382       UChar* fileName; /* where declared; may be NULL. in
383                           DebugInfo.strchunks */
384       Int    lineNo;   /* where declared; may be zero. */
385    }
386    DiVariable;
387 /* Comparison function involving DiAddrRange.  NOTE(review): presumably compares an address key (keyV) against a DiAddrRange element (elemV), returning <0 / 0 / >0 -- confirm against the definition. */
388 Word
389 ML_(cmp_for_DiAddrRange_range) ( const void* keyV, const void* elemV );
390 
391 /* --------------------- DEBUGINFO --------------------- */
392 
393 /* This is the top-level data type.  It's a structure which contains
394    information pertaining to one mapped ELF object.  This type is
395    exported only abstractly - in pub_tool_debuginfo.h. */
396 
397 /* First though, here's an auxiliary data structure.  It is only ever
398    used as part of a struct _DebugInfo.  We use it to record
399    observations about mappings and permission changes to the
400    associated file, so as to decide when to read debug info.  It's
401    essentially an ultra-trivial finite state machine which, when it
402    reaches an accept state, signals that we should now read debug info
403    from the object into the associated struct _DebugInfo.  The accept
404    state is arrived at when have_rx_map and have_rw_map both become
405    true.  The initial state is one in which we have no observations,
406    so have_rx_map and have_rw_map are both false.
407 
408    This is all rather ad-hoc; for example it has no way to record more
409    than one rw or rx mapping for a given object, not because such
410    events have never been observed, but because we've never needed to
411    note more than the first one of any such in order to decide when to
412    read debug info.  It may be that in future we need to track more
413    state in order to make the decision, so this struct would then get
414    expanded.
415 
416    The normal sequence of events is one of
417 
418    start  -->  r-x mapping  -->  rw- mapping  -->  accept
419    start  -->  rw- mapping  -->  r-x mapping  -->  accept
420 
421    that is, take the first r-x and rw- mapping we see, and we're done.
422 
423    On MacOSX 10.7, 32-bit, there appears to be a new variant:
424 
425    start  -->  r-- mapping  -->  rw- mapping
426           -->  upgrade r-- mapping to r-x mapping  -->  accept
427 
428    where the upgrade is done by a call to vm_protect.  Hence we
429    need to also track this possibility.
430 */
431 struct _DebugInfoFSM
432 { /* accept state is reached when have_rx_map and have_rw_map are both true */
433    /* --- all targets --- */
434    UChar* filename; /* name of the associated file; in mallocville (VG_AR_DINFO) */
435 
436    Bool  have_rx_map; /* did we see a r?x mapping yet for the file? */
437    Bool  have_rw_map; /* did we see a rw? mapping yet for the file? */
438 
439    Addr  rx_map_avma; /* these fields record the file offset, length */
440    SizeT rx_map_size; /* and map address of the r?x mapping we believe */
441    OffT  rx_map_foff; /* is the .text segment mapping */
442 
443    Addr  rw_map_avma; /* ditto, for the rw? mapping we believe is the */
444    SizeT rw_map_size; /* .data segment mapping */
445    OffT  rw_map_foff; /* (file offset of that rw? mapping) */
446 
447    /* --- OSX 10.7, 32-bit only --- */
448    Bool  have_ro_map; /* did we see a r-- mapping yet for the file? */
449 
450    Addr  ro_map_avma; /* file offset, length, avma for said mapping */
451    SizeT ro_map_size; /* (its length) */
452    OffT  ro_map_foff; /* (its file offset) */
453 };
454 
455 
456 /* Size in bytes of the strtab[] array in each chunk of the string table in struct _DebugInfo (::strchunks) */
457 #define SEGINFO_STRCHUNKSIZE (64*1024)
458 
459 
460 /* We may encounter more than one .eh_frame section in an object --
461    unusual but apparently allowed by ELF.  See
462    http://sourceware.org/bugzilla/show_bug.cgi?id=12675
463 */
464 #define N_EHFRAME_SECTS 2 /* capacity of _DebugInfo's ehframe_avma[] and ehframe_size[] */
465 
466 
467 /* So, the main structure for holding debug info for one object. */
468 
469 struct _DebugInfo {
470 
471    /* Admin stuff */
472 
473    struct _DebugInfo* next;   /* next in the list of DebugInfos */
474    Bool               mark;   /* marked for deletion? */
475 
476    /* An abstract handle, which can be used by entities outside of
477       m_debuginfo to (in an abstract datatype sense) refer to this
478       struct _DebugInfo.  A .handle of zero is invalid; valid handles
479       are 1 and above.  The same handle is never issued twice (in any
480       given run of Valgrind), so a handle becomes invalid when the
481       associated struct _DebugInfo is discarded, and remains invalid
482       forever thereafter.  The .handle field is set as soon as this
483       structure is allocated. */
484    ULong handle;
485 
486    /* Used for debugging only - indicate what stuff to dump whilst
487       reading stuff into the seginfo.  Are computed as early in the
488       lifetime of the DebugInfo as possible -- at the point when it is
489       created.  Use these when deciding what to spew out; do not use
490       the global VG_(clo_blah) flags. */
491 
492    Bool trace_symtab; /* symbols, our style */
493    Bool trace_cfi;    /* dwarf frame unwind, our style */
494    Bool ddump_syms;   /* mimic /usr/bin/readelf --syms */
495    Bool ddump_line;   /* mimic /usr/bin/readelf --debug-dump=line */
496    Bool ddump_frames; /* mimic /usr/bin/readelf --debug-dump=frames */
497 
498    /* The "decide when it is time to read debuginfo" state machine.
499       This structure must get filled in before we can start reading
500       anything from the ELF/MachO file.  This structure is filled in
501       by VG_(di_notify_mmap) and its immediate helpers. */
502    struct _DebugInfoFSM fsm;
503 
504    /* Once the ::fsm has reached an accept state -- typically, when
505       both a rw? and r?x mapping for .filename have been observed --
506       we can go on to read the symbol tables and debug info.
507       .have_dinfo changes from False to True when the debug info has
508       been completely read in and postprocessed (canonicalised) and is
509       now suitable for querying. */
510    /* If have_dinfo is False, then all fields below this point are
511       invalid and should not be consulted. */
512    Bool  have_dinfo; /* initially False */
513 
514    /* All the rest of the fields in this structure are filled in once
515       we have committed to reading the symbols and debug info (that
516       is, at the point where .have_dinfo is set to True). */
517 
518    /* The file's soname.  FIXME: ensure this is always allocated in
519       VG_AR_DINFO. */
520    UChar* soname;
521 
522    /* Description of some important mapped segments.  The presence or
523       absence of the mapping is denoted by the _present field, since
524       in some obscure circumstances (to do with data/sdata/bss) it is
525       possible for the mapping to be present but have zero size.
526       Certainly text_ is mandatory on all platforms; not sure about
527       the rest though.
528 
529       --------------------------------------------------------
530 
531       Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that
532 
533       either (rx_map_size == 0 && cfsi == NULL) (the degenerate case)
534 
535       or the normal case, which is the AND of the following:
536       (0) rx_map_size > 0
537       (1) no two DebugInfos with rx_map_size > 0
538           have overlapping [rx_map_avma,+rx_map_size)
539       (2) [cfsi_minavma,cfsi_maxavma] does not extend
540           beyond [rx_map_avma,+rx_map_size); that is, the former is a
541           subrange or equal to the latter.
542       (3) all DiCfSI in the cfsi array all have ranges that fall within
543           [rx_map_avma,+rx_map_size).
544       (4) all DiCfSI in the cfsi array are non-overlapping
545 
546       The cumulative effect of these restrictions is to ensure that
547       all the DiCfSI records in the entire system are non overlapping.
548       Hence any address falls into either exactly one DiCfSI record,
549       or none.  Hence it is safe to cache the results of searches for
550       DiCfSI records.  This is the whole point of these restrictions.
551       The caching of DiCfSI searches is done in VG_(use_CF_info).  The
552       cache is flushed after any change to debugInfo_list.  DiCfSI
553       searches are cached because they are central to stack unwinding
554       on amd64-linux.
555 
556       Where are these invariants imposed and checked?
557 
558       They are checked after a successful read of debuginfo into
559       a DebugInfo*, in check_CFSI_related_invariants.
560 
561       (1) is not really imposed anywhere.  We simply assume that the
562       kernel will not map the text segments from two different objects
563       into the same space.  Sounds reasonable.
564 
565       (2) follows from (4) and (3).  It is ensured by canonicaliseCFI.
566       (3) is ensured by ML_(addDiCfSI).
567       (4) is ensured by canonicaliseCFI.
568 
569       --------------------------------------------------------
570 
571       Comment_on_DEBUG_SVMA_and_DEBUG_BIAS_fields:
572 
573       The _debug_{svma,bias} fields were added as part of a fix to
574       #185816.  The problem encompassed in that bug report was that it
575       wasn't correct to apply the bias values deduced for a
576       primary object to its associated debuginfo object, because the
577       debuginfo object (or the primary) could have been prelinked to a
578       different SVMA.  Hence debuginfo and primary objects need to
579       have their own biases.
580 
581       ------ JRS: (referring to r9329): ------
582       Let me see if I understand the workings correctly.  Initially
583       the _debug_ values are set to the same values as the "normal"
584       ones, as there's a bunch of bits of code like this (in
585       readelf.c)
586 
587          di->text_svma = svma;
588          ...
589          di->text_bias = rx_bias;
590          di->text_debug_svma = svma;
591          di->text_debug_bias = rx_bias;
592 
593       If a debuginfo object subsequently shows up then the
594       _debug_svma/bias are set for the debuginfo object.  Result is
595       that if there's no debuginfo object then the values are the same
596       as the primary-object values, and if there is a debuginfo object
597       then they will (or at least may) be different.
598 
599       Then when we need to actually bias something, we'll have to
600       decide whether to use the primary bias or the debuginfo bias.
601       And the strategy is to use the primary bias for ELF symbols but
602       the debuginfo bias for anything pulled out of Dwarf.
603 
604       ------ THH: ------
605       Correct - the debug_svma and bias values apply to any address
606       read from the debug data regardless of where that debug data is
607       stored and the other values are used for addresses from other
608       places (primarily the symbol table).
609 
610       ------ JRS: ------
611       Ok; so this was my only area of concern.  Are there any
612       corner-case scenarios where this wouldn't be right?  It sounds
613       like we're assuming the ELF symbols come from the primary object
614       and, if there is a debug object, then all the Dwarf comes from
615       there.  But what if (eg) both symbols and Dwarf come from the
616       debug object?  Is that even possible or allowable?
617 
618       ------ THH: ------
619       You may have a point...
620 
621       The current logic is to try and take any one set of data from
622       either the base object or the debug object. There are four sets
623       of data we consider:
624 
625          - Symbol Table
626          - Stabs
627          - DWARF1
628          - DWARF2
629 
630       If we see the primary section for a given set in the base object
631       then we ignore all sections relating to that set in the debug
632       object.
633 
634       Now in principle if we saw a secondary section (like debug_line
635       say) in the base object, but not the main section (debug_info in
636       this case) then we would take debug_info from the debug object
637       but would use the debug_line from the base object unless we saw
638       a replacement copy in the debug object. That's probably unlikely
639       however.
640 
641       A bigger issue might be, as you say, the symbol table as we will
642       pick that up from the debug object if it isn't in the base. The
643       dynamic symbol table will always have to be in the base object
644       though so we will have to be careful when processing symbols to
645       know which table we are reading in that case.
646 
647       What we probably need to do is tell read_elf_symtab which object
648       the symbols it is being asked to read came from.
649 
650       (A followup patch to deal with this was committed in r9469).
651    */
652    /* .text */
653    Bool     text_present;
654    Addr     text_avma;
655    Addr     text_svma;
656    SizeT    text_size;
657    PtrdiffT text_bias;
658    Addr     text_debug_svma;
659    PtrdiffT text_debug_bias;
660    /* .data */
661    Bool     data_present;
662    Addr     data_svma;
663    Addr     data_avma;
664    SizeT    data_size;
665    PtrdiffT data_bias;
666    Addr     data_debug_svma;
667    PtrdiffT data_debug_bias;
668    /* .sdata */
669    Bool     sdata_present;
670    Addr     sdata_svma;
671    Addr     sdata_avma;
672    SizeT    sdata_size;
673    PtrdiffT sdata_bias;
674    Addr     sdata_debug_svma;
675    PtrdiffT sdata_debug_bias;
676    /* .rodata */
677    Bool     rodata_present;
678    Addr     rodata_svma;
679    Addr     rodata_avma;
680    SizeT    rodata_size;
681    PtrdiffT rodata_bias;
682    Addr     rodata_debug_svma;
683    PtrdiffT rodata_debug_bias;
684    /* .bss */
685    Bool     bss_present;
686    Addr     bss_svma;
687    Addr     bss_avma;
688    SizeT    bss_size;
689    PtrdiffT bss_bias;
690    Addr     bss_debug_svma;
691    PtrdiffT bss_debug_bias;
692    /* .sbss */
693    Bool     sbss_present;
694    Addr     sbss_svma;
695    Addr     sbss_avma;
696    SizeT    sbss_size;
697    PtrdiffT sbss_bias;
698    Addr     sbss_debug_svma;
699    PtrdiffT sbss_debug_bias;
700    /* .plt */
701    Bool   plt_present;
702    Addr	  plt_avma;
703    SizeT  plt_size;
704    /* .got */
705    Bool   got_present;
706    Addr   got_avma;
707    SizeT  got_size;
708    /* .got.plt */
709    Bool   gotplt_present;
710    Addr   gotplt_avma;
711    SizeT  gotplt_size;
712    /* .opd -- needed on ppc64-linux for finding symbols */
713    Bool   opd_present;
714    Addr   opd_avma;
715    SizeT  opd_size;
716    /* .ehframe -- needed on amd64-linux for stack unwinding.  We might
717       see more than one, hence the arrays. */
718    UInt   n_ehframe;  /* 0 .. N_EHFRAME_SECTS */
719    Addr   ehframe_avma[N_EHFRAME_SECTS];
720    SizeT  ehframe_size[N_EHFRAME_SECTS];
721 
722    /* Sorted tables of stuff we snarfed from the file.  This is the
723       eventual product of reading the debug info.  All this stuff
724       lives in VG_AR_DINFO. */
725 
726    /* An expandable array of symbols. */
727    DiSym*  symtab;
728    UWord   symtab_used; /* NOTE(review): presumably # entries in use -- confirm */
729    UWord   symtab_size; /* NOTE(review): presumably # entries allocated -- confirm */
730    /* An expandable array of locations. */
731    DiLoc*  loctab;
732    UWord   loctab_used;
733    UWord   loctab_size;
734    /* An expandable array of CFI summary info records.  Also includes
735       summary address bounds, showing the min and max address covered
736       by any of the records, as an aid to fast searching.  And, if the
737       records require any expression nodes, they are stored in
738       cfsi_exprs. */
739    DiCfSI* cfsi;
740    UWord   cfsi_used;
741    UWord   cfsi_size;
742    Addr    cfsi_minavma;
743    Addr    cfsi_maxavma;
744    XArray* cfsi_exprs; /* XArray of CfiExpr */
745 
746    /* Optimized code under Wine x86: MSVC++ PDB FramePointerOmitted
747       data.  Non-expandable array, hence .size == .used. */
748    FPO_DATA* fpo;
749    UWord     fpo_size; /* doubles as the used count, since the array is non-expandable */
750    Addr      fpo_minavma;
751    Addr      fpo_maxavma;
752 
753    /* Expandable arrays of characters -- the string table.  Pointers
754       into this are stable (the arrays are not reallocated). */
755    struct strchunk {
756       UInt   strtab_used; /* NOTE(review): presumably # bytes of strtab[] in use -- confirm */
757       struct strchunk* next;
758       UChar  strtab[SEGINFO_STRCHUNKSIZE];
759    } *strchunks;
760 
761    /* Variable scope information, as harvested from Dwarf3 files.
762 
763       In short it's an
764 
765          array of (array of PC address ranges and variables)
766 
767       The outer array indexes over scopes, with Entry 0 containing
768       information on variables which exist for any value of the program
769       counter (PC) -- that is, the outermost scope.  Entries 1, 2, 3,
770       etc contain information on increasingly deeply nested variables.
771 
772       Each inner array is an array of (an address range, and a set
773       of variables that are in scope over that address range).
774 
775       The address ranges may not overlap.
776 
777       Since Entry 0 in the outer array holds information on variables
778       that exist for any value of the PC (that is, global vars), it
779       follows that Entry 0's inner array can only have one address
780       range pair, one that covers the entire address space.
781    */
782    XArray* /* of OSet of DiAddrRange */varinfo;
783 
784    /* These are arrays of the relevant typed objects, held here
785       partially for the purposes of visiting each object exactly once
786       when we need to delete them. */
787 
788    /* An array of TyEnts.  These are needed to make sense of any types
789       in the .varinfo.  Also, when deleting this DebugInfo, we must
790       first traverse this array and throw away malloc'd stuff hanging
791       off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */
792    XArray* /* of TyEnt */ admin_tyents;
793 
794    /* An array of guarded DWARF3 expressions. */
795    XArray* admin_gexprs; /* NOTE(review): element type not stated here; presumably GExpr* -- confirm */
796 };
797 
798 /* --------------------- functions --------------------- */
799 
800 /* ------ Adding ------ */
801 
802 /* Add a symbol to di's symbol table.  The contents of 'sym' are
803    copied.  It is assumed (and checked) that 'sym' only contains one
804    name, so there is no auxiliary ::sec_names vector to duplicate.
805    IOW, the copy is a shallow copy, and there are assertions in place
806    to ensure that's OK. */
807 extern void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym );
808 
809 /* Add a line-number record to a DebugInfo.  NOTE(review): 'this' and 'next' presumably delimit the address range of the line -- confirm. */
810 extern
811 void ML_(addLineInfo) ( struct _DebugInfo* di,
812                         UChar*   filename,
813                         UChar*   dirname,  /* NULL is allowable */
814                         Addr this, Addr next, Int lineno, Int entry);
815 
816 /* Add a CFI summary record.  The supplied DiCfSI is copied. */
817 extern void ML_(addDiCfSI) ( struct _DebugInfo* di, DiCfSI* cfsi );
818 
819 /* Add a string to the string table of a DebugInfo.  If len==-1,
820    ML_(addStr) will itself measure the length of the string. */
821 extern UChar* ML_(addStr) ( struct _DebugInfo* di, UChar* str, Int len );
822 /* Add a variable to a DebugInfo.  NOTE(review): most parameters mirror the DiAddrRange/DiVariable fields; 'level' is presumably the scope index into di->varinfo -- confirm. */
823 extern void ML_(addVar)( struct _DebugInfo* di,
824                          Int    level,
825                          Addr   aMin,
826                          Addr   aMax,
827                          UChar* name,
828                          UWord  typeR, /* a cuOff */
829                          GExpr* gexpr,
830                          GExpr* fbGX, /* SHARED. */
831                          UChar* fileName, /* where decl'd - may be NULL */
832                          Int    lineNo, /* where decl'd - may be zero */
833                          Bool   show );
834 
835 /* Canonicalise the tables held by 'di', in preparation for use.  Call
836    this after finishing adding entries to these tables. */
837 extern void ML_(canonicaliseTables) ( struct _DebugInfo* di );
838 
839 /* Canonicalise the call-frame-info table held by 'di', in preparation
840    for use. This is called by ML_(canonicaliseTables) but can also be
841    called on its own to sort just this table. */
842 extern void ML_(canonicaliseCFI) ( struct _DebugInfo* di );
843 
844 /* ------ Searching ------ */
845 
846 /* Find a symbol-table index containing the specified pointer, or -1
847    if not found.  Binary search.  */
848 extern Word ML_(search_one_symtab) ( struct _DebugInfo* di, Addr ptr,
849                                      Bool match_anywhere_in_sym, /* NOTE(review): presumably False means only a symbol's start address matches -- confirm */
850                                      Bool findText ); /* NOTE(review): presumably selects text vs non-text symbols (cf. DiSym.isText) -- confirm */
851 
852 /* Find a location-table index containing the specified pointer, or -1
853    if not found.  Binary search.  */
854 extern Word ML_(search_one_loctab) ( struct _DebugInfo* di, Addr ptr );
855 
856 /* Find a CFI-table index containing the specified pointer, or -1 if
857    not found.  Binary search.  */
858 extern Word ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr );
859 
860 /* Find a FPO-table index containing the specified pointer, or -1
861    if not found.  Binary search.  */
862 extern Word ML_(search_one_fpotab) ( struct _DebugInfo* di, Addr ptr );
863 
864 /* ------ Misc ------ */
865 
866 /* Show a non-fatal debug info reading error.  Use vg_panic if
867    terminal.  'serious' errors are always shown; non-'serious' ones
868    are shown only at verbosity level 2 and above. */
869 extern
870 void ML_(symerr) ( struct _DebugInfo* di, Bool serious, HChar* msg );
871 
872 /* Print a symbol.  NOTE(review): 'idx' is presumably the symbol's index in the symbol table, shown alongside it -- confirm. */
873 extern void ML_(ppSym) ( Int idx, DiSym* sym );
874 
875 /* Print a call-frame-info summary.  'exprs' is the array of CfiExpr nodes; NOTE(review): presumably the one si's expr indices refer to -- confirm. */
876 extern void ML_(ppDiCfSI) ( XArray* /* of CfiExpr */ exprs, DiCfSI* si );
877 
878 
879 #define TRACE_SYMTAB(format, args...) \
880    if (di->trace_symtab) { VG_(printf)(format, ## args); }
881 
882 
883 #endif /* ndef __PRIV_STORAGE_H */
884 
885 /*--------------------------------------------------------------------*/
886 /*--- end                                                          ---*/
887 /*--------------------------------------------------------------------*/
888