• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mode: C; c-basic-offset: 3; -*- */
2 
3 /*--------------------------------------------------------------------*/
4 /*--- Read DWARF3/4 ".debug_info" sections (DIE trees).            ---*/
5 /*---                                                 readdwarf3.c ---*/
6 /*--------------------------------------------------------------------*/
7 
8 /*
9    This file is part of Valgrind, a dynamic binary instrumentation
10    framework.
11 
12    Copyright (C) 2008-2013 OpenWorks LLP
13       info@open-works.co.uk
14 
15    This program is free software; you can redistribute it and/or
16    modify it under the terms of the GNU General Public License as
17    published by the Free Software Foundation; either version 2 of the
18    License, or (at your option) any later version.
19 
20    This program is distributed in the hope that it will be useful, but
21    WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    General Public License for more details.
24 
25    You should have received a copy of the GNU General Public License
26    along with this program; if not, write to the Free Software
27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28    02111-1307, USA.
29 
30    The GNU General Public License is contained in the file COPYING.
31 
32    Neither the names of the U.S. Department of Energy nor the
33    University of California nor the names of its contributors may be
34    used to endorse or promote products derived from this software
35    without prior written permission.
36 */
37 
38 #if defined(VGO_linux) || defined(VGO_darwin)
39 
40 /* REFERENCE (without which this code will not make much sense):
41 
42    DWARF Debugging Information Format, Version 3,
43    dated 20 December 2005 (the "D3 spec").
44 
45    Available at http://www.dwarfstd.org/Dwarf3.pdf.  There's also a
46    .doc (MS Word) version, but for some reason the section numbers
47    between the Word and PDF versions differ by 1 in the first digit.
48    All section references in this code are to the PDF version.
49 
50    CURRENT HACKS:
51 
52    DW_TAG_{const,volatile}_type with no DW_AT_type is allowed; it is
53       assumed to mean "const void" or "volatile void" respectively.
54       GDB appears to interpret them like this, anyway.
55 
56    In many cases it is important to know the svma of a CU (the "base
57    address of the CU", as the D3 spec calls it).  There are some
58    situations in which the spec implies this value is unknown, but the
59    Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
60    merely zero when not explicitly stated.  So we too have to make
61    that assumption.
62 
63    POTENTIAL BUG?  Spotted 6 Sept 08.  Why doesn't
64    unitary_range_list() bias the resulting range list in the same way
65    that its more general cousin, get_range_list(), does?  I don't
66    know.
67 
68    TODO, 2008 Feb 17:
69 
70    get rid of cu_svma_known and document the assumed-zero svma hack.
71 
72    ML_(sizeOfType): differentiate between zero sized types and types
73    for which the size is unknown.  Is this important?  I don't know.
74 
75    DW_TAG_array_types: deal with explicit sizes (currently we compute
76    the size from the bounds and the element size, although that's
77    fragile, if the bounds are incompletely specified, or completely
78    absent)
79 
80    Document reason for difference (by 1) of stack preening depth in
81    parse_var_DIE vs parse_type_DIE.
82 
83    Don't hand to ML_(addVars), vars whose locations are entirely in
84    registers (DW_OP_reg*).  This is merely a space-saving
85    optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
86    expressions correctly, by failing to evaluate them and hence
87    effectively ignoring the variable with which they are associated.
88 
89    Deal with DW_TAG_array_types which have element size != stride
90 
91    In some cases, the info for a variable is split between two
92    different DIEs (generally a declarer and a definer).  We punt on
93    these.  Could do better here.
94 
95    The 'data_bias' argument passed to the expression evaluator
96    (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
97    MaybeUWord, to make it clear when we do vs don't know what it is
98    for the evaluation of an expression.  At the moment zero is passed
99    for this parameter in the don't know case.  That's a bit fragile
100    and obscure; using a MaybeUWord would be clearer.
101 
102    POTENTIAL PERFORMANCE IMPROVEMENTS:
103 
104    Currently, duplicate removal and all other queries for the type
105    entities array are done using cuOffset-based pointing, which
106    involves a binary search (VG_(lookupXA)) for each access.  This is
107    wildly inefficient, although simple.  It would be better to
108    translate all the cuOffset-based references (iow, all the "R" and
109    "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
110    'tyents' right at the start of dedup_types(), and use direct
111    indexing (VG_(indexXA)) wherever possible after that.
112 
113    cmp__XArrays_of_AddrRange is also a performance bottleneck.  Move
114    VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
115    points, and possibly also make an _UNCHECKED version which skips
116    the range checks in performance-critical situations such as this.
117 
118    Handle interaction between read_DIE and parse_{var,type}_DIE
119    better.  Currently read_DIE reads the entire DIE just to find where
120    the end is (and for debug printing), so that it can later reliably
121    move the cursor to the end regardless of what parse_{var,type}_DIE
122    do.  This means many DIEs (most, even?) are read twice.  It would
123    be smarter to make parse_{var,type}_DIE return a Bool indicating
124    whether or not they advanced the DIE cursor, and only if they
125    didn't should read_DIE itself read through the DIE.
126 
127    ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
128    zero variables in their .vars XArray.  Rather than have an XArray
129    with zero elements (which uses 2 malloc'd blocks), allow the .vars
130    pointer to be NULL in this case.
131 
132    More generally, reduce the amount of memory allocated and freed
133    while reading Dwarf3 type/variable information.  Even modest (20MB)
134    objects cause this module to allocate and free hundreds of
135    thousands of small blocks, and ML_(arena_malloc) and its various
136    groupies always show up at the top of performance profiles. */
137 
138 #include "pub_core_basics.h"
139 #include "pub_core_debuginfo.h"
140 #include "pub_core_libcbase.h"
141 #include "pub_core_libcassert.h"
142 #include "pub_core_libcprint.h"
143 #include "pub_core_libcsetjmp.h"   // setjmp facilities
144 #include "pub_core_hashtable.h"
145 #include "pub_core_options.h"
146 #include "pub_core_tooliface.h"    /* VG_(needs) */
147 #include "pub_core_xarray.h"
148 #include "pub_core_wordfm.h"
149 #include "priv_misc.h"             /* dinfo_zalloc/free */
150 #include "priv_image.h"
151 #include "priv_tytypes.h"
152 #include "priv_d3basics.h"
153 #include "priv_storage.h"
154 #include "priv_readdwarf3.h"       /* self */
155 
156 
157 /*------------------------------------------------------------*/
158 /*---                                                      ---*/
159 /*--- Basic machinery for parsing DIEs.                    ---*/
160 /*---                                                      ---*/
161 /*------------------------------------------------------------*/
162 
/* Print a debug-trace message, but only when the flag 'td3' is set.
   'td3' is not a parameter: a variable of that name must be in scope
   wherever the macro is used.  The do/while(0) wrapper makes each
   invocation (followed by its semicolon) a single statement, so that
      if (x) TRACE_D3(...); else ...
   parses as intended instead of mis-binding the 'else'. */
#define TRACE_D3(format, args...) \
   do { if (td3) { VG_(printf)(format, ## args); } } while (0)

/* Two reserved UWord values: all-ones and all-ones-minus-one.
   NOTE(review): going by the names, these stand for "no valid CU
   offset" and for the offset of a fake 'void' type respectively;
   their uses lie outside this part of the file -- confirm. */
#define D3_INVALID_CUOFF  ((UWord)(-1UL))
#define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
168 
169 typedef
170    struct {
171       DiSlice sli;      // to which this cursor applies
172       DiOffT  sli_next; // offset in underlying DiImage; must be >= sli.ioff
173       void (*barf)( const HChar* ) __attribute__((noreturn));
174       const HChar* barfstr;
175    }
176    Cursor;
177 
is_sane_Cursor(Cursor * c)178 static inline Bool is_sane_Cursor ( Cursor* c ) {
179    if (!c)                return False;
180    if (!c->barf)          return False;
181    if (!c->barfstr)       return False;
182    if (!ML_(sli_is_valid)(c->sli))    return False;
183    if (c->sli.ioff == DiOffT_INVALID) return False;
184    if (c->sli_next < c->sli.ioff)     return False;
185    return True;
186 }
187 
188 // Initialise a cursor from a DiSlice (ELF section, really) so as to
189 // start reading at offset |sli_initial_offset| from the start of the
190 // slice.
init_Cursor(Cursor * c,DiSlice sli,ULong sli_initial_offset,void (* barf)(const HChar *),const HChar * barfstr)191 static void init_Cursor ( /*OUT*/Cursor* c,
192                           DiSlice sli,
193                           ULong   sli_initial_offset,
194                           __attribute__((noreturn)) void (*barf)(const HChar*),
195                           const HChar* barfstr )
196 {
197    vg_assert(c);
198    VG_(bzero_inline)(c, sizeof(*c));
199    c->sli              = sli;
200    c->sli_next         = c->sli.ioff + sli_initial_offset;
201    c->barf             = barf;
202    c->barfstr          = barfstr;
203    vg_assert(is_sane_Cursor(c));
204 }
205 
is_at_end_Cursor(Cursor * c)206 static Bool is_at_end_Cursor ( Cursor* c ) {
207    vg_assert(is_sane_Cursor(c));
208    return c->sli_next >= c->sli.ioff + c->sli.szB;
209 }
210 
get_position_of_Cursor(Cursor * c)211 static inline ULong get_position_of_Cursor ( Cursor* c ) {
212    vg_assert(is_sane_Cursor(c));
213    return c->sli_next - c->sli.ioff;
214 }
/* Move the read position to |pos| bytes from the start of the slice.
   Only general cursor sanity is asserted afterwards; |pos| is not
   checked against the end of the slice here. */
static inline void set_position_of_Cursor ( Cursor* c, ULong pos ) {
   DiOffT target = c->sli.ioff + pos;
   c->sli_next = target;
   vg_assert(is_sane_Cursor(c));
}
219 
get_remaining_length_Cursor(Cursor * c)220 static /*signed*/Long get_remaining_length_Cursor ( Cursor* c ) {
221    vg_assert(is_sane_Cursor(c));
222    return c->sli.ioff + c->sli.szB - c->sli_next;
223 }
224 
225 //static void* get_address_of_Cursor ( Cursor* c ) {
226 //   vg_assert(is_sane_Cursor(c));
227 //   return &c->region_start_img[ c->region_next ];
228 //}
229 
get_DiCursor_from_Cursor(Cursor * c)230 static DiCursor get_DiCursor_from_Cursor ( Cursor* c ) {
231    return mk_DiCursor(c->sli.img, c->sli_next);
232 }
233 
234 /* FIXME: document assumptions on endianness for
235    get_UShort/UInt/ULong. */
get_UChar(Cursor * c)236 static inline UChar get_UChar ( Cursor* c ) {
237    UChar r;
238    vg_assert(is_sane_Cursor(c));
239    if (c->sli_next + sizeof(UChar) > c->sli.ioff + c->sli.szB) {
240       c->barf(c->barfstr);
241       /*NOTREACHED*/
242       vg_assert(0);
243    }
244    r = ML_(img_get_UChar)(c->sli.img, c->sli_next);
245    c->sli_next += sizeof(UChar);
246    return r;
247 }
get_UShort(Cursor * c)248 static UShort get_UShort ( Cursor* c ) {
249    UShort r;
250    vg_assert(is_sane_Cursor(c));
251    if (c->sli_next + sizeof(UShort) > c->sli.ioff + c->sli.szB) {
252       c->barf(c->barfstr);
253       /*NOTREACHED*/
254       vg_assert(0);
255    }
256    r = ML_(img_get_UShort)(c->sli.img, c->sli_next);
257    c->sli_next += sizeof(UShort);
258    return r;
259 }
get_UInt(Cursor * c)260 static UInt get_UInt ( Cursor* c ) {
261    UInt r;
262    vg_assert(is_sane_Cursor(c));
263    if (c->sli_next + sizeof(UInt) > c->sli.ioff + c->sli.szB) {
264       c->barf(c->barfstr);
265       /*NOTREACHED*/
266       vg_assert(0);
267    }
268    r = ML_(img_get_UInt)(c->sli.img, c->sli_next);
269    c->sli_next += sizeof(UInt);
270    return r;
271 }
get_ULong(Cursor * c)272 static ULong get_ULong ( Cursor* c ) {
273    ULong r;
274    vg_assert(is_sane_Cursor(c));
275    if (c->sli_next + sizeof(ULong) > c->sli.ioff + c->sli.szB) {
276       c->barf(c->barfstr);
277       /*NOTREACHED*/
278       vg_assert(0);
279    }
280    r = ML_(img_get_ULong)(c->sli.img, c->sli_next);
281    c->sli_next += sizeof(ULong);
282    return r;
283 }
get_ULEB128(Cursor * c)284 static ULong get_ULEB128 ( Cursor* c ) {
285    ULong result;
286    Int   shift;
287    UChar byte;
288    /* unroll first iteration */
289    byte = get_UChar( c );
290    result = (ULong)(byte & 0x7f);
291    if (LIKELY(!(byte & 0x80))) return result;
292    shift = 7;
293    /* end unroll first iteration */
294    do {
295       byte = get_UChar( c );
296       result |= ((ULong)(byte & 0x7f)) << shift;
297       shift += 7;
298    } while (byte & 0x80);
299    return result;
300 }
get_SLEB128(Cursor * c)301 static Long get_SLEB128 ( Cursor* c ) {
302    ULong  result = 0;
303    Int    shift = 0;
304    UChar  byte;
305    do {
306       byte = get_UChar(c);
307       result |= ((ULong)(byte & 0x7f)) << shift;
308       shift += 7;
309    } while (byte & 0x80);
310    if (shift < 64 && (byte & 0x40))
311       result |= -(1ULL << shift);
312    return result;
313 }
314 
315 /* Assume 'c' points to the start of a string.  Return a DiCursor of
316    whatever it points at, and advance it past the terminating zero.
317    This makes it safe for the caller to then copy the string with
318    ML_(addStr), since (w.r.t. image overruns) the process of advancing
319    past the terminating zero will already have "vetted" the string. */
get_AsciiZ(Cursor * c)320 static DiCursor get_AsciiZ ( Cursor* c ) {
321    UChar uc;
322    DiCursor res = get_DiCursor_from_Cursor(c);
323    do { uc = get_UChar(c); } while (uc != 0);
324    return res;
325 }
326 
peek_ULEB128(Cursor * c)327 static ULong peek_ULEB128 ( Cursor* c ) {
328    DiOffT here = c->sli_next;
329    ULong  r    = get_ULEB128( c );
330    c->sli_next = here;
331    return r;
332 }
peek_UChar(Cursor * c)333 static UChar peek_UChar ( Cursor* c ) {
334    DiOffT here = c->sli_next;
335    UChar  r    = get_UChar( c );
336    c->sli_next = here;
337    return r;
338 }
339 
get_Dwarfish_UWord(Cursor * c,Bool is_dw64)340 static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
341    return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c);
342 }
343 
get_UWord(Cursor * c)344 static UWord get_UWord ( Cursor* c ) {
345    vg_assert(sizeof(UWord) == sizeof(void*));
346    if (sizeof(UWord) == 4) return get_UInt(c);
347    if (sizeof(UWord) == 8) return get_ULong(c);
348    vg_assert(0);
349 }
350 
351 /* Read a DWARF3 'Initial Length' field */
get_Initial_Length(Bool * is64,Cursor * c,const HChar * barfMsg)352 static ULong get_Initial_Length ( /*OUT*/Bool* is64,
353                                   Cursor* c,
354                                   const HChar* barfMsg )
355 {
356    ULong w64;
357    UInt  w32;
358    *is64 = False;
359    w32 = get_UInt( c );
360    if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) {
361       c->barf( barfMsg );
362    }
363    else if (w32 == 0xFFFFFFFF) {
364       *is64 = True;
365       w64   = get_ULong( c );
366    } else {
367       *is64 = False;
368       w64 = (ULong)w32;
369    }
370    return w64;
371 }
372 
373 
374 /*------------------------------------------------------------*/
375 /*---                                                      ---*/
376 /*--- "CUConst" structure                                  ---*/
377 /*---                                                      ---*/
378 /*------------------------------------------------------------*/
379 
380 #define N_ABBV_CACHE 32
381 
382 /* Holds information that is constant through the parsing of a
383    Compilation Unit.  This is basically plumbed through to
384    everywhere. */
385 typedef
386    struct {
387       /* Call here if anything goes wrong */
388       void (*barf)( const HChar* ) __attribute__((noreturn));
389       /* Is this 64-bit DWARF ? */
390       Bool   is_dw64;
391       /* Which DWARF version ?  (2, 3 or 4) */
392       UShort version;
393       /* Length of this Compilation Unit, as stated in the
394          .unit_length :: InitialLength field of the CU Header.
395          However, this size (as specified by the D3 spec) does not
396          include the size of the .unit_length field itself, which is
397          either 4 or 12 bytes (32-bit or 64-bit Dwarf3).  That value
398          can be obtained through the expression ".is_dw64 ? 12 : 4". */
399       ULong  unit_length;
400       /* Offset of start of this unit in .debug_info */
401       UWord  cu_start_offset;
402       /* SVMA for this CU.  In the D3 spec, is known as the "base
403          address of the compilation unit (last para sec 3.1.1).
404          Needed for (amongst things) interpretation of location-list
405          values. */
406       Addr   cu_svma;
407       Bool   cu_svma_known;
408 
409       /* The debug_abbreviations table to be used for this Unit */
410       //UChar* debug_abbv;
411       /* Upper bound on size thereof (an overestimate, in general) */
412       //UWord  debug_abbv_maxszB;
413       /* A bounded area of the image, to be used as the
414          debug_abbreviations table tobe used for this Unit. */
415       DiSlice debug_abbv;
416 
417       /* Image information for various sections. */
418       DiSlice escn_debug_str;
419       DiSlice escn_debug_ranges;
420       DiSlice escn_debug_loc;
421       DiSlice escn_debug_line;
422       DiSlice escn_debug_info;
423       DiSlice escn_debug_types;
424       DiSlice escn_debug_info_alt;
425       DiSlice escn_debug_str_alt;
426       /* How much to add to .debug_types resp. alternate .debug_info offsets
427          in cook_die*.  */
428       UWord  types_cuOff_bias;
429       UWord  alt_cuOff_bias;
430       /* --- Needed so we can add stuff to the string table. --- */
431       struct _DebugInfo* di;
432       /* --- a cache for set_abbv_Cursor --- */
433       /* abbv_code == (ULong)-1 for an unused entry. */
434       struct { ULong abbv_code; UWord posn; } saC_cache[N_ABBV_CACHE];
435       UWord saC_cache_queries;
436       UWord saC_cache_misses;
437 
438       /* True if this came from .debug_types; otherwise it came from
439          .debug_info.  */
440       Bool is_type_unit;
441       /* For a unit coming from .debug_types, these hold the TU's type
442          signature and the uncooked DIE offset of the TU's signatured
443          type.  For a unit coming from .debug_info, these are unused.  */
444       ULong type_signature;
445       ULong type_offset;
446 
447       /* Signatured type hash; computed once and then shared by all
448          CUs.  */
449       VgHashTable signature_types;
450 
451       /* True if this came from alternate .debug_info; otherwise
452          it came from normal .debug_info or .debug_types.  */
453       Bool is_alt_info;
454    }
455    CUConst;
456 
457 
458 /* Return the cooked value of DIE depending on whether CC represents a
459    .debug_types unit.  To cook a DIE, we pretend that the .debug_info,
460    .debug_types and optional alternate .debug_info sections form
461    a contiguous whole, so that DIEs coming from .debug_types are numbered
462    starting at the end of .debug_info and DIEs coming from alternate
463    .debug_info are numbered starting at the end of .debug_types.  */
cook_die(CUConst * cc,UWord die)464 static UWord cook_die( CUConst* cc, UWord die )
465 {
466    if (cc->is_type_unit)
467       die += cc->types_cuOff_bias;
468    else if (cc->is_alt_info)
469       die += cc->alt_cuOff_bias;
470    return die;
471 }
472 
473 /* Like cook_die, but understand that DIEs coming from a
474    DW_FORM_ref_sig8 reference are already cooked.  Also, handle
475    DW_FORM_GNU_ref_alt from within primary .debug_info or .debug_types
476    as reference to alternate .debug_info.  */
cook_die_using_form(CUConst * cc,UWord die,DW_FORM form)477 static UWord cook_die_using_form( CUConst *cc, UWord die, DW_FORM form)
478 {
479    if (form == DW_FORM_ref_sig8)
480       return die;
481    if (form == DW_FORM_GNU_ref_alt)
482       return die + cc->alt_cuOff_bias;
483    return cook_die( cc, die );
484 }
485 
486 /* Return the uncooked offset of DIE and set *TYPE_FLAG to true if the DIE
487    came from the .debug_types section and *ALT_FLAG to true if the DIE
488    came from alternate .debug_info section.  */
uncook_die(CUConst * cc,UWord die,Bool * type_flag,Bool * alt_flag)489 static UWord uncook_die( CUConst *cc, UWord die, /*OUT*/Bool *type_flag,
490                          Bool *alt_flag )
491 {
492    *alt_flag = False;
493    *type_flag = False;
494    /* The use of escn_debug_{info,types}.szB seems safe to me even if
495       escn_debug_{info,types} are DiSlice_INVALID (meaning the
496       sections were not found), because DiSlice_INVALID.szB is always
497       zero.  That said, it seems unlikely we'd ever get here if
498       .debug_info or .debug_types were missing. */
499    if (die >= cc->escn_debug_info.szB) {
500       if (die >= cc->escn_debug_info.szB + cc->escn_debug_types.szB) {
501          *alt_flag = True;
502          die -= cc->escn_debug_info.szB + cc->escn_debug_types.szB;
503       } else {
504          *type_flag = True;
505          die -= cc->escn_debug_info.szB;
506       }
507    }
508    return die;
509 }
510 
511 /*------------------------------------------------------------*/
512 /*---                                                      ---*/
513 /*--- Helper functions for Guarded Expressions             ---*/
514 /*---                                                      ---*/
515 /*------------------------------------------------------------*/
516 
517 /* Parse the location list starting at img-offset 'debug_loc_offset'
518    in .debug_loc.  Results are biased with 'svma_of_referencing_CU'
519    and so I believe are correct SVMAs for the object as a whole.  This
520    function allocates the UChar*, and the caller must deallocate it.
521    The resulting block is in so-called Guarded-Expression format.
522 
523    Guarded-Expression format is similar but not identical to the DWARF3
524    location-list format.  The format of each returned block is:
525 
526       UChar biasMe;
527       UChar isEnd;
528       followed by zero or more of
529 
530       (Addr aMin;  Addr aMax;  UShort nbytes;  ..bytes..;  UChar isEnd)
531 
532    '..bytes..' is a standard DWARF3 location expression which is
533    valid when aMin <= pc <= aMax (possibly after suitable biasing).
534 
535    The number of bytes in '..bytes..' is nbytes.
536 
537    The end of the sequence is marked by an isEnd == 1 value.  All
538    previous isEnd values must be zero.
539 
540    biasMe is 1 if the aMin/aMax fields need this DebugInfo's
541    text_bias added before use, and 0 if this is not necessary
542    (the GX is ready to go).
543 
544    Hence the block can be quickly parsed and is self-describing.  Note
545    that aMax is 1 less than the corresponding value in a DWARF3
546    location list.  Zero length ranges, with aMax == aMin-1, are not
547    allowed.
548 */
549 /* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
550    it more logically belongs. */
551 
552 
553 /* Apply a text bias to a GX. */
bias_GX(GExpr * gx,struct _DebugInfo * di)554 static void bias_GX ( /*MOD*/GExpr* gx, struct _DebugInfo* di )
555 {
556    UShort nbytes;
557    UChar* p = &gx->payload[0];
558    UChar* pA;
559    UChar  uc;
560    uc = *p++; /*biasMe*/
561    if (uc == 0)
562       return;
563    vg_assert(uc == 1);
564    p[-1] = 0; /* mark it as done */
565    while (True) {
566       uc = *p++;
567       if (uc == 1)
568          break; /*isEnd*/
569       vg_assert(uc == 0);
570       /* t-bias aMin */
571       pA = (UChar*)p;
572       ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
573       p += sizeof(Addr);
574       /* t-bias aMax */
575       pA = (UChar*)p;
576       ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
577       p += sizeof(Addr);
578       /* nbytes, and actual expression */
579       nbytes = ML_(read_UShort)(p); p += sizeof(UShort);
580       p += nbytes;
581    }
582 }
583 
584 __attribute__((noinline))
make_singleton_GX(DiCursor block,ULong nbytes)585 static GExpr* make_singleton_GX ( DiCursor block, ULong nbytes )
586 {
587    SizeT  bytesReqd;
588    GExpr* gx;
589    UChar *p, *pstart;
590 
591    vg_assert(sizeof(UWord) == sizeof(Addr));
592    vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
593    bytesReqd
594       =   sizeof(UChar)  /*biasMe*/    + sizeof(UChar) /*!isEnd*/
595         + sizeof(UWord)  /*aMin*/      + sizeof(UWord) /*aMax*/
596         + sizeof(UShort) /*nbytes*/    + (SizeT)nbytes
597         + sizeof(UChar); /*isEnd*/
598 
599    gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1",
600                            sizeof(GExpr) + bytesReqd );
601    vg_assert(gx);
602 
603    p = pstart = &gx->payload[0];
604 
605    p = ML_(write_UChar)(p, 0);        /*biasMe*/
606    p = ML_(write_UChar)(p, 0);        /*!isEnd*/
607    p = ML_(write_Addr)(p, 0);         /*aMin*/
608    p = ML_(write_Addr)(p, ~0);        /*aMax*/
609    p = ML_(write_UShort)(p, nbytes);  /*nbytes*/
610    ML_(cur_read_get)(p, block, nbytes); p += nbytes;
611    p = ML_(write_UChar)(p, 1);        /*isEnd*/
612 
613    vg_assert( (SizeT)(p - pstart) == bytesReqd);
614    vg_assert( &gx->payload[bytesReqd]
615               == ((UChar*)gx) + sizeof(GExpr) + bytesReqd );
616 
617    return gx;
618 }
619 
620 __attribute__((noinline))
make_general_GX(CUConst * cc,Bool td3,ULong debug_loc_offset,Addr svma_of_referencing_CU)621 static GExpr* make_general_GX ( CUConst* cc,
622                                 Bool     td3,
623                                 ULong    debug_loc_offset,
624                                 Addr     svma_of_referencing_CU )
625 {
626    Addr      base;
627    Cursor    loc;
628    XArray*   xa; /* XArray of UChar */
629    GExpr*    gx;
630    Word      nbytes;
631 
632    vg_assert(sizeof(UWord) == sizeof(Addr));
633    if (!ML_(sli_is_valid)(cc->escn_debug_loc) || cc->escn_debug_loc.szB == 0)
634       cc->barf("make_general_GX: .debug_loc is empty/missing");
635 
636    init_Cursor( &loc, cc->escn_debug_loc, 0, cc->barf,
637                 "Overrun whilst reading .debug_loc section(2)" );
638    set_position_of_Cursor( &loc, debug_loc_offset );
639 
640    TRACE_D3("make_general_GX (.debug_loc_offset = %llu, ioff = %llu) {\n",
641             debug_loc_offset, (ULong)get_DiCursor_from_Cursor(&loc).ioff );
642 
643    /* Who frees this xa?  It is freed before this fn exits. */
644    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1",
645                     ML_(dinfo_free),
646                     sizeof(UChar) );
647 
648    { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
649 
650    base = 0;
651    while (True) {
652       Bool  acquire;
653       UWord len;
654       /* Read a (host-)word pair.  This is something of a hack since
655          the word size to read is really dictated by the ELF file;
656          however, we assume we're reading a file with the same
657          word-sizeness as the host.  Reasonably enough. */
658       UWord w1 = get_UWord( &loc );
659       UWord w2 = get_UWord( &loc );
660 
661       TRACE_D3("   %08lx %08lx\n", w1, w2);
662       if (w1 == 0 && w2 == 0)
663          break; /* end of list */
664 
665       if (w1 == -1UL) {
666          /* new value for 'base' */
667          base = w2;
668          continue;
669       }
670 
671       /* else a location expression follows */
672       /* else enumerate [w1+base, w2+base) */
673       /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
674          (sec 2.17.2) */
675       if (w1 > w2) {
676          TRACE_D3("negative range is for .debug_loc expr at "
677                   "file offset %llu\n",
678                   debug_loc_offset);
679          cc->barf( "negative range in .debug_loc section" );
680       }
681 
682       /* ignore zero length ranges */
683       acquire = w1 < w2;
684       len     = (UWord)get_UShort( &loc );
685 
686       if (acquire) {
687          UWord  w;
688          UShort s;
689          UChar  c;
690          c = 0; /* !isEnd*/
691          VG_(addBytesToXA)( xa, &c, sizeof(c) );
692          w = w1    + base + svma_of_referencing_CU;
693          VG_(addBytesToXA)( xa, &w, sizeof(w) );
694          w = w2 -1 + base + svma_of_referencing_CU;
695          VG_(addBytesToXA)( xa, &w, sizeof(w) );
696          s = (UShort)len;
697          VG_(addBytesToXA)( xa, &s, sizeof(s) );
698       }
699 
700       while (len > 0) {
701          UChar byte = get_UChar( &loc );
702          TRACE_D3("%02x", (UInt)byte);
703          if (acquire)
704             VG_(addBytesToXA)( xa, &byte, 1 );
705          len--;
706       }
707       TRACE_D3("\n");
708    }
709 
710    { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
711 
712    nbytes = VG_(sizeXA)( xa );
713    vg_assert(nbytes >= 1);
714 
715    gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes );
716    vg_assert(gx);
717    VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
718    vg_assert( &gx->payload[nbytes]
719               == ((UChar*)gx) + sizeof(GExpr) + nbytes );
720 
721    VG_(deleteXA)( xa );
722 
723    TRACE_D3("}\n");
724 
725    return gx;
726 }
727 
728 
729 /*------------------------------------------------------------*/
730 /*---                                                      ---*/
731 /*--- Helper functions for range lists and CU headers      ---*/
732 /*---                                                      ---*/
733 /*------------------------------------------------------------*/
734 
735 /* Denotes an address range.  Both aMin and aMax are included in the
736    range; hence a complete range is (0, ~0) and an empty range is any
737    (X, X-1) for X > 0.*/
738 typedef
739    struct { Addr aMin; Addr aMax; }
740    AddrRange;
741 
742 
743 /* Generate an arbitrary structural total ordering on
744    XArray* of AddrRange. */
cmp__XArrays_of_AddrRange(XArray * rngs1,XArray * rngs2)745 static Word cmp__XArrays_of_AddrRange ( XArray* rngs1, XArray* rngs2 )
746 {
747    Word n1, n2, i;
748    tl_assert(rngs1 && rngs2);
749    n1 = VG_(sizeXA)( rngs1 );
750    n2 = VG_(sizeXA)( rngs2 );
751    if (n1 < n2) return -1;
752    if (n1 > n2) return 1;
753    for (i = 0; i < n1; i++) {
754       AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i );
755       AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i );
756       if (rng1->aMin < rng2->aMin) return -1;
757       if (rng1->aMin > rng2->aMin) return 1;
758       if (rng1->aMax < rng2->aMax) return -1;
759       if (rng1->aMax > rng2->aMax) return 1;
760    }
761    return 0;
762 }
763 
764 
765 __attribute__((noinline))
empty_range_list(void)766 static XArray* /* of AddrRange */ empty_range_list ( void )
767 {
768    XArray* xa; /* XArray of AddrRange */
769    /* Who frees this xa?  varstack_preen() does. */
770    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1",
771                     ML_(dinfo_free),
772                     sizeof(AddrRange) );
773    return xa;
774 }
775 
776 
777 __attribute__((noinline))
unitary_range_list(Addr aMin,Addr aMax)778 static XArray* unitary_range_list ( Addr aMin, Addr aMax )
779 {
780    XArray*   xa;
781    AddrRange pair;
782    vg_assert(aMin <= aMax);
783    /* Who frees this xa?  varstack_preen() does. */
784    xa = VG_(newXA)( ML_(dinfo_zalloc),  "di.readdwarf3.url.1",
785                     ML_(dinfo_free),
786                     sizeof(AddrRange) );
787    pair.aMin = aMin;
788    pair.aMax = aMax;
789    VG_(addToXA)( xa, &pair );
790    return xa;
791 }
792 
793 
794 /* Enumerate the address ranges starting at img-offset
795    'debug_ranges_offset' in .debug_ranges.  Results are biased with
796    'svma_of_referencing_CU' and so I believe are correct SVMAs for the
797    object as a whole.  This function allocates the XArray, and the
798    caller must deallocate it. */
799 __attribute__((noinline))
800 static XArray* /* of AddrRange */
get_range_list(CUConst * cc,Bool td3,UWord debug_ranges_offset,Addr svma_of_referencing_CU)801        get_range_list ( CUConst* cc,
802                         Bool     td3,
803                         UWord    debug_ranges_offset,
804                         Addr     svma_of_referencing_CU )
805 {
806    Addr      base;
807    Cursor    ranges;
808    XArray*   xa; /* XArray of AddrRange */
809    AddrRange pair;
810 
811    if (!ML_(sli_is_valid)(cc->escn_debug_ranges)
812        || cc->escn_debug_ranges.szB == 0)
813       cc->barf("get_range_list: .debug_ranges is empty/missing");
814 
815    init_Cursor( &ranges, cc->escn_debug_ranges, 0, cc->barf,
816                 "Overrun whilst reading .debug_ranges section(2)" );
817    set_position_of_Cursor( &ranges, debug_ranges_offset );
818 
819    /* Who frees this xa?  varstack_preen() does. */
820    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free),
821                     sizeof(AddrRange) );
822    base = 0;
823    while (True) {
824       /* Read a (host-)word pair.  This is something of a hack since
825          the word size to read is really dictated by the ELF file;
826          however, we assume we're reading a file with the same
827          word-sizeness as the host.  Reasonably enough. */
828       UWord w1 = get_UWord( &ranges );
829       UWord w2 = get_UWord( &ranges );
830 
831       if (w1 == 0 && w2 == 0)
832          break; /* end of list. */
833 
834       if (w1 == -1UL) {
835          /* new value for 'base' */
836          base = w2;
837          continue;
838       }
839 
840       /* else enumerate [w1+base, w2+base) */
841       /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
842          (sec 2.17.2) */
843       if (w1 > w2)
844          cc->barf( "negative range in .debug_ranges section" );
845       if (w1 < w2) {
846          pair.aMin = w1     + base + svma_of_referencing_CU;
847          pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
848          vg_assert(pair.aMin <= pair.aMax);
849          VG_(addToXA)( xa, &pair );
850       }
851    }
852    return xa;
853 }
854 
855 
856 /* Parse the Compilation Unit header indicated at 'c' and
857    initialise 'cc' accordingly. */
858 static __attribute__((noinline))
parse_CU_Header(CUConst * cc,Bool td3,Cursor * c,DiSlice escn_debug_abbv,Bool type_unit,Bool alt_info)859 void parse_CU_Header ( /*OUT*/CUConst* cc,
860                        Bool td3,
861                        Cursor* c,
862                        DiSlice escn_debug_abbv,
863 		       Bool type_unit,
864                        Bool alt_info )
865 {
866    UChar  address_size;
867    ULong  debug_abbrev_offset;
868    Int    i;
869 
870    VG_(memset)(cc, 0, sizeof(*cc));
871    vg_assert(c && c->barf);
872    cc->barf = c->barf;
873 
874    /* initial_length field */
875    cc->unit_length
876       = get_Initial_Length( &cc->is_dw64, c,
877            "parse_CU_Header: invalid initial-length field" );
878 
879    TRACE_D3("   Length:        %lld\n", cc->unit_length );
880 
881    /* version */
882    cc->version = get_UShort( c );
883    if (cc->version != 2 && cc->version != 3 && cc->version != 4)
884       cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" );
885    TRACE_D3("   Version:       %d\n", (Int)cc->version );
886 
887    /* debug_abbrev_offset */
888    debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
889    if (debug_abbrev_offset >= escn_debug_abbv.szB)
890       cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
891    TRACE_D3("   Abbrev Offset: %lld\n", debug_abbrev_offset );
892 
893    /* address size.  If this isn't equal to the host word size, just
894       give up.  This makes it safe to assume elsewhere that
895       DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
896       word. */
897    address_size = get_UChar( c );
898    if (address_size != sizeof(void*))
899       cc->barf( "parse_CU_Header: invalid address_size" );
900    TRACE_D3("   Pointer Size:  %d\n", (Int)address_size );
901 
902    cc->is_type_unit = type_unit;
903    cc->is_alt_info = alt_info;
904 
905    if (type_unit) {
906       cc->type_signature = get_ULong( c );
907       cc->type_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
908    }
909 
910    /* Set up cc->debug_abbv to point to the relevant table for this
911       CU.  Set its .szB so that at least we can't read off the end of
912       the debug_abbrev section -- potentially (and quite likely) too
913       big, if this isn't the last table in the section, but at least
914       it's safe.
915 
916       This amounts to taking debug_abbv_escn and moving the start
917       position along by debug_abbrev_offset bytes, hence forming a
918       smaller DiSlice which has the same end point.  Since we checked
919       just above that debug_abbrev_offset is less than the size of
920       debug_abbv_escn, this should leave us with a nonempty slice. */
921    vg_assert(debug_abbrev_offset < escn_debug_abbv.szB);
922    cc->debug_abbv      = escn_debug_abbv;
923    cc->debug_abbv.ioff += debug_abbrev_offset;
924    cc->debug_abbv.szB  -= debug_abbrev_offset;
925 
926    /* and empty out the set_abbv_Cursor cache */
927    if (0) VG_(printf)("XXXXXX initialise set_abbv_Cursor cache\n");
928    for (i = 0; i < N_ABBV_CACHE; i++) {
929       cc->saC_cache[i].abbv_code = (ULong)-1; /* unused */
930       cc->saC_cache[i].posn = 0;
931    }
932    cc->saC_cache_queries = 0;
933    cc->saC_cache_misses = 0;
934 }
935 
936 
937 /* Set up 'c' so it is ready to parse the abbv table entry code
938    'abbv_code' for this compilation unit.  */
939 static __attribute__((noinline))
set_abbv_Cursor(Cursor * c,Bool td3,CUConst * cc,ULong abbv_code)940 void set_abbv_Cursor ( /*OUT*/Cursor* c, Bool td3,
941                        CUConst* cc, ULong abbv_code )
942 {
943    Int   i;
944    ULong acode;
945 
946    if (abbv_code == 0)
947       cc->barf("set_abbv_Cursor: abbv_code == 0" );
948 
949    /* (ULong)-1 is used to represent an empty cache slot.  So we can't
950       allow it.  In any case no valid DWARF3 should make a reference
951       to a negative abbreviation code.  [at least, they always seem to
952       be numbered upwards from zero as far as I have seen] */
953    vg_assert(abbv_code != (ULong)-1);
954 
955    /* First search the cache. */
956    if (0) VG_(printf)("XXXXXX search set_abbv_Cursor cache\n");
957    cc->saC_cache_queries++;
958    for (i = 0; i < N_ABBV_CACHE; i++) {
959       /* No need to test the cached abbv_codes for -1 (empty), since
960          we just asserted that abbv_code is not -1. */
961       if (LIKELY(cc->saC_cache[i].abbv_code == abbv_code)) {
962          /* Found it.  Set up the parser using the cached position,
963             and move this cache entry to the front. */
964          if (0) VG_(printf)("XXXXXX found in set_abbv_Cursor cache\n");
965          init_Cursor( c, cc->debug_abbv, cc->saC_cache[i].posn,
966                       cc->barf,
967                       "Overrun whilst parsing .debug_abbrev section(1)" );
968          if (i > 0) {
969             ULong t_abbv_code = cc->saC_cache[i].abbv_code;
970             UWord t_posn      = cc->saC_cache[i].posn;
971             while (i > 0) {
972                cc->saC_cache[i] = cc->saC_cache[i-1];
973                i--;
974             }
975             cc->saC_cache[0].abbv_code = t_abbv_code;
976             cc->saC_cache[0].posn      = t_posn;
977          }
978          return;
979       }
980    }
981 
982    /* No.  It's not in the cache.  We have to search through
983       .debug_abbrev, of course taking care to update the cache
984       when done. */
985 
986    cc->saC_cache_misses++;
987    init_Cursor( c, cc->debug_abbv, 0, cc->barf,
988                "Overrun whilst parsing .debug_abbrev section(2)" );
989 
990    /* Now iterate though the table until we find the requested
991       entry. */
992    while (True) {
993       //ULong atag;
994       //UInt  has_children;
995       acode = get_ULEB128( c );
996       if (acode == 0) break; /* end of the table */
997       if (acode == abbv_code) break; /* found it */
998       /*atag         = */ get_ULEB128( c );
999       /*has_children = */ get_UChar( c );
1000       //TRACE_D3("   %llu      %s    [%s]\n",
1001       //         acode, pp_DW_TAG(atag), pp_DW_children(has_children));
1002       while (True) {
1003          ULong at_name = get_ULEB128( c );
1004          ULong at_form = get_ULEB128( c );
1005          if (at_name == 0 && at_form == 0) break;
1006          //TRACE_D3("    %18s %s\n",
1007          //         pp_DW_AT(at_name), pp_DW_FORM(at_form));
1008       }
1009    }
1010 
1011    if (acode == 0) {
1012       /* Not found.  This is fatal. */
1013       cc->barf("set_abbv_Cursor: abbv_code not found");
1014    }
1015 
1016    /* Otherwise, 'c' is now set correctly to parse the relevant entry,
1017       starting from the abbreviation entry's tag.  So just cache
1018       the result, and return. */
1019    for (i = N_ABBV_CACHE-1; i > N_ABBV_CACHE/2; i--) {
1020       cc->saC_cache[i] = cc->saC_cache[i-1];
1021    }
1022    if (0) VG_(printf)("XXXXXX update set_abbv_Cursor cache\n");
1023    cc->saC_cache[N_ABBV_CACHE/2].abbv_code = abbv_code;
1024    cc->saC_cache[N_ABBV_CACHE/2].posn = get_position_of_Cursor(c);
1025 }
1026 
1027 /* This represents a single signatured type.  It maps a type signature
1028    (a ULong) to a cooked DIE offset.  Objects of this type are stored
1029    in the type signature hash table.  */
1030 typedef
1031    struct D3SignatureType {
1032       struct D3SignatureType *next;
1033       UWord data;
1034       ULong type_signature;
1035       UWord die;
1036    }
1037    D3SignatureType;
1038 
1039 /* Record a signatured type in the hash table.  */
record_signatured_type(VgHashTable tab,ULong type_signature,UWord die)1040 static void record_signatured_type ( VgHashTable tab,
1041                                      ULong type_signature,
1042                                      UWord die )
1043 {
1044    D3SignatureType *dstype = ML_(dinfo_zalloc) ( "di.readdwarf3.sigtype",
1045                                                  sizeof(D3SignatureType) );
1046    dstype->data = (UWord) type_signature;
1047    dstype->type_signature = type_signature;
1048    dstype->die = die;
1049    VG_(HT_add_node) ( tab, dstype );
1050 }
1051 
1052 /* Given a type signature hash table and a type signature, return the
1053    cooked DIE offset of the type.  If the type cannot be found, call
1054    BARF.  */
lookup_signatured_type(VgHashTable tab,ULong type_signature,void (* barf)(const HChar *))1055 static UWord lookup_signatured_type ( VgHashTable tab,
1056                                       ULong type_signature,
1057                                       void (*barf)( const HChar* ) __attribute__((noreturn)) )
1058 {
1059    D3SignatureType *dstype = VG_(HT_lookup) ( tab, (UWord) type_signature );
1060    /* This may be unwarranted chumminess with the hash table
1061       implementation.  */
1062    while ( dstype != NULL && dstype->type_signature != type_signature)
1063       dstype = dstype->next;
1064    if (dstype == NULL) {
1065       barf("lookup_signatured_type: could not find signatured type");
1066       /*NOTREACHED*/
1067       vg_assert(0);
1068    }
1069    return dstype->die;
1070 }
1071 
1072 
1073 /* Represents Form data.  If szB is 1/2/4/8 then the result is in the
1074    lowest 1/2/4/8 bytes of u.val.  If szB is zero or negative then the
1075    result is an image section beginning at u.cur and with size -szB.
1076    No other szB values are allowed. */
1077 typedef
1078    struct {
1079       Long szB; // 1, 2, 4, 8 or non-positive values only.
1080       union { ULong val; DiCursor cur; } u;
1081    }
1082    FormContents;
1083 
1084 /* From 'c', get the Form data into 'cts'.  Either it gets a 1/2/4/8
1085    byte scalar value, or (a reference to) zero or more bytes starting
1086    at a DiCursor.*/
1087 static
get_Form_contents(FormContents * cts,CUConst * cc,Cursor * c,Bool td3,DW_FORM form)1088 void get_Form_contents ( /*OUT*/FormContents* cts,
1089                          CUConst* cc, Cursor* c,
1090                          Bool td3, DW_FORM form )
1091 {
1092    VG_(bzero_inline)(cts, sizeof(*cts));
1093    switch (form) {
1094       case DW_FORM_data1:
1095          cts->u.val = (ULong)(UChar)get_UChar(c);
1096          cts->szB   = 1;
1097          TRACE_D3("%u", (UInt)cts->u.val);
1098          break;
1099       case DW_FORM_data2:
1100          cts->u.val = (ULong)(UShort)get_UShort(c);
1101          cts->szB   = 2;
1102          TRACE_D3("%u", (UInt)cts->u.val);
1103          break;
1104       case DW_FORM_data4:
1105          cts->u.val = (ULong)(UInt)get_UInt(c);
1106          cts->szB   = 4;
1107          TRACE_D3("%u", (UInt)cts->u.val);
1108          break;
1109       case DW_FORM_data8:
1110          cts->u.val = get_ULong(c);
1111          cts->szB   = 8;
1112          TRACE_D3("%llu", cts->u.val);
1113          break;
1114       case DW_FORM_sec_offset:
1115          cts->u.val = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 );
1116          cts->szB   = cc->is_dw64 ? 8 : 4;
1117          TRACE_D3("%llu", cts->u.val);
1118          break;
1119       case DW_FORM_sdata:
1120          cts->u.val = (ULong)(Long)get_SLEB128(c);
1121          cts->szB   = 8;
1122          TRACE_D3("%lld", (Long)cts->u.val);
1123          break;
1124       case DW_FORM_udata:
1125          cts->u.val = (ULong)(Long)get_ULEB128(c);
1126          cts->szB   = 8;
1127          TRACE_D3("%llu", (Long)cts->u.val);
1128          break;
1129       case DW_FORM_addr:
1130          /* note, this is a hack.  DW_FORM_addr is defined as getting
1131             a word the size of the target machine as defined by the
1132             address_size field in the CU Header.  However,
1133             parse_CU_Header() rejects all inputs except those for
1134             which address_size == sizeof(Word), hence we can just
1135             treat it as a (host) Word.  */
1136          cts->u.val = (ULong)(UWord)get_UWord(c);
1137          cts->szB   = sizeof(UWord);
1138          TRACE_D3("0x%lx", (UWord)cts->u.val);
1139          break;
1140 
1141       case DW_FORM_ref_addr:
1142          /* We make the same word-size assumption as DW_FORM_addr. */
1143          /* What does this really mean?  From D3 Sec 7.5.4,
1144             description of "reference", it would appear to reference
1145             some other DIE, by specifying the offset from the
1146             beginning of a .debug_info section.  The D3 spec mentions
1147             that this might be in some other shared object and
1148             executable.  But I don't see how the name of the other
1149             object/exe is specified.
1150 
1151             At least for the DW_FORM_ref_addrs created by icc11, the
1152             references seem to be within the same object/executable.
1153             So for the moment we merely range-check, to see that they
1154             actually do specify a plausible offset within this
1155             object's .debug_info, and return the value unchanged.
1156 
1157             In DWARF 2, DW_FORM_ref_addr is address-sized, but in
1158             DWARF 3 and later, it is offset-sized.
1159          */
1160          if (cc->version == 2) {
1161             cts->u.val = (ULong)(UWord)get_UWord(c);
1162             cts->szB   = sizeof(UWord);
1163          } else {
1164             cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
1165             cts->szB   = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
1166          }
1167          TRACE_D3("0x%lx", (UWord)cts->u.val);
1168          if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)cts->u.val);
1169          if (/* the following is surely impossible, but ... */
1170              !ML_(sli_is_valid)(cc->escn_debug_info)
1171              || cts->u.val >= (ULong)cc->escn_debug_info.szB) {
1172             /* Hmm.  Offset is nonsensical for this object's .debug_info
1173                section.  Be safe and reject it. */
1174             cc->barf("get_Form_contents: DW_FORM_ref_addr points "
1175                      "outside .debug_info");
1176          }
1177          break;
1178 
1179       case DW_FORM_strp: {
1180          /* this is an offset into .debug_str */
1181          UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1182          if (!ML_(sli_is_valid)(cc->escn_debug_str)
1183              || uw >= cc->escn_debug_str.szB)
1184             cc->barf("get_Form_contents: DW_FORM_strp "
1185                      "points outside .debug_str");
1186          /* FIXME: check the entire string lies inside debug_str,
1187             not just the first byte of it. */
1188          DiCursor str
1189             = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str), uw );
1190          if (td3) {
1191             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.1");
1192             TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, tmp);
1193             ML_(dinfo_free)(tmp);
1194          }
1195          cts->u.cur = str;
1196          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1197          break;
1198       }
1199       case DW_FORM_string: {
1200          DiCursor str = get_AsciiZ(c);
1201          if (td3) {
1202             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.2");
1203             TRACE_D3("%s", tmp);
1204             ML_(dinfo_free)(tmp);
1205          }
1206          cts->u.cur = str;
1207          /* strlen is safe because get_AsciiZ already 'vetted' the
1208             entire string */
1209          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1210          break;
1211       }
1212       case DW_FORM_ref1: {
1213          UChar u8   = get_UChar(c);
1214          UWord res  = cc->cu_start_offset + (UWord)u8;
1215          cts->u.val = (ULong)res;
1216          cts->szB   = sizeof(UWord);
1217          TRACE_D3("<%lx>", res);
1218          break;
1219       }
1220       case DW_FORM_ref2: {
1221          UShort u16 = get_UShort(c);
1222          UWord  res = cc->cu_start_offset + (UWord)u16;
1223          cts->u.val = (ULong)res;
1224          cts->szB   = sizeof(UWord);
1225          TRACE_D3("<%lx>", res);
1226          break;
1227       }
1228       case DW_FORM_ref4: {
1229          UInt  u32  = get_UInt(c);
1230          UWord res  = cc->cu_start_offset + (UWord)u32;
1231          cts->u.val = (ULong)res;
1232          cts->szB   = sizeof(UWord);
1233          TRACE_D3("<%lx>", res);
1234          break;
1235       }
1236       case DW_FORM_ref8: {
1237          ULong u64  = get_ULong(c);
1238          UWord res  = cc->cu_start_offset + (UWord)u64;
1239          cts->u.val = (ULong)res;
1240          cts->szB   = sizeof(UWord);
1241          TRACE_D3("<%lx>", res);
1242          break;
1243       }
1244       case DW_FORM_ref_udata: {
1245          ULong u64  = get_ULEB128(c);
1246          UWord res  = cc->cu_start_offset + (UWord)u64;
1247          cts->u.val = (ULong)res;
1248          cts->szB   = sizeof(UWord);
1249          TRACE_D3("<%lx>", res);
1250          break;
1251       }
1252       case DW_FORM_flag: {
1253          UChar u8 = get_UChar(c);
1254          TRACE_D3("%u", (UInt)u8);
1255          cts->u.val = (ULong)u8;
1256          cts->szB   = 1;
1257          break;
1258       }
1259       case DW_FORM_flag_present:
1260          TRACE_D3("1");
1261          cts->u.val = 1;
1262          cts->szB   = 1;
1263          break;
1264       case DW_FORM_block1: {
1265          ULong    u64b;
1266          ULong    u64   = (ULong)get_UChar(c);
1267          DiCursor block = get_DiCursor_from_Cursor(c);
1268          TRACE_D3("%llu byte block: ", u64);
1269          for (u64b = u64; u64b > 0; u64b--) {
1270             UChar u8 = get_UChar(c);
1271             TRACE_D3("%x ", (UInt)u8);
1272          }
1273          cts->u.cur = block;
1274          cts->szB   = - (Long)u64;
1275          break;
1276       }
1277       case DW_FORM_block2: {
1278          ULong    u64b;
1279          ULong    u64   = (ULong)get_UShort(c);
1280          DiCursor block = get_DiCursor_from_Cursor(c);
1281          TRACE_D3("%llu byte block: ", u64);
1282          for (u64b = u64; u64b > 0; u64b--) {
1283             UChar u8 = get_UChar(c);
1284             TRACE_D3("%x ", (UInt)u8);
1285          }
1286          cts->u.cur = block;
1287          cts->szB   = - (Long)u64;
1288          break;
1289       }
1290       case DW_FORM_block4: {
1291          ULong    u64b;
1292          ULong    u64   = (ULong)get_UInt(c);
1293          DiCursor block = get_DiCursor_from_Cursor(c);
1294          TRACE_D3("%llu byte block: ", u64);
1295          for (u64b = u64; u64b > 0; u64b--) {
1296             UChar u8 = get_UChar(c);
1297             TRACE_D3("%x ", (UInt)u8);
1298          }
1299          cts->u.cur = block;
1300          cts->szB   = - (Long)u64;
1301          break;
1302       }
1303       case DW_FORM_exprloc:
1304       case DW_FORM_block: {
1305          ULong    u64b;
1306          ULong    u64   = (ULong)get_ULEB128(c);
1307          DiCursor block = get_DiCursor_from_Cursor(c);
1308          TRACE_D3("%llu byte block: ", u64);
1309          for (u64b = u64; u64b > 0; u64b--) {
1310             UChar u8 = get_UChar(c);
1311             TRACE_D3("%x ", (UInt)u8);
1312          }
1313          cts->u.cur = block;
1314          cts->szB   = - (Long)u64;
1315          break;
1316       }
1317       case DW_FORM_ref_sig8: {
1318          ULong  u64b;
1319          ULong  signature = get_ULong (c);
1320          ULong  work = signature;
1321          TRACE_D3("8 byte signature: ");
1322          for (u64b = 8; u64b > 0; u64b--) {
1323             UChar u8 = work & 0xff;
1324             TRACE_D3("%x ", (UInt)u8);
1325             work >>= 8;
1326          }
1327          /* Due to the way that the hash table is constructed, the
1328             resulting DIE offset here is already "cooked".  See
1329             cook_die_using_form.  */
1330          cts->u.val = lookup_signatured_type (cc->signature_types, signature,
1331                                               c->barf);
1332          cts->szB   = sizeof(UWord);
1333          break;
1334       }
1335       case DW_FORM_indirect:
1336          get_Form_contents (cts, cc, c, td3, (DW_FORM)get_ULEB128(c));
1337          return;
1338 
1339       case DW_FORM_GNU_ref_alt:
1340          cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
1341          cts->szB   = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
1342          TRACE_D3("0x%lx", (UWord)cts->u.val);
1343          if (0) VG_(printf)("DW_FORM_GNU_ref_alt 0x%lx\n", (UWord)cts->u.val);
1344          if (/* the following is surely impossible, but ... */
1345              !ML_(sli_is_valid)(cc->escn_debug_info_alt)
1346              || cts->u.val >= (ULong)cc->escn_debug_info_alt.szB) {
1347             /* Hmm.  Offset is nonsensical for this object's .debug_info
1348                section.  Be safe and reject it. */
1349             cc->barf("get_Form_contents: DW_FORM_ref_addr points "
1350                      "outside alternate .debug_info");
1351          }
1352          break;
1353 
1354       case DW_FORM_GNU_strp_alt: {
1355          /* this is an offset into alternate .debug_str */
1356          SizeT uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1357          if (!ML_(sli_is_valid)(cc->escn_debug_str_alt)
1358              || uw >= cc->escn_debug_str_alt.szB)
1359             cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt "
1360                      "points outside alternate .debug_str");
1361          /* FIXME: check the entire string lies inside debug_str,
1362             not just the first byte of it. */
1363          DiCursor str
1364             = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str_alt), uw);
1365          if (td3) {
1366             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.3");
1367             TRACE_D3("(indirect alt string, offset: 0x%lx): %s", uw, tmp);
1368             ML_(dinfo_free)(tmp);
1369          }
1370          cts->u.cur = str;
1371          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1372          break;
1373       }
1374 
1375       default:
1376          VG_(printf)(
1377             "get_Form_contents: unhandled %d (%s) at <%llx>\n",
1378             form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c));
1379          c->barf("get_Form_contents: unhandled DW_FORM");
1380    }
1381 }
1382 
1383 
1384 /*------------------------------------------------------------*/
1385 /*---                                                      ---*/
1386 /*--- Parsing of variable-related DIEs                     ---*/
1387 /*---                                                      ---*/
1388 /*------------------------------------------------------------*/
1389 
1390 typedef
1391    struct _TempVar {
1392       HChar*  name; /* in DebugInfo's .strchunks */
1393       /* Represent ranges economically.  nRanges is the number of
1394          ranges.  Cases:
1395          0: .rngOneMin .rngOneMax .manyRanges are all zero
1396          1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
1397          2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
1398          This is merely an optimisation to avoid having to allocate
1399          and free the XArray in the common (98%) of cases where there
1400          is zero or one address ranges. */
1401       UWord   nRanges;
1402       Addr    rngOneMin;
1403       Addr    rngOneMax;
1404       XArray* rngMany; /* of AddrRange.  NON-UNIQUE PTR in AR_DINFO. */
1405       /* Do not free .rngMany, since many TempVars will have the same
1406          value.  Instead the associated storage is to be freed by
1407          deleting 'rangetree', which stores a single copy of each
1408          range. */
1409       /* --- */
1410       Int     level;
1411       UWord   typeR; /* a cuOff */
1412       GExpr*  gexpr; /* for this variable */
1413       GExpr*  fbGX;  /* to find the frame base of the enclosing fn, if
1414                         any */
1415       HChar*  fName; /* declaring file name, or NULL */
1416       Int     fLine; /* declaring file line number, or zero */
1417       /* offset in .debug_info, so that abstract instances can be
1418          found to satisfy references from concrete instances. */
1419       UWord   dioff;
1420       UWord   absOri; /* so the absOri fields refer to dioff fields
1421                          in some other, related TempVar. */
1422    }
1423    TempVar;
1424 
1425 #define N_D3_VAR_STACK 48
1426 
1427 typedef
1428    struct {
1429       /* Contains the range stack: a stack of address ranges, one
1430          stack entry for each nested scope.
1431 
1432          Some scope entries are created by function definitions
1433          (DW_AT_subprogram), and for those, we also note the GExpr
1434          derived from its DW_AT_frame_base attribute, if any.
1435          Consequently it should be possible to find, for any
1436          variable's DIE, the GExpr for the the containing function's
1437          DW_AT_frame_base by scanning back through the stack to find
1438          the nearest entry associated with a function.  This somewhat
1439          elaborate scheme is provided so as to make it possible to
1440          obtain the correct DW_AT_frame_base expression even in the
1441          presence of nested functions (or to be more precise, in the
1442          presence of nested DW_AT_subprogram DIEs).
1443       */
1444       Int     sp; /* [sp] is innermost active entry; sp==-1 for empty
1445                      stack */
1446       XArray* ranges[N_D3_VAR_STACK]; /* XArray of AddrRange */
1447       Int     level[N_D3_VAR_STACK];  /* D3 DIE levels */
1448       Bool    isFunc[N_D3_VAR_STACK]; /* from DW_AT_subprogram? */
1449       GExpr*  fbGX[N_D3_VAR_STACK];   /* if isFunc, contains the FB
1450                                          expr, else NULL */
1451       /* The file name table.  Is a mapping from integer index to the
1452          (permanent) copy of the string in in DebugInfo's .strchunks. */
1453       XArray* /* of UChar* */ filenameTable;
1454    }
1455    D3VarParser;
1456 
/* Debug printing: dump every active entry of the parser's range
   stack, outermost first, labelled with 'str'.  For each entry this
   shows its index, DIE level, frame-base GExpr (function entries
   only) and its address ranges. */
static void varstack_show ( D3VarParser* parser, const HChar* str ) {
   Word i, j, nRanges;
   VG_(printf)("  varstack (%s) {\n", str);
   for (i = 0; i <= parser->sp; i++) {
      XArray* xa = parser->ranges[i];
      vg_assert(xa);
      VG_(printf)("    [%ld] (level %d)", i, parser->level[i]);
      /* Only function entries may carry a frame-base expression. */
      if (!parser->isFunc[i]) {
         vg_assert(parser->fbGX[i] == NULL);
      } else {
         VG_(printf)(" (fbGX=%p)", parser->fbGX[i]);
      }
      VG_(printf)(": ");
      nRanges = VG_(sizeXA)( xa );
      if (nRanges == 0) {
         VG_(printf)("** empty PC range array **");
      }
      for (j = 0; j < nRanges; j++) {
         AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
         vg_assert(range);
         VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax);
      }
      VG_(printf)("\n");
   }
   VG_(printf)("  }\n");
}
1483 
1484 /* Remove from the stack, all entries with .level > 'level' */
1485 static
varstack_preen(D3VarParser * parser,Bool td3,Int level)1486 void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
1487 {
1488    Bool changed = False;
1489    vg_assert(parser->sp < N_D3_VAR_STACK);
1490    while (True) {
1491       vg_assert(parser->sp >= -1);
1492       if (parser->sp == -1) break;
1493       if (parser->level[parser->sp] <= level) break;
1494       if (0)
1495          TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
1496       vg_assert(parser->ranges[parser->sp]);
1497       /* Who allocated this xa?  get_range_list() or
1498          unitary_range_list(). */
1499       VG_(deleteXA)( parser->ranges[parser->sp] );
1500       parser->ranges[parser->sp] = NULL;
1501       parser->level[parser->sp]  = 0;
1502       parser->isFunc[parser->sp] = False;
1503       parser->fbGX[parser->sp]   = NULL;
1504       parser->sp--;
1505       changed = True;
1506    }
1507    if (changed && td3)
1508       varstack_show( parser, "after preen" );
1509 }
1510 
/* Push a new scope onto the range stack.  'ranges' (non-NULL) is the
   scope's address ranges and 'level' its DIE level.  'isFunc' says
   whether the scope is a function; 'fbGX' is its frame-base
   expression and must be NULL unless 'isFunc' is set.  Any existing
   entries at 'level' or deeper are discarded first.  Barfs (via
   cc->barf) if the stack is full.  If 'td3' is set the resulting
   stack is printed. */
static void varstack_push ( CUConst* cc,
                            D3VarParser* parser,
                            Bool td3,
                            XArray* ranges, Int level,
                            Bool    isFunc, GExpr* fbGX ) {
   if (0)
   TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d  %p\n",
            parser->sp+1, level, ranges);

   /* The new entry replaces whatever was previously at 'level', so
      first get rid of everything at that level or deeper. */
   varstack_preen(parser, /*td3*/False, level-1);

   vg_assert(parser->sp >= -1);
   vg_assert(parser->sp < N_D3_VAR_STACK);
   if (parser->sp == N_D3_VAR_STACK-1)
      cc->barf("varstack_push: N_D3_VAR_STACK is too low; "
               "increase and recompile");
   /* Levels must strictly increase up the stack. */
   if (parser->sp >= 0)
      vg_assert(parser->level[parser->sp] < level);

   /* Claim the next slot, which must currently be empty. */
   parser->sp++;
   vg_assert(parser->ranges[parser->sp] == NULL);
   vg_assert(parser->level[parser->sp]  == 0);
   vg_assert(parser->isFunc[parser->sp] == False);
   vg_assert(parser->fbGX[parser->sp]   == NULL);

   /* Check the arguments, then fill the slot in. */
   vg_assert(ranges != NULL);
   if (!isFunc) vg_assert(fbGX == NULL);
   parser->fbGX[parser->sp]   = fbGX;
   parser->isFunc[parser->sp] = isFunc;
   parser->level[parser->sp]  = level;
   parser->ranges[parser->sp] = ranges;

   if (td3)
      varstack_show( parser, "after push" );
}
1545 
1546 
1547 /* cts is derived from a DW_AT_location and so refers either to a
1548    location expression or to a location list.  Figure out which, and
1549    in both cases bundle the expression or location list into a
1550    so-called GExpr (guarded expression). */
1551 __attribute__((noinline))
get_GX(CUConst * cc,Bool td3,const FormContents * cts)1552 static GExpr* get_GX ( CUConst* cc, Bool td3, const FormContents* cts )
1553 {
1554    GExpr* gexpr = NULL;
1555    if (cts->szB < 0) {
1556       /* represents a non-empty in-line location expression, and
1557          cts->u.cur points at the image bytes */
1558       gexpr = make_singleton_GX( cts->u.cur, (ULong)(- cts->szB) );
1559    }
1560    else
1561    if (cts->szB > 0) {
1562       /* represents a location list.  cts->u.val is the offset of it
1563          in .debug_loc. */
1564       if (!cc->cu_svma_known)
1565          cc->barf("get_GX: location list, but CU svma is unknown");
1566       gexpr = make_general_GX( cc, td3, cts->u.val, cc->cu_svma );
1567    }
1568    else {
1569       vg_assert(0); /* else caller is bogus */
1570    }
1571    return gexpr;
1572 }
1573 
1574 
1575 static
read_filename_table(D3VarParser * parser,CUConst * cc,ULong debug_line_offset,Bool td3)1576 void read_filename_table( /*MOD*/D3VarParser* parser,
1577                           CUConst* cc, ULong debug_line_offset,
1578                           Bool td3 )
1579 {
1580    Bool   is_dw64;
1581    Cursor c;
1582    Word   i;
1583    UShort version;
1584    UChar  opcode_base;
1585    HChar* str;
1586 
1587    vg_assert(parser && cc && cc->barf);
1588    if (!ML_(sli_is_valid)(cc->escn_debug_line)
1589        || cc->escn_debug_line.szB <= debug_line_offset) {
1590       cc->barf("read_filename_table: .debug_line is missing?");
1591    }
1592 
1593    init_Cursor( &c, cc->escn_debug_line, debug_line_offset, cc->barf,
1594                 "Overrun whilst reading .debug_line section(1)" );
1595 
1596    /* unit_length = */
1597       get_Initial_Length( &is_dw64, &c,
1598            "read_filename_table: invalid initial-length field" );
1599    version = get_UShort( &c );
1600    if (version != 2 && version != 3 && version != 4)
1601      cc->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info "
1602               "is currently supported.");
1603    /*header_length              = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 );
1604    /*minimum_instruction_length = */ get_UChar( &c );
1605    if (version >= 4)
1606       /*maximum_operations_per_insn = */ get_UChar( &c );
1607    /*default_is_stmt            = */ get_UChar( &c );
1608    /*line_base                  = (Char)*/ get_UChar( &c );
1609    /*line_range                 = */ get_UChar( &c );
1610    opcode_base                = get_UChar( &c );
1611    /* skip over "standard_opcode_lengths" */
1612    for (i = 1; i < (Word)opcode_base; i++)
1613      (void)get_UChar( &c );
1614 
1615    /* skip over the directory names table */
1616    while (peek_UChar(&c) != 0) {
1617      (void)get_AsciiZ(&c);
1618    }
1619    (void)get_UChar(&c); /* skip terminating zero */
1620 
1621    /* Read and record the file names table */
1622    vg_assert(parser->filenameTable);
1623    vg_assert( VG_(sizeXA)( parser->filenameTable ) == 0 );
1624    /* Add a dummy index-zero entry.  DWARF3 numbers its files
1625       from 1, for some reason. */
1626    str = ML_(addStr)( cc->di, "<unknown_file>", -1 );
1627    VG_(addToXA)( parser->filenameTable, &str );
1628    while (peek_UChar(&c) != 0) {
1629       DiCursor cur = get_AsciiZ(&c);
1630       str = ML_(addStrFromCursor)( cc->di, cur );
1631       TRACE_D3("  read_filename_table: %ld %s\n",
1632                VG_(sizeXA)(parser->filenameTable), str);
1633       VG_(addToXA)( parser->filenameTable, &str );
1634       (void)get_ULEB128( &c ); /* skip directory index # */
1635       (void)get_ULEB128( &c ); /* skip last mod time */
1636       (void)get_ULEB128( &c ); /* file size */
1637    }
1638    /* We're done!  The rest of it is not interesting. */
1639 }
1640 
1641 __attribute__((noinline))
bad_DIE_confusion(int linenr)1642 static void bad_DIE_confusion(int linenr)
1643 {
1644    VG_(printf)("\nparse_var_DIE(%d): confused by:\n", linenr);
1645 }
1646 #define goto_bad_DIE do {bad_DIE_confusion(__LINE__); goto bad_DIE;} while (0)
1647 
1648 __attribute__((noinline))
parse_var_DIE(WordFM * rangestree,XArray * tempvars,XArray * gexprs,D3VarParser * parser,DW_TAG dtag,UWord posn,Int level,Cursor * c_die,Cursor * c_abbv,CUConst * cc,Bool td3)1649 static void parse_var_DIE (
1650    /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
1651    /*MOD*/XArray* /* of TempVar* */ tempvars,
1652    /*MOD*/XArray* /* of GExpr* */ gexprs,
1653    /*MOD*/D3VarParser* parser,
1654    DW_TAG dtag,
1655    UWord posn,
1656    Int level,
1657    Cursor* c_die,
1658    Cursor* c_abbv,
1659    CUConst* cc,
1660    Bool td3
1661 )
1662 {
1663    FormContents cts;
1664 
1665    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
1666    UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
1667    Bool  debug_types_flag;
1668    Bool  alt_flag;
1669 
1670    varstack_preen( parser, td3, level-1 );
1671 
1672    if (dtag == DW_TAG_compile_unit
1673        || dtag == DW_TAG_type_unit
1674        || dtag == DW_TAG_partial_unit) {
1675       Bool have_lo    = False;
1676       Bool have_hi1   = False;
1677       Bool hiIsRelative = False;
1678       Bool have_range = False;
1679       Addr ip_lo    = 0;
1680       Addr ip_hi1   = 0;
1681       Addr rangeoff = 0;
1682       while (True) {
1683          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1684          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1685          if (attr == 0 && form == 0) break;
1686          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
1687          if (attr == DW_AT_low_pc && cts.szB > 0) {
1688             ip_lo   = cts.u.val;
1689             have_lo = True;
1690          }
1691          if (attr == DW_AT_high_pc && cts.szB > 0) {
1692             ip_hi1   = cts.u.val;
1693             have_hi1 = True;
1694             if (form != DW_FORM_addr)
1695                hiIsRelative = True;
1696          }
1697          if (attr == DW_AT_ranges && cts.szB > 0) {
1698             rangeoff   = cts.u.val;
1699             have_range = True;
1700          }
1701          if (attr == DW_AT_stmt_list && cts.szB > 0) {
1702             read_filename_table( parser, cc, cts.u.val, td3 );
1703          }
1704       }
1705       if (have_lo && have_hi1 && hiIsRelative)
1706          ip_hi1 += ip_lo;
1707       /* Now, does this give us an opportunity to find this
1708          CU's svma? */
1709 #if 0
1710       if (level == 0 && have_lo) {
1711          vg_assert(!cc->cu_svma_known); /* if this fails, it must be
1712          because we've already seen a DW_TAG_compile_unit DIE at level
1713          0.  But that can't happen, because DWARF3 only allows exactly
1714          one top level DIE per CU. */
1715          cc->cu_svma_known = True;
1716          cc->cu_svma = ip_lo;
1717          if (1)
1718             TRACE_D3("BBBBAAAA acquire CU_SVMA of %p\n", cc->cu_svma);
1719          /* Now, it may be that this DIE doesn't tell us the CU's
1720             SVMA, by way of not having a DW_AT_low_pc.  That's OK --
1721             the CU doesn't *have* to have its SVMA specified.
1722 
1723             But as per last para D3 spec sec 3.1.1 ("Normal and
1724             Partial Compilation Unit Entries", "If the base address
1725             (viz, the SVMA) is undefined, then any DWARF entry of
1726             structure defined interms of the base address of that
1727             compilation unit is not valid.".  So that means, if whilst
1728             processing the children of this top level DIE (or their
1729             children, etc) we see a DW_AT_range, and cu_svma_known is
1730             False, then the DIE that contains it is (per the spec)
1731             invalid, and we can legitimately stop and complain. */
1732       }
1733 #else
1734       /* .. whereas The Reality is, simply assume the SVMA is zero
1735          if it isn't specified. */
1736       if (level == 0) {
1737          vg_assert(!cc->cu_svma_known);
1738          cc->cu_svma_known = True;
1739          if (have_lo)
1740             cc->cu_svma = ip_lo;
1741          else
1742             cc->cu_svma = 0;
1743       }
1744 #endif
1745       /* Do we have something that looks sane? */
1746       if (have_lo && have_hi1 && (!have_range)) {
1747          if (ip_lo < ip_hi1)
1748             varstack_push( cc, parser, td3,
1749                            unitary_range_list(ip_lo, ip_hi1 - 1),
1750                            level,
1751                            False/*isFunc*/, NULL/*fbGX*/ );
1752          else if (ip_lo == 0 && ip_hi1 == 0)
1753             /* CU has no code, presumably?
1754                Such situations have been encountered for code
1755                compiled with -ffunction-sections -fdata-sections
1756                and linked with --gc-sections. Completely
1757                eliminated CU gives such 0 lo/hi pc. Similarly
1758                to a CU which has no lo/hi/range pc, we push
1759                an empty range list. */
1760             varstack_push( cc, parser, td3,
1761                            empty_range_list(),
1762                            level,
1763                            False/*isFunc*/, NULL/*fbGX*/ );
1764       } else
1765       if ((!have_lo) && (!have_hi1) && have_range) {
1766          varstack_push( cc, parser, td3,
1767                         get_range_list( cc, td3,
1768                                         rangeoff, cc->cu_svma ),
1769                         level,
1770                         False/*isFunc*/, NULL/*fbGX*/ );
1771       } else
1772       if ((!have_lo) && (!have_hi1) && (!have_range)) {
1773          /* CU has no code, presumably? */
1774          varstack_push( cc, parser, td3,
1775                         empty_range_list(),
1776                         level,
1777                         False/*isFunc*/, NULL/*fbGX*/ );
1778       } else
1779       if (have_lo && (!have_hi1) && have_range && ip_lo == 0) {
1780          /* broken DIE created by gcc-4.3.X ?  Ignore the
1781             apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
1782             instead. */
1783          varstack_push( cc, parser, td3,
1784                         get_range_list( cc, td3,
1785                                         rangeoff, cc->cu_svma ),
1786                         level,
1787                         False/*isFunc*/, NULL/*fbGX*/ );
1788       } else {
1789          if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n",
1790                             (Int)have_lo, (Int)have_hi1, (Int)have_range);
1791          goto_bad_DIE;
1792       }
1793    }
1794 
1795    if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
1796       Bool   have_lo    = False;
1797       Bool   have_hi1   = False;
1798       Bool   have_range = False;
1799       Bool   hiIsRelative = False;
1800       Addr   ip_lo      = 0;
1801       Addr   ip_hi1     = 0;
1802       Addr   rangeoff   = 0;
1803       Bool   isFunc     = dtag == DW_TAG_subprogram;
1804       GExpr* fbGX       = NULL;
1805       while (True) {
1806          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1807          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1808          if (attr == 0 && form == 0) break;
1809          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
1810          if (attr == DW_AT_low_pc && cts.szB > 0) {
1811             ip_lo   = cts.u.val;
1812             have_lo = True;
1813          }
1814          if (attr == DW_AT_high_pc && cts.szB > 0) {
1815             ip_hi1   = cts.u.val;
1816             have_hi1 = True;
1817             if (form != DW_FORM_addr)
1818                hiIsRelative = True;
1819          }
1820          if (attr == DW_AT_ranges && cts.szB > 0) {
1821             rangeoff   = cts.u.val;
1822             have_range = True;
1823          }
1824          if (isFunc
1825              && attr == DW_AT_frame_base
1826              && cts.szB != 0 /* either scalar or nonempty block */) {
1827             fbGX = get_GX( cc, False/*td3*/, &cts );
1828             vg_assert(fbGX);
1829             VG_(addToXA)(gexprs, &fbGX);
1830          }
1831       }
1832       if (have_lo && have_hi1 && hiIsRelative)
1833          ip_hi1 += ip_lo;
1834       /* Do we have something that looks sane? */
1835       if (dtag == DW_TAG_subprogram
1836           && (!have_lo) && (!have_hi1) && (!have_range)) {
1837          /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
1838             representing a subroutine declaration that is not also a
1839             definition does not have code address or range
1840             attributes." */
1841       } else
1842       if (dtag == DW_TAG_lexical_block
1843           && (!have_lo) && (!have_hi1) && (!have_range)) {
1844          /* I believe this is legit, and means the lexical block
1845             contains no insns (whatever that might mean).  Ignore. */
1846       } else
1847       if (have_lo && have_hi1 && (!have_range)) {
1848          /* This scope supplies just a single address range. */
1849          if (ip_lo < ip_hi1)
1850             varstack_push( cc, parser, td3,
1851                            unitary_range_list(ip_lo, ip_hi1 - 1),
1852                            level, isFunc, fbGX );
1853       } else
1854       if ((!have_lo) && (!have_hi1) && have_range) {
1855          /* This scope supplies multiple address ranges via the use of
1856             a range list. */
1857          varstack_push( cc, parser, td3,
1858                         get_range_list( cc, td3,
1859                                         rangeoff, cc->cu_svma ),
1860                         level, isFunc, fbGX );
1861       } else
1862       if (have_lo && (!have_hi1) && (!have_range)) {
1863          /* This scope is bogus.  The D3 spec sec 3.4 (Lexical Block
1864             Entries) says fairly clearly that a scope must have either
1865             _range or (_low_pc and _high_pc). */
1866          /* The spec is a bit ambiguous though.  Perhaps a single byte
1867             range is intended?  See sec 2.17 (Code Addresses And Ranges) */
1868          /* This case is here because icc9 produced this:
1869          <2><13bd>: DW_TAG_lexical_block
1870             DW_AT_decl_line   : 5229
1871             DW_AT_decl_column : 37
1872             DW_AT_decl_file   : 1
1873             DW_AT_low_pc      : 0x401b03
1874          */
1875          /* Ignore (seems safe than pushing a single byte range) */
1876       } else
1877          goto_bad_DIE;
1878    }
1879 
1880    if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
1881       HChar* name        = NULL;
1882       UWord  typeR       = D3_INVALID_CUOFF;
1883       Bool   global      = False;
1884       GExpr* gexpr       = NULL;
1885       Int    n_attrs     = 0;
1886       UWord  abs_ori     = (UWord)D3_INVALID_CUOFF;
1887       Int    lineNo      = 0;
1888       HChar* fileName    = NULL;
1889       while (True) {
1890          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
1891          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1892          if (attr == 0 && form == 0) break;
1893          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
1894          n_attrs++;
1895          if (attr == DW_AT_name && cts.szB < 0) {
1896             name = ML_(addStrFromCursor)( cc->di, cts.u.cur );
1897          }
1898          if (attr == DW_AT_location
1899              && cts.szB != 0 /* either scalar or nonempty block */) {
1900             gexpr = get_GX( cc, False/*td3*/, &cts );
1901             vg_assert(gexpr);
1902             VG_(addToXA)(gexprs, &gexpr);
1903          }
1904          if (attr == DW_AT_type && cts.szB > 0) {
1905             typeR = cook_die_using_form( cc, cts.u.val, form );
1906          }
1907          if (attr == DW_AT_external && cts.szB > 0 && cts.u.val > 0) {
1908             global = True;
1909          }
1910          if (attr == DW_AT_abstract_origin && cts.szB > 0) {
1911             abs_ori = (UWord)cts.u.val;
1912          }
1913          if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
1914             /*declaration = True;*/
1915          }
1916          if (attr == DW_AT_decl_line && cts.szB > 0) {
1917             lineNo = (Int)cts.u.val;
1918          }
1919          if (attr == DW_AT_decl_file && cts.szB > 0) {
1920             Int ftabIx = (Int)cts.u.val;
1921             if (ftabIx >= 1
1922                 && ftabIx < VG_(sizeXA)( parser->filenameTable )) {
1923                fileName = *(HChar**)
1924                           VG_(indexXA)( parser->filenameTable, ftabIx );
1925                vg_assert(fileName);
1926             }
1927             if (0) VG_(printf)("XXX filename = %s\n", fileName);
1928          }
1929       }
1930       if (!global && dtag == DW_TAG_variable && level == 1) {
1931          /* Case of a static variable. It is better to declare
1932             it global as the variable is not really related to
1933             a PC range, as its address can be used by program
1934             counters outside of the ranges where it is visible . */
1935          global = True;
1936       }
1937 
1938       /* We'll collect it under if one of the following three
1939          conditions holds:
1940          (1) has location and type    -> completed
1941          (2) has type only            -> is an abstract instance
1942          (3) has location and abs_ori -> is a concrete instance
1943          Name, filename and line number are all optional frills.
1944       */
1945       if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
1946            /* 2 */ || (typeR != D3_INVALID_CUOFF)
1947            /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {
1948 
1949          /* Add this variable to the list of interesting looking
1950             variables.  Crucially, note along with it the address
1951             range(s) associated with the variable, which for locals
1952             will be the address ranges at the top of the varparser's
1953             stack. */
1954          GExpr*   fbGX = NULL;
1955          Word     i, nRanges;
1956          XArray*  /* of AddrRange */ xa;
1957          TempVar* tv;
1958          /* Stack can't be empty; we put a dummy entry on it for the
1959             entire address range before starting with the DIEs for
1960             this CU. */
1961          vg_assert(parser->sp >= 0);
1962 
1963          /* If this is a local variable (non-global), try to find
1964             the GExpr for the DW_AT_frame_base of the containing
1965             function.  It should have been pushed on the stack at the
1966             time we encountered its DW_TAG_subprogram DIE, so the way
1967             to find it is to scan back down the stack looking for it.
1968             If there isn't an enclosing stack entry marked 'isFunc'
1969             then we must be seeing variable or formal param DIEs
1970             outside of a function, so we deem the Dwarf to be
1971             malformed if that happens.  Note that the fbGX may be NULL
1972             if the containing DT_TAG_subprogram didn't supply a
1973             DW_AT_frame_base -- that's OK, but there must actually be
1974             a containing DW_TAG_subprogram. */
1975          if (!global) {
1976             Bool found = False;
1977             for (i = parser->sp; i >= 0; i--) {
1978                if (parser->isFunc[i]) {
1979                   fbGX = parser->fbGX[i];
1980                   found = True;
1981                   break;
1982                }
1983             }
1984             if (!found) {
1985                if (0 && VG_(clo_verbosity) >= 0) {
1986                   VG_(message)(Vg_DebugMsg,
1987                      "warning: parse_var_DIE: non-global variable "
1988                      "outside DW_TAG_subprogram\n");
1989                }
1990                /* goto_bad_DIE; */
1991                /* This seems to happen a lot.  Just ignore it -- if,
1992                   when we come to evaluation of the location (guarded)
1993                   expression, it requires a frame base value, and
1994                   there's no expression for that, then evaluation as a
1995                   whole will fail.  Harmless - a bit of a waste of
1996                   cycles but nothing more. */
1997             }
1998          }
1999 
2000          /* re "global ? 0 : parser->sp" (twice), if the var is
2001             marked 'global' then we must put it at the global scope,
2002             as only the global scope (level 0) covers the entire PC
2003             address space.  It is asserted elsewhere that level 0
2004             always covers the entire address space. */
2005          xa = parser->ranges[global ? 0 : parser->sp];
2006          nRanges = VG_(sizeXA)(xa);
2007          vg_assert(nRanges >= 0);
2008 
2009          tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
2010          tv->name   = name;
2011          tv->level  = global ? 0 : parser->sp;
2012          tv->typeR  = typeR;
2013          tv->gexpr  = gexpr;
2014          tv->fbGX   = fbGX;
2015          tv->fName  = fileName;
2016          tv->fLine  = lineNo;
2017          tv->dioff  = posn;
2018          tv->absOri = abs_ori;
2019 
2020          /* See explanation on definition of type TempVar for the
2021             reason for this elaboration. */
2022          tv->nRanges = nRanges;
2023          tv->rngOneMin = 0;
2024          tv->rngOneMax = 0;
2025          tv->rngMany = NULL;
2026          if (nRanges == 1) {
2027             AddrRange* range = VG_(indexXA)(xa, 0);
2028             tv->rngOneMin = range->aMin;
2029             tv->rngOneMax = range->aMax;
2030          }
2031          else if (nRanges > 1) {
2032             /* See if we already have a range list which is
2033                structurally identical.  If so, use that; if not, clone
2034                this one, and add it to our collection. */
2035             UWord keyW, valW;
2036             if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
2037                XArray* old = (XArray*)keyW;
2038                tl_assert(valW == 0);
2039                tl_assert(old != xa);
2040                tv->rngMany = old;
2041             } else {
2042                XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
2043                tv->rngMany = cloned;
2044                VG_(addToFM)( rangestree, (UWord)cloned, 0 );
2045             }
2046          }
2047 
2048          VG_(addToXA)( tempvars, &tv );
2049 
2050          TRACE_D3("  Recording this variable, with %ld PC range(s)\n",
2051                   VG_(sizeXA)(xa) );
2052          /* collect stats on how effective the ->ranges special
2053             casing is */
2054          if (0) {
2055             static Int ntot=0, ngt=0;
2056             ntot++;
2057             if (tv->rngMany) ngt++;
2058             if (0 == (ntot % 100000))
2059                VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
2060          }
2061 
2062       }
2063 
2064       /* Here are some other weird cases seen in the wild:
2065 
2066             We have a variable with a name and a type, but no
2067             location.  I guess that's a sign that it has been
2068             optimised away.  Ignore it.  Here's an example:
2069 
2070             static Int lc_compar(void* n1, void* n2) {
2071                MC_Chunk* mc1 = *(MC_Chunk**)n1;
2072                MC_Chunk* mc2 = *(MC_Chunk**)n2;
2073                return (mc1->data < mc2->data ? -1 : 1);
2074             }
2075 
2076             Both mc1 and mc2 are like this
2077             <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
2078                 DW_AT_name        : mc1
2079                 DW_AT_decl_file   : 1
2080                 DW_AT_decl_line   : 216
2081                 DW_AT_type        : <5d3>
2082 
2083             whereas n1 and n2 do have locations specified.
2084 
2085             ---------------------------------------------
2086 
2087             We see a DW_TAG_formal_parameter with a type, but
2088             no name and no location.  It's probably part of a function type
2089             construction, thusly, hence ignore it:
2090          <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
2091              DW_AT_sibling     : <2c9>
2092              DW_AT_prototyped  : 1
2093              DW_AT_type        : <114>
2094          <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
2095              DW_AT_type        : <13e>
2096          <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
2097              DW_AT_type        : <133>
2098 
2099             ---------------------------------------------
2100 
2101             Is very minimal, like this:
2102             <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
2103                 DW_AT_abstract_origin: <7ba>
2104             What that signifies I have no idea.  Ignore.
2105 
2106             ----------------------------------------------
2107 
2108             Is very minimal, like this:
2109             <200f>: DW_TAG_formal_parameter
2110                 DW_AT_abstract_ori: <1f4c>
2111                 DW_AT_location    : 13440
2112             What that signifies I have no idea.  Ignore.
2113             It might be significant, though: the variable at least
2114             has a location and so might exist somewhere.
2115             Maybe we should handle this.
2116 
2117             ---------------------------------------------
2118 
2119             <22407>: DW_TAG_variable
2120               DW_AT_name        : (indirect string, offset: 0x6579):
2121                                   vgPlain_trampoline_stuff_start
2122               DW_AT_decl_file   : 29
2123               DW_AT_decl_line   : 56
2124               DW_AT_external    : 1
2125               DW_AT_declaration : 1
2126 
2127             Nameless and typeless variable that has a location?  Who
2128             knows.  Not me.
2129             <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
2130                  DW_AT_location    : 9 byte block: 3 c0 c7 13 38 0 0 0 0
2131                                      (DW_OP_addr: 3813c7c0)
2132 
2133             No, really.  Check it out.  gcc is quite simply borked.
2134             <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
2135             // followed by no attributes, and the next DIE is a sibling,
2136             // not a child
2137             */
2138    }
2139    return;
2140 
2141   bad_DIE:
2142    set_position_of_Cursor( c_die,  saved_die_c_offset );
2143    set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
2144    posn = uncook_die( cc, posn, &debug_types_flag, &alt_flag );
2145    VG_(printf)(" <%d><%lx>: %s", level, posn, ML_(pp_DW_TAG)( dtag ) );
2146    if (debug_types_flag) {
2147       VG_(printf)(" (in .debug_types)");
2148    }
2149    else if (alt_flag) {
2150       VG_(printf)(" (in alternate .debug_info)");
2151    }
2152    VG_(printf)("\n");
2153    while (True) {
2154       DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2155       DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2156       if (attr == 0 && form == 0) break;
2157       VG_(printf)("     %18s: ", ML_(pp_DW_AT)(attr));
2158       /* Get the form contents, so as to print them */
2159       get_Form_contents( &cts, cc, c_die, True, form );
2160       VG_(printf)("\t\n");
2161    }
2162    VG_(printf)("\n");
2163    cc->barf("parse_var_DIE: confused by the above DIE");
2164    /*NOTREACHED*/
2165 }
2166 
2167 
2168 /*------------------------------------------------------------*/
2169 /*---                                                      ---*/
2170 /*--- Parsing of type-related DIEs                         ---*/
2171 /*---                                                      ---*/
2172 /*------------------------------------------------------------*/
2173 
2174 #define N_D3_TYPE_STACK 16
2175 
2176 typedef
2177    struct {
2178       /* What source language?  'A'=Ada83/95,
2179                                 'C'=C/C++,
2180                                 'F'=Fortran,
2181                                 '?'=other
2182          Established once per compilation unit. */
2183       UChar language;
2184       /* A stack of types which are currently under construction */
2185       Int   sp; /* [sp] is innermost active entry; sp==-1 for empty
2186                    stack */
2187       /* Note that the TyEnts in qparentE are temporary copies of the
2188          ones accumulating in the main tyent array.  So it is not safe
2189          to free up anything on them when popping them off the stack
2190          (iow, it isn't safe to use TyEnt__make_EMPTY on them).  Just
2191          memset them to zero when done. */
2192       TyEnt qparentE[N_D3_TYPE_STACK]; /* parent TyEnts */
2193       Int   qlevel[N_D3_TYPE_STACK];
2194 
2195    }
2196    D3TypeParser;
2197 
/* Debug printing: dump the active entries of the type stack, bottom
   first, labelled with 'str'. */
static void typestack_show ( D3TypeParser* parser, const HChar* str )
{
   Word ix = 0;

   VG_(printf)("  typestack (%s) {\n", str);
   while (ix <= parser->sp) {
      VG_(printf)("    [%ld] (level %d): ", ix, parser->qlevel[ix]);
      ML_(pp_TyEnt)( &parser->qparentE[ix] );
      VG_(printf)("\n");
      ix++;
   }
   VG_(printf)("  }\n");
}
2208 
2209 /* Remove from the stack, all entries with .level > 'level' */
2210 static
typestack_preen(D3TypeParser * parser,Bool td3,Int level)2211 void typestack_preen ( D3TypeParser* parser, Bool td3, Int level )
2212 {
2213    Bool changed = False;
2214    vg_assert(parser->sp < N_D3_TYPE_STACK);
2215    while (True) {
2216       vg_assert(parser->sp >= -1);
2217       if (parser->sp == -1) break;
2218       if (parser->qlevel[parser->sp] <= level) break;
2219       if (0)
2220          TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
2221       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2222       VG_(memset)(&parser->qparentE[parser->sp], 0, sizeof(TyEnt));
2223       parser->qparentE[parser->sp].cuOff = D3_INVALID_CUOFF;
2224       parser->qparentE[parser->sp].tag = Te_EMPTY;
2225       parser->qlevel[parser->sp] = 0;
2226       parser->sp--;
2227       changed = True;
2228    }
2229    if (changed && td3)
2230       typestack_show( parser, "after preen" );
2231 }
2232 
typestack_is_empty(D3TypeParser * parser)2233 static Bool typestack_is_empty ( D3TypeParser* parser ) {
2234    vg_assert(parser->sp >= -1 && parser->sp < N_D3_TYPE_STACK);
2235    return parser->sp == -1;
2236 }
2237 
/* Push a copy of '*parentE' onto the type stack as the parent type at
   nesting level 'level'.  'parentE' must be a type entry with a valid
   cuOff.  Any existing entries at 'level' or deeper are removed
   first.  If the stack is already full, cc->barf is called. */
static void typestack_push ( CUConst* cc,
                             D3TypeParser* parser,
                             Bool td3,
                             TyEnt* parentE, Int level )
{
   Int slot;

   if (0) {
      TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d  %05lx\n",
               parser->sp+1, level, parentE->cuOff);
   }

   /* Everything at 'level' or deeper has to go before the new entry
      can take its place, hence preen down to level-1. */
   typestack_preen(parser, /*td3*/False, level-1);

   /* Sanity check the stack pointer and make sure there is room. */
   vg_assert(parser->sp >= -1);
   vg_assert(parser->sp < N_D3_TYPE_STACK);
   if (parser->sp == N_D3_TYPE_STACK-1) {
      cc->barf("typestack_push: N_D3_TYPE_STACK is too low; "
               "increase and recompile");
   }
   /* After preening, whatever is on top must be strictly shallower. */
   if (parser->sp >= 0) {
      vg_assert(parser->qlevel[parser->sp] < level);
   }

   /* Claim the next slot; it must be in its cleared state. */
   parser->sp++;
   vg_assert(parser->qparentE[parser->sp].tag == Te_EMPTY);
   vg_assert(parser->qlevel[parser->sp]  == 0);

   /* Validate the entry being pushed. */
   vg_assert(parentE);
   vg_assert(ML_(TyEnt__is_type)(parentE));
   vg_assert(parentE->cuOff != D3_INVALID_CUOFF);

   slot = parser->sp;
   parser->qparentE[slot] = *parentE;
   parser->qlevel[slot]   = level;

   if (td3) {
      typestack_show( parser, "after push" );
   }
}
2268 
2269 /* True if the subrange type being parsed gives the bounds of an array. */
subrange_type_denotes_array_bounds(D3TypeParser * parser,DW_TAG dtag)2270 static Bool subrange_type_denotes_array_bounds ( D3TypeParser* parser,
2271                                                  DW_TAG dtag ) {
2272    vg_assert(dtag == DW_TAG_subrange_type);
2273    /* For most languages, a subrange_type dtag always gives the
2274       bounds of an array.
2275       For Ada, there are additional conditions as a subrange_type
2276       is also used for other purposes. */
2277    if (parser->language != 'A')
2278       /* not Ada, so it definitely denotes an array bound. */
2279       return True;
2280    else
2281       /* Extra constraints for Ada: it only denotes an array bound if .. */
2282       return (! typestack_is_empty(parser)
2283               && parser->qparentE[parser->sp].tag == Te_TyArray);
2284 }
2285 
2286 /* Parse a type-related DIE.  'parser' holds the current parser state.
2287    'admin' is where the completed types are dumped.  'dtag' is the tag
2288    for this DIE.  'c_die' points to the start of the data fields (FORM
2289    stuff) for the DIE.  c_abbv points to the start of the (name,form)
2290    pairs which describe the DIE.
2291 
2292    We may find the DIE uninteresting, in which case we should ignore
2293    it.
2294 
2295    What happens: the DIE is examined.  If uninteresting, it is ignored.
2296    Otherwise, the DIE gives rise to two things:
2297 
2298    (1) the offset of this DIE in the CU -- the cuOffset, a UWord
2299    (2) a TyAdmin structure, which holds the type, or related stuff
2300 
2301    (2) is added at the end of 'tyadmins', at some index, say 'i'.
2302 
2303    A pair (cuOffset, i) is added to 'tydict'.
2304 
2305    Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
2306    a mapping from cuOffset to the index of the corresponding entry in
2307    'tyadmins'.
2308 
2309    When resolving a cuOffset to a TyAdmin, first look up the cuOffset
2310    in the tydict (by binary search).  This gives an index into
2311    tyadmins, and the required entity lives in tyadmins at that index.
2312 */
2313 __attribute__((noinline))
parse_type_DIE(XArray * tyents,D3TypeParser * parser,DW_TAG dtag,UWord posn,Int level,Cursor * c_die,Cursor * c_abbv,CUConst * cc,Bool td3)2314 static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents,
2315                              /*MOD*/D3TypeParser* parser,
2316                              DW_TAG dtag,
2317                              UWord posn,
2318                              Int level,
2319                              Cursor* c_die,
2320                              Cursor* c_abbv,
2321                              CUConst* cc,
2322                              Bool td3 )
2323 {
2324    FormContents cts;
2325    TyEnt typeE;
2326    TyEnt atomE;
2327    TyEnt fieldE;
2328    TyEnt boundE;
2329    Bool  debug_types_flag;
2330    Bool  alt_flag;
2331 
2332    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
2333    UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
2334 
2335    VG_(memset)( &typeE,  0xAA, sizeof(typeE) );
2336    VG_(memset)( &atomE,  0xAA, sizeof(atomE) );
2337    VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) );
2338    VG_(memset)( &boundE, 0xAA, sizeof(boundE) );
2339 
2340    /* If we've returned to a level at or above any previously noted
2341       parent, un-note it, so we don't believe we're still collecting
2342       its children. */
2343    typestack_preen( parser, td3, level-1 );
2344 
2345    if (dtag == DW_TAG_compile_unit
2346        || dtag == DW_TAG_type_unit
2347        || dtag == DW_TAG_partial_unit) {
2348       /* See if we can find DW_AT_language, since it is important for
2349          establishing array bounds (see DW_TAG_subrange_type below in
2350          this fn) */
2351       while (True) {
2352          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2353          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2354          if (attr == 0 && form == 0) break;
2355          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2356          if (attr != DW_AT_language)
2357             continue;
2358          if (cts.szB <= 0)
2359            goto_bad_DIE;
2360          switch (cts.u.val) {
2361             case DW_LANG_C89: case DW_LANG_C:
2362             case DW_LANG_C_plus_plus: case DW_LANG_ObjC:
2363             case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC:
2364             case DW_LANG_Upc: case DW_LANG_C99:
2365                parser->language = 'C'; break;
2366             case DW_LANG_Fortran77: case DW_LANG_Fortran90:
2367             case DW_LANG_Fortran95:
2368                parser->language = 'F'; break;
2369             case DW_LANG_Ada83: case DW_LANG_Ada95:
2370                parser->language = 'A'; break;
2371             case DW_LANG_Cobol74:
2372             case DW_LANG_Cobol85: case DW_LANG_Pascal83:
2373             case DW_LANG_Modula2: case DW_LANG_Java:
2374             case DW_LANG_PLI:
2375             case DW_LANG_D: case DW_LANG_Python:
2376             case DW_LANG_Mips_Assembler:
2377                parser->language = '?'; break;
2378             default:
2379                goto_bad_DIE;
2380          }
2381       }
2382    }
2383 
2384    if (dtag == DW_TAG_base_type) {
2385       /* We can pick up a new base type any time. */
2386       VG_(memset)(&typeE, 0, sizeof(typeE));
2387       typeE.cuOff = D3_INVALID_CUOFF;
2388       typeE.tag   = Te_TyBase;
2389       while (True) {
2390          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2391          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2392          if (attr == 0 && form == 0) break;
2393          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2394          if (attr == DW_AT_name && cts.szB < 0) {
2395             typeE.Te.TyBase.name
2396                = ML_(cur_read_strdup)( cts.u.cur,
2397                                        "di.readdwarf3.ptD.base_type.1" );
2398          }
2399          if (attr == DW_AT_byte_size && cts.szB > 0) {
2400             typeE.Te.TyBase.szB = cts.u.val;
2401          }
2402          if (attr == DW_AT_encoding && cts.szB > 0) {
2403             switch (cts.u.val) {
2404                case DW_ATE_unsigned: case DW_ATE_unsigned_char:
2405                case DW_ATE_UTF: /* since DWARF4, e.g. char16_t from C++ */
2406                case DW_ATE_boolean:/* FIXME - is this correct? */
2407                case DW_ATE_unsigned_fixed:
2408                   typeE.Te.TyBase.enc = 'U'; break;
2409                case DW_ATE_signed: case DW_ATE_signed_char:
2410                case DW_ATE_signed_fixed:
2411                   typeE.Te.TyBase.enc = 'S'; break;
2412                case DW_ATE_float:
2413                   typeE.Te.TyBase.enc = 'F'; break;
2414                case DW_ATE_complex_float:
2415                   typeE.Te.TyBase.enc = 'C'; break;
2416                default:
2417                   goto_bad_DIE;
2418             }
2419          }
2420       }
2421 
2422       /* Invent a name if it doesn't have one.  gcc-4.3
2423          -ftree-vectorize is observed to emit nameless base types. */
2424       if (!typeE.Te.TyBase.name)
2425          typeE.Te.TyBase.name
2426             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2",
2427                                  "<anon_base_type>" );
2428 
2429       /* Do we have something that looks sane? */
2430       if (/* must have a name */
2431           typeE.Te.TyBase.name == NULL
2432           /* and a plausible size.  Yes, really 32: "complex long
2433              double" apparently has size=32 */
2434           || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32
2435           /* and a plausible encoding */
2436           || (typeE.Te.TyBase.enc != 'U'
2437               && typeE.Te.TyBase.enc != 'S'
2438               && typeE.Te.TyBase.enc != 'F'
2439               && typeE.Te.TyBase.enc != 'C'))
2440          goto_bad_DIE;
2441       /* Last minute hack: if we see this
2442          <1><515>: DW_TAG_base_type
2443              DW_AT_byte_size   : 0
2444              DW_AT_encoding    : 5
2445              DW_AT_name        : void
2446          convert it into a real Void type. */
2447       if (typeE.Te.TyBase.szB == 0
2448           && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) {
2449          ML_(TyEnt__make_EMPTY)(&typeE);
2450          typeE.tag = Te_TyVoid;
2451          typeE.Te.TyVoid.isFake = False; /* it's a real one! */
2452       }
2453 
2454       goto acquire_Type;
2455    }
2456 
2457    /*
2458     * An example of DW_TAG_rvalue_reference_type:
2459     *
2460     * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
2461     *  <1><1014>: Abbrev Number: 55 (DW_TAG_rvalue_reference_type)
2462     *     <1015>   DW_AT_byte_size   : 4
2463     *     <1016>   DW_AT_type        : <0xe52>
2464     */
2465    if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type
2466        || dtag == DW_TAG_ptr_to_member_type
2467        || dtag == DW_TAG_rvalue_reference_type) {
2468       /* This seems legit for _pointer_type and _reference_type.  I
2469          don't know if rolling _ptr_to_member_type in here really is
2470          legit, but it's better than not handling it at all. */
2471       VG_(memset)(&typeE, 0, sizeof(typeE));
2472       typeE.cuOff = D3_INVALID_CUOFF;
2473       switch (dtag) {
2474       case DW_TAG_pointer_type:
2475          typeE.tag = Te_TyPtr;
2476          break;
2477       case DW_TAG_reference_type:
2478          typeE.tag = Te_TyRef;
2479          break;
2480       case DW_TAG_ptr_to_member_type:
2481          typeE.tag = Te_TyPtrMbr;
2482          break;
2483       case DW_TAG_rvalue_reference_type:
2484          typeE.tag = Te_TyRvalRef;
2485          break;
2486       default:
2487          vg_assert(False);
2488       }
2489       /* target type defaults to void */
2490       typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF;
2491       /* These four type kinds don't *have* to specify their size, in
2492          which case we assume it's a machine word.  But if they do
2493          specify it, it must be a machine word :-)  This probably
2494          assumes that the word size of the Dwarf3 we're reading is the
2495          same size as that on the machine.  gcc appears to give a size
2496          whereas icc9 doesn't. */
2497       typeE.Te.TyPorR.szB = sizeof(UWord);
2498       while (True) {
2499          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2500          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2501          if (attr == 0 && form == 0) break;
2502          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2503          if (attr == DW_AT_byte_size && cts.szB > 0) {
2504             typeE.Te.TyPorR.szB = cts.u.val;
2505          }
2506          if (attr == DW_AT_type && cts.szB > 0) {
2507             typeE.Te.TyPorR.typeR
2508                = cook_die_using_form( cc, (UWord)cts.u.val, form );
2509          }
2510       }
2511       /* Do we have something that looks sane? */
2512       if (typeE.Te.TyPorR.szB != sizeof(UWord))
2513          goto_bad_DIE;
2514       else
2515          goto acquire_Type;
2516    }
2517 
2518    if (dtag == DW_TAG_enumeration_type) {
2519       /* Create a new Type to hold the results. */
2520       VG_(memset)(&typeE, 0, sizeof(typeE));
2521       typeE.cuOff = posn;
2522       typeE.tag   = Te_TyEnum;
2523       Bool is_decl = False;
2524       typeE.Te.TyEnum.atomRs
2525          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1",
2526                        ML_(dinfo_free),
2527                        sizeof(UWord) );
2528       while (True) {
2529          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2530          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2531          if (attr == 0 && form == 0) break;
2532          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2533          if (attr == DW_AT_name && cts.szB < 0) {
2534             typeE.Te.TyEnum.name
2535                = ML_(cur_read_strdup)( cts.u.cur,
2536                                        "di.readdwarf3.pTD.enum_type.2" );
2537          }
2538          if (attr == DW_AT_byte_size && cts.szB > 0) {
2539             typeE.Te.TyEnum.szB = cts.u.val;
2540          }
2541          if (attr == DW_AT_declaration) {
2542             is_decl = True;
2543          }
2544       }
2545 
2546       if (!typeE.Te.TyEnum.name)
2547          typeE.Te.TyEnum.name
2548             = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3",
2549                                  "<anon_enum_type>" );
2550 
2551       /* Do we have something that looks sane? */
2552       if (typeE.Te.TyEnum.szB == 0
2553           /* we must know the size */
2554           /* but not for Ada, which uses such dummy
2555              enumerations as helper for gdb ada mode.
2556              Also GCC allows incomplete enums as GNU extension.
2557              http://gcc.gnu.org/onlinedocs/gcc/Incomplete-Enums.html
2558              These are marked as DW_AT_declaration and won't have
2559              a size. They can only be used in declaration or as
2560              pointer types.  You can't allocate variables or storage
2561              using such an enum type. (Also GCC seems to have a bug
2562              that will put such an enumeration_type into a .debug_types
2563              unit which should only contain complete types.) */
2564           && (parser->language != 'A' && !is_decl)) {
2565          goto_bad_DIE;
2566       }
2567 
2568       /* On't stack! */
2569       typestack_push( cc, parser, td3, &typeE, level );
2570       goto acquire_Type;
2571    }
2572 
2573    /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
2574       DW_TAG_enumerator with only a DW_AT_name but no
2575       DW_AT_const_value.  This is in violation of the Dwarf3 standard,
2576       and appears to be a new "feature" of gcc - versions 4.3.x and
2577       earlier do not appear to do this.  So accept DW_TAG_enumerator
2578       which only have a name but no value.  An example:
2579 
2580       <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
2581          <181>   DW_AT_name        : (indirect string, offset: 0xda70):
2582                                      QtMsgType
2583          <185>   DW_AT_byte_size   : 4
2584          <186>   DW_AT_decl_file   : 14
2585          <187>   DW_AT_decl_line   : 1480
2586          <189>   DW_AT_sibling     : <0x1a7>
2587       <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
2588          <18e>   DW_AT_name        : (indirect string, offset: 0x9e18):
2589                                      QtDebugMsg
2590       <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
2591          <193>   DW_AT_name        : (indirect string, offset: 0x1505f):
2592                                      QtWarningMsg
2593       <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
2594          <198>   DW_AT_name        : (indirect string, offset: 0x16f4a):
2595                                      QtCriticalMsg
2596       <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
2597          <19d>   DW_AT_name        : (indirect string, offset: 0x156dd):
2598                                      QtFatalMsg
2599       <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
2600          <1a2>   DW_AT_name        : (indirect string, offset: 0x13660):
2601                                      QtSystemMsg
2602    */
2603    if (dtag == DW_TAG_enumerator) {
2604       VG_(memset)( &atomE, 0, sizeof(atomE) );
2605       atomE.cuOff = posn;
2606       atomE.tag   = Te_Atom;
2607       while (True) {
2608          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2609          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2610          if (attr == 0 && form == 0) break;
2611          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2612          if (attr == DW_AT_name && cts.szB < 0) {
2613             atomE.Te.Atom.name
2614               = ML_(cur_read_strdup)( cts.u.cur,
2615                                       "di.readdwarf3.pTD.enumerator.1" );
2616          }
2617          if (attr == DW_AT_const_value && cts.szB > 0) {
2618             atomE.Te.Atom.value      = cts.u.val;
2619             atomE.Te.Atom.valueKnown = True;
2620          }
2621       }
2622       /* Do we have something that looks sane? */
2623       if (atomE.Te.Atom.name == NULL)
2624          goto_bad_DIE;
2625       /* Do we have a plausible parent? */
2626       if (typestack_is_empty(parser)) goto_bad_DIE;
2627       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2628       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2629       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
2630       if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto_bad_DIE;
2631       /* Record this child in the parent */
2632       vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs);
2633       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs,
2634                     &atomE );
2635       /* And record the child itself */
2636       goto acquire_Atom;
2637    }
2638 
2639    /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type.  I
2640       don't know if this is correct, but it at least makes this reader
2641       usable for gcc-4.3 produced Dwarf3. */
2642    if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type
2643        || dtag == DW_TAG_union_type) {
2644       Bool have_szB = False;
2645       Bool is_decl  = False;
2646       Bool is_spec  = False;
2647       /* Create a new Type to hold the results. */
2648       VG_(memset)(&typeE, 0, sizeof(typeE));
2649       typeE.cuOff = posn;
2650       typeE.tag   = Te_TyStOrUn;
2651       typeE.Te.TyStOrUn.name = NULL;
2652       typeE.Te.TyStOrUn.typeR = D3_INVALID_CUOFF;
2653       typeE.Te.TyStOrUn.fieldRs
2654          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1",
2655                        ML_(dinfo_free),
2656                        sizeof(UWord) );
2657       typeE.Te.TyStOrUn.complete = True;
2658       typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type
2659                                    || dtag == DW_TAG_class_type;
2660       while (True) {
2661          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2662          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2663          if (attr == 0 && form == 0) break;
2664          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2665          if (attr == DW_AT_name && cts.szB < 0) {
2666             typeE.Te.TyStOrUn.name
2667                = ML_(cur_read_strdup)( cts.u.cur,
2668                                        "di.readdwarf3.ptD.struct_type.2" );
2669          }
2670          if (attr == DW_AT_byte_size && cts.szB >= 0) {
2671             typeE.Te.TyStOrUn.szB = cts.u.val;
2672             have_szB = True;
2673          }
2674          if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
2675             is_decl = True;
2676          }
2677          if (attr == DW_AT_specification && cts.szB > 0 && cts.u.val > 0) {
2678             is_spec = True;
2679          }
2680          if (attr == DW_AT_signature && form == DW_FORM_ref_sig8
2681              && cts.szB > 0) {
2682             have_szB = True;
2683             typeE.Te.TyStOrUn.szB = 8;
2684             typeE.Te.TyStOrUn.typeR
2685                = cook_die_using_form( cc, (UWord)cts.u.val, form );
2686          }
2687       }
2688       /* Do we have something that looks sane? */
2689       if (is_decl && (!is_spec)) {
2690          /* It's a DW_AT_declaration.  We require the name but
2691             nothing else. */
2692          /* JRS 2012-06-28: following discussion w/ tromey, if the
2693             type doesn't have a name, just make one up, and accept it.
2694             It might be referred to by other DIEs, so ignoring it
2695             doesn't seem like a safe option. */
2696          if (typeE.Te.TyStOrUn.name == NULL)
2697             typeE.Te.TyStOrUn.name
2698                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.3",
2699                                     "<anon_struct_type>" );
2700          typeE.Te.TyStOrUn.complete = False;
2701          /* JRS 2009 Aug 10: <possible kludge>? */
2702          /* Push this tyent on the stack, even though it's incomplete.
2703             It appears that gcc-4.4 on Fedora 11 will sometimes create
2704             DW_TAG_member entries for it, and so we need to have a
2705             plausible parent present in order for that to work.  See
2706             #200029 comments 8 and 9. */
2707          typestack_push( cc, parser, td3, &typeE, level );
2708          /* </possible kludge> */
2709          goto acquire_Type;
2710       }
2711       if ((!is_decl) /* && (!is_spec) */) {
2712          /* this is the common, ordinary case */
2713          /* The name can be present, or not */
2714          if (!have_szB) {
2715             /* We must know the size.
2716                But in Ada, record with discriminants might have no size.
2717                But in C, VLA in the middle of a struct (gcc extension)
2718                might have no size.
2719                Instead, some GNAT dwarf extensions and/or dwarf entries
2720                allow to calculate the struct size at runtime.
2721                We cannot do that (yet?) so, the temporary kludge is to use
2722                a small size. */
2723             typeE.Te.TyStOrUn.szB = 1;
2724          }
2725          /* On't stack! */
2726          typestack_push( cc, parser, td3, &typeE, level );
2727          goto acquire_Type;
2728       }
2729       else {
2730          /* don't know how to handle any other variants just now */
2731          goto_bad_DIE;
2732       }
2733    }
2734 
2735    if (dtag == DW_TAG_member) {
2736       /* Acquire member entries for both DW_TAG_structure_type and
2737          DW_TAG_union_type.  They differ minorly, in that struct
2738          members must have a DW_AT_data_member_location expression
2739          whereas union members must not. */
2740       Bool parent_is_struct;
2741       VG_(memset)( &fieldE, 0, sizeof(fieldE) );
2742       fieldE.cuOff = posn;
2743       fieldE.tag   = Te_Field;
2744       fieldE.Te.Field.typeR = D3_INVALID_CUOFF;
2745       while (True) {
2746          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2747          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2748          if (attr == 0 && form == 0) break;
2749          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2750          if (attr == DW_AT_name && cts.szB < 0) {
2751             fieldE.Te.Field.name
2752                = ML_(cur_read_strdup)( cts.u.cur,
2753                                        "di.readdwarf3.ptD.member.1" );
2754          }
2755          if (attr == DW_AT_type && cts.szB > 0) {
2756             fieldE.Te.Field.typeR
2757                = cook_die_using_form( cc, (UWord)cts.u.val, form );
2758          }
2759          /* There are 2 different cases for DW_AT_data_member_location.
2760             If it is a constant class attribute, it contains byte offset
2761             from the beginning of the containing entity.
2762             Otherwise it is a location expression.  */
2763          if (attr == DW_AT_data_member_location && cts.szB > 0) {
2764             fieldE.Te.Field.nLoc = -1;
2765             fieldE.Te.Field.pos.offset = cts.u.val;
2766          }
2767          if (attr == DW_AT_data_member_location && cts.szB <= 0) {
2768             fieldE.Te.Field.nLoc = (UWord)(-cts.szB);
2769             fieldE.Te.Field.pos.loc
2770                = ML_(cur_read_memdup)( cts.u.cur,
2771                                        (SizeT)fieldE.Te.Field.nLoc,
2772                                        "di.readdwarf3.ptD.member.2" );
2773          }
2774       }
2775       /* Do we have a plausible parent? */
2776       if (typestack_is_empty(parser)) goto_bad_DIE;
2777       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2778       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2779       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
2780       if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto_bad_DIE;
2781       /* Do we have something that looks sane?  If this is a member of a
2782          struct, we must have a location expression; but if a member
2783          of a union that is irrelevant (D3 spec sec 5.6.6).  We ought
2784          to reject in the latter case, but some compilers have been
2785          observed to emit constant-zero expressions.  So just ignore
2786          them. */
2787       parent_is_struct
2788          = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct;
2789       if (!fieldE.Te.Field.name)
2790          fieldE.Te.Field.name
2791             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3",
2792                                  "<anon_field>" );
2793       vg_assert(fieldE.Te.Field.name);
2794       if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF)
2795          goto_bad_DIE;
2796       if (fieldE.Te.Field.nLoc) {
2797          if (!parent_is_struct) {
2798             /* If this is a union type, pretend we haven't seen the data
2799                member location expression, as it is by definition
2800                redundant (it must be zero). */
2801             if (fieldE.Te.Field.nLoc > 0)
2802                ML_(dinfo_free)(fieldE.Te.Field.pos.loc);
2803             fieldE.Te.Field.pos.loc = NULL;
2804             fieldE.Te.Field.nLoc = 0;
2805          }
2806          /* Record this child in the parent */
2807          fieldE.Te.Field.isStruct = parent_is_struct;
2808          vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs);
2809          VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs,
2810                        &posn );
2811          /* And record the child itself */
2812          goto acquire_Field;
2813       } else {
2814          /* Member with no location - this can happen with static
2815             const members in C++ code which are compile time constants
2816             that do not exist in the class. They're not of any interest
2817             to us so we ignore them. */
2818          ML_(TyEnt__make_EMPTY)(&fieldE);
2819       }
2820    }
2821 
2822    if (dtag == DW_TAG_array_type) {
2823       VG_(memset)(&typeE, 0, sizeof(typeE));
2824       typeE.cuOff = posn;
2825       typeE.tag   = Te_TyArray;
2826       typeE.Te.TyArray.typeR = D3_INVALID_CUOFF;
2827       typeE.Te.TyArray.boundRs
2828          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1",
2829                        ML_(dinfo_free),
2830                        sizeof(UWord) );
2831       while (True) {
2832          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2833          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2834          if (attr == 0 && form == 0) break;
2835          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2836          if (attr == DW_AT_type && cts.szB > 0) {
2837             typeE.Te.TyArray.typeR
2838                = cook_die_using_form( cc, (UWord)cts.u.val, form );
2839          }
2840       }
2841       if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF)
2842          goto_bad_DIE;
2843       /* On't stack! */
2844       typestack_push( cc, parser, td3, &typeE, level );
2845       goto acquire_Type;
2846    }
2847 
2848    /* this is a subrange type defining the bounds of an array. */
2849    if (dtag == DW_TAG_subrange_type
2850        && subrange_type_denotes_array_bounds(parser, dtag)) {
2851       Bool have_lower = False;
2852       Bool have_upper = False;
2853       Bool have_count = False;
2854       Long lower = 0;
2855       Long upper = 0;
2856 
2857       switch (parser->language) {
2858          case 'C': have_lower = True;  lower = 0; break;
2859          case 'F': have_lower = True;  lower = 1; break;
2860          case '?': have_lower = False; break;
2861          case 'A': have_lower = False; break;
2862          default:  vg_assert(0); /* assured us by handling of
2863                                     DW_TAG_compile_unit in this fn */
2864       }
2865 
2866       VG_(memset)( &boundE, 0, sizeof(boundE) );
2867       boundE.cuOff = D3_INVALID_CUOFF;
2868       boundE.tag   = Te_Bound;
2869       while (True) {
2870          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2871          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2872          if (attr == 0 && form == 0) break;
2873          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2874          if (attr == DW_AT_lower_bound && cts.szB > 0) {
2875             lower      = (Long)cts.u.val;
2876             have_lower = True;
2877          }
2878          if (attr == DW_AT_upper_bound && cts.szB > 0) {
2879             upper      = (Long)cts.u.val;
2880             have_upper = True;
2881          }
2882          if (attr == DW_AT_count && cts.szB > 0) {
2883             /*count    = (Long)cts.u.val;*/
2884             have_count = True;
2885          }
2886       }
2887       /* FIXME: potentially skip the rest if no parent present, since
2888          it could be the case that this subrange type is free-standing
2889          (not being used to describe the bounds of a containing array
2890          type) */
2891       /* Do we have a plausible parent? */
2892       if (typestack_is_empty(parser)) goto_bad_DIE;
2893       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2894       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2895       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
2896       if (parser->qparentE[parser->sp].tag != Te_TyArray) goto_bad_DIE;
2897 
2898       /* Figure out if we have a definite range or not */
2899       if (have_lower && have_upper && (!have_count)) {
2900          boundE.Te.Bound.knownL = True;
2901          boundE.Te.Bound.knownU = True;
2902          boundE.Te.Bound.boundL = lower;
2903          boundE.Te.Bound.boundU = upper;
2904       }
2905       else if (have_lower && (!have_upper) && (!have_count)) {
2906          boundE.Te.Bound.knownL = True;
2907          boundE.Te.Bound.knownU = False;
2908          boundE.Te.Bound.boundL = lower;
2909          boundE.Te.Bound.boundU = 0;
2910       }
2911       else if ((!have_lower) && have_upper && (!have_count)) {
2912          boundE.Te.Bound.knownL = False;
2913          boundE.Te.Bound.knownU = True;
2914          boundE.Te.Bound.boundL = 0;
2915          boundE.Te.Bound.boundU = upper;
2916       }
2917       else if ((!have_lower) && (!have_upper) && (!have_count)) {
2918          boundE.Te.Bound.knownL = False;
2919          boundE.Te.Bound.knownU = False;
2920          boundE.Te.Bound.boundL = 0;
2921          boundE.Te.Bound.boundU = 0;
2922       } else {
2923          /* FIXME: handle more cases */
2924          goto_bad_DIE;
2925       }
2926 
2927       /* Record this bound in the parent */
2928       boundE.cuOff = posn;
2929       vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs);
2930       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs,
2931                     &boundE.cuOff );
2932       /* And record the child itself */
2933       goto acquire_Bound;
2934    }
2935 
2936    /* typedef or subrange_type other than array bounds. */
2937    if (dtag == DW_TAG_typedef
2938        || (dtag == DW_TAG_subrange_type
2939            && !subrange_type_denotes_array_bounds(parser, dtag))) {
2940       /* subrange_type other than array bound is only for Ada. */
2941       vg_assert (dtag == DW_TAG_typedef || parser->language == 'A');
2942       /* We can pick up a new typedef/subrange_type any time. */
2943       VG_(memset)(&typeE, 0, sizeof(typeE));
2944       typeE.cuOff = D3_INVALID_CUOFF;
2945       typeE.tag   = Te_TyTyDef;
2946       typeE.Te.TyTyDef.name = NULL;
2947       typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF;
2948       while (True) {
2949          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2950          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2951          if (attr == 0 && form == 0) break;
2952          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2953          if (attr == DW_AT_name && cts.szB < 0) {
2954             typeE.Te.TyTyDef.name
2955                = ML_(cur_read_strdup)( cts.u.cur,
2956                                        "di.readdwarf3.ptD.typedef.1" );
2957          }
2958          if (attr == DW_AT_type && cts.szB > 0) {
2959             typeE.Te.TyTyDef.typeR
2960                = cook_die_using_form( cc, (UWord)cts.u.val, form );
2961          }
2962       }
2963       /* Do we have something that looks sane?
2964          gcc gnat Ada generates minimal typedef
2965          such as the below
2966          <6><91cc>: DW_TAG_typedef
2967             DW_AT_abstract_ori: <9066>
2968          g++ for OMP can generate artificial functions that have
2969          parameters that refer to pointers to unnamed typedefs.
2970          See https://bugs.kde.org/show_bug.cgi?id=273475
2971          So we cannot require a name for a DW_TAG_typedef.
2972       */
2973       goto acquire_Type;
2974    }
2975 
2976    if (dtag == DW_TAG_subroutine_type) {
2977       /* function type? just record that one fact and ask no
2978          further questions. */
2979       VG_(memset)(&typeE, 0, sizeof(typeE));
2980       typeE.cuOff = D3_INVALID_CUOFF;
2981       typeE.tag   = Te_TyFn;
2982       goto acquire_Type;
2983    }
2984 
2985    if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type) {
2986       Int have_ty = 0;
2987       VG_(memset)(&typeE, 0, sizeof(typeE));
2988       typeE.cuOff = D3_INVALID_CUOFF;
2989       typeE.tag   = Te_TyQual;
2990       typeE.Te.TyQual.qual
2991          = dtag == DW_TAG_volatile_type ? 'V' : 'C';
2992       /* target type defaults to 'void' */
2993       typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
2994       while (True) {
2995          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
2996          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2997          if (attr == 0 && form == 0) break;
2998          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2999          if (attr == DW_AT_type && cts.szB > 0) {
3000             typeE.Te.TyQual.typeR
3001                = cook_die_using_form( cc, (UWord)cts.u.val, form );
3002             have_ty++;
3003          }
3004       }
3005       /* gcc sometimes generates DW_TAG_const/volatile_type without
3006          DW_AT_type and GDB appears to interpret the type as 'const
3007          void' (resp. 'volatile void').  So just allow it .. */
3008       if (have_ty == 1 || have_ty == 0)
3009          goto acquire_Type;
3010       else
3011          goto_bad_DIE;
3012    }
3013 
3014    /*
3015     * Treat DW_TAG_unspecified_type as type void. An example of DW_TAG_unspecified_type:
3016     *
3017     * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
3018     *  <1><10d4>: Abbrev Number: 53 (DW_TAG_unspecified_type)
3019     *     <10d5>   DW_AT_name        : (indirect string, offset: 0xdb7): decltype(nullptr)
3020     */
3021    if (dtag == DW_TAG_unspecified_type) {
3022       VG_(memset)(&typeE, 0, sizeof(typeE));
3023       typeE.cuOff           = D3_INVALID_CUOFF;
3024       typeE.tag             = Te_TyQual;
3025       typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
3026       goto acquire_Type;
3027    }
3028 
3029    /* else ignore this DIE */
3030    return;
3031    /*NOTREACHED*/
3032 
3033   acquire_Type:
3034    if (0) VG_(printf)("YYYY Acquire Type\n");
3035    vg_assert(ML_(TyEnt__is_type)( &typeE ));
3036    vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn);
3037    typeE.cuOff = posn;
3038    VG_(addToXA)( tyents, &typeE );
3039    return;
3040    /*NOTREACHED*/
3041 
3042   acquire_Atom:
3043    if (0) VG_(printf)("YYYY Acquire Atom\n");
3044    vg_assert(atomE.tag == Te_Atom);
3045    vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn);
3046    atomE.cuOff = posn;
3047    VG_(addToXA)( tyents, &atomE );
3048    return;
3049    /*NOTREACHED*/
3050 
3051   acquire_Field:
3052    /* For union members, Expr should be absent */
3053    if (0) VG_(printf)("YYYY Acquire Field\n");
3054    vg_assert(fieldE.tag == Te_Field);
3055    vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL);
3056    vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL);
3057    if (fieldE.Te.Field.isStruct) {
3058       vg_assert(fieldE.Te.Field.nLoc != 0);
3059    } else {
3060       vg_assert(fieldE.Te.Field.nLoc == 0);
3061    }
3062    vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn);
3063    fieldE.cuOff = posn;
3064    VG_(addToXA)( tyents, &fieldE );
3065    return;
3066    /*NOTREACHED*/
3067 
3068   acquire_Bound:
3069    if (0) VG_(printf)("YYYY Acquire Bound\n");
3070    vg_assert(boundE.tag == Te_Bound);
3071    vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn);
3072    boundE.cuOff = posn;
3073    VG_(addToXA)( tyents, &boundE );
3074    return;
3075    /*NOTREACHED*/
3076 
3077   bad_DIE:
3078    set_position_of_Cursor( c_die,  saved_die_c_offset );
3079    set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
3080    posn = uncook_die( cc, posn, &debug_types_flag, &alt_flag );
3081    VG_(printf)(" <%d><%lx>: %s", level, posn, ML_(pp_DW_TAG)( dtag ) );
3082    if (debug_types_flag) {
3083       VG_(printf)(" (in .debug_types)");
3084    } else if (alt_flag) {
3085       VG_(printf)(" (in alternate .debug_info)");
3086    }
3087    VG_(printf)("\n");
3088    while (True) {
3089       DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
3090       DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
3091       if (attr == 0 && form == 0) break;
3092       VG_(printf)("     %18s: ", ML_(pp_DW_AT)(attr));
3093       /* Get the form contents, so as to print them */
3094       get_Form_contents( &cts, cc, c_die, True, form );
3095       VG_(printf)("\t\n");
3096    }
3097    VG_(printf)("\n");
3098    cc->barf("parse_type_DIE: confused by the above DIE");
3099    /*NOTREACHED*/
3100 }
3101 
3102 
3103 /*------------------------------------------------------------*/
3104 /*---                                                      ---*/
3105 /*--- Compression of type DIE information                  ---*/
3106 /*---                                                      ---*/
3107 /*------------------------------------------------------------*/
3108 
/* Follow at most one step of indirection starting at 'cuOff'.

   The entry recorded for 'cuOff' is looked up in 'ents' (using
   'ents_cache').  Three outcomes are possible:

   - no entry exists: a diagnostic is printed, *changed is set to
     False and 'cuOff' is returned as-is;
   - the entry is not a Te_INDIR: *changed is set to False and
     'cuOff' is returned as-is;
   - the entry is a Te_INDIR: *changed is set to True and the offset
     the indirection refers to is returned.

   *changed is written on every path. */
static UWord chase_cuOff ( Bool* changed,
                           XArray* /* of TyEnt */ ents,
                           TyEntIndexCache* ents_cache,
                           UWord cuOff )
{
   TyEnt* target = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff );

   if (target == NULL) {
      VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff);
      *changed = False;
      return cuOff;
   }

   vg_assert(target->tag != Te_EMPTY);

   if (target->tag == Te_INDIR) {
      /* An indirection must refer to a strictly lower offset. */
      vg_assert(target->Te.INDIR.indR < cuOff);
      *changed = True;
      return target->Te.INDIR.indR;
   }

   /* An ordinary entry: nothing to chase. */
   *changed = False;
   return cuOff;
}
3133 
3134 static
chase_cuOffs_in_XArray(Bool * changed,XArray * ents,TyEntIndexCache * ents_cache,XArray * cuOffs)3135 void chase_cuOffs_in_XArray ( Bool* changed,
3136                               XArray* /* of TyEnt */ ents,
3137                               TyEntIndexCache* ents_cache,
3138                               /*MOD*/XArray* /* of UWord */ cuOffs )
3139 {
3140    Bool b2 = False;
3141    Word i, n = VG_(sizeXA)( cuOffs );
3142    for (i = 0; i < n; i++) {
3143       Bool   b = False;
3144       UWord* p = VG_(indexXA)( cuOffs, i );
3145       *p = chase_cuOff( &b, ents, ents_cache, *p );
3146       if (b)
3147          b2 = True;
3148    }
3149    *changed = b2;
3150 }
3151 
TyEnt__subst_R_fields(XArray * ents,TyEntIndexCache * ents_cache,TyEnt * te)3152 static Bool TyEnt__subst_R_fields ( XArray* /* of TyEnt */ ents,
3153                                     TyEntIndexCache* ents_cache,
3154                                     /*MOD*/TyEnt* te )
3155 {
3156    Bool b, changed = False;
3157    switch (te->tag) {
3158       case Te_EMPTY:
3159          break;
3160       case Te_INDIR:
3161          te->Te.INDIR.indR
3162             = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR );
3163          if (b) changed = True;
3164          break;
3165       case Te_UNKNOWN:
3166          break;
3167       case Te_Atom:
3168          break;
3169       case Te_Field:
3170          te->Te.Field.typeR
3171             = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR );
3172          if (b) changed = True;
3173          break;
3174       case Te_Bound:
3175          break;
3176       case Te_TyBase:
3177          break;
3178       case Te_TyPtr:
3179       case Te_TyRef:
3180       case Te_TyPtrMbr:
3181       case Te_TyRvalRef:
3182          te->Te.TyPorR.typeR
3183             = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR );
3184          if (b) changed = True;
3185          break;
3186       case Te_TyTyDef:
3187          te->Te.TyTyDef.typeR
3188             = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR );
3189          if (b) changed = True;
3190          break;
3191       case Te_TyStOrUn:
3192          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs );
3193          if (b) changed = True;
3194          break;
3195       case Te_TyEnum:
3196          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs );
3197          if (b) changed = True;
3198          break;
3199       case Te_TyArray:
3200          te->Te.TyArray.typeR
3201             = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR );
3202          if (b) changed = True;
3203          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs );
3204          if (b) changed = True;
3205          break;
3206       case Te_TyFn:
3207          break;
3208       case Te_TyQual:
3209          te->Te.TyQual.typeR
3210             = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR );
3211          if (b) changed = True;
3212          break;
3213       case Te_TyVoid:
3214          break;
3215       default:
3216          ML_(pp_TyEnt)(te);
3217          vg_assert(0);
3218    }
3219    return changed;
3220 }
3221 
3222 /* Make a pass over 'ents'.  For each tyent, inspect the target of any
3223    'R' or 'Rs' fields (those which refer to other tyents), and replace
3224    any which point to INDIR nodes with the target of the indirection
3225    (which should not itself be an indirection).  In summary, this
3226    routine shorts out all references to indirection nodes. */
3227 static
dedup_types_substitution_pass(XArray * ents,TyEntIndexCache * ents_cache)3228 Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents,
3229                                      TyEntIndexCache* ents_cache )
3230 {
3231    Word i, n, nChanged = 0;
3232    Bool b;
3233    n = VG_(sizeXA)( ents );
3234    for (i = 0; i < n; i++) {
3235       TyEnt* ent = VG_(indexXA)( ents, i );
3236       vg_assert(ent->tag != Te_EMPTY);
3237       /* We have to substitute everything, even indirections, so as to
3238          ensure that chains of indirections don't build up. */
3239       b = TyEnt__subst_R_fields( ents, ents_cache, ent );
3240       if (b)
3241          nChanged++;
3242    }
3243 
3244    return nChanged;
3245 }
3246 
3247 
3248 /* Make a pass over 'ents', building a dictionary of TyEnts as we go.
3249    Look up each new tyent in the dictionary in turn.  If it is already
3250    in the dictionary, replace this tyent with an indirection to the
3251    existing one, and delete any malloc'd stuff hanging off this one.
3252    In summary, this routine commons up all tyents that are identical
3253    as defined by TyEnt__cmp_by_all_except_cuOff. */
3254 static
dedup_types_commoning_pass(XArray * ents)3255 Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents )
3256 {
3257    Word    n, i, nDeleted;
3258    WordFM* dict; /* TyEnt* -> void */
3259    TyEnt*  ent;
3260    UWord   keyW, valW;
3261 
3262    dict = VG_(newFM)(
3263              ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1",
3264              ML_(dinfo_free),
3265              (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff)
3266           );
3267 
3268    nDeleted = 0;
3269    n = VG_(sizeXA)( ents );
3270    for (i = 0; i < n; i++) {
3271       ent = VG_(indexXA)( ents, i );
3272       vg_assert(ent->tag != Te_EMPTY);
3273 
3274       /* Ignore indirections, although check that they are
3275          not forming a cycle. */
3276       if (ent->tag == Te_INDIR) {
3277          vg_assert(ent->Te.INDIR.indR < ent->cuOff);
3278          continue;
3279       }
3280 
3281       keyW = valW = 0;
3282       if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) {
3283          /* it's already in the dictionary. */
3284          TyEnt* old = (TyEnt*)keyW;
3285          vg_assert(valW == 0);
3286          vg_assert(old != ent);
3287          vg_assert(old->tag != Te_INDIR);
3288          /* since we are traversing the array in increasing order of
3289             cuOff: */
3290          vg_assert(old->cuOff < ent->cuOff);
3291          /* So anyway, dump this entry and replace it with an
3292             indirection to the one in the dictionary.  Note that the
3293             assertion above guarantees that we cannot create cycles of
3294             indirections, since we are always creating an indirection
3295             to a tyent with a cuOff lower than this one. */
3296          ML_(TyEnt__make_EMPTY)( ent );
3297          ent->tag = Te_INDIR;
3298          ent->Te.INDIR.indR = old->cuOff;
3299          nDeleted++;
3300       } else {
3301          /* not in dictionary; add it and keep going. */
3302          VG_(addToFM)( dict, (UWord)ent, 0 );
3303       }
3304    }
3305 
3306    VG_(deleteFM)( dict, NULL, NULL );
3307 
3308    return nDeleted;
3309 }
3310 
3311 
3312 static
dedup_types(Bool td3,XArray * ents,TyEntIndexCache * ents_cache)3313 void dedup_types ( Bool td3,
3314                    /*MOD*/XArray* /* of TyEnt */ ents,
3315                    TyEntIndexCache* ents_cache )
3316 {
3317    Word m, n, i, nDel, nSubst, nThresh;
3318    if (0) td3 = True;
3319 
3320    n = VG_(sizeXA)( ents );
3321 
3322    /* If a commoning pass and a substitution pass both make fewer than
3323       this many changes, just stop.  It's pointless to burn up CPU
3324       time trying to compress the last 1% or so out of the array. */
3325    nThresh = n / 200;
3326 
3327    /* First we must sort .ents by its .cuOff fields, so we
3328       can index into it. */
3329    VG_(setCmpFnXA)( ents, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
3330    VG_(sortXA)( ents );
3331 
3332    /* Now repeatedly do commoning and substitution passes over
3333       the array, until there are no more changes. */
3334    do {
3335       nDel   = dedup_types_commoning_pass ( ents );
3336       nSubst = dedup_types_substitution_pass ( ents, ents_cache );
3337       vg_assert(nDel >= 0 && nSubst >= 0);
3338       TRACE_D3("   %ld deletions, %ld substitutions\n", nDel, nSubst);
3339    } while (nDel > nThresh || nSubst > nThresh);
3340 
3341    /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
3342       In fact this should be true at the end of every loop iteration
3343       above (a commoning pass followed by a substitution pass), but
3344       checking it on every iteration is excessively expensive.  Note,
3345       this loop also computes 'm' for the stats printing below it. */
3346    m = 0;
3347    n = VG_(sizeXA)( ents );
3348    for (i = 0; i < n; i++) {
3349       TyEnt *ent, *ind;
3350       ent = VG_(indexXA)( ents, i );
3351       if (ent->tag != Te_INDIR) continue;
3352       m++;
3353       ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
3354                                          ent->Te.INDIR.indR );
3355       vg_assert(ind);
3356       vg_assert(ind->tag != Te_INDIR);
3357    }
3358 
3359    TRACE_D3("Overall: %ld before, %ld after\n", n, n-m);
3360 }
3361 
3362 
3363 /*------------------------------------------------------------*/
3364 /*---                                                      ---*/
3365 /*--- Resolution of references to type DIEs                ---*/
3366 /*---                                                      ---*/
3367 /*------------------------------------------------------------*/
3368 
3369 /* Make a pass through the (temporary) variables array.  Examine the
3370    type of each variable, check is it found, and chase any Te_INDIRs.
3371    Postcondition is: each variable has a typeR field that refers to a
3372    valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
3373    not to refer to a Te_INDIR.  (This is so that we can throw all the
3374    Te_INDIRs away later). */
3375 
3376 __attribute__((noinline))
resolve_variable_types(void (* barf)(const HChar *),XArray * ents,TyEntIndexCache * ents_cache,XArray * vars)3377 static void resolve_variable_types (
3378                void (*barf)( const HChar* ) __attribute__((noreturn)),
3379                /*R-O*/XArray* /* of TyEnt */ ents,
3380                /*MOD*/TyEntIndexCache* ents_cache,
3381                /*MOD*/XArray* /* of TempVar* */ vars
3382             )
3383 {
3384    Word i, n;
3385    n = VG_(sizeXA)( vars );
3386    for (i = 0; i < n; i++) {
3387       TempVar* var = *(TempVar**)VG_(indexXA)( vars, i );
3388       /* This is the stated type of the variable.  But it might be
3389          an indirection, so be careful. */
3390       TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
3391                                                 var->typeR );
3392       if (ent && ent->tag == Te_INDIR) {
3393          ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
3394                                             ent->Te.INDIR.indR );
3395          vg_assert(ent);
3396          vg_assert(ent->tag != Te_INDIR);
3397       }
3398 
3399       /* Deal first with "normal" cases */
3400       if (ent && ML_(TyEnt__is_type)(ent)) {
3401          var->typeR = ent->cuOff;
3402          continue;
3403       }
3404 
3405       /* If there's no ent, it probably we did not manage to read a
3406          type at the cuOffset which is stated as being this variable's
3407          type.  Maybe a deficiency in parse_type_DIE.  Complain. */
3408       if (ent == NULL) {
3409          VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR );
3410          barf("resolve_variable_types: "
3411               "cuOff does not refer to a known type");
3412       }
3413       vg_assert(ent);
3414       /* If ent has any other tag, something bad happened, along the
3415          lines of var->typeR not referring to a type at all. */
3416       vg_assert(ent->tag == Te_UNKNOWN);
3417       /* Just accept it; the type will be useless, but at least keep
3418          going. */
3419       var->typeR = ent->cuOff;
3420    }
3421 }
3422 
3423 
3424 /*------------------------------------------------------------*/
3425 /*---                                                      ---*/
3426 /*--- Parsing of Compilation Units                         ---*/
3427 /*---                                                      ---*/
3428 /*------------------------------------------------------------*/
3429 
cmp_TempVar_by_dioff(const void * v1,const void * v2)3430 static Int cmp_TempVar_by_dioff ( const void* v1, const void* v2 ) {
3431    const TempVar* t1 = *(const TempVar *const *)v1;
3432    const TempVar* t2 = *(const TempVar *const *)v2;
3433    if (t1->dioff < t2->dioff) return -1;
3434    if (t1->dioff > t2->dioff) return 1;
3435    return 0;
3436 }
3437 
/* Read the DIE at the current position of 'c', then recursively read
   all of its children.

   The DIE's abbreviation code is read from 'c' and used to position
   an abbreviation cursor, from which the tag and the has-children
   flag are taken.  The attribute list is then walked three times
   from the same starting point:

     1. a pass that merely fetches each attribute (printing it if
        'td3' is set), which establishes where the DIE ends;
     2. parse_type_DIE;
     3. parse_var_DIE.

   Finally both cursors are placed just after the DIE and, if the DIE
   has children, read_DIE is called on each child at 'level'+1 until
   the terminating zero entry, which is consumed.

   cc->barf is called for a zero tag or an invalid has-children
   value. */
static void read_DIE (
   /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
   /*MOD*/XArray* /* of TyEnt */ tyents,
   /*MOD*/XArray* /* of TempVar* */ tempvars,
   /*MOD*/XArray* /* of GExpr* */ gexprs,
   /*MOD*/D3TypeParser* typarser,
   /*MOD*/D3VarParser* varparser,
   Cursor* c, Bool td3, CUConst* cc, Int level
)
{
   Cursor abbv;
   ULong  tag, abbrevCode;
   UWord  diePosn;
   UInt   childFlag;
   UWord  dieStart, abbvStart;   /* start of the attribute list */
   UWord  dieEnd,   abbvEnd;     /* just past the attribute list */

   /* --- Header of this DIE --- */
   diePosn    = cook_die( cc, get_position_of_Cursor( c ) );
   abbrevCode = get_ULEB128( c );
   set_abbv_Cursor( &abbv, td3, cc, abbrevCode );
   tag        = get_ULEB128( &abbv );
   TRACE_D3("\n");
   TRACE_D3(" <%d><%lx>: Abbrev Number: %llu (%s)\n",
            level, diePosn, abbrevCode, ML_(pp_DW_TAG)( tag ) );

   if (tag == 0)
      cc->barf("read_DIE: invalid zero tag on DIE");

   childFlag = get_UChar( &abbv );
   if (!(childFlag == DW_children_no || childFlag == DW_children_yes))
      cc->barf("read_DIE: invalid has_children value");

   /* Both cursors now sit at the start of the attribute list.
      Remember where that is, since every pass below advances them
      and the next pass has to start from the same place. */
   dieStart  = get_position_of_Cursor( c );
   abbvStart = get_position_of_Cursor( &abbv );

   /* --- Pass 1: skip over (and optionally print) the attributes --- */
   for (;;) {
      FormContents ignored;
      ULong attrName = get_ULEB128( &abbv );
      ULong attrForm = get_ULEB128( &abbv );
      if (attrName == 0 && attrForm == 0)
         break;
      TRACE_D3("     %18s: ", ML_(pp_DW_AT)(attrName));
      /* The contents are fetched only to advance the cursor, and to
         have them printed when td3 is True; the value is unused. */
      get_Form_contents( &ignored, cc, c, td3, (DW_FORM)attrForm );
      TRACE_D3("\t");
      TRACE_D3("\n");
   }

   /* This is where the DIE ends. */
   dieEnd  = get_position_of_Cursor( c );
   abbvEnd = get_position_of_Cursor( &abbv );

   /* --- Pass 2: the type parser --- */
   set_position_of_Cursor( c,     dieStart );
   set_position_of_Cursor( &abbv, abbvStart );

   parse_type_DIE( tyents,
                   typarser,
                   (DW_TAG)tag,
                   diePosn,
                   level,
                   c,     /* DIE cursor */
                   &abbv, /* abbrev cursor */
                   cc,
                   td3 );

   /* --- Pass 3: the variable parser --- */
   set_position_of_Cursor( c,     dieStart );
   set_position_of_Cursor( &abbv, abbvStart );

   parse_var_DIE( rangestree,
                  tempvars,
                  gexprs,
                  varparser,
                  (DW_TAG)tag,
                  diePosn,
                  level,
                  c,     /* DIE cursor */
                  &abbv, /* abbrev cursor */
                  cc,
                  td3 );

   /* Wherever the parsers left the cursors, continue from the end of
      this DIE. */
   set_position_of_Cursor( c,     dieEnd );
   set_position_of_Cursor( &abbv, abbvEnd );

   /* --- Children, if any --- */
   if (childFlag != DW_children_yes)
      return;

   if (0) TRACE_D3("BEGIN children of level %d\n", level);
   for (;;) {
      tag = peek_ULEB128( c );
      if (tag == 0)
         break;
      read_DIE( rangestree, tyents, tempvars, gexprs,
                typarser, varparser,
                c, td3, cc, level+1 );
   }
   /* Consume the zero entry that terminates the list of children. */
   tag = get_ULEB128( c );
   vg_assert(tag == 0);
   if (0) TRACE_D3("END children of level %d\n", level);

}
3544 
3545 
3546 static
new_dwarf3_reader_wrk(struct _DebugInfo * di,void (* barf)(const HChar *),DiSlice escn_debug_info,DiSlice escn_debug_types,DiSlice escn_debug_abbv,DiSlice escn_debug_line,DiSlice escn_debug_str,DiSlice escn_debug_ranges,DiSlice escn_debug_loc,DiSlice escn_debug_info_alt,DiSlice escn_debug_abbv_alt,DiSlice escn_debug_line_alt,DiSlice escn_debug_str_alt)3547 void new_dwarf3_reader_wrk (
3548    struct _DebugInfo* di,
3549    __attribute__((noreturn)) void (*barf)( const HChar* ),
3550    DiSlice escn_debug_info,      DiSlice escn_debug_types,
3551    DiSlice escn_debug_abbv,      DiSlice escn_debug_line,
3552    DiSlice escn_debug_str,       DiSlice escn_debug_ranges,
3553    DiSlice escn_debug_loc,       DiSlice escn_debug_info_alt,
3554    DiSlice escn_debug_abbv_alt,  DiSlice escn_debug_line_alt,
3555    DiSlice escn_debug_str_alt
3556 )
3557 {
3558    XArray* /* of TyEnt */     tyents;
3559    XArray* /* of TyEnt */     tyents_to_keep;
3560    XArray* /* of GExpr* */    gexprs;
3561    XArray* /* of TempVar* */  tempvars;
3562    WordFM* /* of (XArray* of AddrRange, void) */ rangestree;
3563    TyEntIndexCache* tyents_cache = NULL;
3564    TyEntIndexCache* tyents_to_keep_cache = NULL;
3565    TempVar *varp, *varp2;
3566    GExpr* gexpr;
3567    Cursor abbv; /* for showing .debug_abbrev */
3568    Cursor info; /* primary cursor for parsing .debug_info */
3569    Cursor ranges; /* for showing .debug_ranges */
3570    D3TypeParser typarser;
3571    D3VarParser varparser;
3572    Addr  dr_base;
3573    UWord dr_offset;
3574    Word  i, j, n;
3575    Bool td3 = di->trace_symtab;
3576    XArray* /* of TempVar* */ dioff_lookup_tab;
3577    Int pass;
3578    VgHashTable signature_types;
3579 #if 0
3580    /* This doesn't work properly because it assumes all entries are
3581       packed end to end, with no holes.  But that doesn't always
3582       appear to be the case, so it loses sync.  And the D3 spec
3583       doesn't appear to require a no-hole situation either. */
3584    /* Display .debug_loc */
3585    Addr  dl_base;
3586    UWord dl_offset;
3587    Cursor loc; /* for showing .debug_loc */
3588    TRACE_SYMTAB("\n");
3589    TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
3590    TRACE_SYMTAB("    Offset   Begin    End      Expression\n");
3591    init_Cursor( &loc, debug_loc_img,
3592                 debug_loc_sz, 0, barf,
3593                 "Overrun whilst reading .debug_loc section(1)" );
3594    dl_base = 0;
3595    dl_offset = 0;
3596    while (True) {
3597       UWord  w1, w2;
3598       UWord  len;
3599       if (is_at_end_Cursor( &loc ))
3600          break;
3601 
3602       /* Read a (host-)word pair.  This is something of a hack since
3603          the word size to read is really dictated by the ELF file;
3604          however, we assume we're reading a file with the same
3605          word-sizeness as the host.  Reasonably enough. */
3606       w1 = get_UWord( &loc );
3607       w2 = get_UWord( &loc );
3608 
3609       if (w1 == 0 && w2 == 0) {
3610          /* end of list.  reset 'base' */
3611          TRACE_D3("    %08lx <End of list>\n", dl_offset);
3612          dl_base = 0;
3613          dl_offset = get_position_of_Cursor( &loc );
3614          continue;
3615       }
3616 
3617       if (w1 == -1UL) {
3618          /* new value for 'base' */
3619          TRACE_D3("    %08lx %16lx %08lx (base address)\n",
3620                   dl_offset, w1, w2);
3621          dl_base = w2;
3622          continue;
3623       }
3624 
3625       /* else a location expression follows */
3626       TRACE_D3("    %08lx %08lx %08lx ",
3627                dl_offset, w1 + dl_base, w2 + dl_base);
3628       len = (UWord)get_UShort( &loc );
3629       while (len > 0) {
3630          UChar byte = get_UChar( &loc );
3631          TRACE_D3("%02x", (UInt)byte);
3632          len--;
3633       }
3634       TRACE_SYMTAB("\n");
3635    }
3636 #endif
3637 
3638    /* Display .debug_ranges */
3639    TRACE_SYMTAB("\n");
3640    TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
3641    TRACE_SYMTAB("    Offset   Begin    End\n");
3642    if (ML_(sli_is_valid)(escn_debug_ranges)) {
3643       init_Cursor( &ranges, escn_debug_ranges, 0, barf,
3644                    "Overrun whilst reading .debug_ranges section(1)" );
3645       dr_base = 0;
3646       dr_offset = 0;
3647       while (True) {
3648          UWord  w1, w2;
3649 
3650          if (is_at_end_Cursor( &ranges ))
3651             break;
3652 
3653          /* Read a (host-)word pair.  This is something of a hack since
3654             the word size to read is really dictated by the ELF file;
3655             however, we assume we're reading a file with the same
3656             word-sizeness as the host.  Reasonably enough. */
3657          w1 = get_UWord( &ranges );
3658          w2 = get_UWord( &ranges );
3659 
3660          if (w1 == 0 && w2 == 0) {
3661             /* end of list.  reset 'base' */
3662             TRACE_D3("    %08lx <End of list>\n", dr_offset);
3663             dr_base = 0;
3664             dr_offset = get_position_of_Cursor( &ranges );
3665             continue;
3666          }
3667 
3668          if (w1 == -1UL) {
3669             /* new value for 'base' */
3670             TRACE_D3("    %08lx %16lx %08lx (base address)\n",
3671                      dr_offset, w1, w2);
3672             dr_base = w2;
3673             continue;
3674          }
3675 
3676          /* else a range [w1+base, w2+base) is denoted */
3677          TRACE_D3("    %08lx %08lx %08lx\n",
3678                   dr_offset, w1 + dr_base, w2 + dr_base);
3679       }
3680    }
3681 
3682    /* Display .debug_abbrev */
3683    TRACE_SYMTAB("\n");
3684    TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
3685    if (ML_(sli_is_valid)(escn_debug_abbv)) {
3686       init_Cursor( &abbv, escn_debug_abbv, 0, barf,
3687                    "Overrun whilst reading .debug_abbrev section" );
3688       while (True) {
3689          if (is_at_end_Cursor( &abbv ))
3690             break;
3691          /* Read one abbreviation table */
3692          TRACE_D3("  Number TAG\n");
3693          while (True) {
3694             ULong atag;
3695             UInt  has_children;
3696             ULong acode = get_ULEB128( &abbv );
3697             if (acode == 0) break; /* end of the table */
3698             atag = get_ULEB128( &abbv );
3699             has_children = get_UChar( &abbv );
3700             TRACE_D3("   %llu      %s    [%s]\n",
3701                      acode, ML_(pp_DW_TAG)(atag),
3702                             ML_(pp_DW_children)(has_children));
3703             while (True) {
3704                ULong at_name = get_ULEB128( &abbv );
3705                ULong at_form = get_ULEB128( &abbv );
3706                if (at_name == 0 && at_form == 0) break;
3707                TRACE_D3("    %18s %s\n",
3708                         ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form));
3709             }
3710          }
3711       }
3712    }
3713    TRACE_SYMTAB("\n");
3714 
3715    /* We'll park the harvested type information in here.  Also create
3716       a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
3717       have at least one type entry to refer to.  D3_FAKEVOID_CUOFF is
3718       huge and presumably will not occur in any valid DWARF3 file --
3719       it would need to have a .debug_info section 4GB long for that to
3720       happen.  These type entries end up in the DebugInfo. */
3721    tyents = VG_(newXA)( ML_(dinfo_zalloc),
3722                         "di.readdwarf3.ndrw.1 (TyEnt temp array)",
3723                         ML_(dinfo_free), sizeof(TyEnt) );
3724    { TyEnt tyent;
3725      VG_(memset)(&tyent, 0, sizeof(tyent));
3726      tyent.tag   = Te_TyVoid;
3727      tyent.cuOff = D3_FAKEVOID_CUOFF;
3728      tyent.Te.TyVoid.isFake = True;
3729      VG_(addToXA)( tyents, &tyent );
3730    }
3731    { TyEnt tyent;
3732      VG_(memset)(&tyent, 0, sizeof(tyent));
3733      tyent.tag   = Te_UNKNOWN;
3734      tyent.cuOff = D3_INVALID_CUOFF;
3735      VG_(addToXA)( tyents, &tyent );
3736    }
3737 
3738    /* This is a tree used to unique-ify the range lists that are
3739       manufactured by parse_var_DIE.  References to the keys in the
3740       tree wind up in .rngMany fields in TempVars.  We'll need to
3741       delete this tree, and the XArrays attached to it, at the end of
3742       this function. */
3743    rangestree = VG_(newFM)( ML_(dinfo_zalloc),
3744                             "di.readdwarf3.ndrw.2 (rangestree)",
3745                             ML_(dinfo_free),
3746                             (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange );
3747 
3748    /* List of variables we're accumulating.  These don't end up in the
3749       DebugInfo; instead their contents are handed to ML_(addVar) and
3750       the list elements are then deleted. */
3751    tempvars = VG_(newXA)( ML_(dinfo_zalloc),
3752                           "di.readdwarf3.ndrw.3 (TempVar*s array)",
3753                           ML_(dinfo_free),
3754                           sizeof(TempVar*) );
3755 
3756    /* List of GExprs we're accumulating.  These wind up in the
3757       DebugInfo. */
3758    gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4",
3759                         ML_(dinfo_free), sizeof(GExpr*) );
3760 
3761    /* We need a D3TypeParser to keep track of partially constructed
3762       types.  It'll be discarded as soon as we've completed the CU,
3763       since the resulting information is tipped in to 'tyents' as it
3764       is generated. */
3765    VG_(memset)( &typarser, 0, sizeof(typarser) );
3766    typarser.sp = -1;
3767    typarser.language = '?';
3768    for (i = 0; i < N_D3_TYPE_STACK; i++) {
3769       typarser.qparentE[i].tag   = Te_EMPTY;
3770       typarser.qparentE[i].cuOff = D3_INVALID_CUOFF;
3771    }
3772 
3773    VG_(memset)( &varparser, 0, sizeof(varparser) );
3774    varparser.sp = -1;
3775 
3776    signature_types = VG_(HT_construct) ("signature_types");
3777 
3778    /* Do an initial pass to scan the .debug_types section, if any, and
3779       fill in the signatured types hash table.  This lets us handle
3780       mapping from a type signature to a (cooked) DIE offset directly
3781       in get_Form_contents.  */
3782    if (ML_(sli_is_valid)(escn_debug_types)) {
3783       init_Cursor( &info, escn_debug_types, 0, barf,
3784                    "Overrun whilst reading .debug_types section" );
3785       TRACE_D3("\n------ Collecting signatures from "
3786                ".debug_types section ------\n");
3787 
3788       while (True) {
3789          UWord   cu_start_offset, cu_offset_now;
3790          CUConst cc;
3791 
3792          cu_start_offset = get_position_of_Cursor( &info );
3793          TRACE_D3("\n");
3794          TRACE_D3("  Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
3795          /* parse_CU_header initialises the CU's set_abbv_Cursor cache
3796             (saC_cache) */
3797          parse_CU_Header( &cc, td3, &info, escn_debug_abbv, True, False );
3798 
3799          /* Needed by cook_die.  */
3800          cc.types_cuOff_bias = escn_debug_info.szB;
3801 
3802          record_signatured_type( signature_types, cc.type_signature,
3803                                  cook_die( &cc, cc.type_offset ));
3804 
3805          /* Until proven otherwise we assume we don't need the icc9
3806             workaround in this case; see the DIE-reading loop below
3807             for details.  */
3808          cu_offset_now = (cu_start_offset + cc.unit_length
3809                           + (cc.is_dw64 ? 12 : 4));
3810 
3811          if (cu_offset_now >= escn_debug_types.szB)
3812             break;
3813 
3814          set_position_of_Cursor ( &info, cu_offset_now );
3815       }
3816    }
3817 
3818    /* Perform three DIE-reading passes.  The first pass reads DIEs from
3819       alternate .debug_info (if any), the second pass reads DIEs from
3820       .debug_info, and the third pass reads DIEs from .debug_types.
3821       Moving the body of this loop into a separate function would
3822       require a large number of arguments to be passed in, so it is
3823       kept inline instead.  */
3824    for (pass = 0; pass < 3; ++pass) {
3825       ULong section_size;
3826 
3827       if (pass == 0) {
3828          if (!ML_(sli_is_valid)(escn_debug_info_alt))
3829 	    continue;
3830          /* Now loop over the Compilation Units listed in the alternate
3831             .debug_info section (see D3SPEC sec 7.5) paras 1 and 2.
3832             Each compilation unit contains a Compilation Unit Header
3833             followed by precisely one DW_TAG_compile_unit or
3834             DW_TAG_partial_unit DIE. */
3835          init_Cursor( &info, escn_debug_info_alt, 0, barf,
3836                       "Overrun whilst reading alternate .debug_info section" );
3837          section_size = escn_debug_info_alt.szB;
3838 
3839          TRACE_D3("\n------ Parsing alternate .debug_info section ------\n");
3840       } else if (pass == 1) {
3841          /* Now loop over the Compilation Units listed in the .debug_info
3842             section (see D3SPEC sec 7.5) paras 1 and 2.  Each compilation
3843             unit contains a Compilation Unit Header followed by precisely
3844             one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
3845          init_Cursor( &info, escn_debug_info, 0, barf,
3846                       "Overrun whilst reading .debug_info section" );
3847          section_size = escn_debug_info.szB;
3848 
3849          TRACE_D3("\n------ Parsing .debug_info section ------\n");
3850       } else {
3851          if (!ML_(sli_is_valid)(escn_debug_types))
3852             continue;
3853          init_Cursor( &info, escn_debug_types, 0, barf,
3854                       "Overrun whilst reading .debug_types section" );
3855          section_size = escn_debug_types.szB;
3856 
3857          TRACE_D3("\n------ Parsing .debug_types section ------\n");
3858       }
3859 
3860       while (True) {
3861          ULong   cu_start_offset, cu_offset_now;
3862          CUConst cc;
3863          /* It may be that the stated size of this CU is larger than the
3864             amount of stuff actually in it.  icc9 seems to generate CUs
3865             thusly.  We use these variables to figure out if this is
3866             indeed the case, and if so how many bytes we need to skip to
3867             get to the start of the next CU.  Not skipping those bytes
3868             causes us to misidentify the start of the next CU, and it all
3869             goes badly wrong after that (not surprisingly). */
3870          UWord cu_size_including_IniLen, cu_amount_used;
3871 
3872          /* It seems icc9 finishes the DIE info before debug_info_sz
3873             bytes have been used up.  So be flexible, and declare the
3874             sequence complete if there is not enough remaining bytes to
3875             hold even the smallest conceivable CU header.  (11 bytes I
3876             reckon). */
3877          /* JRS 23Jan09: I suspect this is no longer necessary now that
3878             the code below contains a 'while (cu_amount_used <
3879             cu_size_including_IniLen ...'  style loop, which skips over
3880             any leftover bytes at the end of a CU in the case where the
3881             CU's stated size is larger than its actual size (as
3882             determined by reading all its DIEs).  However, for prudence,
3883             I'll leave the following test in place.  I can't see that a
3884             CU header can be smaller than 11 bytes, so I don't think
3885             there's any harm possible through the test -- it just adds
3886             robustness. */
3887          Word avail = get_remaining_length_Cursor( &info );
3888          if (avail < 11) {
3889             if (avail > 0)
3890                TRACE_D3("new_dwarf3_reader_wrk: warning: "
3891                         "%ld unused bytes after end of DIEs\n", avail);
3892             break;
3893          }
3894 
3895          /* Check the varparser's stack is in a sane state. */
3896          vg_assert(varparser.sp == -1);
3897          for (i = 0; i < N_D3_VAR_STACK; i++) {
3898             vg_assert(varparser.ranges[i] == NULL);
3899             vg_assert(varparser.level[i] == 0);
3900          }
3901          for (i = 0; i < N_D3_TYPE_STACK; i++) {
3902             vg_assert(typarser.qparentE[i].cuOff == D3_INVALID_CUOFF);
3903             vg_assert(typarser.qparentE[i].tag   == Te_EMPTY);
3904             vg_assert(typarser.qlevel[i] == 0);
3905          }
3906 
3907          cu_start_offset = get_position_of_Cursor( &info );
3908          TRACE_D3("\n");
3909          TRACE_D3("  Compilation Unit @ offset 0x%llx:\n", cu_start_offset);
3910          /* parse_CU_header initialises the CU's set_abbv_Cursor cache
3911             (saC_cache) */
3912          if (pass == 0) {
3913             parse_CU_Header( &cc, td3, &info, escn_debug_abbv_alt,
3914                              False, True );
3915          } else {
3916             parse_CU_Header( &cc, td3, &info, escn_debug_abbv,
3917                              pass == 2, False );
3918          }
3919          cc.escn_debug_str      = pass == 0 ? escn_debug_str_alt
3920                                             : escn_debug_str;
3921          cc.escn_debug_ranges   = escn_debug_ranges;
3922          cc.escn_debug_loc      = escn_debug_loc;
3923          cc.escn_debug_line     = pass == 0 ? escn_debug_line_alt
3924                                             : escn_debug_line;
3925          cc.escn_debug_info     = pass == 0 ? escn_debug_info_alt
3926                                             : escn_debug_info;
3927          cc.escn_debug_types    = escn_debug_types;
3928          cc.escn_debug_info_alt = escn_debug_info_alt;
3929          cc.escn_debug_str_alt  = escn_debug_str_alt;
3930          cc.types_cuOff_bias    = escn_debug_info.szB;
3931          cc.alt_cuOff_bias      = escn_debug_info.szB + escn_debug_types.szB;
3932          cc.cu_start_offset     = cu_start_offset;
3933          cc.di = di;
3934          /* The CU's svma can be deduced by looking at the AT_low_pc
3935             value in the top level TAG_compile_unit, which is the topmost
3936             DIE.  We'll leave it for the 'varparser' to acquire that info
3937             and fill it in -- since it is the only party to want to know
3938             it. */
3939          cc.cu_svma_known = False;
3940          cc.cu_svma       = 0;
3941 
3942          cc.signature_types = signature_types;
3943 
3944          /* Create a fake outermost-level range covering the entire
3945             address range.  So we always have *something* to catch all
3946             variable declarations. */
3947          varstack_push( &cc, &varparser, td3,
3948                         unitary_range_list(0UL, ~0UL),
3949                         -1, False/*isFunc*/, NULL/*fbGX*/ );
3950 
3951          /* And set up the file name table.  When we come across the top
3952             level DIE for this CU (which is what the next call to
3953             read_DIE should process) we will copy all the file names out
3954             of the .debug_line img area and use this table to look up the
3955             copies when we later see filename numbers in DW_TAG_variables
3956             etc. */
3957          vg_assert(!varparser.filenameTable );
3958          varparser.filenameTable
3959             = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5",
3960                           ML_(dinfo_free),
3961                           sizeof(UChar*) );
3962          vg_assert(varparser.filenameTable);
3963 
3964          /* Now read the one-and-only top-level DIE for this CU. */
3965          vg_assert(varparser.sp == 0);
3966          read_DIE( rangestree,
3967                    tyents, tempvars, gexprs,
3968                    &typarser, &varparser,
3969                    &info, td3, &cc, 0 );
3970 
3971          cu_offset_now = get_position_of_Cursor( &info );
3972 
3973          if (0) VG_(printf)("Travelled: %llu  size %llu\n",
3974                             cu_offset_now - cc.cu_start_offset,
3975                             cc.unit_length + (cc.is_dw64 ? 12 : 4));
3976 
3977          /* How big the CU claims it is .. */
3978          cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4);
3979          /* .. vs how big we have found it to be */
3980          cu_amount_used = cu_offset_now - cc.cu_start_offset;
3981 
3982          if (1) TRACE_D3("offset now %lld, d-i-size %lld\n",
3983                          cu_offset_now, section_size);
3984          if (cu_offset_now > section_size)
3985             barf("toplevel DIEs beyond end of CU");
3986 
3987          /* If the CU is bigger than it claims to be, we've got a serious
3988             problem. */
3989          if (cu_amount_used > cu_size_including_IniLen)
3990             barf("CU's actual size appears to be larger than it claims it is");
3991 
3992          /* If the CU is smaller than it claims to be, we need to skip some
3993             bytes.  Loop updates cu_offset_new and cu_amount_used. */
3994          while (cu_amount_used < cu_size_including_IniLen
3995                 && get_remaining_length_Cursor( &info ) > 0) {
3996             if (0) VG_(printf)("SKIP\n");
3997             (void)get_UChar( &info );
3998             cu_offset_now = get_position_of_Cursor( &info );
3999             cu_amount_used = cu_offset_now - cc.cu_start_offset;
4000          }
4001 
4002          /* Preen to level -2.  DIEs have level >= 0 so -2 cannot occur
4003             anywhere else at all.  Our fake the-entire-address-space
4004             range is at level -1, so preening to -2 should completely
4005             empty the stack out. */
4006          TRACE_D3("\n");
4007          varstack_preen( &varparser, td3, -2 );
4008          /* Similarly, empty the type stack out. */
4009          typestack_preen( &typarser, td3, -2 );
4010 
4011          TRACE_D3("set_abbv_Cursor cache: %lu queries, %lu misses\n",
4012                   cc.saC_cache_queries, cc.saC_cache_misses);
4013 
4014          vg_assert(varparser.filenameTable );
4015          VG_(deleteXA)( varparser.filenameTable );
4016          varparser.filenameTable = NULL;
4017 
4018          if (cu_offset_now == section_size)
4019             break;
4020          /* else keep going */
4021       }
4022    }
4023 
4024    /* From here on we're post-processing the stuff we got
4025       out of the .debug_info section. */
4026    if (td3) {
4027       TRACE_D3("\n");
4028       ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array");
4029       TRACE_D3("\n");
4030       TRACE_D3("------ Compressing type entries ------\n");
4031    }
4032 
4033    tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6",
4034                                      sizeof(TyEntIndexCache) );
4035    ML_(TyEntIndexCache__invalidate)( tyents_cache );
4036    dedup_types( td3, tyents, tyents_cache );
4037    if (td3) {
4038       TRACE_D3("\n");
4039       ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression");
4040    }
4041 
4042    TRACE_D3("\n");
4043    TRACE_D3("------ Resolving the types of variables ------\n" );
4044    resolve_variable_types( barf, tyents, tyents_cache, tempvars );
4045 
4046    /* Copy all the non-INDIR tyents into a new table.  For large
4047       .so's, about 90% of the tyents will by now have been resolved to
4048       INDIRs, and we no longer need them, and so don't need to store
4049       them. */
4050    tyents_to_keep
4051       = VG_(newXA)( ML_(dinfo_zalloc),
4052                     "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
4053                     ML_(dinfo_free), sizeof(TyEnt) );
4054    n = VG_(sizeXA)( tyents );
4055    for (i = 0; i < n; i++) {
4056       TyEnt* ent = VG_(indexXA)( tyents, i );
4057       if (ent->tag != Te_INDIR)
4058          VG_(addToXA)( tyents_to_keep, ent );
4059    }
4060 
4061    VG_(deleteXA)( tyents );
4062    tyents = NULL;
4063    ML_(dinfo_free)( tyents_cache );
4064    tyents_cache = NULL;
4065 
4066    /* Sort tyents_to_keep so we can lookup in it.  A complete (if
4067       minor) waste of time, since tyents itself is sorted, but
4068       necessary since VG_(lookupXA) refuses to cooperate if we
4069       don't. */
4070    VG_(setCmpFnXA)( tyents_to_keep, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
4071    VG_(sortXA)( tyents_to_keep );
4072 
4073    /* Enable cacheing on tyents_to_keep */
4074    tyents_to_keep_cache
4075       = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8",
4076                            sizeof(TyEntIndexCache) );
4077    ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache );
4078 
4079    /* And record the tyents in the DebugInfo.  We do this before
4080       starting to hand variables to ML_(addVar), since if ML_(addVar)
4081       wants to do debug printing (of the types of said vars) then it
4082       will need the tyents.*/
4083    vg_assert(!di->admin_tyents);
4084    di->admin_tyents = tyents_to_keep;
4085 
4086    /* Bias all the location expressions. */
4087    TRACE_D3("\n");
4088    TRACE_D3("------ Biasing the location expressions ------\n" );
4089 
4090    n = VG_(sizeXA)( gexprs );
4091    for (i = 0; i < n; i++) {
4092       gexpr = *(GExpr**)VG_(indexXA)( gexprs, i );
4093       bias_GX( gexpr, di );
4094    }
4095 
4096    TRACE_D3("\n");
4097    TRACE_D3("------ Acquired the following variables: ------\n\n");
4098 
4099    /* Park (pointers to) all the vars in an XArray, so we can look up
4100       abstract origins quickly.  The array is sorted (hence, looked-up
4101       by) the .dioff fields.  Since the .dioffs should be in strictly
4102       ascending order, there is no need to sort the array after
4103       construction.  The ascendingness is however asserted for. */
4104    dioff_lookup_tab
4105       = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9",
4106                     ML_(dinfo_free),
4107                     sizeof(TempVar*) );
4108    vg_assert(dioff_lookup_tab);
4109 
4110    n = VG_(sizeXA)( tempvars );
4111    Word first_primary_var = 0;
4112    for (first_primary_var = 0;
4113         escn_debug_info_alt.szB/*really?*/ && first_primary_var < n;
4114         first_primary_var++) {
4115       varp = *(TempVar**)VG_(indexXA)( tempvars, first_primary_var );
4116       if (varp->dioff < escn_debug_info.szB + escn_debug_types.szB)
4117          break;
4118    }
4119    for (i = 0; i < n; i++) {
4120       varp = *(TempVar**)VG_(indexXA)( tempvars, (i + first_primary_var) % n );
4121       if (i > first_primary_var) {
4122          varp2 = *(TempVar**)VG_(indexXA)( tempvars,
4123                                            (i + first_primary_var - 1) % n );
4124          /* why should this hold?  Only, I think, because we've
4125             constructed the array by reading .debug_info sequentially,
4126             and so the array .dioff fields should reflect that, and be
4127             strictly ascending. */
4128          vg_assert(varp2->dioff < varp->dioff);
4129       }
4130       VG_(addToXA)( dioff_lookup_tab, &varp );
4131    }
4132    VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff );
4133    VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */
4134 
4135    /* Now visit each var.  Collect up as much info as possible for
4136       each var and hand it to ML_(addVar). */
4137    n = VG_(sizeXA)( tempvars );
4138    for (j = 0; j < n; j++) {
4139       TyEnt* ent;
4140       varp = *(TempVar**)VG_(indexXA)( tempvars, j );
4141 
4142       /* Possibly show .. */
4143       if (td3) {
4144          VG_(printf)("<%lx> addVar: level %d: %s :: ",
4145                      varp->dioff,
4146                      varp->level,
4147                      varp->name ? varp->name : "<anon_var>" );
4148          if (varp->typeR) {
4149             ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR );
4150          } else {
4151             VG_(printf)("NULL");
4152          }
4153          VG_(printf)("\n  Loc=");
4154          if (varp->gexpr) {
4155             ML_(pp_GX)(varp->gexpr);
4156          } else {
4157             VG_(printf)("NULL");
4158          }
4159          VG_(printf)("\n");
4160          if (varp->fbGX) {
4161             VG_(printf)("  FrB=");
4162             ML_(pp_GX)( varp->fbGX );
4163             VG_(printf)("\n");
4164          } else {
4165             VG_(printf)("  FrB=none\n");
4166          }
4167          VG_(printf)("  declared at: %s:%d\n",
4168                      varp->fName ? varp->fName : "NULL",
4169                      varp->fLine );
4170          if (varp->absOri != (UWord)D3_INVALID_CUOFF)
4171             VG_(printf)("  abstract origin: <%lx>\n", varp->absOri);
4172       }
4173 
4174       /* Skip variables which have no location.  These must be
4175          abstract instances; they are useless as-is since with no
4176          location they have no specified memory location.  They will
4177          presumably be referred to via the absOri fields of other
4178          variables. */
4179       if (!varp->gexpr) {
4180          TRACE_D3("  SKIP (no location)\n\n");
4181          continue;
4182       }
4183 
4184       /* So it has a location, at least.  If it refers to some other
4185          entry through its absOri field, pull in further info through
4186          that. */
4187       if (varp->absOri != (UWord)D3_INVALID_CUOFF) {
4188          Bool found;
4189          Word ixFirst, ixLast;
4190          TempVar key;
4191          TempVar* keyp = &key;
4192          TempVar *varAI;
4193          VG_(memset)(&key, 0, sizeof(key)); /* not necessary */
4194          key.dioff = varp->absOri; /* this is what we want to find */
4195          found = VG_(lookupXA)( dioff_lookup_tab, &keyp,
4196                                 &ixFirst, &ixLast );
4197          if (!found) {
4198             /* barf("DW_AT_abstract_origin can't be resolved"); */
4199             TRACE_D3("  SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
4200             continue;
4201          }
4202          /* If the following fails, there is more than one entry with
4203             the same dioff.  Which can't happen. */
4204          vg_assert(ixFirst == ixLast);
4205          varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst );
4206          /* stay sane */
4207          vg_assert(varAI);
4208          vg_assert(varAI->dioff == varp->absOri);
4209 
4210          /* Copy what useful info we can. */
4211          if (varAI->typeR && !varp->typeR)
4212             varp->typeR = varAI->typeR;
4213          if (varAI->name && !varp->name)
4214             varp->name = varAI->name;
4215          if (varAI->fName && !varp->fName)
4216             varp->fName = varAI->fName;
4217          if (varAI->fLine > 0 && varp->fLine == 0)
4218             varp->fLine = varAI->fLine;
4219       }
4220 
4221       /* Give it a name if it doesn't have one. */
4222       if (!varp->name)
4223          varp->name = ML_(addStr)( di, "<anon_var>", -1 );
4224 
4225       /* So now does it have enough info to be useful? */
4226       /* NOTE: re typeR: this is a hack.  If typeR is Te_UNKNOWN then
4227          the type didn't get resolved.  Really, in that case
4228          something's broken earlier on, and should be fixed, rather
4229          than just skipping the variable. */
4230       ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep,
4231                                          tyents_to_keep_cache,
4232                                          varp->typeR );
4233       /* The next two assertions should be guaranteed by
4234          our previous call to resolve_variable_types. */
4235       vg_assert(ent);
4236       vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN);
4237 
4238       if (ent->tag == Te_UNKNOWN) continue;
4239 
4240       vg_assert(varp->gexpr);
4241       vg_assert(varp->name);
4242       vg_assert(varp->typeR);
4243       vg_assert(varp->level >= 0);
4244 
4245       /* Ok.  So we're going to keep it.  Call ML_(addVar) once for
4246          each address range in which the variable exists. */
4247       TRACE_D3("  ACQUIRE for range(s) ");
4248       { AddrRange  oneRange;
4249         AddrRange* varPcRanges;
4250         Word       nVarPcRanges;
4251         /* Set up to iterate over address ranges, however
4252            represented. */
4253         if (varp->nRanges == 0 || varp->nRanges == 1) {
4254            vg_assert(!varp->rngMany);
4255            if (varp->nRanges == 0) {
4256               vg_assert(varp->rngOneMin == 0);
4257               vg_assert(varp->rngOneMax == 0);
4258            }
4259            nVarPcRanges = varp->nRanges;
4260            oneRange.aMin = varp->rngOneMin;
4261            oneRange.aMax = varp->rngOneMax;
4262            varPcRanges = &oneRange;
4263         } else {
4264            vg_assert(varp->rngMany);
4265            vg_assert(varp->rngOneMin == 0);
4266            vg_assert(varp->rngOneMax == 0);
4267            nVarPcRanges = VG_(sizeXA)(varp->rngMany);
4268            vg_assert(nVarPcRanges >= 2);
4269            vg_assert(nVarPcRanges == (Word)varp->nRanges);
4270            varPcRanges = VG_(indexXA)(varp->rngMany, 0);
4271         }
4272         if (varp->level == 0)
4273            vg_assert( nVarPcRanges == 1 );
4274         /* and iterate */
4275         for (i = 0; i < nVarPcRanges; i++) {
4276            Addr pcMin = varPcRanges[i].aMin;
4277            Addr pcMax = varPcRanges[i].aMax;
4278            vg_assert(pcMin <= pcMax);
4279            /* Level 0 is the global address range.  So at level 0 we
4280               don't want to bias pcMin/pcMax; but at all other levels
4281               we do since those are derived from svmas in the Dwarf
4282               we're reading.  Be paranoid ... */
4283            if (varp->level == 0) {
4284               vg_assert(pcMin == (Addr)0);
4285               vg_assert(pcMax == ~(Addr)0);
4286            } else {
4287               /* vg_assert(pcMin > (Addr)0);
4288                  No .. we can legitimately expect to see ranges like
4289                  0x0-0x11D (pre-biasing, of course). */
4290               vg_assert(pcMax < ~(Addr)0);
4291            }
4292 
4293            /* Apply text biasing, for non-global variables. */
4294            if (varp->level > 0) {
4295               pcMin += di->text_debug_bias;
4296               pcMax += di->text_debug_bias;
4297            }
4298 
4299            if (i > 0 && (i%2) == 0)
4300               TRACE_D3("\n                       ");
4301            TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax );
4302 
4303            ML_(addVar)(
4304               di, varp->level,
4305                   pcMin, pcMax,
4306                   varp->name,  varp->typeR,
4307                   varp->gexpr, varp->fbGX,
4308                   varp->fName, varp->fLine, td3
4309            );
4310         }
4311       }
4312 
4313       TRACE_D3("\n\n");
4314       /* and move on to the next var */
4315    }
4316 
4317    /* Now free all the TempVars */
4318    n = VG_(sizeXA)( tempvars );
4319    for (i = 0; i < n; i++) {
4320       varp = *(TempVar**)VG_(indexXA)( tempvars, i );
4321       ML_(dinfo_free)(varp);
4322    }
4323    VG_(deleteXA)( tempvars );
4324    tempvars = NULL;
4325 
4326    /* and the temp lookup table */
4327    VG_(deleteXA)( dioff_lookup_tab );
4328 
4329    /* and the ranges tree.  Note that we need to also free the XArrays
4330       which constitute the keys, hence pass VG_(deleteXA) as a
4331       key-finalizer. */
4332    VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL );
4333 
4334    /* and the tyents_to_keep cache */
4335    ML_(dinfo_free)( tyents_to_keep_cache );
4336    tyents_to_keep_cache = NULL;
4337 
4338    vg_assert( varparser.filenameTable == NULL );
4339 
4340    /* And the signatured type hash.  */
4341    VG_(HT_destruct) ( signature_types, ML_(dinfo_free) );
4342 
4343    /* record the GExprs in di so they can be freed later */
4344    vg_assert(!di->admin_gexprs);
4345    di->admin_gexprs = gexprs;
4346 }
4347 
4348 
4349 /*------------------------------------------------------------*/
4350 /*---                                                      ---*/
4351 /*--- The "new" DWARF3 reader -- top level control logic   ---*/
4352 /*---                                                      ---*/
4353 /*------------------------------------------------------------*/
4354 
4355 static Bool               d3rd_jmpbuf_valid  = False;
4356 static const HChar*       d3rd_jmpbuf_reason = NULL;
4357 static VG_MINIMAL_JMP_BUF(d3rd_jmpbuf);
4358 
barf(const HChar * reason)4359 static __attribute__((noreturn)) void barf ( const HChar* reason ) {
4360    vg_assert(d3rd_jmpbuf_valid);
4361    d3rd_jmpbuf_reason = reason;
4362    VG_MINIMAL_LONGJMP(d3rd_jmpbuf);
4363    /*NOTREACHED*/
4364    vg_assert(0);
4365 }
4366 
4367 
4368 void
ML_(new_dwarf3_reader)4369 ML_(new_dwarf3_reader) (
4370    struct _DebugInfo* di,
4371    DiSlice escn_debug_info,      DiSlice escn_debug_types,
4372    DiSlice escn_debug_abbv,      DiSlice escn_debug_line,
4373    DiSlice escn_debug_str,       DiSlice escn_debug_ranges,
4374    DiSlice escn_debug_loc,       DiSlice escn_debug_info_alt,
4375    DiSlice escn_debug_abbv_alt,  DiSlice escn_debug_line_alt,
4376    DiSlice escn_debug_str_alt
4377 )
4378 {
4379    volatile Int  jumped;
4380    volatile Bool td3 = di->trace_symtab;
4381 
4382    /* Run the _wrk function to read the dwarf3.  If it succeeds, it
4383       just returns normally.  If there is any failure, it longjmp's
4384       back here, having first set d3rd_jmpbuf_reason to something
4385       useful. */
4386    vg_assert(d3rd_jmpbuf_valid  == False);
4387    vg_assert(d3rd_jmpbuf_reason == NULL);
4388 
4389    d3rd_jmpbuf_valid = True;
4390    jumped = VG_MINIMAL_SETJMP(d3rd_jmpbuf);
4391    if (jumped == 0) {
4392       /* try this ... */
4393       new_dwarf3_reader_wrk( di, barf,
4394                              escn_debug_info,     escn_debug_types,
4395                              escn_debug_abbv,     escn_debug_line,
4396                              escn_debug_str,      escn_debug_ranges,
4397                              escn_debug_loc,      escn_debug_info_alt,
4398                              escn_debug_abbv_alt, escn_debug_line_alt,
4399                              escn_debug_str_alt );
4400       d3rd_jmpbuf_valid = False;
4401       TRACE_D3("\n------ .debug_info reading was successful ------\n");
4402    } else {
4403       /* It longjmp'd. */
4404       d3rd_jmpbuf_valid = False;
4405       /* Can't longjump without giving some sort of reason. */
4406       vg_assert(d3rd_jmpbuf_reason != NULL);
4407 
4408       TRACE_D3("\n------ .debug_info reading failed ------\n");
4409 
4410       ML_(symerr)(di, True, d3rd_jmpbuf_reason);
4411    }
4412 
4413    d3rd_jmpbuf_valid  = False;
4414    d3rd_jmpbuf_reason = NULL;
4415 }
4416 
4417 
4418 
4419 /* --- Unused code fragments which might be useful one day. --- */
4420 
#if 0
   /* Read the arange tables.

      Disabled fragment: it is not a complete function.  It expects
      'aranges' (a Cursor), 'debug_aranges_img', 'debug_aranges_sz',
      'barf' and 'td3' to be in scope wherever it is pasted.

      Walks every set in .debug_aranges, tracing the set header and
      then each (address, length) pair up to and including the
      terminating (0, 0) pair. */
   TRACE_SYMTAB("\n");
   TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
   init_Cursor( &aranges, debug_aranges_img,
                debug_aranges_sz, 0, barf,
                "Overrun whilst reading .debug_aranges section" );
   while (True) {
      ULong  len, d_i_offset;
      Bool   is64;
      UShort version;
      UChar  asize, segsize;

      if (is_at_end_Cursor( &aranges ))
         break;
      /* Read one arange thingy */
      /* initial_length field */
      len = get_Initial_Length( &is64, &aranges,
               "in .debug_aranges: invalid initial-length field" );
      version    = get_UShort( &aranges );
      d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
      asize      = get_UChar( &aranges );
      segsize    = get_UChar( &aranges );
      TRACE_D3("  Length:                   %llu\n", len);
      TRACE_D3("  Version:                  %d\n", (Int)version);
      TRACE_D3("  Offset into .debug_info:  %llx\n", d_i_offset);
      TRACE_D3("  Pointer Size:             %d\n", (Int)asize);
      TRACE_D3("  Segment Size:             %d\n", (Int)segsize);
      TRACE_D3("\n");
      TRACE_D3("    Address            Length\n");

      /* 'asize' comes straight from the file.  It is used as a
         modulus just below (so zero would divide by zero), and the
         pair reader only knows how to fetch 4- or 8-byte words.
         Refuse anything else rather than mis-decode the table. */
      if (asize != 4 && asize != 8)
         barf( "in .debug_aranges: unsupported address size" );

      /* Skip padding so that the first pair starts at a cursor
         position which is a multiple of the pair size. */
      while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) {
         (void)get_UChar( & aranges );
      }
      /* Dump pairs; an all-zero pair ends this set. */
      while (True) {
         ULong address = get_Dwarfish_UWord( &aranges, asize==8 );
         ULong length = get_Dwarfish_UWord( &aranges, asize==8 );
         TRACE_D3("    0x%016llx 0x%llx\n", address, length);
         if (address == 0 && length == 0) break;
      }
   }
   TRACE_SYMTAB("\n");
#endif
4464 
4465 #endif // defined(VGO_linux) || defined(VGO_darwin)
4466 
4467 /*--------------------------------------------------------------------*/
4468 /*--- end                                                          ---*/
4469 /*--------------------------------------------------------------------*/
4470