1 /* -*- mode: C; c-basic-offset: 3; -*- */
2
3 /*--------------------------------------------------------------------*/
4 /*--- Read DWARF3/4 ".debug_info" sections (DIE trees). ---*/
5 /*--- readdwarf3.c ---*/
6 /*--------------------------------------------------------------------*/
7
8 /*
9 This file is part of Valgrind, a dynamic binary instrumentation
10 framework.
11
12 Copyright (C) 2008-2017 OpenWorks LLP
13 info@open-works.co.uk
14
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31
32 Neither the names of the U.S. Department of Energy nor the
33 University of California nor the names of its contributors may be
34 used to endorse or promote products derived from this software
35 without prior written permission.
36 */
37
38 #if defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
39
40 /* REFERENCE (without which this code will not make much sense):
41
42 DWARF Debugging Information Format, Version 3,
43 dated 20 December 2005 (the "D3 spec").
44
45 Available at http://www.dwarfstd.org/Dwarf3.pdf. There's also a
46 .doc (MS Word) version, but for some reason the section numbers
47 between the Word and PDF versions differ by 1 in the first digit.
48 All section references in this code are to the PDF version.
49
50 CURRENT HACKS:
51
52 DW_TAG_{const,volatile}_type no DW_AT_type is allowed; it is
53 assumed to mean "const void" or "volatile void" respectively.
54 GDB appears to interpret them like this, anyway.
55
56 In many cases it is important to know the svma of a CU (the "base
57 address of the CU", as the D3 spec calls it). There are some
58 situations in which the spec implies this value is unknown, but the
   Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
60 merely zero when not explicitly stated. So we too have to make
61 that assumption.
62
63 POTENTIAL BUG? Spotted 6 Sept 08. Why doesn't
64 unitary_range_list() bias the resulting range list in the same way
65 that its more general cousin, get_range_list(), does? I don't
66 know.
67
68 TODO, 2008 Feb 17:
69
70 get rid of cu_svma_known and document the assumed-zero svma hack.
71
72 ML_(sizeOfType): differentiate between zero sized types and types
73 for which the size is unknown. Is this important? I don't know.
74
75 DW_TAG_array_types: deal with explicit sizes (currently we compute
76 the size from the bounds and the element size, although that's
   fragile if the bounds are incompletely specified, or completely
78 absent)
79
80 Document reason for difference (by 1) of stack preening depth in
81 parse_var_DIE vs parse_type_DIE.
82
83 Don't hand to ML_(addVars), vars whose locations are entirely in
84 registers (DW_OP_reg*). This is merely a space-saving
85 optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
86 expressions correctly, by failing to evaluate them and hence
87 effectively ignoring the variable with which they are associated.
88
89 Deal with DW_TAG_array_types which have element size != stride
90
91 In some cases, the info for a variable is split between two
92 different DIEs (generally a declarer and a definer). We punt on
93 these. Could do better here.
94
95 The 'data_bias' argument passed to the expression evaluator
96 (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
97 MaybeUWord, to make it clear when we do vs don't know what it is
98 for the evaluation of an expression. At the moment zero is passed
99 for this parameter in the don't know case. That's a bit fragile
100 and obscure; using a MaybeUWord would be clearer.
101
102 POTENTIAL PERFORMANCE IMPROVEMENTS:
103
104 Currently, duplicate removal and all other queries for the type
   entities array are done using cuOffset-based pointing, which
106 involves a binary search (VG_(lookupXA)) for each access. This is
107 wildly inefficient, although simple. It would be better to
108 translate all the cuOffset-based references (iow, all the "R" and
109 "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
110 'tyents' right at the start of dedup_types(), and use direct
111 indexing (VG_(indexXA)) wherever possible after that.
112
113 cmp__XArrays_of_AddrRange is also a performance bottleneck. Move
114 VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
115 points, and possibly also make an _UNCHECKED version which skips
116 the range checks in performance-critical situations such as this.
117
118 Handle interaction between read_DIE and parse_{var,type}_DIE
119 better. Currently read_DIE reads the entire DIE just to find where
120 the end is (and for debug printing), so that it can later reliably
121 move the cursor to the end regardless of what parse_{var,type}_DIE
122 do. This means many DIEs (most, even?) are read twice. It would
123 be smarter to make parse_{var,type}_DIE return a Bool indicating
124 whether or not they advanced the DIE cursor, and only if they
125 didn't should read_DIE itself read through the DIE.
126
127 ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
128 zero variables in their .vars XArray. Rather than have an XArray
129 with zero elements (which uses 2 malloc'd blocks), allow the .vars
130 pointer to be NULL in this case.
131
132 More generally, reduce the amount of memory allocated and freed
133 while reading Dwarf3 type/variable information. Even modest (20MB)
134 objects cause this module to allocate and free hundreds of
135 thousands of small blocks, and ML_(arena_malloc) and its various
136 groupies always show up at the top of performance profiles. */
137
138 #include "pub_core_basics.h"
139 #include "pub_core_debuginfo.h"
140 #include "pub_core_libcbase.h"
141 #include "pub_core_libcassert.h"
142 #include "pub_core_libcprint.h"
143 #include "pub_core_libcsetjmp.h" // setjmp facilities
144 #include "pub_core_hashtable.h"
145 #include "pub_core_options.h"
146 #include "pub_core_tooliface.h" /* VG_(needs) */
147 #include "pub_core_xarray.h"
148 #include "pub_core_wordfm.h"
149 #include "priv_misc.h" /* dinfo_zalloc/free */
150 #include "priv_image.h"
151 #include "priv_tytypes.h"
152 #include "priv_d3basics.h"
153 #include "priv_storage.h"
154 #include "priv_readdwarf3.h" /* self */
155
156
157 /*------------------------------------------------------------*/
158 /*--- ---*/
159 /*--- Basic machinery for parsing DIEs. ---*/
160 /*--- ---*/
161 /*------------------------------------------------------------*/
162
/* Debug tracing helpers.  Both macros refer to a variable named 'td3',
   which must be in scope wherever they are used.

   TRACE_D3 prints (printf-style) only when 'td3' is set.  It is wrapped
   in do { } while (0) so that it expands to exactly one statement and
   can therefore be used safely as the body of an unbraced if/else.
   Every use must be terminated by a semicolon. */
#define TRACE_D3(format, args...) \
   do { \
      if (UNLIKELY(td3)) { VG_(printf)(format, ## args); } \
   } while (0)
/* TD3 is the bare "is tracing enabled?" test, for guarding larger
   blocks of trace-only code. */
#define TD3 (UNLIKELY(td3))

/* Reserved cuOffset values: "invalid" and "fake void" respectively. */
#define D3_INVALID_CUOFF  ((UWord)(-1UL))
#define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
169
170 typedef
171 struct {
172 DiSlice sli; // to which this cursor applies
173 DiOffT sli_next; // offset in underlying DiImage; must be >= sli.ioff
174 void (*barf)( const HChar* ) __attribute__((noreturn));
175 const HChar* barfstr;
176 }
177 Cursor;
178
is_sane_Cursor(const Cursor * c)179 static inline Bool is_sane_Cursor ( const Cursor* c ) {
180 if (!c) return False;
181 if (!c->barf) return False;
182 if (!c->barfstr) return False;
183 if (!ML_(sli_is_valid)(c->sli)) return False;
184 if (c->sli.ioff == DiOffT_INVALID) return False;
185 if (c->sli_next < c->sli.ioff) return False;
186 return True;
187 }
188
189 // Initialise a cursor from a DiSlice (ELF section, really) so as to
190 // start reading at offset |sli_initial_offset| from the start of the
191 // slice.
init_Cursor(Cursor * c,DiSlice sli,ULong sli_initial_offset,void (* barf)(const HChar *),const HChar * barfstr)192 static void init_Cursor ( /*OUT*/Cursor* c,
193 DiSlice sli,
194 ULong sli_initial_offset,
195 __attribute__((noreturn)) void (*barf)(const HChar*),
196 const HChar* barfstr )
197 {
198 vg_assert(c);
199 VG_(bzero_inline)(c, sizeof(*c));
200 c->sli = sli;
201 c->sli_next = c->sli.ioff + sli_initial_offset;
202 c->barf = barf;
203 c->barfstr = barfstr;
204 vg_assert(is_sane_Cursor(c));
205 }
206
is_at_end_Cursor(const Cursor * c)207 static Bool is_at_end_Cursor ( const Cursor* c ) {
208 vg_assert(is_sane_Cursor(c));
209 return c->sli_next >= c->sli.ioff + c->sli.szB;
210 }
211
get_position_of_Cursor(const Cursor * c)212 static inline ULong get_position_of_Cursor ( const Cursor* c ) {
213 vg_assert(is_sane_Cursor(c));
214 return c->sli_next - c->sli.ioff;
215 }
/* Move the read position to slice-relative offset 'pos'.  The cursor
   is sanity-checked after the move, not before. */
static inline void set_position_of_Cursor ( Cursor* c, ULong pos ) {
   c->sli_next = pos + c->sli.ioff;
   vg_assert(is_sane_Cursor(c));
}
/* Step the read position forwards by 'delta' bytes.  The cursor is
   sanity-checked after the move, not before. */
static inline void advance_position_of_Cursor ( Cursor* c, ULong delta ) {
   c->sli_next = c->sli_next + delta;
   vg_assert(is_sane_Cursor(c));
}
224
get_remaining_length_Cursor(const Cursor * c)225 static /*signed*/Long get_remaining_length_Cursor ( const Cursor* c ) {
226 vg_assert(is_sane_Cursor(c));
227 return c->sli.ioff + c->sli.szB - c->sli_next;
228 }
229
230 //static void* get_address_of_Cursor ( Cursor* c ) {
231 // vg_assert(is_sane_Cursor(c));
232 // return &c->region_start_img[ c->region_next ];
233 //}
234
get_DiCursor_from_Cursor(const Cursor * c)235 static DiCursor get_DiCursor_from_Cursor ( const Cursor* c ) {
236 return mk_DiCursor(c->sli.img, c->sli_next);
237 }
238
239 /* FIXME: document assumptions on endianness for
240 get_UShort/UInt/ULong. */
get_UChar(Cursor * c)241 static inline UChar get_UChar ( Cursor* c ) {
242 UChar r;
243 vg_assert(is_sane_Cursor(c));
244 if (c->sli_next + sizeof(UChar) > c->sli.ioff + c->sli.szB) {
245 c->barf(c->barfstr);
246 /*NOTREACHED*/
247 vg_assert(0);
248 }
249 r = ML_(img_get_UChar)(c->sli.img, c->sli_next);
250 c->sli_next += sizeof(UChar);
251 return r;
252 }
get_UShort(Cursor * c)253 static UShort get_UShort ( Cursor* c ) {
254 UShort r;
255 vg_assert(is_sane_Cursor(c));
256 if (c->sli_next + sizeof(UShort) > c->sli.ioff + c->sli.szB) {
257 c->barf(c->barfstr);
258 /*NOTREACHED*/
259 vg_assert(0);
260 }
261 r = ML_(img_get_UShort)(c->sli.img, c->sli_next);
262 c->sli_next += sizeof(UShort);
263 return r;
264 }
get_UInt(Cursor * c)265 static UInt get_UInt ( Cursor* c ) {
266 UInt r;
267 vg_assert(is_sane_Cursor(c));
268 if (c->sli_next + sizeof(UInt) > c->sli.ioff + c->sli.szB) {
269 c->barf(c->barfstr);
270 /*NOTREACHED*/
271 vg_assert(0);
272 }
273 r = ML_(img_get_UInt)(c->sli.img, c->sli_next);
274 c->sli_next += sizeof(UInt);
275 return r;
276 }
get_ULong(Cursor * c)277 static ULong get_ULong ( Cursor* c ) {
278 ULong r;
279 vg_assert(is_sane_Cursor(c));
280 if (c->sli_next + sizeof(ULong) > c->sli.ioff + c->sli.szB) {
281 c->barf(c->barfstr);
282 /*NOTREACHED*/
283 vg_assert(0);
284 }
285 r = ML_(img_get_ULong)(c->sli.img, c->sli_next);
286 c->sli_next += sizeof(ULong);
287 return r;
288 }
get_ULEB128(Cursor * c)289 static ULong get_ULEB128 ( Cursor* c ) {
290 ULong result;
291 Int shift;
292 UChar byte;
293 /* unroll first iteration */
294 byte = get_UChar( c );
295 result = (ULong)(byte & 0x7f);
296 if (LIKELY(!(byte & 0x80))) return result;
297 shift = 7;
298 /* end unroll first iteration */
299 do {
300 byte = get_UChar( c );
301 result |= ((ULong)(byte & 0x7f)) << shift;
302 shift += 7;
303 } while (byte & 0x80);
304 return result;
305 }
get_SLEB128(Cursor * c)306 static Long get_SLEB128 ( Cursor* c ) {
307 ULong result = 0;
308 Int shift = 0;
309 UChar byte;
310 do {
311 byte = get_UChar(c);
312 result |= ((ULong)(byte & 0x7f)) << shift;
313 shift += 7;
314 } while (byte & 0x80);
315 if (shift < 64 && (byte & 0x40))
316 result |= -(1ULL << shift);
317 return result;
318 }
319
320 /* Assume 'c' points to the start of a string. Return a DiCursor of
321 whatever it points at, and advance it past the terminating zero.
322 This makes it safe for the caller to then copy the string with
323 ML_(addStr), since (w.r.t. image overruns) the process of advancing
324 past the terminating zero will already have "vetted" the string. */
get_AsciiZ(Cursor * c)325 static DiCursor get_AsciiZ ( Cursor* c ) {
326 UChar uc;
327 DiCursor res = get_DiCursor_from_Cursor(c);
328 do { uc = get_UChar(c); } while (uc != 0);
329 return res;
330 }
331
peek_ULEB128(Cursor * c)332 static ULong peek_ULEB128 ( Cursor* c ) {
333 DiOffT here = c->sli_next;
334 ULong r = get_ULEB128( c );
335 c->sli_next = here;
336 return r;
337 }
peek_UChar(Cursor * c)338 static UChar peek_UChar ( Cursor* c ) {
339 DiOffT here = c->sli_next;
340 UChar r = get_UChar( c );
341 c->sli_next = here;
342 return r;
343 }
344
get_Dwarfish_UWord(Cursor * c,Bool is_dw64)345 static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
346 return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c);
347 }
348
get_UWord(Cursor * c)349 static UWord get_UWord ( Cursor* c ) {
350 vg_assert(sizeof(UWord) == sizeof(void*));
351 if (sizeof(UWord) == 4) return get_UInt(c);
352 if (sizeof(UWord) == 8) return get_ULong(c);
353 vg_assert(0);
354 }
355
356 /* Read a DWARF3 'Initial Length' field */
get_Initial_Length(Bool * is64,Cursor * c,const HChar * barfMsg)357 static ULong get_Initial_Length ( /*OUT*/Bool* is64,
358 Cursor* c,
359 const HChar* barfMsg )
360 {
361 ULong w64;
362 UInt w32;
363 *is64 = False;
364 w32 = get_UInt( c );
365 if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) {
366 c->barf( barfMsg );
367 }
368 else if (w32 == 0xFFFFFFFF) {
369 *is64 = True;
370 w64 = get_ULong( c );
371 } else {
372 *is64 = False;
373 w64 = (ULong)w32;
374 }
375 return w64;
376 }
377
378
379 /*------------------------------------------------------------*/
380 /*--- ---*/
381 /*--- "CUConst" structure ---*/
382 /*--- ---*/
383 /*------------------------------------------------------------*/
384
385 typedef
386 struct _name_form {
387 ULong at_name; // Dwarf Attribute name
388 ULong at_form; // Dwarf Attribute form
389 UInt skip_szB; // Nr of bytes skippable from here ...
390 UInt next_nf; // ... to reach this attr/form index in the g_abbv.nf
391 } name_form;
392 /* skip_szB and next_nf are used to optimise the skipping of uninteresting DIEs.
393 Each name_form maintains how many (fixed) nr of bytes can be skipped from
394 the beginning of this form till the next attr/form to look at.
   The next form to look at can be:
396 an 'interesting' attr/form to read while skipping a DIE
397 (currently, this is only DW_AT_sibling)
398 or
399 a variable length form which must be read to be skipped.
400 For a variable length form, the skip_szB will be equal to VARSZ_FORM.
401
402 Note: this technique could also be used to speed up the parsing
403 of DIEs : for each parser kind, we could have the nr of bytes
404 to skip to directly reach the interesting form(s) for the parser. */
405
406 typedef
407 struct _g_abbv {
408 struct _g_abbv *next; // read/write by hash table.
409 UWord abbv_code; // key, read by hash table
410 ULong atag;
411 ULong has_children;
412 name_form nf[0];
413 /* Variable-length array of name/form pairs, terminated
414 by a 0/0 pair.
415 The skip_szB/next_nf allows to skip efficiently a DIE
416 described by this g_abbv; */
417 } g_abbv;
418
419 /* Holds information that is constant through the parsing of a
420 Compilation Unit. This is basically plumbed through to
421 everywhere. */
422 typedef
423 struct {
424 /* Call here if anything goes wrong */
425 void (*barf)( const HChar* ) __attribute__((noreturn));
426 /* Is this 64-bit DWARF ? */
427 Bool is_dw64;
428 /* Which DWARF version ? (2, 3 or 4) */
429 UShort version;
430 /* Length of this Compilation Unit, as stated in the
431 .unit_length :: InitialLength field of the CU Header.
432 However, this size (as specified by the D3 spec) does not
433 include the size of the .unit_length field itself, which is
434 either 4 or 12 bytes (32-bit or 64-bit Dwarf3). That value
435 can be obtained through the expression ".is_dw64 ? 12 : 4". */
436 ULong unit_length;
437 /* Offset of start of this unit in .debug_info */
438 UWord cu_start_offset;
439 /* SVMA for this CU. In the D3 spec, is known as the "base
440 address of the compilation unit (last para sec 3.1.1).
441 Needed for (amongst things) interpretation of location-list
442 values. */
443 Addr cu_svma;
444 Bool cu_svma_known;
445
446 /* The debug_abbreviations table to be used for this Unit */
447 //UChar* debug_abbv;
448 /* Upper bound on size thereof (an overestimate, in general) */
449 //UWord debug_abbv_maxszB;
450 /* A bounded area of the image, to be used as the
451 debug_abbreviations table tobe used for this Unit. */
452 DiSlice debug_abbv;
453
454 /* Image information for various sections. */
455 DiSlice escn_debug_str;
456 DiSlice escn_debug_ranges;
457 DiSlice escn_debug_loc;
458 DiSlice escn_debug_line;
459 DiSlice escn_debug_info;
460 DiSlice escn_debug_types;
461 DiSlice escn_debug_info_alt;
462 DiSlice escn_debug_str_alt;
463 /* How much to add to .debug_types resp. alternate .debug_info offsets
464 in cook_die*. */
465 UWord types_cuOff_bias;
466 UWord alt_cuOff_bias;
467 /* --- Needed so we can add stuff to the string table. --- */
468 struct _DebugInfo* di;
469 /* --- a hash table of g_abbv (i.e. parsed abbreviations) --- */
470 VgHashTable *ht_abbvs;
471
472 /* True if this came from .debug_types; otherwise it came from
473 .debug_info. */
474 Bool is_type_unit;
475 /* For a unit coming from .debug_types, these hold the TU's type
476 signature and the uncooked DIE offset of the TU's signatured
477 type. For a unit coming from .debug_info, these are unused. */
478 ULong type_signature;
479 ULong type_offset;
480
481 /* Signatured type hash; computed once and then shared by all
482 CUs. */
483 VgHashTable *signature_types;
484
485 /* True if this came from alternate .debug_info; otherwise
486 it came from normal .debug_info or .debug_types. */
487 Bool is_alt_info;
488 }
489 CUConst;
490
491
492 /* Return the cooked value of DIE depending on whether CC represents a
493 .debug_types unit. To cook a DIE, we pretend that the .debug_info,
494 .debug_types and optional alternate .debug_info sections form
495 a contiguous whole, so that DIEs coming from .debug_types are numbered
496 starting at the end of .debug_info and DIEs coming from alternate
497 .debug_info are numbered starting at the end of .debug_types. */
cook_die(const CUConst * cc,UWord die)498 static UWord cook_die( const CUConst* cc, UWord die )
499 {
500 if (cc->is_type_unit)
501 die += cc->types_cuOff_bias;
502 else if (cc->is_alt_info)
503 die += cc->alt_cuOff_bias;
504 return die;
505 }
506
507 /* Like cook_die, but understand that DIEs coming from a
508 DW_FORM_ref_sig8 reference are already cooked. Also, handle
509 DW_FORM_GNU_ref_alt from within primary .debug_info or .debug_types
510 as reference to alternate .debug_info. */
cook_die_using_form(const CUConst * cc,UWord die,DW_FORM form)511 static UWord cook_die_using_form( const CUConst *cc, UWord die, DW_FORM form)
512 {
513 if (form == DW_FORM_ref_sig8)
514 return die;
515 if (form == DW_FORM_GNU_ref_alt)
516 return die + cc->alt_cuOff_bias;
517 return cook_die( cc, die );
518 }
519
520 /* Return the uncooked offset of DIE and set *TYPE_FLAG to true if the DIE
521 came from the .debug_types section and *ALT_FLAG to true if the DIE
522 came from alternate .debug_info section. */
uncook_die(const CUConst * cc,UWord die,Bool * type_flag,Bool * alt_flag)523 static UWord uncook_die( const CUConst *cc, UWord die, /*OUT*/Bool *type_flag,
524 Bool *alt_flag )
525 {
526 *alt_flag = False;
527 *type_flag = False;
528 /* The use of escn_debug_{info,types}.szB seems safe to me even if
529 escn_debug_{info,types} are DiSlice_INVALID (meaning the
530 sections were not found), because DiSlice_INVALID.szB is always
531 zero. That said, it seems unlikely we'd ever get here if
532 .debug_info or .debug_types were missing. */
533 if (die >= cc->escn_debug_info.szB) {
534 if (die >= cc->escn_debug_info.szB + cc->escn_debug_types.szB) {
535 *alt_flag = True;
536 die -= cc->escn_debug_info.szB + cc->escn_debug_types.szB;
537 } else {
538 *type_flag = True;
539 die -= cc->escn_debug_info.szB;
540 }
541 }
542 return die;
543 }
544
545 /*------------------------------------------------------------*/
546 /*--- ---*/
547 /*--- Helper functions for Guarded Expressions ---*/
548 /*--- ---*/
549 /*------------------------------------------------------------*/
550
551 /* Parse the location list starting at img-offset 'debug_loc_offset'
552 in .debug_loc. Results are biased with 'svma_of_referencing_CU'
553 and so I believe are correct SVMAs for the object as a whole. This
554 function allocates the UChar*, and the caller must deallocate it.
555 The resulting block is in so-called Guarded-Expression format.
556
557 Guarded-Expression format is similar but not identical to the DWARF3
558 location-list format. The format of each returned block is:
559
560 UChar biasMe;
561 UChar isEnd;
562 followed by zero or more of
563
564 (Addr aMin; Addr aMax; UShort nbytes; ..bytes..; UChar isEnd)
565
   '..bytes..' is a standard DWARF3 location expression which is
567 valid when aMin <= pc <= aMax (possibly after suitable biasing).
568
569 The number of bytes in '..bytes..' is nbytes.
570
571 The end of the sequence is marked by an isEnd == 1 value. All
572 previous isEnd values must be zero.
573
574 biasMe is 1 if the aMin/aMax fields need this DebugInfo's
   text_bias added before use, and 0 if this is not necessary (the
   GX is ready to go).
577
578 Hence the block can be quickly parsed and is self-describing. Note
579 that aMax is 1 less than the corresponding value in a DWARF3
580 location list. Zero length ranges, with aMax == aMin-1, are not
581 allowed.
582 */
583 /* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
584 it more logically belongs. */
585
586
587 /* Apply a text bias to a GX. */
bias_GX(GExpr * gx,const DebugInfo * di)588 static void bias_GX ( /*MOD*/GExpr* gx, const DebugInfo* di )
589 {
590 UShort nbytes;
591 UChar* p = &gx->payload[0];
592 UChar* pA;
593 UChar uc;
594 uc = *p++; /*biasMe*/
595 if (uc == 0)
596 return;
597 vg_assert(uc == 1);
598 p[-1] = 0; /* mark it as done */
599 while (True) {
600 uc = *p++;
601 if (uc == 1)
602 break; /*isEnd*/
603 vg_assert(uc == 0);
604 /* t-bias aMin */
605 pA = (UChar*)p;
606 ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
607 p += sizeof(Addr);
608 /* t-bias aMax */
609 pA = (UChar*)p;
610 ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
611 p += sizeof(Addr);
612 /* nbytes, and actual expression */
613 nbytes = ML_(read_UShort)(p); p += sizeof(UShort);
614 p += nbytes;
615 }
616 }
617
618 __attribute__((noinline))
make_singleton_GX(DiCursor block,ULong nbytes)619 static GExpr* make_singleton_GX ( DiCursor block, ULong nbytes )
620 {
621 SizeT bytesReqd;
622 GExpr* gx;
623 UChar *p, *pstart;
624
625 vg_assert(sizeof(UWord) == sizeof(Addr));
626 vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
627 bytesReqd
628 = sizeof(UChar) /*biasMe*/ + sizeof(UChar) /*!isEnd*/
629 + sizeof(UWord) /*aMin*/ + sizeof(UWord) /*aMax*/
630 + sizeof(UShort) /*nbytes*/ + (SizeT)nbytes
631 + sizeof(UChar); /*isEnd*/
632
633 gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1",
634 sizeof(GExpr) + bytesReqd );
635
636 p = pstart = &gx->payload[0];
637
638 p = ML_(write_UChar)(p, 0); /*biasMe*/
639 p = ML_(write_UChar)(p, 0); /*!isEnd*/
640 p = ML_(write_Addr)(p, 0); /*aMin*/
641 p = ML_(write_Addr)(p, ~0); /*aMax*/
642 p = ML_(write_UShort)(p, nbytes); /*nbytes*/
643 ML_(cur_read_get)(p, block, nbytes); p += nbytes;
644 p = ML_(write_UChar)(p, 1); /*isEnd*/
645
646 vg_assert( (SizeT)(p - pstart) == bytesReqd);
647 vg_assert( &gx->payload[bytesReqd]
648 == ((UChar*)gx) + sizeof(GExpr) + bytesReqd );
649
650 return gx;
651 }
652
653 __attribute__((noinline))
make_general_GX(const CUConst * cc,Bool td3,ULong debug_loc_offset,Addr svma_of_referencing_CU)654 static GExpr* make_general_GX ( const CUConst* cc,
655 Bool td3,
656 ULong debug_loc_offset,
657 Addr svma_of_referencing_CU )
658 {
659 Addr base;
660 Cursor loc;
661 XArray* xa; /* XArray of UChar */
662 GExpr* gx;
663 Word nbytes;
664
665 vg_assert(sizeof(UWord) == sizeof(Addr));
666 if (!ML_(sli_is_valid)(cc->escn_debug_loc) || cc->escn_debug_loc.szB == 0)
667 cc->barf("make_general_GX: .debug_loc is empty/missing");
668
669 init_Cursor( &loc, cc->escn_debug_loc, 0, cc->barf,
670 "Overrun whilst reading .debug_loc section(2)" );
671 set_position_of_Cursor( &loc, debug_loc_offset );
672
673 TRACE_D3("make_general_GX (.debug_loc_offset = %llu, ioff = %llu) {\n",
674 debug_loc_offset, get_DiCursor_from_Cursor(&loc).ioff );
675
676 /* Who frees this xa? It is freed before this fn exits. */
677 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1",
678 ML_(dinfo_free),
679 sizeof(UChar) );
680
681 { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
682
683 base = 0;
684 while (True) {
685 Bool acquire;
686 UWord len;
687 /* Read a (host-)word pair. This is something of a hack since
688 the word size to read is really dictated by the ELF file;
689 however, we assume we're reading a file with the same
690 word-sizeness as the host. Reasonably enough. */
691 UWord w1 = get_UWord( &loc );
692 UWord w2 = get_UWord( &loc );
693
694 TRACE_D3(" %08lx %08lx\n", w1, w2);
695 if (w1 == 0 && w2 == 0)
696 break; /* end of list */
697
698 if (w1 == -1UL) {
699 /* new value for 'base' */
700 base = w2;
701 continue;
702 }
703
704 /* else a location expression follows */
705 /* else enumerate [w1+base, w2+base) */
706 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
707 (sec 2.17.2) */
708 if (w1 > w2) {
709 TRACE_D3("negative range is for .debug_loc expr at "
710 "file offset %llu\n",
711 debug_loc_offset);
712 cc->barf( "negative range in .debug_loc section" );
713 }
714
715 /* ignore zero length ranges */
716 acquire = w1 < w2;
717 len = (UWord)get_UShort( &loc );
718
719 if (acquire) {
720 UWord w;
721 UShort s;
722 UChar c;
723 c = 0; /* !isEnd*/
724 VG_(addBytesToXA)( xa, &c, sizeof(c) );
725 w = w1 + base + svma_of_referencing_CU;
726 VG_(addBytesToXA)( xa, &w, sizeof(w) );
727 w = w2 -1 + base + svma_of_referencing_CU;
728 VG_(addBytesToXA)( xa, &w, sizeof(w) );
729 s = (UShort)len;
730 VG_(addBytesToXA)( xa, &s, sizeof(s) );
731 }
732
733 while (len > 0) {
734 UChar byte = get_UChar( &loc );
735 TRACE_D3("%02x", (UInt)byte);
736 if (acquire)
737 VG_(addBytesToXA)( xa, &byte, 1 );
738 len--;
739 }
740 TRACE_D3("\n");
741 }
742
743 { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
744
745 nbytes = VG_(sizeXA)( xa );
746 vg_assert(nbytes >= 1);
747
748 gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes );
749 VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
750 vg_assert( &gx->payload[nbytes]
751 == ((UChar*)gx) + sizeof(GExpr) + nbytes );
752
753 VG_(deleteXA)( xa );
754
755 TRACE_D3("}\n");
756
757 return gx;
758 }
759
760
761 /*------------------------------------------------------------*/
762 /*--- ---*/
763 /*--- Helper functions for range lists and CU headers ---*/
764 /*--- ---*/
765 /*------------------------------------------------------------*/
766
767 /* Denotes an address range. Both aMin and aMax are included in the
768 range; hence a complete range is (0, ~0) and an empty range is any
769 (X, X-1) for X > 0.*/
770 typedef
771 struct { Addr aMin; Addr aMax; }
772 AddrRange;
773
774
775 /* Generate an arbitrary structural total ordering on
776 XArray* of AddrRange. */
cmp__XArrays_of_AddrRange(const XArray * rngs1,const XArray * rngs2)777 static Word cmp__XArrays_of_AddrRange ( const XArray* rngs1,
778 const XArray* rngs2 )
779 {
780 Word n1, n2, i;
781 vg_assert(rngs1 && rngs2);
782 n1 = VG_(sizeXA)( rngs1 );
783 n2 = VG_(sizeXA)( rngs2 );
784 if (n1 < n2) return -1;
785 if (n1 > n2) return 1;
786 for (i = 0; i < n1; i++) {
787 AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i );
788 AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i );
789 if (rng1->aMin < rng2->aMin) return -1;
790 if (rng1->aMin > rng2->aMin) return 1;
791 if (rng1->aMax < rng2->aMax) return -1;
792 if (rng1->aMax > rng2->aMax) return 1;
793 }
794 return 0;
795 }
796
797
798 __attribute__((noinline))
empty_range_list(void)799 static XArray* /* of AddrRange */ empty_range_list ( void )
800 {
801 XArray* xa; /* XArray of AddrRange */
802 /* Who frees this xa? varstack_preen() does. */
803 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1",
804 ML_(dinfo_free),
805 sizeof(AddrRange) );
806 return xa;
807 }
808
809
810 __attribute__((noinline))
unitary_range_list(Addr aMin,Addr aMax)811 static XArray* unitary_range_list ( Addr aMin, Addr aMax )
812 {
813 XArray* xa;
814 AddrRange pair;
815 vg_assert(aMin <= aMax);
816 /* Who frees this xa? varstack_preen() does. */
817 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.url.1",
818 ML_(dinfo_free),
819 sizeof(AddrRange) );
820 pair.aMin = aMin;
821 pair.aMax = aMax;
822 VG_(addToXA)( xa, &pair );
823 return xa;
824 }
825
826
827 /* Enumerate the address ranges starting at img-offset
828 'debug_ranges_offset' in .debug_ranges. Results are biased with
829 'svma_of_referencing_CU' and so I believe are correct SVMAs for the
830 object as a whole. This function allocates the XArray, and the
831 caller must deallocate it. */
832 __attribute__((noinline))
833 static XArray* /* of AddrRange */
get_range_list(const CUConst * cc,Bool td3,UWord debug_ranges_offset,Addr svma_of_referencing_CU)834 get_range_list ( const CUConst* cc,
835 Bool td3,
836 UWord debug_ranges_offset,
837 Addr svma_of_referencing_CU )
838 {
839 Addr base;
840 Cursor ranges;
841 XArray* xa; /* XArray of AddrRange */
842 AddrRange pair;
843
844 if (!ML_(sli_is_valid)(cc->escn_debug_ranges)
845 || cc->escn_debug_ranges.szB == 0)
846 cc->barf("get_range_list: .debug_ranges is empty/missing");
847
848 init_Cursor( &ranges, cc->escn_debug_ranges, 0, cc->barf,
849 "Overrun whilst reading .debug_ranges section(2)" );
850 set_position_of_Cursor( &ranges, debug_ranges_offset );
851
852 /* Who frees this xa? varstack_preen() does. */
853 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free),
854 sizeof(AddrRange) );
855 base = 0;
856 while (True) {
857 /* Read a (host-)word pair. This is something of a hack since
858 the word size to read is really dictated by the ELF file;
859 however, we assume we're reading a file with the same
860 word-sizeness as the host. Reasonably enough. */
861 UWord w1 = get_UWord( &ranges );
862 UWord w2 = get_UWord( &ranges );
863
864 if (w1 == 0 && w2 == 0)
865 break; /* end of list. */
866
867 if (w1 == -1UL) {
868 /* new value for 'base' */
869 base = w2;
870 continue;
871 }
872
873 /* else enumerate [w1+base, w2+base) */
874 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
875 (sec 2.17.2) */
876 if (w1 > w2)
877 cc->barf( "negative range in .debug_ranges section" );
878 if (w1 < w2) {
879 pair.aMin = w1 + base + svma_of_referencing_CU;
880 pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
881 vg_assert(pair.aMin <= pair.aMax);
882 VG_(addToXA)( xa, &pair );
883 }
884 }
885 return xa;
886 }
887
888 #define VARSZ_FORM 0xffffffff
889 static UInt get_Form_szB (const CUConst* cc, DW_FORM form );
890
891 /* Initialises the hash table of abbreviations.
892 We do a single scan of the abbv slice to parse and
893 build all abbreviations, for the following reasons:
894 * all or most abbreviations will be needed in any case
895 (at least for var-info reading).
896 * re-reading each time an abbreviation causes a lot of calls
897 to get_ULEB128.
898 * a CU should not have many abbreviations. */
init_ht_abbvs(CUConst * cc,Bool td3)899 static void init_ht_abbvs (CUConst* cc,
900 Bool td3)
901 {
902 Cursor c;
903 g_abbv *ta; // temporary abbreviation, reallocated if needed.
904 UInt ta_nf_maxE; // max nr of pairs in ta.nf[], doubled when reallocated.
905 UInt ta_nf_n; // nr of pairs in ta->nf that are initialised.
906 g_abbv *ht_ta; // abbv to insert in hash table.
907 Int i;
908
909 #define SZ_G_ABBV(_nf_szE) (sizeof(g_abbv) + _nf_szE * sizeof(name_form))
910
911 ta_nf_maxE = 10; // starting with enough for 9 pairs+terminating pair.
912 ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta_nf", SZ_G_ABBV(ta_nf_maxE));
913 cc->ht_abbvs = VG_(HT_construct) ("di.readdwarf3.ht_abbvs");
914
915 init_Cursor( &c, cc->debug_abbv, 0, cc->barf,
916 "Overrun whilst parsing .debug_abbrev section(2)" );
917 while (True) {
918 ta->abbv_code = get_ULEB128( &c );
919 if (ta->abbv_code == 0) break; /* end of the table */
920
921 ta->atag = get_ULEB128( &c );
922 ta->has_children = get_UChar( &c );
923 ta_nf_n = 0;
924 while (True) {
925 if (ta_nf_n >= ta_nf_maxE) {
926 g_abbv *old_ta = ta;
927 ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta_nf",
928 SZ_G_ABBV(2 * ta_nf_maxE));
929 ta_nf_maxE = 2 * ta_nf_maxE;
930 VG_(memcpy) (ta, old_ta, SZ_G_ABBV(ta_nf_n));
931 ML_(dinfo_free) (old_ta);
932 }
933 ta->nf[ta_nf_n].at_name = get_ULEB128( &c );
934 ta->nf[ta_nf_n].at_form = get_ULEB128( &c );
935 if (ta->nf[ta_nf_n].at_name == 0 && ta->nf[ta_nf_n].at_form == 0) {
936 ta_nf_n++;
937 break;
938 }
939 ta_nf_n++;
940 }
941
942 // Initialises the skip_szB/next_nf elements : an element at position
943 // i must contain the sum of its own size + the sizes of all elements
944 // following i till either the next variable size element, the next
945 // sibling element or the end of the DIE.
946 ta->nf[ta_nf_n - 1].skip_szB = 0;
947 ta->nf[ta_nf_n - 1].next_nf = 0;
948 for (i = ta_nf_n - 2; i >= 0; i--) {
949 const UInt form_szB = get_Form_szB (cc, (DW_FORM)ta->nf[i].at_form);
950
951 if (ta->nf[i+1].at_name == DW_AT_sibling
952 || ta->nf[i+1].skip_szB == VARSZ_FORM) {
953 ta->nf[i].skip_szB = form_szB;
954 ta->nf[i].next_nf = i+1;
955 } else if (form_szB == VARSZ_FORM) {
956 ta->nf[i].skip_szB = form_szB;
957 ta->nf[i].next_nf = i+1;
958 } else {
959 ta->nf[i].skip_szB = ta->nf[i+1].skip_szB + form_szB;
960 ta->nf[i].next_nf = ta->nf[i+1].next_nf;
961 }
962 }
963
964 ht_ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta", SZ_G_ABBV(ta_nf_n));
965 VG_(memcpy) (ht_ta, ta, SZ_G_ABBV(ta_nf_n));
966 VG_(HT_add_node) ( cc->ht_abbvs, ht_ta );
967 if (TD3) {
968 TRACE_D3(" Adding abbv_code %lu TAG %s [%s] nf %u ",
969 ht_ta->abbv_code, ML_(pp_DW_TAG)(ht_ta->atag),
970 ML_(pp_DW_children)(ht_ta->has_children),
971 ta_nf_n);
972 TRACE_D3(" ");
973 for (i = 0; i < ta_nf_n; i++)
974 TRACE_D3("[%u,%u] ", ta->nf[i].skip_szB, ta->nf[i].next_nf);
975 TRACE_D3("\n");
976 }
977 }
978
979 ML_(dinfo_free) (ta);
980 #undef SZ_G_ABBV
981 }
982
get_abbv(const CUConst * cc,ULong abbv_code)983 static g_abbv* get_abbv (const CUConst* cc, ULong abbv_code)
984 {
985 g_abbv *abbv;
986
987 abbv = VG_(HT_lookup) (cc->ht_abbvs, abbv_code);
988 if (!abbv)
989 cc->barf ("abbv_code not found in ht_abbvs table");
990 return abbv;
991 }
992
993 /* Free the memory allocated in CUConst. */
clear_CUConst(CUConst * cc)994 static void clear_CUConst (CUConst* cc)
995 {
996 VG_(HT_destruct) ( cc->ht_abbvs, ML_(dinfo_free));
997 cc->ht_abbvs = NULL;
998 }
999
1000 /* Parse the Compilation Unit header indicated at 'c' and
1001 initialise 'cc' accordingly. */
1002 static __attribute__((noinline))
parse_CU_Header(CUConst * cc,Bool td3,Cursor * c,DiSlice escn_debug_abbv,Bool type_unit,Bool alt_info)1003 void parse_CU_Header ( /*OUT*/CUConst* cc,
1004 Bool td3,
1005 Cursor* c,
1006 DiSlice escn_debug_abbv,
1007 Bool type_unit,
1008 Bool alt_info )
1009 {
1010 UChar address_size;
1011 ULong debug_abbrev_offset;
1012
1013 VG_(memset)(cc, 0, sizeof(*cc));
1014 vg_assert(c && c->barf);
1015 cc->barf = c->barf;
1016
1017 /* initial_length field */
1018 cc->unit_length
1019 = get_Initial_Length( &cc->is_dw64, c,
1020 "parse_CU_Header: invalid initial-length field" );
1021
1022 TRACE_D3(" Length: %llu\n", cc->unit_length );
1023
1024 /* version */
1025 cc->version = get_UShort( c );
1026 if (cc->version != 2 && cc->version != 3 && cc->version != 4)
1027 cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" );
1028 TRACE_D3(" Version: %d\n", (Int)cc->version );
1029
1030 /* debug_abbrev_offset */
1031 debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
1032 if (debug_abbrev_offset >= escn_debug_abbv.szB)
1033 cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
1034 TRACE_D3(" Abbrev Offset: %llu\n", debug_abbrev_offset );
1035
1036 /* address size. If this isn't equal to the host word size, just
1037 give up. This makes it safe to assume elsewhere that
1038 DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
1039 word. */
1040 address_size = get_UChar( c );
1041 if (address_size != sizeof(void*))
1042 cc->barf( "parse_CU_Header: invalid address_size" );
1043 TRACE_D3(" Pointer Size: %d\n", (Int)address_size );
1044
1045 cc->is_type_unit = type_unit;
1046 cc->is_alt_info = alt_info;
1047
1048 if (type_unit) {
1049 cc->type_signature = get_ULong( c );
1050 cc->type_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
1051 }
1052
1053 /* Set up cc->debug_abbv to point to the relevant table for this
1054 CU. Set its .szB so that at least we can't read off the end of
1055 the debug_abbrev section -- potentially (and quite likely) too
1056 big, if this isn't the last table in the section, but at least
1057 it's safe.
1058
1059 This amounts to taking debug_abbv_escn and moving the start
1060 position along by debug_abbrev_offset bytes, hence forming a
1061 smaller DiSlice which has the same end point. Since we checked
1062 just above that debug_abbrev_offset is less than the size of
1063 debug_abbv_escn, this should leave us with a nonempty slice. */
1064 vg_assert(debug_abbrev_offset < escn_debug_abbv.szB);
1065 cc->debug_abbv = escn_debug_abbv;
1066 cc->debug_abbv.ioff += debug_abbrev_offset;
1067 cc->debug_abbv.szB -= debug_abbrev_offset;
1068
1069 init_ht_abbvs(cc, td3);
1070 }
1071
1072 /* This represents a single signatured type. It maps a type signature
1073 (a ULong) to a cooked DIE offset. Objects of this type are stored
1074 in the type signature hash table. */
1075 typedef
1076 struct D3SignatureType {
1077 struct D3SignatureType *next;
1078 UWord data;
1079 ULong type_signature;
1080 UWord die;
1081 }
1082 D3SignatureType;
1083
1084 /* Record a signatured type in the hash table. */
record_signatured_type(VgHashTable * tab,ULong type_signature,UWord die)1085 static void record_signatured_type ( VgHashTable *tab,
1086 ULong type_signature,
1087 UWord die )
1088 {
1089 D3SignatureType *dstype = ML_(dinfo_zalloc) ( "di.readdwarf3.sigtype",
1090 sizeof(D3SignatureType) );
1091 dstype->data = (UWord) type_signature;
1092 dstype->type_signature = type_signature;
1093 dstype->die = die;
1094 VG_(HT_add_node) ( tab, dstype );
1095 }
1096
1097 /* Given a type signature hash table and a type signature, return the
1098 cooked DIE offset of the type. If the type cannot be found, call
1099 BARF. */
lookup_signatured_type(const VgHashTable * tab,ULong type_signature,void (* barf)(const HChar *))1100 static UWord lookup_signatured_type ( const VgHashTable *tab,
1101 ULong type_signature,
1102 void (*barf)( const HChar* ) __attribute__((noreturn)) )
1103 {
1104 D3SignatureType *dstype = VG_(HT_lookup) ( tab, (UWord) type_signature );
1105 /* This may be unwarranted chumminess with the hash table
1106 implementation. */
1107 while ( dstype != NULL && dstype->type_signature != type_signature)
1108 dstype = dstype->next;
1109 if (dstype == NULL) {
1110 barf("lookup_signatured_type: could not find signatured type");
1111 /*NOTREACHED*/
1112 vg_assert(0);
1113 }
1114 return dstype->die;
1115 }
1116
1117
1118 /* Represents Form data. If szB is 1/2/4/8 then the result is in the
1119 lowest 1/2/4/8 bytes of u.val. If szB is zero or negative then the
1120 result is an image section beginning at u.cur and with size -szB.
1121 No other szB values are allowed. */
1122 typedef
1123 struct {
1124 Long szB; // 1, 2, 4, 8 or non-positive values only.
1125 union { ULong val; DiCursor cur; } u;
1126 }
1127 FormContents;
1128
1129 /* From 'c', get the Form data into 'cts'. Either it gets a 1/2/4/8
1130 byte scalar value, or (a reference to) zero or more bytes starting
1131 at a DiCursor.*/
1132 static
get_Form_contents(FormContents * cts,const CUConst * cc,Cursor * c,Bool td3,DW_FORM form)1133 void get_Form_contents ( /*OUT*/FormContents* cts,
1134 const CUConst* cc, Cursor* c,
1135 Bool td3, DW_FORM form )
1136 {
1137 VG_(bzero_inline)(cts, sizeof(*cts));
1138 // !!! keep switch in sync with get_Form_szB. The nr of characters read below
1139 // must be computed similarly in get_Form_szB.
1140 // The consistency is verified in trace_DIE.
1141 switch (form) {
1142 case DW_FORM_data1:
1143 cts->u.val = (ULong)(UChar)get_UChar(c);
1144 cts->szB = 1;
1145 TRACE_D3("%u", (UInt)cts->u.val);
1146 break;
1147 case DW_FORM_data2:
1148 cts->u.val = (ULong)(UShort)get_UShort(c);
1149 cts->szB = 2;
1150 TRACE_D3("%u", (UInt)cts->u.val);
1151 break;
1152 case DW_FORM_data4:
1153 cts->u.val = (ULong)(UInt)get_UInt(c);
1154 cts->szB = 4;
1155 TRACE_D3("%u", (UInt)cts->u.val);
1156 break;
1157 case DW_FORM_data8:
1158 cts->u.val = get_ULong(c);
1159 cts->szB = 8;
1160 TRACE_D3("%llu", cts->u.val);
1161 break;
1162 case DW_FORM_sec_offset:
1163 cts->u.val = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 );
1164 cts->szB = cc->is_dw64 ? 8 : 4;
1165 TRACE_D3("%llu", cts->u.val);
1166 break;
1167 case DW_FORM_sdata:
1168 cts->u.val = (ULong)(Long)get_SLEB128(c);
1169 cts->szB = 8;
1170 TRACE_D3("%llu", cts->u.val);
1171 break;
1172 case DW_FORM_udata:
1173 cts->u.val = (ULong)(Long)get_ULEB128(c);
1174 cts->szB = 8;
1175 TRACE_D3("%llu", cts->u.val);
1176 break;
1177 case DW_FORM_addr:
1178 /* note, this is a hack. DW_FORM_addr is defined as getting
1179 a word the size of the target machine as defined by the
1180 address_size field in the CU Header. However,
1181 parse_CU_Header() rejects all inputs except those for
1182 which address_size == sizeof(Word), hence we can just
1183 treat it as a (host) Word. */
1184 cts->u.val = (ULong)(UWord)get_UWord(c);
1185 cts->szB = sizeof(UWord);
1186 TRACE_D3("0x%lx", (UWord)cts->u.val);
1187 break;
1188
1189 case DW_FORM_ref_addr:
1190 /* We make the same word-size assumption as DW_FORM_addr. */
1191 /* What does this really mean? From D3 Sec 7.5.4,
1192 description of "reference", it would appear to reference
1193 some other DIE, by specifying the offset from the
1194 beginning of a .debug_info section. The D3 spec mentions
1195 that this might be in some other shared object and
1196 executable. But I don't see how the name of the other
1197 object/exe is specified.
1198
1199 At least for the DW_FORM_ref_addrs created by icc11, the
1200 references seem to be within the same object/executable.
1201 So for the moment we merely range-check, to see that they
1202 actually do specify a plausible offset within this
1203 object's .debug_info, and return the value unchanged.
1204
1205 In DWARF 2, DW_FORM_ref_addr is address-sized, but in
1206 DWARF 3 and later, it is offset-sized.
1207 */
1208 if (cc->version == 2) {
1209 cts->u.val = (ULong)(UWord)get_UWord(c);
1210 cts->szB = sizeof(UWord);
1211 } else {
1212 cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
1213 cts->szB = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
1214 }
1215 TRACE_D3("0x%lx", (UWord)cts->u.val);
1216 if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)cts->u.val);
1217 if (/* the following is surely impossible, but ... */
1218 !ML_(sli_is_valid)(cc->escn_debug_info)
1219 || cts->u.val >= (ULong)cc->escn_debug_info.szB) {
1220 /* Hmm. Offset is nonsensical for this object's .debug_info
1221 section. Be safe and reject it. */
1222 cc->barf("get_Form_contents: DW_FORM_ref_addr points "
1223 "outside .debug_info");
1224 }
1225 break;
1226
1227 case DW_FORM_strp: {
1228 /* this is an offset into .debug_str */
1229 UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1230 if (!ML_(sli_is_valid)(cc->escn_debug_str)
1231 || uw >= cc->escn_debug_str.szB)
1232 cc->barf("get_Form_contents: DW_FORM_strp "
1233 "points outside .debug_str");
1234 /* FIXME: check the entire string lies inside debug_str,
1235 not just the first byte of it. */
1236 DiCursor str
1237 = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str), uw );
1238 if (TD3) {
1239 HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.1");
1240 TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, tmp);
1241 ML_(dinfo_free)(tmp);
1242 }
1243 cts->u.cur = str;
1244 cts->szB = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1245 break;
1246 }
1247 case DW_FORM_string: {
1248 DiCursor str = get_AsciiZ(c);
1249 if (TD3) {
1250 HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.2");
1251 TRACE_D3("%s", tmp);
1252 ML_(dinfo_free)(tmp);
1253 }
1254 cts->u.cur = str;
1255 /* strlen is safe because get_AsciiZ already 'vetted' the
1256 entire string */
1257 cts->szB = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1258 break;
1259 }
1260 case DW_FORM_ref1: {
1261 UChar u8 = get_UChar(c);
1262 UWord res = cc->cu_start_offset + (UWord)u8;
1263 cts->u.val = (ULong)res;
1264 cts->szB = sizeof(UWord);
1265 TRACE_D3("<%lx>", res);
1266 break;
1267 }
1268 case DW_FORM_ref2: {
1269 UShort u16 = get_UShort(c);
1270 UWord res = cc->cu_start_offset + (UWord)u16;
1271 cts->u.val = (ULong)res;
1272 cts->szB = sizeof(UWord);
1273 TRACE_D3("<%lx>", res);
1274 break;
1275 }
1276 case DW_FORM_ref4: {
1277 UInt u32 = get_UInt(c);
1278 UWord res = cc->cu_start_offset + (UWord)u32;
1279 cts->u.val = (ULong)res;
1280 cts->szB = sizeof(UWord);
1281 TRACE_D3("<%lx>", res);
1282 break;
1283 }
1284 case DW_FORM_ref8: {
1285 ULong u64 = get_ULong(c);
1286 UWord res = cc->cu_start_offset + (UWord)u64;
1287 cts->u.val = (ULong)res;
1288 cts->szB = sizeof(UWord);
1289 TRACE_D3("<%lx>", res);
1290 break;
1291 }
1292 case DW_FORM_ref_udata: {
1293 ULong u64 = get_ULEB128(c);
1294 UWord res = cc->cu_start_offset + (UWord)u64;
1295 cts->u.val = (ULong)res;
1296 cts->szB = sizeof(UWord);
1297 TRACE_D3("<%lx>", res);
1298 break;
1299 }
1300 case DW_FORM_flag: {
1301 UChar u8 = get_UChar(c);
1302 TRACE_D3("%u", (UInt)u8);
1303 cts->u.val = (ULong)u8;
1304 cts->szB = 1;
1305 break;
1306 }
1307 case DW_FORM_flag_present:
1308 TRACE_D3("1");
1309 cts->u.val = 1;
1310 cts->szB = 1;
1311 break;
1312 case DW_FORM_block1: {
1313 ULong u64b;
1314 ULong u64 = (ULong)get_UChar(c);
1315 DiCursor block = get_DiCursor_from_Cursor(c);
1316 TRACE_D3("%llu byte block: ", u64);
1317 for (u64b = u64; u64b > 0; u64b--) {
1318 UChar u8 = get_UChar(c);
1319 TRACE_D3("%x ", (UInt)u8);
1320 }
1321 cts->u.cur = block;
1322 cts->szB = - (Long)u64;
1323 break;
1324 }
1325 case DW_FORM_block2: {
1326 ULong u64b;
1327 ULong u64 = (ULong)get_UShort(c);
1328 DiCursor block = get_DiCursor_from_Cursor(c);
1329 TRACE_D3("%llu byte block: ", u64);
1330 for (u64b = u64; u64b > 0; u64b--) {
1331 UChar u8 = get_UChar(c);
1332 TRACE_D3("%x ", (UInt)u8);
1333 }
1334 cts->u.cur = block;
1335 cts->szB = - (Long)u64;
1336 break;
1337 }
1338 case DW_FORM_block4: {
1339 ULong u64b;
1340 ULong u64 = (ULong)get_UInt(c);
1341 DiCursor block = get_DiCursor_from_Cursor(c);
1342 TRACE_D3("%llu byte block: ", u64);
1343 for (u64b = u64; u64b > 0; u64b--) {
1344 UChar u8 = get_UChar(c);
1345 TRACE_D3("%x ", (UInt)u8);
1346 }
1347 cts->u.cur = block;
1348 cts->szB = - (Long)u64;
1349 break;
1350 }
1351 case DW_FORM_exprloc:
1352 case DW_FORM_block: {
1353 ULong u64b;
1354 ULong u64 = (ULong)get_ULEB128(c);
1355 DiCursor block = get_DiCursor_from_Cursor(c);
1356 TRACE_D3("%llu byte block: ", u64);
1357 for (u64b = u64; u64b > 0; u64b--) {
1358 UChar u8 = get_UChar(c);
1359 TRACE_D3("%x ", (UInt)u8);
1360 }
1361 cts->u.cur = block;
1362 cts->szB = - (Long)u64;
1363 break;
1364 }
1365 case DW_FORM_ref_sig8: {
1366 ULong u64b;
1367 ULong signature = get_ULong (c);
1368 ULong work = signature;
1369 TRACE_D3("8 byte signature: ");
1370 for (u64b = 8; u64b > 0; u64b--) {
1371 UChar u8 = work & 0xff;
1372 TRACE_D3("%x ", (UInt)u8);
1373 work >>= 8;
1374 }
1375
1376 /* cc->signature_types is only built/initialised when
1377 VG_(clo_read_var_info) is set. In this case,
1378 the DW_FORM_ref_sig8 can be looked up.
1379 But we can also arrive here when only reading inline info
1380 and VG_(clo_trace_symtab) is set. In such a case,
1381 we cannot lookup the DW_FORM_ref_sig8, we rather assign
1382 a dummy value. This is a kludge, but otherwise,
1383 the 'dwarf inline info reader' tracing would have to
1384 do type processing/reading. It is better to avoid
1385 adding significant 'real' processing only due to tracing. */
1386 if (VG_(clo_read_var_info)) {
1387 /* Due to the way that the hash table is constructed, the
1388 resulting DIE offset here is already "cooked". See
1389 cook_die_using_form. */
1390 cts->u.val = lookup_signatured_type (cc->signature_types, signature,
1391 c->barf);
1392 } else {
1393 vg_assert (td3);
1394 vg_assert (VG_(clo_read_inline_info));
1395 TRACE_D3("<not dereferencing signature type>");
1396 cts->u.val = 0; /* Assign a dummy/rubbish value */
1397 }
1398 cts->szB = sizeof(UWord);
1399 break;
1400 }
1401 case DW_FORM_indirect:
1402 get_Form_contents (cts, cc, c, td3, (DW_FORM)get_ULEB128(c));
1403 return;
1404
1405 case DW_FORM_GNU_ref_alt:
1406 cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
1407 cts->szB = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
1408 TRACE_D3("0x%lx", (UWord)cts->u.val);
1409 if (0) VG_(printf)("DW_FORM_GNU_ref_alt 0x%lx\n", (UWord)cts->u.val);
1410 if (/* the following is surely impossible, but ... */
1411 !ML_(sli_is_valid)(cc->escn_debug_info_alt))
1412 cc->barf("get_Form_contents: DW_FORM_GNU_ref_addr used, "
1413 "but no alternate .debug_info");
1414 else if (cts->u.val >= (ULong)cc->escn_debug_info_alt.szB) {
1415 /* Hmm. Offset is nonsensical for this object's .debug_info
1416 section. Be safe and reject it. */
1417 cc->barf("get_Form_contents: DW_FORM_GNU_ref_addr points "
1418 "outside alternate .debug_info");
1419 }
1420 break;
1421
1422 case DW_FORM_GNU_strp_alt: {
1423 /* this is an offset into alternate .debug_str */
1424 SizeT uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1425 if (!ML_(sli_is_valid)(cc->escn_debug_str_alt))
1426 cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt used, "
1427 "but no alternate .debug_str");
1428 else if (uw >= cc->escn_debug_str_alt.szB)
1429 cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt "
1430 "points outside alternate .debug_str");
1431 /* FIXME: check the entire string lies inside debug_str,
1432 not just the first byte of it. */
1433 DiCursor str
1434 = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str_alt), uw);
1435 if (TD3) {
1436 HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.3");
1437 TRACE_D3("(indirect alt string, offset: 0x%lx): %s", uw, tmp);
1438 ML_(dinfo_free)(tmp);
1439 }
1440 cts->u.cur = str;
1441 cts->szB = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1442 break;
1443 }
1444
1445 default:
1446 VG_(printf)(
1447 "get_Form_contents: unhandled %u (%s) at <%llx>\n",
1448 form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c));
1449 c->barf("get_Form_contents: unhandled DW_FORM");
1450 }
1451 }
1452
sizeof_Dwarfish_UWord(Bool is_dw64)1453 static inline UInt sizeof_Dwarfish_UWord (Bool is_dw64)
1454 {
1455 if (is_dw64)
1456 return sizeof(ULong);
1457 else
1458 return sizeof(UInt);
1459 }
1460
1461 #define VARSZ_FORM 0xffffffff
1462 /* If the form is a fixed length form, return the nr of bytes for this form.
1463 If the form is a variable length form, return VARSZ_FORM. */
1464 static
get_Form_szB(const CUConst * cc,DW_FORM form)1465 UInt get_Form_szB (const CUConst* cc, DW_FORM form )
1466 {
1467 // !!! keep switch in sync with get_Form_contents : the nr of bytes
1468 // read from a cursor by get_Form_contents must be returned by
1469 // the below switch.
1470 // The consistency is verified in trace_DIE.
1471 switch (form) {
1472 case DW_FORM_data1: return 1;
1473 case DW_FORM_data2: return 2;
1474 case DW_FORM_data4: return 4;
1475 case DW_FORM_data8: return 8;
1476 case DW_FORM_sec_offset:
1477 if (cc->is_dw64)
1478 return 8;
1479 else
1480 return 4;
1481 case DW_FORM_sdata:
1482 return VARSZ_FORM;
1483 case DW_FORM_udata:
1484 return VARSZ_FORM;
1485 case DW_FORM_addr: // See hack in get_Form_contents
1486 return sizeof(UWord);
1487 case DW_FORM_ref_addr: // See hack in get_Form_contents
1488 if (cc->version == 2)
1489 return sizeof(UWord);
1490 else
1491 return sizeof_Dwarfish_UWord (cc->is_dw64);
1492 case DW_FORM_strp:
1493 return sizeof_Dwarfish_UWord (cc->is_dw64);
1494 case DW_FORM_string:
1495 return VARSZ_FORM;
1496 case DW_FORM_ref1:
1497 return 1;
1498 case DW_FORM_ref2:
1499 return 2;
1500 case DW_FORM_ref4:
1501 return 4;
1502 case DW_FORM_ref8:
1503 return 8;
1504 case DW_FORM_ref_udata:
1505 return VARSZ_FORM;
1506 case DW_FORM_flag:
1507 return 1;
1508 case DW_FORM_flag_present:
1509 return 0; // !!! special case, no data.
1510 case DW_FORM_block1:
1511 return VARSZ_FORM;
1512 case DW_FORM_block2:
1513 return VARSZ_FORM;
1514 case DW_FORM_block4:
1515 return VARSZ_FORM;
1516 case DW_FORM_exprloc:
1517 case DW_FORM_block:
1518 return VARSZ_FORM;
1519 case DW_FORM_ref_sig8:
1520 return 8;
1521 case DW_FORM_indirect:
1522 return VARSZ_FORM;
1523 case DW_FORM_GNU_ref_alt:
1524 return sizeof_Dwarfish_UWord(cc->is_dw64);
1525 case DW_FORM_GNU_strp_alt:
1526 return sizeof_Dwarfish_UWord(cc->is_dw64);
1527 default:
1528 VG_(printf)(
1529 "get_Form_szB: unhandled %u (%s)\n",
1530 form, ML_(pp_DW_FORM)(form));
1531 cc->barf("get_Form_contents: unhandled DW_FORM");
1532 }
1533 }
1534
1535 /* Skip a DIE as described by abbv.
1536 If the DIE has a sibling, *sibling is set to the skipped DIE sibling value. */
1537 static
skip_DIE(UWord * sibling,Cursor * c_die,const g_abbv * abbv,const CUConst * cc)1538 void skip_DIE (UWord *sibling,
1539 Cursor* c_die,
1540 const g_abbv *abbv,
1541 const CUConst* cc)
1542 {
1543 UInt nf_i;
1544 FormContents cts;
1545 nf_i = 0;
1546 while (True) {
1547 if (abbv->nf[nf_i].at_name == DW_AT_sibling) {
1548 get_Form_contents( &cts, cc, c_die, False /*td3*/,
1549 (DW_FORM)abbv->nf[nf_i].at_form );
1550 if ( cts.szB > 0 )
1551 *sibling = cts.u.val;
1552 nf_i++;
1553 } else if (abbv->nf[nf_i].skip_szB == VARSZ_FORM) {
1554 get_Form_contents( &cts, cc, c_die, False /*td3*/,
1555 (DW_FORM)abbv->nf[nf_i].at_form );
1556 nf_i++;
1557 } else {
1558 advance_position_of_Cursor (c_die, (ULong)abbv->nf[nf_i].skip_szB);
1559 nf_i = abbv->nf[nf_i].next_nf;
1560 }
1561 if (nf_i == 0)
1562 break;
1563 }
1564 }
1565
1566
1567 /*------------------------------------------------------------*/
1568 /*--- ---*/
1569 /*--- Parsing of variable-related DIEs ---*/
1570 /*--- ---*/
1571 /*------------------------------------------------------------*/
1572
1573 typedef
1574 struct _TempVar {
1575 const HChar* name; /* in DebugInfo's .strpool */
1576 /* Represent ranges economically. nRanges is the number of
1577 ranges. Cases:
1578 0: .rngOneMin .rngOneMax .manyRanges are all zero
1579 1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
1580 2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
1581 This is merely an optimisation to avoid having to allocate
1582 and free the XArray in the common (98%) of cases where there
1583 is zero or one address ranges. */
1584 UWord nRanges;
1585 Addr rngOneMin;
1586 Addr rngOneMax;
1587 XArray* rngMany; /* of AddrRange. NON-UNIQUE PTR in AR_DINFO. */
1588 /* Do not free .rngMany, since many TempVars will have the same
1589 value. Instead the associated storage is to be freed by
1590 deleting 'rangetree', which stores a single copy of each
1591 range. */
1592 /* --- */
1593 Int level;
1594 UWord typeR; /* a cuOff */
1595 GExpr* gexpr; /* for this variable */
1596 GExpr* fbGX; /* to find the frame base of the enclosing fn, if
1597 any */
1598 UInt fndn_ix; /* declaring file/dirname index in fndnpool, or 0 */
1599 Int fLine; /* declaring file line number, or zero */
1600 /* offset in .debug_info, so that abstract instances can be
1601 found to satisfy references from concrete instances. */
1602 UWord dioff;
1603 UWord absOri; /* so the absOri fields refer to dioff fields
1604 in some other, related TempVar. */
1605 }
1606 TempVar;
1607
1608 typedef
1609 struct {
1610 /* Contains the range stack: a stack of address ranges, one
1611 stack entry for each nested scope.
1612
1613 Some scope entries are created by function definitions
1614 (DW_AT_subprogram), and for those, we also note the GExpr
1615 derived from its DW_AT_frame_base attribute, if any.
1616 Consequently it should be possible to find, for any
1617 variable's DIE, the GExpr for the containing function's
1618 DW_AT_frame_base by scanning back through the stack to find
1619 the nearest entry associated with a function. This somewhat
1620 elaborate scheme is provided so as to make it possible to
1621 obtain the correct DW_AT_frame_base expression even in the
1622 presence of nested functions (or to be more precise, in the
1623 presence of nested DW_AT_subprogram DIEs).
1624 */
1625 Int sp; /* [sp] is innermost active entry; sp==-1 for empty
1626 stack */
1627 Int stack_size;
1628 XArray **ranges; /* XArray of AddrRange */
1629 Int *level; /* D3 DIE levels */
1630 Bool *isFunc; /* from DW_AT_subprogram? */
1631 GExpr **fbGX; /* if isFunc, contains the FB expr, else NULL */
1632 /* The fndn_ix file name/dirname table. Is a mapping from dwarf
1633 integer index to the index in di->fndnpool. */
1634 XArray* /* of UInt* */ fndn_ix_Table;
1635 }
1636 D3VarParser;
1637
1638 /* Completely initialise a variable parser object */
1639 static void
var_parser_init(D3VarParser * parser)1640 var_parser_init ( D3VarParser *parser )
1641 {
1642 parser->sp = -1;
1643 parser->stack_size = 0;
1644 parser->ranges = NULL;
1645 parser->level = NULL;
1646 parser->isFunc = NULL;
1647 parser->fbGX = NULL;
1648 parser->fndn_ix_Table = NULL;
1649 }
1650
1651 /* Release any memory hanging off a variable parser object */
1652 static void
var_parser_release(D3VarParser * parser)1653 var_parser_release ( D3VarParser *parser )
1654 {
1655 ML_(dinfo_free)( parser->ranges );
1656 ML_(dinfo_free)( parser->level );
1657 ML_(dinfo_free)( parser->isFunc );
1658 ML_(dinfo_free)( parser->fbGX );
1659 }
1660
varstack_show(const D3VarParser * parser,const HChar * str)1661 static void varstack_show ( const D3VarParser* parser, const HChar* str )
1662 {
1663 Word i, j;
1664 VG_(printf)(" varstack (%s) {\n", str);
1665 for (i = 0; i <= parser->sp; i++) {
1666 XArray* xa = parser->ranges[i];
1667 vg_assert(xa);
1668 VG_(printf)(" [%ld] (level %d)", i, parser->level[i]);
1669 if (parser->isFunc[i]) {
1670 VG_(printf)(" (fbGX=%p)", parser->fbGX[i]);
1671 } else {
1672 vg_assert(parser->fbGX[i] == NULL);
1673 }
1674 VG_(printf)(": ");
1675 if (VG_(sizeXA)( xa ) == 0) {
1676 VG_(printf)("** empty PC range array **");
1677 } else {
1678 for (j = 0; j < VG_(sizeXA)( xa ); j++) {
1679 AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
1680 vg_assert(range);
1681 VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax);
1682 }
1683 }
1684 VG_(printf)("\n");
1685 }
1686 VG_(printf)(" }\n");
1687 }
1688
1689 /* Remove from the stack, all entries with .level > 'level' */
1690 static
varstack_preen(D3VarParser * parser,Bool td3,Int level)1691 void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
1692 {
1693 Bool changed = False;
1694 vg_assert(parser->sp < parser->stack_size);
1695 while (True) {
1696 vg_assert(parser->sp >= -1);
1697 if (parser->sp == -1) break;
1698 if (parser->level[parser->sp] <= level) break;
1699 if (0)
1700 TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
1701 vg_assert(parser->ranges[parser->sp]);
1702 /* Who allocated this xa? get_range_list() or
1703 unitary_range_list(). */
1704 VG_(deleteXA)( parser->ranges[parser->sp] );
1705 parser->sp--;
1706 changed = True;
1707 }
1708 if (changed && td3)
1709 varstack_show( parser, "after preen" );
1710 }
1711
/* Push a new scope entry (ranges, level, isFunc, fbGX) on the range
   stack of 'parser', after having removed all entries at 'level' or
   deeper.  'ranges' must not be NULL, and 'fbGX' must be NULL unless
   'isFunc' is set.  The stack arrays are grown when they are full.
   'cc' is not used by the code below. */
static void varstack_push ( const CUConst* cc,
                            D3VarParser* parser,
                            Bool td3,
                            XArray* ranges, Int level,
                            Bool isFunc, GExpr* fbGX ) {
   if (0)
      TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d %p\n",
               parser->sp+1, level, ranges);

   /* Anything at a level >= 'level' is about to be superseded by the
      new entry, so get rid of it first. */
   varstack_preen(parser, /*td3*/False, level-1);

   vg_assert(parser->sp >= -1);
   vg_assert(parser->sp < parser->stack_size);

   /* No free slot left?  Then make all four arrays 48 entries
      longer. */
   if (parser->sp == parser->stack_size - 1) {
      const Int bigger = parser->stack_size + 48;

      parser->stack_size = bigger;
      parser->ranges =
         ML_(dinfo_realloc)("di.readdwarf3.varpush.1", parser->ranges,
                            bigger * sizeof parser->ranges[0]);
      parser->level =
         ML_(dinfo_realloc)("di.readdwarf3.varpush.2", parser->level,
                            bigger * sizeof parser->level[0]);
      parser->isFunc =
         ML_(dinfo_realloc)("di.readdwarf3.varpush.3", parser->isFunc,
                            bigger * sizeof parser->isFunc[0]);
      parser->fbGX =
         ML_(dinfo_realloc)("di.readdwarf3.varpush.4", parser->fbGX,
                            bigger * sizeof parser->fbGX[0]);
   }

   /* Levels must be strictly increasing up the stack. */
   if (parser->sp >= 0)
      vg_assert(parser->level[parser->sp] < level);
   parser->sp++;

   vg_assert(ranges != NULL);
   if (!isFunc) vg_assert(fbGX == NULL);

   parser->fbGX[parser->sp]   = fbGX;
   parser->isFunc[parser->sp] = isFunc;
   parser->level[parser->sp]  = level;
   parser->ranges[parser->sp] = ranges;

   if (TD3)
      varstack_show( parser, "after push" );
}
1754
1755
1756 /* cts is derived from a DW_AT_location and so refers either to a
1757 location expression or to a location list. Figure out which, and
1758 in both cases bundle the expression or location list into a
1759 so-called GExpr (guarded expression). */
1760 __attribute__((noinline))
get_GX(const CUConst * cc,Bool td3,const FormContents * cts)1761 static GExpr* get_GX ( const CUConst* cc, Bool td3, const FormContents* cts )
1762 {
1763 GExpr* gexpr = NULL;
1764 if (cts->szB < 0) {
1765 /* represents a non-empty in-line location expression, and
1766 cts->u.cur points at the image bytes */
1767 gexpr = make_singleton_GX( cts->u.cur, (ULong)(- cts->szB) );
1768 }
1769 else
1770 if (cts->szB > 0) {
1771 /* represents a location list. cts->u.val is the offset of it
1772 in .debug_loc. */
1773 if (!cc->cu_svma_known)
1774 cc->barf("get_GX: location list, but CU svma is unknown");
1775 gexpr = make_general_GX( cc, td3, cts->u.val, cc->cu_svma );
1776 }
1777 else {
1778 vg_assert(0); /* else caller is bogus */
1779 }
1780 return gexpr;
1781 }
1782
1783 /* Returns an xarray* of directory names (indexed by the dwarf dirname
1784 integer).
1785 If 'compdir' is NULL, entry [0] will be set to "."
1786 otherwise entry [0] is set to compdir.
1787 Entry [0] basically means "the current directory of the compilation",
1788 whatever that means, according to the DWARF3 spec.
1789 FIXME??? readdwarf3.c/readdwarf.c have a lot of duplicated code */
1790 static
read_dirname_xa(DebugInfo * di,const HChar * compdir,Cursor * c,Bool td3)1791 XArray* read_dirname_xa (DebugInfo* di, const HChar *compdir,
1792 Cursor *c,
1793 Bool td3 )
1794 {
1795 XArray* dirname_xa; /* xarray of HChar* dirname */
1796 const HChar* dirname;
1797 UInt compdir_len;
1798
1799 dirname_xa = VG_(newXA) (ML_(dinfo_zalloc), "di.rdxa.1", ML_(dinfo_free),
1800 sizeof(HChar*) );
1801
1802 if (compdir == NULL) {
1803 dirname = ".";
1804 compdir_len = 1;
1805 } else {
1806 dirname = compdir;
1807 compdir_len = VG_(strlen)(compdir);
1808 }
1809 VG_(addToXA) (dirname_xa, &dirname);
1810
1811 TRACE_D3(" The Directory Table%s\n",
1812 peek_UChar(c) == 0 ? " is empty." : ":" );
1813
1814 while (peek_UChar(c) != 0) {
1815
1816 DiCursor cur = get_AsciiZ(c);
1817 HChar* data_str = ML_(cur_read_strdup)( cur, "dirname_xa.1" );
1818 TRACE_D3(" %s\n", data_str);
1819
1820 /* If data_str[0] is '/', then 'data' is an absolute path and we
1821 don't mess with it. Otherwise, construct the
1822 path 'compdir' ++ "/" ++ 'data'. */
1823
1824 if (data_str[0] != '/'
1825 /* not an absolute path */
1826 && compdir
1827 /* actually got something sensible for compdir */
1828 && compdir_len)
1829 {
1830 SizeT len = compdir_len + 1 + VG_(strlen)(data_str);
1831 HChar *buf = ML_(dinfo_zalloc)("dirname_xa.2", len + 1);
1832
1833 VG_(strcpy)(buf, compdir);
1834 VG_(strcat)(buf, "/");
1835 VG_(strcat)(buf, data_str);
1836
1837 dirname = ML_(addStr)(di, buf, len);
1838 VG_(addToXA) (dirname_xa, &dirname);
1839 if (0) VG_(printf)("rel path %s\n", buf);
1840 ML_(dinfo_free)(buf);
1841 } else {
1842 /* just use 'data'. */
1843 dirname = ML_(addStr)(di,data_str,-1);
1844 VG_(addToXA) (dirname_xa, &dirname);
1845 if (0) VG_(printf)("abs path %s\n", data_str);
1846 }
1847
1848 ML_(dinfo_free)(data_str);
1849 }
1850
1851 TRACE_D3 ("\n");
1852
1853 if (get_UChar (c) != 0) {
1854 ML_(symerr)(NULL, True,
1855 "could not get NUL at end of DWARF directory table");
1856 VG_(deleteXA)(dirname_xa);
1857 return NULL;
1858 }
1859
1860 return dirname_xa;
1861 }
1862
1863 static
read_filename_table(XArray * fndn_ix_Table,const HChar * compdir,const CUConst * cc,ULong debug_line_offset,Bool td3)1864 void read_filename_table( /*MOD*/XArray* /* of UInt* */ fndn_ix_Table,
1865 const HChar* compdir,
1866 const CUConst* cc, ULong debug_line_offset,
1867 Bool td3 )
1868 {
1869 Bool is_dw64;
1870 Cursor c;
1871 Word i;
1872 UShort version;
1873 UChar opcode_base;
1874 const HChar* str;
1875 XArray* dirname_xa; /* xarray of HChar* dirname */
1876 ULong dir_xa_ix; /* Index in dirname_xa, as read from dwarf info. */
1877 const HChar* dirname;
1878 UInt fndn_ix;
1879
1880 vg_assert(fndn_ix_Table && cc && cc->barf);
1881 if (!ML_(sli_is_valid)(cc->escn_debug_line)
1882 || cc->escn_debug_line.szB <= debug_line_offset) {
1883 cc->barf("read_filename_table: .debug_line is missing?");
1884 }
1885
1886 init_Cursor( &c, cc->escn_debug_line, debug_line_offset, cc->barf,
1887 "Overrun whilst reading .debug_line section(1)" );
1888
1889 /* unit_length = */
1890 get_Initial_Length( &is_dw64, &c,
1891 "read_filename_table: invalid initial-length field" );
1892 version = get_UShort( &c );
1893 if (version != 2 && version != 3 && version != 4)
1894 cc->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info "
1895 "is currently supported.");
1896 /*header_length = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 );
1897 /*minimum_instruction_length = */ get_UChar( &c );
1898 if (version >= 4)
1899 /*maximum_operations_per_insn = */ get_UChar( &c );
1900 /*default_is_stmt = */ get_UChar( &c );
1901 /*line_base = (Char)*/ get_UChar( &c );
1902 /*line_range = */ get_UChar( &c );
1903 opcode_base = get_UChar( &c );
1904 /* skip over "standard_opcode_lengths" */
1905 for (i = 1; i < (Word)opcode_base; i++)
1906 (void)get_UChar( &c );
1907
1908 dirname_xa = read_dirname_xa(cc->di, compdir, &c, td3);
1909
1910 /* Read and record the file names table */
1911 vg_assert( VG_(sizeXA)( fndn_ix_Table ) == 0 );
1912 /* Add a dummy index-zero entry. DWARF3 numbers its files
1913 from 1, for some reason. */
1914 fndn_ix = ML_(addFnDn) ( cc->di, "<unknown_file>", NULL );
1915 VG_(addToXA)( fndn_ix_Table, &fndn_ix );
1916 while (peek_UChar(&c) != 0) {
1917 DiCursor cur = get_AsciiZ(&c);
1918 str = ML_(addStrFromCursor)( cc->di, cur );
1919 dir_xa_ix = get_ULEB128( &c );
1920 if (dirname_xa != NULL
1921 && dir_xa_ix >= 0 && dir_xa_ix < VG_(sizeXA) (dirname_xa))
1922 dirname = *(HChar**)VG_(indexXA) ( dirname_xa, dir_xa_ix );
1923 else
1924 dirname = NULL;
1925 fndn_ix = ML_(addFnDn)( cc->di, str, dirname);
1926 TRACE_D3(" read_filename_table: %ld fndn_ix %u %s %s\n",
1927 VG_(sizeXA)(fndn_ix_Table), fndn_ix,
1928 dirname, str);
1929 VG_(addToXA)( fndn_ix_Table, &fndn_ix );
1930 (void)get_ULEB128( &c ); /* skip last mod time */
1931 (void)get_ULEB128( &c ); /* file size */
1932 }
1933 /* We're done! The rest of it is not interesting. */
1934 if (dirname_xa != NULL)
1935 VG_(deleteXA)(dirname_xa);
1936 }
1937
1938 /* setup_cu_svma to be called when a cu is found at level 0,
1939 to establish the cu_svma. */
setup_cu_svma(CUConst * cc,Bool have_lo,Addr ip_lo,Bool td3)1940 static void setup_cu_svma(CUConst* cc, Bool have_lo, Addr ip_lo, Bool td3)
1941 {
1942 Addr cu_svma;
1943 /* We have potentially more than one type of parser parsing the
1944 dwarf information. At least currently, each parser establishes
1945 the cu_svma. So, in case cu_svma_known, we check that the same
1946 result is obtained by the 2nd parsing of the cu.
1947
1948 Alternatively, we could reset cu_svma_known after each parsing
1949 and then check that we only see a single DW_TAG_compile_unit DIE
1950 at level 0, DWARF3 only allows exactly one top level DIE per
1951 CU. */
1952
1953 if (have_lo)
1954 cu_svma = ip_lo;
1955 else {
1956 /* Now, it may be that this DIE doesn't tell us the CU's
1957 SVMA, by way of not having a DW_AT_low_pc. That's OK --
1958 the CU doesn't *have* to have its SVMA specified.
1959
1960 But as per last para D3 spec sec 3.1.1 ("Normal and
1961 Partial Compilation Unit Entries", "If the base address
1962 (viz, the SVMA) is undefined, then any DWARF entry of
1963 structure defined interms of the base address of that
1964 compilation unit is not valid.". So that means, if whilst
1965 processing the children of this top level DIE (or their
1966 children, etc) we see a DW_AT_range, and cu_svma_known is
1967 False, then the DIE that contains it is (per the spec)
1968 invalid, and we can legitimately stop and complain. */
1969 /* .. whereas The Reality is, simply assume the SVMA is zero
1970 if it isn't specified. */
1971 cu_svma = 0;
1972 }
1973
1974 if (cc->cu_svma_known) {
1975 vg_assert (cu_svma == cc->cu_svma);
1976 } else {
1977 cc->cu_svma_known = True;
1978 cc->cu_svma = cu_svma;
1979 if (0)
1980 TRACE_D3("setup_cu_svma: acquire CU_SVMA of %p\n", (void*) cc->cu_svma);
1981 }
1982 }
1983
/* Print the DIE that starts at 'saved_die_c_offset': a header line
   with level, position, abbreviation number and tag, followed by one
   line per attribute.  get_Form_contents is called in tracing mode
   so that the attribute values get printed.

   posn is a cooked DIE reference; it is uncooked here to find out
   which section (.debug_types, alternate .debug_info or .debug_info)
   the DIE lives in.

   As a consistency check, the same DIE is also stepped over with
   skip_DIE, and the outcome is asserted to match what reading it
   produced. */
static void trace_DIE(
   DW_TAG dtag,
   UWord posn,
   Int level,
   UWord saved_die_c_offset,
   const g_abbv *abbv,
   const CUConst* cc)
{
   Cursor       c;             /* reads the DIE */
   Cursor       check_skip;    /* skips the DIE */
   FormContents cts;
   UWord        sibling       = 0;
   UWord        check_sibling = 0;
   Bool         debug_types_flag;
   Bool         alt_flag;
   UInt         nf_i;

   posn = uncook_die( cc, posn, &debug_types_flag, &alt_flag );
   init_Cursor (&c,
                debug_types_flag ? cc->escn_debug_types :
                alt_flag ? cc->escn_debug_info_alt : cc->escn_debug_info,
                saved_die_c_offset, cc->barf,
                "Overrun trace_DIE");
   /* Both cursors start at the same place. */
   check_skip = c;

   VG_(printf)(" <%d><%lx>: Abbrev Number: %llu (%s)%s%s\n",
               level, posn, (ULong) abbv->abbv_code, ML_(pp_DW_TAG)( dtag ),
               debug_types_flag ? " (in .debug_types)" : "",
               alt_flag ? " (in alternate .debug_info)" : "");

   /* Walk the (name, form) pairs up to the (0, 0) terminator. */
   for (nf_i = 0; ; nf_i++) {
      DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
      DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
      if (attr == 0 && form == 0)
         break;
      VG_(printf)(" %-18s: ", ML_(pp_DW_AT)(attr));
      /* Get the form contents, so as to print them */
      get_Form_contents( &cts, cc, &c, True, form );
      if (attr == DW_AT_sibling && cts.szB > 0)
         sibling = cts.u.val;
      VG_(printf)("\t\n");
   }

   /* Verify that skipping a DIE gives the same displacement as
      tracing (i.e. reading) a DIE.  If there is an inconsistency in
      the nr of bytes read by get_Form_contents and get_Form_szB, this
      should be detected by the below.  Using --trace-symtab=yes
      --read-var-info=yes will ensure all DIEs are systematically
      verified. */
   skip_DIE (&check_sibling, &check_skip, abbv, cc);
   vg_assert (check_sibling == sibling);
   vg_assert (get_position_of_Cursor (&check_skip)
              == get_position_of_Cursor (&c));
}
2038
2039 __attribute__((noreturn))
dump_bad_die_and_barf(const HChar * whichparser,DW_TAG dtag,UWord posn,Int level,Cursor * c_die,UWord saved_die_c_offset,const g_abbv * abbv,const CUConst * cc)2040 static void dump_bad_die_and_barf(
2041 const HChar *whichparser,
2042 DW_TAG dtag,
2043 UWord posn,
2044 Int level,
2045 Cursor* c_die,
2046 UWord saved_die_c_offset,
2047 const g_abbv *abbv,
2048 const CUConst* cc)
2049 {
2050 trace_DIE (dtag, posn, level, saved_die_c_offset, abbv, cc);
2051 VG_(printf)("%s:\n", whichparser);
2052 cc->barf("confused by the above DIE");
2053 }
2054
2055 __attribute__((noinline))
bad_DIE_confusion(int linenr)2056 static void bad_DIE_confusion(int linenr)
2057 {
2058 VG_(printf)("\nparse DIE(readdwarf3.c:%d): confused by:\n", linenr);
2059 }
2060 #define goto_bad_DIE do {bad_DIE_confusion(__LINE__); goto bad_DIE;} while (0)
2061
2062 __attribute__((noinline))
parse_var_DIE(WordFM * rangestree,XArray * tempvars,XArray * gexprs,D3VarParser * parser,DW_TAG dtag,UWord posn,Int level,Cursor * c_die,const g_abbv * abbv,CUConst * cc,Bool td3)2063 static void parse_var_DIE (
2064 /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
2065 /*MOD*/XArray* /* of TempVar* */ tempvars,
2066 /*MOD*/XArray* /* of GExpr* */ gexprs,
2067 /*MOD*/D3VarParser* parser,
2068 DW_TAG dtag,
2069 UWord posn,
2070 Int level,
2071 Cursor* c_die,
2072 const g_abbv *abbv,
2073 CUConst* cc,
2074 Bool td3
2075 )
2076 {
2077 FormContents cts;
2078 UInt nf_i;
2079
2080 UWord saved_die_c_offset = get_position_of_Cursor( c_die );
2081
2082 varstack_preen( parser, td3, level-1 );
2083
2084 if (dtag == DW_TAG_compile_unit
2085 || dtag == DW_TAG_type_unit
2086 || dtag == DW_TAG_partial_unit) {
2087 Bool have_lo = False;
2088 Bool have_hi1 = False;
2089 Bool hiIsRelative = False;
2090 Bool have_range = False;
2091 Addr ip_lo = 0;
2092 Addr ip_hi1 = 0;
2093 Addr rangeoff = 0;
2094 const HChar *compdir = NULL;
2095 nf_i = 0;
2096 while (True) {
2097 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name;
2098 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2099 nf_i++;
2100 if (attr == 0 && form == 0) break;
2101 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2102 if (attr == DW_AT_low_pc && cts.szB > 0) {
2103 ip_lo = cts.u.val;
2104 have_lo = True;
2105 }
2106 if (attr == DW_AT_high_pc && cts.szB > 0) {
2107 ip_hi1 = cts.u.val;
2108 have_hi1 = True;
2109 if (form != DW_FORM_addr)
2110 hiIsRelative = True;
2111 }
2112 if (attr == DW_AT_ranges && cts.szB > 0) {
2113 rangeoff = cts.u.val;
2114 have_range = True;
2115 }
2116 if (attr == DW_AT_comp_dir) {
2117 if (cts.szB >= 0)
2118 cc->barf("parse_var_DIE compdir: expecting indirect string");
2119 HChar *str = ML_(cur_read_strdup)( cts.u.cur,
2120 "parse_var_DIE.compdir" );
2121 compdir = ML_(addStr)(cc->di, str, -1);
2122 ML_(dinfo_free) (str);
2123 }
2124 if (attr == DW_AT_stmt_list && cts.szB > 0) {
2125 read_filename_table( parser->fndn_ix_Table, compdir,
2126 cc, cts.u.val, td3 );
2127 }
2128 }
2129 if (have_lo && have_hi1 && hiIsRelative)
2130 ip_hi1 += ip_lo;
2131
2132 /* Now, does this give us an opportunity to find this
2133 CU's svma? */
2134 if (level == 0)
2135 setup_cu_svma(cc, have_lo, ip_lo, td3);
2136
2137 /* Do we have something that looks sane? */
2138 if (have_lo && have_hi1 && (!have_range)) {
2139 if (ip_lo < ip_hi1)
2140 varstack_push( cc, parser, td3,
2141 unitary_range_list(ip_lo, ip_hi1 - 1),
2142 level,
2143 False/*isFunc*/, NULL/*fbGX*/ );
2144 else if (ip_lo == 0 && ip_hi1 == 0)
2145 /* CU has no code, presumably?
2146 Such situations have been encountered for code
2147 compiled with -ffunction-sections -fdata-sections
2148 and linked with --gc-sections. Completely
2149 eliminated CU gives such 0 lo/hi pc. Similarly
2150 to a CU which has no lo/hi/range pc, we push
2151 an empty range list. */
2152 varstack_push( cc, parser, td3,
2153 empty_range_list(),
2154 level,
2155 False/*isFunc*/, NULL/*fbGX*/ );
2156 } else
2157 if ((!have_lo) && (!have_hi1) && have_range) {
2158 varstack_push( cc, parser, td3,
2159 get_range_list( cc, td3,
2160 rangeoff, cc->cu_svma ),
2161 level,
2162 False/*isFunc*/, NULL/*fbGX*/ );
2163 } else
2164 if ((!have_lo) && (!have_hi1) && (!have_range)) {
2165 /* CU has no code, presumably? */
2166 varstack_push( cc, parser, td3,
2167 empty_range_list(),
2168 level,
2169 False/*isFunc*/, NULL/*fbGX*/ );
2170 } else
2171 if (have_lo && (!have_hi1) && have_range && ip_lo == 0) {
2172 /* broken DIE created by gcc-4.3.X ? Ignore the
2173 apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
2174 instead. */
2175 varstack_push( cc, parser, td3,
2176 get_range_list( cc, td3,
2177 rangeoff, cc->cu_svma ),
2178 level,
2179 False/*isFunc*/, NULL/*fbGX*/ );
2180 } else {
2181 if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n",
2182 (Int)have_lo, (Int)have_hi1, (Int)have_range);
2183 goto_bad_DIE;
2184 }
2185 }
2186
2187 if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
2188 Bool have_lo = False;
2189 Bool have_hi1 = False;
2190 Bool have_range = False;
2191 Bool hiIsRelative = False;
2192 Addr ip_lo = 0;
2193 Addr ip_hi1 = 0;
2194 Addr rangeoff = 0;
2195 Bool isFunc = dtag == DW_TAG_subprogram;
2196 GExpr* fbGX = NULL;
2197 nf_i = 0;
2198 while (True) {
2199 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name;
2200 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2201 nf_i++;
2202 if (attr == 0 && form == 0) break;
2203 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2204 if (attr == DW_AT_low_pc && cts.szB > 0) {
2205 ip_lo = cts.u.val;
2206 have_lo = True;
2207 }
2208 if (attr == DW_AT_high_pc && cts.szB > 0) {
2209 ip_hi1 = cts.u.val;
2210 have_hi1 = True;
2211 if (form != DW_FORM_addr)
2212 hiIsRelative = True;
2213 }
2214 if (attr == DW_AT_ranges && cts.szB > 0) {
2215 rangeoff = cts.u.val;
2216 have_range = True;
2217 }
2218 if (isFunc
2219 && attr == DW_AT_frame_base
2220 && cts.szB != 0 /* either scalar or nonempty block */) {
2221 fbGX = get_GX( cc, False/*td3*/, &cts );
2222 vg_assert(fbGX);
2223 VG_(addToXA)(gexprs, &fbGX);
2224 }
2225 }
2226 if (have_lo && have_hi1 && hiIsRelative)
2227 ip_hi1 += ip_lo;
2228 /* Do we have something that looks sane? */
2229 if (dtag == DW_TAG_subprogram
2230 && (!have_lo) && (!have_hi1) && (!have_range)) {
2231 /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
2232 representing a subroutine declaration that is not also a
2233 definition does not have code address or range
2234 attributes." */
2235 } else
2236 if (dtag == DW_TAG_lexical_block
2237 && (!have_lo) && (!have_hi1) && (!have_range)) {
2238 /* I believe this is legit, and means the lexical block
2239 contains no insns (whatever that might mean). Ignore. */
2240 } else
2241 if (have_lo && have_hi1 && (!have_range)) {
2242 /* This scope supplies just a single address range. */
2243 if (ip_lo < ip_hi1)
2244 varstack_push( cc, parser, td3,
2245 unitary_range_list(ip_lo, ip_hi1 - 1),
2246 level, isFunc, fbGX );
2247 } else
2248 if ((!have_lo) && (!have_hi1) && have_range) {
2249 /* This scope supplies multiple address ranges via the use of
2250 a range list. */
2251 varstack_push( cc, parser, td3,
2252 get_range_list( cc, td3,
2253 rangeoff, cc->cu_svma ),
2254 level, isFunc, fbGX );
2255 } else
2256 if (have_lo && (!have_hi1) && (!have_range)) {
2257 /* This scope is bogus. The D3 spec sec 3.4 (Lexical Block
2258 Entries) says fairly clearly that a scope must have either
2259 _range or (_low_pc and _high_pc). */
2260 /* The spec is a bit ambiguous though. Perhaps a single byte
2261 range is intended? See sec 2.17 (Code Addresses And Ranges) */
2262 /* This case is here because icc9 produced this:
2263 <2><13bd>: DW_TAG_lexical_block
2264 DW_AT_decl_line : 5229
2265 DW_AT_decl_column : 37
2266 DW_AT_decl_file : 1
2267 DW_AT_low_pc : 0x401b03
2268 */
2269 /* Ignore (seems safe than pushing a single byte range) */
2270 } else
2271 goto_bad_DIE;
2272 }
2273
2274 if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
2275 const HChar* name = NULL;
2276 UWord typeR = D3_INVALID_CUOFF;
2277 Bool global = False;
2278 GExpr* gexpr = NULL;
2279 Int n_attrs = 0;
2280 UWord abs_ori = (UWord)D3_INVALID_CUOFF;
2281 Int lineNo = 0;
2282 UInt fndn_ix = 0;
2283 nf_i = 0;
2284 while (True) {
2285 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name;
2286 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2287 nf_i++;
2288 if (attr == 0 && form == 0) break;
2289 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2290 n_attrs++;
2291 if (attr == DW_AT_name && cts.szB < 0) {
2292 name = ML_(addStrFromCursor)( cc->di, cts.u.cur );
2293 }
2294 if (attr == DW_AT_location
2295 && cts.szB != 0 /* either scalar or nonempty block */) {
2296 gexpr = get_GX( cc, False/*td3*/, &cts );
2297 vg_assert(gexpr);
2298 VG_(addToXA)(gexprs, &gexpr);
2299 }
2300 if (attr == DW_AT_type && cts.szB > 0) {
2301 typeR = cook_die_using_form( cc, cts.u.val, form );
2302 }
2303 if (attr == DW_AT_external && cts.szB > 0 && cts.u.val > 0) {
2304 global = True;
2305 }
2306 if (attr == DW_AT_abstract_origin && cts.szB > 0) {
2307 abs_ori = (UWord)cts.u.val;
2308 }
2309 if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
2310 /*declaration = True;*/
2311 }
2312 if (attr == DW_AT_decl_line && cts.szB > 0) {
2313 lineNo = (Int)cts.u.val;
2314 }
2315 if (attr == DW_AT_decl_file && cts.szB > 0) {
2316 Int ftabIx = (Int)cts.u.val;
2317 if (ftabIx >= 1
2318 && ftabIx < VG_(sizeXA)( parser->fndn_ix_Table )) {
2319 fndn_ix = *(UInt*)VG_(indexXA)( parser->fndn_ix_Table, ftabIx );
2320 }
2321 if (0) VG_(printf)("XXX filename fndn_ix = %u %s\n", fndn_ix,
2322 ML_(fndn_ix2filename) (cc->di, fndn_ix));
2323 }
2324 }
2325 if (!global && dtag == DW_TAG_variable && level == 1) {
2326 /* Case of a static variable. It is better to declare
2327 it global as the variable is not really related to
2328 a PC range, as its address can be used by program
2329 counters outside of the ranges where it is visible . */
2330 global = True;
2331 }
2332
2333 /* We'll collect it under if one of the following three
2334 conditions holds:
2335 (1) has location and type -> completed
2336 (2) has type only -> is an abstract instance
2337 (3) has location and abs_ori -> is a concrete instance
2338 Name, fndn_ix and line number are all optional frills.
2339 */
2340 if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
2341 /* 2 */ || (typeR != D3_INVALID_CUOFF)
2342 /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {
2343
2344 /* Add this variable to the list of interesting looking
2345 variables. Crucially, note along with it the address
2346 range(s) associated with the variable, which for locals
2347 will be the address ranges at the top of the varparser's
2348 stack. */
2349 GExpr* fbGX = NULL;
2350 Word i, nRanges;
2351 const XArray* /* of AddrRange */ xa;
2352 TempVar* tv;
2353 /* Stack can't be empty; we put a dummy entry on it for the
2354 entire address range before starting with the DIEs for
2355 this CU. */
2356 vg_assert(parser->sp >= 0);
2357
2358 /* If this is a local variable (non-global), try to find
2359 the GExpr for the DW_AT_frame_base of the containing
2360 function. It should have been pushed on the stack at the
2361 time we encountered its DW_TAG_subprogram DIE, so the way
2362 to find it is to scan back down the stack looking for it.
2363 If there isn't an enclosing stack entry marked 'isFunc'
2364 then we must be seeing variable or formal param DIEs
2365 outside of a function, so we deem the Dwarf to be
2366 malformed if that happens. Note that the fbGX may be NULL
2367 if the containing DT_TAG_subprogram didn't supply a
2368 DW_AT_frame_base -- that's OK, but there must actually be
2369 a containing DW_TAG_subprogram. */
2370 if (!global) {
2371 Bool found = False;
2372 for (i = parser->sp; i >= 0; i--) {
2373 if (parser->isFunc[i]) {
2374 fbGX = parser->fbGX[i];
2375 found = True;
2376 break;
2377 }
2378 }
2379 if (!found) {
2380 if (0 && VG_(clo_verbosity) >= 0) {
2381 VG_(message)(Vg_DebugMsg,
2382 "warning: parse_var_DIE: non-global variable "
2383 "outside DW_TAG_subprogram\n");
2384 }
2385 /* goto_bad_DIE; */
2386 /* This seems to happen a lot. Just ignore it -- if,
2387 when we come to evaluation of the location (guarded)
2388 expression, it requires a frame base value, and
2389 there's no expression for that, then evaluation as a
2390 whole will fail. Harmless - a bit of a waste of
2391 cycles but nothing more. */
2392 }
2393 }
2394
2395 /* re "global ? 0 : parser->sp" (twice), if the var is
2396 marked 'global' then we must put it at the global scope,
2397 as only the global scope (level 0) covers the entire PC
2398 address space. It is asserted elsewhere that level 0
2399 always covers the entire address space. */
2400 xa = parser->ranges[global ? 0 : parser->sp];
2401 nRanges = VG_(sizeXA)(xa);
2402 vg_assert(nRanges >= 0);
2403
2404 tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
2405 tv->name = name;
2406 tv->level = global ? 0 : parser->sp;
2407 tv->typeR = typeR;
2408 tv->gexpr = gexpr;
2409 tv->fbGX = fbGX;
2410 tv->fndn_ix= fndn_ix;
2411 tv->fLine = lineNo;
2412 tv->dioff = posn;
2413 tv->absOri = abs_ori;
2414
2415 /* See explanation on definition of type TempVar for the
2416 reason for this elaboration. */
2417 tv->nRanges = nRanges;
2418 tv->rngOneMin = 0;
2419 tv->rngOneMax = 0;
2420 tv->rngMany = NULL;
2421 if (nRanges == 1) {
2422 AddrRange* range = VG_(indexXA)(xa, 0);
2423 tv->rngOneMin = range->aMin;
2424 tv->rngOneMax = range->aMax;
2425 }
2426 else if (nRanges > 1) {
2427 /* See if we already have a range list which is
2428 structurally identical. If so, use that; if not, clone
2429 this one, and add it to our collection. */
2430 UWord keyW, valW;
2431 if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
2432 XArray* old = (XArray*)keyW;
2433 vg_assert(valW == 0);
2434 vg_assert(old != xa);
2435 tv->rngMany = old;
2436 } else {
2437 XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
2438 tv->rngMany = cloned;
2439 VG_(addToFM)( rangestree, (UWord)cloned, 0 );
2440 }
2441 }
2442
2443 VG_(addToXA)( tempvars, &tv );
2444
2445 TRACE_D3(" Recording this variable, with %ld PC range(s)\n",
2446 VG_(sizeXA)(xa) );
2447 /* collect stats on how effective the ->ranges special
2448 casing is */
2449 if (0) {
2450 static Int ntot=0, ngt=0;
2451 ntot++;
2452 if (tv->rngMany) ngt++;
2453 if (0 == (ntot % 100000))
2454 VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
2455 }
2456
2457 }
2458
2459 /* Here are some other weird cases seen in the wild:
2460
2461 We have a variable with a name and a type, but no
2462 location. I guess that's a sign that it has been
2463 optimised away. Ignore it. Here's an example:
2464
2465 static Int lc_compar(void* n1, void* n2) {
2466 MC_Chunk* mc1 = *(MC_Chunk**)n1;
2467 MC_Chunk* mc2 = *(MC_Chunk**)n2;
2468 return (mc1->data < mc2->data ? -1 : 1);
2469 }
2470
2471 Both mc1 and mc2 are like this
2472 <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
2473 DW_AT_name : mc1
2474 DW_AT_decl_file : 1
2475 DW_AT_decl_line : 216
2476 DW_AT_type : <5d3>
2477
2478 whereas n1 and n2 do have locations specified.
2479
2480 ---------------------------------------------
2481
2482 We see a DW_TAG_formal_parameter with a type, but
2483 no name and no location. It's probably part of a function type
2484 construction, thusly, hence ignore it:
2485 <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
2486 DW_AT_sibling : <2c9>
2487 DW_AT_prototyped : 1
2488 DW_AT_type : <114>
2489 <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
2490 DW_AT_type : <13e>
2491 <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
2492 DW_AT_type : <133>
2493
2494 ---------------------------------------------
2495
2496 Is very minimal, like this:
2497 <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
2498 DW_AT_abstract_origin: <7ba>
2499 What that signifies I have no idea. Ignore.
2500
2501 ----------------------------------------------
2502
2503 Is very minimal, like this:
2504 <200f>: DW_TAG_formal_parameter
2505 DW_AT_abstract_ori: <1f4c>
2506 DW_AT_location : 13440
2507 What that signifies I have no idea. Ignore.
2508 It might be significant, though: the variable at least
2509 has a location and so might exist somewhere.
2510 Maybe we should handle this.
2511
2512 ---------------------------------------------
2513
2514 <22407>: DW_TAG_variable
2515 DW_AT_name : (indirect string, offset: 0x6579):
2516 vgPlain_trampoline_stuff_start
2517 DW_AT_decl_file : 29
2518 DW_AT_decl_line : 56
2519 DW_AT_external : 1
2520 DW_AT_declaration : 1
2521
2522 Nameless and typeless variable that has a location? Who
2523 knows. Not me.
2524 <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
2525 DW_AT_location : 9 byte block: 3 c0 c7 13 38 0 0 0 0
2526 (DW_OP_addr: 3813c7c0)
2527
2528 No, really. Check it out. gcc is quite simply borked.
2529 <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
2530 // followed by no attributes, and the next DIE is a sibling,
2531 // not a child
2532 */
2533 }
2534 return;
2535
2536 bad_DIE:
2537 dump_bad_die_and_barf("parse_var_DIE", dtag, posn, level,
2538 c_die, saved_die_c_offset,
2539 abbv,
2540 cc);
2541 /*NOTREACHED*/
2542 }
2543
2544 typedef
2545 struct {
2546 /* The fndn_ix file name/dirname table. Is a mapping from dwarf
2547 integer index to the index in di->fndnpool. */
2548 XArray* /* of UInt* */ fndn_ix_Table;
2549 UWord sibling; // sibling of the last read DIE (if it has a sibling).
2550 }
2551 D3InlParser;
2552
2553 /* Return the function name corresponding to absori.
2554
2555 absori is a 'cooked' reference to a DIE, i.e. absori can be either
2556 in cc->escn_debug_info or in cc->escn_debug_info_alt.
2557 get_inlFnName will uncook absori.
2558
2559 The returned value is a (permanent) string in DebugInfo's .strchunks.
2560
2561 LIMITATION: absori must point in the CU of cc. If absori points
2562 in another CU, returns "UnknownInlinedFun".
2563
2564 Here are the problems to retrieve the fun name if absori is in
2565 another CU: the DIE reading code cannot properly extract data from
2566 another CU, as the abbv code retrieved in the other CU cannot be
2567 translated in an abbreviation. Reading data from the alternate debug
2568 info also gives problems as the string reference is also in the alternate
2569 file, but when reading the alt DIE, the string form is a 'local' string,
2570 but cannot be read in the current CU, but must be read in the alt CU.
2571 See bug 338803 comment#3 and attachment for a failed attempt to handle
2572 these problems (failed because with the patch, only one alt abbrev hash
2573 table is kept, while we must handle all abbreviations in all CUs
2574 referenced by an absori (being a reference to an alt CU, or a previous
2575 or following CU). */
get_inlFnName(Int absori,const CUConst * cc,Bool td3)2576 static const HChar* get_inlFnName (Int absori, const CUConst* cc, Bool td3)
2577 {
2578 Cursor c;
2579 const g_abbv *abbv;
2580 ULong atag, abbv_code;
2581 UInt has_children;
2582 UWord posn;
2583 Bool type_flag, alt_flag;
2584 const HChar *ret = NULL;
2585 FormContents cts;
2586 UInt nf_i;
2587
2588 posn = uncook_die( cc, absori, &type_flag, &alt_flag);
2589 if (type_flag)
2590 cc->barf("get_inlFnName: uncooked absori in type debug info");
2591
2592 /* LIMITATION: check we are in the same CU.
2593 If not, return unknown inlined function name. */
2594 /* if crossing between alt debug info<>normal info
2595 or posn not in the cu range,
2596 then it is in another CU. */
2597 if (alt_flag != cc->is_alt_info
2598 || posn < cc->cu_start_offset
2599 || posn >= cc->cu_start_offset + cc->unit_length) {
2600 static Bool reported = False;
2601 if (!reported && VG_(clo_verbosity) > 1) {
2602 VG_(message)(Vg_DebugMsg,
2603 "Warning: cross-CU LIMITATION: some inlined fn names\n"
2604 "might be shown as UnknownInlinedFun\n");
2605 reported = True;
2606 }
2607 TRACE_D3(" <get_inlFnName><%lx>: cross-CU LIMITATION", posn);
2608 return ML_(addStr)(cc->di, "UnknownInlinedFun", -1);
2609 }
2610
2611 init_Cursor (&c, cc->escn_debug_info, posn, cc->barf,
2612 "Overrun get_inlFnName absori");
2613
2614 abbv_code = get_ULEB128( &c );
2615 abbv = get_abbv ( cc, abbv_code);
2616 atag = abbv->atag;
2617 TRACE_D3(" <get_inlFnName><%lx>: Abbrev Number: %llu (%s)\n",
2618 posn, abbv_code, ML_(pp_DW_TAG)( atag ) );
2619
2620 if (atag == 0)
2621 cc->barf("get_inlFnName: invalid zero tag on DIE");
2622
2623 has_children = abbv->has_children;
2624 if (has_children != DW_children_no && has_children != DW_children_yes)
2625 cc->barf("get_inlFnName: invalid has_children value");
2626
2627 if (atag != DW_TAG_subprogram)
2628 cc->barf("get_inlFnName: absori not a subprogram");
2629
2630 nf_i = 0;
2631 while (True) {
2632 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name;
2633 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2634 nf_i++;
2635 if (attr == 0 && form == 0) break;
2636 get_Form_contents( &cts, cc, &c, False/*td3*/, form );
2637 if (attr == DW_AT_name) {
2638 HChar *fnname;
2639 if (cts.szB >= 0)
2640 cc->barf("get_inlFnName: expecting indirect string");
2641 fnname = ML_(cur_read_strdup)( cts.u.cur,
2642 "get_inlFnName.1" );
2643 ret = ML_(addStr)(cc->di, fnname, -1);
2644 ML_(dinfo_free) (fnname);
2645 break; /* Name found, get out of the loop, as this has priority over
2646 DW_AT_specification. */
2647 }
2648 if (attr == DW_AT_specification) {
2649 UWord cdie;
2650
2651 if (cts.szB == 0)
2652 cc->barf("get_inlFnName: AT specification missing");
2653
2654 /* The recursive call to get_inlFnName will uncook its arg.
2655 So, we need to cook it here, so as to reference the
2656 correct section (e.g. the alt info). */
2657 cdie = cook_die_using_form(cc, (UWord)cts.u.val, form);
2658
2659 /* hoping that there is no loop */
2660 ret = get_inlFnName (cdie, cc, td3);
2661 /* Unclear if having both DW_AT_specification and DW_AT_name is
2662 possible but in any case, we do not break here.
2663 If we find later on a DW_AT_name, it will override the name found
2664 in the DW_AT_specification.*/
2665 }
2666 }
2667
2668 if (ret)
2669 return ret;
2670 else {
2671 TRACE_D3("AbsOriFnNameNotFound");
2672 return ML_(addStr)(cc->di, "AbsOriFnNameNotFound", -1);
2673 }
2674 }
2675
/* Returns True if the children (if any) of the current DIE are
   interesting to parse.  Returns False otherwise.
   If the current DIE has a sibling, the uninteresting children can
   possibly be skipped (provided the DIE has a DW_AT_sibling). */
2680 __attribute__((noinline))
parse_inl_DIE(D3InlParser * parser,DW_TAG dtag,UWord posn,Int level,Cursor * c_die,const g_abbv * abbv,CUConst * cc,Bool td3)2681 static Bool parse_inl_DIE (
2682 /*MOD*/D3InlParser* parser,
2683 DW_TAG dtag,
2684 UWord posn,
2685 Int level,
2686 Cursor* c_die,
2687 const g_abbv *abbv,
2688 CUConst* cc,
2689 Bool td3
2690 )
2691 {
2692 FormContents cts;
2693 UInt nf_i;
2694
2695 UWord saved_die_c_offset = get_position_of_Cursor( c_die );
2696
2697 /* Get info about DW_TAG_compile_unit and DW_TAG_partial_unit 'which
2698 in theory could also contain inlined fn calls). */
2699 if (dtag == DW_TAG_compile_unit || dtag == DW_TAG_partial_unit) {
2700 Bool have_lo = False;
2701 Addr ip_lo = 0;
2702 const HChar *compdir = NULL;
2703
2704 nf_i = 0;
2705 while (True) {
2706 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name;
2707 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2708 nf_i++;
2709 if (attr == 0 && form == 0) break;
2710 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2711 if (attr == DW_AT_low_pc && cts.szB > 0) {
2712 ip_lo = cts.u.val;
2713 have_lo = True;
2714 }
2715 if (attr == DW_AT_comp_dir) {
2716 if (cts.szB >= 0)
2717 cc->barf("parse_inl_DIE compdir: expecting indirect string");
2718 HChar *str = ML_(cur_read_strdup)( cts.u.cur,
2719 "parse_inl_DIE.compdir" );
2720 compdir = ML_(addStr)(cc->di, str, -1);
2721 ML_(dinfo_free) (str);
2722 }
2723 if (attr == DW_AT_stmt_list && cts.szB > 0) {
2724 read_filename_table( parser->fndn_ix_Table, compdir,
2725 cc, cts.u.val, td3 );
2726 }
2727 if (attr == DW_AT_sibling && cts.szB > 0) {
2728 parser->sibling = cts.u.val;
2729 }
2730 }
2731 if (level == 0)
2732 setup_cu_svma (cc, have_lo, ip_lo, td3);
2733 }
2734
2735 if (dtag == DW_TAG_inlined_subroutine) {
2736 Bool have_lo = False;
2737 Bool have_hi1 = False;
2738 Bool have_range = False;
2739 Bool hiIsRelative = False;
2740 Addr ip_lo = 0;
2741 Addr ip_hi1 = 0;
2742 Addr rangeoff = 0;
2743 UInt caller_fndn_ix = 0;
2744 Int caller_lineno = 0;
2745 Int inlinedfn_abstract_origin = 0;
2746
2747 nf_i = 0;
2748 while (True) {
2749 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name;
2750 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2751 nf_i++;
2752 if (attr == 0 && form == 0) break;
2753 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2754 if (attr == DW_AT_call_file && cts.szB > 0) {
2755 Int ftabIx = (Int)cts.u.val;
2756 if (ftabIx >= 1
2757 && ftabIx < VG_(sizeXA)( parser->fndn_ix_Table )) {
2758 caller_fndn_ix = *(UInt*)
2759 VG_(indexXA)( parser->fndn_ix_Table, ftabIx );
2760 }
2761 if (0) VG_(printf)("XXX caller_fndn_ix = %u %s\n", caller_fndn_ix,
2762 ML_(fndn_ix2filename) (cc->di, caller_fndn_ix));
2763 }
2764 if (attr == DW_AT_call_line && cts.szB > 0) {
2765 caller_lineno = cts.u.val;
2766 }
2767
2768 if (attr == DW_AT_abstract_origin && cts.szB > 0) {
2769 inlinedfn_abstract_origin
2770 = cook_die_using_form (cc, (UWord)cts.u.val, form);
2771 }
2772
2773 if (attr == DW_AT_low_pc && cts.szB > 0) {
2774 ip_lo = cts.u.val;
2775 have_lo = True;
2776 }
2777 if (attr == DW_AT_high_pc && cts.szB > 0) {
2778 ip_hi1 = cts.u.val;
2779 have_hi1 = True;
2780 if (form != DW_FORM_addr)
2781 hiIsRelative = True;
2782 }
2783 if (attr == DW_AT_ranges && cts.szB > 0) {
2784 rangeoff = cts.u.val;
2785 have_range = True;
2786 }
2787 if (attr == DW_AT_sibling && cts.szB > 0) {
2788 parser->sibling = cts.u.val;
2789 }
2790 }
2791 if (have_lo && have_hi1 && hiIsRelative)
2792 ip_hi1 += ip_lo;
2793 /* Do we have something that looks sane? */
2794 if (dtag == DW_TAG_inlined_subroutine
2795 && (!have_lo) && (!have_hi1) && (!have_range)) {
2796 /* Seems strange. How can an inlined subroutine have
2797 no code ? */
2798 goto_bad_DIE;
2799 } else
2800 if (have_lo && have_hi1 && (!have_range)) {
2801 /* This inlined call is just a single address range. */
2802 if (ip_lo < ip_hi1) {
2803 /* Apply text debug biasing */
2804 ip_lo += cc->di->text_debug_bias;
2805 ip_hi1 += cc->di->text_debug_bias;
2806 ML_(addInlInfo) (cc->di,
2807 ip_lo, ip_hi1,
2808 get_inlFnName (inlinedfn_abstract_origin, cc, td3),
2809 caller_fndn_ix,
2810 caller_lineno, level);
2811 }
2812 } else if (have_range) {
2813 /* This inlined call is several address ranges. */
2814 XArray *ranges;
2815 Word j;
2816 const HChar *inlfnname =
2817 get_inlFnName (inlinedfn_abstract_origin, cc, td3);
2818
2819 /* Ranges are biased for the inline info using the same logic
2820 as what is used for biasing ranges for the var info, for which
2821 ranges are read using cc->cu_svma (see parse_var_DIE).
2822 Then text_debug_bias is added when a (non global) var
2823 is recorded (see just before the call to ML_(addVar)) */
2824 ranges = get_range_list( cc, td3,
2825 rangeoff, cc->cu_svma );
2826 for (j = 0; j < VG_(sizeXA)( ranges ); j++) {
2827 AddrRange* range = (AddrRange*) VG_(indexXA)( ranges, j );
2828 ML_(addInlInfo) (cc->di,
2829 range->aMin + cc->di->text_debug_bias,
2830 range->aMax+1 + cc->di->text_debug_bias,
2831 // aMax+1 as range has its last bound included
2832 // while ML_(addInlInfo) expects last bound not
2833 // included.
2834 inlfnname,
2835 caller_fndn_ix,
2836 caller_lineno, level);
2837 }
2838 VG_(deleteXA)( ranges );
2839 } else
2840 goto_bad_DIE;
2841 }
2842
2843 // Only recursively parse the (possible) children for the DIE which
2844 // might maybe contain a DW_TAG_inlined_subroutine:
2845 return dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram
2846 || dtag == DW_TAG_inlined_subroutine
2847 || dtag == DW_TAG_compile_unit || dtag == DW_TAG_partial_unit;
2848
2849 bad_DIE:
2850 dump_bad_die_and_barf("parse_inl_DIE", dtag, posn, level,
2851 c_die, saved_die_c_offset,
2852 abbv,
2853 cc);
2854 /*NOTREACHED*/
2855 }
2856
2857
2858 /*------------------------------------------------------------*/
2859 /*--- ---*/
2860 /*--- Parsing of type-related DIEs ---*/
2861 /*--- ---*/
2862 /*------------------------------------------------------------*/
2863
/* State of the type-DIE parser: the source language in effect, plus a
   stack of the types currently under construction.  The stack is held
   as two parallel arrays, qparentE[] and qlevel[], both indexed by
   0 .. sp. */
typedef
   struct {
      /* What source language?  'A'=Ada83/95,
                                'C'=C/C++,
                                'F'=Fortran,
                                '?'=other
         Established once per compilation unit. */
      UChar language;
      /* A stack of types which are currently under construction */
      Int   sp; /* [sp] is innermost active entry; sp==-1 for empty
                   stack */
      /* Number of entries allocated in qparentE[] and qlevel[];
         always greater than sp. */
      Int   stack_size;
      /* Note that the TyEnts in qparentE are temporary copies of the
         ones accumulating in the main tyent array.  So it is not safe
         to free up anything on them when popping them off the stack
         (iow, it isn't safe to use TyEnt__make_EMPTY on them).  Just
         memset them to zero when done. */
      TyEnt *qparentE; /* parent TyEnts */
      /* qlevel[i] is the DIE nesting level at which qparentE[i] was
         pushed. */
      Int   *qlevel;
   }
   D3TypeParser;
2885
2886 /* Completely initialise a type parser object */
2887 static void
type_parser_init(D3TypeParser * parser)2888 type_parser_init ( D3TypeParser *parser )
2889 {
2890 parser->sp = -1;
2891 parser->language = '?';
2892 parser->stack_size = 0;
2893 parser->qparentE = NULL;
2894 parser->qlevel = NULL;
2895 }
2896
2897 /* Release any memory hanging off a type parser object */
2898 static void
type_parser_release(D3TypeParser * parser)2899 type_parser_release ( D3TypeParser *parser )
2900 {
2901 ML_(dinfo_free)( parser->qparentE );
2902 ML_(dinfo_free)( parser->qlevel );
2903 }
2904
typestack_show(const D3TypeParser * parser,const HChar * str)2905 static void typestack_show ( const D3TypeParser* parser, const HChar* str )
2906 {
2907 Word i;
2908 VG_(printf)(" typestack (%s) {\n", str);
2909 for (i = 0; i <= parser->sp; i++) {
2910 VG_(printf)(" [%ld] (level %d): ", i, parser->qlevel[i]);
2911 ML_(pp_TyEnt)( &parser->qparentE[i] );
2912 VG_(printf)("\n");
2913 }
2914 VG_(printf)(" }\n");
2915 }
2916
2917 /* Remove from the stack, all entries with .level > 'level' */
2918 static
typestack_preen(D3TypeParser * parser,Bool td3,Int level)2919 void typestack_preen ( D3TypeParser* parser, Bool td3, Int level )
2920 {
2921 Bool changed = False;
2922 vg_assert(parser->sp < parser->stack_size);
2923 while (True) {
2924 vg_assert(parser->sp >= -1);
2925 if (parser->sp == -1) break;
2926 if (parser->qlevel[parser->sp] <= level) break;
2927 if (0)
2928 TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
2929 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2930 parser->sp--;
2931 changed = True;
2932 }
2933 if (changed && td3)
2934 typestack_show( parser, "after preen" );
2935 }
2936
typestack_is_empty(const D3TypeParser * parser)2937 static Bool typestack_is_empty ( const D3TypeParser* parser )
2938 {
2939 vg_assert(parser->sp >= -1 && parser->sp < parser->stack_size);
2940 return parser->sp == -1;
2941 }
2942
/* Push a copy of *parentE on the type stack, recorded at nesting level
   'level'.  Entries already on the stack at 'level' or deeper are
   removed first, so the new entry becomes the innermost one.  Both
   stack arrays are enlarged by 16 entries whenever they are full.
   'parentE' must be a type entity with a valid cuOff.  If td3 is set,
   the stack is shown afterwards. */
static void typestack_push ( const CUConst* cc,
                             D3TypeParser* parser,
                             Bool td3,
                             const TyEnt* parentE, Int level )
{
   if (0)
      TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d %05lx\n",
               parser->sp+1, level, parentE->cuOff);

   /* Whatever is recorded at 'level' or below it is stale now, since
      the new entry takes the place of any previous one at 'level'. */
   typestack_preen(parser, /*td3*/False, level-1);

   vg_assert(parser->sp >= -1);
   vg_assert(parser->sp < parser->stack_size);

   /* All slots in use (or none allocated yet)?  Then grow both
      parallel arrays by the same amount. */
   if (parser->sp + 1 == parser->stack_size) {
      const Int grown = parser->stack_size + 16;
      parser->stack_size = grown;
      parser->qparentE =
         ML_(dinfo_realloc)("di.readdwarf3.typush.1", parser->qparentE,
                            grown * sizeof parser->qparentE[0]);
      parser->qlevel =
         ML_(dinfo_realloc)("di.readdwarf3.typush.2", parser->qlevel,
                            grown * sizeof parser->qlevel[0]);
   }

   /* After preening, the current top must be strictly shallower. */
   if (parser->sp >= 0)
      vg_assert(parser->qlevel[parser->sp] < level);

   parser->sp++;
   vg_assert(parentE);
   vg_assert(ML_(TyEnt__is_type)(parentE));
   vg_assert(parentE->cuOff != D3_INVALID_CUOFF);
   parser->qlevel[parser->sp]   = level;
   parser->qparentE[parser->sp] = *parentE;

   if (TD3)
      typestack_show( parser, "after push" );
}
2978
2979 /* True if the subrange type being parsed gives the bounds of an array. */
subrange_type_denotes_array_bounds(const D3TypeParser * parser,DW_TAG dtag)2980 static Bool subrange_type_denotes_array_bounds ( const D3TypeParser* parser,
2981 DW_TAG dtag ) {
2982 vg_assert(dtag == DW_TAG_subrange_type);
2983 /* For most languages, a subrange_type dtag always gives the
2984 bounds of an array.
2985 For Ada, there are additional conditions as a subrange_type
2986 is also used for other purposes. */
2987 if (parser->language != 'A')
2988 /* not Ada, so it definitely denotes an array bound. */
2989 return True;
2990 else
2991 /* Extra constraints for Ada: it only denotes an array bound if .. */
2992 return (! typestack_is_empty(parser)
2993 && parser->qparentE[parser->sp].tag == Te_TyArray);
2994 }
2995
2996 /* Parse a type-related DIE. 'parser' holds the current parser state.
2997 'admin' is where the completed types are dumped. 'dtag' is the tag
2998 for this DIE. 'c_die' points to the start of the data fields (FORM
   stuff) for the DIE.  'abbv' is the parsed abbreviation which describes
   the DIE.
3001
3002 We may find the DIE uninteresting, in which case we should ignore
3003 it.
3004
3005 What happens: the DIE is examined. If uninteresting, it is ignored.
3006 Otherwise, the DIE gives rise to two things:
3007
3008 (1) the offset of this DIE in the CU -- the cuOffset, a UWord
3009 (2) a TyAdmin structure, which holds the type, or related stuff
3010
3011 (2) is added at the end of 'tyadmins', at some index, say 'i'.
3012
3013 A pair (cuOffset, i) is added to 'tydict'.
3014
3015 Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
3016 a mapping from cuOffset to the index of the corresponding entry in
   'tyadmins'.
3018
3019 When resolving a cuOffset to a TyAdmin, first look up the cuOffset
3020 in the tydict (by binary search). This gives an index into
3021 tyadmins, and the required entity lives in tyadmins at that index.
3022 */
3023 __attribute__((noinline))
parse_type_DIE(XArray * tyents,D3TypeParser * parser,DW_TAG dtag,UWord posn,Int level,Cursor * c_die,const g_abbv * abbv,const CUConst * cc,Bool td3)3024 static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents,
3025 /*MOD*/D3TypeParser* parser,
3026 DW_TAG dtag,
3027 UWord posn,
3028 Int level,
3029 Cursor* c_die,
3030 const g_abbv *abbv,
3031 const CUConst* cc,
3032 Bool td3 )
3033 {
3034 FormContents cts;
3035 UInt nf_i;
3036 TyEnt typeE;
3037 TyEnt atomE;
3038 TyEnt fieldE;
3039 TyEnt boundE;
3040
3041 UWord saved_die_c_offset = get_position_of_Cursor( c_die );
3042
3043 VG_(memset)( &typeE, 0xAA, sizeof(typeE) );
3044 VG_(memset)( &atomE, 0xAA, sizeof(atomE) );
3045 VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) );
3046 VG_(memset)( &boundE, 0xAA, sizeof(boundE) );
3047
3048 /* If we've returned to a level at or above any previously noted
3049 parent, un-note it, so we don't believe we're still collecting
3050 its children. */
3051 typestack_preen( parser, td3, level-1 );
3052
3053 if (dtag == DW_TAG_compile_unit
3054 || dtag == DW_TAG_type_unit
3055 || dtag == DW_TAG_partial_unit) {
3056 /* See if we can find DW_AT_language, since it is important for
3057 establishing array bounds (see DW_TAG_subrange_type below in
3058 this fn) */
3059 nf_i = 0;
3060 while (True) {
3061 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name;
3062 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3063 nf_i++;
3064 if (attr == 0 && form == 0) break;
3065 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3066 if (attr != DW_AT_language)
3067 continue;
3068 if (cts.szB <= 0)
3069 goto_bad_DIE;
3070 switch (cts.u.val) {
3071 case DW_LANG_C89: case DW_LANG_C:
3072 case DW_LANG_C_plus_plus: case DW_LANG_ObjC:
3073 case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC:
3074 case DW_LANG_Upc: case DW_LANG_C99: case DW_LANG_C11:
3075 case DW_LANG_C_plus_plus_11: case DW_LANG_C_plus_plus_14:
3076 parser->language = 'C'; break;
3077 case DW_LANG_Fortran77: case DW_LANG_Fortran90:
3078 case DW_LANG_Fortran95: case DW_LANG_Fortran03:
3079 case DW_LANG_Fortran08:
3080 parser->language = 'F'; break;
3081 case DW_LANG_Ada83: case DW_LANG_Ada95:
3082 parser->language = 'A'; break;
3083 case DW_LANG_Cobol74:
3084 case DW_LANG_Cobol85: case DW_LANG_Pascal83:
3085 case DW_LANG_Modula2: case DW_LANG_Java:
3086 case DW_LANG_PLI:
3087 case DW_LANG_D: case DW_LANG_Python: case DW_LANG_Go:
3088 case DW_LANG_Mips_Assembler:
3089 parser->language = '?'; break;
3090 default:
3091 goto_bad_DIE;
3092 }
3093 }
3094 }
3095
3096 if (dtag == DW_TAG_base_type) {
3097 /* We can pick up a new base type any time. */
3098 VG_(memset)(&typeE, 0, sizeof(typeE));
3099 typeE.cuOff = D3_INVALID_CUOFF;
3100 typeE.tag = Te_TyBase;
3101 nf_i = 0;
3102 while (True) {
3103 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name;
3104 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3105 nf_i++;
3106 if (attr == 0 && form == 0) break;
3107 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3108 if (attr == DW_AT_name && cts.szB < 0) {
3109 typeE.Te.TyBase.name
3110 = ML_(cur_read_strdup)( cts.u.cur,
3111 "di.readdwarf3.ptD.base_type.1" );
3112 }
3113 if (attr == DW_AT_byte_size && cts.szB > 0) {
3114 typeE.Te.TyBase.szB = cts.u.val;
3115 }
3116 if (attr == DW_AT_encoding && cts.szB > 0) {
3117 switch (cts.u.val) {
3118 case DW_ATE_unsigned: case DW_ATE_unsigned_char:
3119 case DW_ATE_UTF: /* since DWARF4, e.g. char16_t from C++ */
3120 case DW_ATE_boolean:/* FIXME - is this correct? */
3121 case DW_ATE_unsigned_fixed:
3122 typeE.Te.TyBase.enc = 'U'; break;
3123 case DW_ATE_signed: case DW_ATE_signed_char:
3124 case DW_ATE_signed_fixed:
3125 typeE.Te.TyBase.enc = 'S'; break;
3126 case DW_ATE_float:
3127 typeE.Te.TyBase.enc = 'F'; break;
3128 case DW_ATE_complex_float:
3129 typeE.Te.TyBase.enc = 'C'; break;
3130 default:
3131 goto_bad_DIE;
3132 }
3133 }
3134 }
3135
3136 /* Invent a name if it doesn't have one. gcc-4.3
3137 -ftree-vectorize is observed to emit nameless base types. */
3138 if (!typeE.Te.TyBase.name)
3139 typeE.Te.TyBase.name
3140 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2",
3141 "<anon_base_type>" );
3142
3143 /* Do we have something that looks sane? */
3144 if (/* must have a name */
3145 typeE.Te.TyBase.name == NULL
3146 /* and a plausible size. Yes, really 32: "complex long
3147 double" apparently has size=32 */
3148 || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32
3149 /* and a plausible encoding */
3150 || (typeE.Te.TyBase.enc != 'U'
3151 && typeE.Te.TyBase.enc != 'S'
3152 && typeE.Te.TyBase.enc != 'F'
3153 && typeE.Te.TyBase.enc != 'C'))
3154 goto_bad_DIE;
3155 /* Last minute hack: if we see this
3156 <1><515>: DW_TAG_base_type
3157 DW_AT_byte_size : 0
3158 DW_AT_encoding : 5
3159 DW_AT_name : void
3160 convert it into a real Void type. */
3161 if (typeE.Te.TyBase.szB == 0
3162 && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) {
3163 ML_(TyEnt__make_EMPTY)(&typeE);
3164 typeE.tag = Te_TyVoid;
3165 typeE.Te.TyVoid.isFake = False; /* it's a real one! */
3166 }
3167
3168 goto acquire_Type;
3169 }
3170
3171 /*
3172 * An example of DW_TAG_rvalue_reference_type:
3173 *
3174 * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
3175 * <1><1014>: Abbrev Number: 55 (DW_TAG_rvalue_reference_type)
3176 * <1015> DW_AT_byte_size : 4
3177 * <1016> DW_AT_type : <0xe52>
3178 */
3179 if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type
3180 || dtag == DW_TAG_ptr_to_member_type
3181 || dtag == DW_TAG_rvalue_reference_type) {
3182 /* This seems legit for _pointer_type and _reference_type. I
3183 don't know if rolling _ptr_to_member_type in here really is
3184 legit, but it's better than not handling it at all. */
3185 VG_(memset)(&typeE, 0, sizeof(typeE));
3186 typeE.cuOff = D3_INVALID_CUOFF;
3187 switch (dtag) {
3188 case DW_TAG_pointer_type:
3189 typeE.tag = Te_TyPtr;
3190 break;
3191 case DW_TAG_reference_type:
3192 typeE.tag = Te_TyRef;
3193 break;
3194 case DW_TAG_ptr_to_member_type:
3195 typeE.tag = Te_TyPtrMbr;
3196 break;
3197 case DW_TAG_rvalue_reference_type:
3198 typeE.tag = Te_TyRvalRef;
3199 break;
3200 default:
3201 vg_assert(False);
3202 }
3203 /* target type defaults to void */
3204 typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF;
3205 /* These four type kinds don't *have* to specify their size, in
3206 which case we assume it's a machine word. But if they do
3207 specify it, it must be a machine word :-) This probably
3208 assumes that the word size of the Dwarf3 we're reading is the
3209 same size as that on the machine. gcc appears to give a size
3210 whereas icc9 doesn't. */
3211 typeE.Te.TyPorR.szB = sizeof(UWord);
3212 nf_i = 0;
3213 while (True) {
3214 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name;
3215 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3216 nf_i++;
3217 if (attr == 0 && form == 0) break;
3218 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3219 if (attr == DW_AT_byte_size && cts.szB > 0) {
3220 typeE.Te.TyPorR.szB = cts.u.val;
3221 }
3222 if (attr == DW_AT_type && cts.szB > 0) {
3223 typeE.Te.TyPorR.typeR
3224 = cook_die_using_form( cc, (UWord)cts.u.val, form );
3225 }
3226 }
3227 /* Do we have something that looks sane? */
3228 if (typeE.Te.TyPorR.szB != sizeof(UWord))
3229 goto_bad_DIE;
3230 else
3231 goto acquire_Type;
3232 }
3233
3234 if (dtag == DW_TAG_enumeration_type) {
3235 /* Create a new Type to hold the results. */
3236 VG_(memset)(&typeE, 0, sizeof(typeE));
3237 typeE.cuOff = posn;
3238 typeE.tag = Te_TyEnum;
3239 Bool is_decl = False;
3240 typeE.Te.TyEnum.atomRs
3241 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1",
3242 ML_(dinfo_free),
3243 sizeof(UWord) );
3244 nf_i=0;
3245 while (True) {
3246 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name;
3247 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3248 nf_i++;
3249 if (attr == 0 && form == 0) break;
3250 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3251 if (attr == DW_AT_name && cts.szB < 0) {
3252 typeE.Te.TyEnum.name
3253 = ML_(cur_read_strdup)( cts.u.cur,
3254 "di.readdwarf3.pTD.enum_type.2" );
3255 }
3256 if (attr == DW_AT_byte_size && cts.szB > 0) {
3257 typeE.Te.TyEnum.szB = cts.u.val;
3258 }
3259 if (attr == DW_AT_declaration) {
3260 is_decl = True;
3261 }
3262 }
3263
3264 if (!typeE.Te.TyEnum.name)
3265 typeE.Te.TyEnum.name
3266 = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3",
3267 "<anon_enum_type>" );
3268
3269 /* Do we have something that looks sane? */
3270 if (typeE.Te.TyEnum.szB == 0
3271 /* we must know the size */
3272 /* but not for Ada, which uses such dummy
3273 enumerations as helper for gdb ada mode.
3274 Also GCC allows incomplete enums as GNU extension.
3275 http://gcc.gnu.org/onlinedocs/gcc/Incomplete-Enums.html
3276 These are marked as DW_AT_declaration and won't have
3277 a size. They can only be used in declaration or as
3278 pointer types. You can't allocate variables or storage
3279 using such an enum type. (Also GCC seems to have a bug
3280 that will put such an enumeration_type into a .debug_types
3281 unit which should only contain complete types.) */
3282 && (parser->language != 'A' && !is_decl)) {
3283 goto_bad_DIE;
3284 }
3285
3286 /* On't stack! */
3287 typestack_push( cc, parser, td3, &typeE, level );
3288 goto acquire_Type;
3289 }
3290
3291 /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
3292 DW_TAG_enumerator with only a DW_AT_name but no
3293 DW_AT_const_value. This is in violation of the Dwarf3 standard,
3294 and appears to be a new "feature" of gcc - versions 4.3.x and
3295 earlier do not appear to do this. So accept DW_TAG_enumerator
3296 which only have a name but no value. An example:
3297
3298 <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
3299 <181> DW_AT_name : (indirect string, offset: 0xda70):
3300 QtMsgType
3301 <185> DW_AT_byte_size : 4
3302 <186> DW_AT_decl_file : 14
3303 <187> DW_AT_decl_line : 1480
3304 <189> DW_AT_sibling : <0x1a7>
3305 <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
3306 <18e> DW_AT_name : (indirect string, offset: 0x9e18):
3307 QtDebugMsg
3308 <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
3309 <193> DW_AT_name : (indirect string, offset: 0x1505f):
3310 QtWarningMsg
3311 <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
3312 <198> DW_AT_name : (indirect string, offset: 0x16f4a):
3313 QtCriticalMsg
3314 <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
3315 <19d> DW_AT_name : (indirect string, offset: 0x156dd):
3316 QtFatalMsg
3317 <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
3318 <1a2> DW_AT_name : (indirect string, offset: 0x13660):
3319 QtSystemMsg
3320 */
3321 if (dtag == DW_TAG_enumerator) {
3322 VG_(memset)( &atomE, 0, sizeof(atomE) );
3323 atomE.cuOff = posn;
3324 atomE.tag = Te_Atom;
3325 nf_i = 0;
3326 while (True) {
3327 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name;
3328 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3329 nf_i++;
3330 if (attr == 0 && form == 0) break;
3331 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3332 if (attr == DW_AT_name && cts.szB < 0) {
3333 atomE.Te.Atom.name
3334 = ML_(cur_read_strdup)( cts.u.cur,
3335 "di.readdwarf3.pTD.enumerator.1" );
3336 }
3337 if (attr == DW_AT_const_value && cts.szB > 0) {
3338 atomE.Te.Atom.value = cts.u.val;
3339 atomE.Te.Atom.valueKnown = True;
3340 }
3341 }
3342 /* Do we have something that looks sane? */
3343 if (atomE.Te.Atom.name == NULL)
3344 goto_bad_DIE;
3345 /* Do we have a plausible parent? */
3346 if (typestack_is_empty(parser)) goto_bad_DIE;
3347 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
3348 vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
3349 if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
3350 if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto_bad_DIE;
3351 /* Record this child in the parent */
3352 vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs);
3353 VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs,
3354 &atomE );
3355 /* And record the child itself */
3356 goto acquire_Atom;
3357 }
3358
3359 /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type. I
3360 don't know if this is correct, but it at least makes this reader
3361 usable for gcc-4.3 produced Dwarf3. */
3362 if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type
3363 || dtag == DW_TAG_union_type) {
3364 Bool have_szB = False;
3365 Bool is_decl = False;
3366 Bool is_spec = False;
3367 /* Create a new Type to hold the results. */
3368 VG_(memset)(&typeE, 0, sizeof(typeE));
3369 typeE.cuOff = posn;
3370 typeE.tag = Te_TyStOrUn;
3371 typeE.Te.TyStOrUn.name = NULL;
3372 typeE.Te.TyStOrUn.typeR = D3_INVALID_CUOFF;
3373 typeE.Te.TyStOrUn.fieldRs
3374 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1",
3375 ML_(dinfo_free),
3376 sizeof(UWord) );
3377 typeE.Te.TyStOrUn.complete = True;
3378 typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type
3379 || dtag == DW_TAG_class_type;
3380 nf_i = 0;
3381 while (True) {
3382 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name;
3383 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3384 nf_i++;
3385 if (attr == 0 && form == 0) break;
3386 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3387 if (attr == DW_AT_name && cts.szB < 0) {
3388 typeE.Te.TyStOrUn.name
3389 = ML_(cur_read_strdup)( cts.u.cur,
3390 "di.readdwarf3.ptD.struct_type.2" );
3391 }
3392 if (attr == DW_AT_byte_size && cts.szB >= 0) {
3393 typeE.Te.TyStOrUn.szB = cts.u.val;
3394 have_szB = True;
3395 }
3396 if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
3397 is_decl = True;
3398 }
3399 if (attr == DW_AT_specification && cts.szB > 0 && cts.u.val > 0) {
3400 is_spec = True;
3401 }
3402 if (attr == DW_AT_signature && form == DW_FORM_ref_sig8
3403 && cts.szB > 0) {
3404 have_szB = True;
3405 typeE.Te.TyStOrUn.szB = 8;
3406 typeE.Te.TyStOrUn.typeR
3407 = cook_die_using_form( cc, (UWord)cts.u.val, form );
3408 }
3409 }
3410 /* Do we have something that looks sane? */
3411 if (is_decl && (!is_spec)) {
3412 /* It's a DW_AT_declaration. We require the name but
3413 nothing else. */
3414 /* JRS 2012-06-28: following discussion w/ tromey, if the
3415 type doesn't have name, just make one up, and accept it.
3416 It might be referred to by other DIEs, so ignoring it
3417 doesn't seem like a safe option. */
3418 if (typeE.Te.TyStOrUn.name == NULL)
3419 typeE.Te.TyStOrUn.name
3420 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.3",
3421 "<anon_struct_type>" );
3422 typeE.Te.TyStOrUn.complete = False;
3423 /* JRS 2009 Aug 10: <possible kludge>? */
3424 /* Push this tyent on the stack, even though it's incomplete.
3425 It appears that gcc-4.4 on Fedora 11 will sometimes create
3426 DW_TAG_member entries for it, and so we need to have a
3427 plausible parent present in order for that to work. See
3428 #200029 comments 8 and 9. */
3429 typestack_push( cc, parser, td3, &typeE, level );
3430 /* </possible kludge> */
3431 goto acquire_Type;
3432 }
3433 if ((!is_decl) /* && (!is_spec) */) {
3434 /* this is the common, ordinary case */
3435 /* The name can be present, or not */
3436 if (!have_szB) {
3437 /* We must know the size.
3438 But in Ada, record with discriminants might have no size.
3439 But in C, VLA in the middle of a struct (gcc extension)
3440 might have no size.
3441 Instead, some GNAT dwarf extensions and/or dwarf entries
3442 allow to calculate the struct size at runtime.
3443 We cannot do that (yet?) so, the temporary kludge is to use
3444 a small size. */
3445 typeE.Te.TyStOrUn.szB = 1;
3446 }
3447 /* On't stack! */
3448 typestack_push( cc, parser, td3, &typeE, level );
3449 goto acquire_Type;
3450 }
3451 else {
3452 /* don't know how to handle any other variants just now */
3453 goto_bad_DIE;
3454 }
3455 }
3456
3457 if (dtag == DW_TAG_member) {
3458 /* Acquire member entries for both DW_TAG_structure_type and
3459 DW_TAG_union_type. They differ minorly, in that struct
3460 members must have a DW_AT_data_member_location expression
3461 whereas union members must not. */
3462 Bool parent_is_struct;
3463 VG_(memset)( &fieldE, 0, sizeof(fieldE) );
3464 fieldE.cuOff = posn;
3465 fieldE.tag = Te_Field;
3466 fieldE.Te.Field.typeR = D3_INVALID_CUOFF;
3467 nf_i = 0;
3468 while (True) {
3469 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name;
3470 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3471 nf_i++;
3472 if (attr == 0 && form == 0) break;
3473 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3474 if (attr == DW_AT_name && cts.szB < 0) {
3475 fieldE.Te.Field.name
3476 = ML_(cur_read_strdup)( cts.u.cur,
3477 "di.readdwarf3.ptD.member.1" );
3478 }
3479 if (attr == DW_AT_type && cts.szB > 0) {
3480 fieldE.Te.Field.typeR
3481 = cook_die_using_form( cc, (UWord)cts.u.val, form );
3482 }
3483 /* There are 2 different cases for DW_AT_data_member_location.
3484 If it is a constant class attribute, it contains byte offset
3485 from the beginning of the containing entity.
3486 Otherwise it is a location expression. */
3487 if (attr == DW_AT_data_member_location && cts.szB > 0) {
3488 fieldE.Te.Field.nLoc = -1;
3489 fieldE.Te.Field.pos.offset = cts.u.val;
3490 }
3491 if (attr == DW_AT_data_member_location && cts.szB <= 0) {
3492 fieldE.Te.Field.nLoc = (UWord)(-cts.szB);
3493 fieldE.Te.Field.pos.loc
3494 = ML_(cur_read_memdup)( cts.u.cur,
3495 (SizeT)fieldE.Te.Field.nLoc,
3496 "di.readdwarf3.ptD.member.2" );
3497 }
3498 }
3499 /* Do we have a plausible parent? */
3500 if (typestack_is_empty(parser)) goto_bad_DIE;
3501 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
3502 vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
3503 if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
3504 if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto_bad_DIE;
3505 /* Do we have something that looks sane? If this a member of a
3506 struct, we must have a location expression; but if a member
3507 of a union that is irrelevant (D3 spec sec 5.6.6). We ought
3508 to reject in the latter case, but some compilers have been
3509 observed to emit constant-zero expressions. So just ignore
3510 them. */
3511 parent_is_struct
3512 = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct;
3513 if (!fieldE.Te.Field.name)
3514 fieldE.Te.Field.name
3515 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3",
3516 "<anon_field>" );
3517 if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF)
3518 goto_bad_DIE;
3519 if (fieldE.Te.Field.nLoc) {
3520 if (!parent_is_struct) {
3521 /* If this is a union type, pretend we haven't seen the data
3522 member location expression, as it is by definition
3523 redundant (it must be zero). */
3524 if (fieldE.Te.Field.nLoc > 0)
3525 ML_(dinfo_free)(fieldE.Te.Field.pos.loc);
3526 fieldE.Te.Field.pos.loc = NULL;
3527 fieldE.Te.Field.nLoc = 0;
3528 }
3529 /* Record this child in the parent */
3530 fieldE.Te.Field.isStruct = parent_is_struct;
3531 vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs);
3532 VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs,
3533 &posn );
3534 /* And record the child itself */
3535 goto acquire_Field;
3536 } else {
3537 /* Member with no location - this can happen with static
3538 const members in C++ code which are compile time constants
3539 that do no exist in the class. They're not of any interest
3540 to us so we ignore them. */
3541 ML_(TyEnt__make_EMPTY)(&fieldE);
3542 }
3543 }
3544
3545 if (dtag == DW_TAG_array_type) {
3546 VG_(memset)(&typeE, 0, sizeof(typeE));
3547 typeE.cuOff = posn;
3548 typeE.tag = Te_TyArray;
3549 typeE.Te.TyArray.typeR = D3_INVALID_CUOFF;
3550 typeE.Te.TyArray.boundRs
3551 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1",
3552 ML_(dinfo_free),
3553 sizeof(UWord) );
3554 nf_i = 0;
3555 while (True) {
3556 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name;
3557 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3558 nf_i++;
3559 if (attr == 0 && form == 0) break;
3560 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3561 if (attr == DW_AT_type && cts.szB > 0) {
3562 typeE.Te.TyArray.typeR
3563 = cook_die_using_form( cc, (UWord)cts.u.val, form );
3564 }
3565 }
3566 if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF)
3567 goto_bad_DIE;
3568 /* On't stack! */
3569 typestack_push( cc, parser, td3, &typeE, level );
3570 goto acquire_Type;
3571 }
3572
3573 /* this is a subrange type defining the bounds of an array. */
3574 if (dtag == DW_TAG_subrange_type
3575 && subrange_type_denotes_array_bounds(parser, dtag)) {
3576 Bool have_lower = False;
3577 Bool have_upper = False;
3578 Bool have_count = False;
3579 Long lower = 0;
3580 Long upper = 0;
3581
3582 switch (parser->language) {
3583 case 'C': have_lower = True; lower = 0; break;
3584 case 'F': have_lower = True; lower = 1; break;
3585 case '?': have_lower = False; break;
3586 case 'A': have_lower = False; break;
3587 default: vg_assert(0); /* assured us by handling of
3588 DW_TAG_compile_unit in this fn */
3589 }
3590
3591 VG_(memset)( &boundE, 0, sizeof(boundE) );
3592 boundE.cuOff = D3_INVALID_CUOFF;
3593 boundE.tag = Te_Bound;
3594 nf_i = 0;
3595 while (True) {
3596 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name;
3597 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3598 nf_i++;
3599 if (attr == 0 && form == 0) break;
3600 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3601 if (attr == DW_AT_lower_bound && cts.szB > 0) {
3602 lower = (Long)cts.u.val;
3603 have_lower = True;
3604 }
3605 if (attr == DW_AT_upper_bound && cts.szB > 0) {
3606 upper = (Long)cts.u.val;
3607 have_upper = True;
3608 }
3609 if (attr == DW_AT_count && cts.szB > 0) {
3610 /*count = (Long)cts.u.val;*/
3611 have_count = True;
3612 }
3613 }
3614 /* FIXME: potentially skip the rest if no parent present, since
3615 it could be the case that this subrange type is free-standing
3616 (not being used to describe the bounds of a containing array
3617 type) */
3618 /* Do we have a plausible parent? */
3619 if (typestack_is_empty(parser)) goto_bad_DIE;
3620 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
3621 vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
3622 if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
3623 if (parser->qparentE[parser->sp].tag != Te_TyArray) goto_bad_DIE;
3624
3625 /* Figure out if we have a definite range or not */
3626 if (have_lower && have_upper && (!have_count)) {
3627 boundE.Te.Bound.knownL = True;
3628 boundE.Te.Bound.knownU = True;
3629 boundE.Te.Bound.boundL = lower;
3630 boundE.Te.Bound.boundU = upper;
3631 }
3632 else if (have_lower && (!have_upper) && (!have_count)) {
3633 boundE.Te.Bound.knownL = True;
3634 boundE.Te.Bound.knownU = False;
3635 boundE.Te.Bound.boundL = lower;
3636 boundE.Te.Bound.boundU = 0;
3637 }
3638 else if ((!have_lower) && have_upper && (!have_count)) {
3639 boundE.Te.Bound.knownL = False;
3640 boundE.Te.Bound.knownU = True;
3641 boundE.Te.Bound.boundL = 0;
3642 boundE.Te.Bound.boundU = upper;
3643 }
3644 else if ((!have_lower) && (!have_upper) && (!have_count)) {
3645 boundE.Te.Bound.knownL = False;
3646 boundE.Te.Bound.knownU = False;
3647 boundE.Te.Bound.boundL = 0;
3648 boundE.Te.Bound.boundU = 0;
3649 } else {
3650 /* FIXME: handle more cases */
3651 goto_bad_DIE;
3652 }
3653
3654 /* Record this bound in the parent */
3655 boundE.cuOff = posn;
3656 vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs);
3657 VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs,
3658 &boundE.cuOff );
3659 /* And record the child itself */
3660 goto acquire_Bound;
3661 }
3662
3663 /* typedef or subrange_type other than array bounds. */
3664 if (dtag == DW_TAG_typedef
3665 || (dtag == DW_TAG_subrange_type
3666 && !subrange_type_denotes_array_bounds(parser, dtag))) {
3667 /* subrange_type other than array bound is only for Ada. */
3668 vg_assert (dtag == DW_TAG_typedef || parser->language == 'A');
3669 /* We can pick up a new typedef/subrange_type any time. */
3670 VG_(memset)(&typeE, 0, sizeof(typeE));
3671 typeE.cuOff = D3_INVALID_CUOFF;
3672 typeE.tag = Te_TyTyDef;
3673 typeE.Te.TyTyDef.name = NULL;
3674 typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF;
3675 nf_i = 0;
3676 while (True) {
3677 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name;
3678 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3679 nf_i++;
3680 if (attr == 0 && form == 0) break;
3681 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3682 if (attr == DW_AT_name && cts.szB < 0) {
3683 typeE.Te.TyTyDef.name
3684 = ML_(cur_read_strdup)( cts.u.cur,
3685 "di.readdwarf3.ptD.typedef.1" );
3686 }
3687 if (attr == DW_AT_type && cts.szB > 0) {
3688 typeE.Te.TyTyDef.typeR
3689 = cook_die_using_form( cc, (UWord)cts.u.val, form );
3690 }
3691 }
3692 /* Do we have something that looks sane?
3693 gcc gnat Ada generates minimal typedef
3694 such as the below
3695 <6><91cc>: DW_TAG_typedef
3696 DW_AT_abstract_ori: <9066>
3697 g++ for OMP can generate artificial functions that have
3698 parameters that refer to pointers to unnamed typedefs.
3699 See https://bugs.kde.org/show_bug.cgi?id=273475
3700 So we cannot require a name for a DW_TAG_typedef.
3701 */
3702 goto acquire_Type;
3703 }
3704
3705 if (dtag == DW_TAG_subroutine_type) {
3706 /* function type? just record that one fact and ask no
3707 further questions. */
3708 VG_(memset)(&typeE, 0, sizeof(typeE));
3709 typeE.cuOff = D3_INVALID_CUOFF;
3710 typeE.tag = Te_TyFn;
3711 goto acquire_Type;
3712 }
3713
3714 if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type
3715 || dtag == DW_TAG_restrict_type) {
3716 Int have_ty = 0;
3717 VG_(memset)(&typeE, 0, sizeof(typeE));
3718 typeE.cuOff = D3_INVALID_CUOFF;
3719 typeE.tag = Te_TyQual;
3720 typeE.Te.TyQual.qual
3721 = (dtag == DW_TAG_volatile_type ? 'V'
3722 : (dtag == DW_TAG_const_type ? 'C' : 'R'));
3723 /* target type defaults to 'void' */
3724 typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
3725 nf_i = 0;
3726 while (True) {
3727 DW_AT attr = (DW_AT) abbv->nf[nf_i].at_name;
3728 DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3729 nf_i++;
3730 if (attr == 0 && form == 0) break;
3731 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3732 if (attr == DW_AT_type && cts.szB > 0) {
3733 typeE.Te.TyQual.typeR
3734 = cook_die_using_form( cc, (UWord)cts.u.val, form );
3735 have_ty++;
3736 }
3737 }
3738 /* gcc sometimes generates DW_TAG_const/volatile_type without
3739 DW_AT_type and GDB appears to interpret the type as 'const
3740 void' (resp. 'volatile void'). So just allow it .. */
3741 if (have_ty == 1 || have_ty == 0)
3742 goto acquire_Type;
3743 else
3744 goto_bad_DIE;
3745 }
3746
3747 /*
3748 * Treat DW_TAG_unspecified_type as type void. An example of DW_TAG_unspecified_type:
3749 *
3750 * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
3751 * <1><10d4>: Abbrev Number: 53 (DW_TAG_unspecified_type)
3752 * <10d5> DW_AT_name : (indirect string, offset: 0xdb7): decltype(nullptr)
3753 */
3754 if (dtag == DW_TAG_unspecified_type) {
3755 VG_(memset)(&typeE, 0, sizeof(typeE));
3756 typeE.cuOff = D3_INVALID_CUOFF;
3757 typeE.tag = Te_TyQual;
3758 typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
3759 goto acquire_Type;
3760 }
3761
3762 /* else ignore this DIE */
3763 return;
3764 /*NOTREACHED*/
3765
3766 acquire_Type:
3767 if (0) VG_(printf)("YYYY Acquire Type\n");
3768 vg_assert(ML_(TyEnt__is_type)( &typeE ));
3769 vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn);
3770 typeE.cuOff = posn;
3771 VG_(addToXA)( tyents, &typeE );
3772 return;
3773 /*NOTREACHED*/
3774
3775 acquire_Atom:
3776 if (0) VG_(printf)("YYYY Acquire Atom\n");
3777 vg_assert(atomE.tag == Te_Atom);
3778 vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn);
3779 atomE.cuOff = posn;
3780 VG_(addToXA)( tyents, &atomE );
3781 return;
3782 /*NOTREACHED*/
3783
3784 acquire_Field:
3785 /* For union members, Expr should be absent */
3786 if (0) VG_(printf)("YYYY Acquire Field\n");
3787 vg_assert(fieldE.tag == Te_Field);
3788 vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL);
3789 vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL);
3790 if (fieldE.Te.Field.isStruct) {
3791 vg_assert(fieldE.Te.Field.nLoc != 0);
3792 } else {
3793 vg_assert(fieldE.Te.Field.nLoc == 0);
3794 }
3795 vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn);
3796 fieldE.cuOff = posn;
3797 VG_(addToXA)( tyents, &fieldE );
3798 return;
3799 /*NOTREACHED*/
3800
3801 acquire_Bound:
3802 if (0) VG_(printf)("YYYY Acquire Bound\n");
3803 vg_assert(boundE.tag == Te_Bound);
3804 vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn);
3805 boundE.cuOff = posn;
3806 VG_(addToXA)( tyents, &boundE );
3807 return;
3808 /*NOTREACHED*/
3809
3810 bad_DIE:
3811 dump_bad_die_and_barf("parse_type_DIE", dtag, posn, level,
3812 c_die, saved_die_c_offset,
3813 abbv,
3814 cc);
3815 /*NOTREACHED*/
3816 }
3817
3818
3819 /*------------------------------------------------------------*/
3820 /*--- ---*/
3821 /*--- Compression of type DIE information ---*/
3822 /*--- ---*/
3823 /*------------------------------------------------------------*/
3824
/* Follow at most one level of indirection for the type entry that
   lives at 'cuOff'.

   The entry is looked up in 'ents' (using 'ents_cache').  If it is a
   Te_INDIR, *changed is set to True and the offset the indirection
   points at is returned.  In every other case -- including the case
   where no entry exists at 'cuOff', which is reported with a message
   -- *changed is set to False and 'cuOff' is returned unmodified. */
static UWord chase_cuOff ( Bool* changed,
                           const XArray* /* of TyEnt */ ents,
                           TyEntIndexCache* ents_cache,
                           UWord cuOff )
{
   TyEnt* target = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff );

   if (target == NULL) {
      VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff);
      *changed = False;
      return cuOff;
   }

   vg_assert(target->tag != Te_EMPTY);

   if (target->tag == Te_INDIR) {
      /* An indirection is required to point at a strictly lower
         offset than the one we started from. */
      vg_assert(target->Te.INDIR.indR < cuOff);
      *changed = True;
      return target->Te.INDIR.indR;
   }

   /* Not an indirection: nothing to chase. */
   *changed = False;
   return cuOff;
}
3849
3850 static
chase_cuOffs_in_XArray(Bool * changed,const XArray * ents,TyEntIndexCache * ents_cache,XArray * cuOffs)3851 void chase_cuOffs_in_XArray ( Bool* changed,
3852 const XArray* /* of TyEnt */ ents,
3853 TyEntIndexCache* ents_cache,
3854 /*MOD*/XArray* /* of UWord */ cuOffs )
3855 {
3856 Bool b2 = False;
3857 Word i, n = VG_(sizeXA)( cuOffs );
3858 for (i = 0; i < n; i++) {
3859 Bool b = False;
3860 UWord* p = VG_(indexXA)( cuOffs, i );
3861 *p = chase_cuOff( &b, ents, ents_cache, *p );
3862 if (b)
3863 b2 = True;
3864 }
3865 *changed = b2;
3866 }
3867
TyEnt__subst_R_fields(const XArray * ents,TyEntIndexCache * ents_cache,TyEnt * te)3868 static Bool TyEnt__subst_R_fields ( const XArray* /* of TyEnt */ ents,
3869 TyEntIndexCache* ents_cache,
3870 /*MOD*/TyEnt* te )
3871 {
3872 Bool b, changed = False;
3873 switch (te->tag) {
3874 case Te_EMPTY:
3875 break;
3876 case Te_INDIR:
3877 te->Te.INDIR.indR
3878 = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR );
3879 if (b) changed = True;
3880 break;
3881 case Te_UNKNOWN:
3882 break;
3883 case Te_Atom:
3884 break;
3885 case Te_Field:
3886 te->Te.Field.typeR
3887 = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR );
3888 if (b) changed = True;
3889 break;
3890 case Te_Bound:
3891 break;
3892 case Te_TyBase:
3893 break;
3894 case Te_TyPtr:
3895 case Te_TyRef:
3896 case Te_TyPtrMbr:
3897 case Te_TyRvalRef:
3898 te->Te.TyPorR.typeR
3899 = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR );
3900 if (b) changed = True;
3901 break;
3902 case Te_TyTyDef:
3903 te->Te.TyTyDef.typeR
3904 = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR );
3905 if (b) changed = True;
3906 break;
3907 case Te_TyStOrUn:
3908 chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs );
3909 if (b) changed = True;
3910 break;
3911 case Te_TyEnum:
3912 chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs );
3913 if (b) changed = True;
3914 break;
3915 case Te_TyArray:
3916 te->Te.TyArray.typeR
3917 = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR );
3918 if (b) changed = True;
3919 chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs );
3920 if (b) changed = True;
3921 break;
3922 case Te_TyFn:
3923 break;
3924 case Te_TyQual:
3925 te->Te.TyQual.typeR
3926 = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR );
3927 if (b) changed = True;
3928 break;
3929 case Te_TyVoid:
3930 break;
3931 default:
3932 ML_(pp_TyEnt)(te);
3933 vg_assert(0);
3934 }
3935 return changed;
3936 }
3937
3938 /* Make a pass over 'ents'. For each tyent, inspect the target of any
3939 'R' or 'Rs' fields (those which refer to other tyents), and replace
3940 any which point to INDIR nodes with the target of the indirection
3941 (which should not itself be an indirection). In summary, this
3942 routine shorts out all references to indirection nodes. */
3943 static
dedup_types_substitution_pass(XArray * ents,TyEntIndexCache * ents_cache)3944 Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents,
3945 TyEntIndexCache* ents_cache )
3946 {
3947 Word i, n, nChanged = 0;
3948 Bool b;
3949 n = VG_(sizeXA)( ents );
3950 for (i = 0; i < n; i++) {
3951 TyEnt* ent = VG_(indexXA)( ents, i );
3952 vg_assert(ent->tag != Te_EMPTY);
3953 /* We have to substitute everything, even indirections, so as to
3954 ensure that chains of indirections don't build up. */
3955 b = TyEnt__subst_R_fields( ents, ents_cache, ent );
3956 if (b)
3957 nChanged++;
3958 }
3959
3960 return nChanged;
3961 }
3962
3963
3964 /* Make a pass over 'ents', building a dictionary of TyEnts as we go.
3965 Look up each new tyent in the dictionary in turn. If it is already
3966 in the dictionary, replace this tyent with an indirection to the
3967 existing one, and delete any malloc'd stuff hanging off this one.
3968 In summary, this routine commons up all tyents that are identical
3969 as defined by TyEnt__cmp_by_all_except_cuOff. */
3970 static
dedup_types_commoning_pass(XArray * ents)3971 Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents )
3972 {
3973 Word n, i, nDeleted;
3974 WordFM* dict; /* TyEnt* -> void */
3975 TyEnt* ent;
3976 UWord keyW, valW;
3977
3978 dict = VG_(newFM)(
3979 ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1",
3980 ML_(dinfo_free),
3981 (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff)
3982 );
3983
3984 nDeleted = 0;
3985 n = VG_(sizeXA)( ents );
3986 for (i = 0; i < n; i++) {
3987 ent = VG_(indexXA)( ents, i );
3988 vg_assert(ent->tag != Te_EMPTY);
3989
3990 /* Ignore indirections, although check that they are
3991 not forming a cycle. */
3992 if (ent->tag == Te_INDIR) {
3993 vg_assert(ent->Te.INDIR.indR < ent->cuOff);
3994 continue;
3995 }
3996
3997 keyW = valW = 0;
3998 if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) {
3999 /* it's already in the dictionary. */
4000 TyEnt* old = (TyEnt*)keyW;
4001 vg_assert(valW == 0);
4002 vg_assert(old != ent);
4003 vg_assert(old->tag != Te_INDIR);
4004 /* since we are traversing the array in increasing order of
4005 cuOff: */
4006 vg_assert(old->cuOff < ent->cuOff);
4007 /* So anyway, dump this entry and replace it with an
4008 indirection to the one in the dictionary. Note that the
4009 assertion above guarantees that we cannot create cycles of
4010 indirections, since we are always creating an indirection
4011 to a tyent with a cuOff lower than this one. */
4012 ML_(TyEnt__make_EMPTY)( ent );
4013 ent->tag = Te_INDIR;
4014 ent->Te.INDIR.indR = old->cuOff;
4015 nDeleted++;
4016 } else {
4017 /* not in dictionary; add it and keep going. */
4018 VG_(addToFM)( dict, (UWord)ent, 0 );
4019 }
4020 }
4021
4022 VG_(deleteFM)( dict, NULL, NULL );
4023
4024 return nDeleted;
4025 }
4026
4027
4028 static
dedup_types(Bool td3,XArray * ents,TyEntIndexCache * ents_cache)4029 void dedup_types ( Bool td3,
4030 /*MOD*/XArray* /* of TyEnt */ ents,
4031 TyEntIndexCache* ents_cache )
4032 {
4033 Word m, n, i, nDel, nSubst, nThresh;
4034 if (0) td3 = True;
4035
4036 n = VG_(sizeXA)( ents );
4037
4038 /* If a commoning pass and a substitution pass both make fewer than
4039 this many changes, just stop. It's pointless to burn up CPU
4040 time trying to compress the last 1% or so out of the array. */
4041 nThresh = n / 200;
4042
4043 /* First we must sort .ents by its .cuOff fields, so we
4044 can index into it. */
4045 VG_(setCmpFnXA)( ents, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
4046 VG_(sortXA)( ents );
4047
4048 /* Now repeatedly do commoning and substitution passes over
4049 the array, until there are no more changes. */
4050 do {
4051 nDel = dedup_types_commoning_pass ( ents );
4052 nSubst = dedup_types_substitution_pass ( ents, ents_cache );
4053 vg_assert(nDel >= 0 && nSubst >= 0);
4054 TRACE_D3(" %ld deletions, %ld substitutions\n", nDel, nSubst);
4055 } while (nDel > nThresh || nSubst > nThresh);
4056
4057 /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
4058 In fact this should be true at the end of every loop iteration
4059 above (a commoning pass followed by a substitution pass), but
4060 checking it on every iteration is excessively expensive. Note,
4061 this loop also computes 'm' for the stats printing below it. */
4062 m = 0;
4063 n = VG_(sizeXA)( ents );
4064 for (i = 0; i < n; i++) {
4065 TyEnt *ent, *ind;
4066 ent = VG_(indexXA)( ents, i );
4067 if (ent->tag != Te_INDIR) continue;
4068 m++;
4069 ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
4070 ent->Te.INDIR.indR );
4071 vg_assert(ind);
4072 vg_assert(ind->tag != Te_INDIR);
4073 }
4074
4075 TRACE_D3("Overall: %ld before, %ld after\n", n, n-m);
4076 }
4077
4078
4079 /*------------------------------------------------------------*/
4080 /*--- ---*/
4081 /*--- Resolution of references to type DIEs ---*/
4082 /*--- ---*/
4083 /*------------------------------------------------------------*/
4084
4085 /* Make a pass through the (temporary) variables array. Examine the
4086 type of each variable, check is it found, and chase any Te_INDIRs.
4087 Postcondition is: each variable has a typeR field that refers to a
4088 valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
4089 not to refer to a Te_INDIR. (This is so that we can throw all the
4090 Te_INDIRs away later). */
4091
4092 __attribute__((noinline))
resolve_variable_types(void (* barf)(const HChar *),XArray * ents,TyEntIndexCache * ents_cache,XArray * vars)4093 static void resolve_variable_types (
4094 void (*barf)( const HChar* ) __attribute__((noreturn)),
4095 /*R-O*/XArray* /* of TyEnt */ ents,
4096 /*MOD*/TyEntIndexCache* ents_cache,
4097 /*MOD*/XArray* /* of TempVar* */ vars
4098 )
4099 {
4100 Word i, n;
4101 n = VG_(sizeXA)( vars );
4102 for (i = 0; i < n; i++) {
4103 TempVar* var = *(TempVar**)VG_(indexXA)( vars, i );
4104 /* This is the stated type of the variable. But it might be
4105 an indirection, so be careful. */
4106 TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
4107 var->typeR );
4108 if (ent && ent->tag == Te_INDIR) {
4109 ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
4110 ent->Te.INDIR.indR );
4111 vg_assert(ent);
4112 vg_assert(ent->tag != Te_INDIR);
4113 }
4114
4115 /* Deal first with "normal" cases */
4116 if (ent && ML_(TyEnt__is_type)(ent)) {
4117 var->typeR = ent->cuOff;
4118 continue;
4119 }
4120
4121 /* If there's no ent, it probably we did not manage to read a
4122 type at the cuOffset which is stated as being this variable's
4123 type. Maybe a deficiency in parse_type_DIE. Complain. */
4124 if (ent == NULL) {
4125 VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR );
4126 barf("resolve_variable_types: "
4127 "cuOff does not refer to a known type");
4128 }
4129 vg_assert(ent);
4130 /* If ent has any other tag, something bad happened, along the
4131 lines of var->typeR not referring to a type at all. */
4132 vg_assert(ent->tag == Te_UNKNOWN);
4133 /* Just accept it; the type will be useless, but at least keep
4134 going. */
4135 var->typeR = ent->cuOff;
4136 }
4137 }
4138
4139
4140 /*------------------------------------------------------------*/
4141 /*--- ---*/
4142 /*--- Parsing of Compilation Units ---*/
4143 /*--- ---*/
4144 /*------------------------------------------------------------*/
4145
cmp_TempVar_by_dioff(const void * v1,const void * v2)4146 static Int cmp_TempVar_by_dioff ( const void* v1, const void* v2 ) {
4147 const TempVar* t1 = *(const TempVar *const *)v1;
4148 const TempVar* t2 = *(const TempVar *const *)v2;
4149 if (t1->dioff < t2->dioff) return -1;
4150 if (t1->dioff > t2->dioff) return 1;
4151 return 0;
4152 }
4153
/* Read the DIE at cursor 'c' and then, recursively, its children.

   The DIE's abbreviation code is read and looked up with get_abbv.
   The DIE is then offered to the type and variable parsers (when
   VG_(clo_read_var_info) is set) and to the inline parser (when
   VG_(clo_read_inline_info) is set).  Each parser sees the DIE from
   the same starting cursor position.  If no parser consumed the DIE,
   it is skipped with skip_DIE.

   If the abbreviation says the DIE has children, they are read by
   recursive calls at level+1 up to the terminating zero entry --
   unless no parser asked for the children and a sibling position is
   known, in which case the cursor jumps straight to the sibling.

   cc->barf is called for a zero tag or an invalid has_children
   value. */
static void read_DIE (
   /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
   /*MOD*/XArray* /* of TyEnt */ tyents,
   /*MOD*/XArray* /* of TempVar* */ tempvars,
   /*MOD*/XArray* /* of GExpr* */ gexprs,
   /*MOD*/D3TypeParser* typarser,
   /*MOD*/D3VarParser* varparser,
   /*MOD*/D3InlParser* inlparser,
   Cursor* c, Bool td3, CUConst* cc, Int level
)
{
   const g_abbv *abbv;
   ULong atag, abbv_code;
   UWord posn;
   UInt has_children;
   UWord start_die_c_offset;
   UWord after_die_c_offset;
   // If the DIE we will parse has a sibling and the parser(s) are
   // all indicating that parse_children is not necessary, then
   // we will skip the children by jumping to the sibling of this DIE
   // (if it has a sibling).  A value of 0 means "no sibling known".
   UWord sibling = 0;
   Bool parse_children = False;

   /* --- Deal with this DIE --- */
   posn = cook_die( cc, get_position_of_Cursor( c ) );
   abbv_code = get_ULEB128( c );
   abbv = get_abbv(cc, abbv_code);
   atag = abbv->atag;

   if (TD3) {
      TRACE_D3("\n");
      trace_DIE ((DW_TAG)atag, posn, level,
                 get_position_of_Cursor( c ), abbv, cc);
   }

   if (atag == 0)
      cc->barf("read_DIE: invalid zero tag on DIE");

   has_children = abbv->has_children;
   if (has_children != DW_children_no && has_children != DW_children_yes)
      cc->barf("read_DIE: invalid has_children value");

   /* We're set up to look at the fields of this DIE.  Hand it off to
      any parser(s) that want to see it.  Since they will in general
      advance the DIE cursor, remember the current settings so that we
      can then back up. */
   start_die_c_offset = get_position_of_Cursor( c );
   after_die_c_offset = 0; // set to c position if a parser has read the DIE.

   if (VG_(clo_read_var_info)) {
      parse_type_DIE( tyents,
                      typarser,
                      (DW_TAG)atag,
                      posn,
                      level,
                      c,     /* DIE cursor */
                      abbv,  /* abbrev */
                      cc,
                      td3 );
      /* If the type parser moved the cursor, note where the DIE ends
         and rewind so that the next parser starts from the same place. */
      if (get_position_of_Cursor( c ) != start_die_c_offset) {
         after_die_c_offset = get_position_of_Cursor( c );
         set_position_of_Cursor( c, start_die_c_offset );
      }

      parse_var_DIE( rangestree,
                     tempvars,
                     gexprs,
                     varparser,
                     (DW_TAG)atag,
                     posn,
                     level,
                     c,     /* DIE cursor */
                     abbv,  /* abbrev */
                     cc,
                     td3 );
      /* Same again for the variable parser. */
      if (get_position_of_Cursor( c ) != start_die_c_offset) {
         after_die_c_offset = get_position_of_Cursor( c );
         set_position_of_Cursor( c, start_die_c_offset );
      }

      parse_children = True;
      // The type and var parsers do not have logic to skip children
      // and establish the value of sibling, so the children must
      // always be parsed when these parsers are active.
   }

   if (VG_(clo_read_inline_info)) {
      inlparser->sibling = 0;
      /* The inline parser's return value says whether it wants the
         children parsed; this is OR-ed with what was decided above. */
      parse_children =
         parse_inl_DIE( inlparser,
                        (DW_TAG)atag,
                        posn,
                        level,
                        c,     /* DIE cursor */
                        abbv,  /* abbrev */
                        cc,
                        td3 )
         || parse_children;
      if (get_position_of_Cursor( c ) != start_die_c_offset) {
         after_die_c_offset = get_position_of_Cursor( c );
         // Last parser, no need to reset the cursor to start_die_c_offset.
      }
      /* Pick up the sibling position recorded by the inline parser,
         if any. */
      if (sibling == 0)
         sibling = inlparser->sibling;
      vg_assert (inlparser->sibling == 0 || inlparser->sibling == sibling);
   }

   if (after_die_c_offset > 0) {
      // DIE was read by a parser above, so we know where the DIE ends.
      set_position_of_Cursor( c, after_die_c_offset );
   } else {
      /* No parser has parsed this DIE.  So, we need to skip the DIE,
         in order to read the next DIE.
         At the same time, establish sibling value if the DIE has one. */
      TRACE_D3("    uninteresting DIE -> skipping ...\n");
      skip_DIE (&sibling, c, abbv, cc);
   }

   /* --- Now recurse into its children, if any
      and the parsing of the children is requested by a parser --- */
   if (has_children == DW_children_yes) {
      /* Children must also be walked when no sibling position is
         known, since there is then nowhere to jump to. */
      if (parse_children || sibling == 0) {
         if (0) TRACE_D3("BEGIN children of level %d\n", level);
         while (True) {
            atag = peek_ULEB128( c );
            if (atag == 0) break;
            read_DIE( rangestree, tyents, tempvars, gexprs,
                      typarser, varparser, inlparser,
                      c, td3, cc, level+1 );
         }
         /* Now we need to eat the terminating zero */
         atag = get_ULEB128( c );
         vg_assert(atag == 0);
         if (0) TRACE_D3("END children of level %d\n", level);
      } else {
         // We can skip the children, by jumping to the sibling
         TRACE_D3("    SKIPPING DIE's children,"
                  "jumping to sibling <%d><%lx>\n",
                  level, sibling);
         set_position_of_Cursor( c, sibling );
      }
   }

}
4298
/* Would display the contents of .debug_loc when tracing.  The whole
   body is currently compiled out with '#if 0' (see the comment just
   inside for the reason), so as it stands this function does nothing
   and none of its parameters are used. */
static void trace_debug_loc (const DebugInfo* di,
                             __attribute__((noreturn)) void (*barf)( const HChar* ),
                             DiSlice escn_debug_loc)
{
#if 0
   /* This doesn't work properly because it assumes all entries are
      packed end to end, with no holes.  But that doesn't always
      appear to be the case, so it loses sync.  And the D3 spec
      doesn't appear to require a no-hole situation either. */
   /* Display .debug_loc */
   Addr  dl_base;
   UWord dl_offset;
   Cursor loc; /* for showing .debug_loc */
   Bool td3 = di->trace_symtab;

   TRACE_SYMTAB("\n");
   TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
   TRACE_SYMTAB("    Offset   Begin    End      Expression\n");
   if (ML_(sli_is_valid)(escn_debug_loc)) {
      init_Cursor( &loc, escn_debug_loc, 0, barf,
                   "Overrun whilst reading .debug_loc section(1)" );
      dl_base = 0;
      dl_offset = 0;
      while (True) {
         UWord w1, w2;
         UWord len;
         if (is_at_end_Cursor( &loc ))
            break;

         /* Read a (host-)word pair.  This is something of a hack since
            the word size to read is really dictated by the ELF file;
            however, we assume we're reading a file with the same
            word-sizeness as the host.  Reasonably enough. */
         w1 = get_UWord( &loc );
         w2 = get_UWord( &loc );

         if (w1 == 0 && w2 == 0) {
            /* end of list.  reset 'base' */
            TRACE_D3("    %08lx <End of list>\n", dl_offset);
            dl_base = 0;
            dl_offset = get_position_of_Cursor( &loc );
            continue;
         }

         if (w1 == -1UL) {
            /* new value for 'base' */
            TRACE_D3("    %08lx %16lx %08lx (base address)\n",
                     dl_offset, w1, w2);
            dl_base = w2;
            continue;
         }

         /* else a location expression follows */
         TRACE_D3("    %08lx %08lx %08lx ",
                  dl_offset, w1 + dl_base, w2 + dl_base);
         /* The expression is a 2-byte length followed by that many
            bytes, which are shown in hex. */
         len = (UWord)get_UShort( &loc );
         while (len > 0) {
            UChar byte = get_UChar( &loc );
            TRACE_D3("%02x", (UInt)byte);
            len--;
         }
         TRACE_SYMTAB("\n");
      }
   }
#endif
}
4365
/* Display the contents of .debug_ranges through the tracing macros.
   Tracing is controlled by di->trace_symtab.  The section is read as
   a sequence of (host-)word pairs: a (0,0) pair ends a list and
   resets the base address, a pair whose first word is all ones sets
   a new base address, and any other pair is shown as a range with
   the current base added to both ends.  'barf' is the handler given
   to the cursor, together with an overrun message. */
static void trace_debug_ranges (const DebugInfo* di,
                                __attribute__((noreturn)) void (*barf)( const HChar* ),
                                DiSlice escn_debug_ranges)
{
   Cursor cur;        /* walks over .debug_ranges */
   Addr   base;       /* current base address */
   UWord  listStart;  /* offset shown on each line; only moves at
                         the end of a list */
   Bool   td3 = di->trace_symtab;

   /* Display .debug_ranges */
   TRACE_SYMTAB("\n");
   TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
   TRACE_SYMTAB("    Offset   Begin    End\n");

   if (!ML_(sli_is_valid)(escn_debug_ranges))
      return;

   init_Cursor( &cur, escn_debug_ranges, 0, barf,
                "Overrun whilst reading .debug_ranges section(1)" );
   base      = 0;
   listStart = 0;

   while (!is_at_end_Cursor( &cur )) {
      /* Read a (host-)word pair.  This is something of a hack: the
         word size is really dictated by the ELF file, but we assume
         the file has the same word size as the host. */
      UWord first  = get_UWord( &cur );
      UWord second = get_UWord( &cur );

      if (first == 0 && second == 0) {
         /* End of list: reset the base. */
         TRACE_D3("    %08lx <End of list>\n", listStart);
         base      = 0;
         listStart = get_position_of_Cursor( &cur );
      }
      else if (first == -1UL) {
         /* Base address selection entry. */
         TRACE_D3("    %08lx %16lx %08lx (base address)\n",
                  listStart, first, second);
         base = second;
      }
      else {
         /* An ordinary range [first+base, second+base). */
         TRACE_D3("    %08lx %08lx %08lx\n",
                  listStart, first + base, second + base);
      }
   }
}
4419
/* Display the contents of .debug_abbrev through the tracing macros.
   Tracing is controlled by di->trace_symtab.  The section is read as
   a series of abbreviation tables.  Each table is a run of entries
   (ULEB128 code, ULEB128 tag, one has_children byte, then ULEB128
   name/form pairs ended by a 0/0 pair), and the table itself is
   ended by a zero code.  'barf' is the handler given to the cursor,
   together with an overrun message. */
static void trace_debug_abbrev (const DebugInfo* di,
                                __attribute__((noreturn)) void (*barf)( const HChar* ),
                                DiSlice escn_debug_abbv)
{
   Cursor cur;   /* walks over .debug_abbrev */
   Bool   td3 = di->trace_symtab;

   /* Display .debug_abbrev */
   TRACE_SYMTAB("\n");
   TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");

   if (!ML_(sli_is_valid)(escn_debug_abbv))
      return;

   init_Cursor( &cur, escn_debug_abbv, 0, barf,
                "Overrun whilst reading .debug_abbrev section" );

   while (!is_at_end_Cursor( &cur )) {
      ULong code;

      /* One abbreviation table per iteration. */
      TRACE_D3("  Number TAG\n");

      for (code = get_ULEB128( &cur );
           code != 0;                      /* zero code ends the table */
           code = get_ULEB128( &cur )) {
         ULong tag  = get_ULEB128( &cur );
         UInt  kids = get_UChar( &cur );

         TRACE_D3("   %llu      %s    [%s]\n",
                  code, ML_(pp_DW_TAG)(tag),
                        ML_(pp_DW_children)(kids));

         /* Attribute specifications, up to the 0/0 terminator. */
         for (;;) {
            ULong spec_name = get_ULEB128( &cur );
            ULong spec_form = get_ULEB128( &cur );
            if (spec_name == 0 && spec_form == 0) {
               break;
            }
            TRACE_D3("    %-18s %s\n",
                     ML_(pp_DW_AT)(spec_name), ML_(pp_DW_FORM)(spec_form));
         }
      }
   }
}
4459
4460 static
new_dwarf3_reader_wrk(DebugInfo * di,void (* barf)(const HChar *),DiSlice escn_debug_info,DiSlice escn_debug_types,DiSlice escn_debug_abbv,DiSlice escn_debug_line,DiSlice escn_debug_str,DiSlice escn_debug_ranges,DiSlice escn_debug_loc,DiSlice escn_debug_info_alt,DiSlice escn_debug_abbv_alt,DiSlice escn_debug_line_alt,DiSlice escn_debug_str_alt)4461 void new_dwarf3_reader_wrk (
4462 DebugInfo* di,
4463 __attribute__((noreturn)) void (*barf)( const HChar* ),
4464 DiSlice escn_debug_info, DiSlice escn_debug_types,
4465 DiSlice escn_debug_abbv, DiSlice escn_debug_line,
4466 DiSlice escn_debug_str, DiSlice escn_debug_ranges,
4467 DiSlice escn_debug_loc, DiSlice escn_debug_info_alt,
4468 DiSlice escn_debug_abbv_alt, DiSlice escn_debug_line_alt,
4469 DiSlice escn_debug_str_alt
4470 )
4471 {
4472 XArray* /* of TyEnt */ tyents = NULL;
4473 XArray* /* of TyEnt */ tyents_to_keep = NULL;
4474 XArray* /* of GExpr* */ gexprs = NULL;
4475 XArray* /* of TempVar* */ tempvars = NULL;
4476 WordFM* /* of (XArray* of AddrRange, void) */ rangestree = NULL;
4477 TyEntIndexCache* tyents_cache = NULL;
4478 TyEntIndexCache* tyents_to_keep_cache = NULL;
4479 TempVar *varp, *varp2;
4480 GExpr* gexpr;
4481 Cursor info; /* primary cursor for parsing .debug_info */
4482 D3TypeParser typarser;
4483 D3VarParser varparser;
4484 D3InlParser inlparser;
4485 Word i, j, n;
4486 Bool td3 = di->trace_symtab;
4487 XArray* /* of TempVar* */ dioff_lookup_tab;
4488 Int pass;
4489 VgHashTable *signature_types = NULL;
4490
4491 /* Display/trace various information, if requested. */
4492 if (TD3) {
4493 trace_debug_loc (di, barf, escn_debug_loc);
4494 trace_debug_ranges (di, barf, escn_debug_ranges);
4495 trace_debug_abbrev (di, barf, escn_debug_abbv);
4496 TRACE_SYMTAB("\n");
4497 }
4498
4499 /* Zero out all parsers. Parsers will really be initialised
4500 according to VG_(clo_read_*_info). */
4501 VG_(memset)( &inlparser, 0, sizeof(inlparser) );
4502
4503 if (VG_(clo_read_var_info)) {
4504 /* We'll park the harvested type information in here. Also create
4505 a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
4506 have at least one type entry to refer to. D3_FAKEVOID_CUOFF is
4507 huge and presumably will not occur in any valid DWARF3 file --
4508 it would need to have a .debug_info section 4GB long for that to
4509 happen. These type entries end up in the DebugInfo. */
4510 tyents = VG_(newXA)( ML_(dinfo_zalloc),
4511 "di.readdwarf3.ndrw.1 (TyEnt temp array)",
4512 ML_(dinfo_free), sizeof(TyEnt) );
4513 { TyEnt tyent;
4514 VG_(memset)(&tyent, 0, sizeof(tyent));
4515 tyent.tag = Te_TyVoid;
4516 tyent.cuOff = D3_FAKEVOID_CUOFF;
4517 tyent.Te.TyVoid.isFake = True;
4518 VG_(addToXA)( tyents, &tyent );
4519 }
4520 { TyEnt tyent;
4521 VG_(memset)(&tyent, 0, sizeof(tyent));
4522 tyent.tag = Te_UNKNOWN;
4523 tyent.cuOff = D3_INVALID_CUOFF;
4524 VG_(addToXA)( tyents, &tyent );
4525 }
4526
4527 /* This is a tree used to unique-ify the range lists that are
4528 manufactured by parse_var_DIE. References to the keys in the
4529 tree wind up in .rngMany fields in TempVars. We'll need to
4530 delete this tree, and the XArrays attached to it, at the end of
4531 this function. */
4532 rangestree = VG_(newFM)( ML_(dinfo_zalloc),
4533 "di.readdwarf3.ndrw.2 (rangestree)",
4534 ML_(dinfo_free),
4535 (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange );
4536
4537 /* List of variables we're accumulating. These don't end up in the
4538 DebugInfo; instead their contents are handed to ML_(addVar) and
4539 the list elements are then deleted. */
4540 tempvars = VG_(newXA)( ML_(dinfo_zalloc),
4541 "di.readdwarf3.ndrw.3 (TempVar*s array)",
4542 ML_(dinfo_free),
4543 sizeof(TempVar*) );
4544
4545 /* List of GExprs we're accumulating. These wind up in the
4546 DebugInfo. */
4547 gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4",
4548 ML_(dinfo_free), sizeof(GExpr*) );
4549
4550 /* We need a D3TypeParser to keep track of partially constructed
4551 types. It'll be discarded as soon as we've completed the CU,
4552 since the resulting information is tipped in to 'tyents' as it
4553 is generated. */
4554 type_parser_init(&typarser);
4555
4556 var_parser_init(&varparser);
4557
4558 signature_types = VG_(HT_construct) ("signature_types");
4559 }
4560
4561 /* Do an initial pass to scan the .debug_types section, if any, and
4562 fill in the signatured types hash table. This lets us handle
4563 mapping from a type signature to a (cooked) DIE offset directly
4564 in get_Form_contents. */
4565 if (VG_(clo_read_var_info) && ML_(sli_is_valid)(escn_debug_types)) {
4566 init_Cursor( &info, escn_debug_types, 0, barf,
4567 "Overrun whilst reading .debug_types section" );
4568 TRACE_D3("\n------ Collecting signatures from "
4569 ".debug_types section ------\n");
4570
4571 while (True) {
4572 UWord cu_start_offset, cu_offset_now;
4573 CUConst cc;
4574
4575 cu_start_offset = get_position_of_Cursor( &info );
4576 TRACE_D3("\n");
4577 TRACE_D3(" Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
4578 /* parse_CU_header initialises the CU's abbv hash table. */
4579 parse_CU_Header( &cc, td3, &info, escn_debug_abbv, True, False );
4580
4581 /* Needed by cook_die. */
4582 cc.types_cuOff_bias = escn_debug_info.szB;
4583
4584 record_signatured_type( signature_types, cc.type_signature,
4585 cook_die( &cc, cc.type_offset ));
4586
4587 /* Until proven otherwise we assume we don't need the icc9
4588 workaround in this case; see the DIE-reading loop below
4589 for details. */
4590 cu_offset_now = (cu_start_offset + cc.unit_length
4591 + (cc.is_dw64 ? 12 : 4));
4592
4593 clear_CUConst ( &cc);
4594
4595 if (cu_offset_now >= escn_debug_types.szB) {
4596 break;
4597 }
4598
4599 set_position_of_Cursor ( &info, cu_offset_now );
4600 }
4601 }
4602
4603 /* Perform three DIE-reading passes. The first pass reads DIEs from
4604 alternate .debug_info (if any), the second pass reads DIEs from
4605 .debug_info, and the third pass reads DIEs from .debug_types.
4606 Moving the body of this loop into a separate function would
4607 require a large number of arguments to be passed in, so it is
4608 kept inline instead. */
4609 for (pass = 0; pass < 3; ++pass) {
4610 ULong section_size;
4611
4612 if (pass == 0) {
4613 if (!ML_(sli_is_valid)(escn_debug_info_alt))
4614 continue;
4615 /* Now loop over the Compilation Units listed in the alternate
4616 .debug_info section (see D3SPEC sec 7.5) paras 1 and 2.
4617 Each compilation unit contains a Compilation Unit Header
4618 followed by precisely one DW_TAG_compile_unit or
4619 DW_TAG_partial_unit DIE. */
4620 init_Cursor( &info, escn_debug_info_alt, 0, barf,
4621 "Overrun whilst reading alternate .debug_info section" );
4622 section_size = escn_debug_info_alt.szB;
4623
4624 TRACE_D3("\n------ Parsing alternate .debug_info section ------\n");
4625 } else if (pass == 1) {
4626 /* Now loop over the Compilation Units listed in the .debug_info
4627 section (see D3SPEC sec 7.5) paras 1 and 2. Each compilation
4628 unit contains a Compilation Unit Header followed by precisely
4629 one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
4630 init_Cursor( &info, escn_debug_info, 0, barf,
4631 "Overrun whilst reading .debug_info section" );
4632 section_size = escn_debug_info.szB;
4633
4634 TRACE_D3("\n------ Parsing .debug_info section ------\n");
4635 } else {
4636 if (!ML_(sli_is_valid)(escn_debug_types))
4637 continue;
4638 if (!VG_(clo_read_var_info))
4639 continue; // Types not needed when only reading inline info.
4640 init_Cursor( &info, escn_debug_types, 0, barf,
4641 "Overrun whilst reading .debug_types section" );
4642 section_size = escn_debug_types.szB;
4643
4644 TRACE_D3("\n------ Parsing .debug_types section ------\n");
4645 }
4646
4647 while (True) {
4648 ULong cu_start_offset, cu_offset_now;
4649 CUConst cc;
4650 /* It may be that the stated size of this CU is larger than the
4651 amount of stuff actually in it. icc9 seems to generate CUs
4652 thusly. We use these variables to figure out if this is
4653 indeed the case, and if so how many bytes we need to skip to
4654 get to the start of the next CU. Not skipping those bytes
4655 causes us to misidentify the start of the next CU, and it all
4656 goes badly wrong after that (not surprisingly). */
4657 UWord cu_size_including_IniLen, cu_amount_used;
4658
4659 /* It seems icc9 finishes the DIE info before debug_info_sz
4660 bytes have been used up. So be flexible, and declare the
4661 sequence complete if there is not enough remaining bytes to
4662 hold even the smallest conceivable CU header. (11 bytes I
4663 reckon). */
4664 /* JRS 23Jan09: I suspect this is no longer necessary now that
4665 the code below contains a 'while (cu_amount_used <
4666 cu_size_including_IniLen ...' style loop, which skips over
4667 any leftover bytes at the end of a CU in the case where the
4668 CU's stated size is larger than its actual size (as
4669 determined by reading all its DIEs). However, for prudence,
4670 I'll leave the following test in place. I can't see that a
4671 CU header can be smaller than 11 bytes, so I don't think
4672 there's any harm possible through the test -- it just adds
4673 robustness. */
4674 Word avail = get_remaining_length_Cursor( &info );
4675 if (avail < 11) {
4676 if (avail > 0)
4677 TRACE_D3("new_dwarf3_reader_wrk: warning: "
4678 "%ld unused bytes after end of DIEs\n", avail);
4679 break;
4680 }
4681
4682 if (VG_(clo_read_var_info)) {
4683 /* Check the varparser's stack is in a sane state. */
4684 vg_assert(varparser.sp == -1);
4685 /* Check the typarser's stack is in a sane state. */
4686 vg_assert(typarser.sp == -1);
4687 }
4688
4689 cu_start_offset = get_position_of_Cursor( &info );
4690 TRACE_D3("\n");
4691 TRACE_D3(" Compilation Unit @ offset 0x%llx:\n", cu_start_offset);
4692 /* parse_CU_header initialises the CU's hashtable of abbvs ht_abbvs */
4693 if (pass == 0) {
4694 parse_CU_Header( &cc, td3, &info, escn_debug_abbv_alt,
4695 False, True );
4696 } else {
4697 parse_CU_Header( &cc, td3, &info, escn_debug_abbv,
4698 pass == 2, False );
4699 }
4700 cc.escn_debug_str = pass == 0 ? escn_debug_str_alt
4701 : escn_debug_str;
4702 cc.escn_debug_ranges = escn_debug_ranges;
4703 cc.escn_debug_loc = escn_debug_loc;
4704 cc.escn_debug_line = pass == 0 ? escn_debug_line_alt
4705 : escn_debug_line;
4706 cc.escn_debug_info = pass == 0 ? escn_debug_info_alt
4707 : escn_debug_info;
4708 cc.escn_debug_types = escn_debug_types;
4709 cc.escn_debug_info_alt = escn_debug_info_alt;
4710 cc.escn_debug_str_alt = escn_debug_str_alt;
4711 cc.types_cuOff_bias = escn_debug_info.szB;
4712 cc.alt_cuOff_bias = escn_debug_info.szB + escn_debug_types.szB;
4713 cc.cu_start_offset = cu_start_offset;
4714 cc.di = di;
4715 /* The CU's svma can be deduced by looking at the AT_low_pc
4716 value in the top level TAG_compile_unit, which is the topmost
4717 DIE. We'll leave it for the 'varparser' to acquire that info
4718 and fill it in -- since it is the only party to want to know
4719 it. */
4720 cc.cu_svma_known = False;
4721 cc.cu_svma = 0;
4722
4723 if (VG_(clo_read_var_info)) {
4724 cc.signature_types = signature_types;
4725
4726 /* Create a fake outermost-level range covering the entire
4727 address range. So we always have *something* to catch all
4728 variable declarations. */
4729 varstack_push( &cc, &varparser, td3,
4730 unitary_range_list(0UL, ~0UL),
4731 -1, False/*isFunc*/, NULL/*fbGX*/ );
4732
4733 /* And set up the fndn_ix_Table. When we come across the top
4734 level DIE for this CU (which is what the next call to
4735 read_DIE should process) we will copy all the file names out
4736 of the .debug_line img area and use this table to look up the
4737 copies when we later see filename numbers in DW_TAG_variables
4738 etc. */
4739 vg_assert(!varparser.fndn_ix_Table );
4740 varparser.fndn_ix_Table
4741 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5var",
4742 ML_(dinfo_free),
4743 sizeof(UInt) );
4744 }
4745
4746 if (VG_(clo_read_inline_info)) {
4747 /* fndn_ix_Table for the inlined call parser */
4748 vg_assert(!inlparser.fndn_ix_Table );
4749 inlparser.fndn_ix_Table
4750 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5inl",
4751 ML_(dinfo_free),
4752 sizeof(UInt) );
4753 }
4754
4755 /* Now read the one-and-only top-level DIE for this CU. */
4756 vg_assert(!VG_(clo_read_var_info) || varparser.sp == 0);
4757 read_DIE( rangestree,
4758 tyents, tempvars, gexprs,
4759 &typarser, &varparser, &inlparser,
4760 &info, td3, &cc, 0 );
4761
4762 cu_offset_now = get_position_of_Cursor( &info );
4763
4764 if (0) VG_(printf)("Travelled: %llu size %llu\n",
4765 cu_offset_now - cc.cu_start_offset,
4766 cc.unit_length + (cc.is_dw64 ? 12 : 4));
4767
4768 /* How big the CU claims it is .. */
4769 cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4);
4770 /* .. vs how big we have found it to be */
4771 cu_amount_used = cu_offset_now - cc.cu_start_offset;
4772
4773 if (1) TRACE_D3("offset now %llu, d-i-size %llu\n",
4774 cu_offset_now, section_size);
4775 if (cu_offset_now > section_size)
4776 barf("toplevel DIEs beyond end of CU");
4777
4778 /* If the CU is bigger than it claims to be, we've got a serious
4779 problem. */
4780 if (cu_amount_used > cu_size_including_IniLen)
4781 barf("CU's actual size appears to be larger than it claims it is");
4782
/* If the CU is smaller than it claims to be, we need to skip some
bytes. The loop updates cu_offset_now and cu_amount_used. */
4785 while (cu_amount_used < cu_size_including_IniLen
4786 && get_remaining_length_Cursor( &info ) > 0) {
4787 if (0) VG_(printf)("SKIP\n");
4788 (void)get_UChar( &info );
4789 cu_offset_now = get_position_of_Cursor( &info );
4790 cu_amount_used = cu_offset_now - cc.cu_start_offset;
4791 }
4792
4793 if (VG_(clo_read_var_info)) {
4794 /* Preen to level -2. DIEs have level >= 0 so -2 cannot occur
4795 anywhere else at all. Our fake the-entire-address-space
4796 range is at level -1, so preening to -2 should completely
4797 empty the stack out. */
4798 TRACE_D3("\n");
4799 varstack_preen( &varparser, td3, -2 );
4800 /* Similarly, empty the type stack out. */
4801 typestack_preen( &typarser, td3, -2 );
4802 }
4803
4804 if (VG_(clo_read_var_info)) {
4805 vg_assert(varparser.fndn_ix_Table );
4806 VG_(deleteXA)( varparser.fndn_ix_Table );
4807 varparser.fndn_ix_Table = NULL;
4808 }
4809 if (VG_(clo_read_inline_info)) {
4810 vg_assert(inlparser.fndn_ix_Table );
4811 VG_(deleteXA)( inlparser.fndn_ix_Table );
4812 inlparser.fndn_ix_Table = NULL;
4813 }
4814 clear_CUConst(&cc);
4815
4816 if (cu_offset_now == section_size)
4817 break;
4818 /* else keep going */
4819 }
4820 }
4821
4822
4823 if (VG_(clo_read_var_info)) {
4824 /* From here on we're post-processing the stuff we got
4825 out of the .debug_info section. */
4826 if (TD3) {
4827 TRACE_D3("\n");
4828 ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array");
4829 TRACE_D3("\n");
4830 TRACE_D3("------ Compressing type entries ------\n");
4831 }
4832
4833 tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6",
4834 sizeof(TyEntIndexCache) );
4835 ML_(TyEntIndexCache__invalidate)( tyents_cache );
4836 dedup_types( td3, tyents, tyents_cache );
4837 if (TD3) {
4838 TRACE_D3("\n");
4839 ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression");
4840 }
4841
4842 TRACE_D3("\n");
4843 TRACE_D3("------ Resolving the types of variables ------\n" );
4844 resolve_variable_types( barf, tyents, tyents_cache, tempvars );
4845
4846 /* Copy all the non-INDIR tyents into a new table. For large
4847 .so's, about 90% of the tyents will by now have been resolved to
4848 INDIRs, and we no longer need them, and so don't need to store
4849 them. */
4850 tyents_to_keep
4851 = VG_(newXA)( ML_(dinfo_zalloc),
4852 "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
4853 ML_(dinfo_free), sizeof(TyEnt) );
4854 n = VG_(sizeXA)( tyents );
4855 for (i = 0; i < n; i++) {
4856 TyEnt* ent = VG_(indexXA)( tyents, i );
4857 if (ent->tag != Te_INDIR)
4858 VG_(addToXA)( tyents_to_keep, ent );
4859 }
4860
4861 VG_(deleteXA)( tyents );
4862 tyents = NULL;
4863 ML_(dinfo_free)( tyents_cache );
4864 tyents_cache = NULL;
4865
4866 /* Sort tyents_to_keep so we can lookup in it. A complete (if
4867 minor) waste of time, since tyents itself is sorted, but
4868 necessary since VG_(lookupXA) refuses to cooperate if we
4869 don't. */
4870 VG_(setCmpFnXA)( tyents_to_keep, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
4871 VG_(sortXA)( tyents_to_keep );
4872
4873 /* Enable cacheing on tyents_to_keep */
4874 tyents_to_keep_cache
4875 = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8",
4876 sizeof(TyEntIndexCache) );
4877 ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache );
4878
4879 /* And record the tyents in the DebugInfo. We do this before
4880 starting to hand variables to ML_(addVar), since if ML_(addVar)
4881 wants to do debug printing (of the types of said vars) then it
4882 will need the tyents.*/
4883 vg_assert(!di->admin_tyents);
4884 di->admin_tyents = tyents_to_keep;
4885
4886 /* Bias all the location expressions. */
4887 TRACE_D3("\n");
4888 TRACE_D3("------ Biasing the location expressions ------\n" );
4889
4890 n = VG_(sizeXA)( gexprs );
4891 for (i = 0; i < n; i++) {
4892 gexpr = *(GExpr**)VG_(indexXA)( gexprs, i );
4893 bias_GX( gexpr, di );
4894 }
4895
4896 TRACE_D3("\n");
4897 TRACE_D3("------ Acquired the following variables: ------\n\n");
4898
4899 /* Park (pointers to) all the vars in an XArray, so we can look up
4900 abstract origins quickly. The array is sorted (hence, looked-up
4901 by) the .dioff fields. Since the .dioffs should be in strictly
4902 ascending order, there is no need to sort the array after
4903 construction. The ascendingness is however asserted for. */
4904 dioff_lookup_tab
4905 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9",
4906 ML_(dinfo_free),
4907 sizeof(TempVar*) );
4908
4909 n = VG_(sizeXA)( tempvars );
4910 Word first_primary_var = 0;
4911 for (first_primary_var = 0;
4912 escn_debug_info_alt.szB/*really?*/ && first_primary_var < n;
4913 first_primary_var++) {
4914 varp = *(TempVar**)VG_(indexXA)( tempvars, first_primary_var );
4915 if (varp->dioff < escn_debug_info.szB + escn_debug_types.szB)
4916 break;
4917 }
4918 for (i = 0; i < n; i++) {
4919 varp = *(TempVar**)VG_(indexXA)( tempvars, (i + first_primary_var) % n );
4920 if (i > first_primary_var) {
4921 varp2 = *(TempVar**)VG_(indexXA)( tempvars,
4922 (i + first_primary_var - 1) % n );
4923 /* why should this hold? Only, I think, because we've
4924 constructed the array by reading .debug_info sequentially,
4925 and so the array .dioff fields should reflect that, and be
4926 strictly ascending. */
4927 vg_assert(varp2->dioff < varp->dioff);
4928 }
4929 VG_(addToXA)( dioff_lookup_tab, &varp );
4930 }
4931 VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff );
4932 VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */
4933
4934 /* Now visit each var. Collect up as much info as possible for
4935 each var and hand it to ML_(addVar). */
4936 n = VG_(sizeXA)( tempvars );
4937 for (j = 0; j < n; j++) {
4938 TyEnt* ent;
4939 varp = *(TempVar**)VG_(indexXA)( tempvars, j );
4940
4941 /* Possibly show .. */
4942 if (TD3) {
4943 VG_(printf)("<%lx> addVar: level %d: %s :: ",
4944 varp->dioff,
4945 varp->level,
4946 varp->name ? varp->name : "<anon_var>" );
4947 if (varp->typeR) {
4948 ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR );
4949 } else {
4950 VG_(printf)("NULL");
4951 }
4952 VG_(printf)("\n Loc=");
4953 if (varp->gexpr) {
4954 ML_(pp_GX)(varp->gexpr);
4955 } else {
4956 VG_(printf)("NULL");
4957 }
4958 VG_(printf)("\n");
4959 if (varp->fbGX) {
4960 VG_(printf)(" FrB=");
4961 ML_(pp_GX)( varp->fbGX );
4962 VG_(printf)("\n");
4963 } else {
4964 VG_(printf)(" FrB=none\n");
4965 }
4966 VG_(printf)(" declared at: %u %s:%d\n",
4967 varp->fndn_ix,
4968 ML_(fndn_ix2filename) (di, varp->fndn_ix),
4969 varp->fLine );
4970 if (varp->absOri != (UWord)D3_INVALID_CUOFF)
4971 VG_(printf)(" abstract origin: <%lx>\n", varp->absOri);
4972 }
4973
4974 /* Skip variables which have no location. These must be
4975 abstract instances; they are useless as-is since with no
4976 location they have no specified memory location. They will
4977 presumably be referred to via the absOri fields of other
4978 variables. */
4979 if (!varp->gexpr) {
4980 TRACE_D3(" SKIP (no location)\n\n");
4981 continue;
4982 }
4983
4984 /* So it has a location, at least. If it refers to some other
4985 entry through its absOri field, pull in further info through
4986 that. */
4987 if (varp->absOri != (UWord)D3_INVALID_CUOFF) {
4988 Bool found;
4989 Word ixFirst, ixLast;
4990 TempVar key;
4991 TempVar* keyp = &key;
4992 TempVar *varAI;
4993 VG_(memset)(&key, 0, sizeof(key)); /* not necessary */
4994 key.dioff = varp->absOri; /* this is what we want to find */
4995 found = VG_(lookupXA)( dioff_lookup_tab, &keyp,
4996 &ixFirst, &ixLast );
4997 if (!found) {
4998 /* barf("DW_AT_abstract_origin can't be resolved"); */
4999 TRACE_D3(" SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
5000 continue;
5001 }
5002 /* If the following fails, there is more than one entry with
5003 the same dioff. Which can't happen. */
5004 vg_assert(ixFirst == ixLast);
5005 varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst );
5006 /* stay sane */
5007 vg_assert(varAI);
5008 vg_assert(varAI->dioff == varp->absOri);
5009
5010 /* Copy what useful info we can. */
5011 if (varAI->typeR && !varp->typeR)
5012 varp->typeR = varAI->typeR;
5013 if (varAI->name && !varp->name)
5014 varp->name = varAI->name;
5015 if (varAI->fndn_ix && !varp->fndn_ix)
5016 varp->fndn_ix = varAI->fndn_ix;
5017 if (varAI->fLine > 0 && varp->fLine == 0)
5018 varp->fLine = varAI->fLine;
5019 }
5020
5021 /* Give it a name if it doesn't have one. */
5022 if (!varp->name)
5023 varp->name = ML_(addStr)( di, "<anon_var>", -1 );
5024
5025 /* So now does it have enough info to be useful? */
5026 /* NOTE: re typeR: this is a hack. If typeR is Te_UNKNOWN then
5027 the type didn't get resolved. Really, in that case
5028 something's broken earlier on, and should be fixed, rather
5029 than just skipping the variable. */
5030 ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep,
5031 tyents_to_keep_cache,
5032 varp->typeR );
5033 /* The next two assertions should be guaranteed by
5034 our previous call to resolve_variable_types. */
5035 vg_assert(ent);
5036 vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN);
5037
5038 if (ent->tag == Te_UNKNOWN) continue;
5039
5040 vg_assert(varp->gexpr);
5041 vg_assert(varp->name);
5042 vg_assert(varp->typeR);
5043 vg_assert(varp->level >= 0);
5044
5045 /* Ok. So we're going to keep it. Call ML_(addVar) once for
5046 each address range in which the variable exists. */
5047 TRACE_D3(" ACQUIRE for range(s) ");
5048 { AddrRange oneRange;
5049 AddrRange* varPcRanges;
5050 Word nVarPcRanges;
5051 /* Set up to iterate over address ranges, however
5052 represented. */
5053 if (varp->nRanges == 0 || varp->nRanges == 1) {
5054 vg_assert(!varp->rngMany);
5055 if (varp->nRanges == 0) {
5056 vg_assert(varp->rngOneMin == 0);
5057 vg_assert(varp->rngOneMax == 0);
5058 }
5059 nVarPcRanges = varp->nRanges;
5060 oneRange.aMin = varp->rngOneMin;
5061 oneRange.aMax = varp->rngOneMax;
5062 varPcRanges = &oneRange;
5063 } else {
5064 vg_assert(varp->rngMany);
5065 vg_assert(varp->rngOneMin == 0);
5066 vg_assert(varp->rngOneMax == 0);
5067 nVarPcRanges = VG_(sizeXA)(varp->rngMany);
5068 vg_assert(nVarPcRanges >= 2);
5069 vg_assert(nVarPcRanges == (Word)varp->nRanges);
5070 varPcRanges = VG_(indexXA)(varp->rngMany, 0);
5071 }
5072 if (varp->level == 0)
5073 vg_assert( nVarPcRanges == 1 );
5074 /* and iterate */
5075 for (i = 0; i < nVarPcRanges; i++) {
5076 Addr pcMin = varPcRanges[i].aMin;
5077 Addr pcMax = varPcRanges[i].aMax;
5078 vg_assert(pcMin <= pcMax);
5079 /* Level 0 is the global address range. So at level 0 we
5080 don't want to bias pcMin/pcMax; but at all other levels
5081 we do since those are derived from svmas in the Dwarf
5082 we're reading. Be paranoid ... */
5083 if (varp->level == 0) {
5084 vg_assert(pcMin == (Addr)0);
5085 vg_assert(pcMax == ~(Addr)0);
5086 } else {
5087 /* vg_assert(pcMin > (Addr)0);
5088 No .. we can legitimately expect to see ranges like
5089 0x0-0x11D (pre-biasing, of course). */
5090 vg_assert(pcMax < ~(Addr)0);
5091 }
5092
5093 /* Apply text biasing, for non-global variables. */
5094 if (varp->level > 0) {
5095 pcMin += di->text_debug_bias;
5096 pcMax += di->text_debug_bias;
5097 }
5098
5099 if (i > 0 && (i%2) == 0)
5100 TRACE_D3("\n ");
5101 TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax );
5102
5103 ML_(addVar)(
5104 di, varp->level,
5105 pcMin, pcMax,
5106 varp->name, varp->typeR,
5107 varp->gexpr, varp->fbGX,
5108 varp->fndn_ix, varp->fLine, td3
5109 );
5110 }
5111 }
5112
5113 TRACE_D3("\n\n");
5114 /* and move on to the next var */
5115 }
5116
5117 /* Now free all the TempVars */
5118 n = VG_(sizeXA)( tempvars );
5119 for (i = 0; i < n; i++) {
5120 varp = *(TempVar**)VG_(indexXA)( tempvars, i );
5121 ML_(dinfo_free)(varp);
5122 }
5123 VG_(deleteXA)( tempvars );
5124 tempvars = NULL;
5125
5126 /* and the temp lookup table */
5127 VG_(deleteXA)( dioff_lookup_tab );
5128
5129 /* and the ranges tree. Note that we need to also free the XArrays
5130 which constitute the keys, hence pass VG_(deleteXA) as a
5131 key-finalizer. */
5132 VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL );
5133
5134 /* and the tyents_to_keep cache */
5135 ML_(dinfo_free)( tyents_to_keep_cache );
5136 tyents_to_keep_cache = NULL;
5137
5138 vg_assert( varparser.fndn_ix_Table == NULL );
5139
5140 /* And the signatured type hash. */
5141 VG_(HT_destruct) ( signature_types, ML_(dinfo_free) );
5142
5143 /* record the GExprs in di so they can be freed later */
5144 vg_assert(!di->admin_gexprs);
5145 di->admin_gexprs = gexprs;
5146 }
5147
5148 // Free up dynamically allocated memory
5149 if (VG_(clo_read_var_info)) {
5150 type_parser_release(&typarser);
5151 var_parser_release(&varparser);
5152 }
5153 }
5154
5155
5156 /*------------------------------------------------------------*/
5157 /*--- ---*/
5158 /*--- The "new" DWARF3 reader -- top level control logic ---*/
5159 /*--- ---*/
5160 /*------------------------------------------------------------*/
5161
5162 static Bool d3rd_jmpbuf_valid = False;
5163 static const HChar* d3rd_jmpbuf_reason = NULL;
5164 static VG_MINIMAL_JMP_BUF(d3rd_jmpbuf);
5165
barf(const HChar * reason)5166 static __attribute__((noreturn)) void barf ( const HChar* reason ) {
5167 vg_assert(d3rd_jmpbuf_valid);
5168 d3rd_jmpbuf_reason = reason;
5169 VG_MINIMAL_LONGJMP(d3rd_jmpbuf);
5170 /*NOTREACHED*/
5171 vg_assert(0);
5172 }
5173
5174
5175 void
ML_(new_dwarf3_reader)5176 ML_(new_dwarf3_reader) (
5177 DebugInfo* di,
5178 DiSlice escn_debug_info, DiSlice escn_debug_types,
5179 DiSlice escn_debug_abbv, DiSlice escn_debug_line,
5180 DiSlice escn_debug_str, DiSlice escn_debug_ranges,
5181 DiSlice escn_debug_loc, DiSlice escn_debug_info_alt,
5182 DiSlice escn_debug_abbv_alt, DiSlice escn_debug_line_alt,
5183 DiSlice escn_debug_str_alt
5184 )
5185 {
5186 volatile Int jumped;
5187 volatile Bool td3 = di->trace_symtab;
5188
5189 /* Run the _wrk function to read the dwarf3. If it succeeds, it
5190 just returns normally. If there is any failure, it longjmp's
5191 back here, having first set d3rd_jmpbuf_reason to something
5192 useful. */
5193 vg_assert(d3rd_jmpbuf_valid == False);
5194 vg_assert(d3rd_jmpbuf_reason == NULL);
5195
5196 d3rd_jmpbuf_valid = True;
5197 jumped = VG_MINIMAL_SETJMP(d3rd_jmpbuf);
5198 if (jumped == 0) {
5199 /* try this ... */
5200 new_dwarf3_reader_wrk( di, barf,
5201 escn_debug_info, escn_debug_types,
5202 escn_debug_abbv, escn_debug_line,
5203 escn_debug_str, escn_debug_ranges,
5204 escn_debug_loc, escn_debug_info_alt,
5205 escn_debug_abbv_alt, escn_debug_line_alt,
5206 escn_debug_str_alt );
5207 d3rd_jmpbuf_valid = False;
5208 TRACE_D3("\n------ .debug_info reading was successful ------\n");
5209 } else {
5210 /* It longjmp'd. */
5211 d3rd_jmpbuf_valid = False;
5212 /* Can't longjump without giving some sort of reason. */
5213 vg_assert(d3rd_jmpbuf_reason != NULL);
5214
5215 TRACE_D3("\n------ .debug_info reading failed ------\n");
5216
5217 ML_(symerr)(di, True, d3rd_jmpbuf_reason);
5218 }
5219
5220 d3rd_jmpbuf_valid = False;
5221 d3rd_jmpbuf_reason = NULL;
5222 }
5223
5224
5225
5226 /* --- Unused code fragments which might be useful one day. --- */
5227
#if 0
   /* Disabled fragment: trace-print the contents of .debug_aranges.

      NOTE(review): 'aranges', 'debug_aranges_img' and
      'debug_aranges_sz' are not declared in the part of the file
      reviewed here, and init_Cursor is called below with a separate
      (image, size) pair, whereas the live calls in this file pass a
      single DiSlice.  Presumably this needs updating before it could
      be re-enabled -- confirm against the current helper signatures. */
   TRACE_SYMTAB("\n");
   TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
   init_Cursor( &aranges, debug_aranges_img, debug_aranges_sz, 0, barf,
                "Overrun whilst reading .debug_aranges section" );

   /* One iteration per table, until the cursor is exhausted. */
   while (!is_at_end_Cursor( &aranges )) {
      /* Table header, read in on-disk order: the initial_length
         field first, then version, .debug_info offset, pointer size
         and segment size. */
      Bool   is64;
      ULong  len
         = get_Initial_Length( &is64, &aranges,
              "in .debug_aranges: invalid initial-length field" );
      UShort version    = get_UShort( &aranges );
      ULong  d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
      UChar  asize      = get_UChar( &aranges );
      UChar  segsize    = get_UChar( &aranges );
      ULong  address, length;

      TRACE_D3(" Length: %llu\n", len);
      TRACE_D3(" Version: %d\n", (Int)version);
      TRACE_D3(" Offset into .debug_info: %llx\n", d_i_offset);
      TRACE_D3(" Pointer Size: %d\n", (Int)asize);
      TRACE_D3(" Segment Size: %d\n", (Int)segsize);
      TRACE_D3("\n");
      TRACE_D3(" Address Length\n");

      /* Discard bytes until the cursor position is a multiple of
         twice the pointer size. */
      while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) {
         (void)get_UChar( &aranges );
      }

      /* Print (address, length) pairs, up to and including the
         all-zero pair that ends the table. */
      do {
         address = get_Dwarfish_UWord( &aranges, asize==8 );
         length  = get_Dwarfish_UWord( &aranges, asize==8 );
         TRACE_D3(" 0x%016llx 0x%llx\n", address, length);
      } while (address != 0 || length != 0);
   }
   TRACE_SYMTAB("\n");
#endif
5271
5272 #endif // defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
5273
5274 /*--------------------------------------------------------------------*/
5275 /*--- end ---*/
5276 /*--------------------------------------------------------------------*/
5277