1 /* -*- mode: C; c-basic-offset: 3; -*- */
2
3 /*--------------------------------------------------------------------*/
4 /*--- Read DWARF3/4 ".debug_info" sections (DIE trees). ---*/
5 /*--- readdwarf3.c ---*/
6 /*--------------------------------------------------------------------*/
7
8 /*
9 This file is part of Valgrind, a dynamic binary instrumentation
10 framework.
11
12 Copyright (C) 2008-2013 OpenWorks LLP
13 info@open-works.co.uk
14
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31
32 Neither the names of the U.S. Department of Energy nor the
33 University of California nor the names of its contributors may be
34 used to endorse or promote products derived from this software
35 without prior written permission.
36 */
37
38 #if defined(VGO_linux) || defined(VGO_darwin)
39
40 /* REFERENCE (without which this code will not make much sense):
41
42 DWARF Debugging Information Format, Version 3,
43 dated 20 December 2005 (the "D3 spec").
44
45 Available at http://www.dwarfstd.org/Dwarf3.pdf. There's also a
46 .doc (MS Word) version, but for some reason the section numbers
47 between the Word and PDF versions differ by 1 in the first digit.
48 All section references in this code are to the PDF version.
49
50 CURRENT HACKS:
51
52 DW_TAG_{const,volatile}_type no DW_AT_type is allowed; it is
53 assumed to mean "const void" or "volatile void" respectively.
54 GDB appears to interpret them like this, anyway.
55
56 In many cases it is important to know the svma of a CU (the "base
57 address of the CU", as the D3 spec calls it). There are some
58 situations in which the spec implies this value is unknown, but the
   Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
   merely zero when not explicitly stated.  So we too have to make
   that assumption.
62
63 POTENTIAL BUG? Spotted 6 Sept 08. Why doesn't
64 unitary_range_list() bias the resulting range list in the same way
65 that its more general cousin, get_range_list(), does? I don't
66 know.
67
68 TODO, 2008 Feb 17:
69
70 get rid of cu_svma_known and document the assumed-zero svma hack.
71
72 ML_(sizeOfType): differentiate between zero sized types and types
73 for which the size is unknown. Is this important? I don't know.
74
75 DW_TAG_array_types: deal with explicit sizes (currently we compute
76 the size from the bounds and the element size, although that's
77 fragile, if the bounds incompletely specified, or completely
78 absent)
79
80 Document reason for difference (by 1) of stack preening depth in
81 parse_var_DIE vs parse_type_DIE.
82
83 Don't hand to ML_(addVars), vars whose locations are entirely in
84 registers (DW_OP_reg*). This is merely a space-saving
85 optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
86 expressions correctly, by failing to evaluate them and hence
87 effectively ignoring the variable with which they are associated.
88
89 Deal with DW_TAG_array_types which have element size != stride
90
91 In some cases, the info for a variable is split between two
92 different DIEs (generally a declarer and a definer). We punt on
93 these. Could do better here.
94
95 The 'data_bias' argument passed to the expression evaluator
96 (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
97 MaybeUWord, to make it clear when we do vs don't know what it is
98 for the evaluation of an expression. At the moment zero is passed
99 for this parameter in the don't know case. That's a bit fragile
100 and obscure; using a MaybeUWord would be clearer.
101
102 POTENTIAL PERFORMANCE IMPROVEMENTS:
103
104 Currently, duplicate removal and all other queries for the type
105 entities array is done using cuOffset-based pointing, which
106 involves a binary search (VG_(lookupXA)) for each access. This is
107 wildly inefficient, although simple. It would be better to
108 translate all the cuOffset-based references (iow, all the "R" and
109 "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
110 'tyents' right at the start of dedup_types(), and use direct
111 indexing (VG_(indexXA)) wherever possible after that.
112
113 cmp__XArrays_of_AddrRange is also a performance bottleneck. Move
114 VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
115 points, and possibly also make an _UNCHECKED version which skips
116 the range checks in performance-critical situations such as this.
117
118 Handle interaction between read_DIE and parse_{var,type}_DIE
119 better. Currently read_DIE reads the entire DIE just to find where
120 the end is (and for debug printing), so that it can later reliably
121 move the cursor to the end regardless of what parse_{var,type}_DIE
122 do. This means many DIEs (most, even?) are read twice. It would
123 be smarter to make parse_{var,type}_DIE return a Bool indicating
124 whether or not they advanced the DIE cursor, and only if they
125 didn't should read_DIE itself read through the DIE.
126
127 ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
128 zero variables in their .vars XArray. Rather than have an XArray
129 with zero elements (which uses 2 malloc'd blocks), allow the .vars
130 pointer to be NULL in this case.
131
132 More generally, reduce the amount of memory allocated and freed
133 while reading Dwarf3 type/variable information. Even modest (20MB)
134 objects cause this module to allocate and free hundreds of
135 thousands of small blocks, and ML_(arena_malloc) and its various
136 groupies always show up at the top of performance profiles. */
137
138 #include "pub_core_basics.h"
139 #include "pub_core_debuginfo.h"
140 #include "pub_core_libcbase.h"
141 #include "pub_core_libcassert.h"
142 #include "pub_core_libcprint.h"
143 #include "pub_core_libcsetjmp.h" // setjmp facilities
144 #include "pub_core_hashtable.h"
145 #include "pub_core_options.h"
146 #include "pub_core_tooliface.h" /* VG_(needs) */
147 #include "pub_core_xarray.h"
148 #include "pub_core_wordfm.h"
149 #include "priv_misc.h" /* dinfo_zalloc/free */
150 #include "priv_image.h"
151 #include "priv_tytypes.h"
152 #include "priv_d3basics.h"
153 #include "priv_storage.h"
154 #include "priv_readdwarf3.h" /* self */
155
156
157 /*------------------------------------------------------------*/
158 /*--- ---*/
159 /*--- Basic machinery for parsing DIEs. ---*/
160 /*--- ---*/
161 /*------------------------------------------------------------*/
162
/* Emit a trace message through VG_(printf), but only when a variable
   named 'td3' -- which must be in scope at every point of use -- is
   true.  Note this expands to a bare 'if' statement, not an
   expression. */
#define TRACE_D3(format, args...) \
   if (td3) { VG_(printf)(format, ## args); }

/* Distinguished cuOffset values (all-ones and all-ones-minus-one as
   UWords).  Going by the names, presumably "no such offset" and a
   stand-in offset for a fake 'void' type -- confirm at the use
   points. */
#define D3_INVALID_CUOFF  ((UWord)(-1UL))
#define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
168
/* A read position within a DiSlice, together with the error handler
   to call if a read would run past the end of that slice. */
typedef
   struct {
      DiSlice sli;      // to which this cursor applies
      DiOffT  sli_next; // offset in underlying DiImage; must be >= sli.ioff
      /* Called with .barfstr when a read overruns .sli; never returns. */
      void (*barf)( const HChar* ) __attribute__((noreturn));
      const HChar* barfstr; // message passed to .barf
   }
   Cursor;
177
is_sane_Cursor(Cursor * c)178 static inline Bool is_sane_Cursor ( Cursor* c ) {
179 if (!c) return False;
180 if (!c->barf) return False;
181 if (!c->barfstr) return False;
182 if (!ML_(sli_is_valid)(c->sli)) return False;
183 if (c->sli.ioff == DiOffT_INVALID) return False;
184 if (c->sli_next < c->sli.ioff) return False;
185 return True;
186 }
187
188 // Initialise a cursor from a DiSlice (ELF section, really) so as to
189 // start reading at offset |sli_initial_offset| from the start of the
190 // slice.
init_Cursor(Cursor * c,DiSlice sli,ULong sli_initial_offset,void (* barf)(const HChar *),const HChar * barfstr)191 static void init_Cursor ( /*OUT*/Cursor* c,
192 DiSlice sli,
193 ULong sli_initial_offset,
194 __attribute__((noreturn)) void (*barf)(const HChar*),
195 const HChar* barfstr )
196 {
197 vg_assert(c);
198 VG_(bzero_inline)(c, sizeof(*c));
199 c->sli = sli;
200 c->sli_next = c->sli.ioff + sli_initial_offset;
201 c->barf = barf;
202 c->barfstr = barfstr;
203 vg_assert(is_sane_Cursor(c));
204 }
205
is_at_end_Cursor(Cursor * c)206 static Bool is_at_end_Cursor ( Cursor* c ) {
207 vg_assert(is_sane_Cursor(c));
208 return c->sli_next >= c->sli.ioff + c->sli.szB;
209 }
210
get_position_of_Cursor(Cursor * c)211 static inline ULong get_position_of_Cursor ( Cursor* c ) {
212 vg_assert(is_sane_Cursor(c));
213 return c->sli_next - c->sli.ioff;
214 }
/* Move the read position to |pos| bytes past the start of the slice.
   Only cursor sanity is asserted; |pos| is not checked against the
   slice size here. */
static inline void set_position_of_Cursor ( Cursor* c, ULong pos ) {
   DiOffT target = c->sli.ioff + pos;
   c->sli_next = target;
   vg_assert(is_sane_Cursor(c));
}
219
get_remaining_length_Cursor(Cursor * c)220 static /*signed*/Long get_remaining_length_Cursor ( Cursor* c ) {
221 vg_assert(is_sane_Cursor(c));
222 return c->sli.ioff + c->sli.szB - c->sli_next;
223 }
224
225 //static void* get_address_of_Cursor ( Cursor* c ) {
226 // vg_assert(is_sane_Cursor(c));
227 // return &c->region_start_img[ c->region_next ];
228 //}
229
get_DiCursor_from_Cursor(Cursor * c)230 static DiCursor get_DiCursor_from_Cursor ( Cursor* c ) {
231 return mk_DiCursor(c->sli.img, c->sli_next);
232 }
233
234 /* FIXME: document assumptions on endianness for
235 get_UShort/UInt/ULong. */
get_UChar(Cursor * c)236 static inline UChar get_UChar ( Cursor* c ) {
237 UChar r;
238 vg_assert(is_sane_Cursor(c));
239 if (c->sli_next + sizeof(UChar) > c->sli.ioff + c->sli.szB) {
240 c->barf(c->barfstr);
241 /*NOTREACHED*/
242 vg_assert(0);
243 }
244 r = ML_(img_get_UChar)(c->sli.img, c->sli_next);
245 c->sli_next += sizeof(UChar);
246 return r;
247 }
get_UShort(Cursor * c)248 static UShort get_UShort ( Cursor* c ) {
249 UShort r;
250 vg_assert(is_sane_Cursor(c));
251 if (c->sli_next + sizeof(UShort) > c->sli.ioff + c->sli.szB) {
252 c->barf(c->barfstr);
253 /*NOTREACHED*/
254 vg_assert(0);
255 }
256 r = ML_(img_get_UShort)(c->sli.img, c->sli_next);
257 c->sli_next += sizeof(UShort);
258 return r;
259 }
get_UInt(Cursor * c)260 static UInt get_UInt ( Cursor* c ) {
261 UInt r;
262 vg_assert(is_sane_Cursor(c));
263 if (c->sli_next + sizeof(UInt) > c->sli.ioff + c->sli.szB) {
264 c->barf(c->barfstr);
265 /*NOTREACHED*/
266 vg_assert(0);
267 }
268 r = ML_(img_get_UInt)(c->sli.img, c->sli_next);
269 c->sli_next += sizeof(UInt);
270 return r;
271 }
get_ULong(Cursor * c)272 static ULong get_ULong ( Cursor* c ) {
273 ULong r;
274 vg_assert(is_sane_Cursor(c));
275 if (c->sli_next + sizeof(ULong) > c->sli.ioff + c->sli.szB) {
276 c->barf(c->barfstr);
277 /*NOTREACHED*/
278 vg_assert(0);
279 }
280 r = ML_(img_get_ULong)(c->sli.img, c->sli_next);
281 c->sli_next += sizeof(ULong);
282 return r;
283 }
get_ULEB128(Cursor * c)284 static ULong get_ULEB128 ( Cursor* c ) {
285 ULong result;
286 Int shift;
287 UChar byte;
288 /* unroll first iteration */
289 byte = get_UChar( c );
290 result = (ULong)(byte & 0x7f);
291 if (LIKELY(!(byte & 0x80))) return result;
292 shift = 7;
293 /* end unroll first iteration */
294 do {
295 byte = get_UChar( c );
296 result |= ((ULong)(byte & 0x7f)) << shift;
297 shift += 7;
298 } while (byte & 0x80);
299 return result;
300 }
get_SLEB128(Cursor * c)301 static Long get_SLEB128 ( Cursor* c ) {
302 ULong result = 0;
303 Int shift = 0;
304 UChar byte;
305 do {
306 byte = get_UChar(c);
307 result |= ((ULong)(byte & 0x7f)) << shift;
308 shift += 7;
309 } while (byte & 0x80);
310 if (shift < 64 && (byte & 0x40))
311 result |= -(1ULL << shift);
312 return result;
313 }
314
315 /* Assume 'c' points to the start of a string. Return a DiCursor of
316 whatever it points at, and advance it past the terminating zero.
317 This makes it safe for the caller to then copy the string with
318 ML_(addStr), since (w.r.t. image overruns) the process of advancing
319 past the terminating zero will already have "vetted" the string. */
get_AsciiZ(Cursor * c)320 static DiCursor get_AsciiZ ( Cursor* c ) {
321 UChar uc;
322 DiCursor res = get_DiCursor_from_Cursor(c);
323 do { uc = get_UChar(c); } while (uc != 0);
324 return res;
325 }
326
peek_ULEB128(Cursor * c)327 static ULong peek_ULEB128 ( Cursor* c ) {
328 DiOffT here = c->sli_next;
329 ULong r = get_ULEB128( c );
330 c->sli_next = here;
331 return r;
332 }
peek_UChar(Cursor * c)333 static UChar peek_UChar ( Cursor* c ) {
334 DiOffT here = c->sli_next;
335 UChar r = get_UChar( c );
336 c->sli_next = here;
337 return r;
338 }
339
get_Dwarfish_UWord(Cursor * c,Bool is_dw64)340 static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
341 return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c);
342 }
343
get_UWord(Cursor * c)344 static UWord get_UWord ( Cursor* c ) {
345 vg_assert(sizeof(UWord) == sizeof(void*));
346 if (sizeof(UWord) == 4) return get_UInt(c);
347 if (sizeof(UWord) == 8) return get_ULong(c);
348 vg_assert(0);
349 }
350
351 /* Read a DWARF3 'Initial Length' field */
get_Initial_Length(Bool * is64,Cursor * c,const HChar * barfMsg)352 static ULong get_Initial_Length ( /*OUT*/Bool* is64,
353 Cursor* c,
354 const HChar* barfMsg )
355 {
356 ULong w64;
357 UInt w32;
358 *is64 = False;
359 w32 = get_UInt( c );
360 if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) {
361 c->barf( barfMsg );
362 }
363 else if (w32 == 0xFFFFFFFF) {
364 *is64 = True;
365 w64 = get_ULong( c );
366 } else {
367 *is64 = False;
368 w64 = (ULong)w32;
369 }
370 return w64;
371 }
372
373
374 /*------------------------------------------------------------*/
375 /*--- ---*/
376 /*--- "CUConst" structure ---*/
377 /*--- ---*/
378 /*------------------------------------------------------------*/
379
/* Number of entries in the per-CU cache for set_abbv_Cursor
   (CUConst.saC_cache). */
#define N_ABBV_CACHE 32

/* Holds information that is constant through the parsing of a
   Compilation Unit.  This is basically plumbed through to
   everywhere. */
typedef
   struct {
      /* Call here if anything goes wrong */
      void (*barf)( const HChar* ) __attribute__((noreturn));
      /* Is this 64-bit DWARF ? */
      Bool   is_dw64;
      /* Which DWARF version ?  (2, 3 or 4) */
      UShort version;
      /* Length of this Compilation Unit, as stated in the
         .unit_length :: InitialLength field of the CU Header.
         However, this size (as specified by the D3 spec) does not
         include the size of the .unit_length field itself, which is
         either 4 or 12 bytes (32-bit or 64-bit Dwarf3).  That value
         can be obtained through the expression ".is_dw64 ? 12 : 4". */
      ULong  unit_length;
      /* Offset of start of this unit in .debug_info */
      UWord  cu_start_offset;
      /* SVMA for this CU.  In the D3 spec, is known as the "base
         address of the compilation unit" (last para sec 3.1.1).
         Needed for (amongst things) interpretation of location-list
         values. */
      Addr   cu_svma;
      Bool   cu_svma_known;

      /* The debug_abbreviations table to be used for this Unit */
      //UChar* debug_abbv;
      /* Upper bound on size thereof (an overestimate, in general) */
      //UWord  debug_abbv_maxszB;
      /* A bounded area of the image, to be used as the
         debug_abbreviations table to be used for this Unit. */
      DiSlice debug_abbv;

      /* Image information for various sections. */
      DiSlice escn_debug_str;
      DiSlice escn_debug_ranges;
      DiSlice escn_debug_loc;
      DiSlice escn_debug_line;
      DiSlice escn_debug_info;
      DiSlice escn_debug_types;
      DiSlice escn_debug_info_alt;
      DiSlice escn_debug_str_alt;
      /* How much to add to .debug_types resp. alternate .debug_info offsets
         in cook_die*. */
      UWord  types_cuOff_bias;
      UWord  alt_cuOff_bias;
      /* --- Needed so we can add stuff to the string table. --- */
      struct _DebugInfo* di;
      /* --- a cache for set_abbv_Cursor --- */
      /* abbv_code == (ULong)-1 for an unused entry. */
      struct { ULong abbv_code; UWord posn; } saC_cache[N_ABBV_CACHE];
      UWord saC_cache_queries;
      UWord saC_cache_misses;

      /* True if this came from .debug_types; otherwise it came from
         .debug_info.  */
      Bool is_type_unit;
      /* For a unit coming from .debug_types, these hold the TU's type
         signature and the uncooked DIE offset of the TU's signatured
         type.  For a unit coming from .debug_info, these are unused.  */
      ULong type_signature;
      ULong type_offset;

      /* Signatured type hash; computed once and then shared by all
         CUs.  */
      VgHashTable signature_types;

      /* True if this came from alternate .debug_info; otherwise
         it came from normal .debug_info or .debug_types.  */
      Bool is_alt_info;
   }
   CUConst;
456
457
458 /* Return the cooked value of DIE depending on whether CC represents a
459 .debug_types unit. To cook a DIE, we pretend that the .debug_info,
460 .debug_types and optional alternate .debug_info sections form
461 a contiguous whole, so that DIEs coming from .debug_types are numbered
462 starting at the end of .debug_info and DIEs coming from alternate
463 .debug_info are numbered starting at the end of .debug_types. */
cook_die(CUConst * cc,UWord die)464 static UWord cook_die( CUConst* cc, UWord die )
465 {
466 if (cc->is_type_unit)
467 die += cc->types_cuOff_bias;
468 else if (cc->is_alt_info)
469 die += cc->alt_cuOff_bias;
470 return die;
471 }
472
473 /* Like cook_die, but understand that DIEs coming from a
474 DW_FORM_ref_sig8 reference are already cooked. Also, handle
475 DW_FORM_GNU_ref_alt from within primary .debug_info or .debug_types
476 as reference to alternate .debug_info. */
cook_die_using_form(CUConst * cc,UWord die,DW_FORM form)477 static UWord cook_die_using_form( CUConst *cc, UWord die, DW_FORM form)
478 {
479 if (form == DW_FORM_ref_sig8)
480 return die;
481 if (form == DW_FORM_GNU_ref_alt)
482 return die + cc->alt_cuOff_bias;
483 return cook_die( cc, die );
484 }
485
486 /* Return the uncooked offset of DIE and set *TYPE_FLAG to true if the DIE
487 came from the .debug_types section and *ALT_FLAG to true if the DIE
488 came from alternate .debug_info section. */
uncook_die(CUConst * cc,UWord die,Bool * type_flag,Bool * alt_flag)489 static UWord uncook_die( CUConst *cc, UWord die, /*OUT*/Bool *type_flag,
490 Bool *alt_flag )
491 {
492 *alt_flag = False;
493 *type_flag = False;
494 /* The use of escn_debug_{info,types}.szB seems safe to me even if
495 escn_debug_{info,types} are DiSlice_INVALID (meaning the
496 sections were not found), because DiSlice_INVALID.szB is always
497 zero. That said, it seems unlikely we'd ever get here if
498 .debug_info or .debug_types were missing. */
499 if (die >= cc->escn_debug_info.szB) {
500 if (die >= cc->escn_debug_info.szB + cc->escn_debug_types.szB) {
501 *alt_flag = True;
502 die -= cc->escn_debug_info.szB + cc->escn_debug_types.szB;
503 } else {
504 *type_flag = True;
505 die -= cc->escn_debug_info.szB;
506 }
507 }
508 return die;
509 }
510
511 /*------------------------------------------------------------*/
512 /*--- ---*/
513 /*--- Helper functions for Guarded Expressions ---*/
514 /*--- ---*/
515 /*------------------------------------------------------------*/
516
517 /* Parse the location list starting at img-offset 'debug_loc_offset'
518 in .debug_loc. Results are biased with 'svma_of_referencing_CU'
519 and so I believe are correct SVMAs for the object as a whole. This
520 function allocates the UChar*, and the caller must deallocate it.
521 The resulting block is in so-called Guarded-Expression format.
522
523 Guarded-Expression format is similar but not identical to the DWARF3
524 location-list format. The format of each returned block is:
525
526 UChar biasMe;
527 UChar isEnd;
528 followed by zero or more of
529
530 (Addr aMin; Addr aMax; UShort nbytes; ..bytes..; UChar isEnd)
531
   '..bytes..' is a standard DWARF3 location expression which is
   valid when aMin <= pc <= aMax (possibly after suitable biasing).
534
535 The number of bytes in '..bytes..' is nbytes.
536
537 The end of the sequence is marked by an isEnd == 1 value. All
538 previous isEnd values must be zero.
539
   biasMe is 1 if the aMin/aMax fields need this DebugInfo's
   text_bias added before use, and 0 if this is not necessary (the
   GX is ready to go).
543
544 Hence the block can be quickly parsed and is self-describing. Note
545 that aMax is 1 less than the corresponding value in a DWARF3
546 location list. Zero length ranges, with aMax == aMin-1, are not
547 allowed.
548 */
549 /* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
550 it more logically belongs. */
551
552
553 /* Apply a text bias to a GX. */
bias_GX(GExpr * gx,struct _DebugInfo * di)554 static void bias_GX ( /*MOD*/GExpr* gx, struct _DebugInfo* di )
555 {
556 UShort nbytes;
557 UChar* p = &gx->payload[0];
558 UChar* pA;
559 UChar uc;
560 uc = *p++; /*biasMe*/
561 if (uc == 0)
562 return;
563 vg_assert(uc == 1);
564 p[-1] = 0; /* mark it as done */
565 while (True) {
566 uc = *p++;
567 if (uc == 1)
568 break; /*isEnd*/
569 vg_assert(uc == 0);
570 /* t-bias aMin */
571 pA = (UChar*)p;
572 ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
573 p += sizeof(Addr);
574 /* t-bias aMax */
575 pA = (UChar*)p;
576 ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
577 p += sizeof(Addr);
578 /* nbytes, and actual expression */
579 nbytes = ML_(read_UShort)(p); p += sizeof(UShort);
580 p += nbytes;
581 }
582 }
583
584 __attribute__((noinline))
make_singleton_GX(DiCursor block,ULong nbytes)585 static GExpr* make_singleton_GX ( DiCursor block, ULong nbytes )
586 {
587 SizeT bytesReqd;
588 GExpr* gx;
589 UChar *p, *pstart;
590
591 vg_assert(sizeof(UWord) == sizeof(Addr));
592 vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
593 bytesReqd
594 = sizeof(UChar) /*biasMe*/ + sizeof(UChar) /*!isEnd*/
595 + sizeof(UWord) /*aMin*/ + sizeof(UWord) /*aMax*/
596 + sizeof(UShort) /*nbytes*/ + (SizeT)nbytes
597 + sizeof(UChar); /*isEnd*/
598
599 gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1",
600 sizeof(GExpr) + bytesReqd );
601 vg_assert(gx);
602
603 p = pstart = &gx->payload[0];
604
605 p = ML_(write_UChar)(p, 0); /*biasMe*/
606 p = ML_(write_UChar)(p, 0); /*!isEnd*/
607 p = ML_(write_Addr)(p, 0); /*aMin*/
608 p = ML_(write_Addr)(p, ~0); /*aMax*/
609 p = ML_(write_UShort)(p, nbytes); /*nbytes*/
610 ML_(cur_read_get)(p, block, nbytes); p += nbytes;
611 p = ML_(write_UChar)(p, 1); /*isEnd*/
612
613 vg_assert( (SizeT)(p - pstart) == bytesReqd);
614 vg_assert( &gx->payload[bytesReqd]
615 == ((UChar*)gx) + sizeof(GExpr) + bytesReqd );
616
617 return gx;
618 }
619
620 __attribute__((noinline))
make_general_GX(CUConst * cc,Bool td3,ULong debug_loc_offset,Addr svma_of_referencing_CU)621 static GExpr* make_general_GX ( CUConst* cc,
622 Bool td3,
623 ULong debug_loc_offset,
624 Addr svma_of_referencing_CU )
625 {
626 Addr base;
627 Cursor loc;
628 XArray* xa; /* XArray of UChar */
629 GExpr* gx;
630 Word nbytes;
631
632 vg_assert(sizeof(UWord) == sizeof(Addr));
633 if (!ML_(sli_is_valid)(cc->escn_debug_loc) || cc->escn_debug_loc.szB == 0)
634 cc->barf("make_general_GX: .debug_loc is empty/missing");
635
636 init_Cursor( &loc, cc->escn_debug_loc, 0, cc->barf,
637 "Overrun whilst reading .debug_loc section(2)" );
638 set_position_of_Cursor( &loc, debug_loc_offset );
639
640 TRACE_D3("make_general_GX (.debug_loc_offset = %llu, ioff = %llu) {\n",
641 debug_loc_offset, (ULong)get_DiCursor_from_Cursor(&loc).ioff );
642
643 /* Who frees this xa? It is freed before this fn exits. */
644 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1",
645 ML_(dinfo_free),
646 sizeof(UChar) );
647
648 { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
649
650 base = 0;
651 while (True) {
652 Bool acquire;
653 UWord len;
654 /* Read a (host-)word pair. This is something of a hack since
655 the word size to read is really dictated by the ELF file;
656 however, we assume we're reading a file with the same
657 word-sizeness as the host. Reasonably enough. */
658 UWord w1 = get_UWord( &loc );
659 UWord w2 = get_UWord( &loc );
660
661 TRACE_D3(" %08lx %08lx\n", w1, w2);
662 if (w1 == 0 && w2 == 0)
663 break; /* end of list */
664
665 if (w1 == -1UL) {
666 /* new value for 'base' */
667 base = w2;
668 continue;
669 }
670
671 /* else a location expression follows */
672 /* else enumerate [w1+base, w2+base) */
673 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
674 (sec 2.17.2) */
675 if (w1 > w2) {
676 TRACE_D3("negative range is for .debug_loc expr at "
677 "file offset %llu\n",
678 debug_loc_offset);
679 cc->barf( "negative range in .debug_loc section" );
680 }
681
682 /* ignore zero length ranges */
683 acquire = w1 < w2;
684 len = (UWord)get_UShort( &loc );
685
686 if (acquire) {
687 UWord w;
688 UShort s;
689 UChar c;
690 c = 0; /* !isEnd*/
691 VG_(addBytesToXA)( xa, &c, sizeof(c) );
692 w = w1 + base + svma_of_referencing_CU;
693 VG_(addBytesToXA)( xa, &w, sizeof(w) );
694 w = w2 -1 + base + svma_of_referencing_CU;
695 VG_(addBytesToXA)( xa, &w, sizeof(w) );
696 s = (UShort)len;
697 VG_(addBytesToXA)( xa, &s, sizeof(s) );
698 }
699
700 while (len > 0) {
701 UChar byte = get_UChar( &loc );
702 TRACE_D3("%02x", (UInt)byte);
703 if (acquire)
704 VG_(addBytesToXA)( xa, &byte, 1 );
705 len--;
706 }
707 TRACE_D3("\n");
708 }
709
710 { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
711
712 nbytes = VG_(sizeXA)( xa );
713 vg_assert(nbytes >= 1);
714
715 gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes );
716 vg_assert(gx);
717 VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
718 vg_assert( &gx->payload[nbytes]
719 == ((UChar*)gx) + sizeof(GExpr) + nbytes );
720
721 VG_(deleteXA)( xa );
722
723 TRACE_D3("}\n");
724
725 return gx;
726 }
727
728
729 /*------------------------------------------------------------*/
730 /*--- ---*/
731 /*--- Helper functions for range lists and CU headers ---*/
732 /*--- ---*/
733 /*------------------------------------------------------------*/
734
/* Denotes an address range.  Both aMin and aMax are included in the
   range; hence a complete range is (0, ~0) and an empty range is any
   (X, X-1) for X > 0. */
typedef
   struct { Addr aMin; Addr aMax; }
   AddrRange;
741
742
743 /* Generate an arbitrary structural total ordering on
744 XArray* of AddrRange. */
cmp__XArrays_of_AddrRange(XArray * rngs1,XArray * rngs2)745 static Word cmp__XArrays_of_AddrRange ( XArray* rngs1, XArray* rngs2 )
746 {
747 Word n1, n2, i;
748 tl_assert(rngs1 && rngs2);
749 n1 = VG_(sizeXA)( rngs1 );
750 n2 = VG_(sizeXA)( rngs2 );
751 if (n1 < n2) return -1;
752 if (n1 > n2) return 1;
753 for (i = 0; i < n1; i++) {
754 AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i );
755 AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i );
756 if (rng1->aMin < rng2->aMin) return -1;
757 if (rng1->aMin > rng2->aMin) return 1;
758 if (rng1->aMax < rng2->aMax) return -1;
759 if (rng1->aMax > rng2->aMax) return 1;
760 }
761 return 0;
762 }
763
764
765 __attribute__((noinline))
empty_range_list(void)766 static XArray* /* of AddrRange */ empty_range_list ( void )
767 {
768 XArray* xa; /* XArray of AddrRange */
769 /* Who frees this xa? varstack_preen() does. */
770 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1",
771 ML_(dinfo_free),
772 sizeof(AddrRange) );
773 return xa;
774 }
775
776
777 __attribute__((noinline))
unitary_range_list(Addr aMin,Addr aMax)778 static XArray* unitary_range_list ( Addr aMin, Addr aMax )
779 {
780 XArray* xa;
781 AddrRange pair;
782 vg_assert(aMin <= aMax);
783 /* Who frees this xa? varstack_preen() does. */
784 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.url.1",
785 ML_(dinfo_free),
786 sizeof(AddrRange) );
787 pair.aMin = aMin;
788 pair.aMax = aMax;
789 VG_(addToXA)( xa, &pair );
790 return xa;
791 }
792
793
794 /* Enumerate the address ranges starting at img-offset
795 'debug_ranges_offset' in .debug_ranges. Results are biased with
796 'svma_of_referencing_CU' and so I believe are correct SVMAs for the
797 object as a whole. This function allocates the XArray, and the
798 caller must deallocate it. */
799 __attribute__((noinline))
800 static XArray* /* of AddrRange */
get_range_list(CUConst * cc,Bool td3,UWord debug_ranges_offset,Addr svma_of_referencing_CU)801 get_range_list ( CUConst* cc,
802 Bool td3,
803 UWord debug_ranges_offset,
804 Addr svma_of_referencing_CU )
805 {
806 Addr base;
807 Cursor ranges;
808 XArray* xa; /* XArray of AddrRange */
809 AddrRange pair;
810
811 if (!ML_(sli_is_valid)(cc->escn_debug_ranges)
812 || cc->escn_debug_ranges.szB == 0)
813 cc->barf("get_range_list: .debug_ranges is empty/missing");
814
815 init_Cursor( &ranges, cc->escn_debug_ranges, 0, cc->barf,
816 "Overrun whilst reading .debug_ranges section(2)" );
817 set_position_of_Cursor( &ranges, debug_ranges_offset );
818
819 /* Who frees this xa? varstack_preen() does. */
820 xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free),
821 sizeof(AddrRange) );
822 base = 0;
823 while (True) {
824 /* Read a (host-)word pair. This is something of a hack since
825 the word size to read is really dictated by the ELF file;
826 however, we assume we're reading a file with the same
827 word-sizeness as the host. Reasonably enough. */
828 UWord w1 = get_UWord( &ranges );
829 UWord w2 = get_UWord( &ranges );
830
831 if (w1 == 0 && w2 == 0)
832 break; /* end of list. */
833
834 if (w1 == -1UL) {
835 /* new value for 'base' */
836 base = w2;
837 continue;
838 }
839
840 /* else enumerate [w1+base, w2+base) */
841 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
842 (sec 2.17.2) */
843 if (w1 > w2)
844 cc->barf( "negative range in .debug_ranges section" );
845 if (w1 < w2) {
846 pair.aMin = w1 + base + svma_of_referencing_CU;
847 pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
848 vg_assert(pair.aMin <= pair.aMax);
849 VG_(addToXA)( xa, &pair );
850 }
851 }
852 return xa;
853 }
854
855
856 /* Parse the Compilation Unit header indicated at 'c' and
857 initialise 'cc' accordingly. */
858 static __attribute__((noinline))
parse_CU_Header(CUConst * cc,Bool td3,Cursor * c,DiSlice escn_debug_abbv,Bool type_unit,Bool alt_info)859 void parse_CU_Header ( /*OUT*/CUConst* cc,
860 Bool td3,
861 Cursor* c,
862 DiSlice escn_debug_abbv,
863 Bool type_unit,
864 Bool alt_info )
865 {
866 UChar address_size;
867 ULong debug_abbrev_offset;
868 Int i;
869
870 VG_(memset)(cc, 0, sizeof(*cc));
871 vg_assert(c && c->barf);
872 cc->barf = c->barf;
873
874 /* initial_length field */
875 cc->unit_length
876 = get_Initial_Length( &cc->is_dw64, c,
877 "parse_CU_Header: invalid initial-length field" );
878
879 TRACE_D3(" Length: %lld\n", cc->unit_length );
880
881 /* version */
882 cc->version = get_UShort( c );
883 if (cc->version != 2 && cc->version != 3 && cc->version != 4)
884 cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" );
885 TRACE_D3(" Version: %d\n", (Int)cc->version );
886
887 /* debug_abbrev_offset */
888 debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
889 if (debug_abbrev_offset >= escn_debug_abbv.szB)
890 cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
891 TRACE_D3(" Abbrev Offset: %lld\n", debug_abbrev_offset );
892
893 /* address size. If this isn't equal to the host word size, just
894 give up. This makes it safe to assume elsewhere that
895 DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
896 word. */
897 address_size = get_UChar( c );
898 if (address_size != sizeof(void*))
899 cc->barf( "parse_CU_Header: invalid address_size" );
900 TRACE_D3(" Pointer Size: %d\n", (Int)address_size );
901
902 cc->is_type_unit = type_unit;
903 cc->is_alt_info = alt_info;
904
905 if (type_unit) {
906 cc->type_signature = get_ULong( c );
907 cc->type_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
908 }
909
910 /* Set up cc->debug_abbv to point to the relevant table for this
911 CU. Set its .szB so that at least we can't read off the end of
912 the debug_abbrev section -- potentially (and quite likely) too
913 big, if this isn't the last table in the section, but at least
914 it's safe.
915
916 This amounts to taking debug_abbv_escn and moving the start
917 position along by debug_abbrev_offset bytes, hence forming a
918 smaller DiSlice which has the same end point. Since we checked
919 just above that debug_abbrev_offset is less than the size of
920 debug_abbv_escn, this should leave us with a nonempty slice. */
921 vg_assert(debug_abbrev_offset < escn_debug_abbv.szB);
922 cc->debug_abbv = escn_debug_abbv;
923 cc->debug_abbv.ioff += debug_abbrev_offset;
924 cc->debug_abbv.szB -= debug_abbrev_offset;
925
926 /* and empty out the set_abbv_Cursor cache */
927 if (0) VG_(printf)("XXXXXX initialise set_abbv_Cursor cache\n");
928 for (i = 0; i < N_ABBV_CACHE; i++) {
929 cc->saC_cache[i].abbv_code = (ULong)-1; /* unused */
930 cc->saC_cache[i].posn = 0;
931 }
932 cc->saC_cache_queries = 0;
933 cc->saC_cache_misses = 0;
934 }
935
936
937 /* Set up 'c' so it is ready to parse the abbv table entry code
938 'abbv_code' for this compilation unit. */
939 static __attribute__((noinline))
set_abbv_Cursor(Cursor * c,Bool td3,CUConst * cc,ULong abbv_code)940 void set_abbv_Cursor ( /*OUT*/Cursor* c, Bool td3,
941 CUConst* cc, ULong abbv_code )
942 {
943 Int i;
944 ULong acode;
945
946 if (abbv_code == 0)
947 cc->barf("set_abbv_Cursor: abbv_code == 0" );
948
949 /* (ULong)-1 is used to represent an empty cache slot. So we can't
950 allow it. In any case no valid DWARF3 should make a reference
951 to a negative abbreviation code. [at least, they always seem to
952 be numbered upwards from zero as far as I have seen] */
953 vg_assert(abbv_code != (ULong)-1);
954
955 /* First search the cache. */
956 if (0) VG_(printf)("XXXXXX search set_abbv_Cursor cache\n");
957 cc->saC_cache_queries++;
958 for (i = 0; i < N_ABBV_CACHE; i++) {
959 /* No need to test the cached abbv_codes for -1 (empty), since
960 we just asserted that abbv_code is not -1. */
961 if (LIKELY(cc->saC_cache[i].abbv_code == abbv_code)) {
962 /* Found it. Set up the parser using the cached position,
963 and move this cache entry to the front. */
964 if (0) VG_(printf)("XXXXXX found in set_abbv_Cursor cache\n");
965 init_Cursor( c, cc->debug_abbv, cc->saC_cache[i].posn,
966 cc->barf,
967 "Overrun whilst parsing .debug_abbrev section(1)" );
968 if (i > 0) {
969 ULong t_abbv_code = cc->saC_cache[i].abbv_code;
970 UWord t_posn = cc->saC_cache[i].posn;
971 while (i > 0) {
972 cc->saC_cache[i] = cc->saC_cache[i-1];
973 i--;
974 }
975 cc->saC_cache[0].abbv_code = t_abbv_code;
976 cc->saC_cache[0].posn = t_posn;
977 }
978 return;
979 }
980 }
981
982 /* No. It's not in the cache. We have to search through
983 .debug_abbrev, of course taking care to update the cache
984 when done. */
985
986 cc->saC_cache_misses++;
987 init_Cursor( c, cc->debug_abbv, 0, cc->barf,
988 "Overrun whilst parsing .debug_abbrev section(2)" );
989
990 /* Now iterate though the table until we find the requested
991 entry. */
992 while (True) {
993 //ULong atag;
994 //UInt has_children;
995 acode = get_ULEB128( c );
996 if (acode == 0) break; /* end of the table */
997 if (acode == abbv_code) break; /* found it */
998 /*atag = */ get_ULEB128( c );
999 /*has_children = */ get_UChar( c );
1000 //TRACE_D3(" %llu %s [%s]\n",
1001 // acode, pp_DW_TAG(atag), pp_DW_children(has_children));
1002 while (True) {
1003 ULong at_name = get_ULEB128( c );
1004 ULong at_form = get_ULEB128( c );
1005 if (at_name == 0 && at_form == 0) break;
1006 //TRACE_D3(" %18s %s\n",
1007 // pp_DW_AT(at_name), pp_DW_FORM(at_form));
1008 }
1009 }
1010
1011 if (acode == 0) {
1012 /* Not found. This is fatal. */
1013 cc->barf("set_abbv_Cursor: abbv_code not found");
1014 }
1015
1016 /* Otherwise, 'c' is now set correctly to parse the relevant entry,
1017 starting from the abbreviation entry's tag. So just cache
1018 the result, and return. */
1019 for (i = N_ABBV_CACHE-1; i > N_ABBV_CACHE/2; i--) {
1020 cc->saC_cache[i] = cc->saC_cache[i-1];
1021 }
1022 if (0) VG_(printf)("XXXXXX update set_abbv_Cursor cache\n");
1023 cc->saC_cache[N_ABBV_CACHE/2].abbv_code = abbv_code;
1024 cc->saC_cache[N_ABBV_CACHE/2].posn = get_position_of_Cursor(c);
1025 }
1026
1027 /* This represents a single signatured type. It maps a type signature
1028 (a ULong) to a cooked DIE offset. Objects of this type are stored
1029 in the type signature hash table. */
1030 typedef
1031 struct D3SignatureType {
1032 struct D3SignatureType *next;
1033 UWord data;
1034 ULong type_signature;
1035 UWord die;
1036 }
1037 D3SignatureType;
1038
1039 /* Record a signatured type in the hash table. */
record_signatured_type(VgHashTable tab,ULong type_signature,UWord die)1040 static void record_signatured_type ( VgHashTable tab,
1041 ULong type_signature,
1042 UWord die )
1043 {
1044 D3SignatureType *dstype = ML_(dinfo_zalloc) ( "di.readdwarf3.sigtype",
1045 sizeof(D3SignatureType) );
1046 dstype->data = (UWord) type_signature;
1047 dstype->type_signature = type_signature;
1048 dstype->die = die;
1049 VG_(HT_add_node) ( tab, dstype );
1050 }
1051
1052 /* Given a type signature hash table and a type signature, return the
1053 cooked DIE offset of the type. If the type cannot be found, call
1054 BARF. */
lookup_signatured_type(VgHashTable tab,ULong type_signature,void (* barf)(const HChar *))1055 static UWord lookup_signatured_type ( VgHashTable tab,
1056 ULong type_signature,
1057 void (*barf)( const HChar* ) __attribute__((noreturn)) )
1058 {
1059 D3SignatureType *dstype = VG_(HT_lookup) ( tab, (UWord) type_signature );
1060 /* This may be unwarranted chumminess with the hash table
1061 implementation. */
1062 while ( dstype != NULL && dstype->type_signature != type_signature)
1063 dstype = dstype->next;
1064 if (dstype == NULL) {
1065 barf("lookup_signatured_type: could not find signatured type");
1066 /*NOTREACHED*/
1067 vg_assert(0);
1068 }
1069 return dstype->die;
1070 }
1071
1072
1073 /* Represents Form data. If szB is 1/2/4/8 then the result is in the
1074 lowest 1/2/4/8 bytes of u.val. If szB is zero or negative then the
1075 result is an image section beginning at u.cur and with size -szB.
1076 No other szB values are allowed. */
1077 typedef
1078 struct {
1079 Long szB; // 1, 2, 4, 8 or non-positive values only.
1080 union { ULong val; DiCursor cur; } u;
1081 }
1082 FormContents;
1083
1084 /* From 'c', get the Form data into 'cts'. Either it gets a 1/2/4/8
1085 byte scalar value, or (a reference to) zero or more bytes starting
1086 at a DiCursor.*/
1087 static
get_Form_contents(FormContents * cts,CUConst * cc,Cursor * c,Bool td3,DW_FORM form)1088 void get_Form_contents ( /*OUT*/FormContents* cts,
1089 CUConst* cc, Cursor* c,
1090 Bool td3, DW_FORM form )
1091 {
1092 VG_(bzero_inline)(cts, sizeof(*cts));
1093 switch (form) {
1094 case DW_FORM_data1:
1095 cts->u.val = (ULong)(UChar)get_UChar(c);
1096 cts->szB = 1;
1097 TRACE_D3("%u", (UInt)cts->u.val);
1098 break;
1099 case DW_FORM_data2:
1100 cts->u.val = (ULong)(UShort)get_UShort(c);
1101 cts->szB = 2;
1102 TRACE_D3("%u", (UInt)cts->u.val);
1103 break;
1104 case DW_FORM_data4:
1105 cts->u.val = (ULong)(UInt)get_UInt(c);
1106 cts->szB = 4;
1107 TRACE_D3("%u", (UInt)cts->u.val);
1108 break;
1109 case DW_FORM_data8:
1110 cts->u.val = get_ULong(c);
1111 cts->szB = 8;
1112 TRACE_D3("%llu", cts->u.val);
1113 break;
1114 case DW_FORM_sec_offset:
1115 cts->u.val = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 );
1116 cts->szB = cc->is_dw64 ? 8 : 4;
1117 TRACE_D3("%llu", cts->u.val);
1118 break;
1119 case DW_FORM_sdata:
1120 cts->u.val = (ULong)(Long)get_SLEB128(c);
1121 cts->szB = 8;
1122 TRACE_D3("%lld", (Long)cts->u.val);
1123 break;
1124 case DW_FORM_udata:
1125 cts->u.val = (ULong)(Long)get_ULEB128(c);
1126 cts->szB = 8;
1127 TRACE_D3("%llu", (Long)cts->u.val);
1128 break;
1129 case DW_FORM_addr:
1130 /* note, this is a hack. DW_FORM_addr is defined as getting
1131 a word the size of the target machine as defined by the
1132 address_size field in the CU Header. However,
1133 parse_CU_Header() rejects all inputs except those for
1134 which address_size == sizeof(Word), hence we can just
1135 treat it as a (host) Word. */
1136 cts->u.val = (ULong)(UWord)get_UWord(c);
1137 cts->szB = sizeof(UWord);
1138 TRACE_D3("0x%lx", (UWord)cts->u.val);
1139 break;
1140
1141 case DW_FORM_ref_addr:
1142 /* We make the same word-size assumption as DW_FORM_addr. */
1143 /* What does this really mean? From D3 Sec 7.5.4,
1144 description of "reference", it would appear to reference
1145 some other DIE, by specifying the offset from the
1146 beginning of a .debug_info section. The D3 spec mentions
1147 that this might be in some other shared object and
1148 executable. But I don't see how the name of the other
1149 object/exe is specified.
1150
1151 At least for the DW_FORM_ref_addrs created by icc11, the
1152 references seem to be within the same object/executable.
1153 So for the moment we merely range-check, to see that they
1154 actually do specify a plausible offset within this
1155 object's .debug_info, and return the value unchanged.
1156
1157 In DWARF 2, DW_FORM_ref_addr is address-sized, but in
1158 DWARF 3 and later, it is offset-sized.
1159 */
1160 if (cc->version == 2) {
1161 cts->u.val = (ULong)(UWord)get_UWord(c);
1162 cts->szB = sizeof(UWord);
1163 } else {
1164 cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
1165 cts->szB = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
1166 }
1167 TRACE_D3("0x%lx", (UWord)cts->u.val);
1168 if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)cts->u.val);
1169 if (/* the following is surely impossible, but ... */
1170 !ML_(sli_is_valid)(cc->escn_debug_info)
1171 || cts->u.val >= (ULong)cc->escn_debug_info.szB) {
1172 /* Hmm. Offset is nonsensical for this object's .debug_info
1173 section. Be safe and reject it. */
1174 cc->barf("get_Form_contents: DW_FORM_ref_addr points "
1175 "outside .debug_info");
1176 }
1177 break;
1178
1179 case DW_FORM_strp: {
1180 /* this is an offset into .debug_str */
1181 UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1182 if (!ML_(sli_is_valid)(cc->escn_debug_str)
1183 || uw >= cc->escn_debug_str.szB)
1184 cc->barf("get_Form_contents: DW_FORM_strp "
1185 "points outside .debug_str");
1186 /* FIXME: check the entire string lies inside debug_str,
1187 not just the first byte of it. */
1188 DiCursor str
1189 = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str), uw );
1190 if (td3) {
1191 HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.1");
1192 TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, tmp);
1193 ML_(dinfo_free)(tmp);
1194 }
1195 cts->u.cur = str;
1196 cts->szB = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1197 break;
1198 }
1199 case DW_FORM_string: {
1200 DiCursor str = get_AsciiZ(c);
1201 if (td3) {
1202 HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.2");
1203 TRACE_D3("%s", tmp);
1204 ML_(dinfo_free)(tmp);
1205 }
1206 cts->u.cur = str;
1207 /* strlen is safe because get_AsciiZ already 'vetted' the
1208 entire string */
1209 cts->szB = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1210 break;
1211 }
1212 case DW_FORM_ref1: {
1213 UChar u8 = get_UChar(c);
1214 UWord res = cc->cu_start_offset + (UWord)u8;
1215 cts->u.val = (ULong)res;
1216 cts->szB = sizeof(UWord);
1217 TRACE_D3("<%lx>", res);
1218 break;
1219 }
1220 case DW_FORM_ref2: {
1221 UShort u16 = get_UShort(c);
1222 UWord res = cc->cu_start_offset + (UWord)u16;
1223 cts->u.val = (ULong)res;
1224 cts->szB = sizeof(UWord);
1225 TRACE_D3("<%lx>", res);
1226 break;
1227 }
1228 case DW_FORM_ref4: {
1229 UInt u32 = get_UInt(c);
1230 UWord res = cc->cu_start_offset + (UWord)u32;
1231 cts->u.val = (ULong)res;
1232 cts->szB = sizeof(UWord);
1233 TRACE_D3("<%lx>", res);
1234 break;
1235 }
1236 case DW_FORM_ref8: {
1237 ULong u64 = get_ULong(c);
1238 UWord res = cc->cu_start_offset + (UWord)u64;
1239 cts->u.val = (ULong)res;
1240 cts->szB = sizeof(UWord);
1241 TRACE_D3("<%lx>", res);
1242 break;
1243 }
1244 case DW_FORM_ref_udata: {
1245 ULong u64 = get_ULEB128(c);
1246 UWord res = cc->cu_start_offset + (UWord)u64;
1247 cts->u.val = (ULong)res;
1248 cts->szB = sizeof(UWord);
1249 TRACE_D3("<%lx>", res);
1250 break;
1251 }
1252 case DW_FORM_flag: {
1253 UChar u8 = get_UChar(c);
1254 TRACE_D3("%u", (UInt)u8);
1255 cts->u.val = (ULong)u8;
1256 cts->szB = 1;
1257 break;
1258 }
1259 case DW_FORM_flag_present:
1260 TRACE_D3("1");
1261 cts->u.val = 1;
1262 cts->szB = 1;
1263 break;
1264 case DW_FORM_block1: {
1265 ULong u64b;
1266 ULong u64 = (ULong)get_UChar(c);
1267 DiCursor block = get_DiCursor_from_Cursor(c);
1268 TRACE_D3("%llu byte block: ", u64);
1269 for (u64b = u64; u64b > 0; u64b--) {
1270 UChar u8 = get_UChar(c);
1271 TRACE_D3("%x ", (UInt)u8);
1272 }
1273 cts->u.cur = block;
1274 cts->szB = - (Long)u64;
1275 break;
1276 }
1277 case DW_FORM_block2: {
1278 ULong u64b;
1279 ULong u64 = (ULong)get_UShort(c);
1280 DiCursor block = get_DiCursor_from_Cursor(c);
1281 TRACE_D3("%llu byte block: ", u64);
1282 for (u64b = u64; u64b > 0; u64b--) {
1283 UChar u8 = get_UChar(c);
1284 TRACE_D3("%x ", (UInt)u8);
1285 }
1286 cts->u.cur = block;
1287 cts->szB = - (Long)u64;
1288 break;
1289 }
1290 case DW_FORM_block4: {
1291 ULong u64b;
1292 ULong u64 = (ULong)get_UInt(c);
1293 DiCursor block = get_DiCursor_from_Cursor(c);
1294 TRACE_D3("%llu byte block: ", u64);
1295 for (u64b = u64; u64b > 0; u64b--) {
1296 UChar u8 = get_UChar(c);
1297 TRACE_D3("%x ", (UInt)u8);
1298 }
1299 cts->u.cur = block;
1300 cts->szB = - (Long)u64;
1301 break;
1302 }
1303 case DW_FORM_exprloc:
1304 case DW_FORM_block: {
1305 ULong u64b;
1306 ULong u64 = (ULong)get_ULEB128(c);
1307 DiCursor block = get_DiCursor_from_Cursor(c);
1308 TRACE_D3("%llu byte block: ", u64);
1309 for (u64b = u64; u64b > 0; u64b--) {
1310 UChar u8 = get_UChar(c);
1311 TRACE_D3("%x ", (UInt)u8);
1312 }
1313 cts->u.cur = block;
1314 cts->szB = - (Long)u64;
1315 break;
1316 }
1317 case DW_FORM_ref_sig8: {
1318 ULong u64b;
1319 ULong signature = get_ULong (c);
1320 ULong work = signature;
1321 TRACE_D3("8 byte signature: ");
1322 for (u64b = 8; u64b > 0; u64b--) {
1323 UChar u8 = work & 0xff;
1324 TRACE_D3("%x ", (UInt)u8);
1325 work >>= 8;
1326 }
1327 /* Due to the way that the hash table is constructed, the
1328 resulting DIE offset here is already "cooked". See
1329 cook_die_using_form. */
1330 cts->u.val = lookup_signatured_type (cc->signature_types, signature,
1331 c->barf);
1332 cts->szB = sizeof(UWord);
1333 break;
1334 }
1335 case DW_FORM_indirect:
1336 get_Form_contents (cts, cc, c, td3, (DW_FORM)get_ULEB128(c));
1337 return;
1338
1339 case DW_FORM_GNU_ref_alt:
1340 cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
1341 cts->szB = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
1342 TRACE_D3("0x%lx", (UWord)cts->u.val);
1343 if (0) VG_(printf)("DW_FORM_GNU_ref_alt 0x%lx\n", (UWord)cts->u.val);
1344 if (/* the following is surely impossible, but ... */
1345 !ML_(sli_is_valid)(cc->escn_debug_info_alt)
1346 || cts->u.val >= (ULong)cc->escn_debug_info_alt.szB) {
1347 /* Hmm. Offset is nonsensical for this object's .debug_info
1348 section. Be safe and reject it. */
1349 cc->barf("get_Form_contents: DW_FORM_ref_addr points "
1350 "outside alternate .debug_info");
1351 }
1352 break;
1353
1354 case DW_FORM_GNU_strp_alt: {
1355 /* this is an offset into alternate .debug_str */
1356 SizeT uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1357 if (!ML_(sli_is_valid)(cc->escn_debug_str_alt)
1358 || uw >= cc->escn_debug_str_alt.szB)
1359 cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt "
1360 "points outside alternate .debug_str");
1361 /* FIXME: check the entire string lies inside debug_str,
1362 not just the first byte of it. */
1363 DiCursor str
1364 = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str_alt), uw);
1365 if (td3) {
1366 HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.3");
1367 TRACE_D3("(indirect alt string, offset: 0x%lx): %s", uw, tmp);
1368 ML_(dinfo_free)(tmp);
1369 }
1370 cts->u.cur = str;
1371 cts->szB = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1372 break;
1373 }
1374
1375 default:
1376 VG_(printf)(
1377 "get_Form_contents: unhandled %d (%s) at <%llx>\n",
1378 form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c));
1379 c->barf("get_Form_contents: unhandled DW_FORM");
1380 }
1381 }
1382
1383
1384 /*------------------------------------------------------------*/
1385 /*--- ---*/
1386 /*--- Parsing of variable-related DIEs ---*/
1387 /*--- ---*/
1388 /*------------------------------------------------------------*/
1389
1390 typedef
1391 struct _TempVar {
1392 HChar* name; /* in DebugInfo's .strchunks */
1393 /* Represent ranges economically. nRanges is the number of
1394 ranges. Cases:
1395 0: .rngOneMin .rngOneMax .manyRanges are all zero
1396 1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
1397 2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
1398 This is merely an optimisation to avoid having to allocate
1399 and free the XArray in the common (98%) of cases where there
1400 is zero or one address ranges. */
1401 UWord nRanges;
1402 Addr rngOneMin;
1403 Addr rngOneMax;
1404 XArray* rngMany; /* of AddrRange. NON-UNIQUE PTR in AR_DINFO. */
1405 /* Do not free .rngMany, since many TempVars will have the same
1406 value. Instead the associated storage is to be freed by
1407 deleting 'rangetree', which stores a single copy of each
1408 range. */
1409 /* --- */
1410 Int level;
1411 UWord typeR; /* a cuOff */
1412 GExpr* gexpr; /* for this variable */
1413 GExpr* fbGX; /* to find the frame base of the enclosing fn, if
1414 any */
1415 HChar* fName; /* declaring file name, or NULL */
1416 Int fLine; /* declaring file line number, or zero */
1417 /* offset in .debug_info, so that abstract instances can be
1418 found to satisfy references from concrete instances. */
1419 UWord dioff;
1420 UWord absOri; /* so the absOri fields refer to dioff fields
1421 in some other, related TempVar. */
1422 }
1423 TempVar;
1424
1425 #define N_D3_VAR_STACK 48
1426
1427 typedef
1428 struct {
1429 /* Contains the range stack: a stack of address ranges, one
1430 stack entry for each nested scope.
1431
1432 Some scope entries are created by function definitions
1433 (DW_AT_subprogram), and for those, we also note the GExpr
1434 derived from its DW_AT_frame_base attribute, if any.
1435 Consequently it should be possible to find, for any
1436 variable's DIE, the GExpr for the the containing function's
1437 DW_AT_frame_base by scanning back through the stack to find
1438 the nearest entry associated with a function. This somewhat
1439 elaborate scheme is provided so as to make it possible to
1440 obtain the correct DW_AT_frame_base expression even in the
1441 presence of nested functions (or to be more precise, in the
1442 presence of nested DW_AT_subprogram DIEs).
1443 */
1444 Int sp; /* [sp] is innermost active entry; sp==-1 for empty
1445 stack */
1446 XArray* ranges[N_D3_VAR_STACK]; /* XArray of AddrRange */
1447 Int level[N_D3_VAR_STACK]; /* D3 DIE levels */
1448 Bool isFunc[N_D3_VAR_STACK]; /* from DW_AT_subprogram? */
1449 GExpr* fbGX[N_D3_VAR_STACK]; /* if isFunc, contains the FB
1450 expr, else NULL */
1451 /* The file name table. Is a mapping from integer index to the
1452 (permanent) copy of the string in in DebugInfo's .strchunks. */
1453 XArray* /* of UChar* */ filenameTable;
1454 }
1455 D3VarParser;
1456
/* Debug aid: print every active entry of the range stack in 'parser',
   outermost first, under a heading that includes 'str'.  For each
   entry this shows its index, its DIE level, its frame-base GExpr
   pointer if it is a function entry, and its address ranges. */
static void varstack_show ( D3VarParser* parser, const HChar* str )
{
   Word i;

   VG_(printf)(" varstack (%s) {\n", str);

   for (i = 0; i <= parser->sp; i++) {
      XArray* xa = parser->ranges[i];
      Word    nRanges;
      Word    k;

      vg_assert(xa);
      VG_(printf)(" [%ld] (level %d)", i, parser->level[i]);

      /* Only function entries are allowed to carry a frame base. */
      if (!parser->isFunc[i]) {
         vg_assert(parser->fbGX[i] == NULL);
      } else {
         VG_(printf)(" (fbGX=%p)", parser->fbGX[i]);
      }
      VG_(printf)(": ");

      nRanges = VG_(sizeXA)( xa );
      if (nRanges == 0) {
         VG_(printf)("** empty PC range array **");
      }
      for (k = 0; k < nRanges; k++) {
         AddrRange* range = (AddrRange*) VG_(indexXA)( xa, k );
         vg_assert(range);
         VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax);
      }
      VG_(printf)("\n");
   }

   VG_(printf)(" }\n");
}
1483
1484 /* Remove from the stack, all entries with .level > 'level' */
1485 static
varstack_preen(D3VarParser * parser,Bool td3,Int level)1486 void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
1487 {
1488 Bool changed = False;
1489 vg_assert(parser->sp < N_D3_VAR_STACK);
1490 while (True) {
1491 vg_assert(parser->sp >= -1);
1492 if (parser->sp == -1) break;
1493 if (parser->level[parser->sp] <= level) break;
1494 if (0)
1495 TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
1496 vg_assert(parser->ranges[parser->sp]);
1497 /* Who allocated this xa? get_range_list() or
1498 unitary_range_list(). */
1499 VG_(deleteXA)( parser->ranges[parser->sp] );
1500 parser->ranges[parser->sp] = NULL;
1501 parser->level[parser->sp] = 0;
1502 parser->isFunc[parser->sp] = False;
1503 parser->fbGX[parser->sp] = NULL;
1504 parser->sp--;
1505 changed = True;
1506 }
1507 if (changed && td3)
1508 varstack_show( parser, "after preen" );
1509 }
1510
/* Push a new scope entry (ranges, level, isFunc, fbGX) onto the range
   stack in 'parser', after first popping every entry whose level is
   'level' or deeper.  'ranges' must be non-NULL, and 'fbGX' must be
   NULL unless 'isFunc' is set.  Calls cc->barf if the stack is
   already full.  If td3 is set, the resulting stack is printed. */
static void varstack_push ( CUConst* cc,
                            D3VarParser* parser,
                            Bool td3,
                            XArray* ranges, Int level,
                            Bool isFunc, GExpr* fbGX )
{
   if (0)
      TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d %p\n",
               parser->sp+1, level, ranges);

   /* First we need to zap everything >= 'level', as we are about to
      replace any previous entry at 'level', so .. */
   varstack_preen(parser, /*td3*/False, level-1);

   vg_assert(parser->sp >= -1);
   vg_assert(parser->sp < N_D3_VAR_STACK);
   if (parser->sp == N_D3_VAR_STACK-1) {
      cc->barf("varstack_push: N_D3_VAR_STACK is too low; "
               "increase and recompile");
   }
   /* Whatever remains on the stack must be strictly shallower. */
   if (parser->sp >= 0) {
      vg_assert(parser->level[parser->sp] < level);
   }

   parser->sp++;

   /* The slot being claimed must be in the empty state. */
   vg_assert(parser->ranges[parser->sp] == NULL);
   vg_assert(parser->level[parser->sp] == 0);
   vg_assert(parser->isFunc[parser->sp] == False);
   vg_assert(parser->fbGX[parser->sp] == NULL);
   vg_assert(ranges != NULL);
   if (!isFunc) {
      vg_assert(fbGX == NULL);
   }

   parser->fbGX[parser->sp]   = fbGX;
   parser->isFunc[parser->sp] = isFunc;
   parser->level[parser->sp]  = level;
   parser->ranges[parser->sp] = ranges;

   if (td3)
      varstack_show( parser, "after push" );
}
1545
1546
1547 /* cts is derived from a DW_AT_location and so refers either to a
1548 location expression or to a location list. Figure out which, and
1549 in both cases bundle the expression or location list into a
1550 so-called GExpr (guarded expression). */
1551 __attribute__((noinline))
get_GX(CUConst * cc,Bool td3,const FormContents * cts)1552 static GExpr* get_GX ( CUConst* cc, Bool td3, const FormContents* cts )
1553 {
1554 GExpr* gexpr = NULL;
1555 if (cts->szB < 0) {
1556 /* represents a non-empty in-line location expression, and
1557 cts->u.cur points at the image bytes */
1558 gexpr = make_singleton_GX( cts->u.cur, (ULong)(- cts->szB) );
1559 }
1560 else
1561 if (cts->szB > 0) {
1562 /* represents a location list. cts->u.val is the offset of it
1563 in .debug_loc. */
1564 if (!cc->cu_svma_known)
1565 cc->barf("get_GX: location list, but CU svma is unknown");
1566 gexpr = make_general_GX( cc, td3, cts->u.val, cc->cu_svma );
1567 }
1568 else {
1569 vg_assert(0); /* else caller is bogus */
1570 }
1571 return gexpr;
1572 }
1573
1574
1575 static
read_filename_table(D3VarParser * parser,CUConst * cc,ULong debug_line_offset,Bool td3)1576 void read_filename_table( /*MOD*/D3VarParser* parser,
1577 CUConst* cc, ULong debug_line_offset,
1578 Bool td3 )
1579 {
1580 Bool is_dw64;
1581 Cursor c;
1582 Word i;
1583 UShort version;
1584 UChar opcode_base;
1585 HChar* str;
1586
1587 vg_assert(parser && cc && cc->barf);
1588 if (!ML_(sli_is_valid)(cc->escn_debug_line)
1589 || cc->escn_debug_line.szB <= debug_line_offset) {
1590 cc->barf("read_filename_table: .debug_line is missing?");
1591 }
1592
1593 init_Cursor( &c, cc->escn_debug_line, debug_line_offset, cc->barf,
1594 "Overrun whilst reading .debug_line section(1)" );
1595
1596 /* unit_length = */
1597 get_Initial_Length( &is_dw64, &c,
1598 "read_filename_table: invalid initial-length field" );
1599 version = get_UShort( &c );
1600 if (version != 2 && version != 3 && version != 4)
1601 cc->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info "
1602 "is currently supported.");
1603 /*header_length = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 );
1604 /*minimum_instruction_length = */ get_UChar( &c );
1605 if (version >= 4)
1606 /*maximum_operations_per_insn = */ get_UChar( &c );
1607 /*default_is_stmt = */ get_UChar( &c );
1608 /*line_base = (Char)*/ get_UChar( &c );
1609 /*line_range = */ get_UChar( &c );
1610 opcode_base = get_UChar( &c );
1611 /* skip over "standard_opcode_lengths" */
1612 for (i = 1; i < (Word)opcode_base; i++)
1613 (void)get_UChar( &c );
1614
1615 /* skip over the directory names table */
1616 while (peek_UChar(&c) != 0) {
1617 (void)get_AsciiZ(&c);
1618 }
1619 (void)get_UChar(&c); /* skip terminating zero */
1620
1621 /* Read and record the file names table */
1622 vg_assert(parser->filenameTable);
1623 vg_assert( VG_(sizeXA)( parser->filenameTable ) == 0 );
1624 /* Add a dummy index-zero entry. DWARF3 numbers its files
1625 from 1, for some reason. */
1626 str = ML_(addStr)( cc->di, "<unknown_file>", -1 );
1627 VG_(addToXA)( parser->filenameTable, &str );
1628 while (peek_UChar(&c) != 0) {
1629 DiCursor cur = get_AsciiZ(&c);
1630 str = ML_(addStrFromCursor)( cc->di, cur );
1631 TRACE_D3(" read_filename_table: %ld %s\n",
1632 VG_(sizeXA)(parser->filenameTable), str);
1633 VG_(addToXA)( parser->filenameTable, &str );
1634 (void)get_ULEB128( &c ); /* skip directory index # */
1635 (void)get_ULEB128( &c ); /* skip last mod time */
1636 (void)get_ULEB128( &c ); /* file size */
1637 }
1638 /* We're done! The rest of it is not interesting. */
1639 }
1640
1641 __attribute__((noinline))
bad_DIE_confusion(int linenr)1642 static void bad_DIE_confusion(int linenr)
1643 {
1644 VG_(printf)("\nparse_var_DIE(%d): confused by:\n", linenr);
1645 }
1646 #define goto_bad_DIE do {bad_DIE_confusion(__LINE__); goto bad_DIE;} while (0)
1647
1648 __attribute__((noinline))
parse_var_DIE(WordFM * rangestree,XArray * tempvars,XArray * gexprs,D3VarParser * parser,DW_TAG dtag,UWord posn,Int level,Cursor * c_die,Cursor * c_abbv,CUConst * cc,Bool td3)1649 static void parse_var_DIE (
1650 /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
1651 /*MOD*/XArray* /* of TempVar* */ tempvars,
1652 /*MOD*/XArray* /* of GExpr* */ gexprs,
1653 /*MOD*/D3VarParser* parser,
1654 DW_TAG dtag,
1655 UWord posn,
1656 Int level,
1657 Cursor* c_die,
1658 Cursor* c_abbv,
1659 CUConst* cc,
1660 Bool td3
1661 )
1662 {
1663 FormContents cts;
1664
1665 UWord saved_die_c_offset = get_position_of_Cursor( c_die );
1666 UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
1667 Bool debug_types_flag;
1668 Bool alt_flag;
1669
1670 varstack_preen( parser, td3, level-1 );
1671
1672 if (dtag == DW_TAG_compile_unit
1673 || dtag == DW_TAG_type_unit
1674 || dtag == DW_TAG_partial_unit) {
1675 Bool have_lo = False;
1676 Bool have_hi1 = False;
1677 Bool hiIsRelative = False;
1678 Bool have_range = False;
1679 Addr ip_lo = 0;
1680 Addr ip_hi1 = 0;
1681 Addr rangeoff = 0;
1682 while (True) {
1683 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
1684 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1685 if (attr == 0 && form == 0) break;
1686 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
1687 if (attr == DW_AT_low_pc && cts.szB > 0) {
1688 ip_lo = cts.u.val;
1689 have_lo = True;
1690 }
1691 if (attr == DW_AT_high_pc && cts.szB > 0) {
1692 ip_hi1 = cts.u.val;
1693 have_hi1 = True;
1694 if (form != DW_FORM_addr)
1695 hiIsRelative = True;
1696 }
1697 if (attr == DW_AT_ranges && cts.szB > 0) {
1698 rangeoff = cts.u.val;
1699 have_range = True;
1700 }
1701 if (attr == DW_AT_stmt_list && cts.szB > 0) {
1702 read_filename_table( parser, cc, cts.u.val, td3 );
1703 }
1704 }
1705 if (have_lo && have_hi1 && hiIsRelative)
1706 ip_hi1 += ip_lo;
1707 /* Now, does this give us an opportunity to find this
1708 CU's svma? */
1709 #if 0
1710 if (level == 0 && have_lo) {
1711 vg_assert(!cc->cu_svma_known); /* if this fails, it must be
1712 because we've already seen a DW_TAG_compile_unit DIE at level
1713 0. But that can't happen, because DWARF3 only allows exactly
1714 one top level DIE per CU. */
1715 cc->cu_svma_known = True;
1716 cc->cu_svma = ip_lo;
1717 if (1)
1718 TRACE_D3("BBBBAAAA acquire CU_SVMA of %p\n", cc->cu_svma);
1719 /* Now, it may be that this DIE doesn't tell us the CU's
1720 SVMA, by way of not having a DW_AT_low_pc. That's OK --
1721 the CU doesn't *have* to have its SVMA specified.
1722
1723 But as per last para D3 spec sec 3.1.1 ("Normal and
1724 Partial Compilation Unit Entries", "If the base address
1725 (viz, the SVMA) is undefined, then any DWARF entry of
1726 structure defined interms of the base address of that
1727 compilation unit is not valid.". So that means, if whilst
1728 processing the children of this top level DIE (or their
1729 children, etc) we see a DW_AT_range, and cu_svma_known is
1730 False, then the DIE that contains it is (per the spec)
1731 invalid, and we can legitimately stop and complain. */
1732 }
1733 #else
1734 /* .. whereas The Reality is, simply assume the SVMA is zero
1735 if it isn't specified. */
1736 if (level == 0) {
1737 vg_assert(!cc->cu_svma_known);
1738 cc->cu_svma_known = True;
1739 if (have_lo)
1740 cc->cu_svma = ip_lo;
1741 else
1742 cc->cu_svma = 0;
1743 }
1744 #endif
1745 /* Do we have something that looks sane? */
1746 if (have_lo && have_hi1 && (!have_range)) {
1747 if (ip_lo < ip_hi1)
1748 varstack_push( cc, parser, td3,
1749 unitary_range_list(ip_lo, ip_hi1 - 1),
1750 level,
1751 False/*isFunc*/, NULL/*fbGX*/ );
1752 else if (ip_lo == 0 && ip_hi1 == 0)
1753 /* CU has no code, presumably?
1754 Such situations have been encountered for code
1755 compiled with -ffunction-sections -fdata-sections
1756 and linked with --gc-sections. Completely
1757 eliminated CU gives such 0 lo/hi pc. Similarly
1758 to a CU which has no lo/hi/range pc, we push
1759 an empty range list. */
1760 varstack_push( cc, parser, td3,
1761 empty_range_list(),
1762 level,
1763 False/*isFunc*/, NULL/*fbGX*/ );
1764 } else
1765 if ((!have_lo) && (!have_hi1) && have_range) {
1766 varstack_push( cc, parser, td3,
1767 get_range_list( cc, td3,
1768 rangeoff, cc->cu_svma ),
1769 level,
1770 False/*isFunc*/, NULL/*fbGX*/ );
1771 } else
1772 if ((!have_lo) && (!have_hi1) && (!have_range)) {
1773 /* CU has no code, presumably? */
1774 varstack_push( cc, parser, td3,
1775 empty_range_list(),
1776 level,
1777 False/*isFunc*/, NULL/*fbGX*/ );
1778 } else
1779 if (have_lo && (!have_hi1) && have_range && ip_lo == 0) {
1780 /* broken DIE created by gcc-4.3.X ? Ignore the
1781 apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
1782 instead. */
1783 varstack_push( cc, parser, td3,
1784 get_range_list( cc, td3,
1785 rangeoff, cc->cu_svma ),
1786 level,
1787 False/*isFunc*/, NULL/*fbGX*/ );
1788 } else {
1789 if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n",
1790 (Int)have_lo, (Int)have_hi1, (Int)have_range);
1791 goto_bad_DIE;
1792 }
1793 }
1794
1795 if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
1796 Bool have_lo = False;
1797 Bool have_hi1 = False;
1798 Bool have_range = False;
1799 Bool hiIsRelative = False;
1800 Addr ip_lo = 0;
1801 Addr ip_hi1 = 0;
1802 Addr rangeoff = 0;
1803 Bool isFunc = dtag == DW_TAG_subprogram;
1804 GExpr* fbGX = NULL;
1805 while (True) {
1806 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
1807 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1808 if (attr == 0 && form == 0) break;
1809 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
1810 if (attr == DW_AT_low_pc && cts.szB > 0) {
1811 ip_lo = cts.u.val;
1812 have_lo = True;
1813 }
1814 if (attr == DW_AT_high_pc && cts.szB > 0) {
1815 ip_hi1 = cts.u.val;
1816 have_hi1 = True;
1817 if (form != DW_FORM_addr)
1818 hiIsRelative = True;
1819 }
1820 if (attr == DW_AT_ranges && cts.szB > 0) {
1821 rangeoff = cts.u.val;
1822 have_range = True;
1823 }
1824 if (isFunc
1825 && attr == DW_AT_frame_base
1826 && cts.szB != 0 /* either scalar or nonempty block */) {
1827 fbGX = get_GX( cc, False/*td3*/, &cts );
1828 vg_assert(fbGX);
1829 VG_(addToXA)(gexprs, &fbGX);
1830 }
1831 }
1832 if (have_lo && have_hi1 && hiIsRelative)
1833 ip_hi1 += ip_lo;
1834 /* Do we have something that looks sane? */
1835 if (dtag == DW_TAG_subprogram
1836 && (!have_lo) && (!have_hi1) && (!have_range)) {
1837 /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
1838 representing a subroutine declaration that is not also a
1839 definition does not have code address or range
1840 attributes." */
1841 } else
1842 if (dtag == DW_TAG_lexical_block
1843 && (!have_lo) && (!have_hi1) && (!have_range)) {
1844 /* I believe this is legit, and means the lexical block
1845 contains no insns (whatever that might mean). Ignore. */
1846 } else
1847 if (have_lo && have_hi1 && (!have_range)) {
1848 /* This scope supplies just a single address range. */
1849 if (ip_lo < ip_hi1)
1850 varstack_push( cc, parser, td3,
1851 unitary_range_list(ip_lo, ip_hi1 - 1),
1852 level, isFunc, fbGX );
1853 } else
1854 if ((!have_lo) && (!have_hi1) && have_range) {
1855 /* This scope supplies multiple address ranges via the use of
1856 a range list. */
1857 varstack_push( cc, parser, td3,
1858 get_range_list( cc, td3,
1859 rangeoff, cc->cu_svma ),
1860 level, isFunc, fbGX );
1861 } else
1862 if (have_lo && (!have_hi1) && (!have_range)) {
1863 /* This scope is bogus. The D3 spec sec 3.4 (Lexical Block
1864 Entries) says fairly clearly that a scope must have either
1865 _range or (_low_pc and _high_pc). */
1866 /* The spec is a bit ambiguous though. Perhaps a single byte
1867 range is intended? See sec 2.17 (Code Addresses And Ranges) */
1868 /* This case is here because icc9 produced this:
1869 <2><13bd>: DW_TAG_lexical_block
1870 DW_AT_decl_line : 5229
1871 DW_AT_decl_column : 37
1872 DW_AT_decl_file : 1
1873 DW_AT_low_pc : 0x401b03
1874 */
1875 /* Ignore (seems safe than pushing a single byte range) */
1876 } else
1877 goto_bad_DIE;
1878 }
1879
1880 if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
1881 HChar* name = NULL;
1882 UWord typeR = D3_INVALID_CUOFF;
1883 Bool global = False;
1884 GExpr* gexpr = NULL;
1885 Int n_attrs = 0;
1886 UWord abs_ori = (UWord)D3_INVALID_CUOFF;
1887 Int lineNo = 0;
1888 HChar* fileName = NULL;
1889 while (True) {
1890 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
1891 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
1892 if (attr == 0 && form == 0) break;
1893 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
1894 n_attrs++;
1895 if (attr == DW_AT_name && cts.szB < 0) {
1896 name = ML_(addStrFromCursor)( cc->di, cts.u.cur );
1897 }
1898 if (attr == DW_AT_location
1899 && cts.szB != 0 /* either scalar or nonempty block */) {
1900 gexpr = get_GX( cc, False/*td3*/, &cts );
1901 vg_assert(gexpr);
1902 VG_(addToXA)(gexprs, &gexpr);
1903 }
1904 if (attr == DW_AT_type && cts.szB > 0) {
1905 typeR = cook_die_using_form( cc, cts.u.val, form );
1906 }
1907 if (attr == DW_AT_external && cts.szB > 0 && cts.u.val > 0) {
1908 global = True;
1909 }
1910 if (attr == DW_AT_abstract_origin && cts.szB > 0) {
1911 abs_ori = (UWord)cts.u.val;
1912 }
1913 if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
1914 /*declaration = True;*/
1915 }
1916 if (attr == DW_AT_decl_line && cts.szB > 0) {
1917 lineNo = (Int)cts.u.val;
1918 }
1919 if (attr == DW_AT_decl_file && cts.szB > 0) {
1920 Int ftabIx = (Int)cts.u.val;
1921 if (ftabIx >= 1
1922 && ftabIx < VG_(sizeXA)( parser->filenameTable )) {
1923 fileName = *(HChar**)
1924 VG_(indexXA)( parser->filenameTable, ftabIx );
1925 vg_assert(fileName);
1926 }
1927 if (0) VG_(printf)("XXX filename = %s\n", fileName);
1928 }
1929 }
1930 if (!global && dtag == DW_TAG_variable && level == 1) {
1931 /* Case of a static variable. It is better to declare
1932 it global as the variable is not really related to
1933 a PC range, as its address can be used by program
1934 counters outside of the ranges where it is visible . */
1935 global = True;
1936 }
1937
1938 /* We'll collect it under if one of the following three
1939 conditions holds:
1940 (1) has location and type -> completed
1941 (2) has type only -> is an abstract instance
1942 (3) has location and abs_ori -> is a concrete instance
1943 Name, filename and line number are all optional frills.
1944 */
1945 if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
1946 /* 2 */ || (typeR != D3_INVALID_CUOFF)
1947 /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {
1948
1949 /* Add this variable to the list of interesting looking
1950 variables. Crucially, note along with it the address
1951 range(s) associated with the variable, which for locals
1952 will be the address ranges at the top of the varparser's
1953 stack. */
1954 GExpr* fbGX = NULL;
1955 Word i, nRanges;
1956 XArray* /* of AddrRange */ xa;
1957 TempVar* tv;
1958 /* Stack can't be empty; we put a dummy entry on it for the
1959 entire address range before starting with the DIEs for
1960 this CU. */
1961 vg_assert(parser->sp >= 0);
1962
1963 /* If this is a local variable (non-global), try to find
1964 the GExpr for the DW_AT_frame_base of the containing
1965 function. It should have been pushed on the stack at the
1966 time we encountered its DW_TAG_subprogram DIE, so the way
1967 to find it is to scan back down the stack looking for it.
1968 If there isn't an enclosing stack entry marked 'isFunc'
1969 then we must be seeing variable or formal param DIEs
1970 outside of a function, so we deem the Dwarf to be
1971 malformed if that happens. Note that the fbGX may be NULL
1972 if the containing DT_TAG_subprogram didn't supply a
1973 DW_AT_frame_base -- that's OK, but there must actually be
1974 a containing DW_TAG_subprogram. */
1975 if (!global) {
1976 Bool found = False;
1977 for (i = parser->sp; i >= 0; i--) {
1978 if (parser->isFunc[i]) {
1979 fbGX = parser->fbGX[i];
1980 found = True;
1981 break;
1982 }
1983 }
1984 if (!found) {
1985 if (0 && VG_(clo_verbosity) >= 0) {
1986 VG_(message)(Vg_DebugMsg,
1987 "warning: parse_var_DIE: non-global variable "
1988 "outside DW_TAG_subprogram\n");
1989 }
1990 /* goto_bad_DIE; */
1991 /* This seems to happen a lot. Just ignore it -- if,
1992 when we come to evaluation of the location (guarded)
1993 expression, it requires a frame base value, and
1994 there's no expression for that, then evaluation as a
1995 whole will fail. Harmless - a bit of a waste of
1996 cycles but nothing more. */
1997 }
1998 }
1999
2000 /* re "global ? 0 : parser->sp" (twice), if the var is
2001 marked 'global' then we must put it at the global scope,
2002 as only the global scope (level 0) covers the entire PC
2003 address space. It is asserted elsewhere that level 0
2004 always covers the entire address space. */
2005 xa = parser->ranges[global ? 0 : parser->sp];
2006 nRanges = VG_(sizeXA)(xa);
2007 vg_assert(nRanges >= 0);
2008
2009 tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
2010 tv->name = name;
2011 tv->level = global ? 0 : parser->sp;
2012 tv->typeR = typeR;
2013 tv->gexpr = gexpr;
2014 tv->fbGX = fbGX;
2015 tv->fName = fileName;
2016 tv->fLine = lineNo;
2017 tv->dioff = posn;
2018 tv->absOri = abs_ori;
2019
2020 /* See explanation on definition of type TempVar for the
2021 reason for this elaboration. */
2022 tv->nRanges = nRanges;
2023 tv->rngOneMin = 0;
2024 tv->rngOneMax = 0;
2025 tv->rngMany = NULL;
2026 if (nRanges == 1) {
2027 AddrRange* range = VG_(indexXA)(xa, 0);
2028 tv->rngOneMin = range->aMin;
2029 tv->rngOneMax = range->aMax;
2030 }
2031 else if (nRanges > 1) {
2032 /* See if we already have a range list which is
2033 structurally identical. If so, use that; if not, clone
2034 this one, and add it to our collection. */
2035 UWord keyW, valW;
2036 if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
2037 XArray* old = (XArray*)keyW;
2038 tl_assert(valW == 0);
2039 tl_assert(old != xa);
2040 tv->rngMany = old;
2041 } else {
2042 XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
2043 tv->rngMany = cloned;
2044 VG_(addToFM)( rangestree, (UWord)cloned, 0 );
2045 }
2046 }
2047
2048 VG_(addToXA)( tempvars, &tv );
2049
2050 TRACE_D3(" Recording this variable, with %ld PC range(s)\n",
2051 VG_(sizeXA)(xa) );
2052 /* collect stats on how effective the ->ranges special
2053 casing is */
2054 if (0) {
2055 static Int ntot=0, ngt=0;
2056 ntot++;
2057 if (tv->rngMany) ngt++;
2058 if (0 == (ntot % 100000))
2059 VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
2060 }
2061
2062 }
2063
2064 /* Here are some other weird cases seen in the wild:
2065
2066 We have a variable with a name and a type, but no
2067 location. I guess that's a sign that it has been
2068 optimised away. Ignore it. Here's an example:
2069
2070 static Int lc_compar(void* n1, void* n2) {
2071 MC_Chunk* mc1 = *(MC_Chunk**)n1;
2072 MC_Chunk* mc2 = *(MC_Chunk**)n2;
2073 return (mc1->data < mc2->data ? -1 : 1);
2074 }
2075
2076 Both mc1 and mc2 are like this
2077 <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
2078 DW_AT_name : mc1
2079 DW_AT_decl_file : 1
2080 DW_AT_decl_line : 216
2081 DW_AT_type : <5d3>
2082
2083 whereas n1 and n2 do have locations specified.
2084
2085 ---------------------------------------------
2086
2087 We see a DW_TAG_formal_parameter with a type, but
2088 no name and no location. It's probably part of a function type
2089 construction, thusly, hence ignore it:
2090 <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
2091 DW_AT_sibling : <2c9>
2092 DW_AT_prototyped : 1
2093 DW_AT_type : <114>
2094 <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
2095 DW_AT_type : <13e>
2096 <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
2097 DW_AT_type : <133>
2098
2099 ---------------------------------------------
2100
2101 Is very minimal, like this:
2102 <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
2103 DW_AT_abstract_origin: <7ba>
2104 What that signifies I have no idea. Ignore.
2105
2106 ----------------------------------------------
2107
2108 Is very minimal, like this:
2109 <200f>: DW_TAG_formal_parameter
2110 DW_AT_abstract_ori: <1f4c>
2111 DW_AT_location : 13440
2112 What that signifies I have no idea. Ignore.
2113 It might be significant, though: the variable at least
2114 has a location and so might exist somewhere.
2115 Maybe we should handle this.
2116
2117 ---------------------------------------------
2118
2119 <22407>: DW_TAG_variable
2120 DW_AT_name : (indirect string, offset: 0x6579):
2121 vgPlain_trampoline_stuff_start
2122 DW_AT_decl_file : 29
2123 DW_AT_decl_line : 56
2124 DW_AT_external : 1
2125 DW_AT_declaration : 1
2126
2127 Nameless and typeless variable that has a location? Who
2128 knows. Not me.
2129 <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
2130 DW_AT_location : 9 byte block: 3 c0 c7 13 38 0 0 0 0
2131 (DW_OP_addr: 3813c7c0)
2132
2133 No, really. Check it out. gcc is quite simply borked.
2134 <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
2135 // followed by no attributes, and the next DIE is a sibling,
2136 // not a child
2137 */
2138 }
2139 return;
2140
2141 bad_DIE:
2142 set_position_of_Cursor( c_die, saved_die_c_offset );
2143 set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
2144 posn = uncook_die( cc, posn, &debug_types_flag, &alt_flag );
2145 VG_(printf)(" <%d><%lx>: %s", level, posn, ML_(pp_DW_TAG)( dtag ) );
2146 if (debug_types_flag) {
2147 VG_(printf)(" (in .debug_types)");
2148 }
2149 else if (alt_flag) {
2150 VG_(printf)(" (in alternate .debug_info)");
2151 }
2152 VG_(printf)("\n");
2153 while (True) {
2154 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2155 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2156 if (attr == 0 && form == 0) break;
2157 VG_(printf)(" %18s: ", ML_(pp_DW_AT)(attr));
2158 /* Get the form contents, so as to print them */
2159 get_Form_contents( &cts, cc, c_die, True, form );
2160 VG_(printf)("\t\n");
2161 }
2162 VG_(printf)("\n");
2163 cc->barf("parse_var_DIE: confused by the above DIE");
2164 /*NOTREACHED*/
2165 }
2166
2167
2168 /*------------------------------------------------------------*/
2169 /*--- ---*/
2170 /*--- Parsing of type-related DIEs ---*/
2171 /*--- ---*/
2172 /*------------------------------------------------------------*/
2173
2174 #define N_D3_TYPE_STACK 16
2175
2176 typedef
2177 struct {
2178 /* What source language? 'A'=Ada83/95,
2179 'C'=C/C++,
2180 'F'=Fortran,
2181 '?'=other
2182 Established once per compilation unit. */
2183 UChar language;
2184 /* A stack of types which are currently under construction */
2185 Int sp; /* [sp] is innermost active entry; sp==-1 for empty
2186 stack */
2187 /* Note that the TyEnts in qparentE are temporary copies of the
2188 ones accumulating in the main tyent array. So it is not safe
2189 to free up anything on them when popping them off the stack
2190 (iow, it isn't safe to use TyEnt__make_EMPTY on them). Just
2191 memset them to zero when done. */
2192 TyEnt qparentE[N_D3_TYPE_STACK]; /* parent TyEnts */
2193 Int qlevel[N_D3_TYPE_STACK];
2194
2195 }
2196 D3TypeParser;
2197
/* Debugging aid: print every entry on the type stack, from slot 0 up
   to the innermost slot [sp], together with its recorded level.
   'str' is a caller-supplied label echoed in the heading line. */
static void typestack_show ( D3TypeParser* parser, const HChar* str )
{
   Word ix = 0;

   VG_(printf)(" typestack (%s) {\n", str);
   while (ix <= parser->sp) {
      VG_(printf)(" [%ld] (level %d): ", ix, parser->qlevel[ix]);
      ML_(pp_TyEnt)( &parser->qparentE[ix] );
      VG_(printf)("\n");
      ix++;
   }
   VG_(printf)(" }\n");
}
2208
2209 /* Remove from the stack, all entries with .level > 'level' */
2210 static
typestack_preen(D3TypeParser * parser,Bool td3,Int level)2211 void typestack_preen ( D3TypeParser* parser, Bool td3, Int level )
2212 {
2213 Bool changed = False;
2214 vg_assert(parser->sp < N_D3_TYPE_STACK);
2215 while (True) {
2216 vg_assert(parser->sp >= -1);
2217 if (parser->sp == -1) break;
2218 if (parser->qlevel[parser->sp] <= level) break;
2219 if (0)
2220 TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
2221 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2222 VG_(memset)(&parser->qparentE[parser->sp], 0, sizeof(TyEnt));
2223 parser->qparentE[parser->sp].cuOff = D3_INVALID_CUOFF;
2224 parser->qparentE[parser->sp].tag = Te_EMPTY;
2225 parser->qlevel[parser->sp] = 0;
2226 parser->sp--;
2227 changed = True;
2228 }
2229 if (changed && td3)
2230 typestack_show( parser, "after preen" );
2231 }
2232
typestack_is_empty(D3TypeParser * parser)2233 static Bool typestack_is_empty ( D3TypeParser* parser ) {
2234 vg_assert(parser->sp >= -1 && parser->sp < N_D3_TYPE_STACK);
2235 return parser->sp == -1;
2236 }
2237
/* Push a copy of '*parentE' onto the type stack, recorded at 'level'.
   Any entries already recorded at 'level' or deeper are removed
   first.  Calls cc->barf if the stack is already full.  'parentE'
   must be non-NULL, must be a type, and must carry a valid cuOff.
   If 'td3' is set, the stack is printed after the push. */
static void typestack_push ( CUConst* cc,
                             D3TypeParser* parser,
                             Bool td3,
                             TyEnt* parentE, Int level )
{
   TyEnt* slot;

   if (0) {
      TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d %05lx\n",
               parser->sp+1, level, parentE->cuOff);
   }

   /* We are about to install a new entry at 'level', so everything
      currently at that level or above it has to go first. */
   typestack_preen(parser, /*td3*/False, level-1);

   vg_assert(parser->sp >= -1);
   vg_assert(parser->sp < N_D3_TYPE_STACK);
   if (parser->sp == N_D3_TYPE_STACK-1) {
      cc->barf("typestack_push: N_D3_TYPE_STACK is too low; "
               "increase and recompile");
   }
   /* Whatever survived the preen must be strictly shallower. */
   if (parser->sp >= 0) {
      vg_assert(parser->qlevel[parser->sp] < level);
   }

   parser->sp++;

   /* The slot being claimed must be in its pristine state. */
   vg_assert(parser->qparentE[parser->sp].tag == Te_EMPTY);
   vg_assert(parser->qlevel[parser->sp] == 0);
   vg_assert(parentE);
   vg_assert(ML_(TyEnt__is_type)(parentE));
   vg_assert(parentE->cuOff != D3_INVALID_CUOFF);

   slot  = &parser->qparentE[parser->sp];
   *slot = *parentE;
   parser->qlevel[parser->sp] = level;

   if (td3) {
      typestack_show( parser, "after push" );
   }
}
2268
2269 /* True if the subrange type being parsed gives the bounds of an array. */
subrange_type_denotes_array_bounds(D3TypeParser * parser,DW_TAG dtag)2270 static Bool subrange_type_denotes_array_bounds ( D3TypeParser* parser,
2271 DW_TAG dtag ) {
2272 vg_assert(dtag == DW_TAG_subrange_type);
2273 /* For most languages, a subrange_type dtag always gives the
2274 bounds of an array.
2275 For Ada, there are additional conditions as a subrange_type
2276 is also used for other purposes. */
2277 if (parser->language != 'A')
2278 /* not Ada, so it definitely denotes an array bound. */
2279 return True;
2280 else
2281 /* Extra constraints for Ada: it only denotes an array bound if .. */
2282 return (! typestack_is_empty(parser)
2283 && parser->qparentE[parser->sp].tag == Te_TyArray);
2284 }
2285
2286 /* Parse a type-related DIE. 'parser' holds the current parser state.
2287 'admin' is where the completed types are dumped. 'dtag' is the tag
2288 for this DIE. 'c_die' points to the start of the data fields (FORM
2289 stuff) for the DIE. c_abbv points to the start of the (name,form)
2290 pairs which describe the DIE.
2291
2292 We may find the DIE uninteresting, in which case we should ignore
2293 it.
2294
2295 What happens: the DIE is examined. If uninteresting, it is ignored.
2296 Otherwise, the DIE gives rise to two things:
2297
2298 (1) the offset of this DIE in the CU -- the cuOffset, a UWord
2299 (2) a TyAdmin structure, which holds the type, or related stuff
2300
2301 (2) is added at the end of 'tyadmins', at some index, say 'i'.
2302
2303 A pair (cuOffset, i) is added to 'tydict'.
2304
2305 Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
2306 a mapping from cuOffset to the index of the corresponding entry in
2307 'tyadmins'.
2308
2309 When resolving a cuOffset to a TyAdmin, first look up the cuOffset
2310 in the tydict (by binary search). This gives an index into
2311 tyadmins, and the required entity lives in tyadmins at that index.
2312 */
2313 __attribute__((noinline))
parse_type_DIE(XArray * tyents,D3TypeParser * parser,DW_TAG dtag,UWord posn,Int level,Cursor * c_die,Cursor * c_abbv,CUConst * cc,Bool td3)2314 static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents,
2315 /*MOD*/D3TypeParser* parser,
2316 DW_TAG dtag,
2317 UWord posn,
2318 Int level,
2319 Cursor* c_die,
2320 Cursor* c_abbv,
2321 CUConst* cc,
2322 Bool td3 )
2323 {
2324 FormContents cts;
2325 TyEnt typeE;
2326 TyEnt atomE;
2327 TyEnt fieldE;
2328 TyEnt boundE;
2329 Bool debug_types_flag;
2330 Bool alt_flag;
2331
2332 UWord saved_die_c_offset = get_position_of_Cursor( c_die );
2333 UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
2334
2335 VG_(memset)( &typeE, 0xAA, sizeof(typeE) );
2336 VG_(memset)( &atomE, 0xAA, sizeof(atomE) );
2337 VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) );
2338 VG_(memset)( &boundE, 0xAA, sizeof(boundE) );
2339
2340 /* If we've returned to a level at or above any previously noted
2341 parent, un-note it, so we don't believe we're still collecting
2342 its children. */
2343 typestack_preen( parser, td3, level-1 );
2344
2345 if (dtag == DW_TAG_compile_unit
2346 || dtag == DW_TAG_type_unit
2347 || dtag == DW_TAG_partial_unit) {
2348 /* See if we can find DW_AT_language, since it is important for
2349 establishing array bounds (see DW_TAG_subrange_type below in
2350 this fn) */
2351 while (True) {
2352 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2353 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2354 if (attr == 0 && form == 0) break;
2355 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2356 if (attr != DW_AT_language)
2357 continue;
2358 if (cts.szB <= 0)
2359 goto_bad_DIE;
2360 switch (cts.u.val) {
2361 case DW_LANG_C89: case DW_LANG_C:
2362 case DW_LANG_C_plus_plus: case DW_LANG_ObjC:
2363 case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC:
2364 case DW_LANG_Upc: case DW_LANG_C99:
2365 parser->language = 'C'; break;
2366 case DW_LANG_Fortran77: case DW_LANG_Fortran90:
2367 case DW_LANG_Fortran95:
2368 parser->language = 'F'; break;
2369 case DW_LANG_Ada83: case DW_LANG_Ada95:
2370 parser->language = 'A'; break;
2371 case DW_LANG_Cobol74:
2372 case DW_LANG_Cobol85: case DW_LANG_Pascal83:
2373 case DW_LANG_Modula2: case DW_LANG_Java:
2374 case DW_LANG_PLI:
2375 case DW_LANG_D: case DW_LANG_Python:
2376 case DW_LANG_Mips_Assembler:
2377 parser->language = '?'; break;
2378 default:
2379 goto_bad_DIE;
2380 }
2381 }
2382 }
2383
2384 if (dtag == DW_TAG_base_type) {
2385 /* We can pick up a new base type any time. */
2386 VG_(memset)(&typeE, 0, sizeof(typeE));
2387 typeE.cuOff = D3_INVALID_CUOFF;
2388 typeE.tag = Te_TyBase;
2389 while (True) {
2390 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2391 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2392 if (attr == 0 && form == 0) break;
2393 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2394 if (attr == DW_AT_name && cts.szB < 0) {
2395 typeE.Te.TyBase.name
2396 = ML_(cur_read_strdup)( cts.u.cur,
2397 "di.readdwarf3.ptD.base_type.1" );
2398 }
2399 if (attr == DW_AT_byte_size && cts.szB > 0) {
2400 typeE.Te.TyBase.szB = cts.u.val;
2401 }
2402 if (attr == DW_AT_encoding && cts.szB > 0) {
2403 switch (cts.u.val) {
2404 case DW_ATE_unsigned: case DW_ATE_unsigned_char:
2405 case DW_ATE_UTF: /* since DWARF4, e.g. char16_t from C++ */
2406 case DW_ATE_boolean:/* FIXME - is this correct? */
2407 case DW_ATE_unsigned_fixed:
2408 typeE.Te.TyBase.enc = 'U'; break;
2409 case DW_ATE_signed: case DW_ATE_signed_char:
2410 case DW_ATE_signed_fixed:
2411 typeE.Te.TyBase.enc = 'S'; break;
2412 case DW_ATE_float:
2413 typeE.Te.TyBase.enc = 'F'; break;
2414 case DW_ATE_complex_float:
2415 typeE.Te.TyBase.enc = 'C'; break;
2416 default:
2417 goto_bad_DIE;
2418 }
2419 }
2420 }
2421
2422 /* Invent a name if it doesn't have one. gcc-4.3
2423 -ftree-vectorize is observed to emit nameless base types. */
2424 if (!typeE.Te.TyBase.name)
2425 typeE.Te.TyBase.name
2426 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2",
2427 "<anon_base_type>" );
2428
2429 /* Do we have something that looks sane? */
2430 if (/* must have a name */
2431 typeE.Te.TyBase.name == NULL
2432 /* and a plausible size. Yes, really 32: "complex long
2433 double" apparently has size=32 */
2434 || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32
2435 /* and a plausible encoding */
2436 || (typeE.Te.TyBase.enc != 'U'
2437 && typeE.Te.TyBase.enc != 'S'
2438 && typeE.Te.TyBase.enc != 'F'
2439 && typeE.Te.TyBase.enc != 'C'))
2440 goto_bad_DIE;
2441 /* Last minute hack: if we see this
2442 <1><515>: DW_TAG_base_type
2443 DW_AT_byte_size : 0
2444 DW_AT_encoding : 5
2445 DW_AT_name : void
2446 convert it into a real Void type. */
2447 if (typeE.Te.TyBase.szB == 0
2448 && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) {
2449 ML_(TyEnt__make_EMPTY)(&typeE);
2450 typeE.tag = Te_TyVoid;
2451 typeE.Te.TyVoid.isFake = False; /* it's a real one! */
2452 }
2453
2454 goto acquire_Type;
2455 }
2456
2457 /*
2458 * An example of DW_TAG_rvalue_reference_type:
2459 *
2460 * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
2461 * <1><1014>: Abbrev Number: 55 (DW_TAG_rvalue_reference_type)
2462 * <1015> DW_AT_byte_size : 4
2463 * <1016> DW_AT_type : <0xe52>
2464 */
2465 if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type
2466 || dtag == DW_TAG_ptr_to_member_type
2467 || dtag == DW_TAG_rvalue_reference_type) {
2468 /* This seems legit for _pointer_type and _reference_type. I
2469 don't know if rolling _ptr_to_member_type in here really is
2470 legit, but it's better than not handling it at all. */
2471 VG_(memset)(&typeE, 0, sizeof(typeE));
2472 typeE.cuOff = D3_INVALID_CUOFF;
2473 switch (dtag) {
2474 case DW_TAG_pointer_type:
2475 typeE.tag = Te_TyPtr;
2476 break;
2477 case DW_TAG_reference_type:
2478 typeE.tag = Te_TyRef;
2479 break;
2480 case DW_TAG_ptr_to_member_type:
2481 typeE.tag = Te_TyPtrMbr;
2482 break;
2483 case DW_TAG_rvalue_reference_type:
2484 typeE.tag = Te_TyRvalRef;
2485 break;
2486 default:
2487 vg_assert(False);
2488 }
2489 /* target type defaults to void */
2490 typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF;
2491 /* These four type kinds don't *have* to specify their size, in
2492 which case we assume it's a machine word. But if they do
2493 specify it, it must be a machine word :-) This probably
2494 assumes that the word size of the Dwarf3 we're reading is the
2495 same size as that on the machine. gcc appears to give a size
2496 whereas icc9 doesn't. */
2497 typeE.Te.TyPorR.szB = sizeof(UWord);
2498 while (True) {
2499 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2500 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2501 if (attr == 0 && form == 0) break;
2502 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2503 if (attr == DW_AT_byte_size && cts.szB > 0) {
2504 typeE.Te.TyPorR.szB = cts.u.val;
2505 }
2506 if (attr == DW_AT_type && cts.szB > 0) {
2507 typeE.Te.TyPorR.typeR
2508 = cook_die_using_form( cc, (UWord)cts.u.val, form );
2509 }
2510 }
2511 /* Do we have something that looks sane? */
2512 if (typeE.Te.TyPorR.szB != sizeof(UWord))
2513 goto_bad_DIE;
2514 else
2515 goto acquire_Type;
2516 }
2517
2518 if (dtag == DW_TAG_enumeration_type) {
2519 /* Create a new Type to hold the results. */
2520 VG_(memset)(&typeE, 0, sizeof(typeE));
2521 typeE.cuOff = posn;
2522 typeE.tag = Te_TyEnum;
2523 Bool is_decl = False;
2524 typeE.Te.TyEnum.atomRs
2525 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1",
2526 ML_(dinfo_free),
2527 sizeof(UWord) );
2528 while (True) {
2529 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2530 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2531 if (attr == 0 && form == 0) break;
2532 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2533 if (attr == DW_AT_name && cts.szB < 0) {
2534 typeE.Te.TyEnum.name
2535 = ML_(cur_read_strdup)( cts.u.cur,
2536 "di.readdwarf3.pTD.enum_type.2" );
2537 }
2538 if (attr == DW_AT_byte_size && cts.szB > 0) {
2539 typeE.Te.TyEnum.szB = cts.u.val;
2540 }
2541 if (attr == DW_AT_declaration) {
2542 is_decl = True;
2543 }
2544 }
2545
2546 if (!typeE.Te.TyEnum.name)
2547 typeE.Te.TyEnum.name
2548 = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3",
2549 "<anon_enum_type>" );
2550
2551 /* Do we have something that looks sane? */
2552 if (typeE.Te.TyEnum.szB == 0
2553 /* we must know the size */
2554 /* but not for Ada, which uses such dummy
2555 enumerations as helper for gdb ada mode.
2556 Also GCC allows incomplete enums as GNU extension.
2557 http://gcc.gnu.org/onlinedocs/gcc/Incomplete-Enums.html
2558 These are marked as DW_AT_declaration and won't have
2559 a size. They can only be used in declaration or as
2560 pointer types. You can't allocate variables or storage
2561 using such an enum type. (Also GCC seems to have a bug
2562 that will put such an enumeration_type into a .debug_types
2563 unit which should only contain complete types.) */
2564 && (parser->language != 'A' && !is_decl)) {
2565 goto_bad_DIE;
2566 }
2567
2568 /* On't stack! */
2569 typestack_push( cc, parser, td3, &typeE, level );
2570 goto acquire_Type;
2571 }
2572
2573 /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
2574 DW_TAG_enumerator with only a DW_AT_name but no
2575 DW_AT_const_value. This is in violation of the Dwarf3 standard,
2576 and appears to be a new "feature" of gcc - versions 4.3.x and
2577 earlier do not appear to do this. So accept DW_TAG_enumerator
2578 which only have a name but no value. An example:
2579
2580 <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
2581 <181> DW_AT_name : (indirect string, offset: 0xda70):
2582 QtMsgType
2583 <185> DW_AT_byte_size : 4
2584 <186> DW_AT_decl_file : 14
2585 <187> DW_AT_decl_line : 1480
2586 <189> DW_AT_sibling : <0x1a7>
2587 <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
2588 <18e> DW_AT_name : (indirect string, offset: 0x9e18):
2589 QtDebugMsg
2590 <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
2591 <193> DW_AT_name : (indirect string, offset: 0x1505f):
2592 QtWarningMsg
2593 <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
2594 <198> DW_AT_name : (indirect string, offset: 0x16f4a):
2595 QtCriticalMsg
2596 <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
2597 <19d> DW_AT_name : (indirect string, offset: 0x156dd):
2598 QtFatalMsg
2599 <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
2600 <1a2> DW_AT_name : (indirect string, offset: 0x13660):
2601 QtSystemMsg
2602 */
2603 if (dtag == DW_TAG_enumerator) {
2604 VG_(memset)( &atomE, 0, sizeof(atomE) );
2605 atomE.cuOff = posn;
2606 atomE.tag = Te_Atom;
2607 while (True) {
2608 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2609 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2610 if (attr == 0 && form == 0) break;
2611 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2612 if (attr == DW_AT_name && cts.szB < 0) {
2613 atomE.Te.Atom.name
2614 = ML_(cur_read_strdup)( cts.u.cur,
2615 "di.readdwarf3.pTD.enumerator.1" );
2616 }
2617 if (attr == DW_AT_const_value && cts.szB > 0) {
2618 atomE.Te.Atom.value = cts.u.val;
2619 atomE.Te.Atom.valueKnown = True;
2620 }
2621 }
2622 /* Do we have something that looks sane? */
2623 if (atomE.Te.Atom.name == NULL)
2624 goto_bad_DIE;
2625 /* Do we have a plausible parent? */
2626 if (typestack_is_empty(parser)) goto_bad_DIE;
2627 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2628 vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2629 if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
2630 if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto_bad_DIE;
2631 /* Record this child in the parent */
2632 vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs);
2633 VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs,
2634 &atomE );
2635 /* And record the child itself */
2636 goto acquire_Atom;
2637 }
2638
2639 /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type. I
2640 don't know if this is correct, but it at least makes this reader
2641 usable for gcc-4.3 produced Dwarf3. */
2642 if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type
2643 || dtag == DW_TAG_union_type) {
2644 Bool have_szB = False;
2645 Bool is_decl = False;
2646 Bool is_spec = False;
2647 /* Create a new Type to hold the results. */
2648 VG_(memset)(&typeE, 0, sizeof(typeE));
2649 typeE.cuOff = posn;
2650 typeE.tag = Te_TyStOrUn;
2651 typeE.Te.TyStOrUn.name = NULL;
2652 typeE.Te.TyStOrUn.typeR = D3_INVALID_CUOFF;
2653 typeE.Te.TyStOrUn.fieldRs
2654 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1",
2655 ML_(dinfo_free),
2656 sizeof(UWord) );
2657 typeE.Te.TyStOrUn.complete = True;
2658 typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type
2659 || dtag == DW_TAG_class_type;
2660 while (True) {
2661 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2662 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2663 if (attr == 0 && form == 0) break;
2664 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2665 if (attr == DW_AT_name && cts.szB < 0) {
2666 typeE.Te.TyStOrUn.name
2667 = ML_(cur_read_strdup)( cts.u.cur,
2668 "di.readdwarf3.ptD.struct_type.2" );
2669 }
2670 if (attr == DW_AT_byte_size && cts.szB >= 0) {
2671 typeE.Te.TyStOrUn.szB = cts.u.val;
2672 have_szB = True;
2673 }
2674 if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
2675 is_decl = True;
2676 }
2677 if (attr == DW_AT_specification && cts.szB > 0 && cts.u.val > 0) {
2678 is_spec = True;
2679 }
2680 if (attr == DW_AT_signature && form == DW_FORM_ref_sig8
2681 && cts.szB > 0) {
2682 have_szB = True;
2683 typeE.Te.TyStOrUn.szB = 8;
2684 typeE.Te.TyStOrUn.typeR
2685 = cook_die_using_form( cc, (UWord)cts.u.val, form );
2686 }
2687 }
2688 /* Do we have something that looks sane? */
2689 if (is_decl && (!is_spec)) {
2690 /* It's a DW_AT_declaration. We require the name but
2691 nothing else. */
2692 /* JRS 2012-06-28: following discussion w/ tromey, if the the
2693 type doesn't have name, just make one up, and accept it.
2694 It might be referred to by other DIEs, so ignoring it
2695 doesn't seem like a safe option. */
2696 if (typeE.Te.TyStOrUn.name == NULL)
2697 typeE.Te.TyStOrUn.name
2698 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.3",
2699 "<anon_struct_type>" );
2700 typeE.Te.TyStOrUn.complete = False;
2701 /* JRS 2009 Aug 10: <possible kludge>? */
2702 /* Push this tyent on the stack, even though it's incomplete.
2703 It appears that gcc-4.4 on Fedora 11 will sometimes create
2704 DW_TAG_member entries for it, and so we need to have a
2705 plausible parent present in order for that to work. See
2706 #200029 comments 8 and 9. */
2707 typestack_push( cc, parser, td3, &typeE, level );
2708 /* </possible kludge> */
2709 goto acquire_Type;
2710 }
2711 if ((!is_decl) /* && (!is_spec) */) {
2712 /* this is the common, ordinary case */
2713 /* The name can be present, or not */
2714 if (!have_szB) {
2715 /* We must know the size.
2716 But in Ada, record with discriminants might have no size.
2717 But in C, VLA in the middle of a struct (gcc extension)
2718 might have no size.
2719 Instead, some GNAT dwarf extensions and/or dwarf entries
2720 allow to calculate the struct size at runtime.
2721 We cannot do that (yet?) so, the temporary kludge is to use
2722 a small size. */
2723 typeE.Te.TyStOrUn.szB = 1;
2724 }
2725 /* On't stack! */
2726 typestack_push( cc, parser, td3, &typeE, level );
2727 goto acquire_Type;
2728 }
2729 else {
2730 /* don't know how to handle any other variants just now */
2731 goto_bad_DIE;
2732 }
2733 }
2734
2735 if (dtag == DW_TAG_member) {
2736 /* Acquire member entries for both DW_TAG_structure_type and
2737 DW_TAG_union_type. They differ minorly, in that struct
2738 members must have a DW_AT_data_member_location expression
2739 whereas union members must not. */
2740 Bool parent_is_struct;
2741 VG_(memset)( &fieldE, 0, sizeof(fieldE) );
2742 fieldE.cuOff = posn;
2743 fieldE.tag = Te_Field;
2744 fieldE.Te.Field.typeR = D3_INVALID_CUOFF;
2745 while (True) {
2746 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2747 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2748 if (attr == 0 && form == 0) break;
2749 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2750 if (attr == DW_AT_name && cts.szB < 0) {
2751 fieldE.Te.Field.name
2752 = ML_(cur_read_strdup)( cts.u.cur,
2753 "di.readdwarf3.ptD.member.1" );
2754 }
2755 if (attr == DW_AT_type && cts.szB > 0) {
2756 fieldE.Te.Field.typeR
2757 = cook_die_using_form( cc, (UWord)cts.u.val, form );
2758 }
2759 /* There are 2 different cases for DW_AT_data_member_location.
2760 If it is a constant class attribute, it contains byte offset
2761 from the beginning of the containing entity.
2762 Otherwise it is a location expression. */
2763 if (attr == DW_AT_data_member_location && cts.szB > 0) {
2764 fieldE.Te.Field.nLoc = -1;
2765 fieldE.Te.Field.pos.offset = cts.u.val;
2766 }
2767 if (attr == DW_AT_data_member_location && cts.szB <= 0) {
2768 fieldE.Te.Field.nLoc = (UWord)(-cts.szB);
2769 fieldE.Te.Field.pos.loc
2770 = ML_(cur_read_memdup)( cts.u.cur,
2771 (SizeT)fieldE.Te.Field.nLoc,
2772 "di.readdwarf3.ptD.member.2" );
2773 }
2774 }
2775 /* Do we have a plausible parent? */
2776 if (typestack_is_empty(parser)) goto_bad_DIE;
2777 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2778 vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2779 if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
2780 if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto_bad_DIE;
2781 /* Do we have something that looks sane? If this a member of a
2782 struct, we must have a location expression; but if a member
2783 of a union that is irrelevant (D3 spec sec 5.6.6). We ought
2784 to reject in the latter case, but some compilers have been
2785 observed to emit constant-zero expressions. So just ignore
2786 them. */
2787 parent_is_struct
2788 = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct;
2789 if (!fieldE.Te.Field.name)
2790 fieldE.Te.Field.name
2791 = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3",
2792 "<anon_field>" );
2793 vg_assert(fieldE.Te.Field.name);
2794 if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF)
2795 goto_bad_DIE;
2796 if (fieldE.Te.Field.nLoc) {
2797 if (!parent_is_struct) {
2798 /* If this is a union type, pretend we haven't seen the data
2799 member location expression, as it is by definition
2800 redundant (it must be zero). */
2801 if (fieldE.Te.Field.nLoc > 0)
2802 ML_(dinfo_free)(fieldE.Te.Field.pos.loc);
2803 fieldE.Te.Field.pos.loc = NULL;
2804 fieldE.Te.Field.nLoc = 0;
2805 }
2806 /* Record this child in the parent */
2807 fieldE.Te.Field.isStruct = parent_is_struct;
2808 vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs);
2809 VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs,
2810 &posn );
2811 /* And record the child itself */
2812 goto acquire_Field;
2813 } else {
2814 /* Member with no location - this can happen with static
2815 const members in C++ code which are compile time constants
2816 that do no exist in the class. They're not of any interest
2817 to us so we ignore them. */
2818 ML_(TyEnt__make_EMPTY)(&fieldE);
2819 }
2820 }
2821
2822 if (dtag == DW_TAG_array_type) {
2823 VG_(memset)(&typeE, 0, sizeof(typeE));
2824 typeE.cuOff = posn;
2825 typeE.tag = Te_TyArray;
2826 typeE.Te.TyArray.typeR = D3_INVALID_CUOFF;
2827 typeE.Te.TyArray.boundRs
2828 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1",
2829 ML_(dinfo_free),
2830 sizeof(UWord) );
2831 while (True) {
2832 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2833 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2834 if (attr == 0 && form == 0) break;
2835 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2836 if (attr == DW_AT_type && cts.szB > 0) {
2837 typeE.Te.TyArray.typeR
2838 = cook_die_using_form( cc, (UWord)cts.u.val, form );
2839 }
2840 }
2841 if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF)
2842 goto_bad_DIE;
2843 /* On't stack! */
2844 typestack_push( cc, parser, td3, &typeE, level );
2845 goto acquire_Type;
2846 }
2847
2848 /* this is a subrange type defining the bounds of an array. */
2849 if (dtag == DW_TAG_subrange_type
2850 && subrange_type_denotes_array_bounds(parser, dtag)) {
2851 Bool have_lower = False;
2852 Bool have_upper = False;
2853 Bool have_count = False;
2854 Long lower = 0;
2855 Long upper = 0;
2856
2857 switch (parser->language) {
2858 case 'C': have_lower = True; lower = 0; break;
2859 case 'F': have_lower = True; lower = 1; break;
2860 case '?': have_lower = False; break;
2861 case 'A': have_lower = False; break;
2862 default: vg_assert(0); /* assured us by handling of
2863 DW_TAG_compile_unit in this fn */
2864 }
2865
2866 VG_(memset)( &boundE, 0, sizeof(boundE) );
2867 boundE.cuOff = D3_INVALID_CUOFF;
2868 boundE.tag = Te_Bound;
2869 while (True) {
2870 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2871 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2872 if (attr == 0 && form == 0) break;
2873 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2874 if (attr == DW_AT_lower_bound && cts.szB > 0) {
2875 lower = (Long)cts.u.val;
2876 have_lower = True;
2877 }
2878 if (attr == DW_AT_upper_bound && cts.szB > 0) {
2879 upper = (Long)cts.u.val;
2880 have_upper = True;
2881 }
2882 if (attr == DW_AT_count && cts.szB > 0) {
2883 /*count = (Long)cts.u.val;*/
2884 have_count = True;
2885 }
2886 }
2887 /* FIXME: potentially skip the rest if no parent present, since
2888 it could be the case that this subrange type is free-standing
2889 (not being used to describe the bounds of a containing array
2890 type) */
2891 /* Do we have a plausible parent? */
2892 if (typestack_is_empty(parser)) goto_bad_DIE;
2893 vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2894 vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
2895 if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
2896 if (parser->qparentE[parser->sp].tag != Te_TyArray) goto_bad_DIE;
2897
2898 /* Figure out if we have a definite range or not */
2899 if (have_lower && have_upper && (!have_count)) {
2900 boundE.Te.Bound.knownL = True;
2901 boundE.Te.Bound.knownU = True;
2902 boundE.Te.Bound.boundL = lower;
2903 boundE.Te.Bound.boundU = upper;
2904 }
2905 else if (have_lower && (!have_upper) && (!have_count)) {
2906 boundE.Te.Bound.knownL = True;
2907 boundE.Te.Bound.knownU = False;
2908 boundE.Te.Bound.boundL = lower;
2909 boundE.Te.Bound.boundU = 0;
2910 }
2911 else if ((!have_lower) && have_upper && (!have_count)) {
2912 boundE.Te.Bound.knownL = False;
2913 boundE.Te.Bound.knownU = True;
2914 boundE.Te.Bound.boundL = 0;
2915 boundE.Te.Bound.boundU = upper;
2916 }
2917 else if ((!have_lower) && (!have_upper) && (!have_count)) {
2918 boundE.Te.Bound.knownL = False;
2919 boundE.Te.Bound.knownU = False;
2920 boundE.Te.Bound.boundL = 0;
2921 boundE.Te.Bound.boundU = 0;
2922 } else {
2923 /* FIXME: handle more cases */
2924 goto_bad_DIE;
2925 }
2926
2927 /* Record this bound in the parent */
2928 boundE.cuOff = posn;
2929 vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs);
2930 VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs,
2931 &boundE.cuOff );
2932 /* And record the child itself */
2933 goto acquire_Bound;
2934 }
2935
2936 /* typedef or subrange_type other than array bounds. */
2937 if (dtag == DW_TAG_typedef
2938 || (dtag == DW_TAG_subrange_type
2939 && !subrange_type_denotes_array_bounds(parser, dtag))) {
2940 /* subrange_type other than array bound is only for Ada. */
2941 vg_assert (dtag == DW_TAG_typedef || parser->language == 'A');
2942 /* We can pick up a new typedef/subrange_type any time. */
2943 VG_(memset)(&typeE, 0, sizeof(typeE));
2944 typeE.cuOff = D3_INVALID_CUOFF;
2945 typeE.tag = Te_TyTyDef;
2946 typeE.Te.TyTyDef.name = NULL;
2947 typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF;
2948 while (True) {
2949 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2950 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2951 if (attr == 0 && form == 0) break;
2952 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2953 if (attr == DW_AT_name && cts.szB < 0) {
2954 typeE.Te.TyTyDef.name
2955 = ML_(cur_read_strdup)( cts.u.cur,
2956 "di.readdwarf3.ptD.typedef.1" );
2957 }
2958 if (attr == DW_AT_type && cts.szB > 0) {
2959 typeE.Te.TyTyDef.typeR
2960 = cook_die_using_form( cc, (UWord)cts.u.val, form );
2961 }
2962 }
2963 /* Do we have something that looks sane?
2964 gcc gnat Ada generates minimal typedef
2965 such as the below
2966 <6><91cc>: DW_TAG_typedef
2967 DW_AT_abstract_ori: <9066>
2968 g++ for OMP can generate artificial functions that have
2969 parameters that refer to pointers to unnamed typedefs.
2970 See https://bugs.kde.org/show_bug.cgi?id=273475
2971 So we cannot require a name for a DW_TAG_typedef.
2972 */
2973 goto acquire_Type;
2974 }
2975
2976 if (dtag == DW_TAG_subroutine_type) {
2977 /* function type? just record that one fact and ask no
2978 further questions. */
2979 VG_(memset)(&typeE, 0, sizeof(typeE));
2980 typeE.cuOff = D3_INVALID_CUOFF;
2981 typeE.tag = Te_TyFn;
2982 goto acquire_Type;
2983 }
2984
2985 if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type) {
2986 Int have_ty = 0;
2987 VG_(memset)(&typeE, 0, sizeof(typeE));
2988 typeE.cuOff = D3_INVALID_CUOFF;
2989 typeE.tag = Te_TyQual;
2990 typeE.Te.TyQual.qual
2991 = dtag == DW_TAG_volatile_type ? 'V' : 'C';
2992 /* target type defaults to 'void' */
2993 typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
2994 while (True) {
2995 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
2996 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
2997 if (attr == 0 && form == 0) break;
2998 get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2999 if (attr == DW_AT_type && cts.szB > 0) {
3000 typeE.Te.TyQual.typeR
3001 = cook_die_using_form( cc, (UWord)cts.u.val, form );
3002 have_ty++;
3003 }
3004 }
3005 /* gcc sometimes generates DW_TAG_const/volatile_type without
3006 DW_AT_type and GDB appears to interpret the type as 'const
3007 void' (resp. 'volatile void'). So just allow it .. */
3008 if (have_ty == 1 || have_ty == 0)
3009 goto acquire_Type;
3010 else
3011 goto_bad_DIE;
3012 }
3013
3014 /*
3015 * Treat DW_TAG_unspecified_type as type void. An example of DW_TAG_unspecified_type:
3016 *
3017 * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
3018 * <1><10d4>: Abbrev Number: 53 (DW_TAG_unspecified_type)
3019 * <10d5> DW_AT_name : (indirect string, offset: 0xdb7): decltype(nullptr)
3020 */
3021 if (dtag == DW_TAG_unspecified_type) {
3022 VG_(memset)(&typeE, 0, sizeof(typeE));
3023 typeE.cuOff = D3_INVALID_CUOFF;
3024 typeE.tag = Te_TyQual;
3025 typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
3026 goto acquire_Type;
3027 }
3028
3029 /* else ignore this DIE */
3030 return;
3031 /*NOTREACHED*/
3032
3033 acquire_Type:
3034 if (0) VG_(printf)("YYYY Acquire Type\n");
3035 vg_assert(ML_(TyEnt__is_type)( &typeE ));
3036 vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn);
3037 typeE.cuOff = posn;
3038 VG_(addToXA)( tyents, &typeE );
3039 return;
3040 /*NOTREACHED*/
3041
3042 acquire_Atom:
3043 if (0) VG_(printf)("YYYY Acquire Atom\n");
3044 vg_assert(atomE.tag == Te_Atom);
3045 vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn);
3046 atomE.cuOff = posn;
3047 VG_(addToXA)( tyents, &atomE );
3048 return;
3049 /*NOTREACHED*/
3050
3051 acquire_Field:
3052 /* For union members, Expr should be absent */
3053 if (0) VG_(printf)("YYYY Acquire Field\n");
3054 vg_assert(fieldE.tag == Te_Field);
3055 vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL);
3056 vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL);
3057 if (fieldE.Te.Field.isStruct) {
3058 vg_assert(fieldE.Te.Field.nLoc != 0);
3059 } else {
3060 vg_assert(fieldE.Te.Field.nLoc == 0);
3061 }
3062 vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn);
3063 fieldE.cuOff = posn;
3064 VG_(addToXA)( tyents, &fieldE );
3065 return;
3066 /*NOTREACHED*/
3067
3068 acquire_Bound:
3069 if (0) VG_(printf)("YYYY Acquire Bound\n");
3070 vg_assert(boundE.tag == Te_Bound);
3071 vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn);
3072 boundE.cuOff = posn;
3073 VG_(addToXA)( tyents, &boundE );
3074 return;
3075 /*NOTREACHED*/
3076
3077 bad_DIE:
3078 set_position_of_Cursor( c_die, saved_die_c_offset );
3079 set_position_of_Cursor( c_abbv, saved_abbv_c_offset );
3080 posn = uncook_die( cc, posn, &debug_types_flag, &alt_flag );
3081 VG_(printf)(" <%d><%lx>: %s", level, posn, ML_(pp_DW_TAG)( dtag ) );
3082 if (debug_types_flag) {
3083 VG_(printf)(" (in .debug_types)");
3084 } else if (alt_flag) {
3085 VG_(printf)(" (in alternate .debug_info)");
3086 }
3087 VG_(printf)("\n");
3088 while (True) {
3089 DW_AT attr = (DW_AT) get_ULEB128( c_abbv );
3090 DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
3091 if (attr == 0 && form == 0) break;
3092 VG_(printf)(" %18s: ", ML_(pp_DW_AT)(attr));
3093 /* Get the form contents, so as to print them */
3094 get_Form_contents( &cts, cc, c_die, True, form );
3095 VG_(printf)("\t\n");
3096 }
3097 VG_(printf)("\n");
3098 cc->barf("parse_type_DIE: confused by the above DIE");
3099 /*NOTREACHED*/
3100 }
3101
3102
3103 /*------------------------------------------------------------*/
3104 /*--- ---*/
3105 /*--- Compression of type DIE information ---*/
3106 /*--- ---*/
3107 /*------------------------------------------------------------*/
3108
/* Follow at most one step of indirection starting from 'cuOff'.

   The TyEnt stored at 'cuOff' is looked up in 'ents' (using
   'ents_cache').  If it is a Te_INDIR, the offset it redirects to is
   returned and *changed is set to True.  Otherwise 'cuOff' is returned
   unmodified and *changed is set to False.  The latter also applies
   when no entry exists at 'cuOff'; that case is additionally reported
   with a diagnostic message. */
static UWord chase_cuOff ( Bool* changed,
                           XArray* /* of TyEnt */ ents,
                           TyEntIndexCache* ents_cache,
                           UWord cuOff )
{
   TyEnt* target = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff );

   if (target) {
      vg_assert(target->tag != Te_EMPTY);
      if (target->tag == Te_INDIR) {
         /* A redirect is required to lead to a lower offset than the
            one it was reached from. */
         vg_assert(target->Te.INDIR.indR < cuOff);
         *changed = True;
         return target->Te.INDIR.indR;
      }
   } else {
      VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff);
   }

   /* Either nothing was found, or what was found is not a redirect. */
   *changed = False;
   return cuOff;
}
3133
3134 static
chase_cuOffs_in_XArray(Bool * changed,XArray * ents,TyEntIndexCache * ents_cache,XArray * cuOffs)3135 void chase_cuOffs_in_XArray ( Bool* changed,
3136 XArray* /* of TyEnt */ ents,
3137 TyEntIndexCache* ents_cache,
3138 /*MOD*/XArray* /* of UWord */ cuOffs )
3139 {
3140 Bool b2 = False;
3141 Word i, n = VG_(sizeXA)( cuOffs );
3142 for (i = 0; i < n; i++) {
3143 Bool b = False;
3144 UWord* p = VG_(indexXA)( cuOffs, i );
3145 *p = chase_cuOff( &b, ents, ents_cache, *p );
3146 if (b)
3147 b2 = True;
3148 }
3149 *changed = b2;
3150 }
3151
TyEnt__subst_R_fields(XArray * ents,TyEntIndexCache * ents_cache,TyEnt * te)3152 static Bool TyEnt__subst_R_fields ( XArray* /* of TyEnt */ ents,
3153 TyEntIndexCache* ents_cache,
3154 /*MOD*/TyEnt* te )
3155 {
3156 Bool b, changed = False;
3157 switch (te->tag) {
3158 case Te_EMPTY:
3159 break;
3160 case Te_INDIR:
3161 te->Te.INDIR.indR
3162 = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR );
3163 if (b) changed = True;
3164 break;
3165 case Te_UNKNOWN:
3166 break;
3167 case Te_Atom:
3168 break;
3169 case Te_Field:
3170 te->Te.Field.typeR
3171 = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR );
3172 if (b) changed = True;
3173 break;
3174 case Te_Bound:
3175 break;
3176 case Te_TyBase:
3177 break;
3178 case Te_TyPtr:
3179 case Te_TyRef:
3180 case Te_TyPtrMbr:
3181 case Te_TyRvalRef:
3182 te->Te.TyPorR.typeR
3183 = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR );
3184 if (b) changed = True;
3185 break;
3186 case Te_TyTyDef:
3187 te->Te.TyTyDef.typeR
3188 = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR );
3189 if (b) changed = True;
3190 break;
3191 case Te_TyStOrUn:
3192 chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs );
3193 if (b) changed = True;
3194 break;
3195 case Te_TyEnum:
3196 chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs );
3197 if (b) changed = True;
3198 break;
3199 case Te_TyArray:
3200 te->Te.TyArray.typeR
3201 = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR );
3202 if (b) changed = True;
3203 chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs );
3204 if (b) changed = True;
3205 break;
3206 case Te_TyFn:
3207 break;
3208 case Te_TyQual:
3209 te->Te.TyQual.typeR
3210 = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR );
3211 if (b) changed = True;
3212 break;
3213 case Te_TyVoid:
3214 break;
3215 default:
3216 ML_(pp_TyEnt)(te);
3217 vg_assert(0);
3218 }
3219 return changed;
3220 }
3221
3222 /* Make a pass over 'ents'. For each tyent, inspect the target of any
3223 'R' or 'Rs' fields (those which refer to other tyents), and replace
3224 any which point to INDIR nodes with the target of the indirection
3225 (which should not itself be an indirection). In summary, this
3226 routine shorts out all references to indirection nodes. */
3227 static
dedup_types_substitution_pass(XArray * ents,TyEntIndexCache * ents_cache)3228 Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents,
3229 TyEntIndexCache* ents_cache )
3230 {
3231 Word i, n, nChanged = 0;
3232 Bool b;
3233 n = VG_(sizeXA)( ents );
3234 for (i = 0; i < n; i++) {
3235 TyEnt* ent = VG_(indexXA)( ents, i );
3236 vg_assert(ent->tag != Te_EMPTY);
3237 /* We have to substitute everything, even indirections, so as to
3238 ensure that chains of indirections don't build up. */
3239 b = TyEnt__subst_R_fields( ents, ents_cache, ent );
3240 if (b)
3241 nChanged++;
3242 }
3243
3244 return nChanged;
3245 }
3246
3247
3248 /* Make a pass over 'ents', building a dictionary of TyEnts as we go.
3249 Look up each new tyent in the dictionary in turn. If it is already
3250 in the dictionary, replace this tyent with an indirection to the
3251 existing one, and delete any malloc'd stuff hanging off this one.
3252 In summary, this routine commons up all tyents that are identical
3253 as defined by TyEnt__cmp_by_all_except_cuOff. */
3254 static
dedup_types_commoning_pass(XArray * ents)3255 Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents )
3256 {
3257 Word n, i, nDeleted;
3258 WordFM* dict; /* TyEnt* -> void */
3259 TyEnt* ent;
3260 UWord keyW, valW;
3261
3262 dict = VG_(newFM)(
3263 ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1",
3264 ML_(dinfo_free),
3265 (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff)
3266 );
3267
3268 nDeleted = 0;
3269 n = VG_(sizeXA)( ents );
3270 for (i = 0; i < n; i++) {
3271 ent = VG_(indexXA)( ents, i );
3272 vg_assert(ent->tag != Te_EMPTY);
3273
3274 /* Ignore indirections, although check that they are
3275 not forming a cycle. */
3276 if (ent->tag == Te_INDIR) {
3277 vg_assert(ent->Te.INDIR.indR < ent->cuOff);
3278 continue;
3279 }
3280
3281 keyW = valW = 0;
3282 if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) {
3283 /* it's already in the dictionary. */
3284 TyEnt* old = (TyEnt*)keyW;
3285 vg_assert(valW == 0);
3286 vg_assert(old != ent);
3287 vg_assert(old->tag != Te_INDIR);
3288 /* since we are traversing the array in increasing order of
3289 cuOff: */
3290 vg_assert(old->cuOff < ent->cuOff);
3291 /* So anyway, dump this entry and replace it with an
3292 indirection to the one in the dictionary. Note that the
3293 assertion above guarantees that we cannot create cycles of
3294 indirections, since we are always creating an indirection
3295 to a tyent with a cuOff lower than this one. */
3296 ML_(TyEnt__make_EMPTY)( ent );
3297 ent->tag = Te_INDIR;
3298 ent->Te.INDIR.indR = old->cuOff;
3299 nDeleted++;
3300 } else {
3301 /* not in dictionary; add it and keep going. */
3302 VG_(addToFM)( dict, (UWord)ent, 0 );
3303 }
3304 }
3305
3306 VG_(deleteFM)( dict, NULL, NULL );
3307
3308 return nDeleted;
3309 }
3310
3311
3312 static
dedup_types(Bool td3,XArray * ents,TyEntIndexCache * ents_cache)3313 void dedup_types ( Bool td3,
3314 /*MOD*/XArray* /* of TyEnt */ ents,
3315 TyEntIndexCache* ents_cache )
3316 {
3317 Word m, n, i, nDel, nSubst, nThresh;
3318 if (0) td3 = True;
3319
3320 n = VG_(sizeXA)( ents );
3321
3322 /* If a commoning pass and a substitution pass both make fewer than
3323 this many changes, just stop. It's pointless to burn up CPU
3324 time trying to compress the last 1% or so out of the array. */
3325 nThresh = n / 200;
3326
3327 /* First we must sort .ents by its .cuOff fields, so we
3328 can index into it. */
3329 VG_(setCmpFnXA)( ents, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
3330 VG_(sortXA)( ents );
3331
3332 /* Now repeatedly do commoning and substitution passes over
3333 the array, until there are no more changes. */
3334 do {
3335 nDel = dedup_types_commoning_pass ( ents );
3336 nSubst = dedup_types_substitution_pass ( ents, ents_cache );
3337 vg_assert(nDel >= 0 && nSubst >= 0);
3338 TRACE_D3(" %ld deletions, %ld substitutions\n", nDel, nSubst);
3339 } while (nDel > nThresh || nSubst > nThresh);
3340
3341 /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
3342 In fact this should be true at the end of every loop iteration
3343 above (a commoning pass followed by a substitution pass), but
3344 checking it on every iteration is excessively expensive. Note,
3345 this loop also computes 'm' for the stats printing below it. */
3346 m = 0;
3347 n = VG_(sizeXA)( ents );
3348 for (i = 0; i < n; i++) {
3349 TyEnt *ent, *ind;
3350 ent = VG_(indexXA)( ents, i );
3351 if (ent->tag != Te_INDIR) continue;
3352 m++;
3353 ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
3354 ent->Te.INDIR.indR );
3355 vg_assert(ind);
3356 vg_assert(ind->tag != Te_INDIR);
3357 }
3358
3359 TRACE_D3("Overall: %ld before, %ld after\n", n, n-m);
3360 }
3361
3362
3363 /*------------------------------------------------------------*/
3364 /*--- ---*/
3365 /*--- Resolution of references to type DIEs ---*/
3366 /*--- ---*/
3367 /*------------------------------------------------------------*/
3368
3369 /* Make a pass through the (temporary) variables array. Examine the
3370 type of each variable, check is it found, and chase any Te_INDIRs.
3371 Postcondition is: each variable has a typeR field that refers to a
3372 valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
3373 not to refer to a Te_INDIR. (This is so that we can throw all the
3374 Te_INDIRs away later). */
3375
3376 __attribute__((noinline))
resolve_variable_types(void (* barf)(const HChar *),XArray * ents,TyEntIndexCache * ents_cache,XArray * vars)3377 static void resolve_variable_types (
3378 void (*barf)( const HChar* ) __attribute__((noreturn)),
3379 /*R-O*/XArray* /* of TyEnt */ ents,
3380 /*MOD*/TyEntIndexCache* ents_cache,
3381 /*MOD*/XArray* /* of TempVar* */ vars
3382 )
3383 {
3384 Word i, n;
3385 n = VG_(sizeXA)( vars );
3386 for (i = 0; i < n; i++) {
3387 TempVar* var = *(TempVar**)VG_(indexXA)( vars, i );
3388 /* This is the stated type of the variable. But it might be
3389 an indirection, so be careful. */
3390 TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
3391 var->typeR );
3392 if (ent && ent->tag == Te_INDIR) {
3393 ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
3394 ent->Te.INDIR.indR );
3395 vg_assert(ent);
3396 vg_assert(ent->tag != Te_INDIR);
3397 }
3398
3399 /* Deal first with "normal" cases */
3400 if (ent && ML_(TyEnt__is_type)(ent)) {
3401 var->typeR = ent->cuOff;
3402 continue;
3403 }
3404
3405 /* If there's no ent, it probably we did not manage to read a
3406 type at the cuOffset which is stated as being this variable's
3407 type. Maybe a deficiency in parse_type_DIE. Complain. */
3408 if (ent == NULL) {
3409 VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR );
3410 barf("resolve_variable_types: "
3411 "cuOff does not refer to a known type");
3412 }
3413 vg_assert(ent);
3414 /* If ent has any other tag, something bad happened, along the
3415 lines of var->typeR not referring to a type at all. */
3416 vg_assert(ent->tag == Te_UNKNOWN);
3417 /* Just accept it; the type will be useless, but at least keep
3418 going. */
3419 var->typeR = ent->cuOff;
3420 }
3421 }
3422
3423
3424 /*------------------------------------------------------------*/
3425 /*--- ---*/
3426 /*--- Parsing of Compilation Units ---*/
3427 /*--- ---*/
3428 /*------------------------------------------------------------*/
3429
cmp_TempVar_by_dioff(const void * v1,const void * v2)3430 static Int cmp_TempVar_by_dioff ( const void* v1, const void* v2 ) {
3431 const TempVar* t1 = *(const TempVar *const *)v1;
3432 const TempVar* t2 = *(const TempVar *const *)v2;
3433 if (t1->dioff < t2->dioff) return -1;
3434 if (t1->dioff > t2->dioff) return 1;
3435 return 0;
3436 }
3437
/* Read the DIE at the current position of cursor 'c' and then,
   recursively, each of its children (at level+1).

   The DIE's attributes are traversed three times: once only to trace
   them (when 'td3' is set), once by parse_type_DIE and once by
   parse_var_DIE.  Both cursors are rewound to the first attribute
   before each parser call, and are placed just past the last
   attribute before any children are read.  Calls cc->barf if the
   abbreviation has a zero tag or an invalid has_children value. */
static void read_DIE (
   /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
   /*MOD*/XArray* /* of TyEnt */ tyents,
   /*MOD*/XArray* /* of TempVar* */ tempvars,
   /*MOD*/XArray* /* of GExpr* */ gexprs,
   /*MOD*/D3TypeParser* typarser,
   /*MOD*/D3VarParser* varparser,
   Cursor* c, Bool td3, CUConst* cc, Int level
)
{
   Cursor abbv;
   ULong  tag, abbrevCode;
   UWord  diePosn;
   UInt   childFlag;
   UWord  dieStart, abbvStart;  /* cursor positions at first attribute */
   UWord  dieEnd, abbvEnd;      /* cursor positions after last attribute */

   /* --- Deal with this DIE --- */
   diePosn    = cook_die( cc, get_position_of_Cursor( c ) );
   abbrevCode = get_ULEB128( c );
   set_abbv_Cursor( &abbv, td3, cc, abbrevCode );
   tag = get_ULEB128( &abbv );
   TRACE_D3("\n");
   TRACE_D3(" <%d><%lx>: Abbrev Number: %llu (%s)\n",
            level, diePosn, abbrevCode, ML_(pp_DW_TAG)( tag ) );

   if (tag == 0)
      cc->barf("read_DIE: invalid zero tag on DIE");

   childFlag = get_UChar( &abbv );
   if (!(childFlag == DW_children_no || childFlag == DW_children_yes))
      cc->barf("read_DIE: invalid has_children value");

   /* Both cursors now sit on the first attribute.  The parsers called
      below will in general move them, so note where the attributes
      start in order to be able to rewind. */
   dieStart  = get_position_of_Cursor( c );
   abbvStart = get_position_of_Cursor( &abbv );

   /* Tracing traversal.  It also serves to find out where the
      attributes end. */
   for (;;) {
      FormContents cts;
      ULong at_name = get_ULEB128( &abbv );
      ULong at_form = get_ULEB128( &abbv );
      if (at_name == 0 && at_form == 0)
         break;
      TRACE_D3(" %18s: ", ML_(pp_DW_AT)(at_name));
      /* The contents themselves are not used; fetching them is only
         for the sake of printing them, if td3 is True. */
      get_Form_contents( &cts, cc, c, td3, (DW_FORM)at_form );
      TRACE_D3("\t");
      TRACE_D3("\n");
   }

   dieEnd  = get_position_of_Cursor( c );
   abbvEnd = get_position_of_Cursor( &abbv );

   /* Type parser, starting from the first attribute. */
   set_position_of_Cursor( c, dieStart );
   set_position_of_Cursor( &abbv, abbvStart );

   parse_type_DIE( tyents,
                   typarser,
                   (DW_TAG)tag,
                   diePosn,
                   level,
                   c,     /* DIE cursor */
                   &abbv, /* abbrev cursor */
                   cc,
                   td3 );

   /* Variable parser, again starting from the first attribute. */
   set_position_of_Cursor( c, dieStart );
   set_position_of_Cursor( &abbv, abbvStart );

   parse_var_DIE( rangestree,
                  tempvars,
                  gexprs,
                  varparser,
                  (DW_TAG)tag,
                  diePosn,
                  level,
                  c,     /* DIE cursor */
                  &abbv, /* abbrev cursor */
                  cc,
                  td3 );

   /* Leave the cursors just past this DIE's attributes. */
   set_position_of_Cursor( c, dieEnd );
   set_position_of_Cursor( &abbv, abbvEnd );

   /* --- Now recurse into its children, if any --- */
   if (childFlag != DW_children_yes)
      return;

   if (0) TRACE_D3("BEGIN children of level %d\n", level);
   /* Children follow one another until a zero is encountered. */
   while (peek_ULEB128( c ) != 0) {
      read_DIE( rangestree, tyents, tempvars, gexprs,
                typarser, varparser,
                c, td3, cc, level+1 );
   }
   /* Consume that terminating zero. */
   tag = get_ULEB128( c );
   vg_assert(tag == 0);
   if (0) TRACE_D3("END children of level %d\n", level);
}
3544
3545
3546 static
new_dwarf3_reader_wrk(struct _DebugInfo * di,void (* barf)(const HChar *),DiSlice escn_debug_info,DiSlice escn_debug_types,DiSlice escn_debug_abbv,DiSlice escn_debug_line,DiSlice escn_debug_str,DiSlice escn_debug_ranges,DiSlice escn_debug_loc,DiSlice escn_debug_info_alt,DiSlice escn_debug_abbv_alt,DiSlice escn_debug_line_alt,DiSlice escn_debug_str_alt)3547 void new_dwarf3_reader_wrk (
3548 struct _DebugInfo* di,
3549 __attribute__((noreturn)) void (*barf)( const HChar* ),
3550 DiSlice escn_debug_info, DiSlice escn_debug_types,
3551 DiSlice escn_debug_abbv, DiSlice escn_debug_line,
3552 DiSlice escn_debug_str, DiSlice escn_debug_ranges,
3553 DiSlice escn_debug_loc, DiSlice escn_debug_info_alt,
3554 DiSlice escn_debug_abbv_alt, DiSlice escn_debug_line_alt,
3555 DiSlice escn_debug_str_alt
3556 )
3557 {
3558 XArray* /* of TyEnt */ tyents;
3559 XArray* /* of TyEnt */ tyents_to_keep;
3560 XArray* /* of GExpr* */ gexprs;
3561 XArray* /* of TempVar* */ tempvars;
3562 WordFM* /* of (XArray* of AddrRange, void) */ rangestree;
3563 TyEntIndexCache* tyents_cache = NULL;
3564 TyEntIndexCache* tyents_to_keep_cache = NULL;
3565 TempVar *varp, *varp2;
3566 GExpr* gexpr;
3567 Cursor abbv; /* for showing .debug_abbrev */
3568 Cursor info; /* primary cursor for parsing .debug_info */
3569 Cursor ranges; /* for showing .debug_ranges */
3570 D3TypeParser typarser;
3571 D3VarParser varparser;
3572 Addr dr_base;
3573 UWord dr_offset;
3574 Word i, j, n;
3575 Bool td3 = di->trace_symtab;
3576 XArray* /* of TempVar* */ dioff_lookup_tab;
3577 Int pass;
3578 VgHashTable signature_types;
3579 #if 0
3580 /* This doesn't work properly because it assumes all entries are
3581 packed end to end, with no holes. But that doesn't always
3582 appear to be the case, so it loses sync. And the D3 spec
3583 doesn't appear to require a no-hole situation either. */
3584 /* Display .debug_loc */
3585 Addr dl_base;
3586 UWord dl_offset;
3587 Cursor loc; /* for showing .debug_loc */
3588 TRACE_SYMTAB("\n");
3589 TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
3590 TRACE_SYMTAB(" Offset Begin End Expression\n");
3591 init_Cursor( &loc, debug_loc_img,
3592 debug_loc_sz, 0, barf,
3593 "Overrun whilst reading .debug_loc section(1)" );
3594 dl_base = 0;
3595 dl_offset = 0;
3596 while (True) {
3597 UWord w1, w2;
3598 UWord len;
3599 if (is_at_end_Cursor( &loc ))
3600 break;
3601
3602 /* Read a (host-)word pair. This is something of a hack since
3603 the word size to read is really dictated by the ELF file;
3604 however, we assume we're reading a file with the same
3605 word-sizeness as the host. Reasonably enough. */
3606 w1 = get_UWord( &loc );
3607 w2 = get_UWord( &loc );
3608
3609 if (w1 == 0 && w2 == 0) {
3610 /* end of list. reset 'base' */
3611 TRACE_D3(" %08lx <End of list>\n", dl_offset);
3612 dl_base = 0;
3613 dl_offset = get_position_of_Cursor( &loc );
3614 continue;
3615 }
3616
3617 if (w1 == -1UL) {
3618 /* new value for 'base' */
3619 TRACE_D3(" %08lx %16lx %08lx (base address)\n",
3620 dl_offset, w1, w2);
3621 dl_base = w2;
3622 continue;
3623 }
3624
3625 /* else a location expression follows */
3626 TRACE_D3(" %08lx %08lx %08lx ",
3627 dl_offset, w1 + dl_base, w2 + dl_base);
3628 len = (UWord)get_UShort( &loc );
3629 while (len > 0) {
3630 UChar byte = get_UChar( &loc );
3631 TRACE_D3("%02x", (UInt)byte);
3632 len--;
3633 }
3634 TRACE_SYMTAB("\n");
3635 }
3636 #endif
3637
3638 /* Display .debug_ranges */
3639 TRACE_SYMTAB("\n");
3640 TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
3641 TRACE_SYMTAB(" Offset Begin End\n");
3642 if (ML_(sli_is_valid)(escn_debug_ranges)) {
3643 init_Cursor( &ranges, escn_debug_ranges, 0, barf,
3644 "Overrun whilst reading .debug_ranges section(1)" );
3645 dr_base = 0;
3646 dr_offset = 0;
3647 while (True) {
3648 UWord w1, w2;
3649
3650 if (is_at_end_Cursor( &ranges ))
3651 break;
3652
3653 /* Read a (host-)word pair. This is something of a hack since
3654 the word size to read is really dictated by the ELF file;
3655 however, we assume we're reading a file with the same
3656 word-sizeness as the host. Reasonably enough. */
3657 w1 = get_UWord( &ranges );
3658 w2 = get_UWord( &ranges );
3659
3660 if (w1 == 0 && w2 == 0) {
3661 /* end of list. reset 'base' */
3662 TRACE_D3(" %08lx <End of list>\n", dr_offset);
3663 dr_base = 0;
3664 dr_offset = get_position_of_Cursor( &ranges );
3665 continue;
3666 }
3667
3668 if (w1 == -1UL) {
3669 /* new value for 'base' */
3670 TRACE_D3(" %08lx %16lx %08lx (base address)\n",
3671 dr_offset, w1, w2);
3672 dr_base = w2;
3673 continue;
3674 }
3675
3676 /* else a range [w1+base, w2+base) is denoted */
3677 TRACE_D3(" %08lx %08lx %08lx\n",
3678 dr_offset, w1 + dr_base, w2 + dr_base);
3679 }
3680 }
3681
3682 /* Display .debug_abbrev */
3683 TRACE_SYMTAB("\n");
3684 TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
3685 if (ML_(sli_is_valid)(escn_debug_abbv)) {
3686 init_Cursor( &abbv, escn_debug_abbv, 0, barf,
3687 "Overrun whilst reading .debug_abbrev section" );
3688 while (True) {
3689 if (is_at_end_Cursor( &abbv ))
3690 break;
3691 /* Read one abbreviation table */
3692 TRACE_D3(" Number TAG\n");
3693 while (True) {
3694 ULong atag;
3695 UInt has_children;
3696 ULong acode = get_ULEB128( &abbv );
3697 if (acode == 0) break; /* end of the table */
3698 atag = get_ULEB128( &abbv );
3699 has_children = get_UChar( &abbv );
3700 TRACE_D3(" %llu %s [%s]\n",
3701 acode, ML_(pp_DW_TAG)(atag),
3702 ML_(pp_DW_children)(has_children));
3703 while (True) {
3704 ULong at_name = get_ULEB128( &abbv );
3705 ULong at_form = get_ULEB128( &abbv );
3706 if (at_name == 0 && at_form == 0) break;
3707 TRACE_D3(" %18s %s\n",
3708 ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form));
3709 }
3710 }
3711 }
3712 }
3713 TRACE_SYMTAB("\n");
3714
3715 /* We'll park the harvested type information in here. Also create
3716 a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
3717 have at least one type entry to refer to. D3_FAKEVOID_CUOFF is
3718 huge and presumably will not occur in any valid DWARF3 file --
3719 it would need to have a .debug_info section 4GB long for that to
3720 happen. These type entries end up in the DebugInfo. */
3721 tyents = VG_(newXA)( ML_(dinfo_zalloc),
3722 "di.readdwarf3.ndrw.1 (TyEnt temp array)",
3723 ML_(dinfo_free), sizeof(TyEnt) );
3724 { TyEnt tyent;
3725 VG_(memset)(&tyent, 0, sizeof(tyent));
3726 tyent.tag = Te_TyVoid;
3727 tyent.cuOff = D3_FAKEVOID_CUOFF;
3728 tyent.Te.TyVoid.isFake = True;
3729 VG_(addToXA)( tyents, &tyent );
3730 }
3731 { TyEnt tyent;
3732 VG_(memset)(&tyent, 0, sizeof(tyent));
3733 tyent.tag = Te_UNKNOWN;
3734 tyent.cuOff = D3_INVALID_CUOFF;
3735 VG_(addToXA)( tyents, &tyent );
3736 }
3737
3738 /* This is a tree used to unique-ify the range lists that are
3739 manufactured by parse_var_DIE. References to the keys in the
3740 tree wind up in .rngMany fields in TempVars. We'll need to
3741 delete this tree, and the XArrays attached to it, at the end of
3742 this function. */
3743 rangestree = VG_(newFM)( ML_(dinfo_zalloc),
3744 "di.readdwarf3.ndrw.2 (rangestree)",
3745 ML_(dinfo_free),
3746 (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange );
3747
3748 /* List of variables we're accumulating. These don't end up in the
3749 DebugInfo; instead their contents are handed to ML_(addVar) and
3750 the list elements are then deleted. */
3751 tempvars = VG_(newXA)( ML_(dinfo_zalloc),
3752 "di.readdwarf3.ndrw.3 (TempVar*s array)",
3753 ML_(dinfo_free),
3754 sizeof(TempVar*) );
3755
3756 /* List of GExprs we're accumulating. These wind up in the
3757 DebugInfo. */
3758 gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4",
3759 ML_(dinfo_free), sizeof(GExpr*) );
3760
3761 /* We need a D3TypeParser to keep track of partially constructed
3762 types. It'll be discarded as soon as we've completed the CU,
3763 since the resulting information is tipped in to 'tyents' as it
3764 is generated. */
3765 VG_(memset)( &typarser, 0, sizeof(typarser) );
3766 typarser.sp = -1;
3767 typarser.language = '?';
3768 for (i = 0; i < N_D3_TYPE_STACK; i++) {
3769 typarser.qparentE[i].tag = Te_EMPTY;
3770 typarser.qparentE[i].cuOff = D3_INVALID_CUOFF;
3771 }
3772
3773 VG_(memset)( &varparser, 0, sizeof(varparser) );
3774 varparser.sp = -1;
3775
3776 signature_types = VG_(HT_construct) ("signature_types");
3777
3778 /* Do an initial pass to scan the .debug_types section, if any, and
3779 fill in the signatured types hash table. This lets us handle
3780 mapping from a type signature to a (cooked) DIE offset directly
3781 in get_Form_contents. */
3782 if (ML_(sli_is_valid)(escn_debug_types)) {
3783 init_Cursor( &info, escn_debug_types, 0, barf,
3784 "Overrun whilst reading .debug_types section" );
3785 TRACE_D3("\n------ Collecting signatures from "
3786 ".debug_types section ------\n");
3787
3788 while (True) {
3789 UWord cu_start_offset, cu_offset_now;
3790 CUConst cc;
3791
3792 cu_start_offset = get_position_of_Cursor( &info );
3793 TRACE_D3("\n");
3794 TRACE_D3(" Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
3795 /* parse_CU_header initialises the CU's set_abbv_Cursor cache
3796 (saC_cache) */
3797 parse_CU_Header( &cc, td3, &info, escn_debug_abbv, True, False );
3798
3799 /* Needed by cook_die. */
3800 cc.types_cuOff_bias = escn_debug_info.szB;
3801
3802 record_signatured_type( signature_types, cc.type_signature,
3803 cook_die( &cc, cc.type_offset ));
3804
3805 /* Until proven otherwise we assume we don't need the icc9
3806 workaround in this case; see the DIE-reading loop below
3807 for details. */
3808 cu_offset_now = (cu_start_offset + cc.unit_length
3809 + (cc.is_dw64 ? 12 : 4));
3810
3811 if (cu_offset_now >= escn_debug_types.szB)
3812 break;
3813
3814 set_position_of_Cursor ( &info, cu_offset_now );
3815 }
3816 }
3817
3818 /* Perform three DIE-reading passes. The first pass reads DIEs from
3819 alternate .debug_info (if any), the second pass reads DIEs from
3820 .debug_info, and the third pass reads DIEs from .debug_types.
3821 Moving the body of this loop into a separate function would
3822 require a large number of arguments to be passed in, so it is
3823 kept inline instead. */
3824 for (pass = 0; pass < 3; ++pass) {
3825 ULong section_size;
3826
3827 if (pass == 0) {
3828 if (!ML_(sli_is_valid)(escn_debug_info_alt))
3829 continue;
3830 /* Now loop over the Compilation Units listed in the alternate
3831 .debug_info section (see D3SPEC sec 7.5) paras 1 and 2.
3832 Each compilation unit contains a Compilation Unit Header
3833 followed by precisely one DW_TAG_compile_unit or
3834 DW_TAG_partial_unit DIE. */
3835 init_Cursor( &info, escn_debug_info_alt, 0, barf,
3836 "Overrun whilst reading alternate .debug_info section" );
3837 section_size = escn_debug_info_alt.szB;
3838
3839 TRACE_D3("\n------ Parsing alternate .debug_info section ------\n");
3840 } else if (pass == 1) {
3841 /* Now loop over the Compilation Units listed in the .debug_info
3842 section (see D3SPEC sec 7.5) paras 1 and 2. Each compilation
3843 unit contains a Compilation Unit Header followed by precisely
3844 one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
3845 init_Cursor( &info, escn_debug_info, 0, barf,
3846 "Overrun whilst reading .debug_info section" );
3847 section_size = escn_debug_info.szB;
3848
3849 TRACE_D3("\n------ Parsing .debug_info section ------\n");
3850 } else {
3851 if (!ML_(sli_is_valid)(escn_debug_types))
3852 continue;
3853 init_Cursor( &info, escn_debug_types, 0, barf,
3854 "Overrun whilst reading .debug_types section" );
3855 section_size = escn_debug_types.szB;
3856
3857 TRACE_D3("\n------ Parsing .debug_types section ------\n");
3858 }
3859
3860 while (True) {
3861 ULong cu_start_offset, cu_offset_now;
3862 CUConst cc;
3863 /* It may be that the stated size of this CU is larger than the
3864 amount of stuff actually in it. icc9 seems to generate CUs
3865 thusly. We use these variables to figure out if this is
3866 indeed the case, and if so how many bytes we need to skip to
3867 get to the start of the next CU. Not skipping those bytes
3868 causes us to misidentify the start of the next CU, and it all
3869 goes badly wrong after that (not surprisingly). */
3870 UWord cu_size_including_IniLen, cu_amount_used;
3871
3872 /* It seems icc9 finishes the DIE info before debug_info_sz
3873 bytes have been used up. So be flexible, and declare the
3874 sequence complete if there is not enough remaining bytes to
3875 hold even the smallest conceivable CU header. (11 bytes I
3876 reckon). */
3877 /* JRS 23Jan09: I suspect this is no longer necessary now that
3878 the code below contains a 'while (cu_amount_used <
3879 cu_size_including_IniLen ...' style loop, which skips over
3880 any leftover bytes at the end of a CU in the case where the
3881 CU's stated size is larger than its actual size (as
3882 determined by reading all its DIEs). However, for prudence,
3883 I'll leave the following test in place. I can't see that a
3884 CU header can be smaller than 11 bytes, so I don't think
3885 there's any harm possible through the test -- it just adds
3886 robustness. */
3887 Word avail = get_remaining_length_Cursor( &info );
3888 if (avail < 11) {
3889 if (avail > 0)
3890 TRACE_D3("new_dwarf3_reader_wrk: warning: "
3891 "%ld unused bytes after end of DIEs\n", avail);
3892 break;
3893 }
3894
3895 /* Check the varparser's stack is in a sane state. */
3896 vg_assert(varparser.sp == -1);
3897 for (i = 0; i < N_D3_VAR_STACK; i++) {
3898 vg_assert(varparser.ranges[i] == NULL);
3899 vg_assert(varparser.level[i] == 0);
3900 }
3901 for (i = 0; i < N_D3_TYPE_STACK; i++) {
3902 vg_assert(typarser.qparentE[i].cuOff == D3_INVALID_CUOFF);
3903 vg_assert(typarser.qparentE[i].tag == Te_EMPTY);
3904 vg_assert(typarser.qlevel[i] == 0);
3905 }
3906
3907 cu_start_offset = get_position_of_Cursor( &info );
3908 TRACE_D3("\n");
3909 TRACE_D3(" Compilation Unit @ offset 0x%llx:\n", cu_start_offset);
3910 /* parse_CU_header initialises the CU's set_abbv_Cursor cache
3911 (saC_cache) */
3912 if (pass == 0) {
3913 parse_CU_Header( &cc, td3, &info, escn_debug_abbv_alt,
3914 False, True );
3915 } else {
3916 parse_CU_Header( &cc, td3, &info, escn_debug_abbv,
3917 pass == 2, False );
3918 }
3919 cc.escn_debug_str = pass == 0 ? escn_debug_str_alt
3920 : escn_debug_str;
3921 cc.escn_debug_ranges = escn_debug_ranges;
3922 cc.escn_debug_loc = escn_debug_loc;
3923 cc.escn_debug_line = pass == 0 ? escn_debug_line_alt
3924 : escn_debug_line;
3925 cc.escn_debug_info = pass == 0 ? escn_debug_info_alt
3926 : escn_debug_info;
3927 cc.escn_debug_types = escn_debug_types;
3928 cc.escn_debug_info_alt = escn_debug_info_alt;
3929 cc.escn_debug_str_alt = escn_debug_str_alt;
3930 cc.types_cuOff_bias = escn_debug_info.szB;
3931 cc.alt_cuOff_bias = escn_debug_info.szB + escn_debug_types.szB;
3932 cc.cu_start_offset = cu_start_offset;
3933 cc.di = di;
3934 /* The CU's svma can be deduced by looking at the AT_low_pc
3935 value in the top level TAG_compile_unit, which is the topmost
3936 DIE. We'll leave it for the 'varparser' to acquire that info
3937 and fill it in -- since it is the only party to want to know
3938 it. */
3939 cc.cu_svma_known = False;
3940 cc.cu_svma = 0;
3941
3942 cc.signature_types = signature_types;
3943
3944 /* Create a fake outermost-level range covering the entire
3945 address range. So we always have *something* to catch all
3946 variable declarations. */
3947 varstack_push( &cc, &varparser, td3,
3948 unitary_range_list(0UL, ~0UL),
3949 -1, False/*isFunc*/, NULL/*fbGX*/ );
3950
3951 /* And set up the file name table. When we come across the top
3952 level DIE for this CU (which is what the next call to
3953 read_DIE should process) we will copy all the file names out
3954 of the .debug_line img area and use this table to look up the
3955 copies when we later see filename numbers in DW_TAG_variables
3956 etc. */
3957 vg_assert(!varparser.filenameTable );
3958 varparser.filenameTable
3959 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5",
3960 ML_(dinfo_free),
3961 sizeof(UChar*) );
3962 vg_assert(varparser.filenameTable);
3963
3964 /* Now read the one-and-only top-level DIE for this CU. */
3965 vg_assert(varparser.sp == 0);
3966 read_DIE( rangestree,
3967 tyents, tempvars, gexprs,
3968 &typarser, &varparser,
3969 &info, td3, &cc, 0 );
3970
3971 cu_offset_now = get_position_of_Cursor( &info );
3972
3973 if (0) VG_(printf)("Travelled: %llu size %llu\n",
3974 cu_offset_now - cc.cu_start_offset,
3975 cc.unit_length + (cc.is_dw64 ? 12 : 4));
3976
3977 /* How big the CU claims it is .. */
3978 cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4);
3979 /* .. vs how big we have found it to be */
3980 cu_amount_used = cu_offset_now - cc.cu_start_offset;
3981
3982 if (1) TRACE_D3("offset now %lld, d-i-size %lld\n",
3983 cu_offset_now, section_size);
3984 if (cu_offset_now > section_size)
3985 barf("toplevel DIEs beyond end of CU");
3986
3987 /* If the CU is bigger than it claims to be, we've got a serious
3988 problem. */
3989 if (cu_amount_used > cu_size_including_IniLen)
3990 barf("CU's actual size appears to be larger than it claims it is");
3991
3992 /* If the CU is smaller than it claims to be, we need to skip some
3993 bytes. Loop updates cu_offset_new and cu_amount_used. */
3994 while (cu_amount_used < cu_size_including_IniLen
3995 && get_remaining_length_Cursor( &info ) > 0) {
3996 if (0) VG_(printf)("SKIP\n");
3997 (void)get_UChar( &info );
3998 cu_offset_now = get_position_of_Cursor( &info );
3999 cu_amount_used = cu_offset_now - cc.cu_start_offset;
4000 }
4001
4002 /* Preen to level -2. DIEs have level >= 0 so -2 cannot occur
4003 anywhere else at all. Our fake the-entire-address-space
4004 range is at level -1, so preening to -2 should completely
4005 empty the stack out. */
4006 TRACE_D3("\n");
4007 varstack_preen( &varparser, td3, -2 );
4008 /* Similarly, empty the type stack out. */
4009 typestack_preen( &typarser, td3, -2 );
4010
4011 TRACE_D3("set_abbv_Cursor cache: %lu queries, %lu misses\n",
4012 cc.saC_cache_queries, cc.saC_cache_misses);
4013
4014 vg_assert(varparser.filenameTable );
4015 VG_(deleteXA)( varparser.filenameTable );
4016 varparser.filenameTable = NULL;
4017
4018 if (cu_offset_now == section_size)
4019 break;
4020 /* else keep going */
4021 }
4022 }
4023
4024 /* From here on we're post-processing the stuff we got
4025 out of the .debug_info section. */
4026 if (td3) {
4027 TRACE_D3("\n");
4028 ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array");
4029 TRACE_D3("\n");
4030 TRACE_D3("------ Compressing type entries ------\n");
4031 }
4032
4033 tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6",
4034 sizeof(TyEntIndexCache) );
4035 ML_(TyEntIndexCache__invalidate)( tyents_cache );
4036 dedup_types( td3, tyents, tyents_cache );
4037 if (td3) {
4038 TRACE_D3("\n");
4039 ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression");
4040 }
4041
4042 TRACE_D3("\n");
4043 TRACE_D3("------ Resolving the types of variables ------\n" );
4044 resolve_variable_types( barf, tyents, tyents_cache, tempvars );
4045
4046 /* Copy all the non-INDIR tyents into a new table. For large
4047 .so's, about 90% of the tyents will by now have been resolved to
4048 INDIRs, and we no longer need them, and so don't need to store
4049 them. */
4050 tyents_to_keep
4051 = VG_(newXA)( ML_(dinfo_zalloc),
4052 "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
4053 ML_(dinfo_free), sizeof(TyEnt) );
4054 n = VG_(sizeXA)( tyents );
4055 for (i = 0; i < n; i++) {
4056 TyEnt* ent = VG_(indexXA)( tyents, i );
4057 if (ent->tag != Te_INDIR)
4058 VG_(addToXA)( tyents_to_keep, ent );
4059 }
4060
4061 VG_(deleteXA)( tyents );
4062 tyents = NULL;
4063 ML_(dinfo_free)( tyents_cache );
4064 tyents_cache = NULL;
4065
4066 /* Sort tyents_to_keep so we can lookup in it. A complete (if
4067 minor) waste of time, since tyents itself is sorted, but
4068 necessary since VG_(lookupXA) refuses to cooperate if we
4069 don't. */
4070 VG_(setCmpFnXA)( tyents_to_keep, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
4071 VG_(sortXA)( tyents_to_keep );
4072
4073 /* Enable cacheing on tyents_to_keep */
4074 tyents_to_keep_cache
4075 = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8",
4076 sizeof(TyEntIndexCache) );
4077 ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache );
4078
4079 /* And record the tyents in the DebugInfo. We do this before
4080 starting to hand variables to ML_(addVar), since if ML_(addVar)
4081 wants to do debug printing (of the types of said vars) then it
4082 will need the tyents.*/
4083 vg_assert(!di->admin_tyents);
4084 di->admin_tyents = tyents_to_keep;
4085
4086 /* Bias all the location expressions. */
4087 TRACE_D3("\n");
4088 TRACE_D3("------ Biasing the location expressions ------\n" );
4089
4090 n = VG_(sizeXA)( gexprs );
4091 for (i = 0; i < n; i++) {
4092 gexpr = *(GExpr**)VG_(indexXA)( gexprs, i );
4093 bias_GX( gexpr, di );
4094 }
4095
4096 TRACE_D3("\n");
4097 TRACE_D3("------ Acquired the following variables: ------\n\n");
4098
4099 /* Park (pointers to) all the vars in an XArray, so we can look up
4100 abstract origins quickly. The array is sorted (hence, looked-up
4101 by) the .dioff fields. Since the .dioffs should be in strictly
4102 ascending order, there is no need to sort the array after
4103 construction. The ascendingness is however asserted for. */
4104 dioff_lookup_tab
4105 = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9",
4106 ML_(dinfo_free),
4107 sizeof(TempVar*) );
4108 vg_assert(dioff_lookup_tab);
4109
4110 n = VG_(sizeXA)( tempvars );
4111 Word first_primary_var = 0;
4112 for (first_primary_var = 0;
4113 escn_debug_info_alt.szB/*really?*/ && first_primary_var < n;
4114 first_primary_var++) {
4115 varp = *(TempVar**)VG_(indexXA)( tempvars, first_primary_var );
4116 if (varp->dioff < escn_debug_info.szB + escn_debug_types.szB)
4117 break;
4118 }
4119 for (i = 0; i < n; i++) {
4120 varp = *(TempVar**)VG_(indexXA)( tempvars, (i + first_primary_var) % n );
4121 if (i > first_primary_var) {
4122 varp2 = *(TempVar**)VG_(indexXA)( tempvars,
4123 (i + first_primary_var - 1) % n );
4124 /* why should this hold? Only, I think, because we've
4125 constructed the array by reading .debug_info sequentially,
4126 and so the array .dioff fields should reflect that, and be
4127 strictly ascending. */
4128 vg_assert(varp2->dioff < varp->dioff);
4129 }
4130 VG_(addToXA)( dioff_lookup_tab, &varp );
4131 }
4132 VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff );
4133 VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */
4134
4135 /* Now visit each var. Collect up as much info as possible for
4136 each var and hand it to ML_(addVar). */
4137 n = VG_(sizeXA)( tempvars );
4138 for (j = 0; j < n; j++) {
4139 TyEnt* ent;
4140 varp = *(TempVar**)VG_(indexXA)( tempvars, j );
4141
4142 /* Possibly show .. */
4143 if (td3) {
4144 VG_(printf)("<%lx> addVar: level %d: %s :: ",
4145 varp->dioff,
4146 varp->level,
4147 varp->name ? varp->name : "<anon_var>" );
4148 if (varp->typeR) {
4149 ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR );
4150 } else {
4151 VG_(printf)("NULL");
4152 }
4153 VG_(printf)("\n Loc=");
4154 if (varp->gexpr) {
4155 ML_(pp_GX)(varp->gexpr);
4156 } else {
4157 VG_(printf)("NULL");
4158 }
4159 VG_(printf)("\n");
4160 if (varp->fbGX) {
4161 VG_(printf)(" FrB=");
4162 ML_(pp_GX)( varp->fbGX );
4163 VG_(printf)("\n");
4164 } else {
4165 VG_(printf)(" FrB=none\n");
4166 }
4167 VG_(printf)(" declared at: %s:%d\n",
4168 varp->fName ? varp->fName : "NULL",
4169 varp->fLine );
4170 if (varp->absOri != (UWord)D3_INVALID_CUOFF)
4171 VG_(printf)(" abstract origin: <%lx>\n", varp->absOri);
4172 }
4173
4174 /* Skip variables which have no location. These must be
4175 abstract instances; they are useless as-is since with no
4176 location they have no specified memory location. They will
4177 presumably be referred to via the absOri fields of other
4178 variables. */
4179 if (!varp->gexpr) {
4180 TRACE_D3(" SKIP (no location)\n\n");
4181 continue;
4182 }
4183
4184 /* So it has a location, at least. If it refers to some other
4185 entry through its absOri field, pull in further info through
4186 that. */
4187 if (varp->absOri != (UWord)D3_INVALID_CUOFF) {
4188 Bool found;
4189 Word ixFirst, ixLast;
4190 TempVar key;
4191 TempVar* keyp = &key;
4192 TempVar *varAI;
4193 VG_(memset)(&key, 0, sizeof(key)); /* not necessary */
4194 key.dioff = varp->absOri; /* this is what we want to find */
4195 found = VG_(lookupXA)( dioff_lookup_tab, &keyp,
4196 &ixFirst, &ixLast );
4197 if (!found) {
4198 /* barf("DW_AT_abstract_origin can't be resolved"); */
4199 TRACE_D3(" SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
4200 continue;
4201 }
4202 /* If the following fails, there is more than one entry with
4203 the same dioff. Which can't happen. */
4204 vg_assert(ixFirst == ixLast);
4205 varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst );
4206 /* stay sane */
4207 vg_assert(varAI);
4208 vg_assert(varAI->dioff == varp->absOri);
4209
4210 /* Copy what useful info we can. */
4211 if (varAI->typeR && !varp->typeR)
4212 varp->typeR = varAI->typeR;
4213 if (varAI->name && !varp->name)
4214 varp->name = varAI->name;
4215 if (varAI->fName && !varp->fName)
4216 varp->fName = varAI->fName;
4217 if (varAI->fLine > 0 && varp->fLine == 0)
4218 varp->fLine = varAI->fLine;
4219 }
4220
4221 /* Give it a name if it doesn't have one. */
4222 if (!varp->name)
4223 varp->name = ML_(addStr)( di, "<anon_var>", -1 );
4224
4225 /* So now does it have enough info to be useful? */
4226 /* NOTE: re typeR: this is a hack. If typeR is Te_UNKNOWN then
4227 the type didn't get resolved. Really, in that case
4228 something's broken earlier on, and should be fixed, rather
4229 than just skipping the variable. */
4230 ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep,
4231 tyents_to_keep_cache,
4232 varp->typeR );
4233 /* The next two assertions should be guaranteed by
4234 our previous call to resolve_variable_types. */
4235 vg_assert(ent);
4236 vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN);
4237
4238 if (ent->tag == Te_UNKNOWN) continue;
4239
4240 vg_assert(varp->gexpr);
4241 vg_assert(varp->name);
4242 vg_assert(varp->typeR);
4243 vg_assert(varp->level >= 0);
4244
4245 /* Ok. So we're going to keep it. Call ML_(addVar) once for
4246 each address range in which the variable exists. */
4247 TRACE_D3(" ACQUIRE for range(s) ");
4248 { AddrRange oneRange;
4249 AddrRange* varPcRanges;
4250 Word nVarPcRanges;
4251 /* Set up to iterate over address ranges, however
4252 represented. */
4253 if (varp->nRanges == 0 || varp->nRanges == 1) {
4254 vg_assert(!varp->rngMany);
4255 if (varp->nRanges == 0) {
4256 vg_assert(varp->rngOneMin == 0);
4257 vg_assert(varp->rngOneMax == 0);
4258 }
4259 nVarPcRanges = varp->nRanges;
4260 oneRange.aMin = varp->rngOneMin;
4261 oneRange.aMax = varp->rngOneMax;
4262 varPcRanges = &oneRange;
4263 } else {
4264 vg_assert(varp->rngMany);
4265 vg_assert(varp->rngOneMin == 0);
4266 vg_assert(varp->rngOneMax == 0);
4267 nVarPcRanges = VG_(sizeXA)(varp->rngMany);
4268 vg_assert(nVarPcRanges >= 2);
4269 vg_assert(nVarPcRanges == (Word)varp->nRanges);
4270 varPcRanges = VG_(indexXA)(varp->rngMany, 0);
4271 }
4272 if (varp->level == 0)
4273 vg_assert( nVarPcRanges == 1 );
4274 /* and iterate */
4275 for (i = 0; i < nVarPcRanges; i++) {
4276 Addr pcMin = varPcRanges[i].aMin;
4277 Addr pcMax = varPcRanges[i].aMax;
4278 vg_assert(pcMin <= pcMax);
4279 /* Level 0 is the global address range. So at level 0 we
4280 don't want to bias pcMin/pcMax; but at all other levels
4281 we do since those are derived from svmas in the Dwarf
4282 we're reading. Be paranoid ... */
4283 if (varp->level == 0) {
4284 vg_assert(pcMin == (Addr)0);
4285 vg_assert(pcMax == ~(Addr)0);
4286 } else {
4287 /* vg_assert(pcMin > (Addr)0);
4288 No .. we can legitimately expect to see ranges like
4289 0x0-0x11D (pre-biasing, of course). */
4290 vg_assert(pcMax < ~(Addr)0);
4291 }
4292
4293 /* Apply text biasing, for non-global variables. */
4294 if (varp->level > 0) {
4295 pcMin += di->text_debug_bias;
4296 pcMax += di->text_debug_bias;
4297 }
4298
4299 if (i > 0 && (i%2) == 0)
4300 TRACE_D3("\n ");
4301 TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax );
4302
4303 ML_(addVar)(
4304 di, varp->level,
4305 pcMin, pcMax,
4306 varp->name, varp->typeR,
4307 varp->gexpr, varp->fbGX,
4308 varp->fName, varp->fLine, td3
4309 );
4310 }
4311 }
4312
4313 TRACE_D3("\n\n");
4314 /* and move on to the next var */
4315 }
4316
4317 /* Now free all the TempVars */
4318 n = VG_(sizeXA)( tempvars );
4319 for (i = 0; i < n; i++) {
4320 varp = *(TempVar**)VG_(indexXA)( tempvars, i );
4321 ML_(dinfo_free)(varp);
4322 }
4323 VG_(deleteXA)( tempvars );
4324 tempvars = NULL;
4325
4326 /* and the temp lookup table */
4327 VG_(deleteXA)( dioff_lookup_tab );
4328
4329 /* and the ranges tree. Note that we need to also free the XArrays
4330 which constitute the keys, hence pass VG_(deleteXA) as a
4331 key-finalizer. */
4332 VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL );
4333
4334 /* and the tyents_to_keep cache */
4335 ML_(dinfo_free)( tyents_to_keep_cache );
4336 tyents_to_keep_cache = NULL;
4337
4338 vg_assert( varparser.filenameTable == NULL );
4339
4340 /* And the signatured type hash. */
4341 VG_(HT_destruct) ( signature_types, ML_(dinfo_free) );
4342
4343 /* record the GExprs in di so they can be freed later */
4344 vg_assert(!di->admin_gexprs);
4345 di->admin_gexprs = gexprs;
4346 }
4347
4348
4349 /*------------------------------------------------------------*/
4350 /*--- ---*/
4351 /*--- The "new" DWARF3 reader -- top level control logic ---*/
4352 /*--- ---*/
4353 /*------------------------------------------------------------*/
4354
4355 static Bool d3rd_jmpbuf_valid = False;
4356 static const HChar* d3rd_jmpbuf_reason = NULL;
4357 static VG_MINIMAL_JMP_BUF(d3rd_jmpbuf);
4358
barf(const HChar * reason)4359 static __attribute__((noreturn)) void barf ( const HChar* reason ) {
4360 vg_assert(d3rd_jmpbuf_valid);
4361 d3rd_jmpbuf_reason = reason;
4362 VG_MINIMAL_LONGJMP(d3rd_jmpbuf);
4363 /*NOTREACHED*/
4364 vg_assert(0);
4365 }
4366
4367
4368 void
ML_(new_dwarf3_reader)4369 ML_(new_dwarf3_reader) (
4370 struct _DebugInfo* di,
4371 DiSlice escn_debug_info, DiSlice escn_debug_types,
4372 DiSlice escn_debug_abbv, DiSlice escn_debug_line,
4373 DiSlice escn_debug_str, DiSlice escn_debug_ranges,
4374 DiSlice escn_debug_loc, DiSlice escn_debug_info_alt,
4375 DiSlice escn_debug_abbv_alt, DiSlice escn_debug_line_alt,
4376 DiSlice escn_debug_str_alt
4377 )
4378 {
4379 volatile Int jumped;
4380 volatile Bool td3 = di->trace_symtab;
4381
4382 /* Run the _wrk function to read the dwarf3. If it succeeds, it
4383 just returns normally. If there is any failure, it longjmp's
4384 back here, having first set d3rd_jmpbuf_reason to something
4385 useful. */
4386 vg_assert(d3rd_jmpbuf_valid == False);
4387 vg_assert(d3rd_jmpbuf_reason == NULL);
4388
4389 d3rd_jmpbuf_valid = True;
4390 jumped = VG_MINIMAL_SETJMP(d3rd_jmpbuf);
4391 if (jumped == 0) {
4392 /* try this ... */
4393 new_dwarf3_reader_wrk( di, barf,
4394 escn_debug_info, escn_debug_types,
4395 escn_debug_abbv, escn_debug_line,
4396 escn_debug_str, escn_debug_ranges,
4397 escn_debug_loc, escn_debug_info_alt,
4398 escn_debug_abbv_alt, escn_debug_line_alt,
4399 escn_debug_str_alt );
4400 d3rd_jmpbuf_valid = False;
4401 TRACE_D3("\n------ .debug_info reading was successful ------\n");
4402 } else {
4403 /* It longjmp'd. */
4404 d3rd_jmpbuf_valid = False;
4405 /* Can't longjump without giving some sort of reason. */
4406 vg_assert(d3rd_jmpbuf_reason != NULL);
4407
4408 TRACE_D3("\n------ .debug_info reading failed ------\n");
4409
4410 ML_(symerr)(di, True, d3rd_jmpbuf_reason);
4411 }
4412
4413 d3rd_jmpbuf_valid = False;
4414 d3rd_jmpbuf_reason = NULL;
4415 }
4416
4417
4418
4419 /* --- Unused code fragments which might be useful one day. --- */
4420
#if 0
   /* Read the arange tables.

      NOTE(review): this fragment is disabled and is not a complete
      definition.  It uses names it does not declare (aranges,
      debug_aranges_img, debug_aranges_sz, barf), and it calls
      init_Cursor with an (image, size, ...) argument list, whereas
      the enabled calls in this file pass a DiSlice.  It will need
      adapting before it can be switched on. */
   TRACE_SYMTAB("\n");
   TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
   init_Cursor( &aranges, debug_aranges_img,
                debug_aranges_sz, 0, barf,
                "Overrun whilst reading .debug_aranges section" );
   while (True) {
      ULong  len, d_i_offset;
      Bool   is64;
      UShort version;
      UChar  asize, segsize;

      if (is_at_end_Cursor( &aranges ))
         break;
      /* Read one arange thingy */
      /* initial_length field */
      len = get_Initial_Length( &is64, &aranges,
               "in .debug_aranges: invalid initial-length field" );
      version    = get_UShort( &aranges );
      d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
      asize      = get_UChar( &aranges );
      segsize    = get_UChar( &aranges );
      TRACE_D3(" Length: %llu\n", len);
      TRACE_D3(" Version: %d\n", (Int)version);
      TRACE_D3(" Offset into .debug_info: %llx\n", d_i_offset);
      TRACE_D3(" Pointer Size: %d\n", (Int)asize);
      TRACE_D3(" Segment Size: %d\n", (Int)segsize);
      TRACE_D3("\n");
      TRACE_D3(" Address Length\n");

      /* asize comes straight from the section and is used as a
         modulus just below; a zero would be a division by zero. */
      if (asize == 0)
         barf("in .debug_aranges: zero address size");

      /* Skip padding up to the next multiple of 2 * asize. */
      while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) {
         (void)get_UChar( & aranges );
      }
      /* (address, length) pairs, ended by a (0,0) pair, which is
         traced too. */
      while (True) {
         ULong address = get_Dwarfish_UWord( &aranges, asize==8 );
         ULong length = get_Dwarfish_UWord( &aranges, asize==8 );
         TRACE_D3(" 0x%016llx 0x%llx\n", address, length);
         if (address == 0 && length == 0) break;
      }
   }
   TRACE_SYMTAB("\n");
#endif
4464
4465 #endif // defined(VGO_linux) || defined(VGO_darwin)
4466
4467 /*--------------------------------------------------------------------*/
4468 /*--- end ---*/
4469 /*--------------------------------------------------------------------*/
4470