1
2 /*--------------------------------------------------------------------*/
3 /*--- Reading of syms & debug info from ELF .so/executable files. ---*/
4 /*--- readelf.c ---*/
5 /*--------------------------------------------------------------------*/
6
7 /*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
10
11 Copyright (C) 2000-2015 Julian Seward
12 jseward@acm.org
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30 */
31
32 #if defined(VGO_linux) || defined(VGO_solaris)
33
34 #include "pub_core_basics.h"
35 #include "pub_core_vki.h"
36 #include "pub_core_debuginfo.h"
37 #include "pub_core_libcbase.h"
38 #include "pub_core_libcprint.h"
39 #include "pub_core_libcassert.h"
40 #include "pub_core_machine.h" /* VG_ELF_CLASS */
41 #include "pub_core_options.h"
42 #include "pub_core_oset.h"
43 #include "pub_core_tooliface.h" /* VG_(needs) */
44 #include "pub_core_xarray.h"
45 #include "priv_misc.h" /* dinfo_zalloc/free/strdup */
46 #include "priv_image.h"
47 #include "priv_d3basics.h"
48 #include "priv_tytypes.h"
49 #include "priv_storage.h"
50 #include "priv_readelf.h" /* self */
51 #include "priv_readdwarf.h" /* 'cos ELF contains DWARF */
52 #include "priv_readdwarf3.h"
53 #include "priv_readexidx.h"
54
55 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
56 #include <elf.h>
57 #if defined(VGO_solaris)
58 #include <sys/link.h> /* ElfXX_Dyn, DT_* */
59 #endif
60 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
61
62 /*------------------------------------------------------------*/
63 /*--- 32/64-bit parameterisation ---*/
64 /*------------------------------------------------------------*/
65
66 /* For all the ELF macros and types which specify '32' or '64',
67 select the correct variant for this platform and give it
68 an 'XX' name. Then use the 'XX' variant consistently in
69 the rest of this file.
70 */
71 #if VG_WORDSIZE == 4
72 # define ElfXX_Ehdr Elf32_Ehdr
73 # define ElfXX_Shdr Elf32_Shdr
74 # define ElfXX_Phdr Elf32_Phdr
75 # define ElfXX_Nhdr Elf32_Nhdr
76 # define ElfXX_Sym Elf32_Sym
77 # define ElfXX_Off Elf32_Off
78 # define ElfXX_Word Elf32_Word
79 # define ElfXX_Addr Elf32_Addr
80 # define ElfXX_Dyn Elf32_Dyn
81 # define ELFXX_ST_BIND ELF32_ST_BIND
82 # define ELFXX_ST_TYPE ELF32_ST_TYPE
83
84 #elif VG_WORDSIZE == 8
85 # define ElfXX_Ehdr Elf64_Ehdr
86 # define ElfXX_Shdr Elf64_Shdr
87 # define ElfXX_Phdr Elf64_Phdr
88 # define ElfXX_Nhdr Elf64_Nhdr
89 # define ElfXX_Sym Elf64_Sym
90 # define ElfXX_Off Elf64_Off
91 # define ElfXX_Word Elf64_Word
92 # define ElfXX_Addr Elf64_Addr
93 # define ElfXX_Dyn Elf64_Dyn
94 # define ELFXX_ST_BIND ELF64_ST_BIND
95 # define ELFXX_ST_TYPE ELF64_ST_TYPE
96
97 #else
98 # error "VG_WORDSIZE should be 4 or 8"
99 #endif
100
101
102 /*------------------------------------------------------------*/
103 /*--- ---*/
104 /*--- Read symbol table and line info from ELF files. ---*/
105 /*--- ---*/
106 /*------------------------------------------------------------*/
107
108 /* readelf.c parses ELF files and acquires symbol table info from
109 them. It calls onwards to readdwarf.c to read DWARF2/3 line number
110 and call frame info found. */
111
112 /* Identify an ELF object file by peering at the first few bytes of
113 it. */
114
ML_(is_elf_object_file)115 Bool ML_(is_elf_object_file)( const void* image, SizeT n_image, Bool rel_ok )
116 {
117 const ElfXX_Ehdr* ehdr = image;
118 Int ok = 1;
119
120 if (n_image < sizeof(ElfXX_Ehdr))
121 return False;
122
123 ok &= (ehdr->e_ident[EI_MAG0] == 0x7F
124 && ehdr->e_ident[EI_MAG1] == 'E'
125 && ehdr->e_ident[EI_MAG2] == 'L'
126 && ehdr->e_ident[EI_MAG3] == 'F');
127 ok &= (ehdr->e_ident[EI_CLASS] == VG_ELF_CLASS
128 && ehdr->e_ident[EI_DATA] == VG_ELF_DATA2XXX
129 && ehdr->e_ident[EI_VERSION] == EV_CURRENT);
130 ok &= (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_DYN
131 || (rel_ok && ehdr->e_type == ET_REL));
132 ok &= (ehdr->e_machine == VG_ELF_MACHINE);
133 ok &= (ehdr->e_version == EV_CURRENT);
134 ok &= (ehdr->e_shstrndx != SHN_UNDEF);
135 ok &= (ehdr->e_shoff != 0 && ehdr->e_shnum != 0);
136 ok &= ((ehdr->e_phoff != 0 && ehdr->e_phnum != 0)
137 || ehdr->e_type == ET_REL);
138
139 return ok ? True : False;
140 }
141
142
143 /* The same thing, but operating on a DiImage instead. */
144
is_elf_object_file_by_DiImage(DiImage * img,Bool rel_ok)145 static Bool is_elf_object_file_by_DiImage( DiImage* img, Bool rel_ok )
146 {
147 /* Be sure this doesn't make the frame too big. */
148 vg_assert(sizeof(ElfXX_Ehdr) <= 512);
149
150 ElfXX_Ehdr ehdr;
151 if (!ML_(img_valid)(img, 0, sizeof(ehdr)))
152 return False;
153
154 ML_(img_get)(&ehdr, img, 0, sizeof(ehdr));
155 return ML_(is_elf_object_file)( &ehdr, sizeof(ehdr), rel_ok );
156 }
157
158
159 /* Show a raw ELF symbol, given its in-image address and name. */
160
161 static
show_raw_elf_symbol(DiImage * strtab_img,Int i,const ElfXX_Sym * sym,DiOffT sym_name_ioff,Addr sym_svma,Bool ppc64_linux_format)162 void show_raw_elf_symbol ( DiImage* strtab_img,
163 Int i,
164 const ElfXX_Sym* sym,
165 DiOffT sym_name_ioff, Addr sym_svma,
166 Bool ppc64_linux_format )
167 {
168 const HChar* space = ppc64_linux_format ? " " : "";
169 VG_(printf)("raw symbol [%4d]: ", i);
170 switch (ELFXX_ST_BIND(sym->st_info)) {
171 case STB_LOCAL: VG_(printf)("LOC "); break;
172 case STB_GLOBAL: VG_(printf)("GLO "); break;
173 case STB_WEAK: VG_(printf)("WEA "); break;
174 case STB_LOPROC: VG_(printf)("lop "); break;
175 case STB_HIPROC: VG_(printf)("hip "); break;
176 default: VG_(printf)("??? "); break;
177 }
178 switch (ELFXX_ST_TYPE(sym->st_info)) {
179 case STT_NOTYPE: VG_(printf)("NOT "); break;
180 case STT_OBJECT: VG_(printf)("OBJ "); break;
181 case STT_FUNC: VG_(printf)("FUN "); break;
182 case STT_SECTION: VG_(printf)("SEC "); break;
183 case STT_FILE: VG_(printf)("FIL "); break;
184 case STT_LOPROC: VG_(printf)("lop "); break;
185 case STT_HIPROC: VG_(printf)("hip "); break;
186 default: VG_(printf)("??? "); break;
187 }
188 HChar* sym_name = NULL;
189 if (sym->st_name)
190 sym_name = ML_(img_strdup)(strtab_img, "di.sres.1", sym_name_ioff);
191 VG_(printf)(": svma %#010lx, %ssz %4llu %s\n",
192 sym_svma, space, (ULong)(sym->st_size + 0UL),
193 (sym_name ? sym_name : "NONAME") );
194 if (sym_name)
195 ML_(dinfo_free)(sym_name);
196 }
197
198
199 /* Decide whether SYM is something we should collect, and if so, copy
200 relevant info to the _OUT arguments. For {x86,amd64,ppc32}-linux
201 this is straightforward - the name, address, size are copied out
202 unchanged.
203
204 There is a bit of a kludge re data symbols (see KLUDGED BSS CHECK
205 below): we assume that the .bss is mapped immediately after .data,
206 and so accept any data symbol which exists in the range [start of
207 .data, size of .data + size of .bss). I don't know if this is
208 really correct/justifiable, or not.
209
210 For ppc64be-linux it's more complex. If the symbol is seen to be in
211 the .opd section, it is taken to be a function descriptor, and so
212 a dereference is attempted, in order to get hold of the real entry
213 point address. Also as part of the dereference, there is an attempt
214 to calculate the TOC pointer (R2 value) associated with the symbol.
215
   To support the ppc64be-linux pre-"dotless" ABI (prior to gcc 4.0.0),
   if the symbol is seen to be outside the .opd section and its name
   starts with a dot, an .opd dereference is not attempted, and no TOC
   pointer is calculated, but the leading dot is removed from the
   name.
221
222 As a result, on ppc64be-linux, the caller of this function may have
223 to piece together the real size, address, name of the symbol from
224 multiple calls to this function. Ugly and confusing.
225 */
226 static
get_elf_symbol_info(struct _DebugInfo * di,const ElfXX_Sym * sym,DiOffT sym_name_ioff,const DiSlice * escn_strtab,Addr sym_svma,Bool symtab_in_debug,const DiSlice * escn_opd,PtrdiffT opd_bias,DiOffT * sym_name_out_ioff,SymAVMAs * sym_avmas_out,Int * sym_size_out,Bool * from_opd_out,Bool * is_text_out,Bool * is_ifunc)227 Bool get_elf_symbol_info (
228 /* INPUTS */
229 struct _DebugInfo* di, /* containing DebugInfo */
230 const ElfXX_Sym* sym, /* ELF symbol */
231 DiOffT sym_name_ioff, /* name, may be absent (DiOffT_INVALID) */
232 const DiSlice* escn_strtab, /* holds the name */
233 Addr sym_svma, /* address as stated in the object file */
234 Bool symtab_in_debug, /* symbol table is in the debug file */
235 const DiSlice* escn_opd, /* the .opd (ppc64be-linux only) */
236 PtrdiffT opd_bias, /* for biasing AVMAs found in .opd */
237 /* OUTPUTS */
238 DiOffT* sym_name_out_ioff, /* name (in strtab) we should record */
239 SymAVMAs* sym_avmas_out, /* sym avmas we should record */
240 Int* sym_size_out, /* symbol size */
241 Bool* from_opd_out, /* ppc64be-linux only: did we deref an
242 .opd entry? */
243 Bool* is_text_out, /* is this a text symbol? */
244 Bool* is_ifunc /* is this a STT_GNU_IFUNC function ?*/
245 )
246 {
247 Bool plausible;
248 # if defined(VGP_ppc64be_linux)
249 Bool is_in_opd;
250 # endif
251 Bool in_text, in_data, in_sdata, in_rodata, in_bss, in_sbss;
252 Addr text_svma, data_svma, sdata_svma, rodata_svma, bss_svma, sbss_svma;
253 PtrdiffT text_bias, data_bias, sdata_bias, rodata_bias, bss_bias, sbss_bias;
254
255 /* Set defaults */
256 *sym_name_out_ioff = sym_name_ioff;
257 (*sym_avmas_out).main = sym_svma; /* we will bias this shortly */
258 *is_text_out = True;
259 SET_TOCPTR_AVMA(*sym_avmas_out, 0); /* default to unknown/inapplicable */
260 SET_LOCAL_EP_AVMA(*sym_avmas_out, 0); /* default to unknown/inapplicable */
261 *from_opd_out = False;
262 *is_ifunc = False;
263
264 /* Get the symbol size, but restrict it to fit in a signed 32 bit
265 int. Also, deal with the stupid case of negative size by making
266 the size be 1. Note that sym->st_size has type UWord,
267 effectively. */
268 { Word size_tmp = (Word)sym->st_size;
269 Word max_Int = (1LL << 31) - 1;
270 if (size_tmp < 0) size_tmp = 1;
271 if (size_tmp > max_Int) size_tmp = max_Int;
272 *sym_size_out = (Int)size_tmp;
273 }
274 /* After this point refer only to *sym_size_out and not to
275 sym->st_size. */
276
277 /* Figure out if we're interested in the symbol. Firstly, is it of
278 the right flavour? */
279 plausible
280 = (ELFXX_ST_BIND(sym->st_info) == STB_GLOBAL
281 || ELFXX_ST_BIND(sym->st_info) == STB_LOCAL
282 || ELFXX_ST_BIND(sym->st_info) == STB_WEAK
283 )
284 &&
285 (ELFXX_ST_TYPE(sym->st_info) == STT_FUNC
286 || ELFXX_ST_TYPE(sym->st_info) == STT_OBJECT
287 # ifdef STT_GNU_IFUNC
288 || ELFXX_ST_TYPE(sym->st_info) == STT_GNU_IFUNC
289 # endif
290 );
291
292 /* Work out the svma and bias for each section as it will appear in
293 addresses in the symbol table. */
294 if (symtab_in_debug) {
295 text_svma = di->text_debug_svma;
296 text_bias = di->text_debug_bias;
297 data_svma = di->data_debug_svma;
298 data_bias = di->data_debug_bias;
299 sdata_svma = di->sdata_debug_svma;
300 sdata_bias = di->sdata_debug_bias;
301 rodata_svma = di->rodata_debug_svma;
302 rodata_bias = di->rodata_debug_bias;
303 bss_svma = di->bss_debug_svma;
304 bss_bias = di->bss_debug_bias;
305 sbss_svma = di->sbss_debug_svma;
306 sbss_bias = di->sbss_debug_bias;
307 } else {
308 text_svma = di->text_svma;
309 text_bias = di->text_bias;
310 data_svma = di->data_svma;
311 data_bias = di->data_bias;
312 sdata_svma = di->sdata_svma;
313 sdata_bias = di->sdata_bias;
314 rodata_svma = di->rodata_svma;
315 rodata_bias = di->rodata_bias;
316 bss_svma = di->bss_svma;
317 bss_bias = di->bss_bias;
318 sbss_svma = di->sbss_svma;
319 sbss_bias = di->sbss_bias;
320 }
321
322 /* Now bias (*sym_avmas_out).main accordingly by figuring out exactly which
323 section the symbol is from and bias accordingly. Screws up if
324 the previously deduced section svma address ranges are wrong. */
325 if (di->text_present
326 && di->text_size > 0
327 && sym_svma >= text_svma
328 && sym_svma < text_svma + di->text_size) {
329 *is_text_out = True;
330 (*sym_avmas_out).main += text_bias;
331 } else
332 if (di->data_present
333 && di->data_size > 0
334 && sym_svma >= data_svma
335 && sym_svma < data_svma + di->data_size) {
336 *is_text_out = False;
337 (*sym_avmas_out).main += data_bias;
338 } else
339 if (di->sdata_present
340 && di->sdata_size > 0
341 && sym_svma >= sdata_svma
342 && sym_svma < sdata_svma + di->sdata_size) {
343 *is_text_out = False;
344 (*sym_avmas_out).main += sdata_bias;
345 } else
346 if (di->rodata_present
347 && di->rodata_size > 0
348 && sym_svma >= rodata_svma
349 && sym_svma < rodata_svma + di->rodata_size) {
350 *is_text_out = False;
351 (*sym_avmas_out).main += rodata_bias;
352 } else
353 if (di->bss_present
354 && di->bss_size > 0
355 && sym_svma >= bss_svma
356 && sym_svma < bss_svma + di->bss_size) {
357 *is_text_out = False;
358 (*sym_avmas_out).main += bss_bias;
359 } else
360 if (di->sbss_present
361 && di->sbss_size > 0
362 && sym_svma >= sbss_svma
363 && sym_svma < sbss_svma + di->sbss_size) {
364 *is_text_out = False;
365 (*sym_avmas_out).main += sbss_bias;
366 } else {
367 /* Assume it's in .text. Is this a good idea? */
368 *is_text_out = True;
369 (*sym_avmas_out).main += text_bias;
370 }
371
372 # ifdef STT_GNU_IFUNC
373 /* Check for indirect functions. */
374 if (*is_text_out
375 && ELFXX_ST_TYPE(sym->st_info) == STT_GNU_IFUNC) {
376 *is_ifunc = True;
377 }
378 # endif
379
380 # if defined(VGP_ppc64be_linux)
381 /* Allow STT_NOTYPE in the very special case where we're running on
382 ppc64be-linux and the symbol is one which the .opd-chasing hack
383 below will chase. */
384 if (!plausible
385 && *is_text_out
386 && ELFXX_ST_TYPE(sym->st_info) == STT_NOTYPE
387 && *sym_size_out > 0
388 && di->opd_present
389 && di->opd_size > 0
390 && (*sym_avmas_out).main >= di->opd_avma
391 && (*sym_avmas_out).main < di->opd_avma + di->opd_size)
392 plausible = True;
393 # endif
394
395 if (!plausible)
396 return False;
397
398 /* Ignore if nameless. */
399 if (sym_name_ioff == DiOffT_INVALID
400 || /* VG_(strlen)(sym_name) == 0 */
401 /* equivalent but cheaper ... */
402 ML_(img_get_UChar)(escn_strtab->img, sym_name_ioff) == '\0') {
403 if (TRACE_SYMTAB_ENABLED) {
404 HChar* sym_name = ML_(img_strdup)(escn_strtab->img,
405 "di.gesi.1", sym_name_ioff);
406 TRACE_SYMTAB(" ignore -- nameless: %s\n", sym_name);
407 if (sym_name) ML_(dinfo_free)(sym_name);
408 }
409 return False;
410 }
411
412 /* Ignore if zero-sized. Except on Android:
413
414 On Android 2.3.5, some of the symbols that Memcheck needs to
415 intercept (for noise reduction purposes) have zero size, due to
416 lack of .size directives in handwritten assembly sources. So we
417 can't reject them out of hand -- instead give them a bogusly
418 large size and let canonicaliseSymtab trim them so they don't
419 overlap any following symbols. At least the following symbols
420 are known to be affected:
421
422 in /system/lib/libc.so: strlen strcmp strcpy memcmp memcpy
423 in /system/bin/linker: __dl_strcmp __dl_strlen
424 */
425 if (*sym_size_out == 0) {
426 # if defined(VGPV_arm_linux_android) \
427 || defined(VGPV_x86_linux_android) \
428 || defined(VGPV_mips32_linux_android) \
429 || defined(VGPV_arm64_linux_android)
430 *sym_size_out = 2048;
431 # else
432 if (TRACE_SYMTAB_ENABLED) {
433 HChar* sym_name = ML_(img_strdup)(escn_strtab->img,
434 "di.gesi.2", sym_name_ioff);
435 TRACE_SYMTAB(" ignore -- size=0: %s\n", sym_name);
436 if (sym_name) ML_(dinfo_free)(sym_name);
437 }
438 return False;
439 # endif
440 }
441
442 /* This seems to significantly reduce the number of junk
443 symbols, and particularly reduces the number of
444 overlapping address ranges. Don't ask me why ... */
445 if ((Int)sym->st_value == 0) {
446 if (TRACE_SYMTAB_ENABLED) {
447 HChar* sym_name = ML_(img_strdup)(escn_strtab->img,
448 "di.gesi.3", sym_name_ioff);
449 TRACE_SYMTAB( " ignore -- valu=0: %s\n", sym_name);
450 if (sym_name) ML_(dinfo_free)(sym_name);
451 }
452 return False;
453 }
454
455 /* If it's apparently in a GOT or PLT, it's really a reference to a
456 symbol defined elsewhere, so ignore it. */
457 if (di->got_present
458 && di->got_size > 0
459 && (*sym_avmas_out).main >= di->got_avma
460 && (*sym_avmas_out).main < di->got_avma + di->got_size) {
461 if (TRACE_SYMTAB_ENABLED) {
462 HChar* sym_name = ML_(img_strdup)(escn_strtab->img,
463 "di.gesi.4", sym_name_ioff);
464 TRACE_SYMTAB(" ignore -- in GOT: %s\n", sym_name);
465 if (sym_name) ML_(dinfo_free)(sym_name);
466 }
467 return False;
468 }
469 if (di->plt_present
470 && di->plt_size > 0
471 && (*sym_avmas_out).main >= di->plt_avma
472 && (*sym_avmas_out).main < di->plt_avma + di->plt_size) {
473 if (TRACE_SYMTAB_ENABLED) {
474 HChar* sym_name = ML_(img_strdup)(escn_strtab->img,
475 "di.gesi.5", sym_name_ioff);
476 TRACE_SYMTAB(" ignore -- in PLT: %s\n", sym_name);
477 if (sym_name) ML_(dinfo_free)(sym_name);
478 }
479 return False;
480 }
481
482 /* ppc64be-linux nasty hack: if the symbol is in an .opd section,
483 then really what we have is the address of a function
484 descriptor. So use the first word of that as the function's
485 text.
486
487 See thread starting at
488 http://gcc.gnu.org/ml/gcc-patches/2004-08/msg00557.html
489 */
490 # if defined(VGP_ppc64be_linux)
491 /* Host and guest may have different Endianess, used by BE only */
492 is_in_opd = False;
493 # endif
494
495 if (di->opd_present
496 && di->opd_size > 0
497 && (*sym_avmas_out).main >= di->opd_avma
498 && (*sym_avmas_out).main < di->opd_avma + di->opd_size) {
499 # if !defined(VGP_ppc64be_linux)
500 if (TRACE_SYMTAB_ENABLED) {
501 HChar* sym_name = ML_(img_strdup)(escn_strtab->img,
502 "di.gesi.6", sym_name_ioff);
503 TRACE_SYMTAB(" ignore -- in OPD: %s\n", sym_name);
504 if (sym_name) ML_(dinfo_free)(sym_name);
505 }
506 return False;
507 # else
508 Int offset_in_opd;
509 Bool details = 1||False;
510
511 if (details)
512 TRACE_SYMTAB("opdXXX: opd_bias %p, sym_svma_out %p\n",
513 (void*)(opd_bias), (void*)(*sym_avmas_out).main);
514
515 if (!VG_IS_8_ALIGNED((*sym_avmas_out).main)) {
516 if (TRACE_SYMTAB_ENABLED) {
517 HChar* sym_name = ML_(img_strdup)(escn_strtab->img,
518 "di.gesi.6a", sym_name_ioff);
519 TRACE_SYMTAB(" ignore -- not 8-aligned: %s\n", sym_name);
520 if (sym_name) ML_(dinfo_free)(sym_name);
521 }
522 return False;
523 }
524
525 /* (*sym_avmas_out).main is a avma pointing into the .opd section. We
526 know the vma of the opd section start, so we can figure out
527 how far into the opd section this is. */
528
529 offset_in_opd = (Addr)(*sym_avmas_out).main - (Addr)(di->opd_avma);
530 if (offset_in_opd < 0 || offset_in_opd >= di->opd_size) {
531 if (TRACE_SYMTAB_ENABLED) {
532 HChar* sym_name = ML_(img_strdup)(escn_strtab->img,
533 "di.gesi.6a", sym_name_ioff);
534 TRACE_SYMTAB(" ignore -- invalid OPD offset: %s\n", sym_name);
535 if (sym_name) ML_(dinfo_free)(sym_name);
536 }
537 return False;
538 }
539
540 /* Now we want to know what's at that offset in the .opd
541 section. We can't look in the running image since it won't
542 necessarily have been mapped. But we can consult the oimage.
543 opd_img is the start address of the .opd in the oimage.
544 Hence: */
545
546 ULong fn_descr[2]; /* is actually 3 words, but we need only 2 */
547 if (!ML_(img_valid)(escn_opd->img, escn_opd->ioff + offset_in_opd,
548 sizeof(fn_descr))) {
549 if (TRACE_SYMTAB_ENABLED) {
550 HChar* sym_name = ML_(img_strdup)(escn_strtab->img,
551 "di.gesi.6b", sym_name_ioff);
552 TRACE_SYMTAB(" ignore -- invalid OPD fn_descr offset: %s\n",
553 sym_name);
554 if (sym_name) ML_(dinfo_free)(sym_name);
555
556 }
557 return False;
558 }
559
560 /* This can't fail now, because we just checked the offset
561 above. */
562 ML_(img_get)(&fn_descr[0], escn_opd->img,
563 escn_opd->ioff + offset_in_opd, sizeof(fn_descr));
564
565 if (details)
566 TRACE_SYMTAB("opdXXY: offset %d, fn_descr %p\n",
567 offset_in_opd, fn_descr);
568 if (details)
569 TRACE_SYMTAB("opdXXZ: *fn_descr %p\n", (void*)(fn_descr[0]));
570
571 /* opd_bias is the what we have to add to SVMAs found in .opd to
572 get plausible .text AVMAs for the entry point, and .data
573 AVMAs (presumably) for the TOC locations. We use the caller
574 supplied value (which is di->text_bias) for both of these.
575 Not sure why that is correct - it seems to work, and sounds
576 OK for fn_descr[0], but surely we need to use the data bias
577 and not the text bias for fn_descr[1] ? Oh Well.
578 */
579 (*sym_avmas_out).main = fn_descr[0] + opd_bias;
580 SET_TOCPTR_AVMA(*sym_avmas_out, fn_descr[1] + opd_bias);
581 *from_opd_out = True;
582 is_in_opd = True;
583
584 /* Do a final sanity check: if the symbol falls outside the
585 DebugInfo's mapped range, ignore it. Since (*sym_avmas_out).main has
586 been updated, that can be achieved simply by falling through
587 to the test below. */
588
589 # endif /* ppc64-linux nasty hack */
590 }
591
592 /* Here's yet another ppc64-linux hack. Get rid of leading dot if
593 the symbol is outside .opd. */
594 # if defined(VGP_ppc64be_linux)
595 if (di->opd_size > 0
596 && !is_in_opd
597 && *sym_name_out_ioff != DiOffT_INVALID
598 && ML_(img_get_UChar)(escn_strtab->img, *sym_name_out_ioff) == '.') {
599 vg_assert(!(*from_opd_out));
600 (*sym_name_out_ioff)++;
601 }
602 # endif
603
604 /* If no part of the symbol falls within the mapped range,
605 ignore it. */
606
607 in_text
608 = di->text_present
609 && di->text_size > 0
610 && !((*sym_avmas_out).main + *sym_size_out <= di->text_avma
611 || (*sym_avmas_out).main >= di->text_avma + di->text_size);
612
613 in_data
614 = di->data_present
615 && di->data_size > 0
616 && !((*sym_avmas_out).main + *sym_size_out <= di->data_avma
617 || (*sym_avmas_out).main >= di->data_avma + di->data_size);
618
619 in_sdata
620 = di->sdata_present
621 && di->sdata_size > 0
622 && !((*sym_avmas_out).main + *sym_size_out <= di->sdata_avma
623 || (*sym_avmas_out).main >= di->sdata_avma + di->sdata_size);
624
625 in_rodata
626 = di->rodata_present
627 && di->rodata_size > 0
628 && !((*sym_avmas_out).main + *sym_size_out <= di->rodata_avma
629 || (*sym_avmas_out).main >= di->rodata_avma + di->rodata_size);
630
631 in_bss
632 = di->bss_present
633 && di->bss_size > 0
634 && !((*sym_avmas_out).main + *sym_size_out <= di->bss_avma
635 || (*sym_avmas_out).main >= di->bss_avma + di->bss_size);
636
637 in_sbss
638 = di->sbss_present
639 && di->sbss_size > 0
640 && !((*sym_avmas_out).main + *sym_size_out <= di->sbss_avma
641 || (*sym_avmas_out).main >= di->sbss_avma + di->sbss_size);
642
643
644 if (*is_text_out) {
645 /* This used to reject any symbol falling outside the text
646 segment ("if (!in_text) ..."). Now it is relaxed slightly,
647 to reject only symbols which fall outside the area mapped
648 r-x. This is in accordance with r7427. See
649 "Comment_Regarding_Text_Range_Checks" in storage.c for
650 background. */
651 Bool in_rx;
652 vg_assert(di->fsm.have_rx_map);
653 /* This could actually wrap around and cause
654 ML_(find_rx_mapping) to assert. But that seems so unlikely,
655 let's wait for it to happen before fixing it. */
656 in_rx = (ML_(find_rx_mapping)(
657 di,
658 (*sym_avmas_out).main,
659 (*sym_avmas_out).main + *sym_size_out) != NULL);
660 if (in_text)
661 vg_assert(in_rx);
662 if (!in_rx) {
663 TRACE_SYMTAB(
664 "ignore -- %#lx .. %#lx outside .text svma range %#lx .. %#lx\n",
665 (*sym_avmas_out).main, (*sym_avmas_out).main + *sym_size_out,
666 di->text_avma,
667 di->text_avma + di->text_size);
668 return False;
669 }
670 } else {
671 if (!(in_data || in_sdata || in_rodata || in_bss || in_sbss)) {
672 TRACE_SYMTAB(
673 "ignore -- %#lx .. %#lx outside .data / .sdata / .rodata "
674 "/ .bss / .sbss svma ranges\n",
675 (*sym_avmas_out).main, (*sym_avmas_out).main + *sym_size_out);
676 return False;
677 }
678 }
679
680 # if defined(VGP_ppc64be_linux)
681 if (di->opd_present && di->opd_size > 0) {
682 vg_assert((*sym_avmas_out).main + *sym_size_out <= di->opd_avma
683 || (*sym_avmas_out).main >= di->opd_avma + di->opd_size);
684 }
685 #endif
686
687 # if defined(VGP_ppc64le_linux)
688 /* PPC64 LE ABI uses three bits in the st_other field to indicate the number
689 * of instructions between the function's global and local entry points. An
690 * offset of 0 indicates that there is one entry point. The value must be:
691 *
692 * 0 - one entry point, local and global are the same
693 * 1 - reserved
694 * 2 - local entry point is one instruction after the global entry point
695 * 3 - local entry point is two instructions after the global entry point
696 * 4 - local entry point is four instructions after the global entry point
697 * 5 - local entry point is eight instructions after the global entry point
698 * 6 - local entry point is sixteen instructions after the global entry point
699 * 7 - reserved
700 *
701 * Extract the three bit field from the other field is done by:
702 * (other_field & STO_PPC64_LOCAL_MASK) >> STO_PPC_LOCAL_BIT
703 *
704 * where the #define values are given in include/elf/powerpc.h file for
705 * the PPC binutils.
706 *
707 * conversion of the three bit field to bytes is given by
708 *
709 * ((1 << bit_field) >> 2) << 2
710 */
711
712 #define STO_PPC64_LOCAL_BIT 5
713 #define STO_PPC64_LOCAL_MASK (7 << STO_PPC64_LOCAL_BIT)
714 {
715 unsigned int bit_field, dist_to_local_entry;
716 /* extract the other filed */
717 bit_field = (sym->st_other & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT;
718
719 if ((bit_field > 0) && (bit_field < 7)) {
720 /* store the local entry point address */
721 dist_to_local_entry = ((1 << bit_field) >> 2) << 2;
722 SET_LOCAL_EP_AVMA(*sym_avmas_out,
723 (*sym_avmas_out).main + dist_to_local_entry);
724
725 if (TRACE_SYMTAB_ENABLED) {
726 HChar* sym_name = ML_(img_strdup)(escn_strtab->img,
727 "di.gesi.5", sym_name_ioff);
728 VG_(printf)("Local entry point: %s at %#010x\n",
729 sym_name,
730 (unsigned int)GET_LOCAL_EP_AVMA(*sym_avmas_out));
731 }
732 }
733 }
734 # endif
735
736 /* Acquire! */
737 return True;
738 }
739
740
741 /* Read an ELF symbol table (normal or dynamic). This one is for the
742 "normal" case ({x86,amd64,ppc32,arm,mips32,mips64, ppc64le}-linux). */
743 static
744 __attribute__((unused)) /* not referred to on all targets */
read_elf_symtab__normal(struct _DebugInfo * di,const HChar * tab_name,DiSlice * escn_symtab,DiSlice * escn_strtab,DiSlice * escn_opd,Bool symtab_in_debug)745 void read_elf_symtab__normal(
746 struct _DebugInfo* di, const HChar* tab_name,
747 DiSlice* escn_symtab,
748 DiSlice* escn_strtab,
749 DiSlice* escn_opd, /* ppc64be-linux only */
750 Bool symtab_in_debug
751 )
752 {
753 if (escn_strtab->img == NULL || escn_symtab->img == NULL) {
754 HChar buf[VG_(strlen)(tab_name) + 40];
755 VG_(sprintf)(buf, " object doesn't have a %s", tab_name);
756 ML_(symerr)(di, False, buf);
757 return;
758 }
759
760 TRACE_SYMTAB("\n--- Reading (ELF, standard) %s (%llu entries) ---\n",
761 tab_name, escn_symtab->szB/sizeof(ElfXX_Sym) );
762
763 /* Perhaps should start at i = 1; ELF docs suggest that entry
764 0 always denotes 'unknown symbol'. */
765 Word i;
766 for (i = 1; i < (Word)(escn_symtab->szB/sizeof(ElfXX_Sym)); i++) {
767 ElfXX_Sym sym;
768 ML_(img_get)(&sym, escn_symtab->img,
769 escn_symtab->ioff + i * sizeof(ElfXX_Sym), sizeof(sym));
770 DiOffT sym_name = escn_strtab->ioff + sym.st_name;
771 Addr sym_svma = sym.st_value;
772
773 if (di->trace_symtab)
774 show_raw_elf_symbol(escn_strtab->img, i,
775 &sym, sym_name, sym_svma, False);
776
777 SymAVMAs sym_avmas_really;
778 Int sym_size = 0;
779 Bool from_opd = False, is_text = False, is_ifunc = False;
780 DiOffT sym_name_really = DiOffT_INVALID;
781 sym_avmas_really.main = 0;
782 SET_TOCPTR_AVMA(sym_avmas_really, 0);
783 SET_LOCAL_EP_AVMA(sym_avmas_really, 0);
784 if (get_elf_symbol_info(di, &sym, sym_name, escn_strtab,
785 sym_svma, symtab_in_debug,
786 escn_opd, di->text_bias,
787 &sym_name_really,
788 &sym_avmas_really,
789 &sym_size,
790 &from_opd, &is_text, &is_ifunc)) {
791
792 DiSym disym;
793 VG_(memset)(&disym, 0, sizeof(disym));
794 HChar* cstr = ML_(img_strdup)(escn_strtab->img,
795 "di.res__n.1", sym_name_really);
796 disym.avmas = sym_avmas_really;
797 disym.pri_name = ML_(addStr) ( di, cstr, -1 );
798 disym.sec_names = NULL;
799 disym.size = sym_size;
800 disym.isText = is_text;
801 disym.isIFunc = is_ifunc;
802 if (cstr) { ML_(dinfo_free)(cstr); cstr = NULL; }
803 vg_assert(disym.pri_name);
804 vg_assert(GET_TOCPTR_AVMA(disym.avmas) == 0);
805 /* has no role except on ppc64be-linux */
806 ML_(addSym) ( di, &disym );
807
808 if (TRACE_SYMTAB_ENABLED) {
809 TRACE_SYMTAB(" rec(%c) [%4ld]: "
810 " val %#010lx, sz %4d %s\n",
811 is_text ? 't' : 'd',
812 i,
813 disym.avmas.main,
814 (Int)disym.size,
815 disym.pri_name
816 );
817 if (GET_LOCAL_EP_AVMA(disym.avmas) != 0) {
818 TRACE_SYMTAB(" local entry point %#010lx\n",
819 GET_LOCAL_EP_AVMA(disym.avmas));
820 }
821 }
822
823 }
824 }
825 }
826
827
828 /* Read an ELF symbol table (normal or dynamic). This one is for
829 ppc64be-linux, which requires special treatment. */
830
831 typedef
832 struct {
833 Addr addr;
834 DiOffT name;
835 /* We have to store also the DiImage* so as to give context for
836 |name|. This is not part of the key (in terms of lookup) but
837 there's no easy other way to do this. Ugly. */
838 DiImage* img;
839 }
840 TempSymKey;
841
842 typedef
843 struct {
844 TempSymKey key;
845 Addr tocptr;
846 Int size;
847 Bool from_opd;
848 Bool is_text;
849 Bool is_ifunc;
850 }
851 TempSym;
852
cmp_TempSymKey(const TempSymKey * key1,const TempSym * elem2)853 static Word cmp_TempSymKey ( const TempSymKey* key1, const TempSym* elem2 )
854 {
855 /* Stay sane ... */
856 vg_assert(key1->img == elem2->key.img);
857 vg_assert(key1->img != NULL);
858 if (key1->addr < elem2->key.addr) return -1;
859 if (key1->addr > elem2->key.addr) return 1;
860 vg_assert(key1->name != DiOffT_INVALID);
861 vg_assert(elem2->key.name != DiOffT_INVALID);
862 return (Word)ML_(img_strcmp)(key1->img, key1->name, elem2->key.name);
863 }
864
865 static
866 __attribute__((unused)) /* not referred to on all targets */
read_elf_symtab__ppc64be_linux(struct _DebugInfo * di,const HChar * tab_name,DiSlice * escn_symtab,DiSlice * escn_strtab,DiSlice * escn_opd,Bool symtab_in_debug)867 void read_elf_symtab__ppc64be_linux(
868 struct _DebugInfo* di, const HChar* tab_name,
869 DiSlice* escn_symtab,
870 DiSlice* escn_strtab,
871 DiSlice* escn_opd, /* ppc64be-linux only */
872 Bool symtab_in_debug
873 )
874 {
875 Word i;
876 Int old_size;
877 Bool modify_size, modify_tocptr;
878 OSet *oset;
879 TempSymKey key;
880 TempSym *elem;
881 TempSym *prev;
882
883 if (escn_strtab->img == NULL || escn_symtab->img == NULL) {
884 HChar buf[VG_(strlen)(tab_name) + 40];
885 VG_(sprintf)(buf, " object doesn't have a %s", tab_name);
886 ML_(symerr)(di, False, buf);
887 return;
888 }
889
890 TRACE_SYMTAB("\n--- Reading (ELF, ppc64be-linux) %s (%llu entries) ---\n",
891 tab_name, escn_symtab->szB/sizeof(ElfXX_Sym) );
892
893 oset = VG_(OSetGen_Create)( offsetof(TempSym,key),
894 (OSetCmp_t)cmp_TempSymKey,
895 ML_(dinfo_zalloc), "di.respl.1",
896 ML_(dinfo_free) );
897
898 /* Perhaps should start at i = 1; ELF docs suggest that entry
899 0 always denotes 'unknown symbol'. */
900 for (i = 1; i < (Word)(escn_symtab->szB/sizeof(ElfXX_Sym)); i++) {
901 ElfXX_Sym sym;
902 ML_(img_get)(&sym, escn_symtab->img,
903 escn_symtab->ioff + i * sizeof(ElfXX_Sym), sizeof(sym));
904 DiOffT sym_name = escn_strtab->ioff + sym.st_name;
905 Addr sym_svma = sym.st_value;
906
907 if (di->trace_symtab)
908 show_raw_elf_symbol(escn_strtab->img, i,
909 &sym, sym_name, sym_svma, True);
910
911 SymAVMAs sym_avmas_really;
912 Int sym_size = 0;
913 Bool from_opd = False, is_text = False, is_ifunc = False;
914 DiOffT sym_name_really = DiOffT_INVALID;
915 DiSym disym;
916 VG_(memset)(&disym, 0, sizeof(disym));
917 sym_avmas_really.main = 0;
918 SET_TOCPTR_AVMA(sym_avmas_really, 0);
919 SET_LOCAL_EP_AVMA(sym_avmas_really, 0);
920 if (get_elf_symbol_info(di, &sym, sym_name, escn_strtab,
921 sym_svma, symtab_in_debug,
922 escn_opd, di->text_bias,
923 &sym_name_really,
924 &sym_avmas_really,
925 &sym_size,
926 &from_opd, &is_text, &is_ifunc)) {
927
928 /* Check if we've seen this (name,addr) key before. */
929 key.addr = sym_avmas_really.main;
930 key.name = sym_name_really;
931 key.img = escn_strtab->img;
932 prev = VG_(OSetGen_Lookup)( oset, &key );
933
934 if (prev) {
935
936 /* Seen it before. Fold in whatever new info we can. */
937 modify_size = False;
938 modify_tocptr = False;
939 old_size = 0;
940
941 if (prev->from_opd && !from_opd
942 && (prev->size == 24 || prev->size == 16)
943 && sym_size != prev->size) {
944 /* Existing one is an opd-redirect, with a bogus size,
945 so the only useful new fact we have is the real size
946 of the symbol. */
947 modify_size = True;
948 old_size = prev->size;
949 prev->size = sym_size;
950 }
951 else
952 if (!prev->from_opd && from_opd
953 && (sym_size == 24 || sym_size == 16)) {
954 /* Existing one is non-opd, new one is opd. What we
955 can acquire from the new one is the TOC ptr to be
956 used. Since the existing sym is non-toc, it
957 shouldn't currently have an known TOC ptr. */
958 vg_assert(prev->tocptr == 0);
959 modify_tocptr = True;
960 prev->tocptr = GET_TOCPTR_AVMA(sym_avmas_really);
961 }
962 else {
963 /* ignore. can we do better here? */
964 }
965
966 /* Only one or the other is possible (I think) */
967 vg_assert(!(modify_size && modify_tocptr));
968
969 if (modify_size && di->trace_symtab) {
970 VG_(printf)(" modify (old sz %4d) "
971 " val %#010lx, toc %#010lx, sz %4d %llu\n",
972 old_size,
973 prev->key.addr,
974 prev->tocptr,
975 prev->size,
976 prev->key.name
977 );
978 }
979 if (modify_tocptr && di->trace_symtab) {
980 VG_(printf)(" modify (upd tocptr) "
981 " val %#010lx, toc %#010lx, sz %4d %llu\n",
982 prev->key.addr,
983 prev->tocptr,
984 prev->size,
985 prev->key.name
986 );
987 }
988
989 } else {
990
991 /* A new (name,addr) key. Add and continue. */
992 elem = VG_(OSetGen_AllocNode)(oset, sizeof(TempSym));
993 elem->key = key;
994 elem->tocptr = GET_TOCPTR_AVMA(sym_avmas_really);
995 elem->size = sym_size;
996 elem->from_opd = from_opd;
997 elem->is_text = is_text;
998 elem->is_ifunc = is_ifunc;
999 VG_(OSetGen_Insert)(oset, elem);
1000 if (di->trace_symtab) {
1001 HChar* str = ML_(img_strdup)(escn_strtab->img, "di.respl.2",
1002 elem->key.name);
1003 VG_(printf)(" to-oset [%4ld]: "
1004 " val %#010lx, toc %#010lx, sz %4d %s\n",
1005 i,
1006 elem->key.addr,
1007 elem->tocptr,
1008 (Int) elem->size,
1009 str
1010 );
1011 if (str) ML_(dinfo_free)(str);
1012 }
1013
1014 }
1015 }
1016 }
1017
1018 /* All the syms that matter are in the oset. Now pull them out,
1019 build a "standard" symbol table, and nuke the oset. */
1020
1021 i = 0;
1022 VG_(OSetGen_ResetIter)( oset );
1023
1024 while ( (elem = VG_(OSetGen_Next)(oset)) ) {
1025 DiSym disym;
1026 VG_(memset)(&disym, 0, sizeof(disym));
1027 HChar* cstr = ML_(img_strdup)(escn_strtab->img,
1028 "di.res__ppc64.1", elem->key.name);
1029 disym.avmas.main = elem->key.addr;
1030 SET_TOCPTR_AVMA(disym.avmas, elem->tocptr);
1031 SET_LOCAL_EP_AVMA(disym.avmas, 0); // ppc64be does not use local_ep.
1032 disym.pri_name = ML_(addStr) ( di, cstr, -1 );
1033 disym.sec_names = NULL;
1034 disym.size = elem->size;
1035 disym.isText = elem->is_text;
1036 disym.isIFunc = elem->is_ifunc;
1037 if (cstr) { ML_(dinfo_free)(cstr); cstr = NULL; }
1038 vg_assert(disym.pri_name != NULL);
1039
1040 ML_(addSym) ( di, &disym );
1041 if (di->trace_symtab) {
1042 VG_(printf)(" rec(%c) [%4ld]: "
1043 " val %#010lx, toc %#010lx, sz %4d %s\n",
1044 disym.isText ? 't' : 'd',
1045 i,
1046 disym.avmas.main,
1047 GET_TOCPTR_AVMA(disym.avmas),
1048 (Int) disym.size,
1049 disym.pri_name
1050 );
1051 }
1052 i++;
1053 }
1054
1055 VG_(OSetGen_Destroy)( oset );
1056 }
1057
1058
1059 /*
1060 * Look for a build-id in an ELF image. The build-id specification
1061 * can be found here:
1062 *
1063 * http://fedoraproject.org/wiki/RolandMcGrath/BuildID
1064 *
1065 * Returned string must be freed by the caller.
1066 */
1067 static
find_buildid(DiImage * img,Bool rel_ok,Bool search_shdrs)1068 HChar* find_buildid(DiImage* img, Bool rel_ok, Bool search_shdrs)
1069 {
1070 HChar* buildid = NULL;
1071
1072 # ifdef NT_GNU_BUILD_ID
1073 if (is_elf_object_file_by_DiImage(img, rel_ok)) {
1074 Word i;
1075
1076 ElfXX_Ehdr ehdr;
1077 ML_(img_get)(&ehdr, img, 0, sizeof(ehdr));
1078 for (i = 0; i < ehdr.e_phnum; i++) {
1079 ElfXX_Phdr phdr;
1080 ML_(img_get)(&phdr, img,
1081 ehdr.e_phoff + i * ehdr.e_phentsize, sizeof(phdr));
1082
1083 if (phdr.p_type == PT_NOTE) {
1084 ElfXX_Off note_ioff = phdr.p_offset;
1085
1086 while (note_ioff < phdr.p_offset + phdr.p_filesz) {
1087 ElfXX_Nhdr note;
1088 ML_(img_get)(¬e, img, (DiOffT)note_ioff, sizeof(note));
1089 DiOffT name_ioff = note_ioff + sizeof(ElfXX_Nhdr);
1090 DiOffT desc_ioff = name_ioff + ((note.n_namesz + 3) & ~3);
1091 if (ML_(img_strcmp_c)(img, name_ioff, ELF_NOTE_GNU) == 0
1092 && note.n_type == NT_GNU_BUILD_ID) {
1093 buildid = ML_(dinfo_zalloc)("di.fbi.1",
1094 note.n_descsz * 2 + 1);
1095 Word j;
1096 for (j = 0; j < note.n_descsz; j++) {
1097 UChar desc_j = ML_(img_get_UChar)(img, desc_ioff + j);
1098 VG_(sprintf)(buildid + VG_(strlen)(buildid),
1099 "%02x", (UInt)desc_j);
1100 }
1101 }
1102
1103 note_ioff = note_ioff + sizeof(ElfXX_Nhdr)
1104 + ((note.n_namesz + 3) & ~3)
1105 + ((note.n_descsz + 3) & ~3);
1106 }
1107 }
1108 }
1109
1110 /* Normally we would only search shdrs for ET_REL files, but when
1111 we search for a separate .debug file phdrs might not be there
1112 (they are never loaded) or have been corrupted, so try again
1113 against shdrs. */
1114 if (buildid || (!rel_ok && !search_shdrs))
1115 return buildid;
1116
1117 for (i = 0; i < ehdr.e_shnum; i++) {
1118 ElfXX_Shdr shdr;
1119 ML_(img_get)(&shdr, img,
1120 ehdr.e_shoff + i * ehdr.e_shentsize, sizeof(shdr));
1121
1122 if (shdr.sh_type == SHT_NOTE) {
1123 ElfXX_Off note_ioff = shdr.sh_offset;
1124
1125 while (note_ioff < shdr.sh_offset + shdr.sh_size) {
1126 ElfXX_Nhdr note;
1127 ML_(img_get)(¬e, img, (DiOffT)note_ioff, sizeof(note));
1128 DiOffT name_ioff = note_ioff + sizeof(ElfXX_Nhdr);
1129 DiOffT desc_ioff = name_ioff + ((note.n_namesz + 3) & ~3);
1130
1131 if (ML_(img_strcmp_c)(img, name_ioff, ELF_NOTE_GNU) == 0
1132 && note.n_type == NT_GNU_BUILD_ID) {
1133 buildid = ML_(dinfo_zalloc)("di.fbi.2",
1134 note.n_descsz * 2 + 1);
1135 Word j;
1136 for (j = 0; j < note.n_descsz; j++) {
1137 UChar desc_j = ML_(img_get_UChar)(img, desc_ioff + j);
1138 VG_(sprintf)(buildid + VG_(strlen)(buildid),
1139 "%02x", (UInt)desc_j);
1140 }
1141 }
1142
1143 note_ioff = note_ioff + sizeof(ElfXX_Nhdr)
1144 + ((note.n_namesz + 3) & ~3)
1145 + ((note.n_descsz + 3) & ~3);
1146 }
1147 }
1148 }
1149 }
1150 # endif /* def NT_GNU_BUILD_ID */
1151
1152 return buildid;
1153 }
1154
1155
1156 /* Try and open a separate debug file, ignoring any where the CRC does
1157 not match the value from the main object file. Returned DiImage
1158 must be discarded by the caller.
1159
1160 If |serverAddr| is NULL, |name| is expected to be a fully qualified
1161 (absolute) path to the file in the local filesystem. If
1162 |serverAddr| is non-NULL, it is expected to be an IPv4 and port
1163 spec of the form "d.d.d.d:d" or "d.d.d.d", and |name| is expected
1164 to be a plain filename (no path components at all).
1165 */
1166 static
open_debug_file(const HChar * name,const HChar * buildid,UInt crc,Bool rel_ok,const HChar * serverAddr)1167 DiImage* open_debug_file( const HChar* name, const HChar* buildid, UInt crc,
1168 Bool rel_ok, const HChar* serverAddr )
1169 {
1170 DiImage* dimg
1171 = serverAddr ? ML_(img_from_di_server)(name, serverAddr)
1172 : ML_(img_from_local_file)(name);
1173 if (dimg == NULL)
1174 return NULL;
1175
1176 if (VG_(clo_verbosity) > 1) {
1177 if (serverAddr)
1178 VG_(message)(Vg_DebugMsg, " Considering %s on server %s ..\n",
1179 name, serverAddr);
1180 else
1181 VG_(message)(Vg_DebugMsg, " Considering %s ..\n", name);
1182 }
1183
1184 /* We will always check the crc if we have one (altfiles don't have one)
1185 for now because we might be opening the main file again by any other
1186 name, and that obviously also has the same buildid. More efficient
1187 would be an fstat bases check or a check that the file actually
1188 contains .debug* sections. */
1189 if (buildid && crc == 0) {
1190 HChar* debug_buildid = find_buildid(dimg, rel_ok, True);
1191 if (debug_buildid == NULL || VG_(strcmp)(buildid, debug_buildid) != 0) {
1192 ML_(img_done)(dimg);
1193 if (VG_(clo_verbosity) > 1)
1194 VG_(message)(Vg_DebugMsg,
1195 " .. build-id mismatch (found %s wanted %s)\n",
1196 debug_buildid, buildid);
1197 ML_(dinfo_free)(debug_buildid);
1198 return NULL;
1199 }
1200 ML_(dinfo_free)(debug_buildid);
1201 if (VG_(clo_verbosity) > 1)
1202 VG_(message)(Vg_DebugMsg, " .. build-id is valid\n");
1203 } else {
1204 UInt calccrc = ML_(img_calc_gnu_debuglink_crc32)(dimg);
1205 if (calccrc != crc) {
1206 ML_(img_done)(dimg);
1207 if (VG_(clo_verbosity) > 1)
1208 VG_(message)(Vg_DebugMsg,
1209 " .. CRC mismatch (computed %08x wanted %08x)\n", calccrc, crc);
1210 return NULL;
1211 }
1212
1213 if (VG_(clo_verbosity) > 1)
1214 VG_(message)(Vg_DebugMsg, " .. CRC is valid\n");
1215 }
1216
1217 return dimg;
1218 }
1219
1220
1221 /* Try to find a separate debug file for a given object file. If
1222 found, return its DiImage, which should be freed by the caller. If
1223 |buildid| is non-NULL, then a debug object matching it is
1224 acceptable. If |buildid| is NULL or doesn't specify a findable
1225 debug object, then we look in various places to find a file with
1226 the specified CRC. And if that doesn't work out then we give
1227 up. */
1228 static
find_debug_file(struct _DebugInfo * di,const HChar * objpath,const HChar * buildid,const HChar * debugname,UInt crc,Bool rel_ok)1229 DiImage* find_debug_file( struct _DebugInfo* di,
1230 const HChar* objpath, const HChar* buildid,
1231 const HChar* debugname, UInt crc, Bool rel_ok )
1232 {
1233 const HChar* extrapath = VG_(clo_extra_debuginfo_path);
1234 const HChar* serverpath = VG_(clo_debuginfo_server);
1235
1236 DiImage* dimg = NULL; /* the img that we found */
1237 HChar* debugpath = NULL; /* where we found it */
1238
1239 if (buildid != NULL) {
1240 debugpath = ML_(dinfo_zalloc)("di.fdf.1",
1241 VG_(strlen)(buildid) + 33);
1242
1243 VG_(sprintf)(debugpath, "/usr/lib/debug/.build-id/%c%c/%s.debug",
1244 buildid[0], buildid[1], buildid + 2);
1245
1246 dimg = open_debug_file(debugpath, buildid, 0, rel_ok, NULL);
1247 if (!dimg) {
1248 ML_(dinfo_free)(debugpath);
1249 debugpath = NULL;
1250 }
1251 }
1252
1253 if (dimg == NULL && debugname != NULL) {
1254 HChar *objdir = ML_(dinfo_strdup)("di.fdf.2", objpath);
1255 HChar *objdirptr;
1256
1257 if ((objdirptr = VG_(strrchr)(objdir, '/')) != NULL)
1258 *objdirptr = '\0';
1259
1260 debugpath = ML_(dinfo_zalloc)(
1261 "di.fdf.3",
1262 VG_(strlen)(objdir) + VG_(strlen)(debugname) + 64
1263 + (extrapath ? VG_(strlen)(extrapath) : 0)
1264 + (serverpath ? VG_(strlen)(serverpath) : 0));
1265
1266 VG_(sprintf)(debugpath, "%s/%s", objdir, debugname);
1267 dimg = open_debug_file(debugpath, buildid, crc, rel_ok, NULL);
1268 if (dimg != NULL) goto dimg_ok;
1269
1270 VG_(sprintf)(debugpath, "%s/.debug/%s", objdir, debugname);
1271 dimg = open_debug_file(debugpath, buildid, crc, rel_ok, NULL);
1272 if (dimg != NULL) goto dimg_ok;
1273
1274 VG_(sprintf)(debugpath, "/usr/lib/debug%s/%s", objdir, debugname);
1275 dimg = open_debug_file(debugpath, buildid, crc, rel_ok, NULL);
1276 if (dimg != NULL) goto dimg_ok;
1277
1278 if (extrapath) {
1279 VG_(sprintf)(debugpath, "%s%s/%s", extrapath,
1280 objdir, debugname);
1281 dimg = open_debug_file(debugpath, buildid, crc, rel_ok, NULL);
1282 if (dimg != NULL) goto dimg_ok;
1283 }
1284
1285 if (serverpath) {
1286 /* When looking on the debuginfo server, always just pass the
1287 basename. */
1288 const HChar* basename = debugname;
1289 if (VG_(strstr)(basename, "/") != NULL) {
1290 basename = VG_(strrchr)(basename, '/') + 1;
1291 }
1292 VG_(sprintf)(debugpath, "%s on %s", basename, serverpath);
1293 dimg = open_debug_file(basename, buildid, crc, rel_ok, serverpath);
1294 if (dimg) goto dimg_ok;
1295 }
1296
1297 dimg_ok:
1298
1299 ML_(dinfo_free)(objdir);
1300 }
1301
1302 if (dimg != NULL) {
1303 vg_assert(debugpath);
1304 TRACE_SYMTAB("\n");
1305 TRACE_SYMTAB("------ Found a debuginfo file: %s\n", debugpath);
1306
1307 /* Only set once, we might be called again for opening the altfile. */
1308 if (di->fsm.dbgname == NULL)
1309 di->fsm.dbgname = ML_(dinfo_strdup)("di.fdf.4", debugpath);
1310 }
1311
1312 if (debugpath)
1313 ML_(dinfo_free)(debugpath);
1314
1315 return dimg;
1316 }
1317
1318
1319 /* Try to find a separate debug file for a given object file, in a
1320 hacky and dangerous way: check only the --extra-debuginfo-path and
1321 the --debuginfo-server. And don't do a consistency check. */
1322 static
find_debug_file_ad_hoc(const DebugInfo * di,const HChar * objpath)1323 DiImage* find_debug_file_ad_hoc( const DebugInfo* di,
1324 const HChar* objpath )
1325 {
1326 const HChar* extrapath = VG_(clo_extra_debuginfo_path);
1327 const HChar* serverpath = VG_(clo_debuginfo_server);
1328
1329 DiImage* dimg = NULL; /* the img that we found */
1330 HChar* debugpath = NULL; /* where we found it */
1331
1332 HChar *objdir = ML_(dinfo_strdup)("di.fdfah.1", objpath);
1333 HChar *objdirptr;
1334
1335 if ((objdirptr = VG_(strrchr)(objdir, '/')) != NULL)
1336 *objdirptr = '\0';
1337
1338 debugpath = ML_(dinfo_zalloc)(
1339 "di.fdfah.3",
1340 VG_(strlen)(objdir) + 64
1341 + (extrapath ? VG_(strlen)(extrapath) : 0)
1342 + (serverpath ? VG_(strlen)(serverpath) : 0));
1343
1344 if (extrapath) {
1345 VG_(sprintf)(debugpath, "%s/%s", extrapath, objpath);
1346 dimg = ML_(img_from_local_file)(debugpath);
1347 if (dimg != NULL) {
1348 if (VG_(clo_verbosity) > 1) {
1349 VG_(message)(Vg_DebugMsg, " Using (POSSIBLY MISMATCHED) %s\n",
1350 debugpath);
1351 }
1352 goto dimg_ok;
1353 }
1354 }
1355 if (serverpath) {
1356 /* When looking on the debuginfo server, always just pass the
1357 basename. */
1358 const HChar* basename = objpath;
1359 if (VG_(strstr)(basename, "/") != NULL) {
1360 basename = VG_(strrchr)(basename, '/') + 1;
1361 }
1362 VG_(sprintf)(debugpath, "%s on %s", basename, serverpath);
1363 dimg = ML_(img_from_di_server)(basename, serverpath);
1364 if (dimg != NULL) {
1365 if (VG_(clo_verbosity) > 1) {
1366 VG_(message)(Vg_DebugMsg, " Using (POSSIBLY MISMATCHED) %s\n",
1367 debugpath);
1368 }
1369 goto dimg_ok;
1370 }
1371 }
1372
1373 dimg_ok:
1374
1375 ML_(dinfo_free)(objdir);
1376
1377 if (dimg != NULL) {
1378 vg_assert(debugpath);
1379 TRACE_SYMTAB("\n");
1380 TRACE_SYMTAB("------ Found an ad_hoc debuginfo file: %s\n", debugpath);
1381 }
1382
1383 if (debugpath)
1384 ML_(dinfo_free)(debugpath);
1385
1386 return dimg;
1387 }
1388
1389
/* Return the image offset of entry number |idx| in a table that starts at
   |base| and whose entries are |scale| bytes apart. */
static DiOffT INDEX_BIS ( DiOffT base, UWord idx, UWord scale ) {
   // Both factors are widened to DiOffT before multiplying.  This is a
   // bit stupid: really, idx and scale ought to be 64-bit quantities,
   // always.
   const DiOffT delta = (DiOffT)idx * (DiOffT)scale;
   return base + delta;
}
1395
1396
1397 /* Find the file offset corresponding to SVMA by using the program
1398 headers. This is taken from binutils-2.17/binutils/readelf.c
1399 offset_from_vma(). */
1400 static
file_offset_from_svma(Bool * ok,Addr svma,DiImage * img,DiOffT phdr_ioff,Word phdr_nent,Word phdr_ent_szB)1401 Word file_offset_from_svma ( /*OUT*/Bool* ok,
1402 Addr svma,
1403 DiImage* img,
1404 DiOffT phdr_ioff,
1405 Word phdr_nent,
1406 Word phdr_ent_szB )
1407 {
1408 Word i;
1409 for (i = 0; i < phdr_nent; i++) {
1410 ElfXX_Phdr seg;
1411 ML_(img_get)(&seg, img,
1412 INDEX_BIS(phdr_ioff, i, phdr_ent_szB), sizeof(seg));
1413 if (seg.p_type != PT_LOAD)
1414 continue;
1415 if (svma >= (seg.p_vaddr & -seg.p_align)
1416 && svma + 1 <= seg.p_vaddr + seg.p_filesz) {
1417 *ok = True;
1418 return svma - seg.p_vaddr + seg.p_offset;
1419 }
1420 }
1421 *ok = False;
1422 return 0;
1423 }
1424
1425
1426 /* The central function for reading ELF debug info. For the
1427 object/exe specified by the DebugInfo, find ELF sections, then read
1428 the symbols, line number info, file name info, CFA (stack-unwind
1429 info) and anything else we want, into the tables within the
1430 supplied DebugInfo.
1431 */
1432
ML_(read_elf_debug_info)1433 Bool ML_(read_elf_debug_info) ( struct _DebugInfo* di )
1434 {
1435 /* This function is long and complex. That, and the presence of
1436 nested scopes, means it's not always easy to see which parts are
1437 in loops/conditionals and which aren't. To make it easier to
1438 follow, points executed exactly once -- that is, those which are
1439 the top level of the function -- are marked TOPLEVEL.
1440 */
1441 /* Consistent terminology for local variable names, without which
1442 it's almost unfollowably complex:
1443
1444 In which file?
1445 in the main ELF file *_m*
1446 in the debuginfo file *_d*
1447 in the alt debuginfo file *_a*
1448
1449 What kind of thing?
1450 _{m,d,a}img a DiImage*
1451 _{m,d,a}ioff an offset in the image (DiOffT)
1452 _{m,d,a}nent "number of entries"
1453 _{m,d,a}ent_szB "size in bytes of an entry"
1454 ehdr_{m,d,a} ELF header
1455 phdr Program header
1456 shdr Section header
1457 a_X a temporary X
1458 _escn an DiSlice (elf section info) variable
1459 szB size in bytes
1460 */
1461
1462
1463 /* TOPLEVEL */
1464 Bool res, ok;
1465 Word i, j;
1466 Bool dynbss_present = False;
1467 Bool sdynbss_present = False;
1468
1469 /* Image for the main ELF file we're working with. */
1470 DiImage* mimg = NULL;
1471
1472 /* Ditto for any ELF debuginfo file that we might happen to load. */
1473 DiImage* dimg = NULL;
1474
1475 /* Ditto for alternate ELF debuginfo file that we might happen to load. */
1476 DiImage* aimg = NULL;
1477
1478 /* ELF header offset for the main file. Should be zero since the
1479 ELF header is at start of file. */
1480 DiOffT ehdr_mioff = 0;
1481
1482 /* Program header table image addr, # entries, entry size */
1483 DiOffT phdr_mioff = 0;
1484 UWord phdr_mnent = 0;
1485 UWord phdr_ment_szB = 0;
1486
1487 /* Section header image addr, # entries, entry size. Also the
1488 associated string table. */
1489 DiOffT shdr_mioff = 0;
1490 UWord shdr_mnent = 0;
1491 UWord shdr_ment_szB = 0;
1492 DiOffT shdr_strtab_mioff = 0;
1493
1494 /* SVMAs covered by rx and rw segments and corresponding biases.
1495 Normally each object would provide just one rx and one rw area,
1496 but various ELF mangling tools create objects with multiple
1497 such entries, hence the generality. */
1498 typedef
1499 struct {
1500 Addr svma_base;
1501 Addr svma_limit;
1502 PtrdiffT bias;
1503 Bool exec;
1504 }
1505 RangeAndBias;
1506
1507 XArray* /* of RangeAndBias */ svma_ranges = NULL;
1508
1509 vg_assert(di);
1510 vg_assert(di->fsm.have_rx_map == True);
1511 vg_assert(di->fsm.have_rw_map == True);
1512 vg_assert(di->have_dinfo == False);
1513 vg_assert(di->fsm.filename);
1514 vg_assert(!di->symtab);
1515 vg_assert(!di->loctab);
1516 vg_assert(!di->inltab);
1517 vg_assert(!di->cfsi_base);
1518 vg_assert(!di->cfsi_m_ix);
1519 vg_assert(!di->cfsi_rd);
1520 vg_assert(!di->cfsi_exprs);
1521 vg_assert(!di->strpool);
1522 vg_assert(!di->fndnpool);
1523 vg_assert(!di->soname);
1524
1525 {
1526 Bool has_nonempty_rx = False;
1527 Bool has_nonempty_rw = False;
1528 for (i = 0; i < VG_(sizeXA)(di->fsm.maps); i++) {
1529 DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, i);
1530 if (!map->rx && !map->rw)
1531 continue;
1532 if (map->rx && map->size > 0)
1533 has_nonempty_rx = True;
1534 if (map->rw && map->size > 0)
1535 has_nonempty_rw = True;
1536 /* If this doesn't hold true, it means that m_syswrap/m_aspacemgr
1537 managed to do a mapping where the start isn't page aligned.
1538 Which sounds pretty bogus to me. */
1539 vg_assert(VG_IS_PAGE_ALIGNED(map->avma));
1540 }
1541 vg_assert(has_nonempty_rx);
1542 vg_assert(has_nonempty_rw);
1543 }
1544
1545 /* ----------------------------------------------------------
1546 At this point, there is very little information in the
1547 DebugInfo. We only know that something that looks like an ELF
1548 file has been mapped rx-ishly and rw-ishly as recorded in the
1549 di->fsm.maps array items. First we examine the file's ELF
1550 Program Header, and, by comparing that against the di->fsm.maps
1551 info, try to figure out the AVMAs for the sections we care
1552 about, that should have been mapped: text, data, sdata, bss,
1553 got, plt, and toc.
1554 ---------------------------------------------------------- */
1555
1556 res = False;
1557
1558 if (VG_(clo_verbosity) > 1 || VG_(clo_trace_redir))
1559 VG_(message)(Vg_DebugMsg, "Reading syms from %s\n",
1560 di->fsm.filename );
1561
1562 /* Connect to the primary object image, so that we can read symbols
1563 and line number info out of it. It will be disconnected
1564 immediately thereafter; it is only connected transiently. */
1565 mimg = ML_(img_from_local_file)(di->fsm.filename);
1566 if (mimg == NULL) {
1567 VG_(message)(Vg_UserMsg, "warning: connection to image %s failed\n",
1568 di->fsm.filename );
1569 VG_(message)(Vg_UserMsg, " no symbols or debug info loaded\n" );
1570 return False;
1571 }
1572
1573 /* Ok, the object image is available. Now verify that it is a
1574 valid ELF .so or executable image. */
1575 ok = is_elf_object_file_by_DiImage(mimg, False);
1576 if (!ok) {
1577 ML_(symerr)(di, True, "Invalid ELF Header");
1578 goto out;
1579 }
1580
1581 /* Find where the program and section header tables are, and give
1582 up if either is missing or outside the image (bogus). */
1583 ElfXX_Ehdr ehdr_m;
1584 vg_assert(ehdr_mioff == 0); // ensured by its initialisation
1585 ok = ML_(img_valid)(mimg, ehdr_mioff, sizeof(ehdr_m));
1586 vg_assert(ok); // ML_(is_elf_object_file) should ensure this
1587 ML_(img_get)(&ehdr_m, mimg, ehdr_mioff, sizeof(ehdr_m));
1588
1589 phdr_mioff = ehdr_mioff + ehdr_m.e_phoff;
1590 phdr_mnent = ehdr_m.e_phnum;
1591 phdr_ment_szB = ehdr_m.e_phentsize;
1592
1593 shdr_mioff = ehdr_mioff + ehdr_m.e_shoff;
1594 shdr_mnent = ehdr_m.e_shnum;
1595 shdr_ment_szB = ehdr_m.e_shentsize;
1596
1597 TRACE_SYMTAB("------ Basic facts about the object ------\n");
1598 TRACE_SYMTAB("object: n_oimage %llu\n",
1599 (ULong)ML_(img_size)(mimg));
1600 TRACE_SYMTAB("phdr: ioff %llu nent %lu ent_szB %lu\n",
1601 phdr_mioff, phdr_mnent, phdr_ment_szB);
1602 TRACE_SYMTAB("shdr: ioff %llu nent %lu ent_szB %lu\n",
1603 shdr_mioff, shdr_mnent, shdr_ment_szB);
1604 for (i = 0; i < VG_(sizeXA)(di->fsm.maps); i++) {
1605 const DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, i);
1606 if (map->rx)
1607 TRACE_SYMTAB("rx_map: avma %#lx size %lu foff %ld\n",
1608 map->avma, map->size, map->foff);
1609 }
1610 for (i = 0; i < VG_(sizeXA)(di->fsm.maps); i++) {
1611 const DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, i);
1612 if (map->rw)
1613 TRACE_SYMTAB("rw_map: avma %#lx size %lu foff %ld\n",
1614 map->avma, map->size, map->foff);
1615 }
1616
1617 if (phdr_mnent == 0
1618 || !ML_(img_valid)(mimg, phdr_mioff, phdr_mnent * phdr_ment_szB)) {
1619 ML_(symerr)(di, True, "Missing or invalid ELF Program Header Table");
1620 goto out;
1621 }
1622
1623 if (shdr_mnent == 0
1624 || !ML_(img_valid)(mimg, shdr_mioff, shdr_mnent * shdr_ment_szB)) {
1625 ML_(symerr)(di, True, "Missing or invalid ELF Section Header Table");
1626 goto out;
1627 }
1628
1629 /* Also find the section header's string table, and validate. */
1630 /* checked previously by is_elf_object_file: */
1631 vg_assert(ehdr_m.e_shstrndx != SHN_UNDEF);
1632
1633 // shdr_mioff is the offset of the section header table
1634 // and we need the ehdr_m.e_shstrndx'th entry
1635 { ElfXX_Shdr a_shdr;
1636 ML_(img_get)(&a_shdr, mimg,
1637 INDEX_BIS(shdr_mioff, ehdr_m.e_shstrndx, shdr_ment_szB),
1638 sizeof(a_shdr));
1639 shdr_strtab_mioff
1640 = ehdr_mioff /* isn't this always zero? */ + a_shdr.sh_offset;
1641
1642 if (!ML_(img_valid)(mimg, shdr_strtab_mioff,
1643 1/*bogus, but we don't know the real size*/ )) {
1644 ML_(symerr)(di, True, "Invalid ELF Section Header String Table");
1645 goto out;
1646 }
1647 }
1648
1649 TRACE_SYMTAB("shdr: string table at %llu\n", shdr_strtab_mioff);
1650
1651 svma_ranges = VG_(newXA)(ML_(dinfo_zalloc), "di.relfdi.1",
1652 ML_(dinfo_free), sizeof(RangeAndBias));
1653
1654 /* TOPLEVEL */
1655 /* Look through the program header table, and:
1656 - copy information from suitable PT_LOAD entries into svma_ranges
1657 - find (or fake up) the .soname for this object.
1658 */
1659 TRACE_SYMTAB("\n");
1660 TRACE_SYMTAB("------ Examining the program headers ------\n");
1661 vg_assert(di->soname == NULL);
1662 {
1663 /* TOPLEVEL */
1664 ElfXX_Addr prev_svma = 0;
1665
1666 for (i = 0; i < phdr_mnent; i++) {
1667 ElfXX_Phdr a_phdr;
1668 ML_(img_get)(&a_phdr, mimg,
1669 INDEX_BIS(phdr_mioff, i, phdr_ment_szB),
1670 sizeof(a_phdr));
1671
1672 /* Make sure the PT_LOADable entries are in order and
1673 non-overlapping. This in turn means the address ranges
1674 slurped into svma_ranges are in order and
1675 non-overlapping. */
1676
1677 if (a_phdr.p_type == PT_LOAD) {
1678 TRACE_SYMTAB("PT_LOAD[%ld]: p_vaddr %#lx (prev %#lx)\n",
1679 i, (UWord)a_phdr.p_vaddr, (UWord)prev_svma);
1680 TRACE_SYMTAB("PT_LOAD[%ld]: p_offset %lu, p_filesz %lu,"
1681 " perms %c%c%c\n",
1682 i, (UWord)a_phdr.p_offset, (UWord)a_phdr.p_filesz,
1683 a_phdr.p_flags & PF_R ? 'r' : '-',
1684 a_phdr.p_flags & PF_W ? 'w' : '-',
1685 a_phdr.p_flags & PF_X ? 'x' : '-');
1686 if (a_phdr.p_vaddr < prev_svma) {
1687 ML_(symerr)(di, True,
1688 "ELF Program Headers are not in ascending order");
1689 goto out;
1690 }
1691 prev_svma = a_phdr.p_vaddr;
1692 if (a_phdr.p_memsz > 0) {
1693 Bool loaded = False;
1694 for (j = 0; j < VG_(sizeXA)(di->fsm.maps); j++) {
1695 const DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, j);
1696 if ( (map->rx || map->rw)
1697 && map->size > 0 /* stay sane */
1698 && a_phdr.p_offset >= map->foff
1699 && a_phdr.p_offset < map->foff + map->size
1700 && a_phdr.p_offset + a_phdr.p_filesz
1701 <= map->foff + map->size) {
1702 RangeAndBias item;
1703 item.svma_base = a_phdr.p_vaddr;
1704 item.svma_limit = a_phdr.p_vaddr + a_phdr.p_memsz;
1705 item.bias = map->avma - map->foff
1706 + a_phdr.p_offset - a_phdr.p_vaddr;
1707 if (map->rw
1708 && (a_phdr.p_flags & (PF_R | PF_W))
1709 == (PF_R | PF_W)) {
1710 item.exec = False;
1711 VG_(addToXA)(svma_ranges, &item);
1712 TRACE_SYMTAB(
1713 "PT_LOAD[%ld]: acquired as rw, bias 0x%lx\n",
1714 i, (UWord)item.bias);
1715 loaded = True;
1716 }
1717 if (map->rx
1718 && (a_phdr.p_flags & (PF_R | PF_X))
1719 == (PF_R | PF_X)) {
1720 item.exec = True;
1721 VG_(addToXA)(svma_ranges, &item);
1722 TRACE_SYMTAB(
1723 "PT_LOAD[%ld]: acquired as rx, bias 0x%lx\n",
1724 i, (UWord)item.bias);
1725 loaded = True;
1726 }
1727 }
1728 }
1729 if (!loaded) {
1730 ML_(symerr)(di, False,
1731 "ELF section outside all mapped regions");
1732 /* This problem might be solved by further memory mappings.
1733 Avoid the vg_assert(!di->soname) at the beginning of this
1734 function if DYNAMIC section has been already processed. */
1735 if (di->soname) {
1736 ML_(dinfo_free)(di->soname);
1737 di->soname = NULL;
1738 }
1739 goto out;
1740 }
1741 }
1742 }
1743
1744 /* Try to get the soname. If there isn't one, use "NONE".
1745 The seginfo needs to have some kind of soname in order to
1746 facilitate writing redirect functions, since all redirect
1747 specifications require a soname (pattern). */
1748 if (a_phdr.p_type == PT_DYNAMIC && di->soname == NULL) {
1749 Word stroff = -1;
1750 DiOffT strtab_mioff = DiOffT_INVALID;
1751 for (j = 0; True/*exit check is in the loop*/; j++) {
1752 ElfXX_Dyn t_dyn_m; /* dyn_img[j] */
1753 ML_(img_get)(&t_dyn_m, mimg,
1754 INDEX_BIS(ehdr_mioff + a_phdr.p_offset,
1755 j, sizeof(ElfXX_Dyn)),
1756 sizeof(t_dyn_m));
1757 if (t_dyn_m.d_tag == DT_NULL)
1758 break;
1759
1760 switch (t_dyn_m.d_tag) {
1761 case DT_SONAME: {
1762 stroff = t_dyn_m.d_un.d_val;
1763 break;
1764 }
1765 case DT_STRTAB: {
1766 Bool ok2 = False;
1767 Word offset = file_offset_from_svma(
1768 &ok2, t_dyn_m.d_un.d_ptr, mimg,
1769 phdr_mioff, phdr_mnent, phdr_ment_szB
1770 );
1771 if (ok2 && strtab_mioff == DiOffT_INVALID) {
1772 // Check for obviously bogus offsets.
1773 if (!ML_(img_valid)(mimg, offset, 1)) {
1774 ML_(symerr)(di, True, "Invalid DT_STRTAB offset");
1775 goto out;
1776 }
1777 strtab_mioff = ehdr_mioff + offset;
1778 vg_assert(ehdr_mioff == 0); // should always be
1779 }
1780 break;
1781 }
1782 default:
1783 break;
1784 }
1785 }
1786 if (stroff != -1 && strtab_mioff != DiOffT_INVALID) {
1787 di->soname = ML_(img_strdup)(mimg, "di.redi.1",
1788 strtab_mioff + stroff);
1789 TRACE_SYMTAB("Found soname = %s\n", di->soname);
1790 }
1791 }
1792 } /* for (i = 0; i < phdr_Mnent; i++) ... */
1793 /* TOPLEVEL */
1794
1795 } /* examine the program headers (local scope) */
1796
1797 /* TOPLEVEL */
1798
1799 /* If, after looking at all the program headers, we still didn't
1800 find a soname, add a fake one. */
1801 if (di->soname == NULL) {
1802 TRACE_SYMTAB("No soname found; using (fake) \"NONE\"\n");
1803 di->soname = ML_(dinfo_strdup)("di.redi.2", "NONE");
1804 }
1805
1806 vg_assert(VG_(sizeXA)(svma_ranges) != 0);
1807
1808 /* Now read the section table. */
1809 TRACE_SYMTAB("\n");
1810 TRACE_SYMTAB("------ Examining the section headers ------\n");
1811 for (i = 0; i < VG_(sizeXA)(di->fsm.maps); i++) {
1812 const DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, i);
1813 if (map->rx)
1814 TRACE_SYMTAB("rx: at %#lx are mapped foffsets %ld .. %lu\n",
1815 map->avma, map->foff, map->foff + map->size - 1 );
1816 }
1817 TRACE_SYMTAB("rx: contains these svma regions:\n");
1818 for (i = 0; i < VG_(sizeXA)(svma_ranges); i++) {
1819 const RangeAndBias* reg = VG_(indexXA)(svma_ranges, i);
1820 if (reg->exec)
1821 TRACE_SYMTAB(" svmas %#lx .. %#lx with bias %#lx\n",
1822 reg->svma_base, reg->svma_limit - 1, (UWord)reg->bias );
1823 }
1824 for (i = 0; i < VG_(sizeXA)(di->fsm.maps); i++) {
1825 const DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, i);
1826 if (map->rw)
1827 TRACE_SYMTAB("rw: at %#lx are mapped foffsets %ld .. %lu\n",
1828 map->avma, map->foff, map->foff + map->size - 1 );
1829 }
1830 TRACE_SYMTAB("rw: contains these svma regions:\n");
1831 for (i = 0; i < VG_(sizeXA)(svma_ranges); i++) {
1832 const RangeAndBias* reg = VG_(indexXA)(svma_ranges, i);
1833 if (!reg->exec)
1834 TRACE_SYMTAB(" svmas %#lx .. %#lx with bias %#lx\n",
1835 reg->svma_base, reg->svma_limit - 1, (UWord)reg->bias );
1836 }
1837
1838 /* TOPLEVEL */
1839 /* Iterate over section headers */
1840 for (i = 0; i < shdr_mnent; i++) {
1841 ElfXX_Shdr a_shdr;
1842 ML_(img_get)(&a_shdr, mimg,
1843 INDEX_BIS(shdr_mioff, i, shdr_ment_szB), sizeof(a_shdr));
1844 DiOffT name_mioff = shdr_strtab_mioff + a_shdr.sh_name;
1845 HChar* name = ML_(img_strdup)(mimg, "di.redi_name.1", name_mioff);
1846 Addr svma = a_shdr.sh_addr;
1847 OffT foff = a_shdr.sh_offset;
1848 UWord size = a_shdr.sh_size; /* Do not change this to be signed. */
1849 UInt alyn = a_shdr.sh_addralign;
1850 Bool nobits = a_shdr.sh_type == SHT_NOBITS;
      /* Look through our collection of info obtained from the PT_LOAD
         headers, and make 'inrx' and 'inrw' point to the first
         executable and the first non-executable entry, respectively,
         whose svma range contains 'svma'.  If in either case none is
         found, leave the relevant pointer at NULL. */
1855 RangeAndBias* inrx = NULL;
1856 RangeAndBias* inrw = NULL;
1857 for (j = 0; j < VG_(sizeXA)(svma_ranges); j++) {
1858 RangeAndBias* rng = VG_(indexXA)(svma_ranges, j);
1859 if (svma >= rng->svma_base && svma < rng->svma_limit) {
1860 if (!inrx && rng->exec) {
1861 inrx = rng;
1862 } else if (!inrw && !rng->exec) {
1863 inrw = rng;
1864 }
1865 if (inrx && inrw)
1866 break;
1867 }
1868 }
1869
1870 TRACE_SYMTAB(" [sec %2ld] %s %s al%2u foff %6ld .. %6lu "
1871 " svma %p name \"%s\"\n",
1872 i, inrx ? "rx" : " ", inrw ? "rw" : " ", alyn,
1873 foff, foff+size-1, (void*)svma, name);
1874
1875 /* Check for sane-sized segments. SHT_NOBITS sections have zero
1876 size in the file and their offsets are just conceptual. */
1877 if (!nobits &&
1878 (foff >= ML_(img_size)(mimg) || foff + size > ML_(img_size)(mimg))) {
1879 ML_(symerr)(di, True, "ELF Section extends beyond image end");
1880 goto out;
1881 }
1882
1883 /* Check for a sane alignment value. */
1884 if (alyn > 0 && -1 == VG_(log2)(alyn)) {
1885 ML_(symerr)(di, True, "ELF Section contains invalid "
1886 ".sh_addralign value");
1887 goto out;
1888 }
1889
1890 /* Ignore zero sized sections. */
1891 if (size == 0) {
1892 TRACE_SYMTAB("zero sized section \"%s\", ignoring\n", name);
1893 ML_(dinfo_free)(name);
1894 continue;
1895 }
1896
#     define BAD(_secname)                                 \
         do { /* Report that section _secname has a   */   \
              /* mapping we can't make sense of, then */   \
              /* give up on this object via 'out'.    */   \
              ML_(symerr)(di, True,                        \
                          "Can't make sense of " _secname  \
                          " section mapping");             \
              /* make sure we don't assert if we find */   \
              /* ourselves back in this routine later, */  \
              /* with the same di */                       \
              di->soname = NULL;                           \
              /* 'name' is the per-iteration copy of   */  \
              /* the section name; free it here, since */  \
              /* the jump skips the free at the bottom */  \
              /* of the section-header loop.           */  \
              ML_(dinfo_free)(name);                       \
              goto out;                                    \
         } while (0)
1907
1908 /* Find avma-s for: .text .data .sdata .rodata .bss .sbss .plt .got .opd
1909 and .eh_frame */
1910
1911 /* Accept .text where mapped as rx (code), even if zero-sized */
1912 if (0 == VG_(strcmp)(name, ".text")) {
1913 if (inrx && !di->text_present) {
1914 di->text_present = True;
1915 di->text_svma = svma;
1916 di->text_avma = svma + inrx->bias;
1917 di->text_size = size;
1918 di->text_bias = inrx->bias;
1919 di->text_debug_svma = svma;
1920 di->text_debug_bias = inrx->bias;
1921 TRACE_SYMTAB("acquiring .text svma = %#lx .. %#lx\n",
1922 di->text_svma,
1923 di->text_svma + di->text_size - 1);
1924 TRACE_SYMTAB("acquiring .text avma = %#lx .. %#lx\n",
1925 di->text_avma,
1926 di->text_avma + di->text_size - 1);
1927 TRACE_SYMTAB("acquiring .text bias = %#lx\n", (UWord)di->text_bias);
1928 } else {
1929 BAD(".text");
1930 }
1931 }
1932
1933 /* Accept .data where mapped as rw (data), even if zero-sized */
1934 if (0 == VG_(strcmp)(name, ".data")) {
1935 if (inrw && !di->data_present) {
1936 di->data_present = True;
1937 di->data_svma = svma;
1938 di->data_avma = svma + inrw->bias;
1939 di->data_size = size;
1940 di->data_bias = inrw->bias;
1941 di->data_debug_svma = svma;
1942 di->data_debug_bias = inrw->bias;
1943 TRACE_SYMTAB("acquiring .data svma = %#lx .. %#lx\n",
1944 di->data_svma,
1945 di->data_svma + di->data_size - 1);
1946 TRACE_SYMTAB("acquiring .data avma = %#lx .. %#lx\n",
1947 di->data_avma,
1948 di->data_avma + di->data_size - 1);
1949 TRACE_SYMTAB("acquiring .data bias = %#lx\n", (UWord)di->data_bias);
1950 } else {
1951 BAD(".data");
1952 }
1953 }
1954
1955 /* Accept .sdata where mapped as rw (data) */
1956 if (0 == VG_(strcmp)(name, ".sdata")) {
1957 if (inrw && !di->sdata_present) {
1958 di->sdata_present = True;
1959 di->sdata_svma = svma;
1960 di->sdata_avma = svma + inrw->bias;
1961 di->sdata_size = size;
1962 di->sdata_bias = inrw->bias;
1963 di->sdata_debug_svma = svma;
1964 di->sdata_debug_bias = inrw->bias;
1965 TRACE_SYMTAB("acquiring .sdata svma = %#lx .. %#lx\n",
1966 di->sdata_svma,
1967 di->sdata_svma + di->sdata_size - 1);
1968 TRACE_SYMTAB("acquiring .sdata avma = %#lx .. %#lx\n",
1969 di->sdata_avma,
1970 di->sdata_avma + di->sdata_size - 1);
1971 TRACE_SYMTAB("acquiring .sdata bias = %#lx\n",
1972 (UWord)di->sdata_bias);
1973 } else {
1974 BAD(".sdata");
1975 }
1976 }
1977
1978 /* Accept .rodata where mapped as rx (data), even if zero-sized */
1979 if (0 == VG_(strcmp)(name, ".rodata")) {
1980 if (inrx && !di->rodata_present) {
1981 di->rodata_present = True;
1982 di->rodata_svma = svma;
1983 di->rodata_avma = svma + inrx->bias;
1984 di->rodata_size = size;
1985 di->rodata_bias = inrx->bias;
1986 di->rodata_debug_svma = svma;
1987 di->rodata_debug_bias = inrx->bias;
1988 /* NB was 'inrw' prior to r11794 */
1989 TRACE_SYMTAB("acquiring .rodata svma = %#lx .. %#lx\n",
1990 di->rodata_svma,
1991 di->rodata_svma + di->rodata_size - 1);
1992 TRACE_SYMTAB("acquiring .rodata avma = %#lx .. %#lx\n",
1993 di->rodata_avma,
1994 di->rodata_avma + di->rodata_size - 1);
1995 TRACE_SYMTAB("acquiring .rodata bias = %#lx\n",
1996 (UWord)di->rodata_bias);
1997 } else {
1998 BAD(".rodata");
1999 }
2000 }
2001
2002 if (0 == VG_(strcmp)(name, ".dynbss")) {
2003 if (inrw && !di->bss_present) {
2004 dynbss_present = True;
2005 di->bss_present = True;
2006 di->bss_svma = svma;
2007 di->bss_avma = svma + inrw->bias;
2008 di->bss_size = size;
2009 di->bss_bias = inrw->bias;
2010 di->bss_debug_svma = svma;
2011 di->bss_debug_bias = inrw->bias;
2012 TRACE_SYMTAB("acquiring .dynbss svma = %#lx .. %#lx\n",
2013 di->bss_svma,
2014 di->bss_svma + di->bss_size - 1);
2015 TRACE_SYMTAB("acquiring .dynbss avma = %#lx .. %#lx\n",
2016 di->bss_avma,
2017 di->bss_avma + di->bss_size - 1);
2018 TRACE_SYMTAB("acquiring .dynbss bias = %#lx\n",
2019 (UWord)di->bss_bias);
2020 }
2021 }
2022
2023 /* Accept .bss where mapped as rw (data), even if zero-sized */
2024 if (0 == VG_(strcmp)(name, ".bss")) {
2025 if (inrw && dynbss_present) {
2026 vg_assert(di->bss_present);
2027 dynbss_present = False;
2028 vg_assert(di->bss_svma + di->bss_size == svma);
2029 di->bss_size += size;
2030 TRACE_SYMTAB("acquiring .bss svma = %#lx .. %#lx\n",
2031 svma, svma + size - 1);
2032 TRACE_SYMTAB("acquiring .bss avma = %#lx .. %#lx\n",
2033 svma + inrw->bias, svma + inrw->bias + size - 1);
2034 TRACE_SYMTAB("acquiring .bss bias = %#lx\n",
2035 (UWord)di->bss_bias);
2036 } else
2037
2038 if (inrw && !di->bss_present) {
2039 di->bss_present = True;
2040 di->bss_svma = svma;
2041 di->bss_avma = svma + inrw->bias;
2042 di->bss_size = size;
2043 di->bss_bias = inrw->bias;
2044 di->bss_debug_svma = svma;
2045 di->bss_debug_bias = inrw->bias;
2046 TRACE_SYMTAB("acquiring .bss svma = %#lx .. %#lx\n",
2047 di->bss_svma,
2048 di->bss_svma + di->bss_size - 1);
2049 TRACE_SYMTAB("acquiring .bss avma = %#lx .. %#lx\n",
2050 di->bss_avma,
2051 di->bss_avma + di->bss_size - 1);
2052 TRACE_SYMTAB("acquiring .bss bias = %#lx\n",
2053 (UWord)di->bss_bias);
2054 } else
2055
2056 /* Now one from the wtf?! department ... */
2057 if (inrx && (!inrw) && !di->bss_present) {
2058 /* File contains a .bss, but it got mapped as rx only.
2059 This is very strange. For now, just pretend we didn't
2060 see it :-) */
2061 di->bss_present = False;
2062 di->bss_svma = 0;
2063 di->bss_avma = 0;
2064 di->bss_size = 0;
2065 di->bss_bias = 0;
2066 di->bss_debug_svma = 0;
2067 di->bss_debug_bias = 0;
2068 if (!VG_(clo_xml)) {
2069 VG_(message)(Vg_UserMsg,
2070 "Warning: the following file's .bss is "
2071 "mapped r-x only - ignoring .bss syms\n");
2072 VG_(message)(Vg_UserMsg, " %s\n", di->fsm.filename
2073 ? di->fsm.filename
2074 : "(null?!)" );
2075 }
2076 } else
2077
2078 if ((!inrw) && (!inrx) && !di->bss_present) {
2079 /* File contains a .bss, but it didn't get mapped. Ignore. */
2080 di->bss_present = False;
2081 di->bss_svma = 0;
2082 di->bss_avma = 0;
2083 di->bss_size = 0;
2084 di->bss_bias = 0;
2085 } else {
2086 BAD(".bss");
2087 }
2088 }
2089
2090 if (0 == VG_(strcmp)(name, ".sdynbss")) {
2091 if (inrw && !di->sbss_present) {
2092 sdynbss_present = True;
2093 di->sbss_present = True;
2094 di->sbss_svma = svma;
2095 di->sbss_avma = svma + inrw->bias;
2096 di->sbss_size = size;
2097 di->sbss_bias = inrw->bias;
2098 di->sbss_debug_svma = svma;
2099 di->sbss_debug_bias = inrw->bias;
2100 TRACE_SYMTAB("acquiring .sdynbss svma = %#lx .. %#lx\n",
2101 di->sbss_svma,
2102 di->sbss_svma + di->sbss_size - 1);
2103 TRACE_SYMTAB("acquiring .sdynbss avma = %#lx .. %#lx\n",
2104 di->sbss_avma,
2105 di->sbss_avma + di->sbss_size - 1);
2106 TRACE_SYMTAB("acquiring .sdynbss bias = %#lx\n",
2107 (UWord)di->sbss_bias);
2108 }
2109 }
2110
2111 /* Accept .sbss where mapped as rw (data) */
2112 if (0 == VG_(strcmp)(name, ".sbss")) {
2113 if (inrw && sdynbss_present) {
2114 vg_assert(di->sbss_present);
2115 sdynbss_present = False;
2116 vg_assert(di->sbss_svma + di->sbss_size == svma);
2117 di->sbss_size += size;
2118 TRACE_SYMTAB("acquiring .sbss svma = %#lx .. %#lx\n",
2119 svma, svma + size - 1);
2120 TRACE_SYMTAB("acquiring .sbss avma = %#lx .. %#lx\n",
2121 svma + inrw->bias, svma + inrw->bias + size - 1);
2122 TRACE_SYMTAB("acquiring .sbss bias = %#lx\n", (UWord)di->sbss_bias);
2123 } else
2124
2125 if (inrw && !di->sbss_present) {
2126 di->sbss_present = True;
2127 di->sbss_svma = svma;
2128 di->sbss_avma = svma + inrw->bias;
2129 di->sbss_size = size;
2130 di->sbss_bias = inrw->bias;
2131 di->sbss_debug_svma = svma;
2132 di->sbss_debug_bias = inrw->bias;
2133 TRACE_SYMTAB("acquiring .sbss svma = %#lx .. %#lx\n",
2134 di->sbss_svma,
2135 di->sbss_svma + di->sbss_size - 1);
2136 TRACE_SYMTAB("acquiring .sbss avma = %#lx .. %#lx\n",
2137 di->sbss_avma,
2138 di->sbss_avma + di->sbss_size - 1);
2139 TRACE_SYMTAB("acquiring .sbss bias = %#lx\n", (UWord)di->sbss_bias);
2140 } else {
2141 BAD(".sbss");
2142 }
2143 }
2144
2145 /* Accept .got where mapped as rw (data) */
2146 if (0 == VG_(strcmp)(name, ".got")) {
2147 if (inrw && !di->got_present) {
2148 di->got_present = True;
2149 di->got_avma = svma + inrw->bias;
2150 di->got_size = size;
2151 TRACE_SYMTAB("acquiring .got avma = %#lx\n", di->got_avma);
2152 } else {
2153 BAD(".got");
2154 }
2155 }
2156
2157 /* Accept .got.plt where mapped as rw (data) */
2158 if (0 == VG_(strcmp)(name, ".got.plt")) {
2159 if (inrw && !di->gotplt_present) {
2160 di->gotplt_present = True;
2161 di->gotplt_avma = svma + inrw->bias;
2162 di->gotplt_size = size;
2163 TRACE_SYMTAB("acquiring .got.plt avma = %#lx\n", di->gotplt_avma);
2164 } else if (size != 0) {
2165 BAD(".got.plt");
2166 }
2167 }
2168
2169 /* PLT is different on different platforms, it seems. */
2170 # if defined(VGP_x86_linux) || defined(VGP_amd64_linux) \
2171 || defined(VGP_arm_linux) || defined (VGP_s390x_linux) \
2172 || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \
2173 || defined(VGP_arm64_linux) || defined(VGP_tilegx_linux) \
2174 || defined(VGP_x86_solaris) || defined(VGP_amd64_solaris)
2175 /* Accept .plt where mapped as rx (code) */
2176 if (0 == VG_(strcmp)(name, ".plt")) {
2177 if (inrx && !di->plt_present) {
2178 di->plt_present = True;
2179 di->plt_avma = svma + inrx->bias;
2180 di->plt_size = size;
2181 TRACE_SYMTAB("acquiring .plt avma = %#lx\n", di->plt_avma);
2182 } else {
2183 BAD(".plt");
2184 }
2185 }
2186 # elif defined(VGP_ppc32_linux)
2187 /* Accept .plt where mapped as rw (data) */
2188 if (0 == VG_(strcmp)(name, ".plt")) {
2189 if (inrw && !di->plt_present) {
2190 di->plt_present = True;
2191 di->plt_avma = svma + inrw->bias;
2192 di->plt_size = size;
2193 TRACE_SYMTAB("acquiring .plt avma = %#lx\n", di->plt_avma);
2194 } else {
2195 BAD(".plt");
2196 }
2197 }
2198 # elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
2199 /* Accept .plt where mapped as rw (data), or unmapped */
2200 if (0 == VG_(strcmp)(name, ".plt")) {
2201 if (inrw && !di->plt_present) {
2202 di->plt_present = True;
2203 di->plt_avma = svma + inrw->bias;
2204 di->plt_size = size;
2205 TRACE_SYMTAB("acquiring .plt avma = %#lx\n", di->plt_avma);
2206 } else
2207 if ((!inrw) && (!inrx) && size > 0 && !di->plt_present) {
2208 /* File contains a .plt, but it didn't get mapped.
2209 Presumably it is not required on this platform. At
2210 least don't reject the situation as invalid. */
2211 di->plt_present = True;
2212 di->plt_avma = 0;
2213 di->plt_size = 0;
2214 } else {
2215 BAD(".plt");
2216 }
2217 }
2218 # else
2219 # error "Unsupported platform"
2220 # endif
2221
2222 /* Accept .opd where mapped as rw (data) */
2223 if (0 == VG_(strcmp)(name, ".opd")) {
2224 if (inrw && !di->opd_present) {
2225 di->opd_present = True;
2226 di->opd_avma = svma + inrw->bias;
2227 di->opd_size = size;
2228 TRACE_SYMTAB("acquiring .opd avma = %#lx\n", di->opd_avma);
2229 } else {
2230 BAD(".opd");
2231 }
2232 }
2233
2234 /* Accept .eh_frame where mapped as rx (code). This seems to be
2235 the common case. However, if that doesn't pan out, try for
2236 rw (data) instead. We can handle up to N_EHFRAME_SECTS per
2237 ELF object. */
2238 if (0 == VG_(strcmp)(name, ".eh_frame")) {
2239 if (inrx && di->n_ehframe < N_EHFRAME_SECTS) {
2240 di->ehframe_avma[di->n_ehframe] = svma + inrx->bias;
2241 di->ehframe_size[di->n_ehframe] = size;
2242 TRACE_SYMTAB("acquiring .eh_frame avma = %#lx\n",
2243 di->ehframe_avma[di->n_ehframe]);
2244 di->n_ehframe++;
2245 } else
2246 if (inrw && di->n_ehframe < N_EHFRAME_SECTS) {
2247 di->ehframe_avma[di->n_ehframe] = svma + inrw->bias;
2248 di->ehframe_size[di->n_ehframe] = size;
2249 TRACE_SYMTAB("acquiring .eh_frame avma = %#lx\n",
2250 di->ehframe_avma[di->n_ehframe]);
2251 di->n_ehframe++;
2252 } else {
2253 BAD(".eh_frame");
2254 }
2255 }
2256
2257 /* Accept .ARM.exidx where mapped as rx (code). */
2258 /* FIXME: make sure the entire section is mapped in, not just
2259 the first address. */
2260 if (0 == VG_(strcmp)(name, ".ARM.exidx")) {
2261 if (inrx && !di->exidx_present) {
2262 di->exidx_present = True;
2263 di->exidx_svma = svma;
2264 di->exidx_avma = svma + inrx->bias;
2265 di->exidx_size = size;
2266 di->exidx_bias = inrx->bias;
2267 TRACE_SYMTAB("acquiring .exidx svma = %#lx .. %#lx\n",
2268 di->exidx_svma,
2269 di->exidx_svma + di->exidx_size - 1);
2270 TRACE_SYMTAB("acquiring .exidx avma = %#lx .. %#lx\n",
2271 di->exidx_avma,
2272 di->exidx_avma + di->exidx_size - 1);
2273 TRACE_SYMTAB("acquiring .exidx bias = %#lx\n",
2274 (UWord)di->exidx_bias);
2275 } else {
2276 BAD(".ARM.exidx");
2277 }
2278 }
2279
2280 /* Accept .ARM.extab where mapped as rx (code). */
2281 /* FIXME: make sure the entire section is mapped in, not just
2282 the first address. */
2283 if (0 == VG_(strcmp)(name, ".ARM.extab")) {
2284 if (inrx && !di->extab_present) {
2285 di->extab_present = True;
2286 di->extab_svma = svma;
2287 di->extab_avma = svma + inrx->bias;
2288 di->extab_size = size;
2289 di->extab_bias = inrx->bias;
2290 TRACE_SYMTAB("acquiring .extab svma = %#lx .. %#lx\n",
2291 di->extab_svma,
2292 di->extab_svma + di->extab_size - 1);
2293 TRACE_SYMTAB("acquiring .extab avma = %#lx .. %#lx\n",
2294 di->extab_avma,
2295 di->extab_avma + di->extab_size - 1);
2296 TRACE_SYMTAB("acquiring .extab bias = %#lx\n",
2297 (UWord)di->extab_bias);
2298 } else {
2299 BAD(".ARM.extab");
2300 }
2301 }
2302
2303 ML_(dinfo_free)(name);
2304
2305 # undef BAD
2306
2307 } /* iterate over the section headers */
2308
2309 /* TOPLEVEL */
2310 if (0) VG_(printf)("YYYY text_: avma %#lx size %lu bias %#lx\n",
2311 di->text_avma, di->text_size, (UWord)di->text_bias);
2312
2313 if (VG_(clo_verbosity) > 2 || VG_(clo_trace_redir))
2314 VG_(message)(Vg_DebugMsg, " svma %#010lx, avma %#010lx\n",
2315 di->text_avma - di->text_bias,
2316 di->text_avma );
2317
2318 TRACE_SYMTAB("\n");
2319 TRACE_SYMTAB("------ Finding image addresses "
2320 "for debug-info sections ------\n");
2321
2322 /* TOPLEVEL */
2323 /* Find interesting sections, read the symbol table(s), read any
2324 debug information. Each section is located either in the main,
2325 debug or alt-debug files, but only in one. For each section,
2326 |section_escn| records which of |mimg|, |dimg| or |aimg| we
2327 found it in, along with the section's image offset and its size.
2328 The triples (section_img, section_ioff, section_szB) are
2329 consistent, in that they are always either (NULL,
2330 DiOffT_INVALID, 0), or refer to the same image, and are all
2331 assigned together. */
2332 {
2333 /* TOPLEVEL */
2334 DiSlice strtab_escn = DiSlice_INVALID; // .strtab
2335 DiSlice symtab_escn = DiSlice_INVALID; // .symtab
2336 DiSlice dynstr_escn = DiSlice_INVALID; // .dynstr
2337 DiSlice dynsym_escn = DiSlice_INVALID; // .dynsym
2338 # if defined(VGO_solaris)
2339 DiSlice ldynsym_escn = DiSlice_INVALID; // .SUNW_ldynsym
2340 # endif
2341 DiSlice debuglink_escn = DiSlice_INVALID; // .gnu_debuglink
2342 DiSlice debugaltlink_escn = DiSlice_INVALID; // .gnu_debugaltlink
2343 DiSlice debug_line_escn = DiSlice_INVALID; // .debug_line (dwarf2)
2344 DiSlice debug_info_escn = DiSlice_INVALID; // .debug_info (dwarf2)
2345 DiSlice debug_types_escn = DiSlice_INVALID; // .debug_types (dwarf4)
2346 DiSlice debug_abbv_escn = DiSlice_INVALID; // .debug_abbrev (dwarf2)
2347 DiSlice debug_str_escn = DiSlice_INVALID; // .debug_str (dwarf2)
2348 DiSlice debug_ranges_escn = DiSlice_INVALID; // .debug_ranges (dwarf2)
2349 DiSlice debug_loc_escn = DiSlice_INVALID; // .debug_loc (dwarf2)
2350 DiSlice debug_frame_escn = DiSlice_INVALID; // .debug_frame (dwarf2)
2351 DiSlice debug_line_alt_escn = DiSlice_INVALID; // .debug_line (alt)
2352 DiSlice debug_info_alt_escn = DiSlice_INVALID; // .debug_info (alt)
2353 DiSlice debug_abbv_alt_escn = DiSlice_INVALID; // .debug_abbrev (alt)
2354 DiSlice debug_str_alt_escn = DiSlice_INVALID; // .debug_str (alt)
2355 DiSlice dwarf1d_escn = DiSlice_INVALID; // .debug (dwarf1)
2356 DiSlice dwarf1l_escn = DiSlice_INVALID; // .line (dwarf1)
2357 DiSlice opd_escn = DiSlice_INVALID; // .opd (dwarf2,
2358 // ppc64be-linux)
2359 DiSlice ehframe_escn[N_EHFRAME_SECTS]; // .eh_frame (dwarf2)
2360
2361 for (i = 0; i < N_EHFRAME_SECTS; i++)
2362 ehframe_escn[i] = DiSlice_INVALID;
2363
2364 /* Find all interesting sections */
2365
2366 UInt ehframe_mix = 0;
2367
      /* What FINDX does: it looks at section header number |i| of the
         main image and, if that section is called _sec_name, records
         in _sec_escn the image it was found in (here always |mimg|),
         its offset within that image and its size.  If the section is
         found, _post_fx is executed after _sec_escn has been assigned
         to.  FIND is the same, but with no post-effect.

         Even for sections which are marked loadable, the client's
         ld.so may not have loaded them yet, so there is no guarantee
         that we can safely prod around in any such area.  Because
         the entire object file is transiently mapped aboard for
         inspection, it's always safe to inspect that area. */
2379
2380 /* TOPLEVEL */
2381 /* Iterate over section headers (again) */
2382 for (i = 0; i < ehdr_m.e_shnum; i++) {
2383
#        define FINDX(_sec_name, _sec_escn, _post_fx) \
         do { \
            /* Fetch section header |i| of the main image; if the  */ \
            /* section is called _sec_name, record where it lives  */ \
            /* in _sec_escn and then run _post_fx.                 */ \
            ElfXX_Shdr a_shdr; \
            ML_(img_get)(&a_shdr, mimg, \
                         INDEX_BIS(shdr_mioff, i, shdr_ment_szB), \
                         sizeof(a_shdr)); \
            if (0 == ML_(img_strcmp_c)(mimg, shdr_strtab_mioff \
                                             + a_shdr.sh_name, _sec_name)) { \
               Bool nobits; \
               _sec_escn.img  = mimg; \
               _sec_escn.ioff = (DiOffT)a_shdr.sh_offset; \
               _sec_escn.szB  = a_shdr.sh_size; \
               nobits         = a_shdr.sh_type == SHT_NOBITS; \
               vg_assert(_sec_escn.img  != NULL); \
               vg_assert(_sec_escn.ioff != DiOffT_INVALID); \
               TRACE_SYMTAB( "%-18s:  ioff %llu .. %llu\n", \
                             _sec_name, (ULong)_sec_escn.ioff, \
                             ((ULong)_sec_escn.ioff) + _sec_escn.szB - 1); \
               /* SHT_NOBITS sections have zero size in the file. */ \
               /* The size is compared against the room left after */ \
               /* the section's offset, rather than testing        */ \
               /* offset + size, so that a bogus, enormous size    */ \
               /* cannot wrap the sum around and pass the check.   */ \
               if (!nobits && \
                   ((ULong)_sec_escn.ioff > (ULong)ML_(img_size)(mimg) \
                    || (ULong)_sec_escn.szB \
                       > (ULong)ML_(img_size)(mimg) \
                         - (ULong)_sec_escn.ioff)) { \
                  ML_(symerr)(di, True, \
                              "   section beyond image end?!"); \
                  goto out; \
               } \
               _post_fx; \
            } \
         } while (0);
         /* NB: the trailing ';' above is deliberate -- the uses of
            FINDX/FIND below are written without one. */

         /* Version with no post-effects */
#        define FIND(_sec_name, _sec_escn) \
            FINDX(_sec_name, _sec_escn, /**/)
2416
2417 /* NAME ElfSec */
2418 FIND(".dynsym", dynsym_escn)
2419 FIND(".dynstr", dynstr_escn)
2420 FIND(".symtab", symtab_escn)
2421 FIND(".strtab", strtab_escn)
2422 # if defined(VGO_solaris)
2423 FIND(".SUNW_ldynsym", ldynsym_escn)
2424 # endif
2425
2426 FIND(".gnu_debuglink", debuglink_escn)
2427 FIND(".gnu_debugaltlink", debugaltlink_escn)
2428
2429 FIND(".debug_line", debug_line_escn)
2430 FIND(".debug_info", debug_info_escn)
2431 FIND(".debug_types", debug_types_escn)
2432 FIND(".debug_abbrev", debug_abbv_escn)
2433 FIND(".debug_str", debug_str_escn)
2434 FIND(".debug_ranges", debug_ranges_escn)
2435 FIND(".debug_loc", debug_loc_escn)
2436 FIND(".debug_frame", debug_frame_escn)
2437
2438 FIND(".debug", dwarf1d_escn)
2439 FIND(".line", dwarf1l_escn)
2440
2441 FIND(".opd", opd_escn)
2442
2443 FINDX(".eh_frame", ehframe_escn[ehframe_mix],
2444 do { ehframe_mix++; vg_assert(ehframe_mix <= N_EHFRAME_SECTS);
2445 } while (0)
2446 )
2447 /* Comment_on_EH_FRAME_MULTIPLE_INSTANCES: w.r.t. .eh_frame
2448 multi-instance kludgery, how are we assured that the order
2449 in which we fill in ehframe_escn[] is consistent with the
2450 order in which we previously filled in di->ehframe_avma[]
2451 and di->ehframe_size[] ? By the fact that in both cases,
2452 these arrays were filled in by iterating over the section
2453 headers top-to-bottom. So both loops (this one and the
2454 previous one) encounter the .eh_frame entries in the same
2455 order and so fill in these arrays in a consistent order.
2456 */
2457
2458 # undef FINDX
2459 # undef FIND
2460 } /* Iterate over section headers (again) */
2461
2462 /* TOPLEVEL */
2463 /* Now, see if we can find a debuginfo object, and if so connect
2464 |dimg| to it. */
2465 vg_assert(dimg == NULL && aimg == NULL);
2466
2467 /* Look for a build-id */
2468 HChar* buildid = find_buildid(mimg, False, False);
2469
2470 /* Look for a debug image that matches either the build-id or
2471 the debuglink-CRC32 in the main image. If the main image
2472 doesn't contain either of those then this won't even bother
2473 to try looking. This looks in all known places, including
2474 the --extra-debuginfo-path if specified and on the
2475 --debuginfo-server if specified. */
2476 if (buildid != NULL || debuglink_escn.img != NULL) {
         /* Do we have a debuglink section? */
2478 if (debuglink_escn.img != NULL) {
2479 UInt crc_offset
2480 = VG_ROUNDUP(ML_(img_strlen)(debuglink_escn.img,
2481 debuglink_escn.ioff)+1, 4);
2482 vg_assert(crc_offset + sizeof(UInt) <= debuglink_escn.szB);
2483
2484 /* Extract the CRC from the debuglink section */
2485 UInt crc = ML_(img_get_UInt)(debuglink_escn.img,
2486 debuglink_escn.ioff + crc_offset);
2487
2488 /* See if we can find a matching debug file */
2489 HChar* debuglink_str_m
2490 = ML_(img_strdup)(debuglink_escn.img,
2491 "di.redi_dlk.1", debuglink_escn.ioff);
2492 dimg = find_debug_file( di, di->fsm.filename, buildid,
2493 debuglink_str_m, crc, False );
2494 if (debuglink_str_m)
2495 ML_(dinfo_free)(debuglink_str_m);
2496 } else {
2497 /* See if we can find a matching debug file */
2498 dimg = find_debug_file( di, di->fsm.filename, buildid,
2499 NULL, 0, False );
2500 }
2501 }
2502
2503 if (buildid) {
2504 ML_(dinfo_free)(buildid);
2505 buildid = NULL; /* paranoia */
2506 }
2507
      /* As a last-ditch measure, try looking for it in the
         --extra-debuginfo-path and/or on the --debuginfo-server, but
         only in the case where --allow-mismatched-debuginfo=yes.
         This is dangerous in that (1) it gives no assurance that the
         debuginfo object matches the main one, and hence (2) we will
         very likely get an assertion in the code below, if indeed
         there is a mismatch.  Hence it is disabled by default
         (--allow-mismatched-debuginfo=no).  Nevertheless it's
         sometimes a useful way of getting out of a tight spot.

         Note that we're ignoring the name in the .gnu_debuglink
         section here, and just looking for a file of the same name
         either in the extra-path or on the server. */
2521 if (dimg == NULL && VG_(clo_allow_mismatched_debuginfo)) {
2522 dimg = find_debug_file_ad_hoc( di, di->fsm.filename );
2523 }
2524
2525 /* TOPLEVEL */
2526 /* If we were successful in finding a debug image, pull various
2527 SVMA/bias/size and image addresses out of it. */
2528 if (dimg != NULL && is_elf_object_file_by_DiImage(dimg, False)) {
2529
2530 /* Pull out and validate program header and section header info */
2531 DiOffT ehdr_dioff = 0;
2532 ElfXX_Ehdr ehdr_dimg;
2533 ML_(img_get)(&ehdr_dimg, dimg, ehdr_dioff, sizeof(ehdr_dimg));
2534
2535 DiOffT phdr_dioff = ehdr_dimg.e_phoff;
2536 UWord phdr_dnent = ehdr_dimg.e_phnum;
2537 UWord phdr_dent_szB = ehdr_dimg.e_phentsize;
2538
2539 DiOffT shdr_dioff = ehdr_dimg.e_shoff;
2540 UWord shdr_dnent = ehdr_dimg.e_shnum;
2541 UWord shdr_dent_szB = ehdr_dimg.e_shentsize;
2542
2543 DiOffT shdr_strtab_dioff = DiOffT_INVALID;
2544
2545 /* SVMAs covered by rx and rw segments and corresponding bias. */
2546 Addr rx_dsvma_limit = 0;
2547 PtrdiffT rx_dbias = 0;
2548 Addr rw_dsvma_limit = 0;
2549 PtrdiffT rw_dbias = 0;
2550
2551 Bool need_symtab, need_dwarf2, need_dwarf1;
2552
2553 if (phdr_dnent == 0
2554 || !ML_(img_valid)(dimg, phdr_dioff,
2555 phdr_dnent * phdr_dent_szB)) {
2556 ML_(symerr)(di, True,
2557 "Missing or invalid ELF Program Header Table"
2558 " (debuginfo file)");
2559 goto out;
2560 }
2561
2562 if (shdr_dnent == 0
2563 || !ML_(img_valid)(dimg, shdr_dioff,
2564 shdr_dnent * shdr_dent_szB)) {
2565 ML_(symerr)(di, True,
2566 "Missing or invalid ELF Section Header Table"
2567 " (debuginfo file)");
2568 goto out;
2569 }
2570
2571 /* Also find the section header's string table, and validate. */
2572 /* checked previously by is_elf_object_file: */
2573 vg_assert(ehdr_dimg.e_shstrndx != SHN_UNDEF);
2574
2575 // shdr_dioff is the offset of the section header table
2576 // and we need the ehdr_dimg.e_shstrndx'th entry
2577 { ElfXX_Shdr a_shdr;
2578 ML_(img_get)(&a_shdr, dimg,
2579 INDEX_BIS(shdr_dioff, ehdr_dimg.e_shstrndx,
2580 shdr_dent_szB),
2581 sizeof(a_shdr));
2582 shdr_strtab_dioff = a_shdr.sh_offset;
2583 if (!ML_(img_valid)(dimg, shdr_strtab_dioff,
2584 1/*bogus, but we don't know the real size*/)) {
2585 ML_(symerr)(di, True,
2586 "Invalid ELF Section Header String Table"
2587 " (debuginfo file)");
2588 goto out;
2589 }
2590 }
2591
2592 for (i = 0; i < ehdr_dimg.e_phnum; i++) {
2593 ElfXX_Phdr a_phdr;
2594 ML_(img_get)(&a_phdr, dimg, INDEX_BIS(ehdr_dimg.e_phoff,
2595 i, phdr_dent_szB),
2596 sizeof(a_phdr));
2597 if (a_phdr.p_type == PT_LOAD) {
2598 for (j = 0; j < VG_(sizeXA)(di->fsm.maps); j++) {
2599 const DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, j);
2600 if ( a_phdr.p_offset >= map->foff
2601 && a_phdr.p_offset < map->foff + map->size
2602 && a_phdr.p_offset + a_phdr.p_filesz
2603 < map->foff + map->size) {
2604 if (map->rx && rx_dsvma_limit == 0) {
2605 rx_dsvma_limit = a_phdr.p_vaddr + a_phdr.p_memsz;
2606 rx_dbias = map->avma - map->foff + a_phdr.p_offset
2607 - a_phdr.p_vaddr;
2608 }
2609 if (map->rw && rw_dsvma_limit == 0) {
2610 rw_dsvma_limit = a_phdr.p_vaddr + a_phdr.p_memsz;
2611 rw_dbias = map->avma - map->foff + a_phdr.p_offset
2612 - a_phdr.p_vaddr;
2613 }
2614 break;
2615 }
2616 }
2617 }
2618 }
2619
2620 need_symtab = (symtab_escn.img == NULL);
2621 need_dwarf2 = (debug_info_escn.img == NULL);
2622 need_dwarf1 = (dwarf1d_escn.img == NULL);
2623
2624 /* Find all interesting sections in the debug image */
2625 for (i = 0; i < ehdr_dimg.e_shnum; i++) {
2626
2627 /* Find debug svma and bias information for sections
2628 we found in the main file. */
2629
#           define FIND(_sec, _seg) \
            do { \
               /* Examine section header |i| of the debug image.  If */ \
               /* the main object has a ._sec section (that is,      */ \
               /* di->_sec_present is set) and this header is named  */ \
               /* "._sec", record the debug file's svma for it and   */ \
               /* derive the matching debug bias.  _seg (rx or rw)   */ \
               /* is pasted into _seg##_dbias, which is used only in */ \
               /* the second assertion below.  The macro ends in a   */ \
               /* ';' because its uses are written without one.      */ \
               ElfXX_Shdr a_shdr; \
               ML_(img_get)(&a_shdr, dimg, \
                            INDEX_BIS(shdr_dioff, i, shdr_dent_szB), \
                            sizeof(a_shdr)); \
               if (di->_sec##_present \
                   && 0 == ML_(img_strcmp_c)(dimg, shdr_strtab_dioff \
                                             + a_shdr.sh_name, "." #_sec)) { \
                  /* The section must have the same size in both     */ \
                  /* files; a mismatch trips this assertion.         */ \
                  vg_assert(di->_sec##_size == a_shdr.sh_size); \
                  /* JRS 2013-Jun-01: the following assert doesn't contain */ \
                  /* any ==s, which seems to me to be suspicious. */ \
                  /* NOTE(review): as written it only checks that the */ \
                  /* sum is nonzero -- intended comparison unknown.   */ \
                  vg_assert(di->_sec##_avma + a_shdr.sh_addr + _seg##_dbias); \
                  /* Assume we have a correct value for the main */ \
                  /* object's bias.  Use that to derive the debuginfo */ \
                  /* object's bias, by adding the difference in SVMAs */ \
                  /* for the corresponding sections in the two files. */ \
                  /* That should take care of all prelinking effects. */ \
                  di->_sec##_debug_svma = a_shdr.sh_addr; \
                  di->_sec##_debug_bias \
                     = di->_sec##_bias + \
                       di->_sec##_svma - di->_sec##_debug_svma; \
                  TRACE_SYMTAB("acquiring ." #_sec \
                               " debug svma = %#lx .. %#lx\n", \
                               di->_sec##_debug_svma, \
                               di->_sec##_debug_svma + di->_sec##_size - 1); \
                  TRACE_SYMTAB("acquiring ." #_sec " debug bias = %#lx\n", \
                               (UWord)di->_sec##_debug_bias); \
               } \
            } while (0);
2660
2661 /* SECTION SEGMENT */
2662 FIND(text, rx)
2663 FIND(data, rw)
2664 FIND(sdata, rw)
2665 FIND(rodata, rw)
2666 FIND(bss, rw)
2667 FIND(sbss, rw)
2668
2669 # undef FIND
2670
2671 /* Same deal as previous FIND, except only do it for those
2672 sections which we didn't find in the main file. */
2673
#           define FIND(_condition, _sec_name, _sec_escn) \
            do { \
               /* Fetch section header |i| of the debug image.  If   */ \
               /* _condition holds and the section is called         */ \
               /* _sec_name, record its image (|dimg|), offset and   */ \
               /* size in _sec_escn.  It is an error for _sec_escn   */ \
               /* to have been filled in already.  The macro ends in */ \
               /* a ';' because its uses are written without one.    */ \
               ElfXX_Shdr a_shdr; \
               ML_(img_get)(&a_shdr, dimg, \
                            INDEX_BIS(shdr_dioff, i, shdr_dent_szB), \
                            sizeof(a_shdr)); \
               if (_condition \
                   && 0 == ML_(img_strcmp_c)(dimg, shdr_strtab_dioff \
                                             + a_shdr.sh_name, _sec_name)) { \
                  Bool nobits; \
                  if (_sec_escn.img != NULL) { \
                     ML_(symerr)(di, True, \
                                 "   debuginfo section duplicates a" \
                                 " section in the main ELF file"); \
                     goto out; \
                  } \
                  _sec_escn.img  = dimg; \
                  _sec_escn.ioff = (DiOffT)a_shdr.sh_offset; \
                  _sec_escn.szB  = a_shdr.sh_size; \
                  nobits         = a_shdr.sh_type == SHT_NOBITS; \
                  vg_assert(_sec_escn.img  != NULL); \
                  vg_assert(_sec_escn.ioff != DiOffT_INVALID); \
                  TRACE_SYMTAB( "%-18s: dioff %llu .. %llu\n", \
                                _sec_name, \
                                (ULong)_sec_escn.ioff, \
                                ((ULong)_sec_escn.ioff) + _sec_escn.szB - 1); \
                  /* SHT_NOBITS sections have zero size in the file. */ \
                  /* The size is compared against the room left      */ \
                  /* after the section's offset, rather than testing */ \
                  /* offset + size, so that a bogus, enormous size   */ \
                  /* cannot wrap the sum around and pass the check.  */ \
                  if (!nobits && \
                      ((ULong)_sec_escn.ioff > (ULong)ML_(img_size)(dimg) \
                       || (ULong)_sec_escn.szB \
                          > (ULong)ML_(img_size)(dimg) \
                            - (ULong)_sec_escn.ioff)) { \
                     ML_(symerr)(di, True, \
                                 "   section beyond image end?!"); \
                     goto out; \
                  } \
               } \
            } while (0);
2709
2710 /* NEEDED? NAME ElfSec */
2711 FIND(need_symtab, ".symtab", symtab_escn)
2712 FIND(need_symtab, ".strtab", strtab_escn)
2713 FIND(need_dwarf2, ".debug_line", debug_line_escn)
2714 FIND(need_dwarf2, ".debug_info", debug_info_escn)
2715 FIND(need_dwarf2, ".debug_types", debug_types_escn)
2716
2717 FIND(need_dwarf2, ".debug_abbrev", debug_abbv_escn)
2718 FIND(need_dwarf2, ".debug_str", debug_str_escn)
2719 FIND(need_dwarf2, ".debug_ranges", debug_ranges_escn)
2720
2721 FIND(need_dwarf2, ".debug_loc", debug_loc_escn)
2722 FIND(need_dwarf2, ".debug_frame", debug_frame_escn)
2723
2724 FIND(need_dwarf2, ".gnu_debugaltlink", debugaltlink_escn)
2725
2726 FIND(need_dwarf1, ".debug", dwarf1d_escn)
2727 FIND(need_dwarf1, ".line", dwarf1l_escn)
2728
2729 # undef FIND
2730 } /* Find all interesting sections */
2731 } /* do we have a debug image? */
2732
2733 /* TOPLEVEL */
2734 /* Look for alternate debug image, and if found, connect |aimg|
2735 to it. */
2736 vg_assert(aimg == NULL);
2737
2738 if (debugaltlink_escn.img != NULL) {
2739 HChar* altfile_str_m
2740 = ML_(img_strdup)(debugaltlink_escn.img,
2741 "di.fbi.3", debugaltlink_escn.ioff);
2742 UInt buildid_offset = ML_(img_strlen)(debugaltlink_escn.img,
2743 debugaltlink_escn.ioff)+1;
2744
2745 vg_assert(buildid_offset < debugaltlink_escn.szB);
2746
2747 HChar *altbuildid
2748 = ML_(dinfo_zalloc)("di.fbi.4",
2749 (debugaltlink_escn.szB - buildid_offset)
2750 * 2 + 1);
2751
2752 /* The altfile might be relative to the debug file or main file. */
2753 HChar *dbgname = di->fsm.dbgname ? di->fsm.dbgname : di->fsm.filename;
2754
2755 for (j = 0; j < debugaltlink_escn.szB - buildid_offset; j++)
2756 VG_(sprintf)(
2757 altbuildid + 2 * j, "%02x",
2758 (UInt)ML_(img_get_UChar)(debugaltlink_escn.img,
2759 debugaltlink_escn.ioff
2760 + buildid_offset + j));
2761
2762 /* See if we can find a matching debug file */
2763 aimg = find_debug_file( di, dbgname, altbuildid,
2764 altfile_str_m, 0, True );
2765
2766 if (altfile_str_m)
2767 ML_(dinfo_free)(altfile_str_m);
2768 ML_(dinfo_free)(altbuildid);
2769 }
2770
2771 /* TOPLEVEL */
2772 /* If we were successful in finding alternate debug image, pull various
2773 size and image addresses out of it. */
2774 if (aimg != NULL && is_elf_object_file_by_DiImage(aimg, True)) {
2775
2776 /* Pull out and validate program header and section header info */
2777 DiOffT ehdr_aioff = 0;
2778 ElfXX_Ehdr ehdr_aimg;
2779 ML_(img_get)(&ehdr_aimg, aimg, ehdr_aioff, sizeof(ehdr_aimg));
2780
2781 DiOffT shdr_aioff = ehdr_aimg.e_shoff;
2782 UWord shdr_anent = ehdr_aimg.e_shnum;
2783 UWord shdr_aent_szB = ehdr_aimg.e_shentsize;
2784
2785 DiOffT shdr_strtab_aioff = DiOffT_INVALID;
2786
2787 if (shdr_anent == 0
2788 || !ML_(img_valid)(aimg, shdr_aioff,
2789 shdr_anent * shdr_aent_szB)) {
2790 ML_(symerr)(di, True,
2791 "Missing or invalid ELF Section Header Table"
2792 " (alternate debuginfo file)");
2793 goto out;
2794 }
2795
2796 /* Also find the section header's string table, and validate. */
2797 /* checked previously by is_elf_object_file: */
2798 vg_assert(ehdr_aimg.e_shstrndx != SHN_UNDEF);
2799
2800 // shdr_aioff is the offset of the section header table
2801 // and we need the ehdr_aimg.e_shstrndx'th entry
2802 { ElfXX_Shdr a_shdr;
2803 ML_(img_get)(&a_shdr, aimg,
2804 INDEX_BIS(shdr_aioff, ehdr_aimg.e_shstrndx,
2805 shdr_aent_szB),
2806 sizeof(a_shdr));
2807 shdr_strtab_aioff = a_shdr.sh_offset;
2808 if (!ML_(img_valid)(aimg, shdr_strtab_aioff,
2809 1/*bogus, but we don't know the real size*/)) {
2810 ML_(symerr)(di, True,
2811 "Invalid ELF Section Header String Table"
2812 " (alternate debuginfo file)");
2813 goto out;
2814 }
2815 }
2816
2817 /* Find all interesting sections */
2818 for (i = 0; i < ehdr_aimg.e_shnum; i++) {
2819
2820 # define FIND(_sec_name, _sec_escn) \
2821 do { \
2822 ElfXX_Shdr a_shdr; \
2823 ML_(img_get)(&a_shdr, aimg, \
2824 INDEX_BIS(shdr_aioff, i, shdr_aent_szB), \
2825 sizeof(a_shdr)); \
2826 if (0 == ML_(img_strcmp_c)(aimg, shdr_strtab_aioff \
2827 + a_shdr.sh_name, _sec_name)) { \
2828 if (_sec_escn.img != NULL) { \
2829 ML_(symerr)(di, True, \
2830 " alternate debuginfo section duplicates a" \
2831 " section in the main ELF file"); \
2832 goto out; \
2833 } \
2834 _sec_escn.img = aimg; \
2835 _sec_escn.ioff = (DiOffT)a_shdr.sh_offset; \
2836 _sec_escn.szB = a_shdr.sh_size; \
2837 vg_assert(_sec_escn.img != NULL); \
2838 vg_assert(_sec_escn.ioff != DiOffT_INVALID); \
2839 TRACE_SYMTAB( "%-18s: aioff %llu .. %llu\n", \
2840 _sec_name, \
2841 (ULong)_sec_escn.ioff, \
2842 ((ULong)_sec_escn.ioff) + _sec_escn.szB - 1); \
2843 } \
2844 } while (0);
2845
2846 /* NAME ElfSec */
2847 FIND(".debug_line", debug_line_alt_escn)
2848 FIND(".debug_info", debug_info_alt_escn)
2849 FIND(".debug_abbrev", debug_abbv_alt_escn)
2850 FIND(".debug_str", debug_str_alt_escn)
2851
2852 # undef FIND
2853 } /* Find all interesting sections */
2854 } /* do we have an alternate debug image? */
2855
2856
2857 /* TOPLEVEL */
2858 /* Check some sizes */
2859 vg_assert((dynsym_escn.szB % sizeof(ElfXX_Sym)) == 0);
2860 vg_assert((symtab_escn.szB % sizeof(ElfXX_Sym)) == 0);
2861 # if defined(VGO_solaris)
2862 vg_assert((ldynsym_escn.szB % sizeof(ElfXX_Sym)) == 0);
2863 # endif
2864
2865 /* TOPLEVEL */
2866 /* Read symbols */
2867 {
2868 void (*read_elf_symtab)(struct _DebugInfo*, const HChar*,
2869 DiSlice*, DiSlice*, DiSlice*, Bool);
2870 Bool symtab_in_debug;
2871 # if defined(VGP_ppc64be_linux)
2872 read_elf_symtab = read_elf_symtab__ppc64be_linux;
2873 # else
2874 read_elf_symtab = read_elf_symtab__normal;
2875 # endif
2876 symtab_in_debug = symtab_escn.img == dimg;
2877 read_elf_symtab(di, "symbol table",
2878 &symtab_escn, &strtab_escn, &opd_escn,
2879 symtab_in_debug);
2880 read_elf_symtab(di, "dynamic symbol table",
2881 &dynsym_escn, &dynstr_escn, &opd_escn,
2882 False);
2883 # if defined(VGO_solaris)
2884 read_elf_symtab(di, "local dynamic symbol table",
2885 &ldynsym_escn, &dynstr_escn, &opd_escn,
2886 False);
2887 # endif
2888 }
2889
2890 /* TOPLEVEL */
2891 /* Read .eh_frame and .debug_frame (call-frame-info) if any. Do
2892 the .eh_frame section(s) first. */
2893 vg_assert(di->n_ehframe >= 0 && di->n_ehframe <= N_EHFRAME_SECTS);
2894 for (i = 0; i < di->n_ehframe; i++) {
2895 /* see Comment_on_EH_FRAME_MULTIPLE_INSTANCES above for why
2896 this next assertion should hold. */
2897 vg_assert(ML_(sli_is_valid)(ehframe_escn[i]));
2898 vg_assert(ehframe_escn[i].szB == di->ehframe_size[i]);
2899 ML_(read_callframe_info_dwarf3)( di,
2900 ehframe_escn[i],
2901 di->ehframe_avma[i],
2902 True/*is_ehframe*/ );
2903 }
2904 if (ML_(sli_is_valid)(debug_frame_escn)) {
2905 ML_(read_callframe_info_dwarf3)( di,
2906 debug_frame_escn,
2907 0/*assume zero avma*/,
2908 False/*!is_ehframe*/ );
2909 }
2910
2911 /* TOPLEVEL */
2912 /* jrs 2006-01-01: icc-8.1 has been observed to generate
2913 binaries without debug_str sections. Don't preclude
2914 debuginfo reading for that reason, but, in
2915 read_unitinfo_dwarf2, do check that debugstr is non-NULL
2916 before using it. */
2917 if (ML_(sli_is_valid)(debug_info_escn)
2918 && ML_(sli_is_valid)(debug_abbv_escn)
2919 && ML_(sli_is_valid)(debug_line_escn)) {
2920 /* The old reader: line numbers and unwind info only */
2921 ML_(read_debuginfo_dwarf3) ( di,
2922 debug_info_escn,
2923 debug_types_escn,
2924 debug_abbv_escn,
2925 debug_line_escn,
2926 debug_str_escn,
2927 debug_str_alt_escn );
2928 /* The new reader: read the DIEs in .debug_info to acquire
2929 information on variable types and locations or inline info.
2930 But only if the tool asks for it, or the user requests it on
2931 the command line. */
2932 if (VG_(clo_read_var_info) /* the user or tool asked for it */
2933 || VG_(clo_read_inline_info)) {
2934 ML_(new_dwarf3_reader)(
2935 di, debug_info_escn, debug_types_escn,
2936 debug_abbv_escn, debug_line_escn,
2937 debug_str_escn, debug_ranges_escn,
2938 debug_loc_escn, debug_info_alt_escn,
2939 debug_abbv_alt_escn, debug_line_alt_escn,
2940 debug_str_alt_escn
2941 );
2942 }
2943 }
2944
2945 /* TOPLEVEL */
2946 // JRS 31 July 2014: dwarf-1 reading is currently broken and
2947 // therefore deactivated.
2948 //if (dwarf1d_img && dwarf1l_img) {
2949 // ML_(read_debuginfo_dwarf1) ( di, dwarf1d_img, dwarf1d_sz,
2950 // dwarf1l_img, dwarf1l_sz );
2951 //}
2952
2953 # if defined(VGA_arm)
2954 /* TOPLEVEL */
2955 /* ARM32 only: read .exidx/.extab if present. Note we are
2956 reading these directly out of the mapped in (running) image.
2957 Also, read these only if no CFI based unwind info was
2958 acquired for this file.
2959
2960 An .exidx section is always required, but the .extab section
2961 can be optionally omitted, provided that .exidx does not
2962 refer to it. If the .exidx is erroneous and does refer to
2963 .extab even though .extab is missing, the range checks done
2964 by GET_EX_U32 in ExtabEntryExtract in readexidx.c should
2965 prevent any invalid memory accesses, and cause the .extab to
2966 be rejected as invalid.
2967
2968 FIXME:
2969 * check with m_aspacemgr that the entire [exidx_avma, +exidx_size)
2970 and [extab_avma, +extab_size) areas are readable, since we're
2971 reading this stuff out of the running image (not from a file/socket)
2972 and we don't want to segfault.
2973 * remove DebugInfo::exidx_bias and use text_bias instead.
2974 I think it's always the same.
2975 * remove DebugInfo::{extab_bias, exidx_svma, extab_svma} since
2976 they are never used.
2977 */
2978 if (di->exidx_present
2979 && di->cfsi_used == 0
2980 && di->text_present && di->text_size > 0) {
2981 Addr text_last_svma = di->text_svma + di->text_size - 1;
2982 ML_(read_exidx)( di, (UChar*)di->exidx_avma, di->exidx_size,
2983 (UChar*)di->extab_avma, di->extab_size,
2984 text_last_svma,
2985 di->exidx_bias );
2986 }
2987 # endif /* defined(VGA_arm) */
2988
2989 } /* "Find interesting sections, read the symbol table(s), read any debug
2990 information" (a local scope) */
2991
2992 /* TOPLEVEL */
2993 res = True;
2994
2995 /* If reading Dwarf3 variable type/location info, print a line
2996 showing the number of variables read for each object.
2997 (Currently disabled -- is a sanity-check mechanism for
2998 exp-sgcheck.) */
2999 if (0 && VG_(clo_read_var_info)) {
3000 UWord nVars = 0;
3001 if (di->varinfo) {
3002 for (j = 0; j < VG_(sizeXA)(di->varinfo); j++) {
3003 OSet* /* of DiAddrRange */ scope
3004 = *(OSet**)VG_(indexXA)(di->varinfo, j);
3005 vg_assert(scope);
3006 VG_(OSetGen_ResetIter)( scope );
3007 while (True) {
3008 DiAddrRange* range = VG_(OSetGen_Next)( scope );
3009 if (!range) break;
3010 vg_assert(range->vars);
3011 Word w = VG_(sizeXA)(range->vars);
3012 vg_assert(w >= 0);
3013 if (0) VG_(printf)("range %#lx %#lx %ld\n",
3014 range->aMin, range->aMax, w);
3015 nVars += (UWord)w;
3016 }
3017 }
3018 }
3019 VG_(umsg)("VARINFO: %7lu vars %7lu text_size %s\n",
3020 nVars, di->text_size, di->fsm.filename);
3021 }
3022 /* TOPLEVEL */
3023
3024 out:
3025 {
3026 /* Last, but not least, detach from the image(s). */
3027 if (mimg) ML_(img_done)(mimg);
3028 if (dimg) ML_(img_done)(dimg);
3029 if (aimg) ML_(img_done)(aimg);
3030
3031 if (svma_ranges) VG_(deleteXA)(svma_ranges);
3032
3033 return res;
3034 } /* out: */
3035
3036 /* NOTREACHED */
3037 }
3038
3039 #endif // defined(VGO_linux) || defined(VGO_solaris)
3040
3041 /*--------------------------------------------------------------------*/
3042 /*--- end ---*/
3043 /*--------------------------------------------------------------------*/
3044