1
2 /*--------------------------------------------------------------------*/
3 /*--- Reading of syms & debug info from ELF .so/executable files. ---*/
4 /*--- readelf.c ---*/
5 /*--------------------------------------------------------------------*/
6
7 /*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
10
11 Copyright (C) 2000-2017 Julian Seward
12 jseward@acm.org
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30 */
31
32 #if defined(VGO_linux) || defined(VGO_solaris)
33
34 #include "pub_core_basics.h"
35 #include "pub_core_vki.h"
36 #include "pub_core_debuginfo.h"
37 #include "pub_core_libcbase.h"
38 #include "pub_core_libcprint.h"
39 #include "pub_core_libcassert.h"
40 #include "pub_core_machine.h" /* VG_ELF_CLASS */
41 #include "pub_core_options.h"
42 #include "pub_core_oset.h"
43 #include "pub_core_tooliface.h" /* VG_(needs) */
44 #include "pub_core_xarray.h"
45 #include "priv_misc.h" /* dinfo_zalloc/free/strdup */
46 #include "priv_image.h"
47 #include "priv_d3basics.h"
48 #include "priv_tytypes.h"
49 #include "priv_storage.h"
50 #include "priv_readelf.h" /* self */
51 #include "priv_readdwarf.h" /* 'cos ELF contains DWARF */
52 #include "priv_readdwarf3.h"
53 #include "priv_readexidx.h"
54 #include "config.h"
55
56 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
57 #include <elf.h>
58 #if defined(VGO_solaris)
59 #include <sys/link.h> /* ElfXX_Dyn, DT_* */
60 #endif
61 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
62
63 #if !defined(HAVE_ELF32_CHDR)
64 typedef struct {
65 Elf32_Word ch_type;
66 Elf32_Word ch_size;
67 Elf32_Word ch_addralign;
68 } Elf32_Chdr;
69 #endif
70
71 #if !defined(HAVE_ELF64_CHDR)
72 typedef struct {
73 Elf64_Word ch_type;
74 Elf64_Word ch_reserved;
75 Elf64_Xword ch_size;
76 Elf64_Xword ch_addralign;
77 } Elf64_Chdr;
78 #endif
79
/* Section-header flag and compression-type constants for compressed
   sections; only defined here if nothing earlier defined them. */
#ifndef SHF_COMPRESSED
#  define SHF_COMPRESSED (1 << 11)
#endif

#ifndef ELFCOMPRESS_ZLIB
#  define ELFCOMPRESS_ZLIB 1
#endif

/* NOTE(review): presumably the size in bytes of the header that
   precedes zlib-compressed section data in the older, non-Chdr
   layout -- confirm against the users of this macro. */
#define SIZE_OF_ZLIB_HEADER 12
89
90 /*------------------------------------------------------------*/
91 /*--- 32/64-bit parameterisation ---*/
92 /*------------------------------------------------------------*/
93
94 /* For all the ELF macros and types which specify '32' or '64',
95 select the correct variant for this platform and give it
96 an 'XX' name. Then use the 'XX' variant consistently in
97 the rest of this file.
98 */
99 #if VG_WORDSIZE == 4
100 # define ElfXX_Ehdr Elf32_Ehdr
101 # define ElfXX_Shdr Elf32_Shdr
102 # define ElfXX_Phdr Elf32_Phdr
103 # define ElfXX_Nhdr Elf32_Nhdr
104 # define ElfXX_Sym Elf32_Sym
105 # define ElfXX_Off Elf32_Off
106 # define ElfXX_Word Elf32_Word
107 # define ElfXX_Addr Elf32_Addr
108 # define ElfXX_Dyn Elf32_Dyn
109 # define ELFXX_ST_BIND ELF32_ST_BIND
110 # define ELFXX_ST_TYPE ELF32_ST_TYPE
111 # define ElfXX_Chdr Elf32_Chdr
112
113 #elif VG_WORDSIZE == 8
114 # define ElfXX_Ehdr Elf64_Ehdr
115 # define ElfXX_Shdr Elf64_Shdr
116 # define ElfXX_Phdr Elf64_Phdr
117 # define ElfXX_Nhdr Elf64_Nhdr
118 # define ElfXX_Sym Elf64_Sym
119 # define ElfXX_Off Elf64_Off
120 # define ElfXX_Word Elf64_Word
121 # define ElfXX_Addr Elf64_Addr
122 # define ElfXX_Dyn Elf64_Dyn
123 # define ELFXX_ST_BIND ELF64_ST_BIND
124 # define ELFXX_ST_TYPE ELF64_ST_TYPE
125 # define ElfXX_Chdr Elf64_Chdr
126
127 #else
128 # error "VG_WORDSIZE should be 4 or 8"
129 #endif
130
131
132 /*------------------------------------------------------------*/
133 /*--- ---*/
134 /*--- Read symbol table and line info from ELF files. ---*/
135 /*--- ---*/
136 /*------------------------------------------------------------*/
137
138 /* readelf.c parses ELF files and acquires symbol table info from
139 them. It calls onwards to readdwarf.c to read DWARF2/3 line number
140 and call frame info found. */
141
142 /* Identify an ELF object file by peering at the first few bytes of
143 it. */
144
ML_(is_elf_object_file)145 Bool ML_(is_elf_object_file)( const void* image, SizeT n_image, Bool rel_ok )
146 {
147 const ElfXX_Ehdr* ehdr = image;
148 Int ok = 1;
149
150 if (n_image < sizeof(ElfXX_Ehdr))
151 return False;
152
153 ok &= (ehdr->e_ident[EI_MAG0] == 0x7F
154 && ehdr->e_ident[EI_MAG1] == 'E'
155 && ehdr->e_ident[EI_MAG2] == 'L'
156 && ehdr->e_ident[EI_MAG3] == 'F');
157 ok &= (ehdr->e_ident[EI_CLASS] == VG_ELF_CLASS
158 && ehdr->e_ident[EI_DATA] == VG_ELF_DATA2XXX
159 && ehdr->e_ident[EI_VERSION] == EV_CURRENT);
160 ok &= (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_DYN
161 || (rel_ok && ehdr->e_type == ET_REL));
162 ok &= (ehdr->e_machine == VG_ELF_MACHINE);
163 ok &= (ehdr->e_version == EV_CURRENT);
164 ok &= (ehdr->e_shstrndx != SHN_UNDEF);
165 ok &= (ehdr->e_shoff != 0 && ehdr->e_shnum != 0);
166 ok &= ((ehdr->e_phoff != 0 && ehdr->e_phnum != 0)
167 || ehdr->e_type == ET_REL);
168
169 return ok ? True : False;
170 }
171
172
173 /* The same thing, but operating on a DiImage instead. */
174
is_elf_object_file_by_DiImage(DiImage * img,Bool rel_ok)175 static Bool is_elf_object_file_by_DiImage( DiImage* img, Bool rel_ok )
176 {
177 /* Be sure this doesn't make the frame too big. */
178 vg_assert(sizeof(ElfXX_Ehdr) <= 512);
179
180 ElfXX_Ehdr ehdr;
181 if (!ML_(img_valid)(img, 0, sizeof(ehdr)))
182 return False;
183
184 ML_(img_get)(&ehdr, img, 0, sizeof(ehdr));
185 return ML_(is_elf_object_file)( &ehdr, sizeof(ehdr), rel_ok );
186 }
187
188
189 /* Show a raw ELF symbol, given its in-image address and name. */
190
191 static
show_raw_elf_symbol(DiImage * strtab_img,Int i,const ElfXX_Sym * sym,DiOffT sym_name_ioff,Addr sym_svma,Bool ppc64_linux_format)192 void show_raw_elf_symbol ( DiImage* strtab_img,
193 Int i,
194 const ElfXX_Sym* sym,
195 DiOffT sym_name_ioff, Addr sym_svma,
196 Bool ppc64_linux_format )
197 {
198 const HChar* space = ppc64_linux_format ? " " : "";
199 VG_(printf)("raw symbol [%4d]: ", i);
200 switch (ELFXX_ST_BIND(sym->st_info)) {
201 case STB_LOCAL: VG_(printf)("LOC "); break;
202 case STB_GLOBAL: VG_(printf)("GLO "); break;
203 case STB_WEAK: VG_(printf)("WEA "); break;
204 case STB_LOPROC: VG_(printf)("lop "); break;
205 case STB_HIPROC: VG_(printf)("hip "); break;
206 default: VG_(printf)("??? "); break;
207 }
208 switch (ELFXX_ST_TYPE(sym->st_info)) {
209 case STT_NOTYPE: VG_(printf)("NOT "); break;
210 case STT_OBJECT: VG_(printf)("OBJ "); break;
211 case STT_FUNC: VG_(printf)("FUN "); break;
212 case STT_SECTION: VG_(printf)("SEC "); break;
213 case STT_FILE: VG_(printf)("FIL "); break;
214 case STT_LOPROC: VG_(printf)("lop "); break;
215 case STT_HIPROC: VG_(printf)("hip "); break;
216 default: VG_(printf)("??? "); break;
217 }
218 HChar* sym_name = NULL;
219 if (sym->st_name)
220 sym_name = ML_(img_strdup)(strtab_img, "di.sres.1", sym_name_ioff);
221 VG_(printf)(": svma %#010lx, %ssz %4llu %s\n",
222 sym_svma, space, (ULong)(sym->st_size + 0UL),
223 (sym_name ? sym_name : "NONAME") );
224 if (sym_name)
225 ML_(dinfo_free)(sym_name);
226 }
227
228
229 /* Decide whether SYM is something we should collect, and if so, copy
230 relevant info to the _OUT arguments. For {x86,amd64,ppc32}-linux
231 this is straightforward - the name, address, size are copied out
232 unchanged.
233
234 There is a bit of a kludge re data symbols (see KLUDGED BSS CHECK
235 below): we assume that the .bss is mapped immediately after .data,
236 and so accept any data symbol which exists in the range [start of
237 .data, size of .data + size of .bss). I don't know if this is
238 really correct/justifiable, or not.
239
240 For ppc64be-linux it's more complex. If the symbol is seen to be in
241 the .opd section, it is taken to be a function descriptor, and so
242 a dereference is attempted, in order to get hold of the real entry
243 point address. Also as part of the dereference, there is an attempt
244 to calculate the TOC pointer (R2 value) associated with the symbol.
245
246 To support the ppc64be-linux pre-"dotless" ABI (prior to gcc 4.0.0),
247 if the symbol is seen to be outside the .opd section and its name
248 starts with a dot, an .opd deference is not attempted, and no TOC
249 pointer is calculated, but the leading dot is removed from the
250 name.
251
252 As a result, on ppc64be-linux, the caller of this function may have
253 to piece together the real size, address, name of the symbol from
254 multiple calls to this function. Ugly and confusing.
255 */
256 static
get_elf_symbol_info(struct _DebugInfo * di,const ElfXX_Sym * sym,DiOffT sym_name_ioff,const DiSlice * escn_strtab,Addr sym_svma,Bool symtab_in_debug,const DiSlice * escn_opd,PtrdiffT opd_bias,DiOffT * sym_name_out_ioff,SymAVMAs * sym_avmas_out,Int * sym_size_out,Bool * from_opd_out,Bool * is_text_out,Bool * is_ifunc_out,Bool * is_global_out)257 Bool get_elf_symbol_info (
258 /* INPUTS */
259 struct _DebugInfo* di, /* containing DebugInfo */
260 const ElfXX_Sym* sym, /* ELF symbol */
261 DiOffT sym_name_ioff, /* name, may be absent (DiOffT_INVALID) */
262 const DiSlice* escn_strtab, /* holds the name */
263 Addr sym_svma, /* address as stated in the object file */
264 Bool symtab_in_debug, /* symbol table is in the debug file */
265 const DiSlice* escn_opd, /* the .opd (ppc64be-linux only) */
266 PtrdiffT opd_bias, /* for biasing AVMAs found in .opd */
267 /* OUTPUTS */
268 DiOffT* sym_name_out_ioff, /* name (in strtab) we should record */
269 SymAVMAs* sym_avmas_out, /* sym avmas we should record */
270 Int* sym_size_out, /* symbol size */
271 Bool* from_opd_out, /* ppc64be-linux only: did we deref an
272 .opd entry? */
273 Bool* is_text_out, /* is this a text symbol? */
274 Bool* is_ifunc_out, /* is this a STT_GNU_IFUNC function ?*/
275 Bool* is_global_out /* is this a global symbol ?*/
276 )
277 {
278 Bool plausible;
279 # if defined(VGP_ppc64be_linux)
280 Bool is_in_opd;
281 # endif
282 Bool in_text, in_data, in_sdata, in_rodata, in_bss, in_sbss;
283 Addr text_svma, data_svma, sdata_svma, rodata_svma, bss_svma, sbss_svma;
284 PtrdiffT text_bias, data_bias, sdata_bias, rodata_bias, bss_bias, sbss_bias;
285
286 /* Set defaults */
287 *sym_name_out_ioff = sym_name_ioff;
288 (*sym_avmas_out).main = sym_svma; /* we will bias this shortly */
289 *is_text_out = True;
290 SET_TOCPTR_AVMA(*sym_avmas_out, 0); /* default to unknown/inapplicable */
291 SET_LOCAL_EP_AVMA(*sym_avmas_out, 0); /* default to unknown/inapplicable */
292 *from_opd_out = False;
293 *is_ifunc_out = False;
294 *is_global_out = False;
295
296 /* Get the symbol size, but restrict it to fit in a signed 32 bit
297 int. Also, deal with the stupid case of negative size by making
298 the size be 1. Note that sym->st_size has type UWord,
299 effectively. */
300 { Word size_tmp = (Word)sym->st_size;
301 Word max_Int = (1LL << 31) - 1;
302 if (size_tmp < 0) size_tmp = 1;
303 if (size_tmp > max_Int) size_tmp = max_Int;
304 *sym_size_out = (Int)size_tmp;
305 }
306 /* After this point refer only to *sym_size_out and not to
307 sym->st_size. */
308
309 /* Figure out if we're interested in the symbol. Firstly, is it of
310 the right flavour? */
311 plausible
312 = (ELFXX_ST_BIND(sym->st_info) == STB_GLOBAL
313 || ELFXX_ST_BIND(sym->st_info) == STB_LOCAL
314 || ELFXX_ST_BIND(sym->st_info) == STB_WEAK
315 )
316 &&
317 (ELFXX_ST_TYPE(sym->st_info) == STT_FUNC
318 || ELFXX_ST_TYPE(sym->st_info) == STT_OBJECT
319 # ifdef STT_GNU_IFUNC
320 || ELFXX_ST_TYPE(sym->st_info) == STT_GNU_IFUNC
321 # endif
322 );
323
324 /* Work out the svma and bias for each section as it will appear in
325 addresses in the symbol table. */
326 if (symtab_in_debug) {
327 text_svma = di->text_debug_svma;
328 text_bias = di->text_debug_bias;
329 data_svma = di->data_debug_svma;
330 data_bias = di->data_debug_bias;
331 sdata_svma = di->sdata_debug_svma;
332 sdata_bias = di->sdata_debug_bias;
333 rodata_svma = di->rodata_debug_svma;
334 rodata_bias = di->rodata_debug_bias;
335 bss_svma = di->bss_debug_svma;
336 bss_bias = di->bss_debug_bias;
337 sbss_svma = di->sbss_debug_svma;
338 sbss_bias = di->sbss_debug_bias;
339 } else {
340 text_svma = di->text_svma;
341 text_bias = di->text_bias;
342 data_svma = di->data_svma;
343 data_bias = di->data_bias;
344 sdata_svma = di->sdata_svma;
345 sdata_bias = di->sdata_bias;
346 rodata_svma = di->rodata_svma;
347 rodata_bias = di->rodata_bias;
348 bss_svma = di->bss_svma;
349 bss_bias = di->bss_bias;
350 sbss_svma = di->sbss_svma;
351 sbss_bias = di->sbss_bias;
352 }
353
354 /* Now bias (*sym_avmas_out).main accordingly by figuring out exactly which
355 section the symbol is from and bias accordingly. Screws up if
356 the previously deduced section svma address ranges are wrong. */
357 if (di->text_present
358 && di->text_size > 0
359 && sym_svma >= text_svma
360 && sym_svma < text_svma + di->text_size) {
361 *is_text_out = True;
362 (*sym_avmas_out).main += text_bias;
363 } else
364 if (di->data_present
365 && di->data_size > 0
366 && sym_svma >= data_svma
367 && sym_svma < data_svma + di->data_size) {
368 *is_text_out = False;
369 (*sym_avmas_out).main += data_bias;
370 } else
371 if (di->sdata_present
372 && di->sdata_size > 0
373 && sym_svma >= sdata_svma
374 && sym_svma < sdata_svma + di->sdata_size) {
375 *is_text_out = False;
376 (*sym_avmas_out).main += sdata_bias;
377 } else
378 if (di->rodata_present
379 && di->rodata_size > 0
380 && sym_svma >= rodata_svma
381 && sym_svma < rodata_svma + di->rodata_size) {
382 *is_text_out = False;
383 (*sym_avmas_out).main += rodata_bias;
384 } else
385 if (di->bss_present
386 && di->bss_size > 0
387 && sym_svma >= bss_svma
388 && sym_svma < bss_svma + di->bss_size) {
389 *is_text_out = False;
390 (*sym_avmas_out).main += bss_bias;
391 } else
392 if (di->sbss_present
393 && di->sbss_size > 0
394 && sym_svma >= sbss_svma
395 && sym_svma < sbss_svma + di->sbss_size) {
396 *is_text_out = False;
397 (*sym_avmas_out).main += sbss_bias;
398 } else {
399 /* Assume it's in .text. Is this a good idea? */
400 *is_text_out = True;
401 (*sym_avmas_out).main += text_bias;
402 }
403
404 # ifdef STT_GNU_IFUNC
405 /* Check for indirect functions. */
406 if (*is_text_out
407 && ELFXX_ST_TYPE(sym->st_info) == STT_GNU_IFUNC) {
408 *is_ifunc_out = True;
409 }
410 # endif
411
412 if (ELFXX_ST_BIND(sym->st_info) == STB_GLOBAL) {
413 *is_global_out = True;
414 }
415
416 # if defined(VGP_ppc64be_linux)
417 /* Allow STT_NOTYPE in the very special case where we're running on
418 ppc64be-linux and the symbol is one which the .opd-chasing hack
419 below will chase. */
420 if (!plausible
421 && *is_text_out
422 && ELFXX_ST_TYPE(sym->st_info) == STT_NOTYPE
423 && *sym_size_out > 0
424 && di->opd_present
425 && di->opd_size > 0
426 && (*sym_avmas_out).main >= di->opd_avma
427 && (*sym_avmas_out).main < di->opd_avma + di->opd_size)
428 plausible = True;
429 # endif
430
431 if (!plausible)
432 return False;
433
434 /* Ignore if nameless. */
435 if (sym_name_ioff == DiOffT_INVALID
436 || /* VG_(strlen)(sym_name) == 0 */
437 /* equivalent but cheaper ... */
438 ML_(img_get_UChar)(escn_strtab->img, sym_name_ioff) == '\0') {
439 if (TRACE_SYMTAB_ENABLED) {
440 HChar* sym_name = ML_(img_strdup)(escn_strtab->img,
441 "di.gesi.1", sym_name_ioff);
442 TRACE_SYMTAB(" ignore -- nameless: %s\n", sym_name);
443 if (sym_name) ML_(dinfo_free)(sym_name);
444 }
445 return False;
446 }
447
448 /* Ignore if zero-sized. Except on Android:
449
450 On Android 2.3.5, some of the symbols that Memcheck needs to
451 intercept (for noise reduction purposes) have zero size, due to
452 lack of .size directives in handwritten assembly sources. So we
453 can't reject them out of hand -- instead give them a bogusly
454 large size and let canonicaliseSymtab trim them so they don't
455 overlap any following symbols. At least the following symbols
456 are known to be affected:
457
458 in /system/lib/libc.so: strlen strcmp strcpy memcmp memcpy
459 in /system/bin/linker: __dl_strcmp __dl_strlen
460 */
461 if (*sym_size_out == 0) {
462 # if defined(VGPV_arm_linux_android) \
463 || defined(VGPV_x86_linux_android) \
464 || defined(VGPV_mips32_linux_android) \
465 || defined(VGPV_arm64_linux_android)
466 *sym_size_out = 2048;
467 # else
468 if (TRACE_SYMTAB_ENABLED) {
469 HChar* sym_name = ML_(img_strdup)(escn_strtab->img,
470 "di.gesi.2", sym_name_ioff);
471 TRACE_SYMTAB(" ignore -- size=0: %s\n", sym_name);
472 if (sym_name) ML_(dinfo_free)(sym_name);
473 }
474 return False;
475 # endif
476 }
477
478 /* This seems to significantly reduce the number of junk
479 symbols, and particularly reduces the number of
480 overlapping address ranges. Don't ask me why ... */
481 if ((Int)sym->st_value == 0) {
482 if (TRACE_SYMTAB_ENABLED) {
483 HChar* sym_name = ML_(img_strdup)(escn_strtab->img,
484 "di.gesi.3", sym_name_ioff);
485 TRACE_SYMTAB( " ignore -- valu=0: %s\n", sym_name);
486 if (sym_name) ML_(dinfo_free)(sym_name);
487 }
488 return False;
489 }
490
491 /* If it's apparently in a GOT or PLT, it's really a reference to a
492 symbol defined elsewhere, so ignore it. */
493 if (di->got_present
494 && di->got_size > 0
495 && (*sym_avmas_out).main >= di->got_avma
496 && (*sym_avmas_out).main < di->got_avma + di->got_size) {
497 if (TRACE_SYMTAB_ENABLED) {
498 HChar* sym_name = ML_(img_strdup)(escn_strtab->img,
499 "di.gesi.4", sym_name_ioff);
500 TRACE_SYMTAB(" ignore -- in GOT: %s\n", sym_name);
501 if (sym_name) ML_(dinfo_free)(sym_name);
502 }
503 return False;
504 }
505 if (di->plt_present
506 && di->plt_size > 0
507 && (*sym_avmas_out).main >= di->plt_avma
508 && (*sym_avmas_out).main < di->plt_avma + di->plt_size) {
509 if (TRACE_SYMTAB_ENABLED) {
510 HChar* sym_name = ML_(img_strdup)(escn_strtab->img,
511 "di.gesi.5", sym_name_ioff);
512 TRACE_SYMTAB(" ignore -- in PLT: %s\n", sym_name);
513 if (sym_name) ML_(dinfo_free)(sym_name);
514 }
515 return False;
516 }
517
518 /* ppc64be-linux nasty hack: if the symbol is in an .opd section,
519 then really what we have is the address of a function
520 descriptor. So use the first word of that as the function's
521 text.
522
523 See thread starting at
524 http://gcc.gnu.org/ml/gcc-patches/2004-08/msg00557.html
525 */
526 # if defined(VGP_ppc64be_linux)
527 /* Host and guest may have different Endianness, used by BE only */
528 is_in_opd = False;
529 # endif
530
531 if (di->opd_present
532 && di->opd_size > 0
533 && (*sym_avmas_out).main >= di->opd_avma
534 && (*sym_avmas_out).main < di->opd_avma + di->opd_size) {
535 # if !defined(VGP_ppc64be_linux)
536 if (TRACE_SYMTAB_ENABLED) {
537 HChar* sym_name = ML_(img_strdup)(escn_strtab->img,
538 "di.gesi.6", sym_name_ioff);
539 TRACE_SYMTAB(" ignore -- in OPD: %s\n", sym_name);
540 if (sym_name) ML_(dinfo_free)(sym_name);
541 }
542 return False;
543 # else
544 Int offset_in_opd;
545 Bool details = 1||False;
546
547 if (details)
548 TRACE_SYMTAB("opdXXX: opd_bias %p, sym_svma_out %p\n",
549 (void*)(opd_bias), (void*)(*sym_avmas_out).main);
550
551 if (!VG_IS_8_ALIGNED((*sym_avmas_out).main)) {
552 if (TRACE_SYMTAB_ENABLED) {
553 HChar* sym_name = ML_(img_strdup)(escn_strtab->img,
554 "di.gesi.6a", sym_name_ioff);
555 TRACE_SYMTAB(" ignore -- not 8-aligned: %s\n", sym_name);
556 if (sym_name) ML_(dinfo_free)(sym_name);
557 }
558 return False;
559 }
560
561 /* (*sym_avmas_out).main is a avma pointing into the .opd section. We
562 know the vma of the opd section start, so we can figure out
563 how far into the opd section this is. */
564
565 offset_in_opd = (Addr)(*sym_avmas_out).main - (Addr)(di->opd_avma);
566 if (offset_in_opd < 0 || offset_in_opd >= di->opd_size) {
567 if (TRACE_SYMTAB_ENABLED) {
568 HChar* sym_name = ML_(img_strdup)(escn_strtab->img,
569 "di.gesi.6a", sym_name_ioff);
570 TRACE_SYMTAB(" ignore -- invalid OPD offset: %s\n", sym_name);
571 if (sym_name) ML_(dinfo_free)(sym_name);
572 }
573 return False;
574 }
575
576 /* Now we want to know what's at that offset in the .opd
577 section. We can't look in the running image since it won't
578 necessarily have been mapped. But we can consult the oimage.
579 opd_img is the start address of the .opd in the oimage.
580 Hence: */
581
582 ULong fn_descr[2]; /* is actually 3 words, but we need only 2 */
583 if (!ML_(img_valid)(escn_opd->img, escn_opd->ioff + offset_in_opd,
584 sizeof(fn_descr))) {
585 if (TRACE_SYMTAB_ENABLED) {
586 HChar* sym_name = ML_(img_strdup)(escn_strtab->img,
587 "di.gesi.6b", sym_name_ioff);
588 TRACE_SYMTAB(" ignore -- invalid OPD fn_descr offset: %s\n",
589 sym_name);
590 if (sym_name) ML_(dinfo_free)(sym_name);
591
592 }
593 return False;
594 }
595
596 /* This can't fail now, because we just checked the offset
597 above. */
598 ML_(img_get)(&fn_descr[0], escn_opd->img,
599 escn_opd->ioff + offset_in_opd, sizeof(fn_descr));
600
601 if (details)
602 TRACE_SYMTAB("opdXXY: offset %d, fn_descr %p\n",
603 offset_in_opd, fn_descr);
604 if (details)
605 TRACE_SYMTAB("opdXXZ: *fn_descr %p\n", (void*)(fn_descr[0]));
606
607 /* opd_bias is the what we have to add to SVMAs found in .opd to
608 get plausible .text AVMAs for the entry point, and .data
609 AVMAs (presumably) for the TOC locations. We use the caller
610 supplied value (which is di->text_bias) for both of these.
611 Not sure why that is correct - it seems to work, and sounds
612 OK for fn_descr[0], but surely we need to use the data bias
613 and not the text bias for fn_descr[1] ? Oh Well.
614 */
615 (*sym_avmas_out).main = fn_descr[0] + opd_bias;
616 SET_TOCPTR_AVMA(*sym_avmas_out, fn_descr[1] + opd_bias);
617 *from_opd_out = True;
618 is_in_opd = True;
619
620 /* Do a final sanity check: if the symbol falls outside the
621 DebugInfo's mapped range, ignore it. Since (*sym_avmas_out).main has
622 been updated, that can be achieved simply by falling through
623 to the test below. */
624
625 # endif /* ppc64-linux nasty hack */
626 }
627
628 /* Here's yet another ppc64-linux hack. Get rid of leading dot if
629 the symbol is outside .opd. */
630 # if defined(VGP_ppc64be_linux)
631 if (di->opd_size > 0
632 && !is_in_opd
633 && *sym_name_out_ioff != DiOffT_INVALID
634 && ML_(img_get_UChar)(escn_strtab->img, *sym_name_out_ioff) == '.') {
635 vg_assert(!(*from_opd_out));
636 (*sym_name_out_ioff)++;
637 }
638 # endif
639
640 /* If no part of the symbol falls within the mapped range,
641 ignore it. */
642
643 in_text
644 = di->text_present
645 && di->text_size > 0
646 && !((*sym_avmas_out).main + *sym_size_out <= di->text_avma
647 || (*sym_avmas_out).main >= di->text_avma + di->text_size);
648
649 in_data
650 = di->data_present
651 && di->data_size > 0
652 && !((*sym_avmas_out).main + *sym_size_out <= di->data_avma
653 || (*sym_avmas_out).main >= di->data_avma + di->data_size);
654
655 in_sdata
656 = di->sdata_present
657 && di->sdata_size > 0
658 && !((*sym_avmas_out).main + *sym_size_out <= di->sdata_avma
659 || (*sym_avmas_out).main >= di->sdata_avma + di->sdata_size);
660
661 in_rodata
662 = di->rodata_present
663 && di->rodata_size > 0
664 && !((*sym_avmas_out).main + *sym_size_out <= di->rodata_avma
665 || (*sym_avmas_out).main >= di->rodata_avma + di->rodata_size);
666
667 in_bss
668 = di->bss_present
669 && di->bss_size > 0
670 && !((*sym_avmas_out).main + *sym_size_out <= di->bss_avma
671 || (*sym_avmas_out).main >= di->bss_avma + di->bss_size);
672
673 in_sbss
674 = di->sbss_present
675 && di->sbss_size > 0
676 && !((*sym_avmas_out).main + *sym_size_out <= di->sbss_avma
677 || (*sym_avmas_out).main >= di->sbss_avma + di->sbss_size);
678
679
680 if (*is_text_out) {
681 /* This used to reject any symbol falling outside the text
682 segment ("if (!in_text) ..."). Now it is relaxed slightly,
683 to reject only symbols which fall outside the area mapped
684 r-x. This is in accordance with r7427. See
685 "Comment_Regarding_Text_Range_Checks" in storage.c for
686 background. */
687 Bool in_rx;
688 vg_assert(di->fsm.have_rx_map);
689 /* This could actually wrap around and cause
690 ML_(find_rx_mapping) to assert. But that seems so unlikely,
691 let's wait for it to happen before fixing it. */
692 in_rx = (ML_(find_rx_mapping)(
693 di,
694 (*sym_avmas_out).main,
695 (*sym_avmas_out).main + *sym_size_out - 1) != NULL);
696 if (in_text)
697 vg_assert(in_rx);
698 if (!in_rx) {
699 TRACE_SYMTAB(
700 "ignore -- %#lx .. %#lx outside .text svma range %#lx .. %#lx\n",
701 (*sym_avmas_out).main, (*sym_avmas_out).main + *sym_size_out - 1,
702 di->text_avma,
703 di->text_avma + di->text_size - 1);
704 return False;
705 }
706 } else {
707 if (!(in_data || in_sdata || in_rodata || in_bss || in_sbss)) {
708 TRACE_SYMTAB(
709 "ignore -- %#lx .. %#lx outside .data / .sdata / .rodata "
710 "/ .bss / .sbss svma ranges\n",
711 (*sym_avmas_out).main, (*sym_avmas_out).main + *sym_size_out - 1);
712 return False;
713 }
714 }
715
716 # if defined(VGP_ppc64be_linux)
717 if (di->opd_present && di->opd_size > 0) {
718 vg_assert((*sym_avmas_out).main + *sym_size_out <= di->opd_avma
719 || (*sym_avmas_out).main >= di->opd_avma + di->opd_size);
720 }
721 #endif
722
723 # if defined(VGP_ppc64le_linux)
724 /* PPC64 LE ABI uses three bits in the st_other field to indicate the number
725 * of instructions between the function's global and local entry points. An
726 * offset of 0 indicates that there is one entry point. The value must be:
727 *
728 * 0 - one entry point, local and global are the same
729 * 1 - reserved
730 * 2 - local entry point is one instruction after the global entry point
731 * 3 - local entry point is two instructions after the global entry point
732 * 4 - local entry point is four instructions after the global entry point
733 * 5 - local entry point is eight instructions after the global entry point
734 * 6 - local entry point is sixteen instructions after the global entry point
735 * 7 - reserved
736 *
737 * Extract the three bit field from the other field is done by:
738 * (other_field & STO_PPC64_LOCAL_MASK) >> STO_PPC_LOCAL_BIT
739 *
740 * where the #define values are given in include/elf/powerpc.h file for
741 * the PPC binutils.
742 *
743 * conversion of the three bit field to bytes is given by
744 *
745 * ((1 << bit_field) >> 2) << 2
746 */
747
748 #define STO_PPC64_LOCAL_BIT 5
749 #define STO_PPC64_LOCAL_MASK (7 << STO_PPC64_LOCAL_BIT)
750 {
751 unsigned int bit_field, dist_to_local_entry;
752 /* extract the other filed */
753 bit_field = (sym->st_other & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT;
754
755 if ((bit_field > 0) && (bit_field < 7)) {
756 /* store the local entry point address */
757 dist_to_local_entry = ((1 << bit_field) >> 2) << 2;
758 SET_LOCAL_EP_AVMA(*sym_avmas_out,
759 (*sym_avmas_out).main + dist_to_local_entry);
760
761 if (TRACE_SYMTAB_ENABLED) {
762 HChar* sym_name = ML_(img_strdup)(escn_strtab->img,
763 "di.gesi.5", sym_name_ioff);
764 VG_(printf)("Local entry point: %s at %#010x\n",
765 sym_name,
766 (unsigned int)GET_LOCAL_EP_AVMA(*sym_avmas_out));
767 }
768 }
769 }
770 # endif
771
772 /* Acquire! */
773 return True;
774 }
775
776
777 /* Read an ELF symbol table (normal or dynamic). This one is for the
778 "normal" case ({x86,amd64,ppc32,arm,mips32,mips64, ppc64le}-linux). */
779 static
780 __attribute__((unused)) /* not referred to on all targets */
read_elf_symtab__normal(struct _DebugInfo * di,const HChar * tab_name,DiSlice * escn_symtab,DiSlice * escn_strtab,DiSlice * escn_opd,Bool symtab_in_debug)781 void read_elf_symtab__normal(
782 struct _DebugInfo* di, const HChar* tab_name,
783 DiSlice* escn_symtab,
784 DiSlice* escn_strtab,
785 DiSlice* escn_opd, /* ppc64be-linux only */
786 Bool symtab_in_debug
787 )
788 {
789 if (escn_strtab->img == NULL || escn_symtab->img == NULL) {
790 HChar buf[VG_(strlen)(tab_name) + 40];
791 VG_(sprintf)(buf, " object doesn't have a %s", tab_name);
792 ML_(symerr)(di, False, buf);
793 return;
794 }
795
796 TRACE_SYMTAB("\n--- Reading (ELF, standard) %s (%llu entries) ---\n",
797 tab_name, escn_symtab->szB/sizeof(ElfXX_Sym) );
798
799 /* Perhaps should start at i = 1; ELF docs suggest that entry
800 0 always denotes 'unknown symbol'. */
801 Word i;
802 for (i = 1; i < (Word)(escn_symtab->szB/sizeof(ElfXX_Sym)); i++) {
803 ElfXX_Sym sym;
804 ML_(img_get)(&sym, escn_symtab->img,
805 escn_symtab->ioff + i * sizeof(ElfXX_Sym), sizeof(sym));
806 DiOffT sym_name = escn_strtab->ioff + sym.st_name;
807 Addr sym_svma = sym.st_value;
808
809 if (di->trace_symtab)
810 show_raw_elf_symbol(escn_strtab->img, i,
811 &sym, sym_name, sym_svma, False);
812
813 SymAVMAs sym_avmas_really;
814 Int sym_size = 0;
815 Bool from_opd = False, is_text = False, is_ifunc = False;
816 Bool is_global = False;
817 DiOffT sym_name_really = DiOffT_INVALID;
818 sym_avmas_really.main = 0;
819 SET_TOCPTR_AVMA(sym_avmas_really, 0);
820 SET_LOCAL_EP_AVMA(sym_avmas_really, 0);
821 if (get_elf_symbol_info(di, &sym, sym_name, escn_strtab,
822 sym_svma, symtab_in_debug,
823 escn_opd, di->text_bias,
824 &sym_name_really,
825 &sym_avmas_really,
826 &sym_size,
827 &from_opd, &is_text, &is_ifunc, &is_global)) {
828
829 DiSym disym;
830 VG_(memset)(&disym, 0, sizeof(disym));
831 HChar* cstr = ML_(img_strdup)(escn_strtab->img,
832 "di.res__n.1", sym_name_really);
833 disym.avmas = sym_avmas_really;
834 disym.pri_name = ML_(addStr) ( di, cstr, -1 );
835 disym.sec_names = NULL;
836 disym.size = sym_size;
837 disym.isText = is_text;
838 disym.isIFunc = is_ifunc;
839 disym.isGlobal = is_global;
840 if (cstr) { ML_(dinfo_free)(cstr); cstr = NULL; }
841 vg_assert(disym.pri_name);
842 vg_assert(GET_TOCPTR_AVMA(disym.avmas) == 0);
843 /* has no role except on ppc64be-linux */
844 ML_(addSym) ( di, &disym );
845
846 if (TRACE_SYMTAB_ENABLED) {
847 TRACE_SYMTAB(" rec(%c) [%4ld]: "
848 " val %#010lx, sz %4d %s\n",
849 is_text ? 't' : 'd',
850 i,
851 disym.avmas.main,
852 (Int)disym.size,
853 disym.pri_name
854 );
855 if (GET_LOCAL_EP_AVMA(disym.avmas) != 0) {
856 TRACE_SYMTAB(" local entry point %#010lx\n",
857 GET_LOCAL_EP_AVMA(disym.avmas));
858 }
859 }
860
861 }
862 }
863 }
864
865
866 /* Read an ELF symbol table (normal or dynamic). This one is for
867 ppc64be-linux, which requires special treatment. */
868
869 typedef
870 struct {
871 Addr addr;
872 DiOffT name;
873 /* We have to store also the DiImage* so as to give context for
874 |name|. This is not part of the key (in terms of lookup) but
875 there's no easy other way to do this. Ugly. */
876 DiImage* img;
877 }
878 TempSymKey;
879
880 typedef
881 struct {
882 TempSymKey key;
883 Addr tocptr;
884 Int size;
885 Bool from_opd;
886 Bool is_text;
887 Bool is_ifunc;
888 Bool is_global;
889 }
890 TempSym;
891
cmp_TempSymKey(const TempSymKey * key1,const TempSym * elem2)892 static Word cmp_TempSymKey ( const TempSymKey* key1, const TempSym* elem2 )
893 {
894 /* Stay sane ... */
895 vg_assert(key1->img == elem2->key.img);
896 vg_assert(key1->img != NULL);
897 if (key1->addr < elem2->key.addr) return -1;
898 if (key1->addr > elem2->key.addr) return 1;
899 vg_assert(key1->name != DiOffT_INVALID);
900 vg_assert(elem2->key.name != DiOffT_INVALID);
901 return (Word)ML_(img_strcmp)(key1->img, key1->name, elem2->key.name);
902 }
903
904 static
905 __attribute__((unused)) /* not referred to on all targets */
read_elf_symtab__ppc64be_linux(struct _DebugInfo * di,const HChar * tab_name,DiSlice * escn_symtab,DiSlice * escn_strtab,DiSlice * escn_opd,Bool symtab_in_debug)906 void read_elf_symtab__ppc64be_linux(
907 struct _DebugInfo* di, const HChar* tab_name,
908 DiSlice* escn_symtab,
909 DiSlice* escn_strtab,
910 DiSlice* escn_opd, /* ppc64be-linux only */
911 Bool symtab_in_debug
912 )
913 {
914 Word i;
915 Int old_size;
916 Bool modify_size, modify_tocptr;
917 OSet *oset;
918 TempSymKey key;
919 TempSym *elem;
920 TempSym *prev;
921
922 if (escn_strtab->img == NULL || escn_symtab->img == NULL) {
923 HChar buf[VG_(strlen)(tab_name) + 40];
924 VG_(sprintf)(buf, " object doesn't have a %s", tab_name);
925 ML_(symerr)(di, False, buf);
926 return;
927 }
928
929 TRACE_SYMTAB("\n--- Reading (ELF, ppc64be-linux) %s (%llu entries) ---\n",
930 tab_name, escn_symtab->szB/sizeof(ElfXX_Sym) );
931
932 oset = VG_(OSetGen_Create)( offsetof(TempSym,key),
933 (OSetCmp_t)cmp_TempSymKey,
934 ML_(dinfo_zalloc), "di.respl.1",
935 ML_(dinfo_free) );
936
937 /* Perhaps should start at i = 1; ELF docs suggest that entry
938 0 always denotes 'unknown symbol'. */
939 for (i = 1; i < (Word)(escn_symtab->szB/sizeof(ElfXX_Sym)); i++) {
940 ElfXX_Sym sym;
941 ML_(img_get)(&sym, escn_symtab->img,
942 escn_symtab->ioff + i * sizeof(ElfXX_Sym), sizeof(sym));
943 DiOffT sym_name = escn_strtab->ioff + sym.st_name;
944 Addr sym_svma = sym.st_value;
945
946 if (di->trace_symtab)
947 show_raw_elf_symbol(escn_strtab->img, i,
948 &sym, sym_name, sym_svma, True);
949
950 SymAVMAs sym_avmas_really;
951 Int sym_size = 0;
952 Bool from_opd = False, is_text = False, is_ifunc = False;
953 Bool is_global = False;
954 DiOffT sym_name_really = DiOffT_INVALID;
955 DiSym disym;
956 VG_(memset)(&disym, 0, sizeof(disym));
957 sym_avmas_really.main = 0;
958 SET_TOCPTR_AVMA(sym_avmas_really, 0);
959 SET_LOCAL_EP_AVMA(sym_avmas_really, 0);
960 if (get_elf_symbol_info(di, &sym, sym_name, escn_strtab,
961 sym_svma, symtab_in_debug,
962 escn_opd, di->text_bias,
963 &sym_name_really,
964 &sym_avmas_really,
965 &sym_size,
966 &from_opd, &is_text, &is_ifunc, &is_global)) {
967
968 /* Check if we've seen this (name,addr) key before. */
969 key.addr = sym_avmas_really.main;
970 key.name = sym_name_really;
971 key.img = escn_strtab->img;
972 prev = VG_(OSetGen_Lookup)( oset, &key );
973
974 if (prev) {
975
976 /* Seen it before. Fold in whatever new info we can. */
977 modify_size = False;
978 modify_tocptr = False;
979 old_size = 0;
980
981 if (prev->from_opd && !from_opd
982 && (prev->size == 24 || prev->size == 16)
983 && sym_size != prev->size) {
984 /* Existing one is an opd-redirect, with a bogus size,
985 so the only useful new fact we have is the real size
986 of the symbol. */
987 modify_size = True;
988 old_size = prev->size;
989 prev->size = sym_size;
990 }
991 else
992 if (!prev->from_opd && from_opd
993 && (sym_size == 24 || sym_size == 16)) {
994 /* Existing one is non-opd, new one is opd. What we
995 can acquire from the new one is the TOC ptr to be
996 used. Since the existing sym is non-toc, it
997 shouldn't currently have an known TOC ptr. */
998 vg_assert(prev->tocptr == 0);
999 modify_tocptr = True;
1000 prev->tocptr = GET_TOCPTR_AVMA(sym_avmas_really);
1001 }
1002 else {
1003 /* ignore. can we do better here? */
1004 }
1005
1006 /* Only one or the other is possible (I think) */
1007 vg_assert(!(modify_size && modify_tocptr));
1008
1009 if (modify_size && di->trace_symtab) {
1010 VG_(printf)(" modify (old sz %4d) "
1011 " val %#010lx, toc %#010lx, sz %4d %llu\n",
1012 old_size,
1013 prev->key.addr,
1014 prev->tocptr,
1015 prev->size,
1016 prev->key.name
1017 );
1018 }
1019 if (modify_tocptr && di->trace_symtab) {
1020 VG_(printf)(" modify (upd tocptr) "
1021 " val %#010lx, toc %#010lx, sz %4d %llu\n",
1022 prev->key.addr,
1023 prev->tocptr,
1024 prev->size,
1025 prev->key.name
1026 );
1027 }
1028
1029 } else {
1030
1031 /* A new (name,addr) key. Add and continue. */
1032 elem = VG_(OSetGen_AllocNode)(oset, sizeof(TempSym));
1033 elem->key = key;
1034 elem->tocptr = GET_TOCPTR_AVMA(sym_avmas_really);
1035 elem->size = sym_size;
1036 elem->from_opd = from_opd;
1037 elem->is_text = is_text;
1038 elem->is_ifunc = is_ifunc;
1039 elem->is_global = is_global;
1040 VG_(OSetGen_Insert)(oset, elem);
1041 if (di->trace_symtab) {
1042 HChar* str = ML_(img_strdup)(escn_strtab->img, "di.respl.2",
1043 elem->key.name);
1044 VG_(printf)(" to-oset [%4ld]: "
1045 " val %#010lx, toc %#010lx, sz %4d %s\n",
1046 i,
1047 elem->key.addr,
1048 elem->tocptr,
1049 (Int) elem->size,
1050 str
1051 );
1052 if (str) ML_(dinfo_free)(str);
1053 }
1054
1055 }
1056 }
1057 }
1058
1059 /* All the syms that matter are in the oset. Now pull them out,
1060 build a "standard" symbol table, and nuke the oset. */
1061
1062 i = 0;
1063 VG_(OSetGen_ResetIter)( oset );
1064
1065 while ( (elem = VG_(OSetGen_Next)(oset)) ) {
1066 DiSym disym;
1067 VG_(memset)(&disym, 0, sizeof(disym));
1068 HChar* cstr = ML_(img_strdup)(escn_strtab->img,
1069 "di.res__ppc64.1", elem->key.name);
1070 disym.avmas.main = elem->key.addr;
1071 SET_TOCPTR_AVMA(disym.avmas, elem->tocptr);
1072 SET_LOCAL_EP_AVMA(disym.avmas, 0); // ppc64be does not use local_ep.
1073 disym.pri_name = ML_(addStr) ( di, cstr, -1 );
1074 disym.sec_names = NULL;
1075 disym.size = elem->size;
1076 disym.isText = elem->is_text;
1077 disym.isIFunc = elem->is_ifunc;
1078 disym.isGlobal = elem->is_global;
1079 if (cstr) { ML_(dinfo_free)(cstr); cstr = NULL; }
1080 vg_assert(disym.pri_name != NULL);
1081
1082 ML_(addSym) ( di, &disym );
1083 if (di->trace_symtab) {
1084 VG_(printf)(" rec(%c%c%c) [%4ld]: "
1085 " val %#010lx, toc %#010lx, sz %4d %s\n",
1086 disym.isText ? 't' : 'd',
1087 disym.isIFunc ? 'i' : '-',
1088 disym.isGlobal ? 'g' : 'l',
1089 i,
1090 disym.avmas.main,
1091 GET_TOCPTR_AVMA(disym.avmas),
1092 (Int) disym.size,
1093 disym.pri_name
1094 );
1095 }
1096 i++;
1097 }
1098
1099 VG_(OSetGen_Destroy)( oset );
1100 }
1101
1102
1103 /*
1104 * Look for a build-id in an ELF image. The build-id specification
1105 * can be found here:
1106 *
1107 * http://fedoraproject.org/wiki/RolandMcGrath/BuildID
1108 *
1109 * Returned string must be freed by the caller.
1110 */
1111 static
find_buildid(DiImage * img,Bool rel_ok,Bool search_shdrs)1112 HChar* find_buildid(DiImage* img, Bool rel_ok, Bool search_shdrs)
1113 {
1114 HChar* buildid = NULL;
1115
1116 # ifdef NT_GNU_BUILD_ID
1117 if (is_elf_object_file_by_DiImage(img, rel_ok)) {
1118 Word i;
1119
1120 ElfXX_Ehdr ehdr;
1121 ML_(img_get)(&ehdr, img, 0, sizeof(ehdr));
1122 for (i = 0; i < ehdr.e_phnum; i++) {
1123 ElfXX_Phdr phdr;
1124 ML_(img_get)(&phdr, img,
1125 ehdr.e_phoff + i * ehdr.e_phentsize, sizeof(phdr));
1126
1127 if (phdr.p_type == PT_NOTE) {
1128 ElfXX_Off note_ioff = phdr.p_offset;
1129
1130 while (note_ioff < phdr.p_offset + phdr.p_filesz) {
1131 ElfXX_Nhdr note;
1132 ML_(img_get)(¬e, img, (DiOffT)note_ioff, sizeof(note));
1133 DiOffT name_ioff = note_ioff + sizeof(ElfXX_Nhdr);
1134 DiOffT desc_ioff = name_ioff + ((note.n_namesz + 3) & ~3);
1135 if (ML_(img_strcmp_c)(img, name_ioff, ELF_NOTE_GNU) == 0
1136 && note.n_type == NT_GNU_BUILD_ID) {
1137 buildid = ML_(dinfo_zalloc)("di.fbi.1",
1138 note.n_descsz * 2 + 1);
1139 Word j;
1140 for (j = 0; j < note.n_descsz; j++) {
1141 UChar desc_j = ML_(img_get_UChar)(img, desc_ioff + j);
1142 VG_(sprintf)(buildid + VG_(strlen)(buildid),
1143 "%02x", (UInt)desc_j);
1144 }
1145 }
1146
1147 note_ioff = note_ioff + sizeof(ElfXX_Nhdr)
1148 + ((note.n_namesz + 3) & ~3)
1149 + ((note.n_descsz + 3) & ~3);
1150 }
1151 }
1152 }
1153
1154 /* Normally we would only search shdrs for ET_REL files, but when
1155 we search for a separate .debug file phdrs might not be there
1156 (they are never loaded) or have been corrupted, so try again
1157 against shdrs. */
1158 if (buildid || (!rel_ok && !search_shdrs))
1159 return buildid;
1160
1161 for (i = 0; i < ehdr.e_shnum; i++) {
1162 ElfXX_Shdr shdr;
1163 ML_(img_get)(&shdr, img,
1164 ehdr.e_shoff + i * ehdr.e_shentsize, sizeof(shdr));
1165
1166 if (shdr.sh_type == SHT_NOTE) {
1167 ElfXX_Off note_ioff = shdr.sh_offset;
1168
1169 while (note_ioff < shdr.sh_offset + shdr.sh_size) {
1170 ElfXX_Nhdr note;
1171 ML_(img_get)(¬e, img, (DiOffT)note_ioff, sizeof(note));
1172 DiOffT name_ioff = note_ioff + sizeof(ElfXX_Nhdr);
1173 DiOffT desc_ioff = name_ioff + ((note.n_namesz + 3) & ~3);
1174
1175 if (ML_(img_strcmp_c)(img, name_ioff, ELF_NOTE_GNU) == 0
1176 && note.n_type == NT_GNU_BUILD_ID) {
1177 buildid = ML_(dinfo_zalloc)("di.fbi.2",
1178 note.n_descsz * 2 + 1);
1179 Word j;
1180 for (j = 0; j < note.n_descsz; j++) {
1181 UChar desc_j = ML_(img_get_UChar)(img, desc_ioff + j);
1182 VG_(sprintf)(buildid + VG_(strlen)(buildid),
1183 "%02x", (UInt)desc_j);
1184 }
1185 }
1186
1187 note_ioff = note_ioff + sizeof(ElfXX_Nhdr)
1188 + ((note.n_namesz + 3) & ~3)
1189 + ((note.n_descsz + 3) & ~3);
1190 }
1191 }
1192 }
1193 }
1194 # endif /* def NT_GNU_BUILD_ID */
1195
1196 return buildid;
1197 }
1198
1199
1200 /* Try and open a separate debug file, ignoring any where the CRC does
1201 not match the value from the main object file. Returned DiImage
1202 must be discarded by the caller.
1203
1204 If |serverAddr| is NULL, |name| is expected to be a fully qualified
1205 (absolute) path to the file in the local filesystem. If
1206 |serverAddr| is non-NULL, it is expected to be an IPv4 and port
1207 spec of the form "d.d.d.d:d" or "d.d.d.d", and |name| is expected
1208 to be a plain filename (no path components at all).
1209 */
1210 static
open_debug_file(const HChar * name,const HChar * buildid,UInt crc,Bool rel_ok,const HChar * serverAddr)1211 DiImage* open_debug_file( const HChar* name, const HChar* buildid, UInt crc,
1212 Bool rel_ok, const HChar* serverAddr )
1213 {
1214 DiImage* dimg
1215 = serverAddr ? ML_(img_from_di_server)(name, serverAddr)
1216 : ML_(img_from_local_file)(name);
1217 if (dimg == NULL)
1218 return NULL;
1219
1220 if (VG_(clo_verbosity) > 1) {
1221 if (serverAddr)
1222 VG_(message)(Vg_DebugMsg, " Considering %s on server %s ..\n",
1223 name, serverAddr);
1224 else
1225 VG_(message)(Vg_DebugMsg, " Considering %s ..\n", name);
1226 }
1227
1228 /* We will always check the crc if we have one (altfiles don't have one)
1229 for now because we might be opening the main file again by any other
1230 name, and that obviously also has the same buildid. More efficient
1231 would be an fstat bases check or a check that the file actually
1232 contains .debug* sections. */
1233 if (buildid && crc == 0) {
1234 HChar* debug_buildid = find_buildid(dimg, rel_ok, True);
1235 if (debug_buildid == NULL || VG_(strcmp)(buildid, debug_buildid) != 0) {
1236 ML_(img_done)(dimg);
1237 if (VG_(clo_verbosity) > 1)
1238 VG_(message)(Vg_DebugMsg,
1239 " .. build-id mismatch (found %s wanted %s)\n",
1240 debug_buildid, buildid);
1241 ML_(dinfo_free)(debug_buildid);
1242 return NULL;
1243 }
1244 ML_(dinfo_free)(debug_buildid);
1245 if (VG_(clo_verbosity) > 1)
1246 VG_(message)(Vg_DebugMsg, " .. build-id is valid\n");
1247 } else {
1248 UInt calccrc = ML_(img_calc_gnu_debuglink_crc32)(dimg);
1249 if (calccrc != crc) {
1250 ML_(img_done)(dimg);
1251 if (VG_(clo_verbosity) > 1)
1252 VG_(message)(Vg_DebugMsg,
1253 " .. CRC mismatch (computed %08x wanted %08x)\n", calccrc, crc);
1254 return NULL;
1255 }
1256
1257 if (VG_(clo_verbosity) > 1)
1258 VG_(message)(Vg_DebugMsg, " .. CRC is valid\n");
1259 }
1260
1261 return dimg;
1262 }
1263
1264
1265 /* Try to find a separate debug file for a given object file. If
1266 found, return its DiImage, which should be freed by the caller. If
1267 |buildid| is non-NULL, then a debug object matching it is
1268 acceptable. If |buildid| is NULL or doesn't specify a findable
1269 debug object, then we look in various places to find a file with
1270 the specified CRC. And if that doesn't work out then we give
1271 up. */
1272 static
find_debug_file(struct _DebugInfo * di,const HChar * objpath,const HChar * buildid,const HChar * debugname,UInt crc,Bool rel_ok)1273 DiImage* find_debug_file( struct _DebugInfo* di,
1274 const HChar* objpath, const HChar* buildid,
1275 const HChar* debugname, UInt crc, Bool rel_ok )
1276 {
1277 const HChar* extrapath = VG_(clo_extra_debuginfo_path);
1278 const HChar* serverpath = VG_(clo_debuginfo_server);
1279
1280 DiImage* dimg = NULL; /* the img that we found */
1281 HChar* debugpath = NULL; /* where we found it */
1282
1283 if (buildid != NULL) {
1284 debugpath = ML_(dinfo_zalloc)("di.fdf.1",
1285 VG_(strlen)(buildid) + 33);
1286
1287 VG_(sprintf)(debugpath, "/usr/lib/debug/.build-id/%c%c/%s.debug",
1288 buildid[0], buildid[1], buildid + 2);
1289
1290 dimg = open_debug_file(debugpath, buildid, 0, rel_ok, NULL);
1291 if (!dimg) {
1292 ML_(dinfo_free)(debugpath);
1293 debugpath = NULL;
1294 }
1295 }
1296
1297 if (dimg == NULL && debugname != NULL) {
1298 HChar *objdir = ML_(dinfo_strdup)("di.fdf.2", objpath);
1299 HChar *objdirptr;
1300
1301 if ((objdirptr = VG_(strrchr)(objdir, '/')) != NULL)
1302 *objdirptr = '\0';
1303
1304 debugpath = ML_(dinfo_zalloc)(
1305 "di.fdf.3",
1306 VG_(strlen)(objdir) + VG_(strlen)(debugname) + 64
1307 + (extrapath ? VG_(strlen)(extrapath) : 0)
1308 + (serverpath ? VG_(strlen)(serverpath) : 0));
1309
1310 VG_(sprintf)(debugpath, "%s/%s", objdir, debugname);
1311 dimg = open_debug_file(debugpath, buildid, crc, rel_ok, NULL);
1312 if (dimg != NULL) goto dimg_ok;
1313
1314 VG_(sprintf)(debugpath, "%s/.debug/%s", objdir, debugname);
1315 dimg = open_debug_file(debugpath, buildid, crc, rel_ok, NULL);
1316 if (dimg != NULL) goto dimg_ok;
1317
1318 VG_(sprintf)(debugpath, "/usr/lib/debug%s/%s", objdir, debugname);
1319 dimg = open_debug_file(debugpath, buildid, crc, rel_ok, NULL);
1320 if (dimg != NULL) goto dimg_ok;
1321
1322 if (extrapath) {
1323 VG_(sprintf)(debugpath, "%s%s/%s", extrapath,
1324 objdir, debugname);
1325 dimg = open_debug_file(debugpath, buildid, crc, rel_ok, NULL);
1326 if (dimg != NULL) goto dimg_ok;
1327 }
1328
1329 if (serverpath) {
1330 /* When looking on the debuginfo server, always just pass the
1331 basename. */
1332 const HChar* basename = debugname;
1333 if (VG_(strstr)(basename, "/") != NULL) {
1334 basename = VG_(strrchr)(basename, '/') + 1;
1335 }
1336 VG_(sprintf)(debugpath, "%s on %s", basename, serverpath);
1337 dimg = open_debug_file(basename, buildid, crc, rel_ok, serverpath);
1338 if (dimg) goto dimg_ok;
1339 }
1340
1341 dimg_ok:
1342
1343 ML_(dinfo_free)(objdir);
1344 }
1345
1346 if (dimg != NULL) {
1347 vg_assert(debugpath);
1348 TRACE_SYMTAB("\n");
1349 TRACE_SYMTAB("------ Found a debuginfo file: %s\n", debugpath);
1350
1351 /* Only set once, we might be called again for opening the altfile. */
1352 if (di->fsm.dbgname == NULL)
1353 di->fsm.dbgname = ML_(dinfo_strdup)("di.fdf.4", debugpath);
1354 }
1355
1356 if (debugpath)
1357 ML_(dinfo_free)(debugpath);
1358
1359 return dimg;
1360 }
1361
1362
1363 /* Try to find a separate debug file for a given object file, in a
1364 hacky and dangerous way: check only the --extra-debuginfo-path and
1365 the --debuginfo-server. And don't do a consistency check. */
1366 static
find_debug_file_ad_hoc(const DebugInfo * di,const HChar * objpath)1367 DiImage* find_debug_file_ad_hoc( const DebugInfo* di,
1368 const HChar* objpath )
1369 {
1370 const HChar* extrapath = VG_(clo_extra_debuginfo_path);
1371 const HChar* serverpath = VG_(clo_debuginfo_server);
1372
1373 DiImage* dimg = NULL; /* the img that we found */
1374 HChar* debugpath = NULL; /* where we found it */
1375
1376 HChar *objdir = ML_(dinfo_strdup)("di.fdfah.1", objpath);
1377 HChar *objdirptr;
1378
1379 if ((objdirptr = VG_(strrchr)(objdir, '/')) != NULL)
1380 *objdirptr = '\0';
1381
1382 debugpath = ML_(dinfo_zalloc)(
1383 "di.fdfah.3",
1384 VG_(strlen)(objdir) + 64
1385 + (extrapath ? VG_(strlen)(extrapath) : 0)
1386 + (serverpath ? VG_(strlen)(serverpath) : 0));
1387
1388 if (extrapath) {
1389 VG_(sprintf)(debugpath, "%s/%s", extrapath, objpath);
1390 dimg = ML_(img_from_local_file)(debugpath);
1391 if (dimg != NULL) {
1392 if (VG_(clo_verbosity) > 1) {
1393 VG_(message)(Vg_DebugMsg, " Using (POSSIBLY MISMATCHED) %s\n",
1394 debugpath);
1395 }
1396 goto dimg_ok;
1397 }
1398 }
1399 if (serverpath) {
1400 /* When looking on the debuginfo server, always just pass the
1401 basename. */
1402 const HChar* basename = objpath;
1403 if (VG_(strstr)(basename, "/") != NULL) {
1404 basename = VG_(strrchr)(basename, '/') + 1;
1405 }
1406 VG_(sprintf)(debugpath, "%s on %s", basename, serverpath);
1407 dimg = ML_(img_from_di_server)(basename, serverpath);
1408 if (dimg != NULL) {
1409 if (VG_(clo_verbosity) > 1) {
1410 VG_(message)(Vg_DebugMsg, " Using (POSSIBLY MISMATCHED) %s\n",
1411 debugpath);
1412 }
1413 goto dimg_ok;
1414 }
1415 }
1416
1417 dimg_ok:
1418
1419 ML_(dinfo_free)(objdir);
1420
1421 if (dimg != NULL) {
1422 vg_assert(debugpath);
1423 TRACE_SYMTAB("\n");
1424 TRACE_SYMTAB("------ Found an ad_hoc debuginfo file: %s\n", debugpath);
1425 }
1426
1427 if (debugpath)
1428 ML_(dinfo_free)(debugpath);
1429
1430 return dimg;
1431 }
1432
1433
/* Base-index-scale address computation: returns the image offset of
   entry number |idx| in a table starting at |base| whose entries are
   |scale| bytes apart. */
static DiOffT INDEX_BIS ( DiOffT base, UWord idx, UWord scale ) {
   // idx and scale really ought to be 64-bit quantities in the first
   // place; widen both before multiplying so the product is computed
   // in DiOffT.
   const DiOffT displacement = (DiOffT)idx * (DiOffT)scale;
   return base + displacement;
}
1439
1440
1441 /* Find the file offset corresponding to SVMA by using the program
1442 headers. This is taken from binutils-2.17/binutils/readelf.c
1443 offset_from_vma(). */
1444 static
file_offset_from_svma(Bool * ok,Addr svma,DiImage * img,DiOffT phdr_ioff,Word phdr_nent,Word phdr_ent_szB)1445 Word file_offset_from_svma ( /*OUT*/Bool* ok,
1446 Addr svma,
1447 DiImage* img,
1448 DiOffT phdr_ioff,
1449 Word phdr_nent,
1450 Word phdr_ent_szB )
1451 {
1452 Word i;
1453 for (i = 0; i < phdr_nent; i++) {
1454 ElfXX_Phdr seg;
1455 ML_(img_get)(&seg, img,
1456 INDEX_BIS(phdr_ioff, i, phdr_ent_szB), sizeof(seg));
1457 if (seg.p_type != PT_LOAD)
1458 continue;
1459 if (svma >= (seg.p_vaddr & -seg.p_align)
1460 && svma + 1 <= seg.p_vaddr + seg.p_filesz) {
1461 *ok = True;
1462 return svma - seg.p_vaddr + seg.p_offset;
1463 }
1464 }
1465 *ok = False;
1466 return 0;
1467 }
1468
1469 /* Check if section is compressed and modify DiSlice if it is.
1470 Returns False in case of unsupported compression type.
1471 */
check_compression(ElfXX_Shdr * h,DiSlice * s)1472 static Bool check_compression(ElfXX_Shdr* h, DiSlice* s) {
1473 if (h->sh_flags & SHF_COMPRESSED) {
1474 ElfXX_Chdr chdr;
1475 ML_(img_get)(&chdr, s->img, s->ioff, sizeof(ElfXX_Chdr));
1476 if (chdr.ch_type != ELFCOMPRESS_ZLIB)
1477 return False;
1478 s->ioff = ML_(img_mark_compressed_part)(s->img,
1479 s->ioff + sizeof(ElfXX_Chdr),
1480 s->szB - sizeof(ElfXX_Chdr),
1481 (SizeT)chdr.ch_size);
1482 s->szB = chdr.ch_size;
1483 } else if (h->sh_size > SIZE_OF_ZLIB_HEADER) {
1484 /* Read the zlib header. In this case, it should be "ZLIB"
1485 followed by the uncompressed section size, 8 bytes in BE order. */
1486 UChar tmp[SIZE_OF_ZLIB_HEADER];
1487 ML_(img_get)(tmp, s->img, s->ioff, SIZE_OF_ZLIB_HEADER);
1488 if (VG_(memcmp)(tmp, "ZLIB", 4) == 0) {
1489 SizeT size;
1490 # if (VG_WORDSIZE == 8)
1491 size = tmp[4]; size <<= 8;
1492 size += tmp[5]; size <<= 8;
1493 size += tmp[6]; size <<= 8;
1494 size += tmp[7]; size <<= 8;
1495 # else
1496 vg_assert((tmp[4] == 0) && (tmp[5] == 0) && (tmp[6] == 0)
1497 && (tmp[7] == 0));
1498 size = 0;
1499 # endif
1500 size += tmp[8]; size <<= 8;
1501 size += tmp[9]; size <<= 8;
1502 size += tmp[10]; size <<= 8;
1503 size += tmp[11];
1504 s->ioff = ML_(img_mark_compressed_part)(s->img,
1505 s->ioff + SIZE_OF_ZLIB_HEADER,
1506 s->szB - SIZE_OF_ZLIB_HEADER,
1507 size);
1508 s->szB = size;
1509 }
1510 }
1511 return True;
1512 }
1513
1514 /* The central function for reading ELF debug info. For the
1515 object/exe specified by the DebugInfo, find ELF sections, then read
1516 the symbols, line number info, file name info, CFA (stack-unwind
1517 info) and anything else we want, into the tables within the
1518 supplied DebugInfo.
1519 */
1520
ML_(read_elf_debug_info)1521 Bool ML_(read_elf_debug_info) ( struct _DebugInfo* di )
1522 {
1523 /* This function is long and complex. That, and the presence of
1524 nested scopes, means it's not always easy to see which parts are
1525 in loops/conditionals and which aren't. To make it easier to
1526 follow, points executed exactly once -- that is, those which are
1527 the top level of the function -- are marked TOPLEVEL.
1528 */
1529 /* Consistent terminology for local variable names, without which
1530 it's almost unfollowably complex:
1531
1532 In which file?
1533 in the main ELF file *_m*
1534 in the debuginfo file *_d*
1535 in the alt debuginfo file *_a*
1536
1537 What kind of thing?
1538 _{m,d,a}img a DiImage*
1539 _{m,d,a}ioff an offset in the image (DiOffT)
1540 _{m,d,a}nent "number of entries"
1541 _{m,d,a}ent_szB "size in bytes of an entry"
1542 ehdr_{m,d,a} ELF header
1543 phdr Program header
1544 shdr Section header
1545 a_X a temporary X
1546 _escn an DiSlice (elf section info) variable
1547 szB size in bytes
1548 */
1549
1550
1551 /* TOPLEVEL */
1552 Bool res, ok;
1553 Word i, j;
1554 Bool dynbss_present = False;
1555 Bool sdynbss_present = False;
1556
1557 /* Image for the main ELF file we're working with. */
1558 DiImage* mimg = NULL;
1559
1560 /* Ditto for any ELF debuginfo file that we might happen to load. */
1561 DiImage* dimg = NULL;
1562
1563 /* Ditto for alternate ELF debuginfo file that we might happen to load. */
1564 DiImage* aimg = NULL;
1565
1566 /* ELF header offset for the main file. Should be zero since the
1567 ELF header is at start of file. */
1568 DiOffT ehdr_mioff = 0;
1569
1570 /* Program header table image addr, # entries, entry size */
1571 DiOffT phdr_mioff = 0;
1572 UWord phdr_mnent = 0;
1573 UWord phdr_ment_szB = 0;
1574
1575 /* Section header image addr, # entries, entry size. Also the
1576 associated string table. */
1577 DiOffT shdr_mioff = 0;
1578 UWord shdr_mnent = 0;
1579 UWord shdr_ment_szB = 0;
1580 DiOffT shdr_strtab_mioff = 0;
1581
1582 /* SVMAs covered by rx and rw segments and corresponding biases.
1583 Normally each object would provide just one rx and one rw area,
1584 but various ELF mangling tools create objects with multiple
1585 such entries, hence the generality. */
1586 typedef
1587 struct {
1588 Addr svma_base;
1589 Addr svma_limit;
1590 PtrdiffT bias;
1591 Bool exec;
1592 }
1593 RangeAndBias;
1594
1595 XArray* /* of RangeAndBias */ svma_ranges = NULL;
1596
1597 # if defined(SOLARIS_PT_SUNDWTRACE_THRP)
1598 Addr dtrace_data_vaddr = 0;
1599 # endif
1600
1601 vg_assert(di);
1602 vg_assert(di->fsm.have_rx_map == True);
1603 vg_assert(di->fsm.have_rw_map == True);
1604 vg_assert(di->have_dinfo == False);
1605 vg_assert(di->fsm.filename);
1606 vg_assert(!di->symtab);
1607 vg_assert(!di->loctab);
1608 vg_assert(!di->inltab);
1609 vg_assert(!di->cfsi_base);
1610 vg_assert(!di->cfsi_m_ix);
1611 vg_assert(!di->cfsi_rd);
1612 vg_assert(!di->cfsi_exprs);
1613 vg_assert(!di->strpool);
1614 vg_assert(!di->fndnpool);
1615 vg_assert(!di->soname);
1616
1617 {
1618 Bool has_nonempty_rx = False;
1619 Bool has_nonempty_rw = False;
1620 for (i = 0; i < VG_(sizeXA)(di->fsm.maps); i++) {
1621 DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, i);
1622 if (!map->rx && !map->rw)
1623 continue;
1624 if (map->rx && map->size > 0)
1625 has_nonempty_rx = True;
1626 if (map->rw && map->size > 0)
1627 has_nonempty_rw = True;
1628 /* If this doesn't hold true, it means that m_syswrap/m_aspacemgr
1629 managed to do a mapping where the start isn't page aligned.
1630 Which sounds pretty bogus to me. */
1631 vg_assert(VG_IS_PAGE_ALIGNED(map->avma));
1632 }
1633 vg_assert(has_nonempty_rx);
1634 vg_assert(has_nonempty_rw);
1635 }
1636
1637 /* ----------------------------------------------------------
1638 At this point, there is very little information in the
1639 DebugInfo. We only know that something that looks like an ELF
1640 file has been mapped rx-ishly and rw-ishly as recorded in the
1641 di->fsm.maps array items. First we examine the file's ELF
1642 Program Header, and, by comparing that against the di->fsm.maps
1643 info, try to figure out the AVMAs for the sections we care
1644 about, that should have been mapped: text, data, sdata, bss,
1645 got, plt, and toc.
1646 ---------------------------------------------------------- */
1647
1648 res = False;
1649
1650 if (VG_(clo_verbosity) > 1 || VG_(clo_trace_redir))
1651 VG_(message)(Vg_DebugMsg, "Reading syms from %s\n",
1652 di->fsm.filename );
1653
1654 /* Connect to the primary object image, so that we can read symbols
1655 and line number info out of it. It will be disconnected
1656 immediately thereafter; it is only connected transiently. */
1657 mimg = ML_(img_from_local_file)(di->fsm.filename);
1658 if (mimg == NULL) {
1659 VG_(message)(Vg_UserMsg, "warning: connection to image %s failed\n",
1660 di->fsm.filename );
1661 VG_(message)(Vg_UserMsg, " no symbols or debug info loaded\n" );
1662 return False;
1663 }
1664
1665 /* Ok, the object image is available. Now verify that it is a
1666 valid ELF .so or executable image. */
1667 ok = is_elf_object_file_by_DiImage(mimg, False);
1668 if (!ok) {
1669 ML_(symerr)(di, True, "Invalid ELF Header");
1670 goto out;
1671 }
1672
1673 /* Find where the program and section header tables are, and give
1674 up if either is missing or outside the image (bogus). */
1675 ElfXX_Ehdr ehdr_m;
1676 vg_assert(ehdr_mioff == 0); // ensured by its initialisation
1677 ok = ML_(img_valid)(mimg, ehdr_mioff, sizeof(ehdr_m));
1678 vg_assert(ok); // ML_(is_elf_object_file) should ensure this
1679 ML_(img_get)(&ehdr_m, mimg, ehdr_mioff, sizeof(ehdr_m));
1680
1681 phdr_mioff = ehdr_mioff + ehdr_m.e_phoff;
1682 phdr_mnent = ehdr_m.e_phnum;
1683 phdr_ment_szB = ehdr_m.e_phentsize;
1684
1685 shdr_mioff = ehdr_mioff + ehdr_m.e_shoff;
1686 shdr_mnent = ehdr_m.e_shnum;
1687 shdr_ment_szB = ehdr_m.e_shentsize;
1688
1689 TRACE_SYMTAB("------ Basic facts about the object ------\n");
1690 TRACE_SYMTAB("object: n_oimage %llu\n",
1691 (ULong)ML_(img_size)(mimg));
1692 TRACE_SYMTAB("phdr: ioff %llu nent %lu ent_szB %lu\n",
1693 phdr_mioff, phdr_mnent, phdr_ment_szB);
1694 TRACE_SYMTAB("shdr: ioff %llu nent %lu ent_szB %lu\n",
1695 shdr_mioff, shdr_mnent, shdr_ment_szB);
1696 for (i = 0; i < VG_(sizeXA)(di->fsm.maps); i++) {
1697 const DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, i);
1698 if (map->rx)
1699 TRACE_SYMTAB("rx_map: avma %#lx size %lu foff %ld\n",
1700 map->avma, map->size, map->foff);
1701 }
1702 for (i = 0; i < VG_(sizeXA)(di->fsm.maps); i++) {
1703 const DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, i);
1704 if (map->rw)
1705 TRACE_SYMTAB("rw_map: avma %#lx size %lu foff %ld\n",
1706 map->avma, map->size, map->foff);
1707 }
1708
1709 if (phdr_mnent == 0
1710 || !ML_(img_valid)(mimg, phdr_mioff, phdr_mnent * phdr_ment_szB)) {
1711 ML_(symerr)(di, True, "Missing or invalid ELF Program Header Table");
1712 goto out;
1713 }
1714
1715 if (shdr_mnent == 0
1716 || !ML_(img_valid)(mimg, shdr_mioff, shdr_mnent * shdr_ment_szB)) {
1717 ML_(symerr)(di, True, "Missing or invalid ELF Section Header Table");
1718 goto out;
1719 }
1720
1721 /* Also find the section header's string table, and validate. */
1722 /* checked previously by is_elf_object_file: */
1723 vg_assert(ehdr_m.e_shstrndx != SHN_UNDEF);
1724
1725 // shdr_mioff is the offset of the section header table
1726 // and we need the ehdr_m.e_shstrndx'th entry
1727 { ElfXX_Shdr a_shdr;
1728 ML_(img_get)(&a_shdr, mimg,
1729 INDEX_BIS(shdr_mioff, ehdr_m.e_shstrndx, shdr_ment_szB),
1730 sizeof(a_shdr));
1731 shdr_strtab_mioff
1732 = ehdr_mioff /* isn't this always zero? */ + a_shdr.sh_offset;
1733
1734 if (!ML_(img_valid)(mimg, shdr_strtab_mioff,
1735 1/*bogus, but we don't know the real size*/ )) {
1736 ML_(symerr)(di, True, "Invalid ELF Section Header String Table");
1737 goto out;
1738 }
1739 }
1740
1741 TRACE_SYMTAB("shdr: string table at %llu\n", shdr_strtab_mioff);
1742
1743 svma_ranges = VG_(newXA)(ML_(dinfo_zalloc), "di.relfdi.1",
1744 ML_(dinfo_free), sizeof(RangeAndBias));
1745
1746 /* TOPLEVEL */
1747 /* Look through the program header table, and:
1748 - copy information from suitable PT_LOAD entries into svma_ranges
1749 - find (or fake up) the .soname for this object.
1750 */
1751 TRACE_SYMTAB("\n");
1752 TRACE_SYMTAB("------ Examining the program headers ------\n");
1753 vg_assert(di->soname == NULL);
1754 {
1755 /* TOPLEVEL */
1756 ElfXX_Addr prev_svma = 0;
1757
1758 for (i = 0; i < phdr_mnent; i++) {
1759 ElfXX_Phdr a_phdr;
1760 ML_(img_get)(&a_phdr, mimg,
1761 INDEX_BIS(phdr_mioff, i, phdr_ment_szB),
1762 sizeof(a_phdr));
1763
1764 /* Make sure the PT_LOADable entries are in order and
1765 non-overlapping. This in turn means the address ranges
1766 slurped into svma_ranges are in order and
1767 non-overlapping. */
1768
1769 if (a_phdr.p_type == PT_LOAD) {
1770 TRACE_SYMTAB("PT_LOAD[%ld]: p_vaddr %#lx (prev %#lx)\n",
1771 i, (UWord)a_phdr.p_vaddr, (UWord)prev_svma);
1772 TRACE_SYMTAB("PT_LOAD[%ld]: p_offset %lu, p_filesz %lu,"
1773 " perms %c%c%c\n",
1774 i, (UWord)a_phdr.p_offset, (UWord)a_phdr.p_filesz,
1775 a_phdr.p_flags & PF_R ? 'r' : '-',
1776 a_phdr.p_flags & PF_W ? 'w' : '-',
1777 a_phdr.p_flags & PF_X ? 'x' : '-');
1778 if (a_phdr.p_vaddr < prev_svma) {
1779 ML_(symerr)(di, True,
1780 "ELF Program Headers are not in ascending order");
1781 goto out;
1782 }
1783 prev_svma = a_phdr.p_vaddr;
1784 if (a_phdr.p_memsz > 0) {
1785 Bool loaded = False;
1786 for (j = 0; j < VG_(sizeXA)(di->fsm.maps); j++) {
1787 const DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, j);
1788 if ( (map->rx || map->rw)
1789 && map->size > 0 /* stay sane */
1790 && a_phdr.p_offset >= map->foff
1791 && a_phdr.p_offset < map->foff + map->size
1792 && a_phdr.p_offset + a_phdr.p_filesz
1793 <= map->foff + map->size) {
1794 RangeAndBias item;
1795 item.svma_base = a_phdr.p_vaddr;
1796 item.svma_limit = a_phdr.p_vaddr + a_phdr.p_memsz;
1797 item.bias = map->avma - map->foff
1798 + a_phdr.p_offset - a_phdr.p_vaddr;
1799 if (map->rw
1800 && (a_phdr.p_flags & (PF_R | PF_W))
1801 == (PF_R | PF_W)) {
1802 item.exec = False;
1803 VG_(addToXA)(svma_ranges, &item);
1804 TRACE_SYMTAB(
1805 "PT_LOAD[%ld]: acquired as rw, bias 0x%lx\n",
1806 i, (UWord)item.bias);
1807 loaded = True;
1808 }
1809 if (map->rx
1810 && (a_phdr.p_flags & (PF_R | PF_X))
1811 == (PF_R | PF_X)) {
1812 item.exec = True;
1813 VG_(addToXA)(svma_ranges, &item);
1814 TRACE_SYMTAB(
1815 "PT_LOAD[%ld]: acquired as rx, bias 0x%lx\n",
1816 i, (UWord)item.bias);
1817 loaded = True;
1818 }
1819 }
1820 }
1821 if (!loaded) {
1822 # if defined(SOLARIS_PT_SUNDWTRACE_THRP)
1823 if ((a_phdr.p_memsz == VKI_PT_SUNWDTRACE_SIZE)
1824 && ((a_phdr.p_flags & (PF_R | PF_W | PF_X)) == PF_R)) {
1825 TRACE_SYMTAB("PT_LOAD[%ld]: ignore dtrace_data program "
1826 "header\n", i);
1827 dtrace_data_vaddr = a_phdr.p_vaddr;
1828 continue;
1829 }
1830 # endif /* SOLARIS_PT_SUNDWTRACE_THRP */
1831
1832 ML_(symerr)(di, False,
1833 "ELF section outside all mapped regions");
1834 /* This problem might be solved by further memory mappings.
1835 Avoid the vg_assert(!di->soname) at the beginning of this
1836 function if DYNAMIC section has been already processed. */
1837 if (di->soname) {
1838 ML_(dinfo_free)(di->soname);
1839 di->soname = NULL;
1840 }
1841 goto out;
1842 }
1843 }
1844 }
1845
1846 /* Try to get the soname. If there isn't one, use "NONE".
1847 The seginfo needs to have some kind of soname in order to
1848 facilitate writing redirect functions, since all redirect
1849 specifications require a soname (pattern). */
1850 if (a_phdr.p_type == PT_DYNAMIC && di->soname == NULL) {
1851 Word stroff = -1;
1852 DiOffT strtab_mioff = DiOffT_INVALID;
1853 for (j = 0; True/*exit check is in the loop*/; j++) {
1854 ElfXX_Dyn t_dyn_m; /* dyn_img[j] */
1855 ML_(img_get)(&t_dyn_m, mimg,
1856 INDEX_BIS(ehdr_mioff + a_phdr.p_offset,
1857 j, sizeof(ElfXX_Dyn)),
1858 sizeof(t_dyn_m));
1859 if (t_dyn_m.d_tag == DT_NULL)
1860 break;
1861
1862 switch (t_dyn_m.d_tag) {
1863 case DT_SONAME: {
1864 stroff = t_dyn_m.d_un.d_val;
1865 break;
1866 }
1867 case DT_STRTAB: {
1868 Bool ok2 = False;
1869 Word offset = file_offset_from_svma(
1870 &ok2, t_dyn_m.d_un.d_ptr, mimg,
1871 phdr_mioff, phdr_mnent, phdr_ment_szB
1872 );
1873 if (ok2 && strtab_mioff == DiOffT_INVALID) {
1874 // Check for obviously bogus offsets.
1875 if (!ML_(img_valid)(mimg, offset, 1)) {
1876 ML_(symerr)(di, True, "Invalid DT_STRTAB offset");
1877 goto out;
1878 }
1879 strtab_mioff = ehdr_mioff + offset;
1880 vg_assert(ehdr_mioff == 0); // should always be
1881 }
1882 break;
1883 }
1884 default:
1885 break;
1886 }
1887 }
1888 if (stroff != -1 && strtab_mioff != DiOffT_INVALID) {
1889 di->soname = ML_(img_strdup)(mimg, "di.redi.1",
1890 strtab_mioff + stroff);
1891 TRACE_SYMTAB("Found soname = %s\n", di->soname);
1892 }
1893 }
1894 } /* for (i = 0; i < phdr_Mnent; i++) ... */
1895 /* TOPLEVEL */
1896
1897 } /* examine the program headers (local scope) */
1898
1899 /* TOPLEVEL */
1900
1901 /* If, after looking at all the program headers, we still didn't
1902 find a soname, add a fake one. */
1903 if (di->soname == NULL) {
1904 TRACE_SYMTAB("No soname found; using (fake) \"NONE\"\n");
1905 di->soname = ML_(dinfo_strdup)("di.redi.2", "NONE");
1906 }
1907
1908 vg_assert(VG_(sizeXA)(svma_ranges) != 0);
1909
1910 /* Now read the section table. */
1911 TRACE_SYMTAB("\n");
1912 TRACE_SYMTAB("------ Examining the section headers ------\n");
1913 for (i = 0; i < VG_(sizeXA)(di->fsm.maps); i++) {
1914 const DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, i);
1915 if (map->rx)
1916 TRACE_SYMTAB("rx: at %#lx are mapped foffsets %ld .. %lu\n",
1917 map->avma, map->foff, map->foff + map->size - 1 );
1918 }
1919 TRACE_SYMTAB("rx: contains these svma regions:\n");
1920 for (i = 0; i < VG_(sizeXA)(svma_ranges); i++) {
1921 const RangeAndBias* reg = VG_(indexXA)(svma_ranges, i);
1922 if (reg->exec)
1923 TRACE_SYMTAB(" svmas %#lx .. %#lx with bias %#lx\n",
1924 reg->svma_base, reg->svma_limit - 1, (UWord)reg->bias );
1925 }
1926 for (i = 0; i < VG_(sizeXA)(di->fsm.maps); i++) {
1927 const DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, i);
1928 if (map->rw)
1929 TRACE_SYMTAB("rw: at %#lx are mapped foffsets %ld .. %lu\n",
1930 map->avma, map->foff, map->foff + map->size - 1 );
1931 }
1932 TRACE_SYMTAB("rw: contains these svma regions:\n");
1933 for (i = 0; i < VG_(sizeXA)(svma_ranges); i++) {
1934 const RangeAndBias* reg = VG_(indexXA)(svma_ranges, i);
1935 if (!reg->exec)
1936 TRACE_SYMTAB(" svmas %#lx .. %#lx with bias %#lx\n",
1937 reg->svma_base, reg->svma_limit - 1, (UWord)reg->bias );
1938 }
1939
1940 /* TOPLEVEL */
1941 /* Iterate over section headers */
1942 for (i = 0; i < shdr_mnent; i++) {
1943 ElfXX_Shdr a_shdr;
1944 ML_(img_get)(&a_shdr, mimg,
1945 INDEX_BIS(shdr_mioff, i, shdr_ment_szB), sizeof(a_shdr));
1946 DiOffT name_mioff = shdr_strtab_mioff + a_shdr.sh_name;
1947 HChar* name = ML_(img_strdup)(mimg, "di.redi_name.2", name_mioff);
1948 Addr svma = a_shdr.sh_addr;
1949 OffT foff = a_shdr.sh_offset;
1950 UWord size = a_shdr.sh_size; /* Do not change this to be signed. */
1951 UInt alyn = a_shdr.sh_addralign;
1952 Bool nobits = a_shdr.sh_type == SHT_NOBITS;
1953 /* Look through our collection of info obtained from the PT_LOAD
1954 headers, and make 'inrx' and 'inrw' point to the first entry
1955 in each that contains 'svma'. If in each case none is found,
1956 leave the relevant pointer at NULL. */
1957 RangeAndBias* inrx = NULL;
1958 RangeAndBias* inrw = NULL;
1959 for (j = 0; j < VG_(sizeXA)(svma_ranges); j++) {
1960 RangeAndBias* rng = VG_(indexXA)(svma_ranges, j);
1961 if (svma >= rng->svma_base && svma < rng->svma_limit) {
1962 if (!inrx && rng->exec) {
1963 inrx = rng;
1964 } else if (!inrw && !rng->exec) {
1965 inrw = rng;
1966 }
1967 if (inrx && inrw)
1968 break;
1969 }
1970 }
1971
1972 TRACE_SYMTAB(" [sec %2ld] %s %s al%4u foff %6ld .. %6lu "
1973 " svma %p name \"%s\"\n",
1974 i, inrx ? "rx" : " ", inrw ? "rw" : " ", alyn,
1975 foff, (size == 0) ? foff : foff+size-1, (void *) svma, name);
1976
1977 /* Check for sane-sized sections. SHT_NOBITS sections have zero
1978 size in the file and their offsets are just conceptual. */
1979 if (!nobits &&
1980 (foff >= ML_(img_size)(mimg) || foff + size > ML_(img_size)(mimg))) {
1981 ML_(symerr)(di, True, "ELF Section extends beyond image end");
1982 goto out;
1983 }
1984
1985 /* Check for a sane alignment value. */
1986 if (alyn > 0 && -1 == VG_(log2)(alyn)) {
1987 ML_(symerr)(di, True, "ELF Section contains invalid "
1988 ".sh_addralign value");
1989 goto out;
1990 }
1991
1992 /* Ignore zero sized sections. */
1993 if (size == 0) {
1994 TRACE_SYMTAB("zero sized section \"%s\", ignoring\n", name);
1995 ML_(dinfo_free)(name);
1996 continue;
1997 }
1998
/* BAD(_secname): give up on this object because the section named
   _secname (a string literal) is mapped in a way we cannot make
   sense of.  Reports the problem via ML_(symerr), releases the
   strings owned at this point, and jumps to 'out'.  Only usable
   inside the section-header loop, where 'di', 'name' and the 'out'
   label are all in scope. */
#     define BAD(_secname)                                      \
         do { ML_(symerr)(di, True,                             \
                          "Can't make sense of " _secname       \
                          " section mapping");                  \
              /* 'name' belongs to this loop iteration; */      \
              /* nothing after the jump can release it. */      \
              ML_(dinfo_free)(name);                            \
              /* Free and clear the soname, so that we */       \
              /* neither leak it nor hit the */                 \
              /* vg_assert(!di->soname) if we find */           \
              /* ourselves back in this routine later, */       \
              /* with the same di. */                           \
              if (di->soname) {                                 \
                 ML_(dinfo_free)(di->soname);                   \
                 di->soname = NULL;                             \
              }                                                 \
              goto out;                                         \
         } while (0)
2009
2010 /* Find avma-s for: .text .data .sdata .rodata .bss .sbss .plt .got .opd
2011 and .eh_frame */
2012
2013 /* Accept .text where mapped as rx (code); zero-sized sections were skipped above */
2014 if (0 == VG_(strcmp)(name, ".text")) {
2015 if (inrx && !di->text_present) {
2016 di->text_present = True;
2017 di->text_svma = svma;
2018 di->text_avma = svma + inrx->bias;
2019 di->text_size = size;
2020 di->text_bias = inrx->bias;
2021 di->text_debug_svma = svma;
2022 di->text_debug_bias = inrx->bias;
2023 TRACE_SYMTAB("acquiring .text svma = %#lx .. %#lx\n",
2024 di->text_svma,
2025 di->text_svma + di->text_size - 1);
2026 TRACE_SYMTAB("acquiring .text avma = %#lx .. %#lx\n",
2027 di->text_avma,
2028 di->text_avma + di->text_size - 1);
2029 TRACE_SYMTAB("acquiring .text bias = %#lx\n", (UWord)di->text_bias);
2030 } else {
2031 BAD(".text");
2032 }
2033 }
2034
2035 /* Accept .data where mapped as rw (data); zero-sized sections were skipped above */
2036 if (0 == VG_(strcmp)(name, ".data")) {
2037 # if defined(SOLARIS_PT_SUNDWTRACE_THRP)
2038 if ((size == VKI_PT_SUNWDTRACE_SIZE) && (svma == dtrace_data_vaddr)) {
2039 TRACE_SYMTAB("ignoring .data section for dtrace_data "
2040 "%#lx .. %#lx\n", svma, svma + size - 1);
2041 } else
2042 # endif /* SOLARIS_PT_SUNDWTRACE_THRP */
2043 if (inrw && !di->data_present) {
2044 di->data_present = True;
2045 di->data_svma = svma;
2046 di->data_avma = svma + inrw->bias;
2047 di->data_size = size;
2048 di->data_bias = inrw->bias;
2049 di->data_debug_svma = svma;
2050 di->data_debug_bias = inrw->bias;
2051 TRACE_SYMTAB("acquiring .data svma = %#lx .. %#lx\n",
2052 di->data_svma,
2053 di->data_svma + di->data_size - 1);
2054 TRACE_SYMTAB("acquiring .data avma = %#lx .. %#lx\n",
2055 di->data_avma,
2056 di->data_avma + di->data_size - 1);
2057 TRACE_SYMTAB("acquiring .data bias = %#lx\n", (UWord)di->data_bias);
2058 } else {
2059 BAD(".data");
2060 }
2061 }
2062
2063 /* Accept .sdata where mapped as rw (data) */
2064 if (0 == VG_(strcmp)(name, ".sdata")) {
2065 if (inrw && !di->sdata_present) {
2066 di->sdata_present = True;
2067 di->sdata_svma = svma;
2068 di->sdata_avma = svma + inrw->bias;
2069 di->sdata_size = size;
2070 di->sdata_bias = inrw->bias;
2071 di->sdata_debug_svma = svma;
2072 di->sdata_debug_bias = inrw->bias;
2073 TRACE_SYMTAB("acquiring .sdata svma = %#lx .. %#lx\n",
2074 di->sdata_svma,
2075 di->sdata_svma + di->sdata_size - 1);
2076 TRACE_SYMTAB("acquiring .sdata avma = %#lx .. %#lx\n",
2077 di->sdata_avma,
2078 di->sdata_avma + di->sdata_size - 1);
2079 TRACE_SYMTAB("acquiring .sdata bias = %#lx\n",
2080 (UWord)di->sdata_bias);
2081 } else {
2082 BAD(".sdata");
2083 }
2084 }
2085
2086 /* Accept .rodata where mapped as rx (data); zero-sized sections were skipped above */
2087 if (0 == VG_(strcmp)(name, ".rodata")) {
2088 if (inrx && !di->rodata_present) {
2089 di->rodata_present = True;
2090 di->rodata_svma = svma;
2091 di->rodata_avma = svma + inrx->bias;
2092 di->rodata_size = size;
2093 di->rodata_bias = inrx->bias;
2094 di->rodata_debug_svma = svma;
2095 di->rodata_debug_bias = inrx->bias;
2096 /* NB was 'inrw' prior to r11794 */
2097 TRACE_SYMTAB("acquiring .rodata svma = %#lx .. %#lx\n",
2098 di->rodata_svma,
2099 di->rodata_svma + di->rodata_size - 1);
2100 TRACE_SYMTAB("acquiring .rodata avma = %#lx .. %#lx\n",
2101 di->rodata_avma,
2102 di->rodata_avma + di->rodata_size - 1);
2103 TRACE_SYMTAB("acquiring .rodata bias = %#lx\n",
2104 (UWord)di->rodata_bias);
2105 } else {
2106 BAD(".rodata");
2107 }
2108 }
2109
2110 if (0 == VG_(strcmp)(name, ".dynbss")) {
2111 if (inrw && !di->bss_present) {
2112 dynbss_present = True;
2113 di->bss_present = True;
2114 di->bss_svma = svma;
2115 di->bss_avma = svma + inrw->bias;
2116 di->bss_size = size;
2117 di->bss_bias = inrw->bias;
2118 di->bss_debug_svma = svma;
2119 di->bss_debug_bias = inrw->bias;
2120 TRACE_SYMTAB("acquiring .dynbss svma = %#lx .. %#lx\n",
2121 di->bss_svma,
2122 di->bss_svma + di->bss_size - 1);
2123 TRACE_SYMTAB("acquiring .dynbss avma = %#lx .. %#lx\n",
2124 di->bss_avma,
2125 di->bss_avma + di->bss_size - 1);
2126 TRACE_SYMTAB("acquiring .dynbss bias = %#lx\n",
2127 (UWord)di->bss_bias);
2128 }
2129 }
2130
2131 /* Accept .bss where mapped as rw (data); zero-sized sections were skipped above */
2132 if (0 == VG_(strcmp)(name, ".bss")) {
2133 if (inrw && dynbss_present) {
2134 vg_assert(di->bss_present);
2135 dynbss_present = False;
2136 vg_assert(di->bss_svma + di->bss_size == svma);
2137 di->bss_size += size;
2138 TRACE_SYMTAB("acquiring .bss svma = %#lx .. %#lx\n",
2139 svma, svma + size - 1);
2140 TRACE_SYMTAB("acquiring .bss avma = %#lx .. %#lx\n",
2141 svma + inrw->bias, svma + inrw->bias + size - 1);
2142 TRACE_SYMTAB("acquiring .bss bias = %#lx\n",
2143 (UWord)di->bss_bias);
2144 } else
2145
2146 if (inrw && !di->bss_present) {
2147 di->bss_present = True;
2148 di->bss_svma = svma;
2149 di->bss_avma = svma + inrw->bias;
2150 di->bss_size = size;
2151 di->bss_bias = inrw->bias;
2152 di->bss_debug_svma = svma;
2153 di->bss_debug_bias = inrw->bias;
2154 TRACE_SYMTAB("acquiring .bss svma = %#lx .. %#lx\n",
2155 di->bss_svma,
2156 di->bss_svma + di->bss_size - 1);
2157 TRACE_SYMTAB("acquiring .bss avma = %#lx .. %#lx\n",
2158 di->bss_avma,
2159 di->bss_avma + di->bss_size - 1);
2160 TRACE_SYMTAB("acquiring .bss bias = %#lx\n",
2161 (UWord)di->bss_bias);
2162 } else
2163
2164 /* Now one from the wtf?! department ... */
2165 if (inrx && (!inrw) && !di->bss_present) {
2166 /* File contains a .bss, but it got mapped as rx only.
2167 This is very strange. For now, just pretend we didn't
2168 see it :-) */
2169 di->bss_present = False;
2170 di->bss_svma = 0;
2171 di->bss_avma = 0;
2172 di->bss_size = 0;
2173 di->bss_bias = 0;
2174 di->bss_debug_svma = 0;
2175 di->bss_debug_bias = 0;
2176 if (!VG_(clo_xml)) {
2177 VG_(message)(Vg_UserMsg,
2178 "Warning: the following file's .bss is "
2179 "mapped r-x only - ignoring .bss syms\n");
2180 VG_(message)(Vg_UserMsg, " %s\n", di->fsm.filename
2181 ? di->fsm.filename
2182 : "(null?!)" );
2183 }
2184 } else
2185
2186 if ((!inrw) && (!inrx) && !di->bss_present) {
2187 /* File contains a .bss, but it didn't get mapped. Ignore. */
2188 di->bss_present = False;
2189 di->bss_svma = 0;
2190 di->bss_avma = 0;
2191 di->bss_size = 0;
2192 di->bss_bias = 0;
2193 } else {
2194 BAD(".bss");
2195 }
2196 }
2197
2198 if (0 == VG_(strcmp)(name, ".sdynbss")) {
2199 if (inrw && !di->sbss_present) {
2200 sdynbss_present = True;
2201 di->sbss_present = True;
2202 di->sbss_svma = svma;
2203 di->sbss_avma = svma + inrw->bias;
2204 di->sbss_size = size;
2205 di->sbss_bias = inrw->bias;
2206 di->sbss_debug_svma = svma;
2207 di->sbss_debug_bias = inrw->bias;
2208 TRACE_SYMTAB("acquiring .sdynbss svma = %#lx .. %#lx\n",
2209 di->sbss_svma,
2210 di->sbss_svma + di->sbss_size - 1);
2211 TRACE_SYMTAB("acquiring .sdynbss avma = %#lx .. %#lx\n",
2212 di->sbss_avma,
2213 di->sbss_avma + di->sbss_size - 1);
2214 TRACE_SYMTAB("acquiring .sdynbss bias = %#lx\n",
2215 (UWord)di->sbss_bias);
2216 }
2217 }
2218
2219 /* Accept .sbss where mapped as rw (data) */
2220 if (0 == VG_(strcmp)(name, ".sbss")) {
2221 if (inrw && sdynbss_present) {
2222 vg_assert(di->sbss_present);
2223 sdynbss_present = False;
2224 vg_assert(di->sbss_svma + di->sbss_size == svma);
2225 di->sbss_size += size;
2226 TRACE_SYMTAB("acquiring .sbss svma = %#lx .. %#lx\n",
2227 svma, svma + size - 1);
2228 TRACE_SYMTAB("acquiring .sbss avma = %#lx .. %#lx\n",
2229 svma + inrw->bias, svma + inrw->bias + size - 1);
2230 TRACE_SYMTAB("acquiring .sbss bias = %#lx\n", (UWord)di->sbss_bias);
2231 } else
2232
2233 if (inrw && !di->sbss_present) {
2234 di->sbss_present = True;
2235 di->sbss_svma = svma;
2236 di->sbss_avma = svma + inrw->bias;
2237 di->sbss_size = size;
2238 di->sbss_bias = inrw->bias;
2239 di->sbss_debug_svma = svma;
2240 di->sbss_debug_bias = inrw->bias;
2241 TRACE_SYMTAB("acquiring .sbss svma = %#lx .. %#lx\n",
2242 di->sbss_svma,
2243 di->sbss_svma + di->sbss_size - 1);
2244 TRACE_SYMTAB("acquiring .sbss avma = %#lx .. %#lx\n",
2245 di->sbss_avma,
2246 di->sbss_avma + di->sbss_size - 1);
2247 TRACE_SYMTAB("acquiring .sbss bias = %#lx\n", (UWord)di->sbss_bias);
2248 } else {
2249 BAD(".sbss");
2250 }
2251 }
2252
2253 /* Accept .got where mapped as rw (data) */
2254 if (0 == VG_(strcmp)(name, ".got")) {
2255 if (inrw && !di->got_present) {
2256 di->got_present = True;
2257 di->got_avma = svma + inrw->bias;
2258 di->got_size = size;
2259 TRACE_SYMTAB("acquiring .got avma = %#lx\n", di->got_avma);
2260 } else {
2261 BAD(".got");
2262 }
2263 }
2264
2265 /* Accept .got.plt where mapped as rw (data) */
2266 if (0 == VG_(strcmp)(name, ".got.plt")) {
2267 if (inrw && !di->gotplt_present) {
2268 di->gotplt_present = True;
2269 di->gotplt_avma = svma + inrw->bias;
2270 di->gotplt_size = size;
2271 TRACE_SYMTAB("acquiring .got.plt avma = %#lx\n", di->gotplt_avma);
2272 } else if (size != 0) {
2273 BAD(".got.plt");
2274 }
2275 }
2276
2277 /* PLT is different on different platforms, it seems. */
2278 # if defined(VGP_x86_linux) || defined(VGP_amd64_linux) \
2279 || defined(VGP_arm_linux) || defined (VGP_s390x_linux) \
2280 || defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \
2281 || defined(VGP_arm64_linux) \
2282 || defined(VGP_x86_solaris) || defined(VGP_amd64_solaris)
2283 /* Accept .plt where mapped as rx (code) */
2284 if (0 == VG_(strcmp)(name, ".plt")) {
2285 if (inrx && !di->plt_present) {
2286 di->plt_present = True;
2287 di->plt_avma = svma + inrx->bias;
2288 di->plt_size = size;
2289 TRACE_SYMTAB("acquiring .plt avma = %#lx\n", di->plt_avma);
2290 } else {
2291 BAD(".plt");
2292 }
2293 }
2294 # elif defined(VGP_ppc32_linux)
2295 /* Accept .plt where mapped as rw (data) */
2296 if (0 == VG_(strcmp)(name, ".plt")) {
2297 if (inrw && !di->plt_present) {
2298 di->plt_present = True;
2299 di->plt_avma = svma + inrw->bias;
2300 di->plt_size = size;
2301 TRACE_SYMTAB("acquiring .plt avma = %#lx\n", di->plt_avma);
2302 } else {
2303 BAD(".plt");
2304 }
2305 }
2306 # elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
2307 /* Accept .plt where mapped as rw (data), or unmapped */
2308 if (0 == VG_(strcmp)(name, ".plt")) {
2309 if (inrw && !di->plt_present) {
2310 di->plt_present = True;
2311 di->plt_avma = svma + inrw->bias;
2312 di->plt_size = size;
2313 TRACE_SYMTAB("acquiring .plt avma = %#lx\n", di->plt_avma);
2314 } else
2315 if ((!inrw) && (!inrx) && size > 0 && !di->plt_present) {
2316 /* File contains a .plt, but it didn't get mapped.
2317 Presumably it is not required on this platform. At
2318 least don't reject the situation as invalid. */
2319 di->plt_present = True;
2320 di->plt_avma = 0;
2321 di->plt_size = 0;
2322 } else {
2323 BAD(".plt");
2324 }
2325 }
2326 # else
2327 # error "Unsupported platform"
2328 # endif
2329
2330 /* Accept .opd where mapped as rw (data) */
2331 if (0 == VG_(strcmp)(name, ".opd")) {
2332 if (inrw && !di->opd_present) {
2333 di->opd_present = True;
2334 di->opd_avma = svma + inrw->bias;
2335 di->opd_size = size;
2336 TRACE_SYMTAB("acquiring .opd avma = %#lx\n", di->opd_avma);
2337 } else {
2338 BAD(".opd");
2339 }
2340 }
2341
2342 /* Accept .eh_frame where mapped as rx (code). This seems to be
2343 the common case. However, if that doesn't pan out, try for
2344 rw (data) instead. We can handle up to N_EHFRAME_SECTS per
2345 ELF object. */
2346 if (0 == VG_(strcmp)(name, ".eh_frame")) {
2347 if (inrx && di->n_ehframe < N_EHFRAME_SECTS) {
2348 di->ehframe_avma[di->n_ehframe] = svma + inrx->bias;
2349 di->ehframe_size[di->n_ehframe] = size;
2350 TRACE_SYMTAB("acquiring .eh_frame avma = %#lx\n",
2351 di->ehframe_avma[di->n_ehframe]);
2352 di->n_ehframe++;
2353 } else
2354 if (inrw && di->n_ehframe < N_EHFRAME_SECTS) {
2355 di->ehframe_avma[di->n_ehframe] = svma + inrw->bias;
2356 di->ehframe_size[di->n_ehframe] = size;
2357 TRACE_SYMTAB("acquiring .eh_frame avma = %#lx\n",
2358 di->ehframe_avma[di->n_ehframe]);
2359 di->n_ehframe++;
2360 } else {
2361 BAD(".eh_frame");
2362 }
2363 }
2364
2365 /* Accept .ARM.exidx where mapped as rx (code). */
2366 /* FIXME: make sure the entire section is mapped in, not just
2367 the first address. */
2368 if (0 == VG_(strcmp)(name, ".ARM.exidx")) {
2369 if (inrx && !di->exidx_present) {
2370 di->exidx_present = True;
2371 di->exidx_svma = svma;
2372 di->exidx_avma = svma + inrx->bias;
2373 di->exidx_size = size;
2374 di->exidx_bias = inrx->bias;
2375 TRACE_SYMTAB("acquiring .exidx svma = %#lx .. %#lx\n",
2376 di->exidx_svma,
2377 di->exidx_svma + di->exidx_size - 1);
2378 TRACE_SYMTAB("acquiring .exidx avma = %#lx .. %#lx\n",
2379 di->exidx_avma,
2380 di->exidx_avma + di->exidx_size - 1);
2381 TRACE_SYMTAB("acquiring .exidx bias = %#lx\n",
2382 (UWord)di->exidx_bias);
2383 } else {
2384 BAD(".ARM.exidx");
2385 }
2386 }
2387
2388 /* Accept .ARM.extab where mapped as rx (code). */
2389 /* FIXME: make sure the entire section is mapped in, not just
2390 the first address. */
2391 if (0 == VG_(strcmp)(name, ".ARM.extab")) {
2392 if (inrx && !di->extab_present) {
2393 di->extab_present = True;
2394 di->extab_svma = svma;
2395 di->extab_avma = svma + inrx->bias;
2396 di->extab_size = size;
2397 di->extab_bias = inrx->bias;
2398 TRACE_SYMTAB("acquiring .extab svma = %#lx .. %#lx\n",
2399 di->extab_svma,
2400 di->extab_svma + di->extab_size - 1);
2401 TRACE_SYMTAB("acquiring .extab avma = %#lx .. %#lx\n",
2402 di->extab_avma,
2403 di->extab_avma + di->extab_size - 1);
2404 TRACE_SYMTAB("acquiring .extab bias = %#lx\n",
2405 (UWord)di->extab_bias);
2406 } else {
2407 BAD(".ARM.extab");
2408 }
2409 }
2410
2411 ML_(dinfo_free)(name);
2412
2413 # undef BAD
2414
2415 } /* iterate over the section headers */
2416
2417 /* TOPLEVEL */
2418 if (0) VG_(printf)("YYYY text_: avma %#lx size %lu bias %#lx\n",
2419 di->text_avma, di->text_size, (UWord)di->text_bias);
2420
2421 if (VG_(clo_verbosity) > 2 || VG_(clo_trace_redir))
2422 VG_(message)(Vg_DebugMsg, " svma %#010lx, avma %#010lx\n",
2423 di->text_avma - di->text_bias,
2424 di->text_avma );
2425
2426 TRACE_SYMTAB("\n");
2427 TRACE_SYMTAB("------ Finding image addresses "
2428 "for debug-info sections ------\n");
2429
2430 /* TOPLEVEL */
2431 /* Find interesting sections, read the symbol table(s), read any
2432 debug information. Each section is located either in the main,
2433 debug or alt-debug files, but only in one. For each section,
2434 |section_escn| records which of |mimg|, |dimg| or |aimg| we
2435 found it in, along with the section's image offset and its size.
2436 The triples (section_img, section_ioff, section_szB) are
2437 consistent, in that they are always either (NULL,
2438 DiOffT_INVALID, 0), or refer to the same image, and are all
2439 assigned together. */
2440 {
2441 /* TOPLEVEL */
2442 DiSlice strtab_escn = DiSlice_INVALID; // .strtab
2443 DiSlice symtab_escn = DiSlice_INVALID; // .symtab
2444 DiSlice dynstr_escn = DiSlice_INVALID; // .dynstr
2445 DiSlice dynsym_escn = DiSlice_INVALID; // .dynsym
2446 # if defined(VGO_solaris)
2447 DiSlice ldynsym_escn = DiSlice_INVALID; // .SUNW_ldynsym
2448 # endif
2449 DiSlice debuglink_escn = DiSlice_INVALID; // .gnu_debuglink
2450 DiSlice debugaltlink_escn = DiSlice_INVALID; // .gnu_debugaltlink
2451 DiSlice debug_line_escn = DiSlice_INVALID; // .debug_line (dwarf2)
2452 DiSlice debug_info_escn = DiSlice_INVALID; // .debug_info (dwarf2)
2453 DiSlice debug_types_escn = DiSlice_INVALID; // .debug_types (dwarf4)
2454 DiSlice debug_abbv_escn = DiSlice_INVALID; // .debug_abbrev (dwarf2)
2455 DiSlice debug_str_escn = DiSlice_INVALID; // .debug_str (dwarf2)
2456 DiSlice debug_ranges_escn = DiSlice_INVALID; // .debug_ranges (dwarf2)
2457 DiSlice debug_loc_escn = DiSlice_INVALID; // .debug_loc (dwarf2)
2458 DiSlice debug_frame_escn = DiSlice_INVALID; // .debug_frame (dwarf2)
2459 DiSlice debug_line_alt_escn = DiSlice_INVALID; // .debug_line (alt)
2460 DiSlice debug_info_alt_escn = DiSlice_INVALID; // .debug_info (alt)
2461 DiSlice debug_abbv_alt_escn = DiSlice_INVALID; // .debug_abbrev (alt)
2462 DiSlice debug_str_alt_escn = DiSlice_INVALID; // .debug_str (alt)
2463 DiSlice dwarf1d_escn = DiSlice_INVALID; // .debug (dwarf1)
2464 DiSlice dwarf1l_escn = DiSlice_INVALID; // .line (dwarf1)
2465 DiSlice opd_escn = DiSlice_INVALID; // .opd (dwarf2,
2466 // ppc64be-linux)
2467 DiSlice ehframe_escn[N_EHFRAME_SECTS]; // .eh_frame (dwarf2)
2468
2469 for (i = 0; i < N_EHFRAME_SECTS; i++)
2470 ehframe_escn[i] = DiSlice_INVALID;
2471
2472 /* Find all interesting sections */
2473
2474 UInt ehframe_mix = 0;
2475
2476 /* What FIND/FINDX do: they find the section called _SEC_NAME.
2477 The image it was found in, its offset within that image and
2478 its size are assigned together to the slice _SEC_ESCN (fields
2479 .img, .ioff and .szB). If the section is found, _POST_FX is
2480 executed after _SEC_ESCN has been assigned and checked.
2481
2482 Even for sections which are marked loadable, the client's
2483 ld.so may not have loaded them yet, so there is no guarantee
2484 that we can safely prod around in any such area. Because
2485 the entire object file is transiently mapped aboard for
2486 inspection, it's always safe to inspect that area. */
2487
2488 /* TOPLEVEL */
2489 /* Iterate over section headers (again) */
2490 for (i = 0; i < ehdr_m.e_shnum; i++) {
2491
/* FINDX(_sec_name, _sec_escn, _post_fx)

   Examines section header number |i| of the main image (mimg).  If
   that section's name equals the C string _sec_name, then:
     - the DiSlice _sec_escn is set to (mimg, sh_offset, sh_size);
     - the header and slice are handed to check_compression(); a
       False result is reported as an unsupported compression type;
     - unless the section is SHT_NOBITS, its file extent is checked
       against ML_(img_real_size)(mimg);
     - finally the statement(s) _post_fx are executed.
   Any failure is reported through ML_(symerr) and control jumps to
   the 'out' label.  If the name does not match, nothing happens.

   NB: the expansion deliberately ends in "while (0);" -- with the
   semicolon -- because the invocations below are written without a
   trailing semicolon of their own. */
#     define FINDX(_sec_name, _sec_escn, _post_fx) \
      do { \
         ElfXX_Shdr a_shdr; \
         ML_(img_get)(&a_shdr, mimg, \
                      INDEX_BIS(shdr_mioff, i, shdr_ment_szB), \
                      sizeof(a_shdr)); \
         if (0 == ML_(img_strcmp_c)(mimg, shdr_strtab_mioff \
                                          + a_shdr.sh_name, _sec_name)) { \
            Bool nobits; \
            _sec_escn.img  = mimg; \
            _sec_escn.ioff = (DiOffT)a_shdr.sh_offset; \
            _sec_escn.szB  = a_shdr.sh_size; \
            if (!check_compression(&a_shdr, &_sec_escn)) { \
               ML_(symerr)(di, True, " Compression type is unsupported"); \
               goto out; \
            } \
            nobits = a_shdr.sh_type == SHT_NOBITS; \
            vg_assert(_sec_escn.img != NULL); \
            vg_assert(_sec_escn.ioff != DiOffT_INVALID); \
            TRACE_SYMTAB( "%-18s: ioff %llu .. %llu\n", \
                          _sec_name, (ULong)a_shdr.sh_offset, \
                          ((ULong)a_shdr.sh_offset) + a_shdr.sh_size - 1); \
            /* SHT_NOBITS sections have zero size in the file. */ \
            /* The extent test is phrased as "size > limit or */ \
            /* offset > limit - size" so that a bogus header */ \
            /* whose offset + size wraps around cannot slip */ \
            /* through; for non-wrapping values it is the same */ \
            /* test as offset + size > limit. */ \
            if (!nobits \
                && (a_shdr.sh_size > ML_(img_real_size)(mimg) \
                    || a_shdr.sh_offset \
                       > ML_(img_real_size)(mimg) - a_shdr.sh_size)) { \
               ML_(symerr)(di, True, \
                           " section beyond image end?!"); \
               goto out; \
            } \
            _post_fx; \
         } \
      } while (0);

/* Version with no post-effects */
#     define FIND(_sec_name, _sec_escn) \
         FINDX(_sec_name, _sec_escn, /**/)
2529
2530 /* NAME ElfSec */
2531 FIND( ".dynsym", dynsym_escn)
2532 FIND( ".dynstr", dynstr_escn)
2533 FIND( ".symtab", symtab_escn)
2534 FIND( ".strtab", strtab_escn)
2535 # if defined(VGO_solaris)
2536 FIND( ".SUNW_ldynsym", ldynsym_escn)
2537 # endif
2538
2539 FIND( ".gnu_debuglink", debuglink_escn)
2540 FIND( ".gnu_debugaltlink", debugaltlink_escn)
2541
2542 FIND( ".debug_line", debug_line_escn)
2543 if (!ML_(sli_is_valid)(debug_line_escn))
2544 FIND(".zdebug_line", debug_line_escn)
2545
2546 FIND( ".debug_info", debug_info_escn)
2547 if (!ML_(sli_is_valid)(debug_info_escn))
2548 FIND(".zdebug_info", debug_info_escn)
2549
2550 FIND( ".debug_types", debug_types_escn)
2551 if (!ML_(sli_is_valid)(debug_types_escn))
2552 FIND(".zdebug_types", debug_types_escn)
2553
2554 FIND( ".debug_abbrev", debug_abbv_escn)
2555 if (!ML_(sli_is_valid)(debug_abbv_escn))
2556 FIND(".zdebug_abbrev", debug_abbv_escn)
2557
2558 FIND( ".debug_str", debug_str_escn)
2559 if (!ML_(sli_is_valid)(debug_str_escn))
2560 FIND(".zdebug_str", debug_str_escn)
2561
2562 FIND( ".debug_ranges", debug_ranges_escn)
2563 if (!ML_(sli_is_valid)(debug_ranges_escn))
2564 FIND(".zdebug_ranges", debug_ranges_escn)
2565
2566 FIND( ".debug_loc", debug_loc_escn)
2567 if (!ML_(sli_is_valid)(debug_loc_escn))
2568 FIND(".zdebug_loc", debug_loc_escn)
2569
2570 FIND( ".debug_frame", debug_frame_escn)
2571 if (!ML_(sli_is_valid)(debug_frame_escn))
2572 FIND(".zdebug_frame", debug_frame_escn)
2573
2574 FIND( ".debug", dwarf1d_escn)
2575 FIND( ".line", dwarf1l_escn)
2576
2577 FIND( ".opd", opd_escn)
2578
2579 FINDX( ".eh_frame", ehframe_escn[ehframe_mix],
2580 do { ehframe_mix++; vg_assert(ehframe_mix <= N_EHFRAME_SECTS);
2581 } while (0)
2582 )
2583 /* Comment_on_EH_FRAME_MULTIPLE_INSTANCES: w.r.t. .eh_frame
2584 multi-instance kludgery, how are we assured that the order
2585 in which we fill in ehframe_escn[] is consistent with the
2586 order in which we previously filled in di->ehframe_avma[]
2587 and di->ehframe_size[] ? By the fact that in both cases,
2588 these arrays were filled in by iterating over the section
2589 headers top-to-bottom. So both loops (this one and the
2590 previous one) encounter the .eh_frame entries in the same
2591 order and so fill in these arrays in a consistent order.
2592 */
2593
2594 # undef FINDX
2595 # undef FIND
2596 } /* Iterate over section headers (again) */
2597
2598 /* TOPLEVEL */
2599 /* Now, see if we can find a debuginfo object, and if so connect
2600 |dimg| to it. */
2601 vg_assert(dimg == NULL && aimg == NULL);
2602
2603 /* Look for a build-id */
2604 HChar* buildid = find_buildid(mimg, False, False);
2605
2606 /* Look for a debug image that matches either the build-id or
2607 the debuglink-CRC32 in the main image. If the main image
2608 doesn't contain either of those then this won't even bother
2609 to try looking. This looks in all known places, including
2610 the --extra-debuginfo-path if specified and on the
2611 --debuginfo-server if specified. */
2612 if (buildid != NULL || debuglink_escn.img != NULL) {
2613 /* Do we have a debuglink section? */
2614 if (debuglink_escn.img != NULL) {
2615 UInt crc_offset
2616 = VG_ROUNDUP(ML_(img_strlen)(debuglink_escn.img,
2617 debuglink_escn.ioff)+1, 4);
2618 vg_assert(crc_offset + sizeof(UInt) <= debuglink_escn.szB);
2619
2620 /* Extract the CRC from the debuglink section */
2621 UInt crc = ML_(img_get_UInt)(debuglink_escn.img,
2622 debuglink_escn.ioff + crc_offset);
2623
2624 /* See if we can find a matching debug file */
2625 HChar* debuglink_str_m
2626 = ML_(img_strdup)(debuglink_escn.img,
2627 "di.redi_dlk.1", debuglink_escn.ioff);
2628 dimg = find_debug_file( di, di->fsm.filename, buildid,
2629 debuglink_str_m, crc, False );
2630 if (debuglink_str_m)
2631 ML_(dinfo_free)(debuglink_str_m);
2632 } else {
2633 /* See if we can find a matching debug file */
2634 dimg = find_debug_file( di, di->fsm.filename, buildid,
2635 NULL, 0, False );
2636 }
2637 }
2638
2639 if (buildid) {
2640 ML_(dinfo_free)(buildid);
2641 buildid = NULL; /* paranoia */
2642 }
2643
2644 /* As a last-ditch measure, try looking in the
2645 --extra-debuginfo-path and/or on the --debuginfo-server, but
2646 only in the case where --allow-mismatched-debuginfo=yes.
2647 This is dangerous in that (1) it gives no assurance that the
2648 debuginfo object matches the main one, and hence (2) we will
2649 very likely get an assertion in the code below, if indeed
2650 there is a mismatch. Hence it is disabled by default
2651 (--allow-mismatched-debuginfo=no). Nevertheless it's
2652 sometimes a useful way of getting out of a tight spot.
2653
2654 Note that we're ignoring the name in the .gnu_debuglink
2655 section here, and just looking for a file of the same name
2656 either in the extra-path or on the server. */
2657 if (dimg == NULL && VG_(clo_allow_mismatched_debuginfo)) {
2658 dimg = find_debug_file_ad_hoc( di, di->fsm.filename );
2659 }
2660
2661 /* TOPLEVEL */
2662 /* If we were successful in finding a debug image, pull various
2663 SVMA/bias/size and image addresses out of it. */
2664 if (dimg != NULL && is_elf_object_file_by_DiImage(dimg, False)) {
2665
2666 /* Pull out and validate program header and section header info */
2667 DiOffT ehdr_dioff = 0;
2668 ElfXX_Ehdr ehdr_dimg;
2669 ML_(img_get)(&ehdr_dimg, dimg, ehdr_dioff, sizeof(ehdr_dimg));
2670
2671 DiOffT phdr_dioff = ehdr_dimg.e_phoff;
2672 UWord phdr_dnent = ehdr_dimg.e_phnum;
2673 UWord phdr_dent_szB = ehdr_dimg.e_phentsize;
2674
2675 DiOffT shdr_dioff = ehdr_dimg.e_shoff;
2676 UWord shdr_dnent = ehdr_dimg.e_shnum;
2677 UWord shdr_dent_szB = ehdr_dimg.e_shentsize;
2678
2679 DiOffT shdr_strtab_dioff = DiOffT_INVALID;
2680
2681 /* SVMAs covered by rx and rw segments and corresponding bias. */
2682 Addr rx_dsvma_limit = 0;
2683 PtrdiffT rx_dbias = 0;
2684 Addr rw_dsvma_limit = 0;
2685 PtrdiffT rw_dbias = 0;
2686
2687 Bool need_symtab, need_dwarf2, need_dwarf1;
2688
2689 if (phdr_dnent == 0
2690 || !ML_(img_valid)(dimg, phdr_dioff,
2691 phdr_dnent * phdr_dent_szB)) {
2692 ML_(symerr)(di, True,
2693 "Missing or invalid ELF Program Header Table"
2694 " (debuginfo file)");
2695 goto out;
2696 }
2697
2698 if (shdr_dnent == 0
2699 || !ML_(img_valid)(dimg, shdr_dioff,
2700 shdr_dnent * shdr_dent_szB)) {
2701 ML_(symerr)(di, True,
2702 "Missing or invalid ELF Section Header Table"
2703 " (debuginfo file)");
2704 goto out;
2705 }
2706
2707 /* Also find the section header's string table, and validate. */
2708 /* checked previously by is_elf_object_file: */
2709 vg_assert(ehdr_dimg.e_shstrndx != SHN_UNDEF);
2710
2711 // shdr_dioff is the offset of the section header table
2712 // and we need the ehdr_dimg.e_shstrndx'th entry
2713 { ElfXX_Shdr a_shdr;
2714 ML_(img_get)(&a_shdr, dimg,
2715 INDEX_BIS(shdr_dioff, ehdr_dimg.e_shstrndx,
2716 shdr_dent_szB),
2717 sizeof(a_shdr));
2718 shdr_strtab_dioff = a_shdr.sh_offset;
2719 if (!ML_(img_valid)(dimg, shdr_strtab_dioff,
2720 1/*bogus, but we don't know the real size*/)) {
2721 ML_(symerr)(di, True,
2722 "Invalid ELF Section Header String Table"
2723 " (debuginfo file)");
2724 goto out;
2725 }
2726 }
2727
2728 for (i = 0; i < ehdr_dimg.e_phnum; i++) {
2729 ElfXX_Phdr a_phdr;
2730 ML_(img_get)(&a_phdr, dimg, INDEX_BIS(ehdr_dimg.e_phoff,
2731 i, phdr_dent_szB),
2732 sizeof(a_phdr));
2733 if (a_phdr.p_type == PT_LOAD) {
2734 for (j = 0; j < VG_(sizeXA)(di->fsm.maps); j++) {
2735 const DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, j);
2736 if ( a_phdr.p_offset >= map->foff
2737 && a_phdr.p_offset < map->foff + map->size
2738 && a_phdr.p_offset + a_phdr.p_filesz
2739 < map->foff + map->size) {
2740 if (map->rx && rx_dsvma_limit == 0) {
2741 rx_dsvma_limit = a_phdr.p_vaddr + a_phdr.p_memsz;
2742 rx_dbias = map->avma - map->foff + a_phdr.p_offset
2743 - a_phdr.p_vaddr;
2744 }
2745 if (map->rw && rw_dsvma_limit == 0) {
2746 rw_dsvma_limit = a_phdr.p_vaddr + a_phdr.p_memsz;
2747 rw_dbias = map->avma - map->foff + a_phdr.p_offset
2748 - a_phdr.p_vaddr;
2749 }
2750 break;
2751 }
2752 }
2753 }
2754 }
2755
2756 need_symtab = (symtab_escn.img == NULL);
2757 need_dwarf2 = (debug_info_escn.img == NULL);
2758 need_dwarf1 = (dwarf1d_escn.img == NULL);
2759
2760 /* Find all interesting sections in the debug image */
2761 for (i = 0; i < ehdr_dimg.e_shnum; i++) {
2762
2763 /* Find debug svma and bias information for sections
2764 we found in the main file. */
2765
2766 # define FIND(_sec, _seg) \
2767 do { \
2768 ElfXX_Shdr a_shdr; \
2769 ML_(img_get)(&a_shdr, dimg, \
2770 INDEX_BIS(shdr_dioff, i, shdr_dent_szB), \
2771 sizeof(a_shdr)); \
2772 if (di->_sec##_present \
2773 && 0 == ML_(img_strcmp_c)(dimg, shdr_strtab_dioff \
2774 + a_shdr.sh_name, "." #_sec)) { \
2775 vg_assert(di->_sec##_size == a_shdr.sh_size); \
2776 /* JRS 2013-Jun-01: the following assert doesn't contain */ \
2777 /* any ==s, which seems to me to be suspicious. */ \
2778 vg_assert(di->_sec##_avma + a_shdr.sh_addr + _seg##_dbias); \
2779 /* Assume we have a correct value for the main */ \
2780 /* object's bias. Use that to derive the debuginfo */ \
2781 /* object's bias, by adding the difference in SVMAs */ \
2782 /* for the corresponding sections in the two files. */ \
2783 /* That should take care of all prelinking effects. */ \
2784 di->_sec##_debug_svma = a_shdr.sh_addr; \
2785 di->_sec##_debug_bias \
2786 = di->_sec##_bias + \
2787 di->_sec##_svma - di->_sec##_debug_svma; \
2788 TRACE_SYMTAB("acquiring ." #_sec \
2789 " debug svma = %#lx .. %#lx\n", \
2790 di->_sec##_debug_svma, \
2791 di->_sec##_debug_svma + di->_sec##_size - 1); \
2792 TRACE_SYMTAB("acquiring ." #_sec " debug bias = %#lx\n", \
2793 (UWord)di->_sec##_debug_bias); \
2794 } \
2795 } while (0);
2796
2797 /* SECTION SEGMENT */
2798 FIND(text, rx)
2799 FIND(data, rw)
2800 FIND(sdata, rw)
2801 FIND(rodata, rw)
2802 FIND(bss, rw)
2803 FIND(sbss, rw)
2804
2805 # undef FIND
2806
2807 /* Same deal as previous FIND, except only do it for those
2808 sections which we didn't find in the main file. */
2809
2810 # define FIND(_condition, _sec_name, _sec_escn) \
2811 do { \
2812 ElfXX_Shdr a_shdr; \
2813 ML_(img_get)(&a_shdr, dimg, \
2814 INDEX_BIS(shdr_dioff, i, shdr_dent_szB), \
2815 sizeof(a_shdr)); \
2816 if (_condition \
2817 && 0 == ML_(img_strcmp_c)(dimg, shdr_strtab_dioff \
2818 + a_shdr.sh_name, _sec_name)) { \
2819 Bool nobits; \
2820 if (_sec_escn.img != NULL) { \
2821 ML_(symerr)(di, True, \
2822 " debuginfo section duplicates a" \
2823 " section in the main ELF file"); \
2824 goto out; \
2825 } \
2826 _sec_escn.img = dimg; \
2827 _sec_escn.ioff = (DiOffT)a_shdr.sh_offset; \
2828 _sec_escn.szB = a_shdr.sh_size; \
2829 if (!check_compression(&a_shdr, &_sec_escn)) { \
2830 ML_(symerr)(di, True, " Compression type is unsupported"); \
2831 goto out; \
2832 } \
2833 nobits = a_shdr.sh_type == SHT_NOBITS; \
2834 vg_assert(_sec_escn.img != NULL); \
2835 vg_assert(_sec_escn.ioff != DiOffT_INVALID); \
2836 TRACE_SYMTAB( "%-18s: dioff %llu .. %llu\n", \
2837 _sec_name, \
2838 (ULong)a_shdr.sh_offset, \
2839 ((ULong)a_shdr.sh_offset) + a_shdr.sh_size - 1); \
2840 /* SHT_NOBITS sections have zero size in the file. */ \
2841 if (!nobits && a_shdr.sh_offset \
2842 + a_shdr.sh_size > ML_(img_real_size)(_sec_escn.img)) { \
2843 ML_(symerr)(di, True, \
2844 " section beyond image end?!"); \
2845 goto out; \
2846 } \
2847 } \
2848 } while (0);
2849
2850 /* NEEDED? NAME ElfSec */
2851 FIND( need_symtab, ".symtab", symtab_escn)
2852 FIND( need_symtab, ".strtab", strtab_escn)
2853 FIND( need_dwarf2, ".debug_line", debug_line_escn)
2854 if (!ML_(sli_is_valid)(debug_line_escn))
2855 FIND(need_dwarf2, ".zdebug_line", debug_line_escn)
2856
2857 FIND( need_dwarf2, ".debug_info", debug_info_escn)
2858 if (!ML_(sli_is_valid)(debug_info_escn))
2859 FIND(need_dwarf2, ".zdebug_info", debug_info_escn)
2860
2861 FIND( need_dwarf2, ".debug_types", debug_types_escn)
2862 if (!ML_(sli_is_valid)(debug_types_escn))
2863 FIND(need_dwarf2, ".zdebug_types", debug_types_escn)
2864
2865 FIND( need_dwarf2, ".debug_abbrev", debug_abbv_escn)
2866 if (!ML_(sli_is_valid)(debug_abbv_escn))
2867 FIND(need_dwarf2, ".zdebug_abbrev", debug_abbv_escn)
2868
2869 FIND( need_dwarf2, ".debug_str", debug_str_escn)
2870 if (!ML_(sli_is_valid)(debug_str_escn))
2871 FIND(need_dwarf2, ".zdebug_str", debug_str_escn)
2872
2873 FIND( need_dwarf2, ".debug_ranges", debug_ranges_escn)
2874 if (!ML_(sli_is_valid)(debug_ranges_escn))
2875 FIND(need_dwarf2, ".zdebug_ranges", debug_ranges_escn)
2876
2877 FIND( need_dwarf2, ".debug_loc", debug_loc_escn)
2878 if (!ML_(sli_is_valid)(debug_loc_escn))
2879 FIND(need_dwarf2, ".zdebug_loc", debug_loc_escn)
2880
2881 FIND( need_dwarf2, ".debug_frame", debug_frame_escn)
2882 if (!ML_(sli_is_valid)(debug_frame_escn))
2883 FIND(need_dwarf2, ".zdebug_frame", debug_frame_escn)
2884
2885 FIND( need_dwarf2, ".gnu_debugaltlink", debugaltlink_escn)
2886
2887 FIND( need_dwarf1, ".debug", dwarf1d_escn)
2888 FIND( need_dwarf1, ".line", dwarf1l_escn)
2889
2890 # undef FIND
2891 } /* Find all interesting sections */
2892 } /* do we have a debug image? */
2893
2894 /* TOPLEVEL */
2895 /* Look for alternate debug image, and if found, connect |aimg|
2896 to it. */
2897 vg_assert(aimg == NULL);
2898
2899 if (debugaltlink_escn.img != NULL) {
2900 HChar* altfile_str_m
2901 = ML_(img_strdup)(debugaltlink_escn.img,
2902 "di.fbi.3", debugaltlink_escn.ioff);
2903 UInt buildid_offset = ML_(img_strlen)(debugaltlink_escn.img,
2904 debugaltlink_escn.ioff)+1;
2905
2906 vg_assert(buildid_offset < debugaltlink_escn.szB);
2907
2908 HChar *altbuildid
2909 = ML_(dinfo_zalloc)("di.fbi.4",
2910 (debugaltlink_escn.szB - buildid_offset)
2911 * 2 + 1);
2912
2913 /* The altfile might be relative to the debug file or main file. */
2914 HChar *dbgname = di->fsm.dbgname ? di->fsm.dbgname : di->fsm.filename;
2915
2916 for (j = 0; j < debugaltlink_escn.szB - buildid_offset; j++)
2917 VG_(sprintf)(
2918 altbuildid + 2 * j, "%02x",
2919 (UInt)ML_(img_get_UChar)(debugaltlink_escn.img,
2920 debugaltlink_escn.ioff
2921 + buildid_offset + j));
2922
2923 /* See if we can find a matching debug file */
2924 aimg = find_debug_file( di, dbgname, altbuildid,
2925 altfile_str_m, 0, True );
2926
2927 if (altfile_str_m)
2928 ML_(dinfo_free)(altfile_str_m);
2929 ML_(dinfo_free)(altbuildid);
2930 }
2931
2932 /* TOPLEVEL */
2933 /* If we were successful in finding alternate debug image, pull various
2934 size and image addresses out of it. */
2935 if (aimg != NULL && is_elf_object_file_by_DiImage(aimg, True)) {
2936
2937 /* Pull out and validate program header and section header info */
2938 DiOffT ehdr_aioff = 0;
2939 ElfXX_Ehdr ehdr_aimg;
2940 ML_(img_get)(&ehdr_aimg, aimg, ehdr_aioff, sizeof(ehdr_aimg));
2941
2942 DiOffT shdr_aioff = ehdr_aimg.e_shoff;
2943 UWord shdr_anent = ehdr_aimg.e_shnum;
2944 UWord shdr_aent_szB = ehdr_aimg.e_shentsize;
2945
2946 DiOffT shdr_strtab_aioff = DiOffT_INVALID;
2947
2948 if (shdr_anent == 0
2949 || !ML_(img_valid)(aimg, shdr_aioff,
2950 shdr_anent * shdr_aent_szB)) {
2951 ML_(symerr)(di, True,
2952 "Missing or invalid ELF Section Header Table"
2953 " (alternate debuginfo file)");
2954 goto out;
2955 }
2956
2957 /* Also find the section header's string table, and validate. */
2958 /* checked previously by is_elf_object_file: */
2959 vg_assert(ehdr_aimg.e_shstrndx != SHN_UNDEF);
2960
2961 // shdr_aioff is the offset of the section header table
2962 // and we need the ehdr_aimg.e_shstrndx'th entry
2963 { ElfXX_Shdr a_shdr;
2964 ML_(img_get)(&a_shdr, aimg,
2965 INDEX_BIS(shdr_aioff, ehdr_aimg.e_shstrndx,
2966 shdr_aent_szB),
2967 sizeof(a_shdr));
2968 shdr_strtab_aioff = a_shdr.sh_offset;
2969 if (!ML_(img_valid)(aimg, shdr_strtab_aioff,
2970 1/*bogus, but we don't know the real size*/)) {
2971 ML_(symerr)(di, True,
2972 "Invalid ELF Section Header String Table"
2973 " (alternate debuginfo file)");
2974 goto out;
2975 }
2976 }
2977
2978 /* Find all interesting sections */
2979 for (i = 0; i < ehdr_aimg.e_shnum; i++) {
2980
2981 # define FIND(_sec_name, _sec_escn) \
2982 do { \
2983 ElfXX_Shdr a_shdr; \
2984 ML_(img_get)(&a_shdr, aimg, \
2985 INDEX_BIS(shdr_aioff, i, shdr_aent_szB), \
2986 sizeof(a_shdr)); \
2987 if (0 == ML_(img_strcmp_c)(aimg, shdr_strtab_aioff \
2988 + a_shdr.sh_name, _sec_name)) { \
2989 if (_sec_escn.img != NULL) { \
2990 ML_(symerr)(di, True, \
2991 " alternate debuginfo section duplicates a" \
2992 " section in the main ELF file"); \
2993 goto out; \
2994 } \
2995 _sec_escn.img = aimg; \
2996 _sec_escn.ioff = (DiOffT)a_shdr.sh_offset; \
2997 _sec_escn.szB = a_shdr.sh_size; \
2998 if (!check_compression(&a_shdr, &_sec_escn)) { \
2999 ML_(symerr)(di, True, " Compression type is " \
3000 "unsupported"); \
3001 goto out; \
3002 } \
3003 vg_assert(_sec_escn.img != NULL); \
3004 vg_assert(_sec_escn.ioff != DiOffT_INVALID); \
3005 TRACE_SYMTAB( "%-18s: aioff %llu .. %llu\n", \
3006 _sec_name, \
3007 (ULong)a_shdr.sh_offset, \
3008 ((ULong)a_shdr.sh_offset) + a_shdr.sh_size - 1); \
3009 } \
3010 } while (0);
3011
3012 /* NAME ElfSec */
3013 FIND(".debug_line", debug_line_alt_escn)
3014 if (!ML_(sli_is_valid)(debug_line_alt_escn))
3015 FIND(".zdebug_line", debug_line_alt_escn)
3016
3017 FIND(".debug_info", debug_info_alt_escn)
3018 if (!ML_(sli_is_valid)(debug_info_alt_escn))
3019 FIND(".zdebug_info", debug_info_alt_escn)
3020
3021 FIND(".debug_abbrev", debug_abbv_alt_escn)
3022 if (!ML_(sli_is_valid)(debug_abbv_alt_escn))
3023 FIND(".zdebug_abbrev", debug_abbv_alt_escn)
3024
3025 FIND(".debug_str", debug_str_alt_escn)
3026 if (!ML_(sli_is_valid)(debug_str_alt_escn))
3027 FIND(".zdebug_str", debug_str_alt_escn)
3028
3029 # undef FIND
3030 } /* Find all interesting sections */
3031 } /* do we have an alternate debug image? */
3032
3033
3034 /* TOPLEVEL */
3035 /* Check some sizes */
3036 vg_assert((dynsym_escn.szB % sizeof(ElfXX_Sym)) == 0);
3037 vg_assert((symtab_escn.szB % sizeof(ElfXX_Sym)) == 0);
3038 # if defined(VGO_solaris)
3039 vg_assert((ldynsym_escn.szB % sizeof(ElfXX_Sym)) == 0);
3040 # endif
3041
3042 /* TOPLEVEL */
3043 /* Read symbols */
3044 {
3045 void (*read_elf_symtab)(struct _DebugInfo*, const HChar*,
3046 DiSlice*, DiSlice*, DiSlice*, Bool);
3047 Bool symtab_in_debug;
3048 # if defined(VGP_ppc64be_linux)
3049 read_elf_symtab = read_elf_symtab__ppc64be_linux;
3050 # else
3051 read_elf_symtab = read_elf_symtab__normal;
3052 # endif
3053 symtab_in_debug = symtab_escn.img == dimg;
3054 read_elf_symtab(di, "symbol table",
3055 &symtab_escn, &strtab_escn, &opd_escn,
3056 symtab_in_debug);
3057 read_elf_symtab(di, "dynamic symbol table",
3058 &dynsym_escn, &dynstr_escn, &opd_escn,
3059 False);
3060 # if defined(VGO_solaris)
3061 read_elf_symtab(di, "local dynamic symbol table",
3062 &ldynsym_escn, &dynstr_escn, &opd_escn,
3063 False);
3064 # endif
3065 }
3066
3067 /* TOPLEVEL */
3068 /* Read .eh_frame and .debug_frame (call-frame-info) if any. Do
3069 the .eh_frame section(s) first. */
3070 vg_assert(di->n_ehframe >= 0 && di->n_ehframe <= N_EHFRAME_SECTS);
3071 for (i = 0; i < di->n_ehframe; i++) {
3072 /* see Comment_on_EH_FRAME_MULTIPLE_INSTANCES above for why
3073 this next assertion should hold. */
3074 vg_assert(ML_(sli_is_valid)(ehframe_escn[i]));
3075 vg_assert(ehframe_escn[i].szB == di->ehframe_size[i]);
3076 ML_(read_callframe_info_dwarf3)( di,
3077 ehframe_escn[i],
3078 di->ehframe_avma[i],
3079 True/*is_ehframe*/ );
3080 }
3081 if (ML_(sli_is_valid)(debug_frame_escn)) {
3082 ML_(read_callframe_info_dwarf3)( di,
3083 debug_frame_escn,
3084 0/*assume zero avma*/,
3085 False/*!is_ehframe*/ );
3086 }
3087
3088 /* TOPLEVEL */
3089 /* jrs 2006-01-01: icc-8.1 has been observed to generate
3090 binaries without debug_str sections. Don't preclude
3091 debuginfo reading for that reason, but, in
3092 read_unitinfo_dwarf2, do check that debugstr is non-NULL
3093 before using it. */
3094 if (ML_(sli_is_valid)(debug_info_escn)
3095 && ML_(sli_is_valid)(debug_abbv_escn)
3096 && ML_(sli_is_valid)(debug_line_escn)) {
3097 /* The old reader: line numbers and unwind info only */
3098 ML_(read_debuginfo_dwarf3) ( di,
3099 debug_info_escn,
3100 debug_types_escn,
3101 debug_abbv_escn,
3102 debug_line_escn,
3103 debug_str_escn,
3104 debug_str_alt_escn );
3105 /* The new reader: read the DIEs in .debug_info to acquire
3106 information on variable types and locations or inline info.
3107 But only if the tool asks for it, or the user requests it on
3108 the command line. */
3109 if (VG_(clo_read_var_info) /* the user or tool asked for it */
3110 || VG_(clo_read_inline_info)) {
3111 ML_(new_dwarf3_reader)(
3112 di, debug_info_escn, debug_types_escn,
3113 debug_abbv_escn, debug_line_escn,
3114 debug_str_escn, debug_ranges_escn,
3115 debug_loc_escn, debug_info_alt_escn,
3116 debug_abbv_alt_escn, debug_line_alt_escn,
3117 debug_str_alt_escn
3118 );
3119 }
3120 }
3121
3122 /* TOPLEVEL */
3123 // JRS 31 July 2014: dwarf-1 reading is currently broken and
3124 // therefore deactivated.
3125 //if (dwarf1d_img && dwarf1l_img) {
3126 // ML_(read_debuginfo_dwarf1) ( di, dwarf1d_img, dwarf1d_sz,
3127 // dwarf1l_img, dwarf1l_sz );
3128 //}
3129
3130 # if defined(VGA_arm)
3131 /* TOPLEVEL */
3132 /* ARM32 only: read .exidx/.extab if present. Note we are
3133 reading these directly out of the mapped in (running) image.
3134 Also, read these only if no CFI based unwind info was
3135 acquired for this file.
3136
3137 An .exidx section is always required, but the .extab section
3138 can be optionally omitted, provided that .exidx does not
3139 refer to it. If the .exidx is erroneous and does refer to
3140 .extab even though .extab is missing, the range checks done
3141 by GET_EX_U32 in ExtabEntryExtract in readexidx.c should
3142 prevent any invalid memory accesses, and cause the .extab to
3143 be rejected as invalid.
3144
3145 FIXME:
3146 * check with m_aspacemgr that the entire [exidx_avma, +exidx_size)
3147 and [extab_avma, +extab_size) areas are readable, since we're
3148 reading this stuff out of the running image (not from a file/socket)
3149 and we don't want to segfault.
3150 * remove DebugInfo::exidx_bias and use text_bias instead.
3151 I think it's always the same.
3152 * remove DebugInfo::{extab_bias, exidx_svma, extab_svma} since
3153 they are never used.
3154 */
3155 if (di->exidx_present
3156 && di->cfsi_used == 0
3157 && di->text_present && di->text_size > 0) {
3158 Addr text_last_svma = di->text_svma + di->text_size - 1;
3159 ML_(read_exidx)( di, (UChar*)di->exidx_avma, di->exidx_size,
3160 (UChar*)di->extab_avma, di->extab_size,
3161 text_last_svma,
3162 di->exidx_bias );
3163 }
3164 # endif /* defined(VGA_arm) */
3165
3166 } /* "Find interesting sections, read the symbol table(s), read any debug
3167 information" (a local scope) */
3168
3169 /* TOPLEVEL */
3170 res = True;
3171
3172 /* If reading Dwarf3 variable type/location info, print a line
3173 showing the number of variables read for each object.
3174 (Currently disabled -- is a sanity-check mechanism for
3175 exp-sgcheck.) */
3176 if (0 && VG_(clo_read_var_info)) {
3177 UWord nVars = 0;
3178 if (di->varinfo) {
3179 for (j = 0; j < VG_(sizeXA)(di->varinfo); j++) {
3180 OSet* /* of DiAddrRange */ scope
3181 = *(OSet**)VG_(indexXA)(di->varinfo, j);
3182 vg_assert(scope);
3183 VG_(OSetGen_ResetIter)( scope );
3184 while (True) {
3185 DiAddrRange* range = VG_(OSetGen_Next)( scope );
3186 if (!range) break;
3187 vg_assert(range->vars);
3188 Word w = VG_(sizeXA)(range->vars);
3189 vg_assert(w >= 0);
3190 if (0) VG_(printf)("range %#lx %#lx %ld\n",
3191 range->aMin, range->aMax, w);
3192 nVars += (UWord)w;
3193 }
3194 }
3195 }
3196 VG_(umsg)("VARINFO: %7lu vars %7lu text_size %s\n",
3197 nVars, di->text_size, di->fsm.filename);
3198 }
3199 /* TOPLEVEL */
3200
3201 out:
3202 {
3203 /* Last, but not least, detach from the image(s). */
3204 if (mimg) ML_(img_done)(mimg);
3205 if (dimg) ML_(img_done)(dimg);
3206 if (aimg) ML_(img_done)(aimg);
3207
3208 if (svma_ranges) VG_(deleteXA)(svma_ranges);
3209
3210 return res;
3211 } /* out: */
3212
3213 /* NOTREACHED */
3214 }
3215
3216 #endif // defined(VGO_linux) || defined(VGO_solaris)
3217
3218 /*--------------------------------------------------------------------*/
3219 /*--- end ---*/
3220 /*--------------------------------------------------------------------*/
3221