• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Find debugging and symbol information for a module in libdwfl.
2    Copyright (C) 2005-2011 Red Hat, Inc.
3    This file is part of Red Hat elfutils.
4 
5    Red Hat elfutils is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by the
7    Free Software Foundation; version 2 of the License.
8 
9    Red Hat elfutils is distributed in the hope that it will be useful, but
10    WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12    General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License along
15    with Red Hat elfutils; if not, write to the Free Software Foundation,
16    Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA.
17 
18    In addition, as a special exception, Red Hat, Inc. gives You the
19    additional right to link the code of Red Hat elfutils with code licensed
20    under any Open Source Initiative certified open source license
21    (http://www.opensource.org/licenses/index.php) which requires the
22    distribution of source code with any binary distribution and to
23    distribute linked combinations of the two.  Non-GPL Code permitted under
24    this exception must only link to the code of Red Hat elfutils through
25    those well defined interfaces identified in the file named EXCEPTION
26    found in the source code files (the "Approved Interfaces").  The files
27    of Non-GPL Code may instantiate templates or use macros or inline
28    functions from the Approved Interfaces without causing the resulting
29    work to be covered by the GNU General Public License.  Only Red Hat,
30    Inc. may make changes or additions to the list of Approved Interfaces.
31    Red Hat's grant of this exception is conditioned upon your not adding
32    any new exceptions.  If you wish to add a new Approved Interface or
33    exception, please contact Red Hat.  You must obey the GNU General Public
34    License in all respects for all of the Red Hat elfutils code and other
35    code used in conjunction with Red Hat elfutils except the Non-GPL Code
36    covered by this exception.  If you modify this file, you may extend this
37    exception to your version of the file, but you are not obligated to do
38    so.  If you do not wish to provide this exception without modification,
39    you must delete this exception statement from your version and license
40    this file solely under the GPL without exception.
41 
42    Red Hat elfutils is an included package of the Open Invention Network.
43    An included package of the Open Invention Network is a package for which
44    Open Invention Network licensees cross-license their patents.  No patent
45    license is granted, either expressly or impliedly, by designation as an
46    included package.  Should you wish to participate in the Open Invention
47    Network licensing program, please visit www.openinventionnetwork.com
48    <http://www.openinventionnetwork.com>.  */
49 
50 #include "libdwflP.h"
51 #include <fcntl.h>
52 #include <string.h>
53 #include <unistd.h>
54 #include "../libdw/libdwP.h"	/* DWARF_E_* values are here.  */
55 
56 
57 /* Open libelf FILE->fd and compute the load base of ELF as loaded in MOD.
58    When we return success, FILE->elf and FILE->vaddr are set up.  */
59 static inline Dwfl_Error
open_elf(Dwfl_Module * mod,struct dwfl_file * file)60 open_elf (Dwfl_Module *mod, struct dwfl_file *file)
61 {
62   if (file->elf == NULL)
63     {
64       /* CBFAIL uses errno if it's set, so clear it first in case we don't
65 	 set it with an open failure below.  */
66       errno = 0;
67 
68       /* If there was a pre-primed file name left that the callback left
69 	 behind, try to open that file name.  */
70       if (file->fd < 0 && file->name != NULL)
71 	file->fd = TEMP_FAILURE_RETRY (open64 (file->name, O_RDONLY));
72 
73       if (file->fd < 0)
74 	return CBFAIL;
75 
76       Dwfl_Error error = __libdw_open_file (&file->fd, &file->elf, true, false);
77       if (error != DWFL_E_NOERROR)
78 	return error;
79     }
80   else if (unlikely (elf_kind (file->elf) != ELF_K_ELF))
81     {
82       elf_end (file->elf);
83       file->elf = NULL;
84       close (file->fd);
85       file->fd = -1;
86       return DWFL_E_BADELF;
87     }
88 
89   GElf_Ehdr ehdr_mem, *ehdr = gelf_getehdr (file->elf, &ehdr_mem);
90   if (ehdr == NULL)
91     {
92     elf_error:
93       elf_end (file->elf);
94       file->elf = NULL;
95       close (file->fd);
96       file->fd = -1;
97       return DWFL_E (LIBELF, elf_errno ());
98     }
99 
100   if (mod->e_type != ET_REL)
101     {
102       /* In any non-ET_REL file, we compute the "synchronization address".
103 
104 	 We start with the address at the end of the first PT_LOAD
105 	 segment.  When prelink converts REL to RELA in an ET_DYN
106 	 file, it expands the space between the beginning of the
107 	 segment and the actual code/data addresses.  Since that
108 	 change wasn't made in the debug file, the distance from
109 	 p_vaddr to an address of interest (in an st_value or DWARF
110 	 data) now differs between the main and debug files.  The
111 	 distance from address_sync to an address of interest remains
112 	 consistent.
113 
114 	 If there are no section headers at all (full stripping), then
115 	 the end of the first segment is a valid synchronization address.
116 	 This cannot happen in a prelinked file, since prelink itself
117 	 relies on section headers for prelinking and for undoing it.
118 	 (If you do full stripping on a prelinked file, then you get what
119 	 you deserve--you can neither undo the prelinking, nor expect to
120 	 line it up with a debug file separated before prelinking.)
121 
122 	 However, when prelink processes an ET_EXEC file, it can do
123 	 something different.  There it juggles the "special" sections
124 	 (SHT_DYNSYM et al) to make space for the additional prelink
125 	 special sections.  Sometimes it will do this by moving a special
126 	 section like .dynstr after the real program sections in the first
127 	 PT_LOAD segment--i.e. to the end.  That changes the end address of
128 	 the segment, so it no longer lines up correctly and is not a valid
129 	 synchronization address to use.  Because of this, we need to apply
130 	 a different prelink-savvy means to discover the synchronization
131 	 address when there is a separate debug file and a prelinked main
132 	 file.  That is done in find_debuginfo, below.  */
133 
134       size_t phnum;
135       if (unlikely (elf_getphdrnum (file->elf, &phnum) != 0))
136 	goto elf_error;
137 
138       file->vaddr = file->address_sync = 0;
139       for (size_t i = 0; i < phnum; ++i)
140 	{
141 	  GElf_Phdr ph_mem;
142 	  GElf_Phdr *ph = gelf_getphdr (file->elf, i, &ph_mem);
143 	  if (unlikely (ph == NULL))
144 	    goto elf_error;
145 	  if (ph->p_type == PT_LOAD)
146 	    {
147 	      file->vaddr = ph->p_vaddr & -ph->p_align;
148 	      file->address_sync = ph->p_vaddr + ph->p_memsz;
149 	      break;
150 	    }
151 	}
152     }
153 
154   mod->e_type = ehdr->e_type;
155 
156   /* Relocatable Linux kernels are ET_EXEC but act like ET_DYN.  */
157   if (mod->e_type == ET_EXEC && file->vaddr != mod->low_addr)
158     mod->e_type = ET_DYN;
159 
160   return DWFL_E_NOERROR;
161 }
162 
163 /* Find the main ELF file for this module and open libelf on it.
164    When we return success, MOD->main.elf and MOD->main.bias are set up.  */
165 void
166 internal_function
__libdwfl_getelf(Dwfl_Module * mod)167 __libdwfl_getelf (Dwfl_Module *mod)
168 {
169   if (mod->main.elf != NULL	/* Already done.  */
170       || mod->elferr != DWFL_E_NOERROR)	/* Cached failure.  */
171     return;
172 
173   mod->main.fd = (*mod->dwfl->callbacks->find_elf) (MODCB_ARGS (mod),
174 						    &mod->main.name,
175 						    &mod->main.elf);
176   const bool fallback = mod->main.elf == NULL && mod->main.fd < 0;
177   mod->elferr = open_elf (mod, &mod->main);
178   if (mod->elferr != DWFL_E_NOERROR)
179     return;
180 
181   if (!mod->main.valid)
182     {
183       /* Clear any explicitly reported build ID, just in case it was wrong.
184 	 We'll fetch it from the file when asked.  */
185       free (mod->build_id_bits);
186       mod->build_id_bits = NULL;
187       mod->build_id_len = 0;
188     }
189   else if (fallback)
190     {
191       /* We have an authoritative build ID for this module, so
192 	 don't use a file by name that doesn't match that ID.  */
193 
194       assert (mod->build_id_len > 0);
195 
196       switch (__builtin_expect (__libdwfl_find_build_id (mod, false,
197 							 mod->main.elf), 2))
198 	{
199 	case 2:
200 	  /* Build ID matches as it should. */
201 	  return;
202 
203 	case -1:			/* ELF error.  */
204 	  mod->elferr = INTUSE(dwfl_errno) ();
205 	  break;
206 
207 	case 0:			/* File has no build ID note.  */
208 	case 1:			/* FIle has a build ID that does not match.  */
209 	  mod->elferr = DWFL_E_WRONG_ID_ELF;
210 	  break;
211 
212 	default:
213 	  abort ();
214 	}
215 
216       /* We get here when it was the right ELF file.  Clear it out.  */
217       elf_end (mod->main.elf);
218       mod->main.elf = NULL;
219       if (mod->main.fd >= 0)
220 	{
221 	  close (mod->main.fd);
222 	  mod->main.fd = -1;
223 	}
224     }
225 
226   mod->main_bias = mod->e_type == ET_REL ? 0 : mod->low_addr - mod->main.vaddr;
227 }
228 
229 /* Search an ELF file for a ".gnu_debuglink" section.  */
230 static const char *
find_debuglink(Elf * elf,GElf_Word * crc)231 find_debuglink (Elf *elf, GElf_Word *crc)
232 {
233   size_t shstrndx;
234   if (elf_getshdrstrndx (elf, &shstrndx) < 0)
235     return NULL;
236 
237   Elf_Scn *scn = NULL;
238   while ((scn = elf_nextscn (elf, scn)) != NULL)
239     {
240       GElf_Shdr shdr_mem;
241       GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
242       if (shdr == NULL)
243 	return NULL;
244 
245       const char *name = elf_strptr (elf, shstrndx, shdr->sh_name);
246       if (name == NULL)
247 	return NULL;
248 
249       if (!strcmp (name, ".gnu_debuglink"))
250 	break;
251     }
252 
253   if (scn == NULL)
254     return NULL;
255 
256   /* Found the .gnu_debuglink section.  Extract its contents.  */
257   Elf_Data *rawdata = elf_rawdata (scn, NULL);
258   if (rawdata == NULL)
259     return NULL;
260 
261   Elf_Data crcdata =
262     {
263       .d_type = ELF_T_WORD,
264       .d_buf = crc,
265       .d_size = sizeof *crc,
266       .d_version = EV_CURRENT,
267     };
268   Elf_Data conv =
269     {
270       .d_type = ELF_T_WORD,
271       .d_buf = rawdata->d_buf + rawdata->d_size - sizeof *crc,
272       .d_size = sizeof *crc,
273       .d_version = EV_CURRENT,
274     };
275 
276   GElf_Ehdr ehdr_mem;
277   GElf_Ehdr *ehdr = gelf_getehdr (elf, &ehdr_mem);
278   if (ehdr == NULL)
279     return NULL;
280 
281   Elf_Data *d = gelf_xlatetom (elf, &crcdata, &conv, ehdr->e_ident[EI_DATA]);
282   if (d == NULL)
283     return NULL;
284   assert (d == &crcdata);
285 
286   return rawdata->d_buf;
287 }
288 
289 /* If the main file might have been prelinked, then we need to
290    discover the correct synchronization address between the main and
291    debug files.  Because of prelink's section juggling, we cannot rely
292    on the address_sync computed from PT_LOAD segments (see open_elf).
293 
294    We will attempt to discover a synchronization address based on the
295    section headers instead.  But finding a section address that is
296    safe to use requires identifying which sections are SHT_PROGBITS.
297    We can do that in the main file, but in the debug file all the
298    allocated sections have been transformed into SHT_NOBITS so we have
299    lost the means to match them up correctly.
300 
301    The only method left to us is to decode the .gnu.prelink_undo
302    section in the prelinked main file.  This shows what the sections
303    looked like before prelink juggled them--when they still had a
304    direct correspondence to the debug file.  */
305 static Dwfl_Error
find_prelink_address_sync(Dwfl_Module * mod)306 find_prelink_address_sync (Dwfl_Module *mod)
307 {
308   /* The magic section is only identified by name.  */
309   size_t shstrndx;
310   if (elf_getshdrstrndx (mod->main.elf, &shstrndx) < 0)
311     return DWFL_E_LIBELF;
312 
313   Elf_Scn *scn = NULL;
314   while ((scn = elf_nextscn (mod->main.elf, scn)) != NULL)
315     {
316       GElf_Shdr shdr_mem;
317       GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
318       if (unlikely (shdr == NULL))
319 	return DWFL_E_LIBELF;
320       if (shdr->sh_type == SHT_PROGBITS
321 	  && !(shdr->sh_flags & SHF_ALLOC)
322 	  && shdr->sh_name != 0)
323 	{
324 	  const char *secname = elf_strptr (mod->main.elf, shstrndx,
325 					    shdr->sh_name);
326 	  if (unlikely (secname == NULL))
327 	    return DWFL_E_LIBELF;
328 	  if (!strcmp (secname, ".gnu.prelink_undo"))
329 	    break;
330 	}
331     }
332 
333   if (scn == NULL)
334     /* There was no .gnu.prelink_undo section.  */
335     return DWFL_E_NOERROR;
336 
337   Elf_Data *undodata = elf_rawdata (scn, NULL);
338   if (unlikely (undodata == NULL))
339     return DWFL_E_LIBELF;
340 
341   /* Decode the section.  It consists of the original ehdr, phdrs,
342      and shdrs (but omits section 0).  */
343 
344   union
345   {
346     Elf32_Ehdr e32;
347     Elf64_Ehdr e64;
348   } ehdr;
349   Elf_Data dst =
350     {
351       .d_buf = &ehdr,
352       .d_size = sizeof ehdr,
353       .d_type = ELF_T_EHDR,
354       .d_version = EV_CURRENT
355     };
356   Elf_Data src = *undodata;
357   src.d_size = gelf_fsize (mod->main.elf, ELF_T_EHDR, 1, EV_CURRENT);
358   src.d_type = ELF_T_EHDR;
359   if (unlikely (gelf_xlatetom (mod->main.elf, &dst, &src,
360 			       elf_getident (mod->main.elf, NULL)[EI_DATA])
361 		== NULL))
362     return DWFL_E_LIBELF;
363 
364   size_t shentsize = gelf_fsize (mod->main.elf, ELF_T_SHDR, 1, EV_CURRENT);
365   size_t phentsize = gelf_fsize (mod->main.elf, ELF_T_PHDR, 1, EV_CURRENT);
366 
367   uint_fast16_t phnum;
368   uint_fast16_t shnum;
369   if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32)
370     {
371       if (ehdr.e32.e_shentsize != shentsize
372 	  || ehdr.e32.e_phentsize != phentsize)
373 	return DWFL_E_BAD_PRELINK;
374       phnum = ehdr.e32.e_phnum;
375       shnum = ehdr.e32.e_shnum;
376     }
377   else
378     {
379       if (ehdr.e64.e_shentsize != shentsize
380 	  || ehdr.e64.e_phentsize != phentsize)
381 	return DWFL_E_BAD_PRELINK;
382       phnum = ehdr.e64.e_phnum;
383       shnum = ehdr.e64.e_shnum;
384     }
385 
386   /* Since prelink does not store the zeroth section header in the undo
387      section, it cannot support SHN_XINDEX encoding.  */
388   if (unlikely (shnum >= SHN_LORESERVE)
389       || unlikely (undodata->d_size != (src.d_size
390 					+ phnum * phentsize
391 					+ (shnum - 1) * shentsize)))
392     return DWFL_E_BAD_PRELINK;
393 
394   /* We look at the allocated SHT_PROGBITS (or SHT_NOBITS) sections.  (Most
395      every file will have some SHT_PROGBITS sections, but it's possible to
396      have one with nothing but .bss, i.e. SHT_NOBITS.)  The special sections
397      that can be moved around have different sh_type values--except for
398      .interp, the section that became the PT_INTERP segment.  So we exclude
399      the SHT_PROGBITS section whose address matches the PT_INTERP p_vaddr.
400      For this reason, we must examine the phdrs first to find PT_INTERP.  */
401 
402   GElf_Addr main_interp = 0;
403   {
404     size_t main_phnum;
405     if (unlikely (elf_getphdrnum (mod->main.elf, &main_phnum)))
406       return DWFL_E_LIBELF;
407     for (size_t i = 0; i < main_phnum; ++i)
408       {
409 	GElf_Phdr phdr;
410 	if (unlikely (gelf_getphdr (mod->main.elf, i, &phdr) == NULL))
411 	  return DWFL_E_LIBELF;
412 	if (phdr.p_type == PT_INTERP)
413 	  {
414 	    main_interp = phdr.p_vaddr;
415 	    break;
416 	  }
417       }
418   }
419 
420   src.d_buf += src.d_size;
421   src.d_type = ELF_T_PHDR;
422   src.d_size = phnum * phentsize;
423 
424   GElf_Addr undo_interp = 0;
425   {
426     union
427     {
428       Elf32_Phdr p32[phnum];
429       Elf64_Phdr p64[phnum];
430     } phdr;
431     dst.d_buf = &phdr;
432     dst.d_size = sizeof phdr;
433     if (unlikely (gelf_xlatetom (mod->main.elf, &dst, &src,
434 				 ehdr.e32.e_ident[EI_DATA]) == NULL))
435       return DWFL_E_LIBELF;
436     if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32)
437       {
438 	for (uint_fast16_t i = 0; i < phnum; ++i)
439 	  if (phdr.p32[i].p_type == PT_INTERP)
440 	    {
441 	      undo_interp = phdr.p32[i].p_vaddr;
442 	      break;
443 	    }
444       }
445     else
446       {
447 	for (uint_fast16_t i = 0; i < phnum; ++i)
448 	  if (phdr.p64[i].p_type == PT_INTERP)
449 	    {
450 	      undo_interp = phdr.p64[i].p_vaddr;
451 	      break;
452 	    }
453       }
454   }
455 
456   if (unlikely ((main_interp == 0) != (undo_interp == 0)))
457     return DWFL_E_BAD_PRELINK;
458 
459   src.d_buf += src.d_size;
460   src.d_type = ELF_T_SHDR;
461   src.d_size = gelf_fsize (mod->main.elf, ELF_T_SHDR, shnum - 1, EV_CURRENT);
462 
463   union
464   {
465     Elf32_Shdr s32[shnum - 1];
466     Elf64_Shdr s64[shnum - 1];
467   } shdr;
468   dst.d_buf = &shdr;
469   dst.d_size = sizeof shdr;
470   if (unlikely (gelf_xlatetom (mod->main.elf, &dst, &src,
471 			       ehdr.e32.e_ident[EI_DATA]) == NULL))
472     return DWFL_E_LIBELF;
473 
474   /* Now we can look at the original section headers of the main file
475      before it was prelinked.  First we'll apply our method to the main
476      file sections as they are after prelinking, to calculate the
477      synchronization address of the main file.  Then we'll apply that
478      same method to the saved section headers, to calculate the matching
479      synchronization address of the debug file.
480 
481      The method is to consider SHF_ALLOC sections that are either
482      SHT_PROGBITS or SHT_NOBITS, excluding the section whose sh_addr
483      matches the PT_INTERP p_vaddr.  The special sections that can be
484      moved by prelink have other types, except for .interp (which
485      becomes PT_INTERP).  The "real" sections cannot move as such, but
486      .bss can be split into .dynbss and .bss, with the total memory
487      image remaining the same but being spread across the two sections.
488      So we consider the highest section end, which still matches up.  */
489 
490   GElf_Addr highest;
491 
492   inline void consider_shdr (GElf_Addr interp,
493 			     GElf_Word sh_type,
494 			     GElf_Xword sh_flags,
495 			     GElf_Addr sh_addr,
496 			     GElf_Xword sh_size)
497   {
498     if ((sh_flags & SHF_ALLOC)
499 	&& ((sh_type == SHT_PROGBITS && sh_addr != interp)
500 	    || sh_type == SHT_NOBITS))
501       {
502 	const GElf_Addr sh_end = sh_addr + sh_size;
503 	if (sh_end > highest)
504 	  highest = sh_end;
505       }
506   }
507 
508   highest = 0;
509   scn = NULL;
510   while ((scn = elf_nextscn (mod->main.elf, scn)) != NULL)
511     {
512       GElf_Shdr sh_mem;
513       GElf_Shdr *sh = gelf_getshdr (scn, &sh_mem);
514       if (unlikely (sh == NULL))
515 	return DWFL_E_LIBELF;
516       consider_shdr (main_interp, sh->sh_type, sh->sh_flags,
517 		     sh->sh_addr, sh->sh_size);
518     }
519   if (highest > mod->main.vaddr)
520     {
521       mod->main.address_sync = highest;
522 
523       highest = 0;
524       if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32)
525 	for (size_t i = 0; i < shnum - 1; ++i)
526 	  consider_shdr (undo_interp, shdr.s32[i].sh_type, shdr.s32[i].sh_flags,
527 			 shdr.s32[i].sh_addr, shdr.s32[i].sh_size);
528       else
529 	for (size_t i = 0; i < shnum - 1; ++i)
530 	  consider_shdr (undo_interp, shdr.s64[i].sh_type, shdr.s64[i].sh_flags,
531 			 shdr.s64[i].sh_addr, shdr.s64[i].sh_size);
532 
533       if (highest > mod->debug.vaddr)
534 	mod->debug.address_sync = highest;
535       else
536 	return DWFL_E_BAD_PRELINK;
537     }
538 
539   return DWFL_E_NOERROR;
540 }
541 
542 /* Find the separate debuginfo file for this module and open libelf on it.
543    When we return success, MOD->debug is set up.  */
544 static Dwfl_Error
find_debuginfo(Dwfl_Module * mod)545 find_debuginfo (Dwfl_Module *mod)
546 {
547   if (mod->debug.elf != NULL)
548     return DWFL_E_NOERROR;
549 
550   GElf_Word debuglink_crc = 0;
551   const char *debuglink_file = find_debuglink (mod->main.elf, &debuglink_crc);
552 
553   mod->debug.fd = (*mod->dwfl->callbacks->find_debuginfo) (MODCB_ARGS (mod),
554 							   mod->main.name,
555 							   debuglink_file,
556 							   debuglink_crc,
557 							   &mod->debug.name);
558   Dwfl_Error result = open_elf (mod, &mod->debug);
559   if (result == DWFL_E_NOERROR && mod->debug.address_sync != 0)
560     result = find_prelink_address_sync (mod);
561   return result;
562 }
563 
564 
565 /* Try to find a symbol table in FILE.
566    Returns DWFL_E_NOERROR if a proper one is found.
567    Returns DWFL_E_NO_SYMTAB if not, but still sets results for SHT_DYNSYM.  */
568 static Dwfl_Error
load_symtab(struct dwfl_file * file,struct dwfl_file ** symfile,Elf_Scn ** symscn,Elf_Scn ** xndxscn,size_t * syments,int * first_global,GElf_Word * strshndx)569 load_symtab (struct dwfl_file *file, struct dwfl_file **symfile,
570 	     Elf_Scn **symscn, Elf_Scn **xndxscn,
571 	     size_t *syments, int *first_global, GElf_Word *strshndx)
572 {
573   bool symtab = false;
574   Elf_Scn *scn = NULL;
575   while ((scn = elf_nextscn (file->elf, scn)) != NULL)
576     {
577       GElf_Shdr shdr_mem, *shdr = gelf_getshdr (scn, &shdr_mem);
578       if (shdr != NULL)
579 	switch (shdr->sh_type)
580 	  {
581 	  case SHT_SYMTAB:
582 	    symtab = true;
583 	    *symscn = scn;
584 	    *symfile = file;
585 	    *strshndx = shdr->sh_link;
586 	    *syments = shdr->sh_size / shdr->sh_entsize;
587 	    *first_global = shdr->sh_info;
588 	    if (*xndxscn != NULL)
589 	      return DWFL_E_NOERROR;
590 	    break;
591 
592 	  case SHT_DYNSYM:
593 	    if (symtab)
594 	      break;
595 	    /* Use this if need be, but keep looking for SHT_SYMTAB.  */
596 	    *symscn = scn;
597 	    *symfile = file;
598 	    *strshndx = shdr->sh_link;
599 	    *syments = shdr->sh_size / shdr->sh_entsize;
600 	    break;
601 
602 	  case SHT_SYMTAB_SHNDX:
603 	    *xndxscn = scn;
604 	    if (symtab)
605 	      return DWFL_E_NOERROR;
606 	    break;
607 
608 	  default:
609 	    break;
610 	  }
611     }
612 
613   if (symtab)
614     /* We found one, though no SHT_SYMTAB_SHNDX to go with it.  */
615     return DWFL_E_NOERROR;
616 
617   /* We found no SHT_SYMTAB, so any SHT_SYMTAB_SHNDX was bogus.
618      We might have found an SHT_DYNSYM and set *SYMSCN et al though.  */
619   *xndxscn = NULL;
620   return DWFL_E_NO_SYMTAB;
621 }
622 
623 
624 /* Translate addresses into file offsets.
625    OFFS[*] start out zero and remain zero if unresolved.  */
626 static void
find_offsets(Elf * elf,size_t phnum,size_t n,GElf_Addr addrs[n],GElf_Off offs[n])627 find_offsets (Elf *elf, size_t phnum, size_t n,
628 	      GElf_Addr addrs[n], GElf_Off offs[n])
629 {
630   size_t unsolved = n;
631   for (size_t i = 0; i < phnum; ++i)
632     {
633       GElf_Phdr phdr_mem;
634       GElf_Phdr *phdr = gelf_getphdr (elf, i, &phdr_mem);
635       if (phdr != NULL && phdr->p_type == PT_LOAD && phdr->p_memsz > 0)
636 	for (size_t j = 0; j < n; ++j)
637 	  if (offs[j] == 0
638 	      && addrs[j] >= phdr->p_vaddr
639 	      && addrs[j] - phdr->p_vaddr < phdr->p_filesz)
640 	    {
641 	      offs[j] = addrs[j] - phdr->p_vaddr + phdr->p_offset;
642 	      if (--unsolved == 0)
643 		break;
644 	    }
645     }
646 }
647 
648 /* Try to find a dynamic symbol table via phdrs.  */
649 static void
find_dynsym(Dwfl_Module * mod)650 find_dynsym (Dwfl_Module *mod)
651 {
652   GElf_Ehdr ehdr_mem;
653   GElf_Ehdr *ehdr = gelf_getehdr (mod->main.elf, &ehdr_mem);
654 
655   size_t phnum;
656   if (unlikely (elf_getphdrnum (mod->main.elf, &phnum) != 0))
657     return;
658 
659   for (size_t i = 0; i < phnum; ++i)
660     {
661       GElf_Phdr phdr_mem;
662       GElf_Phdr *phdr = gelf_getphdr (mod->main.elf, i, &phdr_mem);
663       if (phdr == NULL)
664 	break;
665 
666       if (phdr->p_type == PT_DYNAMIC)
667 	{
668 	  /* Examine the dynamic section for the pointers we need.  */
669 
670 	  Elf_Data *data = elf_getdata_rawchunk (mod->main.elf,
671 						 phdr->p_offset, phdr->p_filesz,
672 						 ELF_T_DYN);
673 	  if (data == NULL)
674 	    continue;
675 
676 	  enum
677 	    {
678 	      i_symtab,
679 	      i_strtab,
680 	      i_hash,
681 	      i_gnu_hash,
682 	      i_max
683 	    };
684 	  GElf_Addr addrs[i_max] = { 0, };
685 	  GElf_Xword strsz = 0;
686 	  size_t n = data->d_size / gelf_fsize (mod->main.elf,
687 						ELF_T_DYN, 1, EV_CURRENT);
688 	  for (size_t j = 0; j < n; ++j)
689 	    {
690 	      GElf_Dyn dyn_mem;
691 	      GElf_Dyn *dyn = gelf_getdyn (data, j, &dyn_mem);
692 	      if (dyn != NULL)
693 		switch (dyn->d_tag)
694 		  {
695 		  case DT_SYMTAB:
696 		    addrs[i_symtab] = dyn->d_un.d_ptr;
697 		    continue;
698 
699 		  case DT_HASH:
700 		    addrs[i_hash] = dyn->d_un.d_ptr;
701 		    continue;
702 
703 		  case DT_GNU_HASH:
704 		    addrs[i_gnu_hash] = dyn->d_un.d_ptr;
705 		    continue;
706 
707 		  case DT_STRTAB:
708 		    addrs[i_strtab] = dyn->d_un.d_ptr;
709 		    continue;
710 
711 		  case DT_STRSZ:
712 		    strsz = dyn->d_un.d_val;
713 		    continue;
714 
715 		  default:
716 		    continue;
717 
718 		  case DT_NULL:
719 		    break;
720 		  }
721 	      break;
722 	    }
723 
724 	  /* Translate pointers into file offsets.  */
725 	  GElf_Off offs[i_max] = { 0, };
726 	  find_offsets (mod->main.elf, phnum, i_max, addrs, offs);
727 
728 	  /* Figure out the size of the symbol table.  */
729 	  if (offs[i_hash] != 0)
730 	    {
731 	      /* In the original format, .hash says the size of .dynsym.  */
732 
733 	      size_t entsz = SH_ENTSIZE_HASH (ehdr);
734 	      data = elf_getdata_rawchunk (mod->main.elf,
735 					   offs[i_hash] + entsz, entsz,
736 					   entsz == 4 ? ELF_T_WORD
737 					   : ELF_T_XWORD);
738 	      if (data != NULL)
739 		mod->syments = (entsz == 4
740 				? *(const GElf_Word *) data->d_buf
741 				: *(const GElf_Xword *) data->d_buf);
742 	    }
743 	  if (offs[i_gnu_hash] != 0 && mod->syments == 0)
744 	    {
745 	      /* In the new format, we can derive it with some work.  */
746 
747 	      const struct
748 	      {
749 		Elf32_Word nbuckets;
750 		Elf32_Word symndx;
751 		Elf32_Word maskwords;
752 		Elf32_Word shift2;
753 	      } *header;
754 
755 	      data = elf_getdata_rawchunk (mod->main.elf, offs[i_gnu_hash],
756 					   sizeof *header, ELF_T_WORD);
757 	      if (data != NULL)
758 		{
759 		  header = data->d_buf;
760 		  Elf32_Word nbuckets = header->nbuckets;
761 		  Elf32_Word symndx = header->symndx;
762 		  GElf_Off buckets_at = (offs[i_gnu_hash] + sizeof *header
763 					 + (gelf_getclass (mod->main.elf)
764 					    * sizeof (Elf32_Word)
765 					    * header->maskwords));
766 
767 		  data = elf_getdata_rawchunk (mod->main.elf, buckets_at,
768 					       nbuckets * sizeof (Elf32_Word),
769 					       ELF_T_WORD);
770 		  if (data != NULL && symndx < nbuckets)
771 		    {
772 		      const Elf32_Word *const buckets = data->d_buf;
773 		      Elf32_Word maxndx = symndx;
774 		      for (Elf32_Word bucket = 0; bucket < nbuckets; ++bucket)
775 			if (buckets[bucket] > maxndx)
776 			  maxndx = buckets[bucket];
777 
778 		      GElf_Off hasharr_at = (buckets_at
779 					     + nbuckets * sizeof (Elf32_Word));
780 		      hasharr_at += (maxndx - symndx) * sizeof (Elf32_Word);
781 		      do
782 			{
783 			  data = elf_getdata_rawchunk (mod->main.elf,
784 						       hasharr_at,
785 						       sizeof (Elf32_Word),
786 						       ELF_T_WORD);
787 			  if (data != NULL
788 			      && (*(const Elf32_Word *) data->d_buf & 1u))
789 			    {
790 			      mod->syments = maxndx + 1;
791 			      break;
792 			    }
793 			  ++maxndx;
794 			  hasharr_at += sizeof (Elf32_Word);
795 			} while (data != NULL);
796 		    }
797 		}
798 	    }
799 	  if (offs[i_strtab] > offs[i_symtab] && mod->syments == 0)
800 	    mod->syments = ((offs[i_strtab] - offs[i_symtab])
801 			    / gelf_fsize (mod->main.elf,
802 					  ELF_T_SYM, 1, EV_CURRENT));
803 
804 	  if (mod->syments > 0)
805 	    {
806 	      mod->symdata = elf_getdata_rawchunk (mod->main.elf,
807 						   offs[i_symtab],
808 						   gelf_fsize (mod->main.elf,
809 							       ELF_T_SYM,
810 							       mod->syments,
811 							       EV_CURRENT),
812 						   ELF_T_SYM);
813 	      if (mod->symdata != NULL)
814 		{
815 		  mod->symstrdata = elf_getdata_rawchunk (mod->main.elf,
816 							  offs[i_strtab],
817 							  strsz,
818 							  ELF_T_BYTE);
819 		  if (mod->symstrdata == NULL)
820 		    mod->symdata = NULL;
821 		}
822 	      if (mod->symdata == NULL)
823 		mod->symerr = DWFL_E (LIBELF, elf_errno ());
824 	      else
825 		{
826 		  mod->symfile = &mod->main;
827 		  mod->symerr = DWFL_E_NOERROR;
828 		}
829 	      return;
830 	    }
831 	}
832     }
833 }
834 
835 /* Try to find a symbol table in either MOD->main.elf or MOD->debug.elf.  */
836 static void
find_symtab(Dwfl_Module * mod)837 find_symtab (Dwfl_Module *mod)
838 {
839   if (mod->symdata != NULL	/* Already done.  */
840       || mod->symerr != DWFL_E_NOERROR) /* Cached previous failure.  */
841     return;
842 
843   __libdwfl_getelf (mod);
844   mod->symerr = mod->elferr;
845   if (mod->symerr != DWFL_E_NOERROR)
846     return;
847 
848   mod->first_global = -1; /* Unknown, unless explicitly set by load_symtab.  */
849 
850   /* First see if the main ELF file has the debugging information.  */
851   Elf_Scn *symscn = NULL, *xndxscn = NULL;
852   GElf_Word strshndx;
853   mod->symerr = load_symtab (&mod->main, &mod->symfile, &symscn,
854 			     &xndxscn, &mod->syments, &mod->first_global,
855 			     &strshndx);
856   switch (mod->symerr)
857     {
858     default:
859       return;
860 
861     case DWFL_E_NOERROR:
862       break;
863 
864     case DWFL_E_NO_SYMTAB:
865       /* Now we have to look for a separate debuginfo file.  */
866       mod->symerr = find_debuginfo (mod);
867       switch (mod->symerr)
868 	{
869 	default:
870 	  return;
871 
872 	case DWFL_E_NOERROR:
873 	  mod->symerr = load_symtab (&mod->debug, &mod->symfile, &symscn,
874 				     &xndxscn, &mod->syments,
875 				     &mod->first_global, &strshndx);
876 	  break;
877 
878 	case DWFL_E_CB:		/* The find_debuginfo hook failed.  */
879 	  mod->symerr = DWFL_E_NO_SYMTAB;
880 	  break;
881 	}
882 
883       switch (mod->symerr)
884 	{
885 	default:
886 	  return;
887 
888 	case DWFL_E_NOERROR:
889 	  break;
890 
891 	case DWFL_E_NO_SYMTAB:
892 	  if (symscn != NULL)
893 	    {
894 	      /* We still have the dynamic symbol table.  */
895 	      mod->symerr = DWFL_E_NOERROR;
896 	      break;
897 	    }
898 
899 	  /* Last ditch, look for dynamic symbols without section headers.  */
900 	  find_dynsym (mod);
901 	  return;
902 	}
903       break;
904     }
905 
906   /* This does some sanity checks on the string table section.  */
907   if (elf_strptr (mod->symfile->elf, strshndx, 0) == NULL)
908     {
909     elferr:
910       mod->symerr = DWFL_E (LIBELF, elf_errno ());
911       return;
912     }
913 
914   /* Cache the data; MOD->syments and MOD->first_global were set above.  */
915 
916   mod->symstrdata = elf_getdata (elf_getscn (mod->symfile->elf, strshndx),
917 				 NULL);
918   if (mod->symstrdata == NULL)
919     goto elferr;
920 
921   if (xndxscn == NULL)
922     mod->symxndxdata = NULL;
923   else
924     {
925       mod->symxndxdata = elf_getdata (xndxscn, NULL);
926       if (mod->symxndxdata == NULL)
927 	goto elferr;
928     }
929 
930   mod->symdata = elf_getdata (symscn, NULL);
931   if (mod->symdata == NULL)
932     goto elferr;
933 }
934 
935 
936 /* Try to open a libebl backend for MOD.  */
937 Dwfl_Error
938 internal_function
__libdwfl_module_getebl(Dwfl_Module * mod)939 __libdwfl_module_getebl (Dwfl_Module *mod)
940 {
941   if (mod->ebl == NULL)
942     {
943       __libdwfl_getelf (mod);
944       if (mod->elferr != DWFL_E_NOERROR)
945 	return mod->elferr;
946 
947       mod->ebl = ebl_openbackend (mod->main.elf);
948       if (mod->ebl == NULL)
949 	return DWFL_E_LIBEBL;
950     }
951   return DWFL_E_NOERROR;
952 }
953 
954 /* Try to start up libdw on DEBUGFILE.  */
955 static Dwfl_Error
load_dw(Dwfl_Module * mod,struct dwfl_file * debugfile)956 load_dw (Dwfl_Module *mod, struct dwfl_file *debugfile)
957 {
958   if (mod->e_type == ET_REL && !debugfile->relocated)
959     {
960       const Dwfl_Callbacks *const cb = mod->dwfl->callbacks;
961 
962       /* The debugging sections have to be relocated.  */
963       if (cb->section_address == NULL)
964 	return DWFL_E_NOREL;
965 
966       Dwfl_Error error = __libdwfl_module_getebl (mod);
967       if (error != DWFL_E_NOERROR)
968 	return error;
969 
970       find_symtab (mod);
971       Dwfl_Error result = mod->symerr;
972       if (result == DWFL_E_NOERROR)
973 	result = __libdwfl_relocate (mod, debugfile->elf, true);
974       if (result != DWFL_E_NOERROR)
975 	return result;
976 
977       /* Don't keep the file descriptors around.  */
978       if (mod->main.fd != -1 && elf_cntl (mod->main.elf, ELF_C_FDREAD) == 0)
979 	{
980 	  close (mod->main.fd);
981 	  mod->main.fd = -1;
982 	}
983       if (debugfile->fd != -1 && elf_cntl (debugfile->elf, ELF_C_FDREAD) == 0)
984 	{
985 	  close (debugfile->fd);
986 	  debugfile->fd = -1;
987 	}
988     }
989 
990   mod->dw = INTUSE(dwarf_begin_elf) (debugfile->elf, DWARF_C_READ, NULL);
991   if (mod->dw == NULL)
992     {
993       int err = INTUSE(dwarf_errno) ();
994       return err == DWARF_E_NO_DWARF ? DWFL_E_NO_DWARF : DWFL_E (LIBDW, err);
995     }
996 
997   /* Until we have iterated through all CU's, we might do lazy lookups.  */
998   mod->lazycu = 1;
999 
1000   return DWFL_E_NOERROR;
1001 }
1002 
1003 /* Try to start up libdw on either the main file or the debuginfo file.  */
1004 static void
find_dw(Dwfl_Module * mod)1005 find_dw (Dwfl_Module *mod)
1006 {
1007   if (mod->dw != NULL		/* Already done.  */
1008       || mod->dwerr != DWFL_E_NOERROR) /* Cached previous failure.  */
1009     return;
1010 
1011   __libdwfl_getelf (mod);
1012   mod->dwerr = mod->elferr;
1013   if (mod->dwerr != DWFL_E_NOERROR)
1014     return;
1015 
1016   /* First see if the main ELF file has the debugging information.  */
1017   mod->dwerr = load_dw (mod, &mod->main);
1018   switch (mod->dwerr)
1019     {
1020     case DWFL_E_NOERROR:
1021       mod->debug.elf = mod->main.elf;
1022       mod->debug.address_sync = mod->main.address_sync;
1023       return;
1024 
1025     case DWFL_E_NO_DWARF:
1026       break;
1027 
1028     default:
1029       goto canonicalize;
1030     }
1031 
1032   /* Now we have to look for a separate debuginfo file.  */
1033   mod->dwerr = find_debuginfo (mod);
1034   switch (mod->dwerr)
1035     {
1036     case DWFL_E_NOERROR:
1037       mod->dwerr = load_dw (mod, &mod->debug);
1038       break;
1039 
1040     case DWFL_E_CB:		/* The find_debuginfo hook failed.  */
1041       mod->dwerr = DWFL_E_NO_DWARF;
1042       return;
1043 
1044     default:
1045       break;
1046     }
1047 
1048  canonicalize:
1049   mod->dwerr = __libdwfl_canon_error (mod->dwerr);
1050 }
1051 
1052 Dwarf *
dwfl_module_getdwarf(Dwfl_Module * mod,Dwarf_Addr * bias)1053 dwfl_module_getdwarf (Dwfl_Module *mod, Dwarf_Addr *bias)
1054 {
1055   if (mod == NULL)
1056     return NULL;
1057 
1058   find_dw (mod);
1059   if (mod->dwerr == DWFL_E_NOERROR)
1060     {
1061       /* If dwfl_module_getelf was used previously, then partial apply
1062 	 relocation to miscellaneous sections in the debug file too.  */
1063       if (mod->e_type == ET_REL
1064 	  && mod->main.relocated && ! mod->debug.relocated)
1065 	{
1066 	  mod->debug.relocated = true;
1067 	  if (mod->debug.elf != mod->main.elf)
1068 	    (void) __libdwfl_relocate (mod, mod->debug.elf, false);
1069 	}
1070 
1071       *bias = dwfl_adjusted_dwarf_addr (mod, 0);
1072       return mod->dw;
1073     }
1074 
1075   __libdwfl_seterrno (mod->dwerr);
1076   return NULL;
1077 }
INTDEF(dwfl_module_getdwarf)1078 INTDEF (dwfl_module_getdwarf)
1079 
1080 int
1081 dwfl_module_getsymtab (Dwfl_Module *mod)
1082 {
1083   if (mod == NULL)
1084     return -1;
1085 
1086   find_symtab (mod);
1087   if (mod->symerr == DWFL_E_NOERROR)
1088     return mod->syments;
1089 
1090   __libdwfl_seterrno (mod->symerr);
1091   return -1;
1092 }
1093 INTDEF (dwfl_module_getsymtab)
1094