1 /* Sniff out modules from ELF headers visible in memory segments.
2 Copyright (C) 2008 Red Hat, Inc.
3 This file is part of Red Hat elfutils.
4
5 Red Hat elfutils is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by the
7 Free Software Foundation; version 2 of the License.
8
9 Red Hat elfutils is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along
15 with Red Hat elfutils; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA.
17
18 In addition, as a special exception, Red Hat, Inc. gives You the
19 additional right to link the code of Red Hat elfutils with code licensed
20 under any Open Source Initiative certified open source license
21 (http://www.opensource.org/licenses/index.php) which requires the
22 distribution of source code with any binary distribution and to
23 distribute linked combinations of the two. Non-GPL Code permitted under
24 this exception must only link to the code of Red Hat elfutils through
25 those well defined interfaces identified in the file named EXCEPTION
26 found in the source code files (the "Approved Interfaces"). The files
27 of Non-GPL Code may instantiate templates or use macros or inline
28 functions from the Approved Interfaces without causing the resulting
29 work to be covered by the GNU General Public License. Only Red Hat,
30 Inc. may make changes or additions to the list of Approved Interfaces.
31 Red Hat's grant of this exception is conditioned upon your not adding
32 any new exceptions. If you wish to add a new Approved Interface or
33 exception, please contact Red Hat. You must obey the GNU General Public
34 License in all respects for all of the Red Hat elfutils code and other
35 code used in conjunction with Red Hat elfutils except the Non-GPL Code
36 covered by this exception. If you modify this file, you may extend this
37 exception to your version of the file, but you are not obligated to do
38 so. If you do not wish to provide this exception without modification,
39 you must delete this exception statement from your version and license
40 this file solely under the GPL without exception.
41
42 Red Hat elfutils is an included package of the Open Invention Network.
43 An included package of the Open Invention Network is a package for which
44 Open Invention Network licensees cross-license their patents. No patent
45 license is granted, either expressly or impliedly, by designation as an
46 included package. Should you wish to participate in the Open Invention
47 Network licensing program, please visit www.openinventionnetwork.com
48 <http://www.openinventionnetwork.com>. */
49
50 #include <config.h>
51 #include "../libelf/libelfP.h" /* For NOTE_ALIGN. */
52 #undef _
53 #include "libdwflP.h"
54
55 #include <elf.h>
56 #include <gelf.h>
57 #include <inttypes.h>
58 #include <sys/param.h>
59 #include <alloca.h>
60 #include <endian.h>
61
62
63 /* A good size for the initial read from memory, if it's not too costly.
64 This more than covers the phdrs and note segment in the average 64-bit
65 binary. */
66
67 #define INITIAL_READ 1024
68
69 #if __BYTE_ORDER == __LITTLE_ENDIAN
70 # define MY_ELFDATA ELFDATA2LSB
71 #else
72 # define MY_ELFDATA ELFDATA2MSB
73 #endif
74
75
76 /* Return user segment index closest to ADDR but not above it. */
77 static int
addr_segndx(Dwfl * dwfl,size_t segment,GElf_Addr addr)78 addr_segndx (Dwfl *dwfl, size_t segment, GElf_Addr addr)
79 {
80 int ndx = dwfl->lookup_segndx[segment];
81 do
82 {
83 if (dwfl->lookup_segndx[segment] >= 0)
84 ndx = dwfl->lookup_segndx[segment];
85 ++segment;
86 }
87 while (segment < dwfl->lookup_elts - 1
88 && dwfl->lookup_addr[segment] < addr);
89
90 while (dwfl->lookup_segndx[segment] < 0
91 && segment < dwfl->lookup_elts - 1)
92 ++segment;
93
94 if (dwfl->lookup_segndx[segment] >= 0)
95 ndx = dwfl->lookup_segndx[segment];
96
97 return ndx;
98 }
99
100 int
dwfl_segment_report_module(Dwfl * dwfl,int ndx,const char * name,Dwfl_Memory_Callback * memory_callback,void * memory_callback_arg,Dwfl_Module_Callback * read_eagerly,void * read_eagerly_arg)101 dwfl_segment_report_module (Dwfl *dwfl, int ndx, const char *name,
102 Dwfl_Memory_Callback *memory_callback,
103 void *memory_callback_arg,
104 Dwfl_Module_Callback *read_eagerly,
105 void *read_eagerly_arg)
106 {
107 size_t segment = ndx;
108
109 if (segment >= dwfl->lookup_elts)
110 segment = dwfl->lookup_elts - 1;
111
112 while (segment > 0 && dwfl->lookup_segndx[segment] > ndx)
113 --segment;
114
115 while (dwfl->lookup_segndx[segment] < ndx)
116 if (++segment == dwfl->lookup_elts)
117 return 0;
118
119 GElf_Addr start = dwfl->lookup_addr[segment];
120
121 inline bool segment_read (int segndx,
122 void **buffer, size_t *buffer_available,
123 GElf_Addr addr, size_t minread)
124 {
125 return ! (*memory_callback) (dwfl, segndx, buffer, buffer_available,
126 addr, minread, memory_callback_arg);
127 }
128
129 inline void release_buffer (void **buffer, size_t *buffer_available)
130 {
131 if (*buffer != NULL)
132 (void) segment_read (-1, buffer, buffer_available, 0, 0);
133 }
134
135 /* First read in the file header and check its sanity. */
136
137 void *buffer = NULL;
138 size_t buffer_available = INITIAL_READ;
139
140 inline int finish (void)
141 {
142 release_buffer (&buffer, &buffer_available);
143 return ndx;
144 }
145
146 if (segment_read (ndx, &buffer, &buffer_available,
147 start, sizeof (Elf64_Ehdr))
148 || memcmp (buffer, ELFMAG, SELFMAG) != 0)
149 return finish ();
150
151 inline bool read_portion (void **data, size_t *data_size,
152 GElf_Addr vaddr, size_t filesz)
153 {
154 if (vaddr - start + filesz > buffer_available)
155 {
156 *data = NULL;
157 *data_size = filesz;
158 return segment_read (addr_segndx (dwfl, segment, vaddr),
159 data, data_size, vaddr, filesz);
160 }
161
162 /* We already have this whole note segment from our initial read. */
163 *data = vaddr - start + buffer;
164 *data_size = 0;
165 return false;
166 }
167
168 inline void finish_portion (void **data, size_t *data_size)
169 {
170 if (*data_size != 0)
171 release_buffer (data, data_size);
172 }
173
174 /* Extract the information we need from the file header. */
175 union
176 {
177 Elf32_Ehdr e32;
178 Elf64_Ehdr e64;
179 } ehdr;
180 GElf_Off phoff;
181 uint_fast16_t phnum;
182 uint_fast16_t phentsize;
183 GElf_Off shdrs_end;
184 Elf_Data xlatefrom =
185 {
186 .d_type = ELF_T_EHDR,
187 .d_buf = (void *) buffer,
188 .d_version = EV_CURRENT,
189 };
190 Elf_Data xlateto =
191 {
192 .d_type = ELF_T_EHDR,
193 .d_buf = &ehdr,
194 .d_size = sizeof ehdr,
195 .d_version = EV_CURRENT,
196 };
197 switch (((const unsigned char *) buffer)[EI_CLASS])
198 {
199 case ELFCLASS32:
200 xlatefrom.d_size = sizeof (Elf32_Ehdr);
201 if (elf32_xlatetom (&xlateto, &xlatefrom,
202 ((const unsigned char *) buffer)[EI_DATA]) == NULL)
203 return finish ();
204 phoff = ehdr.e32.e_phoff;
205 phnum = ehdr.e32.e_phnum;
206 phentsize = ehdr.e32.e_phentsize;
207 if (phentsize != sizeof (Elf32_Phdr))
208 return finish ();
209 shdrs_end = ehdr.e32.e_shoff + ehdr.e32.e_shnum * ehdr.e32.e_shentsize;
210 break;
211
212 case ELFCLASS64:
213 xlatefrom.d_size = sizeof (Elf64_Ehdr);
214 if (elf64_xlatetom (&xlateto, &xlatefrom,
215 ((const unsigned char *) buffer)[EI_DATA]) == NULL)
216 return finish ();
217 phoff = ehdr.e64.e_phoff;
218 phnum = ehdr.e64.e_phnum;
219 phentsize = ehdr.e64.e_phentsize;
220 if (phentsize != sizeof (Elf64_Phdr))
221 return finish ();
222 shdrs_end = ehdr.e64.e_shoff + ehdr.e64.e_shnum * ehdr.e64.e_shentsize;
223 break;
224
225 default:
226 return finish ();
227 }
228
229 /* The file header tells where to find the program headers.
230 These are what we need to find the boundaries of the module.
231 Without them, we don't have a module to report. */
232
233 if (phnum == 0)
234 return finish ();
235
236 xlatefrom.d_type = xlateto.d_type = ELF_T_PHDR;
237 xlatefrom.d_size = phnum * phentsize;
238
239 void *ph_buffer = NULL;
240 size_t ph_buffer_size = 0;
241 if (read_portion (&ph_buffer, &ph_buffer_size,
242 start + phoff, xlatefrom.d_size))
243 return finish ();
244
245 xlatefrom.d_buf = ph_buffer;
246
247 union
248 {
249 Elf32_Phdr p32[phnum];
250 Elf64_Phdr p64[phnum];
251 } phdrs;
252
253 xlateto.d_buf = &phdrs;
254 xlateto.d_size = sizeof phdrs;
255
256 /* Track the bounds of the file visible in memory. */
257 GElf_Off file_trimmed_end = 0; /* Proper p_vaddr + p_filesz end. */
258 GElf_Off file_end = 0; /* Rounded up to effective page size. */
259 GElf_Off contiguous = 0; /* Visible as contiguous file from START. */
260 GElf_Off total_filesz = 0; /* Total size of data to read. */
261
262 /* Collect the bias between START and the containing PT_LOAD's p_vaddr. */
263 GElf_Addr bias = 0;
264 bool found_bias = false;
265
266 /* Collect the unbiased bounds of the module here. */
267 GElf_Addr module_start = -1l;
268 GElf_Addr module_end = 0;
269
270 /* If we see PT_DYNAMIC, record it here. */
271 GElf_Addr dyn_vaddr = 0;
272 GElf_Xword dyn_filesz = 0;
273
274 /* Collect the build ID bits here. */
275 void *build_id = NULL;
276 size_t build_id_len = 0;
277 GElf_Addr build_id_vaddr = 0;
278
279 /* Consider a PT_NOTE we've found in the image. */
280 inline void consider_notes (GElf_Addr vaddr, GElf_Xword filesz)
281 {
282 /* If we have already seen a build ID, we don't care any more. */
283 if (build_id != NULL || filesz == 0)
284 return;
285
286 void *data;
287 size_t data_size;
288 if (read_portion (&data, &data_size, vaddr, filesz))
289 return;
290
291 assert (sizeof (Elf32_Nhdr) == sizeof (Elf64_Nhdr));
292
293 void *notes;
294 if (ehdr.e32.e_ident[EI_DATA] == MY_ELFDATA)
295 notes = data;
296 else
297 {
298 notes = malloc (filesz);
299 if (unlikely (notes == NULL))
300 return;
301 xlatefrom.d_type = xlateto.d_type = ELF_T_NHDR;
302 xlatefrom.d_buf = (void *) data;
303 xlatefrom.d_size = filesz;
304 xlateto.d_buf = notes;
305 xlateto.d_size = filesz;
306 if (elf32_xlatetom (&xlateto, &xlatefrom,
307 ehdr.e32.e_ident[EI_DATA]) == NULL)
308 goto done;
309 }
310
311 const GElf_Nhdr *nh = notes;
312 while ((const void *) nh < (const void *) notes + filesz)
313 {
314 const void *note_name = nh + 1;
315 const void *note_desc = note_name + NOTE_ALIGN (nh->n_namesz);
316 if (unlikely ((size_t) ((const void *) notes + filesz
317 - note_desc) < nh->n_descsz))
318 break;
319
320 if (nh->n_type == NT_GNU_BUILD_ID
321 && nh->n_descsz > 0
322 && nh->n_namesz == sizeof "GNU"
323 && !memcmp (note_name, "GNU", sizeof "GNU"))
324 {
325 build_id_vaddr = note_desc - (const void *) notes + vaddr;
326 build_id_len = nh->n_descsz;
327 build_id = malloc (nh->n_descsz);
328 if (likely (build_id != NULL))
329 memcpy (build_id, note_desc, build_id_len);
330 break;
331 }
332
333 nh = note_desc + NOTE_ALIGN (nh->n_descsz);
334 }
335
336 done:
337 if (notes != data)
338 free (notes);
339 finish_portion (&data, &data_size);
340 }
341
342 /* Consider each of the program headers we've read from the image. */
343 inline void consider_phdr (GElf_Word type,
344 GElf_Addr vaddr, GElf_Xword memsz,
345 GElf_Off offset, GElf_Xword filesz,
346 GElf_Xword align)
347 {
348 switch (type)
349 {
350 case PT_DYNAMIC:
351 dyn_vaddr = vaddr;
352 dyn_filesz = filesz;
353 break;
354
355 case PT_NOTE:
356 /* We calculate from the p_offset of the note segment,
357 because we don't yet know the bias for its p_vaddr. */
358 consider_notes (start + offset, filesz);
359 break;
360
361 case PT_LOAD:
362 align = dwfl->segment_align > 1 ? dwfl->segment_align : align ?: 1;
363
364 GElf_Addr vaddr_end = (vaddr + memsz + align - 1) & -align;
365 GElf_Addr filesz_vaddr = filesz < memsz ? vaddr + filesz : vaddr_end;
366 GElf_Off filesz_offset = filesz_vaddr - vaddr + offset;
367
368 if (file_trimmed_end < offset + filesz)
369 {
370 file_trimmed_end = offset + filesz;
371
372 /* Trim the last segment so we don't bother with zeros
373 in the last page that are off the end of the file.
374 However, if the extra bit in that page includes the
375 section headers, keep them. */
376 if (shdrs_end <= filesz_offset && shdrs_end > file_trimmed_end)
377 {
378 filesz += shdrs_end - file_trimmed_end;
379 file_trimmed_end = shdrs_end;
380 }
381 }
382
383 total_filesz += filesz;
384
385 if (file_end < filesz_offset)
386 {
387 file_end = filesz_offset;
388 if (filesz_vaddr - start == filesz_offset)
389 contiguous = file_end;
390 }
391
392 if (!found_bias && (offset & -align) == 0
393 && likely (filesz_offset >= phoff + phnum * phentsize))
394 {
395 bias = start - vaddr;
396 found_bias = true;
397 }
398
399 vaddr &= -align;
400 if (vaddr < module_start)
401 module_start = vaddr;
402
403 if (module_end < vaddr_end)
404 module_end = vaddr_end;
405 break;
406 }
407 }
408 if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32)
409 {
410 if (elf32_xlatetom (&xlateto, &xlatefrom,
411 ehdr.e32.e_ident[EI_DATA]) == NULL)
412 found_bias = false; /* Trigger error check. */
413 else
414 for (uint_fast16_t i = 0; i < phnum; ++i)
415 consider_phdr (phdrs.p32[i].p_type,
416 phdrs.p32[i].p_vaddr, phdrs.p32[i].p_memsz,
417 phdrs.p32[i].p_offset, phdrs.p32[i].p_filesz,
418 phdrs.p32[i].p_align);
419 }
420 else
421 {
422 if (elf64_xlatetom (&xlateto, &xlatefrom,
423 ehdr.e32.e_ident[EI_DATA]) == NULL)
424 found_bias = false; /* Trigger error check. */
425 else
426 for (uint_fast16_t i = 0; i < phnum; ++i)
427 consider_phdr (phdrs.p64[i].p_type,
428 phdrs.p64[i].p_vaddr, phdrs.p64[i].p_memsz,
429 phdrs.p64[i].p_offset, phdrs.p64[i].p_filesz,
430 phdrs.p64[i].p_align);
431 }
432
433 finish_portion (&ph_buffer, &ph_buffer_size);
434
435 /* We must have seen the segment covering offset 0, or else the ELF
436 header we read at START was not produced by these program headers. */
437 if (unlikely (!found_bias))
438 return finish ();
439
440 /* Now we know enough to report a module for sure: its bounds. */
441 module_start += bias;
442 module_end += bias;
443
444 dyn_vaddr += bias;
445
446 /* Our return value now says to skip the segments contained
447 within the module.
448 XXX handle gaps
449 */
450 ndx = addr_segndx (dwfl, segment, module_end);
451
452 /* Examine its .dynamic section to get more interesting details.
453 If it has DT_SONAME, we'll use that as the module name.
454 We need its DT_STRTAB and DT_STRSZ to decipher DT_SONAME,
455 and they also tell us the essential portion of the file
456 for fetching symbols. */
457 GElf_Addr soname_stroff = 0;
458 GElf_Addr dynstr_vaddr = 0;
459 GElf_Xword dynstrsz = 0;
460 inline bool consider_dyn (GElf_Sxword tag, GElf_Xword val)
461 {
462 switch (tag)
463 {
464 default:
465 return false;
466
467 case DT_SONAME:
468 soname_stroff = val;
469 break;
470
471 case DT_STRTAB:
472 dynstr_vaddr = val;
473 break;
474
475 case DT_STRSZ:
476 dynstrsz = val;
477 break;
478 }
479
480 return soname_stroff != 0 && dynstr_vaddr != 0 && dynstrsz != 0;
481 }
482
483 const size_t dyn_entsize = (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32
484 ? sizeof (Elf32_Dyn) : sizeof (Elf64_Dyn));
485 void *dyn_data = NULL;
486 size_t dyn_data_size = 0;
487 if (dyn_filesz != 0 && dyn_filesz % dyn_entsize == 0
488 && ! read_portion (&dyn_data, &dyn_data_size, dyn_vaddr, dyn_filesz))
489 {
490 union
491 {
492 Elf32_Dyn d32[dyn_filesz / sizeof (Elf32_Dyn)];
493 Elf64_Dyn d64[dyn_filesz / sizeof (Elf64_Dyn)];
494 } dyn;
495
496 xlatefrom.d_type = xlateto.d_type = ELF_T_DYN;
497 xlatefrom.d_buf = (void *) dyn_data;
498 xlatefrom.d_size = dyn_filesz;
499 xlateto.d_buf = &dyn;
500 xlateto.d_size = sizeof dyn;
501
502 if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32)
503 {
504 if (elf32_xlatetom (&xlateto, &xlatefrom,
505 ehdr.e32.e_ident[EI_DATA]) != NULL)
506 for (size_t i = 0; i < dyn_filesz / sizeof dyn.d32[0]; ++i)
507 if (consider_dyn (dyn.d32[i].d_tag, dyn.d32[i].d_un.d_val))
508 break;
509 }
510 else
511 {
512 if (elf64_xlatetom (&xlateto, &xlatefrom,
513 ehdr.e32.e_ident[EI_DATA]) != NULL)
514 for (size_t i = 0; i < dyn_filesz / sizeof dyn.d64[0]; ++i)
515 if (consider_dyn (dyn.d64[i].d_tag, dyn.d64[i].d_un.d_val))
516 break;
517 }
518 }
519 finish_portion (&dyn_data, &dyn_data_size);
520
521 /* We'll use the name passed in or a stupid default if not DT_SONAME. */
522 if (name == NULL)
523 name = ehdr.e32.e_type == ET_EXEC ? "[exe]" : "[dso]";
524
525 void *soname = NULL;
526 size_t soname_size = 0;
527 if (dynstrsz != 0 && dynstr_vaddr != 0)
528 {
529 /* We know the bounds of the .dynstr section. */
530 dynstr_vaddr += bias;
531 if (unlikely (dynstr_vaddr + dynstrsz > module_end))
532 dynstrsz = 0;
533
534 /* Try to get the DT_SONAME string. */
535 if (soname_stroff != 0 && soname_stroff < dynstrsz - 1
536 && ! read_portion (&soname, &soname_size,
537 dynstr_vaddr + soname_stroff, 0))
538 name = soname;
539 }
540
541 /* Now that we have chosen the module's name and bounds, report it.
542 If we found a build ID, report that too. */
543
544 Dwfl_Module *mod = INTUSE(dwfl_report_module) (dwfl, name,
545 module_start, module_end);
546 if (likely (mod != NULL) && build_id != NULL
547 && unlikely (INTUSE(dwfl_module_report_build_id) (mod,
548 build_id,
549 build_id_len,
550 build_id_vaddr)))
551 {
552 mod->gc = true;
553 mod = NULL;
554 }
555
556 /* At this point we do not need BUILD_ID or NAME any more.
557 They have been copied. */
558 free (build_id);
559 finish_portion (&soname, &soname_size);
560
561 if (unlikely (mod == NULL))
562 {
563 ndx = -1;
564 return finish ();
565 }
566
567 /* We have reported the module. Now let the caller decide whether we
568 should read the whole thing in right now. */
569
570 const GElf_Off cost = (contiguous < file_trimmed_end ? total_filesz
571 : buffer_available >= contiguous ? 0
572 : contiguous - buffer_available);
573 const GElf_Off worthwhile = ((dynstr_vaddr == 0 || dynstrsz == 0) ? 0
574 : dynstr_vaddr + dynstrsz - start);
575 const GElf_Off whole = MAX (file_trimmed_end, shdrs_end);
576
577 Elf *elf = NULL;
578 if ((*read_eagerly) (MODCB_ARGS (mod), &buffer, &buffer_available,
579 cost, worthwhile, whole, contiguous,
580 read_eagerly_arg, &elf)
581 && elf == NULL)
582 {
583 /* The caller wants to read the whole file in right now, but hasn't
584 done it for us. Fill in a local image of the virtual file. */
585
586 void *contents = calloc (1, file_trimmed_end);
587 if (unlikely (contents == NULL))
588 return finish ();
589
590 inline void final_read (size_t offset, GElf_Addr vaddr, size_t size)
591 {
592 void *into = contents + offset;
593 size_t read_size = size;
594 (void) segment_read (addr_segndx (dwfl, segment, vaddr),
595 &into, &read_size, vaddr, size);
596 }
597
598 if (contiguous < file_trimmed_end)
599 {
600 /* We can't use the memory image verbatim as the file image.
601 So we'll be reading into a local image of the virtual file. */
602
603 inline void read_phdr (GElf_Word type, GElf_Addr vaddr,
604 GElf_Off offset, GElf_Xword filesz)
605 {
606 if (type == PT_LOAD)
607 final_read (offset, vaddr + bias, filesz);
608 }
609
610 if (ehdr.e32.e_ident[EI_CLASS] == ELFCLASS32)
611 for (uint_fast16_t i = 0; i < phnum; ++i)
612 read_phdr (phdrs.p32[i].p_type, phdrs.p32[i].p_vaddr,
613 phdrs.p32[i].p_offset, phdrs.p32[i].p_filesz);
614 else
615 for (uint_fast16_t i = 0; i < phnum; ++i)
616 read_phdr (phdrs.p64[i].p_type, phdrs.p64[i].p_vaddr,
617 phdrs.p64[i].p_offset, phdrs.p64[i].p_filesz);
618 }
619 else
620 {
621 /* The whole file sits contiguous in memory,
622 but the caller didn't want to just do it. */
623
624 const size_t have = MIN (buffer_available, file_trimmed_end);
625 memcpy (contents, buffer, have);
626
627 if (have < file_trimmed_end)
628 final_read (have, start + have, file_trimmed_end - have);
629 }
630
631 elf = elf_memory (contents, file_trimmed_end);
632 if (unlikely (elf == NULL))
633 free (contents);
634 else
635 elf->flags |= ELF_F_MALLOCED;
636 }
637
638 if (elf != NULL)
639 {
640 /* Install the file in the module. */
641 mod->main.elf = elf;
642 mod->main.bias = bias;
643 }
644
645 return finish ();
646 }
647