• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- User-mode execve() for ELF executables           m_ume_elf.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2000-2013 Julian Seward
11       jseward@acm.org
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 #if defined(VGO_linux)
32 
33 #include "pub_core_basics.h"
34 #include "pub_core_vki.h"
35 
36 #include "pub_core_aspacemgr.h"     // various mapping fns
37 #include "pub_core_debuglog.h"
38 #include "pub_core_libcassert.h"    // VG_(exit), vg_assert
39 #include "pub_core_libcbase.h"      // VG_(memcmp), etc
40 #include "pub_core_libcprint.h"
41 #include "pub_core_libcfile.h"      // VG_(open) et al
42 #include "pub_core_machine.h"       // VG_ELF_CLASS (XXX: which should be moved)
43 #include "pub_core_mallocfree.h"    // VG_(malloc), VG_(free)
44 #include "pub_core_syscall.h"       // VG_(strerror)
45 #include "pub_core_ume.h"           // self
46 
47 #include "priv_ume.h"
48 
49 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
50 #define _GNU_SOURCE
51 #define _FILE_OFFSET_BITS 64
52 /* This is for ELF types etc, and also the AT_ constants. */
53 #include <elf.h>
54 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
55 
56 
57 #if     VG_WORDSIZE == 8
58 #define ESZ(x)  Elf64_##x
59 #elif   VG_WORDSIZE == 4
60 #define ESZ(x)  Elf32_##x
61 #else
62 #error VG_WORDSIZE needs to ==4 or ==8
63 #endif
64 
65 struct elfinfo
66 {
67    ESZ(Ehdr)    e;
68    ESZ(Phdr)    *p;
69    Int          fd;
70 };
71 
/* Verify the outcome of a client mapping attempt.

   RES is the result of the mapping call; BASE and LEN are the address
   and length that were requested and are used only for the
   diagnostic.  If RES is not an error this does nothing.  Otherwise
   it prints the failure (plus a hint when the error is VKI_EINVAL)
   and terminates via VG_(exit)(1), so it does not return. */
static void check_mmap(SysRes res, Addr base, SizeT len)
{
   if (!sr_isError(res))
      return;

   VG_(printf)("valgrind: mmap(0x%llx, %lld) failed in UME "
               "with error %lu (%s).\n",
               (ULong)base, (Long)len,
               sr_Err(res), VG_(strerror)(sr_Err(res)) );

   if (sr_Err(res) == VKI_EINVAL) {
      VG_(printf)("valgrind: this can be caused by executables with "
                  "very large text, data or bss segments.\n");
   }

   VG_(exit)(1);
}
86 
87 /*------------------------------------------------------------*/
88 /*--- Loading ELF files                                    ---*/
89 /*------------------------------------------------------------*/
90 
91 static
readelf(Int fd,const HChar * filename)92 struct elfinfo *readelf(Int fd, const HChar *filename)
93 {
94    SysRes sres;
95    struct elfinfo *e = VG_(malloc)("ume.re.1", sizeof(*e));
96    Int phsz;
97 
98    vg_assert(e);
99    e->fd = fd;
100 
101    sres = VG_(pread)(fd, &e->e, sizeof(e->e), 0);
102    if (sr_isError(sres) || sr_Res(sres) != sizeof(e->e)) {
103       VG_(printf)("valgrind: %s: can't read ELF header: %s\n",
104                   filename, VG_(strerror)(sr_Err(sres)));
105       goto bad;
106    }
107 
108    if (VG_(memcmp)(&e->e.e_ident[0], ELFMAG, SELFMAG) != 0) {
109       VG_(printf)("valgrind: %s: bad ELF magic number\n", filename);
110       goto bad;
111    }
112    if (e->e.e_ident[EI_CLASS] != VG_ELF_CLASS) {
113       VG_(printf)("valgrind: wrong ELF executable class "
114                   "(eg. 32-bit instead of 64-bit)\n");
115       goto bad;
116    }
117    if (e->e.e_ident[EI_DATA] != VG_ELF_DATA2XXX) {
118       VG_(printf)("valgrind: executable has wrong endian-ness\n");
119       goto bad;
120    }
121    if (!(e->e.e_type == ET_EXEC || e->e.e_type == ET_DYN)) {
122       VG_(printf)("valgrind: this is not an executable\n");
123       goto bad;
124    }
125 
126    if (e->e.e_machine != VG_ELF_MACHINE) {
127       VG_(printf)("valgrind: executable is not for "
128                   "this architecture\n");
129       goto bad;
130    }
131 
132    if (e->e.e_phentsize != sizeof(ESZ(Phdr))) {
133       VG_(printf)("valgrind: sizeof ELF Phdr wrong\n");
134       goto bad;
135    }
136 
137    phsz = sizeof(ESZ(Phdr)) * e->e.e_phnum;
138    e->p = VG_(malloc)("ume.re.2", phsz);
139    vg_assert(e->p);
140 
141    sres = VG_(pread)(fd, e->p, phsz, e->e.e_phoff);
142    if (sr_isError(sres) || sr_Res(sres) != phsz) {
143       VG_(printf)("valgrind: can't read phdr: %s\n",
144                   VG_(strerror)(sr_Err(sres)));
145       VG_(free)(e->p);
146       goto bad;
147    }
148 
149    return e;
150 
151   bad:
152    VG_(free)(e);
153    return NULL;
154 }
155 
156 /* Map an ELF file.  Returns the brk address. */
157 static
mapelf(struct elfinfo * e,ESZ (Addr)base)158 ESZ(Addr) mapelf(struct elfinfo *e, ESZ(Addr) base)
159 {
160    Int    i;
161    SysRes res;
162    ESZ(Addr) elfbrk = 0;
163 
164    for (i = 0; i < e->e.e_phnum; i++) {
165       ESZ(Phdr) *ph = &e->p[i];
166       ESZ(Addr) addr, brkaddr;
167       ESZ(Word) memsz;
168 
169       if (ph->p_type != PT_LOAD)
170          continue;
171 
172       addr    = ph->p_vaddr+base;
173       memsz   = ph->p_memsz;
174       brkaddr = addr+memsz;
175 
176       if (brkaddr > elfbrk)
177          elfbrk = brkaddr;
178    }
179 
180    for (i = 0; i < e->e.e_phnum; i++) {
181       ESZ(Phdr) *ph = &e->p[i];
182       ESZ(Addr) addr, bss, brkaddr;
183       ESZ(Off) off;
184       ESZ(Word) filesz;
185       ESZ(Word) memsz;
186       unsigned prot = 0;
187 
188       if (ph->p_type != PT_LOAD)
189          continue;
190 
191       if (ph->p_flags & PF_X) prot |= VKI_PROT_EXEC;
192       if (ph->p_flags & PF_W) prot |= VKI_PROT_WRITE;
193       if (ph->p_flags & PF_R) prot |= VKI_PROT_READ;
194 
195       addr    = ph->p_vaddr+base;
196       off     = ph->p_offset;
197       filesz  = ph->p_filesz;
198       bss     = addr+filesz;
199       memsz   = ph->p_memsz;
200       brkaddr = addr+memsz;
201 
202       // Tom says: In the following, do what the Linux kernel does and only
203       // map the pages that are required instead of rounding everything to
204       // the specified alignment (ph->p_align).  (AMD64 doesn't work if you
205       // use ph->p_align -- part of stage2's memory gets trashed somehow.)
206       //
207       // The condition handles the case of a zero-length segment.
208       if (VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr) > 0) {
209          if (0) VG_(debugLog)(0,"ume","mmap_file_fixed_client #1\n");
210          res = VG_(am_mmap_file_fixed_client)(
211                   VG_PGROUNDDN(addr),
212                   VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr),
213                   prot, /*VKI_MAP_FIXED|VKI_MAP_PRIVATE, */
214                   e->fd, VG_PGROUNDDN(off)
215                );
216          if (0) VG_(am_show_nsegments)(0,"after #1");
217          check_mmap(res, VG_PGROUNDDN(addr),
218                          VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr));
219       }
220 
221       // if memsz > filesz, fill the remainder with zeroed pages
222       if (memsz > filesz) {
223          UInt bytes;
224 
225          bytes = VG_PGROUNDUP(brkaddr)-VG_PGROUNDUP(bss);
226          if (bytes > 0) {
227             if (0) VG_(debugLog)(0,"ume","mmap_anon_fixed_client #2\n");
228             res = VG_(am_mmap_anon_fixed_client)(
229                      VG_PGROUNDUP(bss), bytes,
230                      prot
231                   );
232             if (0) VG_(am_show_nsegments)(0,"after #2");
233             check_mmap(res, VG_PGROUNDUP(bss), bytes);
234          }
235 
236          bytes = bss & (VKI_PAGE_SIZE - 1);
237 
238          // The 'prot' condition allows for a read-only bss
239          if ((prot & VKI_PROT_WRITE) && (bytes > 0)) {
240             bytes = VKI_PAGE_SIZE - bytes;
241             VG_(memset)((void *)bss, 0, bytes);
242          }
243       }
244    }
245 
246    return elfbrk;
247 }
248 
VG_(match_ELF)249 Bool VG_(match_ELF)(const void *hdr, Int len)
250 {
251    const ESZ(Ehdr) *e = hdr;
252    return (len > sizeof(*e)) && VG_(memcmp)(&e->e_ident[0], ELFMAG, SELFMAG) == 0;
253 }
254 
255 
256 /* load_ELF pulls an ELF executable into the address space, prepares
257    it for execution, and writes info about it into INFO.  In
258    particular it fills in .init_eip, which is the starting point.
259 
260    Returns zero on success, non-zero (a VKI_E.. value) on failure.
261 
262    The sequence of activities is roughly as follows:
263 
264    - use readelf() to extract program header info from the exe file.
265 
266    - scan the program header, collecting info (not sure what all those
267      info-> fields are, or whether they are used, but still) and in
268      particular looking out fo the PT_INTERP header, which describes
269      the interpreter.  If such a field is found, the space needed to
270      hold the interpreter is computed into interp_size.
271 
272    - map the executable in, by calling mapelf().  This maps in all
273      loadable sections, and I _think_ also creates any .bss areas
274      required.  mapelf() returns the address just beyond the end of
275      the furthest-along mapping it creates.  The executable is mapped
276      starting at EBASE, which is usually read from it (eg, 0x8048000
277      etc) except if it's a PIE, in which case I'm not sure what
278      happens.
279 
280      The returned address is recorded in info->brkbase as the start
281      point of the brk (data) segment, as it is traditional to place
282      the data segment just after the executable.  Neither load_ELF nor
283      mapelf creates the brk segment, though: that is for the caller of
284      load_ELF to attend to.
285 
286    - If the initial phdr scan didn't find any mention of an
287      interpreter (interp == NULL), this must be a statically linked
288      executable, and we're pretty much done.
289 
290    - Otherwise, we need to use mapelf() a second time to load the
291      interpreter.  The interpreter can go anywhere, but mapelf() wants
292      to be told a specific address to put it at.  So an advisory query
293      is passed to aspacem, asking where it would put an anonymous
294      client mapping of size INTERP_SIZE.  That address is then used
295      as the mapping address for the interpreter.
296 
297    - The entry point in INFO is set to the interpreter's entry point,
298      and we're done.  */
VG_(load_ELF)299 Int VG_(load_ELF)(Int fd, const HChar* name, /*MOD*/ExeInfo* info)
300 {
301    SysRes sres;
302    struct elfinfo *e;
303    struct elfinfo *interp = NULL;
304    ESZ(Addr) minaddr = ~0;      /* lowest mapped address */
305    ESZ(Addr) maxaddr = 0;       /* highest mapped address */
306    ESZ(Addr) interp_addr = 0;   /* interpreter (ld.so) address */
307    ESZ(Word) interp_size = 0;   /* interpreter size */
308    /* ESZ(Word) interp_align = VKI_PAGE_SIZE; */ /* UNUSED */
309    Int i;
310    void *entry;
311    ESZ(Addr) ebase = 0;
312 
313 #  if defined(HAVE_PIE)
314    ebase = info->exe_base;
315 #  endif
316 
317    e = readelf(fd, name);
318 
319    if (e == NULL)
320       return VKI_ENOEXEC;
321 
322    /* The kernel maps position-independent executables at TASK_SIZE*2/3;
323       duplicate this behavior as close as we can. */
324    if (e->e.e_type == ET_DYN && ebase == 0) {
325       ebase = VG_PGROUNDDN(info->exe_base
326                            + (info->exe_end - info->exe_base) * 2 / 3);
327       /* We really don't want to load PIEs at zero or too close.  It
328          works, but it's unrobust (NULL pointer reads and writes
329          become legit, which is really bad) and causes problems for
330          exp-ptrcheck, which assumes all numbers below 1MB are
331          nonpointers.  So, hackily, move it above 1MB. */
332       /* Later .. it appears ppc32-linux tries to put [vdso] at 1MB,
333          which totally screws things up, because nothing else can go
334          there.  The size of [vdso] is around 2 or 3 pages, so bump
335          the hacky load addess along by 8 * VKI_PAGE_SIZE to be safe. */
336       /* Later .. on mips64 we can't use 0x108000, because mapelf will
337          fail. */
338 #     if defined(VGP_mips64_linux)
339       if (ebase < 0x100000)
340          ebase = 0x100000;
341 #     else
342       vg_assert(VKI_PAGE_SIZE >= 4096); /* stay sane */
343       ESZ(Addr) hacky_load_address = 0x100000 + 8 * VKI_PAGE_SIZE;
344       if (ebase < hacky_load_address)
345          ebase = hacky_load_address;
346 #     endif
347    }
348 
349    info->phnum = e->e.e_phnum;
350    info->entry = e->e.e_entry + ebase;
351    info->phdr = 0;
352    info->stack_prot = VKI_PROT_READ|VKI_PROT_WRITE|VKI_PROT_EXEC;
353 
354    for (i = 0; i < e->e.e_phnum; i++) {
355       ESZ(Phdr) *ph = &e->p[i];
356 
357       switch(ph->p_type) {
358       case PT_PHDR:
359          info->phdr = ph->p_vaddr + ebase;
360          break;
361 
362       case PT_LOAD:
363          if (ph->p_vaddr < minaddr)
364             minaddr = ph->p_vaddr;
365          if (ph->p_vaddr+ph->p_memsz > maxaddr)
366             maxaddr = ph->p_vaddr+ph->p_memsz;
367          break;
368 
369       case PT_INTERP: {
370          HChar *buf = VG_(malloc)("ume.LE.1", ph->p_filesz+1);
371          Int j;
372          Int intfd;
373          Int baseaddr_set;
374 
375          vg_assert(buf);
376          VG_(pread)(fd, buf, ph->p_filesz, ph->p_offset);
377          buf[ph->p_filesz] = '\0';
378 
379          sres = VG_(open)(buf, VKI_O_RDONLY, 0);
380          if (sr_isError(sres)) {
381             VG_(printf)("valgrind: m_ume.c: can't open interpreter\n");
382             VG_(exit)(1);
383          }
384          intfd = sr_Res(sres);
385 
386          interp = readelf(intfd, buf);
387          if (interp == NULL) {
388             VG_(printf)("valgrind: m_ume.c: can't read interpreter\n");
389             return 1;
390          }
391          VG_(free)(buf);
392 
393          baseaddr_set = 0;
394          for (j = 0; j < interp->e.e_phnum; j++) {
395             ESZ(Phdr) *iph = &interp->p[j];
396             ESZ(Addr) end;
397 
398             if (iph->p_type != PT_LOAD || iph->p_memsz == 0)
399                continue;
400 
401             if (!baseaddr_set) {
402                interp_addr  = iph->p_vaddr;
403                /* interp_align = iph->p_align; */ /* UNUSED */
404                baseaddr_set = 1;
405             }
406 
407             /* assumes that all segments in the interp are close */
408             end = (iph->p_vaddr - interp_addr) + iph->p_memsz;
409 
410             if (end > interp_size)
411                interp_size = end;
412          }
413          break;
414 
415 #     if defined(PT_GNU_STACK)
416       /* Android's elf.h doesn't appear to have PT_GNU_STACK. */
417       case PT_GNU_STACK:
418          if ((ph->p_flags & PF_X) == 0) info->stack_prot &= ~VKI_PROT_EXEC;
419          if ((ph->p_flags & PF_W) == 0) info->stack_prot &= ~VKI_PROT_WRITE;
420          if ((ph->p_flags & PF_R) == 0) info->stack_prot &= ~VKI_PROT_READ;
421          break;
422 #     endif
423 
424       default:
425          // do nothing
426          break;
427       }
428       }
429    }
430 
431    if (info->phdr == 0)
432       info->phdr = minaddr + ebase + e->e.e_phoff;
433 
434    if (info->exe_base != info->exe_end) {
435       if (minaddr >= maxaddr ||
436           (minaddr + ebase < info->exe_base ||
437            maxaddr + ebase > info->exe_end)) {
438          VG_(printf)("Executable range %p-%p is outside the\n"
439                      "acceptable range %p-%p\n",
440                      (char *)minaddr + ebase, (char *)maxaddr + ebase,
441                      (char *)info->exe_base,  (char *)info->exe_end);
442          return VKI_ENOMEM;
443       }
444    }
445 
446    info->brkbase = mapelf(e, ebase);    /* map the executable */
447 
448    if (info->brkbase == 0)
449       return VKI_ENOMEM;
450 
451    if (interp != NULL) {
452       /* reserve a chunk of address space for interpreter */
453       MapRequest mreq;
454       Addr       advised;
455       Bool       ok;
456 
457       /* Don't actually reserve the space.  Just get an advisory
458          indicating where it would be allocated, and pass that to
459          mapelf(), which in turn asks aspacem to do some fixed maps at
460          the specified address.  This is a bit of hack, but it should
461          work because there should be no intervening transactions with
462          aspacem which could cause those fixed maps to fail.
463 
464          Placement policy is:
465 
466          if the interpreter asks to be loaded at zero
467             ignore that and put it wherever we like (mappings at zero
468             are bad news)
469          else
470             try and put it where it asks for, but if that doesn't work,
471             just put it anywhere.
472       */
473       if (interp_addr == 0) {
474          mreq.rkind = MAny;
475          mreq.start = 0;
476          mreq.len   = interp_size;
477       } else {
478          mreq.rkind = MHint;
479          mreq.start = interp_addr;
480          mreq.len   = interp_size;
481       }
482 
483       advised = VG_(am_get_advisory)( &mreq, True/*client*/, &ok );
484 
485       if (!ok) {
486          /* bomb out */
487          SysRes res = VG_(mk_SysRes_Error)(VKI_EINVAL);
488          if (0) VG_(printf)("reserve for interp: failed\n");
489          check_mmap(res, (Addr)interp_addr, interp_size);
490          /*NOTREACHED*/
491       }
492 
493       (void)mapelf(interp, (ESZ(Addr))advised - interp_addr);
494 
495       VG_(close)(interp->fd);
496 
497       entry = (void *)(advised - interp_addr + interp->e.e_entry);
498       info->interp_offset = advised - interp_addr;
499 
500       VG_(free)(interp->p);
501       VG_(free)(interp);
502    } else
503       entry = (void *)(ebase + e->e.e_entry);
504 
505    info->exe_base = minaddr + ebase;
506    info->exe_end  = maxaddr + ebase;
507 
508 #if defined(VGP_ppc64_linux)
509    /* On PPC64, a func ptr is represented by a TOC entry ptr.  This
510       TOC entry contains three words; the first word is the function
511       address, the second word is the TOC ptr (r2), and the third word
512       is the static chain value. */
513    info->init_ip  = ((ULong*)entry)[0];
514    info->init_toc = ((ULong*)entry)[1];
515    info->init_ip  += info->interp_offset;
516    info->init_toc += info->interp_offset;
517 #else
518    info->init_ip  = (Addr)entry;
519    info->init_toc = 0; /* meaningless on this platform */
520 #endif
521    VG_(free)(e->p);
522    VG_(free)(e);
523 
524    return 0;
525 }
526 
527 #endif // defined(VGO_linux)
528 
529 /*--------------------------------------------------------------------*/
530 /*--- end                                                          ---*/
531 /*--------------------------------------------------------------------*/
532