• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- User-mode execve() for ELF executables           m_ume_elf.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2000-2013 Julian Seward
11       jseward@acm.org
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 #if defined(VGO_linux)
32 
33 #include "pub_core_basics.h"
34 #include "pub_core_vki.h"
35 
36 #include "pub_core_aspacemgr.h"     // various mapping fns
37 #include "pub_core_debuglog.h"
38 #include "pub_core_libcassert.h"    // VG_(exit), vg_assert
39 #include "pub_core_libcbase.h"      // VG_(memcmp), etc
40 #include "pub_core_libcprint.h"
41 #include "pub_core_libcfile.h"      // VG_(open) et al
42 #include "pub_core_machine.h"       // VG_ELF_CLASS (XXX: which should be moved)
43 #include "pub_core_mallocfree.h"    // VG_(malloc), VG_(free)
44 #include "pub_core_syscall.h"       // VG_(strerror)
45 #include "pub_core_ume.h"           // self
46 
47 #include "priv_ume.h"
48 
49 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
50 #define _GNU_SOURCE
51 #define _FILE_OFFSET_BITS 64
52 /* This is for ELF types etc, and also the AT_ constants. */
53 #include <elf.h>
54 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
55 
56 
57 #if     VG_WORDSIZE == 8
58 #define ESZ(x)  Elf64_##x
59 #elif   VG_WORDSIZE == 4
60 #define ESZ(x)  Elf32_##x
61 #else
62 #error VG_WORDSIZE needs to ==4 or ==8
63 #endif
64 
/* Everything readelf() collects about one ELF object. */
struct elfinfo
{
   ESZ(Ehdr)    e;      /* ELF file header, read from offset 0 of the file */
   ESZ(Phdr)    *p;     /* heap-allocated array of e.e_phnum program headers */
   Int          fd;     /* file descriptor the headers were read from */
};
71 
/* Inspect the outcome RES of an attempt to map LEN bytes at BASE.
   Does nothing if the mapping succeeded.  Otherwise prints a diagnostic
   (plus a hint when the error is EINVAL) and terminates via VG_(exit). */
static void check_mmap(SysRes res, Addr base, SizeT len)
{
   /* Nothing to report for a successful mapping. */
   if (!sr_isError(res))
      return;

   VG_(printf)("valgrind: mmap(0x%llx, %lld) failed in UME "
               "with error %lu (%s).\n",
               (ULong)base, (Long)len,
               sr_Err(res), VG_(strerror)(sr_Err(res)));

   /* For EINVAL, tell the user about one possible cause. */
   if (VKI_EINVAL == sr_Err(res))
      VG_(printf)("valgrind: this can be caused by executables with "
                  "very large text, data or bss segments.\n");

   VG_(exit)(1);
}
86 
87 /*------------------------------------------------------------*/
88 /*--- Loading ELF files                                    ---*/
89 /*------------------------------------------------------------*/
90 
91 static
readelf(Int fd,const HChar * filename)92 struct elfinfo *readelf(Int fd, const HChar *filename)
93 {
94    SysRes sres;
95    struct elfinfo *e = VG_(malloc)("ume.re.1", sizeof(*e));
96    Int phsz;
97 
98    e->fd = fd;
99 
100    sres = VG_(pread)(fd, &e->e, sizeof(e->e), 0);
101    if (sr_isError(sres) || sr_Res(sres) != sizeof(e->e)) {
102       VG_(printf)("valgrind: %s: can't read ELF header: %s\n",
103                   filename, VG_(strerror)(sr_Err(sres)));
104       goto bad;
105    }
106 
107    if (VG_(memcmp)(&e->e.e_ident[0], ELFMAG, SELFMAG) != 0) {
108       VG_(printf)("valgrind: %s: bad ELF magic number\n", filename);
109       goto bad;
110    }
111    if (e->e.e_ident[EI_CLASS] != VG_ELF_CLASS) {
112       VG_(printf)("valgrind: wrong ELF executable class "
113                   "(eg. 32-bit instead of 64-bit)\n");
114       goto bad;
115    }
116    if (e->e.e_ident[EI_DATA] != VG_ELF_DATA2XXX) {
117       VG_(printf)("valgrind: executable has wrong endian-ness\n");
118       goto bad;
119    }
120    if (!(e->e.e_type == ET_EXEC || e->e.e_type == ET_DYN)) {
121       VG_(printf)("valgrind: this is not an executable\n");
122       goto bad;
123    }
124 
125    if (e->e.e_machine != VG_ELF_MACHINE) {
126       VG_(printf)("valgrind: executable is not for "
127                   "this architecture\n");
128       goto bad;
129    }
130 
131    if (e->e.e_phentsize != sizeof(ESZ(Phdr))) {
132       VG_(printf)("valgrind: sizeof ELF Phdr wrong\n");
133       goto bad;
134    }
135 
136    phsz = sizeof(ESZ(Phdr)) * e->e.e_phnum;
137    e->p = VG_(malloc)("ume.re.2", phsz);
138 
139    sres = VG_(pread)(fd, e->p, phsz, e->e.e_phoff);
140    if (sr_isError(sres) || sr_Res(sres) != phsz) {
141       VG_(printf)("valgrind: can't read phdr: %s\n",
142                   VG_(strerror)(sr_Err(sres)));
143       VG_(free)(e->p);
144       goto bad;
145    }
146 
147    return e;
148 
149   bad:
150    VG_(free)(e);
151    return NULL;
152 }
153 
154 /* Map an ELF file.  Returns the brk address. */
155 static
mapelf(struct elfinfo * e,ESZ (Addr)base)156 ESZ(Addr) mapelf(struct elfinfo *e, ESZ(Addr) base)
157 {
158    Int    i;
159    SysRes res;
160    ESZ(Addr) elfbrk = 0;
161 
162    for (i = 0; i < e->e.e_phnum; i++) {
163       ESZ(Phdr) *ph = &e->p[i];
164       ESZ(Addr) addr, brkaddr;
165       ESZ(Word) memsz;
166 
167       if (ph->p_type != PT_LOAD)
168          continue;
169 
170       addr    = ph->p_vaddr+base;
171       memsz   = ph->p_memsz;
172       brkaddr = addr+memsz;
173 
174       if (brkaddr > elfbrk)
175          elfbrk = brkaddr;
176    }
177 
178    for (i = 0; i < e->e.e_phnum; i++) {
179       ESZ(Phdr) *ph = &e->p[i];
180       ESZ(Addr) addr, bss, brkaddr;
181       ESZ(Off) off;
182       ESZ(Word) filesz;
183       ESZ(Word) memsz;
184       unsigned prot = 0;
185 
186       if (ph->p_type != PT_LOAD)
187          continue;
188 
189       if (ph->p_flags & PF_X) prot |= VKI_PROT_EXEC;
190       if (ph->p_flags & PF_W) prot |= VKI_PROT_WRITE;
191       if (ph->p_flags & PF_R) prot |= VKI_PROT_READ;
192 
193       addr    = ph->p_vaddr+base;
194       off     = ph->p_offset;
195       filesz  = ph->p_filesz;
196       bss     = addr+filesz;
197       memsz   = ph->p_memsz;
198       brkaddr = addr+memsz;
199 
200       // Tom says: In the following, do what the Linux kernel does and only
201       // map the pages that are required instead of rounding everything to
202       // the specified alignment (ph->p_align).  (AMD64 doesn't work if you
203       // use ph->p_align -- part of stage2's memory gets trashed somehow.)
204       //
205       // The condition handles the case of a zero-length segment.
206       if (VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr) > 0) {
207          if (0) VG_(debugLog)(0,"ume","mmap_file_fixed_client #1\n");
208          res = VG_(am_mmap_file_fixed_client)(
209                   VG_PGROUNDDN(addr),
210                   VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr),
211                   prot, /*VKI_MAP_FIXED|VKI_MAP_PRIVATE, */
212                   e->fd, VG_PGROUNDDN(off)
213                );
214          if (0) VG_(am_show_nsegments)(0,"after #1");
215          check_mmap(res, VG_PGROUNDDN(addr),
216                          VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr));
217       }
218 
219       // if memsz > filesz, fill the remainder with zeroed pages
220       if (memsz > filesz) {
221          UInt bytes;
222 
223          bytes = VG_PGROUNDUP(brkaddr)-VG_PGROUNDUP(bss);
224          if (bytes > 0) {
225             if (0) VG_(debugLog)(0,"ume","mmap_anon_fixed_client #2\n");
226             res = VG_(am_mmap_anon_fixed_client)(
227                      VG_PGROUNDUP(bss), bytes,
228                      prot
229                   );
230             if (0) VG_(am_show_nsegments)(0,"after #2");
231             check_mmap(res, VG_PGROUNDUP(bss), bytes);
232          }
233 
234          bytes = bss & (VKI_PAGE_SIZE - 1);
235 
236          // The 'prot' condition allows for a read-only bss
237          if ((prot & VKI_PROT_WRITE) && (bytes > 0)) {
238             bytes = VKI_PAGE_SIZE - bytes;
239             VG_(memset)((void *)bss, 0, bytes);
240          }
241       }
242    }
243 
244    return elfbrk;
245 }
246 
VG_(match_ELF)247 Bool VG_(match_ELF)(const void *hdr, SizeT len)
248 {
249    const ESZ(Ehdr) *e = hdr;
250    return (len > sizeof(*e)) && VG_(memcmp)(&e->e_ident[0], ELFMAG, SELFMAG) == 0;
251 }
252 
253 
254 /* load_ELF pulls an ELF executable into the address space, prepares
255    it for execution, and writes info about it into INFO.  In
256    particular it fills in .init_eip, which is the starting point.
257 
258    Returns zero on success, non-zero (a VKI_E.. value) on failure.
259 
260    The sequence of activities is roughly as follows:
261 
262    - use readelf() to extract program header info from the exe file.
263 
264    - scan the program header, collecting info (not sure what all those
265      info-> fields are, or whether they are used, but still) and in
266      particular looking out fo the PT_INTERP header, which describes
267      the interpreter.  If such a field is found, the space needed to
268      hold the interpreter is computed into interp_size.
269 
270    - map the executable in, by calling mapelf().  This maps in all
271      loadable sections, and I _think_ also creates any .bss areas
272      required.  mapelf() returns the address just beyond the end of
273      the furthest-along mapping it creates.  The executable is mapped
274      starting at EBASE, which is usually read from it (eg, 0x8048000
275      etc) except if it's a PIE, in which case I'm not sure what
276      happens.
277 
278      The returned address is recorded in info->brkbase as the start
279      point of the brk (data) segment, as it is traditional to place
280      the data segment just after the executable.  Neither load_ELF nor
281      mapelf creates the brk segment, though: that is for the caller of
282      load_ELF to attend to.
283 
284    - If the initial phdr scan didn't find any mention of an
285      interpreter (interp == NULL), this must be a statically linked
286      executable, and we're pretty much done.
287 
288    - Otherwise, we need to use mapelf() a second time to load the
289      interpreter.  The interpreter can go anywhere, but mapelf() wants
290      to be told a specific address to put it at.  So an advisory query
291      is passed to aspacem, asking where it would put an anonymous
292      client mapping of size INTERP_SIZE.  That address is then used
293      as the mapping address for the interpreter.
294 
295    - The entry point in INFO is set to the interpreter's entry point,
296      and we're done.  */
VG_(load_ELF)297 Int VG_(load_ELF)(Int fd, const HChar* name, /*MOD*/ExeInfo* info)
298 {
299    SysRes sres;
300    struct elfinfo *e;
301    struct elfinfo *interp = NULL;
302    ESZ(Addr) minaddr = ~0;      /* lowest mapped address */
303    ESZ(Addr) maxaddr = 0;       /* highest mapped address */
304    ESZ(Addr) interp_addr = 0;   /* interpreter (ld.so) address */
305    ESZ(Word) interp_size = 0;   /* interpreter size */
306    /* ESZ(Word) interp_align = VKI_PAGE_SIZE; */ /* UNUSED */
307    Int i;
308    void *entry;
309    ESZ(Addr) ebase = 0;
310 
311 #  if defined(HAVE_PIE)
312    ebase = info->exe_base;
313 #  endif
314 
315    e = readelf(fd, name);
316 
317    if (e == NULL)
318       return VKI_ENOEXEC;
319 
320    /* The kernel maps position-independent executables at TASK_SIZE*2/3;
321       duplicate this behavior as close as we can. */
322    if (e->e.e_type == ET_DYN && ebase == 0) {
323       ebase = VG_PGROUNDDN(info->exe_base
324                            + (info->exe_end - info->exe_base) * 2 / 3);
325       /* We really don't want to load PIEs at zero or too close.  It
326          works, but it's unrobust (NULL pointer reads and writes
327          become legit, which is really bad) and causes problems for
328          exp-ptrcheck, which assumes all numbers below 1MB are
329          nonpointers.  So, hackily, move it above 1MB. */
330       /* Later .. it appears ppc32-linux tries to put [vdso] at 1MB,
331          which totally screws things up, because nothing else can go
332          there.  The size of [vdso] is around 2 or 3 pages, so bump
333          the hacky load addess along by 8 * VKI_PAGE_SIZE to be safe. */
334       /* Later .. on mips64 we can't use 0x108000, because mapelf will
335          fail. */
336 #     if defined(VGP_mips64_linux)
337       if (ebase < 0x100000)
338          ebase = 0x100000;
339 #     else
340       vg_assert(VKI_PAGE_SIZE >= 4096); /* stay sane */
341       ESZ(Addr) hacky_load_address = 0x100000 + 8 * VKI_PAGE_SIZE;
342       if (ebase < hacky_load_address)
343          ebase = hacky_load_address;
344 #     endif
345    }
346 
347    info->phnum = e->e.e_phnum;
348    info->entry = e->e.e_entry + ebase;
349    info->phdr = 0;
350    info->stack_prot = VKI_PROT_READ|VKI_PROT_WRITE|VKI_PROT_EXEC;
351 
352    for (i = 0; i < e->e.e_phnum; i++) {
353       ESZ(Phdr) *ph = &e->p[i];
354 
355       switch(ph->p_type) {
356       case PT_PHDR:
357          info->phdr = ph->p_vaddr + ebase;
358          break;
359 
360       case PT_LOAD:
361          if (ph->p_vaddr < minaddr)
362             minaddr = ph->p_vaddr;
363          if (ph->p_vaddr+ph->p_memsz > maxaddr)
364             maxaddr = ph->p_vaddr+ph->p_memsz;
365          break;
366 
367       case PT_INTERP: {
368          HChar *buf = VG_(malloc)("ume.LE.1", ph->p_filesz+1);
369          Int j;
370          Int intfd;
371          Int baseaddr_set;
372 
373          VG_(pread)(fd, buf, ph->p_filesz, ph->p_offset);
374          buf[ph->p_filesz] = '\0';
375 
376          sres = VG_(open)(buf, VKI_O_RDONLY, 0);
377          if (sr_isError(sres)) {
378             VG_(printf)("valgrind: m_ume.c: can't open interpreter\n");
379             VG_(exit)(1);
380          }
381          intfd = sr_Res(sres);
382 
383          interp = readelf(intfd, buf);
384          if (interp == NULL) {
385             VG_(printf)("valgrind: m_ume.c: can't read interpreter\n");
386             return 1;
387          }
388          VG_(free)(buf);
389 
390          baseaddr_set = 0;
391          for (j = 0; j < interp->e.e_phnum; j++) {
392             ESZ(Phdr) *iph = &interp->p[j];
393             ESZ(Addr) end;
394 
395             if (iph->p_type != PT_LOAD || iph->p_memsz == 0)
396                continue;
397 
398             if (!baseaddr_set) {
399                interp_addr  = iph->p_vaddr;
400                /* interp_align = iph->p_align; */ /* UNUSED */
401                baseaddr_set = 1;
402             }
403 
404             /* assumes that all segments in the interp are close */
405             end = (iph->p_vaddr - interp_addr) + iph->p_memsz;
406 
407             if (end > interp_size)
408                interp_size = end;
409          }
410          break;
411 
412 #     if defined(PT_GNU_STACK)
413       /* Android's elf.h doesn't appear to have PT_GNU_STACK. */
414       case PT_GNU_STACK:
415          if ((ph->p_flags & PF_X) == 0) info->stack_prot &= ~VKI_PROT_EXEC;
416          if ((ph->p_flags & PF_W) == 0) info->stack_prot &= ~VKI_PROT_WRITE;
417          if ((ph->p_flags & PF_R) == 0) info->stack_prot &= ~VKI_PROT_READ;
418          break;
419 #     endif
420 
421       default:
422          // do nothing
423          break;
424       }
425       }
426    }
427 
428    if (info->phdr == 0)
429       info->phdr = minaddr + ebase + e->e.e_phoff;
430 
431    if (info->exe_base != info->exe_end) {
432       if (minaddr >= maxaddr ||
433           (minaddr + ebase < info->exe_base ||
434            maxaddr + ebase > info->exe_end)) {
435          VG_(printf)("Executable range %p-%p is outside the\n"
436                      "acceptable range %p-%p\n",
437                      (char *)minaddr + ebase, (char *)maxaddr + ebase,
438                      (char *)info->exe_base,  (char *)info->exe_end);
439          return VKI_ENOMEM;
440       }
441    }
442 
443    info->brkbase = mapelf(e, ebase);    /* map the executable */
444 
445    if (info->brkbase == 0)
446       return VKI_ENOMEM;
447 
448    if (interp != NULL) {
449       /* reserve a chunk of address space for interpreter */
450       MapRequest mreq;
451       Addr       advised;
452       Bool       ok;
453 
454       /* Don't actually reserve the space.  Just get an advisory
455          indicating where it would be allocated, and pass that to
456          mapelf(), which in turn asks aspacem to do some fixed maps at
457          the specified address.  This is a bit of hack, but it should
458          work because there should be no intervening transactions with
459          aspacem which could cause those fixed maps to fail.
460 
461          Placement policy is:
462 
463          if the interpreter asks to be loaded at zero
464             ignore that and put it wherever we like (mappings at zero
465             are bad news)
466          else
467             try and put it where it asks for, but if that doesn't work,
468             just put it anywhere.
469       */
470       if (interp_addr == 0) {
471          mreq.rkind = MAny;
472          mreq.start = 0;
473          mreq.len   = interp_size;
474       } else {
475          mreq.rkind = MHint;
476          mreq.start = interp_addr;
477          mreq.len   = interp_size;
478       }
479 
480       advised = VG_(am_get_advisory)( &mreq, True/*client*/, &ok );
481 
482       if (!ok) {
483          /* bomb out */
484          SysRes res = VG_(mk_SysRes_Error)(VKI_EINVAL);
485          if (0) VG_(printf)("reserve for interp: failed\n");
486          check_mmap(res, (Addr)interp_addr, interp_size);
487          /*NOTREACHED*/
488       }
489 
490       (void)mapelf(interp, (ESZ(Addr))advised - interp_addr);
491 
492       VG_(close)(interp->fd);
493 
494       entry = (void *)(advised - interp_addr + interp->e.e_entry);
495 
496       info->interp_offset = advised - interp_addr;
497 
498       VG_(free)(interp->p);
499       VG_(free)(interp);
500    } else
501       entry = (void *)(ebase + e->e.e_entry);
502 
503    info->exe_base = minaddr + ebase;
504    info->exe_end  = maxaddr + ebase;
505 
506 #if defined(VGP_ppc64be_linux)
507    /* On PPC64BE, ELF ver 1, a func ptr is represented by a TOC entry ptr.
508       This TOC entry contains three words; the first word is the function
509       address, the second word is the TOC ptr (r2), and the third word
510       is the static chain value. */
511    info->init_ip  = ((ULong*)entry)[0];
512    info->init_toc = ((ULong*)entry)[1];
513    info->init_ip  += info->interp_offset;
514    info->init_toc += info->interp_offset;
515 #elif defined(VGP_ppc64le_linux)
516    /* On PPC64LE, ELF ver 2. API doesn't use a func ptr */
517    info->init_ip  = (Addr)entry;
518    info->init_toc = 0; /* meaningless on this platform */
519 #else
520    info->init_ip  = (Addr)entry;
521    info->init_toc = 0; /* meaningless on this platform */
522 #endif
523    VG_(free)(e->p);
524    VG_(free)(e);
525 
526    return 0;
527 }
528 
529 #endif // defined(VGO_linux)
530 
531 /*--------------------------------------------------------------------*/
532 /*--- end                                                          ---*/
533 /*--------------------------------------------------------------------*/
534