• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- User-mode execve() for ELF executables           m_ume_elf.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2000-2012 Julian Seward
11       jseward@acm.org
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 #if defined(VGO_linux)
32 
33 #include "pub_core_basics.h"
34 #include "pub_core_vki.h"
35 
36 #include "pub_core_aspacemgr.h"     // various mapping fns
37 #include "pub_core_debuglog.h"
38 #include "pub_core_libcassert.h"    // VG_(exit), vg_assert
39 #include "pub_core_libcbase.h"      // VG_(memcmp), etc
40 #include "pub_core_libcprint.h"
41 #include "pub_core_libcfile.h"      // VG_(open) et al
42 #include "pub_core_machine.h"       // VG_ELF_CLASS (XXX: which should be moved)
43 #include "pub_core_mallocfree.h"    // VG_(malloc), VG_(free)
44 #include "pub_core_syscall.h"       // VG_(strerror)
45 #include "pub_core_ume.h"           // self
46 
47 #include "priv_ume.h"
48 
49 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
50 #define _GNU_SOURCE
51 #define _FILE_OFFSET_BITS 64
52 /* This is for ELF types etc, and also the AT_ constants. */
53 #include <elf.h>
54 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
55 
56 
57 #if     VG_WORDSIZE == 8
58 #define ESZ(x)  Elf64_##x
59 #elif   VG_WORDSIZE == 4
60 #define ESZ(x)  Elf32_##x
61 #else
62 #error VG_WORDSIZE needs to ==4 or ==8
63 #endif
64 
65 struct elfinfo
66 {
67    ESZ(Ehdr)    e;
68    ESZ(Phdr)    *p;
69    Int          fd;
70 };
71 
/* Check the outcome of a mapping request made while loading an
   executable.  If RES is not an error this does nothing.  Otherwise it
   prints a diagnostic naming BASE and LEN (which are used only for that
   message), adds a hint when the error is EINVAL, and calls
   VG_(exit)(1). */
static void check_mmap(SysRes res, Addr base, SizeT len)
{
   if (!sr_isError(res))
      return;

   VG_(printf)("valgrind: mmap(0x%llx, %lld) failed in UME "
               "with error %lu (%s).\n",
               (ULong)base, (Long)len,
               sr_Err(res), VG_(strerror)(sr_Err(res)) );

   if (VKI_EINVAL == sr_Err(res)) {
      VG_(printf)("valgrind: this can be caused by executables with "
                  "very large text, data or bss segments.\n");
   }

   VG_(exit)(1);
}
86 
87 /*------------------------------------------------------------*/
88 /*--- Loading ELF files                                    ---*/
89 /*------------------------------------------------------------*/
90 
91 static
readelf(Int fd,const char * filename)92 struct elfinfo *readelf(Int fd, const char *filename)
93 {
94    SysRes sres;
95    struct elfinfo *e = VG_(malloc)("ume.re.1", sizeof(*e));
96    Int phsz;
97 
98    vg_assert(e);
99    e->fd = fd;
100 
101    sres = VG_(pread)(fd, &e->e, sizeof(e->e), 0);
102    if (sr_isError(sres) || sr_Res(sres) != sizeof(e->e)) {
103       VG_(printf)("valgrind: %s: can't read ELF header: %s\n",
104                   filename, VG_(strerror)(sr_Err(sres)));
105       goto bad;
106    }
107 
108    if (VG_(memcmp)(&e->e.e_ident[0], ELFMAG, SELFMAG) != 0) {
109       VG_(printf)("valgrind: %s: bad ELF magic number\n", filename);
110       goto bad;
111    }
112    if (e->e.e_ident[EI_CLASS] != VG_ELF_CLASS) {
113       VG_(printf)("valgrind: wrong ELF executable class "
114                   "(eg. 32-bit instead of 64-bit)\n");
115       goto bad;
116    }
117    if (e->e.e_ident[EI_DATA] != VG_ELF_DATA2XXX) {
118       VG_(printf)("valgrind: executable has wrong endian-ness\n");
119       goto bad;
120    }
121    if (!(e->e.e_type == ET_EXEC || e->e.e_type == ET_DYN)) {
122       VG_(printf)("valgrind: this is not an executable\n");
123       goto bad;
124    }
125 
126    if (e->e.e_machine != VG_ELF_MACHINE) {
127       VG_(printf)("valgrind: executable is not for "
128                   "this architecture\n");
129       goto bad;
130    }
131 
132    if (e->e.e_phentsize != sizeof(ESZ(Phdr))) {
133       VG_(printf)("valgrind: sizeof ELF Phdr wrong\n");
134       goto bad;
135    }
136 
137    phsz = sizeof(ESZ(Phdr)) * e->e.e_phnum;
138    e->p = VG_(malloc)("ume.re.2", phsz);
139    vg_assert(e->p);
140 
141    sres = VG_(pread)(fd, e->p, phsz, e->e.e_phoff);
142    if (sr_isError(sres) || sr_Res(sres) != phsz) {
143       VG_(printf)("valgrind: can't read phdr: %s\n",
144                   VG_(strerror)(sr_Err(sres)));
145       VG_(free)(e->p);
146       goto bad;
147    }
148 
149    return e;
150 
151   bad:
152    VG_(free)(e);
153    return NULL;
154 }
155 
156 /* Map an ELF file.  Returns the brk address. */
157 static
mapelf(struct elfinfo * e,ESZ (Addr)base)158 ESZ(Addr) mapelf(struct elfinfo *e, ESZ(Addr) base)
159 {
160    Int    i;
161    SysRes res;
162    ESZ(Addr) elfbrk = 0;
163 
164    for (i = 0; i < e->e.e_phnum; i++) {
165       ESZ(Phdr) *ph = &e->p[i];
166       ESZ(Addr) addr, brkaddr;
167       ESZ(Word) memsz;
168 
169       if (ph->p_type != PT_LOAD)
170          continue;
171 
172       addr    = ph->p_vaddr+base;
173       memsz   = ph->p_memsz;
174       brkaddr = addr+memsz;
175 
176       if (brkaddr > elfbrk)
177          elfbrk = brkaddr;
178    }
179 
180    for (i = 0; i < e->e.e_phnum; i++) {
181       ESZ(Phdr) *ph = &e->p[i];
182       ESZ(Addr) addr, bss, brkaddr;
183       ESZ(Off) off;
184       ESZ(Word) filesz;
185       ESZ(Word) memsz;
186       unsigned prot = 0;
187 
188       if (ph->p_type != PT_LOAD)
189          continue;
190 
191       if (ph->p_flags & PF_X) prot |= VKI_PROT_EXEC;
192       if (ph->p_flags & PF_W) prot |= VKI_PROT_WRITE;
193       if (ph->p_flags & PF_R) prot |= VKI_PROT_READ;
194 
195       addr    = ph->p_vaddr+base;
196       off     = ph->p_offset;
197       filesz  = ph->p_filesz;
198       bss     = addr+filesz;
199       memsz   = ph->p_memsz;
200       brkaddr = addr+memsz;
201 
202       // Tom says: In the following, do what the Linux kernel does and only
203       // map the pages that are required instead of rounding everything to
204       // the specified alignment (ph->p_align).  (AMD64 doesn't work if you
205       // use ph->p_align -- part of stage2's memory gets trashed somehow.)
206       //
207       // The condition handles the case of a zero-length segment.
208       if (VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr) > 0) {
209          if (0) VG_(debugLog)(0,"ume","mmap_file_fixed_client #1\n");
210          res = VG_(am_mmap_file_fixed_client)(
211                   VG_PGROUNDDN(addr),
212                   VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr),
213                   prot, /*VKI_MAP_FIXED|VKI_MAP_PRIVATE, */
214                   e->fd, VG_PGROUNDDN(off)
215                );
216          if (0) VG_(am_show_nsegments)(0,"after #1");
217          check_mmap(res, VG_PGROUNDDN(addr),
218                          VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr));
219       }
220 
221       // if memsz > filesz, fill the remainder with zeroed pages
222       if (memsz > filesz) {
223          UInt bytes;
224 
225          bytes = VG_PGROUNDUP(brkaddr)-VG_PGROUNDUP(bss);
226          if (bytes > 0) {
227             if (0) VG_(debugLog)(0,"ume","mmap_anon_fixed_client #2\n");
228             res = VG_(am_mmap_anon_fixed_client)(
229                      VG_PGROUNDUP(bss), bytes,
230                      prot
231                   );
232             if (0) VG_(am_show_nsegments)(0,"after #2");
233             check_mmap(res, VG_PGROUNDUP(bss), bytes);
234          }
235 
236          bytes = bss & (VKI_PAGE_SIZE - 1);
237 
238          // The 'prot' condition allows for a read-only bss
239          if ((prot & VKI_PROT_WRITE) && (bytes > 0)) {
240             bytes = VKI_PAGE_SIZE - bytes;
241             VG_(memset)((char *)bss, 0, bytes);
242          }
243       }
244    }
245 
246    return elfbrk;
247 }
248 
VG_(match_ELF)249 Bool VG_(match_ELF)(Char *hdr, Int len)
250 {
251    ESZ(Ehdr) *e = (ESZ(Ehdr) *)hdr;
252    return (len > sizeof(*e)) && VG_(memcmp)(&e->e_ident[0], ELFMAG, SELFMAG) == 0;
253 }
254 
255 
256 /* load_ELF pulls an ELF executable into the address space, prepares
257    it for execution, and writes info about it into INFO.  In
258    particular it fills in .init_eip, which is the starting point.
259 
260    Returns zero on success, non-zero (a VKI_E.. value) on failure.
261 
262    The sequence of activities is roughly as follows:
263 
264    - use readelf() to extract program header info from the exe file.
265 
266    - scan the program header, collecting info (not sure what all those
267      info-> fields are, or whether they are used, but still) and in
268      particular looking out fo the PT_INTERP header, which describes
269      the interpreter.  If such a field is found, the space needed to
270      hold the interpreter is computed into interp_size.
271 
272    - map the executable in, by calling mapelf().  This maps in all
273      loadable sections, and I _think_ also creates any .bss areas
274      required.  mapelf() returns the address just beyond the end of
275      the furthest-along mapping it creates.  The executable is mapped
276      starting at EBASE, which is usually read from it (eg, 0x8048000
277      etc) except if it's a PIE, in which case I'm not sure what
278      happens.
279 
280      The returned address is recorded in info->brkbase as the start
281      point of the brk (data) segment, as it is traditional to place
282      the data segment just after the executable.  Neither load_ELF nor
283      mapelf creates the brk segment, though: that is for the caller of
284      load_ELF to attend to.
285 
286    - If the initial phdr scan didn't find any mention of an
287      interpreter (interp == NULL), this must be a statically linked
288      executable, and we're pretty much done.
289 
290    - Otherwise, we need to use mapelf() a second time to load the
291      interpreter.  The interpreter can go anywhere, but mapelf() wants
292      to be told a specific address to put it at.  So an advisory query
293      is passed to aspacem, asking where it would put an anonymous
294      client mapping of size INTERP_SIZE.  That address is then used
295      as the mapping address for the interpreter.
296 
297    - The entry point in INFO is set to the interpreter's entry point,
298      and we're done.  */
VG_(load_ELF)299 Int VG_(load_ELF)(Int fd, const HChar* name, /*MOD*/ExeInfo* info)
300 {
301    SysRes sres;
302    struct elfinfo *e;
303    struct elfinfo *interp = NULL;
304    ESZ(Addr) minaddr = ~0;      /* lowest mapped address */
305    ESZ(Addr) maxaddr = 0;       /* highest mapped address */
306    ESZ(Addr) interp_addr = 0;   /* interpreter (ld.so) address */
307    ESZ(Word) interp_size = 0;   /* interpreter size */
308    /* ESZ(Word) interp_align = VKI_PAGE_SIZE; */ /* UNUSED */
309    Int i;
310    void *entry;
311    ESZ(Addr) ebase = 0;
312 
313    /* The difference between where the interpreter got mapped and
314       where it asked to be mapped.  Needed for computing the ppc64 ELF
315       entry point and initial tocptr (R2) value. */
316    ESZ(Word) interp_offset = 0;
317 
318 #ifdef HAVE_PIE
319    ebase = info->exe_base;
320 #endif
321 
322    e = readelf(fd, name);
323 
324    if (e == NULL)
325       return VKI_ENOEXEC;
326 
327    /* The kernel maps position-independent executables at TASK_SIZE*2/3;
328       duplicate this behavior as close as we can. */
329    if (e->e.e_type == ET_DYN && ebase == 0) {
330       ebase = VG_PGROUNDDN(info->exe_base
331                            + (info->exe_end - info->exe_base) * 2 / 3);
332       /* We really don't want to load PIEs at zero or too close.  It
333          works, but it's unrobust (NULL pointer reads and writes
334          become legit, which is really bad) and causes problems for
335          exp-ptrcheck, which assumes all numbers below 1MB are
336          nonpointers.  So, hackily, move it above 1MB. */
337       /* Later .. is appears ppc32-linux tries to put [vdso] at 1MB,
338          which totally screws things up, because nothing else can go
339          there.  So bump the hacky load addess along by 0x8000, to
340          0x108000. */
341       if (ebase < 0x108000)
342          ebase = 0x108000;
343    }
344 
345    info->phnum = e->e.e_phnum;
346    info->entry = e->e.e_entry + ebase;
347    info->phdr = 0;
348 
349    for (i = 0; i < e->e.e_phnum; i++) {
350       ESZ(Phdr) *ph = &e->p[i];
351 
352       switch(ph->p_type) {
353       case PT_PHDR:
354          info->phdr = ph->p_vaddr + ebase;
355          break;
356 
357       case PT_LOAD:
358          if (ph->p_vaddr < minaddr)
359             minaddr = ph->p_vaddr;
360          if (ph->p_vaddr+ph->p_memsz > maxaddr)
361             maxaddr = ph->p_vaddr+ph->p_memsz;
362          break;
363 
364       case PT_INTERP: {
365          HChar *buf = VG_(malloc)("ume.LE.1", ph->p_filesz+1);
366          Int j;
367          Int intfd;
368          Int baseaddr_set;
369 
370          vg_assert(buf);
371          VG_(pread)(fd, buf, ph->p_filesz, ph->p_offset);
372          buf[ph->p_filesz] = '\0';
373 
374          sres = VG_(open)(buf, VKI_O_RDONLY, 0);
375          if (sr_isError(sres)) {
376             VG_(printf)("valgrind: m_ume.c: can't open interpreter\n");
377             VG_(exit)(1);
378          }
379          intfd = sr_Res(sres);
380 
381          interp = readelf(intfd, buf);
382          if (interp == NULL) {
383             VG_(printf)("valgrind: m_ume.c: can't read interpreter\n");
384             return 1;
385          }
386          VG_(free)(buf);
387 
388          baseaddr_set = 0;
389          for (j = 0; j < interp->e.e_phnum; j++) {
390             ESZ(Phdr) *iph = &interp->p[j];
391             ESZ(Addr) end;
392 
393             if (iph->p_type != PT_LOAD || iph->p_memsz == 0)
394                continue;
395 
396             if (!baseaddr_set) {
397                interp_addr  = iph->p_vaddr;
398                /* interp_align = iph->p_align; */ /* UNUSED */
399                baseaddr_set = 1;
400             }
401 
402             /* assumes that all segments in the interp are close */
403             end = (iph->p_vaddr - interp_addr) + iph->p_memsz;
404 
405             if (end > interp_size)
406                interp_size = end;
407          }
408          break;
409 
410       default:
411          // do nothing
412          break;
413       }
414       }
415    }
416 
417    if (info->phdr == 0)
418       info->phdr = minaddr + ebase + e->e.e_phoff;
419 
420    if (info->exe_base != info->exe_end) {
421       if (minaddr >= maxaddr ||
422           (minaddr + ebase < info->exe_base ||
423            maxaddr + ebase > info->exe_end)) {
424          VG_(printf)("Executable range %p-%p is outside the\n"
425                      "acceptable range %p-%p\n",
426                      (char *)minaddr + ebase, (char *)maxaddr + ebase,
427                      (char *)info->exe_base,  (char *)info->exe_end);
428          return VKI_ENOMEM;
429       }
430    }
431 
432    info->brkbase = mapelf(e, ebase);    /* map the executable */
433 
434    if (info->brkbase == 0)
435       return VKI_ENOMEM;
436 
437    if (interp != NULL) {
438       /* reserve a chunk of address space for interpreter */
439       MapRequest mreq;
440       Addr       advised;
441       Bool       ok;
442 
443       /* Don't actually reserve the space.  Just get an advisory
444          indicating where it would be allocated, and pass that to
445          mapelf(), which in turn asks aspacem to do some fixed maps at
446          the specified address.  This is a bit of hack, but it should
447          work because there should be no intervening transactions with
448          aspacem which could cause those fixed maps to fail.
449 
450          Placement policy is:
451 
452          if the interpreter asks to be loaded at zero
453             ignore that and put it wherever we like (mappings at zero
454             are bad news)
455          else
456             try and put it where it asks for, but if that doesn't work,
457             just put it anywhere.
458       */
459       if (interp_addr == 0) {
460          mreq.rkind = MAny;
461          mreq.start = 0;
462          mreq.len   = interp_size;
463       } else {
464          mreq.rkind = MHint;
465          mreq.start = interp_addr;
466          mreq.len   = interp_size;
467       }
468 
469       advised = VG_(am_get_advisory)( &mreq, True/*client*/, &ok );
470 
471       if (!ok) {
472          /* bomb out */
473          SysRes res = VG_(mk_SysRes_Error)(VKI_EINVAL);
474          if (0) VG_(printf)("reserve for interp: failed\n");
475          check_mmap(res, (Addr)interp_addr, interp_size);
476          /*NOTREACHED*/
477       }
478 
479       (void)mapelf(interp, (ESZ(Addr))advised - interp_addr);
480 
481       VG_(close)(interp->fd);
482 
483       entry = (void *)(advised - interp_addr + interp->e.e_entry);
484       info->interp_base = (ESZ(Addr))advised;
485       interp_offset = advised - interp_addr;
486 
487       VG_(free)(interp->p);
488       VG_(free)(interp);
489    } else
490       entry = (void *)(ebase + e->e.e_entry);
491 
492    info->exe_base = minaddr + ebase;
493    info->exe_end  = maxaddr + ebase;
494 
495 #if defined(VGP_ppc64_linux)
496    /* On PPC64, a func ptr is represented by a TOC entry ptr.  This
497       TOC entry contains three words; the first word is the function
498       address, the second word is the TOC ptr (r2), and the third word
499       is the static chain value. */
500    info->init_ip  = ((ULong*)entry)[0];
501    info->init_toc = ((ULong*)entry)[1];
502    info->init_ip  += interp_offset;
503    info->init_toc += interp_offset;
504 #else
505    info->init_ip  = (Addr)entry;
506    info->init_toc = 0; /* meaningless on this platform */
507    (void) interp_offset; /* stop gcc complaining it is unused */
508 #endif
509    VG_(free)(e->p);
510    VG_(free)(e);
511 
512    return 0;
513 }
514 
515 #endif // defined(VGO_linux)
516 
517 /*--------------------------------------------------------------------*/
518 /*--- end                                                          ---*/
519 /*--------------------------------------------------------------------*/
520