/*--------------------------------------------------------------------*/
/*--- User-mode execve() for ELF executables          m_ume_elf.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2013 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#if defined(VGO_linux)

#include "pub_core_basics.h"
#include "pub_core_vki.h"

#include "pub_core_aspacemgr.h"     // various mapping fns
#include "pub_core_debuglog.h"
#include "pub_core_libcassert.h"    // VG_(exit), vg_assert
#include "pub_core_libcbase.h"      // VG_(memcmp), etc
#include "pub_core_libcprint.h"
#include "pub_core_libcfile.h"      // VG_(open) et al
#include "pub_core_machine.h"       // VG_ELF_CLASS (XXX: which should be moved)
#include "pub_core_mallocfree.h"    // VG_(malloc), VG_(free)
#include "pub_core_syscall.h"       // VG_(strerror)
#include "pub_core_ume.h"           // self

#include "priv_ume.h"

/* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64
/* This is for ELF types etc, and also the AT_ constants. */
#include <elf.h>
/* --- !!! --- EXTERNAL HEADERS end --- !!! --- */


#if VG_WORDSIZE == 8
#define ESZ(x) Elf64_##x
#elif VG_WORDSIZE == 4
#define ESZ(x) Elf32_##x
#else
#error VG_WORDSIZE needs to ==4 or ==8
#endif
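
/* So on a 64-bit target, for example, ESZ(Ehdr) expands to Elf64_Ehdr
   and ESZ(Phdr) to Elf64_Phdr. */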

struct elfinfo
{
   ESZ(Ehdr) e;
   ESZ(Phdr) *p;
   Int fd;
};

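/* Complain and exit if a fixed mmap done on the client's behalf by
   the loader below failed; otherwise do nothing. */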
static void check_mmap(SysRes res, Addr base, SizeT len)
{
   if (sr_isError(res)) {
      VG_(printf)("valgrind: mmap(0x%llx, %lld) failed in UME "
                  "with error %lu (%s).\n",
                  (ULong)base, (Long)len,
                  sr_Err(res), VG_(strerror)(sr_Err(res)) );
      if (sr_Err(res) == VKI_EINVAL) {
         VG_(printf)("valgrind: this can be caused by executables with "
                     "very large text, data or bss segments.\n");
      }
      VG_(exit)(1);
   }
}

/*------------------------------------------------------------*/
/*--- Loading ELF files                                    ---*/
/*------------------------------------------------------------*/

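/* Read and sanity-check the ELF header and program header table of FD.
   On success, returns a freshly malloc'd elfinfo describing them (the
   caller eventually frees both the struct and its ->p array); on
   failure, prints a diagnostic and returns NULL.  FD is left open. */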
static
struct elfinfo *readelf(Int fd, const HChar *filename)
{
   SysRes sres;
   struct elfinfo *e = VG_(malloc)("ume.re.1", sizeof(*e));
   Int phsz;

   vg_assert(e);
   e->fd = fd;

   sres = VG_(pread)(fd, &e->e, sizeof(e->e), 0);
   if (sr_isError(sres) || sr_Res(sres) != sizeof(e->e)) {
      VG_(printf)("valgrind: %s: can't read ELF header: %s\n",
                  filename, VG_(strerror)(sr_Err(sres)));
      goto bad;
   }

   if (VG_(memcmp)(&e->e.e_ident[0], ELFMAG, SELFMAG) != 0) {
      VG_(printf)("valgrind: %s: bad ELF magic number\n", filename);
      goto bad;
   }
   if (e->e.e_ident[EI_CLASS] != VG_ELF_CLASS) {
      VG_(printf)("valgrind: wrong ELF executable class "
                  "(eg. 32-bit instead of 64-bit)\n");
      goto bad;
   }
   if (e->e.e_ident[EI_DATA] != VG_ELF_DATA2XXX) {
      VG_(printf)("valgrind: executable has wrong endian-ness\n");
      goto bad;
   }
   if (!(e->e.e_type == ET_EXEC || e->e.e_type == ET_DYN)) {
      VG_(printf)("valgrind: this is not an executable\n");
      goto bad;
   }

   if (e->e.e_machine != VG_ELF_MACHINE) {
      VG_(printf)("valgrind: executable is not for "
                  "this architecture\n");
      goto bad;
   }

   if (e->e.e_phentsize != sizeof(ESZ(Phdr))) {
      VG_(printf)("valgrind: sizeof ELF Phdr wrong\n");
      goto bad;
   }

   phsz = sizeof(ESZ(Phdr)) * e->e.e_phnum;
   e->p = VG_(malloc)("ume.re.2", phsz);
   vg_assert(e->p);

   sres = VG_(pread)(fd, e->p, phsz, e->e.e_phoff);
   if (sr_isError(sres) || sr_Res(sres) != phsz) {
      VG_(printf)("valgrind: can't read phdr: %s\n",
                  VG_(strerror)(sr_Err(sres)));
      VG_(free)(e->p);
      goto bad;
   }

   return e;

  bad:
   VG_(free)(e);
   return NULL;
}

/* Map an ELF file into the client's address space.  Returns the brk
   address, ie. the address just past the end of the highest-addressed
   PT_LOAD segment. */
static
ESZ(Addr) mapelf(struct elfinfo *e, ESZ(Addr) base)
{
   Int i;
   SysRes res;
   ESZ(Addr) elfbrk = 0;

   for (i = 0; i < e->e.e_phnum; i++) {
      ESZ(Phdr) *ph = &e->p[i];
      ESZ(Addr) addr, brkaddr;
      ESZ(Word) memsz;

      if (ph->p_type != PT_LOAD)
         continue;

      addr    = ph->p_vaddr + base;
      memsz   = ph->p_memsz;
      brkaddr = addr + memsz;

      if (brkaddr > elfbrk)
         elfbrk = brkaddr;
   }

   for (i = 0; i < e->e.e_phnum; i++) {
      ESZ(Phdr) *ph = &e->p[i];
      ESZ(Addr) addr, bss, brkaddr;
      ESZ(Off) off;
      ESZ(Word) filesz;
      ESZ(Word) memsz;
      unsigned prot = 0;

      if (ph->p_type != PT_LOAD)
         continue;

      if (ph->p_flags & PF_X) prot |= VKI_PROT_EXEC;
      if (ph->p_flags & PF_W) prot |= VKI_PROT_WRITE;
      if (ph->p_flags & PF_R) prot |= VKI_PROT_READ;

      addr    = ph->p_vaddr + base;
      off     = ph->p_offset;
      filesz  = ph->p_filesz;
      bss     = addr + filesz;
      memsz   = ph->p_memsz;
      brkaddr = addr + memsz;

      // Tom says: In the following, do what the Linux kernel does and only
      // map the pages that are required instead of rounding everything to
      // the specified alignment (ph->p_align).  (AMD64 doesn't work if you
      // use ph->p_align -- part of stage2's memory gets trashed somehow.)
      //
      // The condition handles the case of a zero-length segment.
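      //
      // Worked example (hypothetical numbers, 4KB pages): a PT_LOAD with
      // p_vaddr=0x401234, p_filesz=0x1000, p_memsz=0x3000 and base=0 gives
      // addr=0x401234, bss=0x402234, brkaddr=0x404234.  The file-backed map
      // below then covers 0x401000..0x403000
      // (VG_PGROUNDDN(addr)..VG_PGROUNDUP(bss)), the anonymous map covers
      // 0x403000..0x405000 (up to VG_PGROUNDUP(brkaddr)), and the tail of
      // the last file-backed page, 0x402234..0x402fff, is zeroed by the
      // memset further down.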
      if (VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr) > 0) {
         if (0) VG_(debugLog)(0,"ume","mmap_file_fixed_client #1\n");
         res = VG_(am_mmap_file_fixed_client)(
                  VG_PGROUNDDN(addr),
                  VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr),
                  prot, /*VKI_MAP_FIXED|VKI_MAP_PRIVATE, */
                  e->fd, VG_PGROUNDDN(off)
               );
         if (0) VG_(am_show_nsegments)(0,"after #1");
         check_mmap(res, VG_PGROUNDDN(addr),
                         VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr));
      }

      // if memsz > filesz, fill the remainder with zeroed pages
      if (memsz > filesz) {
         UInt bytes;

         bytes = VG_PGROUNDUP(brkaddr)-VG_PGROUNDUP(bss);
         if (bytes > 0) {
            if (0) VG_(debugLog)(0,"ume","mmap_anon_fixed_client #2\n");
            res = VG_(am_mmap_anon_fixed_client)(
                     VG_PGROUNDUP(bss), bytes,
                     prot
                  );
            if (0) VG_(am_show_nsegments)(0,"after #2");
            check_mmap(res, VG_PGROUNDUP(bss), bytes);
         }

         bytes = bss & (VKI_PAGE_SIZE - 1);

         // The 'prot' condition allows for a read-only bss
         if ((prot & VKI_PROT_WRITE) && (bytes > 0)) {
            bytes = VKI_PAGE_SIZE - bytes;
            VG_(memset)((void *)bss, 0, bytes);
         }
      }
   }

   return elfbrk;
}

Bool VG_(match_ELF)(const void *hdr, Int len)
{
   const ESZ(Ehdr) *e = hdr;
   return (len > sizeof(*e))
          && VG_(memcmp)(&e->e_ident[0], ELFMAG, SELFMAG) == 0;
}


/* load_ELF pulls an ELF executable into the address space, prepares
   it for execution, and writes info about it into INFO.  In
   particular it fills in .init_ip, which is the starting point.

   Returns zero on success, non-zero (a VKI_E.. value) on failure.

   The sequence of activities is roughly as follows:

   - use readelf() to extract program header info from the exe file.

   - scan the program headers, collecting info (not sure what all those
     info-> fields are, or whether they are used, but still) and in
     particular looking out for the PT_INTERP header, which describes
     the interpreter.  If such a header is found, the space needed to
     hold the interpreter is computed into interp_size.

   - map the executable in, by calling mapelf().  This maps in all
     loadable sections, and I _think_ also creates any .bss areas
     required.  mapelf() returns the address just beyond the end of
     the furthest-along mapping it creates.  The executable is mapped
     starting at EBASE, which is usually read from it (eg, 0x8048000
     etc) except if it's a PIE, in which case I'm not sure what
     happens.

     The returned address is recorded in info->brkbase as the start
     point of the brk (data) segment, as it is traditional to place
     the data segment just after the executable.  Neither load_ELF nor
     mapelf creates the brk segment, though: that is for the caller of
     load_ELF to attend to.

   - If the initial phdr scan didn't find any mention of an
     interpreter (interp == NULL), this must be a statically linked
     executable, and we're pretty much done.

   - Otherwise, we need to use mapelf() a second time to load the
     interpreter.  The interpreter can go anywhere, but mapelf() wants
     to be told a specific address to put it at.  So an advisory query
     is passed to aspacem, asking where it would put an anonymous
     client mapping of size INTERP_SIZE.  That address is then used
     as the mapping address for the interpreter.

   - The entry point in INFO is set to the interpreter's entry point,
     and we're done. */
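
/* Minimal usage sketch (hypothetical caller, error handling elided;
   the real caller lives elsewhere in m_ume):

      ExeInfo info;
      VG_(memset)(&info, 0, sizeof(info));
      info.exe_base = lowest_acceptable_addr;    // chosen by the caller
      info.exe_end  = highest_acceptable_addr;
      SysRes fdres  = VG_(open)(name, VKI_O_RDONLY, 0);
      Int    err    = VG_(load_ELF)(sr_Res(fdres), name, &info);
      if (err == 0) {
         // info.init_ip is the first address to execute (the
         // interpreter's entry point for dynamically linked exes),
         // info.brkbase is where the caller should place the brk
         // segment, and info.entry/phdr/phnum feed the client's auxv.
      }
*/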
Int VG_(load_ELF)(Int fd, const HChar* name, /*MOD*/ExeInfo* info)
{
   SysRes sres;
   struct elfinfo *e;
   struct elfinfo *interp = NULL;
   ESZ(Addr) minaddr = ~0;      /* lowest mapped address */
   ESZ(Addr) maxaddr = 0;       /* highest mapped address */
   ESZ(Addr) interp_addr = 0;   /* interpreter (ld.so) address */
   ESZ(Word) interp_size = 0;   /* interpreter size */
   /* ESZ(Word) interp_align = VKI_PAGE_SIZE; */ /* UNUSED */
   Int i;
   void *entry;
   ESZ(Addr) ebase = 0;

#  if defined(HAVE_PIE)
   ebase = info->exe_base;
#  endif

   e = readelf(fd, name);

   if (e == NULL)
      return VKI_ENOEXEC;

   /* The kernel maps position-independent executables at TASK_SIZE*2/3;
      duplicate this behavior as closely as we can. */
   if (e->e.e_type == ET_DYN && ebase == 0) {
      ebase = VG_PGROUNDDN(info->exe_base
                           + (info->exe_end - info->exe_base) * 2 / 3);
      /* We really don't want to load PIEs at zero or too close.  It
         works, but it's not robust (NULL pointer reads and writes
         become legit, which is really bad) and causes problems for
         exp-ptrcheck, which assumes all numbers below 1MB are
         nonpointers.  So, hackily, move it above 1MB. */
      /* Later .. it appears ppc32-linux tries to put [vdso] at 1MB,
         which totally screws things up, because nothing else can go
         there.  The size of [vdso] is around 2 or 3 pages, so bump
         the hacky load address along by 8 * VKI_PAGE_SIZE to be safe. */
      /* Later .. on mips64 we can't use 0x108000, because mapelf will
         fail. */
#     if defined(VGP_mips64_linux)
      if (ebase < 0x100000)
         ebase = 0x100000;
#     else
      vg_assert(VKI_PAGE_SIZE >= 4096); /* stay sane */
      ESZ(Addr) hacky_load_address = 0x100000 + 8 * VKI_PAGE_SIZE;
      if (ebase < hacky_load_address)
         ebase = hacky_load_address;
#     endif
   }
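
   /* Example with hypothetical numbers: on a 32-bit layout where the
      acceptable range is exe_base = 0 .. exe_end = 0xC0000000, the
      computation above yields ebase = 0x80000000 (TASK_SIZE*2/3),
      comfortably above the 1MB-plus-8-pages floor. */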

   info->phnum = e->e.e_phnum;
   info->entry = e->e.e_entry + ebase;
   info->phdr = 0;
   info->stack_prot = VKI_PROT_READ|VKI_PROT_WRITE|VKI_PROT_EXEC;

   for (i = 0; i < e->e.e_phnum; i++) {
      ESZ(Phdr) *ph = &e->p[i];

      switch(ph->p_type) {
      case PT_PHDR:
         info->phdr = ph->p_vaddr + ebase;
         break;

      case PT_LOAD:
         if (ph->p_vaddr < minaddr)
            minaddr = ph->p_vaddr;
         if (ph->p_vaddr+ph->p_memsz > maxaddr)
            maxaddr = ph->p_vaddr+ph->p_memsz;
         break;

      case PT_INTERP: {
         HChar *buf = VG_(malloc)("ume.LE.1", ph->p_filesz+1);
         Int j;
         Int intfd;
         Int baseaddr_set;

         vg_assert(buf);
         VG_(pread)(fd, buf, ph->p_filesz, ph->p_offset);
         buf[ph->p_filesz] = '\0';

         sres = VG_(open)(buf, VKI_O_RDONLY, 0);
         if (sr_isError(sres)) {
            VG_(printf)("valgrind: m_ume.c: can't open interpreter\n");
            VG_(exit)(1);
         }
         intfd = sr_Res(sres);

         interp = readelf(intfd, buf);
         if (interp == NULL) {
            VG_(printf)("valgrind: m_ume.c: can't read interpreter\n");
            return 1;
         }
         VG_(free)(buf);

         baseaddr_set = 0;
         for (j = 0; j < interp->e.e_phnum; j++) {
            ESZ(Phdr) *iph = &interp->p[j];
            ESZ(Addr) end;

            if (iph->p_type != PT_LOAD || iph->p_memsz == 0)
               continue;

            if (!baseaddr_set) {
               interp_addr = iph->p_vaddr;
               /* interp_align = iph->p_align; */ /* UNUSED */
               baseaddr_set = 1;
            }

            /* assumes that all segments in the interpreter are close
               together */
            end = (iph->p_vaddr - interp_addr) + iph->p_memsz;

            if (end > interp_size)
               interp_size = end;
         }
         break;

#     if defined(PT_GNU_STACK)
      /* Android's elf.h doesn't appear to have PT_GNU_STACK. */
      case PT_GNU_STACK:
         if ((ph->p_flags & PF_X) == 0) info->stack_prot &= ~VKI_PROT_EXEC;
         if ((ph->p_flags & PF_W) == 0) info->stack_prot &= ~VKI_PROT_WRITE;
         if ((ph->p_flags & PF_R) == 0) info->stack_prot &= ~VKI_PROT_READ;
         break;
#     endif

      default:
         // do nothing
         break;
      }
      }
   }

   if (info->phdr == 0)
      info->phdr = minaddr + ebase + e->e.e_phoff;

   if (info->exe_base != info->exe_end) {
      if (minaddr >= maxaddr ||
          (minaddr + ebase < info->exe_base ||
           maxaddr + ebase > info->exe_end)) {
         VG_(printf)("Executable range %p-%p is outside the\n"
                     "acceptable range %p-%p\n",
                     (char *)minaddr + ebase, (char *)maxaddr + ebase,
                     (char *)info->exe_base,  (char *)info->exe_end);
         return VKI_ENOMEM;
      }
   }

   info->brkbase = mapelf(e, ebase);   /* map the executable */

   if (info->brkbase == 0)
      return VKI_ENOMEM;

   if (interp != NULL) {
      /* reserve a chunk of address space for interpreter */
      MapRequest mreq;
      Addr       advised;
      Bool       ok;

      /* Don't actually reserve the space.  Just get an advisory
         indicating where it would be allocated, and pass that to
         mapelf(), which in turn asks aspacem to do some fixed maps at
         the specified address.  This is a bit of a hack, but it should
         work because there should be no intervening transactions with
         aspacem which could cause those fixed maps to fail.

         Placement policy is:

         if the interpreter asks to be loaded at zero
            ignore that and put it wherever we like (mappings at zero
            are bad news)
         else
            try to put it where it asks to be, but if that doesn't
            work, just put it anywhere.
      */
      if (interp_addr == 0) {
         mreq.rkind = MAny;
         mreq.start = 0;
         mreq.len   = interp_size;
      } else {
         mreq.rkind = MHint;
         mreq.start = interp_addr;
         mreq.len   = interp_size;
      }

      advised = VG_(am_get_advisory)( &mreq, True/*client*/, &ok );

      if (!ok) {
         /* bomb out */
         SysRes res = VG_(mk_SysRes_Error)(VKI_EINVAL);
         if (0) VG_(printf)("reserve for interp: failed\n");
         check_mmap(res, (Addr)interp_addr, interp_size);
         /*NOTREACHED*/
      }

      (void)mapelf(interp, (ESZ(Addr))advised - interp_addr);

      VG_(close)(interp->fd);

      entry = (void *)(advised - interp_addr + interp->e.e_entry);
      info->interp_offset = advised - interp_addr;

      VG_(free)(interp->p);
      VG_(free)(interp);
   } else
      entry = (void *)(ebase + e->e.e_entry);

   info->exe_base = minaddr + ebase;
   info->exe_end  = maxaddr + ebase;

#if defined(VGP_ppc64_linux)
   /* On PPC64, a func ptr is represented by a TOC entry ptr.  This
      TOC entry contains three words; the first word is the function
      address, the second word is the TOC ptr (r2), and the third word
      is the static chain value. */
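   /* That is, 'entry' points at a function descriptor which, roughly
      speaking (a sketch, not a definition taken from any header here),
      is laid out as:

         typedef struct {
            ULong func_addr;    // word 0: code address  -> init_ip
            ULong toc_ptr;      // word 1: TOC ptr (r2)  -> init_toc
            ULong static_chain; // word 2: static chain value (unused here)
         } FuncDescriptor;
   */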
   info->init_ip  = ((ULong*)entry)[0];
   info->init_toc = ((ULong*)entry)[1];
   info->init_ip  += info->interp_offset;
   info->init_toc += info->interp_offset;
#else
   info->init_ip  = (Addr)entry;
   info->init_toc = 0; /* meaningless on this platform */
#endif
   VG_(free)(e->p);
   VG_(free)(e);

   return 0;
}

#endif // defined(VGO_linux)

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/