1
2 /*--------------------------------------------------------------------*/
3 /*--- User-mode execve() for ELF executables m_ume_elf.c ---*/
4 /*--------------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2000-2013 Julian Seward
11 jseward@acm.org
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29 */
30
31 #if defined(VGO_linux)
32
33 #include "pub_core_basics.h"
34 #include "pub_core_vki.h"
35
36 #include "pub_core_aspacemgr.h" // various mapping fns
37 #include "pub_core_debuglog.h"
38 #include "pub_core_libcassert.h" // VG_(exit), vg_assert
39 #include "pub_core_libcbase.h" // VG_(memcmp), etc
40 #include "pub_core_libcprint.h"
41 #include "pub_core_libcfile.h" // VG_(open) et al
42 #include "pub_core_machine.h" // VG_ELF_CLASS (XXX: which should be moved)
43 #include "pub_core_mallocfree.h" // VG_(malloc), VG_(free)
44 #include "pub_core_syscall.h" // VG_(strerror)
45 #include "pub_core_ume.h" // self
46
47 #include "priv_ume.h"
48
49 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
50 #define _GNU_SOURCE
51 #define _FILE_OFFSET_BITS 64
52 /* This is for ELF types etc, and also the AT_ constants. */
53 #include <elf.h>
54 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
55
56
57 #if VG_WORDSIZE == 8
58 #define ESZ(x) Elf64_##x
59 #elif VG_WORDSIZE == 4
60 #define ESZ(x) Elf32_##x
61 #else
62 #error VG_WORDSIZE needs to ==4 or ==8
63 #endif
64
65 struct elfinfo
66 {
67 ESZ(Ehdr) e;
68 ESZ(Phdr) *p;
69 Int fd;
70 };
71
/* Check the outcome RES of an attempt to map LEN bytes at BASE.  If
   RES is a success this does nothing.  If it is an error, a
   diagnostic is printed (with an extra hint when the error is
   VKI_EINVAL) and the process exits with status 1. */
static void check_mmap(SysRes res, Addr base, SizeT len)
{
   if (!sr_isError(res))
      return;

   VG_(printf)("valgrind: mmap(0x%llx, %lld) failed in UME "
               "with error %lu (%s).\n",
               (ULong)base, (Long)len,
               sr_Err(res), VG_(strerror)(sr_Err(res)) );

   if (VKI_EINVAL == sr_Err(res)) {
      VG_(printf)("valgrind: this can be caused by executables with "
                  "very large text, data or bss segments.\n");
   }

   VG_(exit)(1);
}
86
87 /*------------------------------------------------------------*/
88 /*--- Loading ELF files ---*/
89 /*------------------------------------------------------------*/
90
91 static
readelf(Int fd,const HChar * filename)92 struct elfinfo *readelf(Int fd, const HChar *filename)
93 {
94 SysRes sres;
95 struct elfinfo *e = VG_(malloc)("ume.re.1", sizeof(*e));
96 Int phsz;
97
98 e->fd = fd;
99
100 sres = VG_(pread)(fd, &e->e, sizeof(e->e), 0);
101 if (sr_isError(sres) || sr_Res(sres) != sizeof(e->e)) {
102 VG_(printf)("valgrind: %s: can't read ELF header: %s\n",
103 filename, VG_(strerror)(sr_Err(sres)));
104 goto bad;
105 }
106
107 if (VG_(memcmp)(&e->e.e_ident[0], ELFMAG, SELFMAG) != 0) {
108 VG_(printf)("valgrind: %s: bad ELF magic number\n", filename);
109 goto bad;
110 }
111 if (e->e.e_ident[EI_CLASS] != VG_ELF_CLASS) {
112 VG_(printf)("valgrind: wrong ELF executable class "
113 "(eg. 32-bit instead of 64-bit)\n");
114 goto bad;
115 }
116 if (e->e.e_ident[EI_DATA] != VG_ELF_DATA2XXX) {
117 VG_(printf)("valgrind: executable has wrong endian-ness\n");
118 goto bad;
119 }
120 if (!(e->e.e_type == ET_EXEC || e->e.e_type == ET_DYN)) {
121 VG_(printf)("valgrind: this is not an executable\n");
122 goto bad;
123 }
124
125 if (e->e.e_machine != VG_ELF_MACHINE) {
126 VG_(printf)("valgrind: executable is not for "
127 "this architecture\n");
128 goto bad;
129 }
130
131 if (e->e.e_phentsize != sizeof(ESZ(Phdr))) {
132 VG_(printf)("valgrind: sizeof ELF Phdr wrong\n");
133 goto bad;
134 }
135
136 phsz = sizeof(ESZ(Phdr)) * e->e.e_phnum;
137 e->p = VG_(malloc)("ume.re.2", phsz);
138
139 sres = VG_(pread)(fd, e->p, phsz, e->e.e_phoff);
140 if (sr_isError(sres) || sr_Res(sres) != phsz) {
141 VG_(printf)("valgrind: can't read phdr: %s\n",
142 VG_(strerror)(sr_Err(sres)));
143 VG_(free)(e->p);
144 goto bad;
145 }
146
147 return e;
148
149 bad:
150 VG_(free)(e);
151 return NULL;
152 }
153
154 /* Map an ELF file. Returns the brk address. */
155 static
mapelf(struct elfinfo * e,ESZ (Addr)base)156 ESZ(Addr) mapelf(struct elfinfo *e, ESZ(Addr) base)
157 {
158 Int i;
159 SysRes res;
160 ESZ(Addr) elfbrk = 0;
161
162 for (i = 0; i < e->e.e_phnum; i++) {
163 ESZ(Phdr) *ph = &e->p[i];
164 ESZ(Addr) addr, brkaddr;
165 ESZ(Word) memsz;
166
167 if (ph->p_type != PT_LOAD)
168 continue;
169
170 addr = ph->p_vaddr+base;
171 memsz = ph->p_memsz;
172 brkaddr = addr+memsz;
173
174 if (brkaddr > elfbrk)
175 elfbrk = brkaddr;
176 }
177
178 for (i = 0; i < e->e.e_phnum; i++) {
179 ESZ(Phdr) *ph = &e->p[i];
180 ESZ(Addr) addr, bss, brkaddr;
181 ESZ(Off) off;
182 ESZ(Word) filesz;
183 ESZ(Word) memsz;
184 unsigned prot = 0;
185
186 if (ph->p_type != PT_LOAD)
187 continue;
188
189 if (ph->p_flags & PF_X) prot |= VKI_PROT_EXEC;
190 if (ph->p_flags & PF_W) prot |= VKI_PROT_WRITE;
191 if (ph->p_flags & PF_R) prot |= VKI_PROT_READ;
192
193 addr = ph->p_vaddr+base;
194 off = ph->p_offset;
195 filesz = ph->p_filesz;
196 bss = addr+filesz;
197 memsz = ph->p_memsz;
198 brkaddr = addr+memsz;
199
200 // Tom says: In the following, do what the Linux kernel does and only
201 // map the pages that are required instead of rounding everything to
202 // the specified alignment (ph->p_align). (AMD64 doesn't work if you
203 // use ph->p_align -- part of stage2's memory gets trashed somehow.)
204 //
205 // The condition handles the case of a zero-length segment.
206 if (VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr) > 0) {
207 if (0) VG_(debugLog)(0,"ume","mmap_file_fixed_client #1\n");
208 res = VG_(am_mmap_file_fixed_client)(
209 VG_PGROUNDDN(addr),
210 VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr),
211 prot, /*VKI_MAP_FIXED|VKI_MAP_PRIVATE, */
212 e->fd, VG_PGROUNDDN(off)
213 );
214 if (0) VG_(am_show_nsegments)(0,"after #1");
215 check_mmap(res, VG_PGROUNDDN(addr),
216 VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr));
217 }
218
219 // if memsz > filesz, fill the remainder with zeroed pages
220 if (memsz > filesz) {
221 UInt bytes;
222
223 bytes = VG_PGROUNDUP(brkaddr)-VG_PGROUNDUP(bss);
224 if (bytes > 0) {
225 if (0) VG_(debugLog)(0,"ume","mmap_anon_fixed_client #2\n");
226 res = VG_(am_mmap_anon_fixed_client)(
227 VG_PGROUNDUP(bss), bytes,
228 prot
229 );
230 if (0) VG_(am_show_nsegments)(0,"after #2");
231 check_mmap(res, VG_PGROUNDUP(bss), bytes);
232 }
233
234 bytes = bss & (VKI_PAGE_SIZE - 1);
235
236 // The 'prot' condition allows for a read-only bss
237 if ((prot & VKI_PROT_WRITE) && (bytes > 0)) {
238 bytes = VKI_PAGE_SIZE - bytes;
239 VG_(memset)((void *)bss, 0, bytes);
240 }
241 }
242 }
243
244 return elfbrk;
245 }
246
VG_(match_ELF)247 Bool VG_(match_ELF)(const void *hdr, SizeT len)
248 {
249 const ESZ(Ehdr) *e = hdr;
250 return (len > sizeof(*e)) && VG_(memcmp)(&e->e_ident[0], ELFMAG, SELFMAG) == 0;
251 }
252
253
254 /* load_ELF pulls an ELF executable into the address space, prepares
255 it for execution, and writes info about it into INFO. In
256 particular it fills in .init_eip, which is the starting point.
257
258 Returns zero on success, non-zero (a VKI_E.. value) on failure.
259
260 The sequence of activities is roughly as follows:
261
262 - use readelf() to extract program header info from the exe file.
263
264 - scan the program header, collecting info (not sure what all those
265 info-> fields are, or whether they are used, but still) and in
266 particular looking out fo the PT_INTERP header, which describes
267 the interpreter. If such a field is found, the space needed to
268 hold the interpreter is computed into interp_size.
269
270 - map the executable in, by calling mapelf(). This maps in all
271 loadable sections, and I _think_ also creates any .bss areas
272 required. mapelf() returns the address just beyond the end of
273 the furthest-along mapping it creates. The executable is mapped
274 starting at EBASE, which is usually read from it (eg, 0x8048000
275 etc) except if it's a PIE, in which case I'm not sure what
276 happens.
277
278 The returned address is recorded in info->brkbase as the start
279 point of the brk (data) segment, as it is traditional to place
280 the data segment just after the executable. Neither load_ELF nor
281 mapelf creates the brk segment, though: that is for the caller of
282 load_ELF to attend to.
283
284 - If the initial phdr scan didn't find any mention of an
285 interpreter (interp == NULL), this must be a statically linked
286 executable, and we're pretty much done.
287
288 - Otherwise, we need to use mapelf() a second time to load the
289 interpreter. The interpreter can go anywhere, but mapelf() wants
290 to be told a specific address to put it at. So an advisory query
291 is passed to aspacem, asking where it would put an anonymous
292 client mapping of size INTERP_SIZE. That address is then used
293 as the mapping address for the interpreter.
294
295 - The entry point in INFO is set to the interpreter's entry point,
296 and we're done. */
VG_(load_ELF)297 Int VG_(load_ELF)(Int fd, const HChar* name, /*MOD*/ExeInfo* info)
298 {
299 SysRes sres;
300 struct elfinfo *e;
301 struct elfinfo *interp = NULL;
302 ESZ(Addr) minaddr = ~0; /* lowest mapped address */
303 ESZ(Addr) maxaddr = 0; /* highest mapped address */
304 ESZ(Addr) interp_addr = 0; /* interpreter (ld.so) address */
305 ESZ(Word) interp_size = 0; /* interpreter size */
306 /* ESZ(Word) interp_align = VKI_PAGE_SIZE; */ /* UNUSED */
307 Int i;
308 void *entry;
309 ESZ(Addr) ebase = 0;
310
311 # if defined(HAVE_PIE)
312 ebase = info->exe_base;
313 # endif
314
315 e = readelf(fd, name);
316
317 if (e == NULL)
318 return VKI_ENOEXEC;
319
320 /* The kernel maps position-independent executables at TASK_SIZE*2/3;
321 duplicate this behavior as close as we can. */
322 if (e->e.e_type == ET_DYN && ebase == 0) {
323 ebase = VG_PGROUNDDN(info->exe_base
324 + (info->exe_end - info->exe_base) * 2 / 3);
325 /* We really don't want to load PIEs at zero or too close. It
326 works, but it's unrobust (NULL pointer reads and writes
327 become legit, which is really bad) and causes problems for
328 exp-ptrcheck, which assumes all numbers below 1MB are
329 nonpointers. So, hackily, move it above 1MB. */
330 /* Later .. it appears ppc32-linux tries to put [vdso] at 1MB,
331 which totally screws things up, because nothing else can go
332 there. The size of [vdso] is around 2 or 3 pages, so bump
333 the hacky load addess along by 8 * VKI_PAGE_SIZE to be safe. */
334 /* Later .. on mips64 we can't use 0x108000, because mapelf will
335 fail. */
336 # if defined(VGP_mips64_linux)
337 if (ebase < 0x100000)
338 ebase = 0x100000;
339 # else
340 vg_assert(VKI_PAGE_SIZE >= 4096); /* stay sane */
341 ESZ(Addr) hacky_load_address = 0x100000 + 8 * VKI_PAGE_SIZE;
342 if (ebase < hacky_load_address)
343 ebase = hacky_load_address;
344 # endif
345 }
346
347 info->phnum = e->e.e_phnum;
348 info->entry = e->e.e_entry + ebase;
349 info->phdr = 0;
350 info->stack_prot = VKI_PROT_READ|VKI_PROT_WRITE|VKI_PROT_EXEC;
351
352 for (i = 0; i < e->e.e_phnum; i++) {
353 ESZ(Phdr) *ph = &e->p[i];
354
355 switch(ph->p_type) {
356 case PT_PHDR:
357 info->phdr = ph->p_vaddr + ebase;
358 break;
359
360 case PT_LOAD:
361 if (ph->p_vaddr < minaddr)
362 minaddr = ph->p_vaddr;
363 if (ph->p_vaddr+ph->p_memsz > maxaddr)
364 maxaddr = ph->p_vaddr+ph->p_memsz;
365 break;
366
367 case PT_INTERP: {
368 HChar *buf = VG_(malloc)("ume.LE.1", ph->p_filesz+1);
369 Int j;
370 Int intfd;
371 Int baseaddr_set;
372
373 VG_(pread)(fd, buf, ph->p_filesz, ph->p_offset);
374 buf[ph->p_filesz] = '\0';
375
376 sres = VG_(open)(buf, VKI_O_RDONLY, 0);
377 if (sr_isError(sres)) {
378 VG_(printf)("valgrind: m_ume.c: can't open interpreter\n");
379 VG_(exit)(1);
380 }
381 intfd = sr_Res(sres);
382
383 interp = readelf(intfd, buf);
384 if (interp == NULL) {
385 VG_(printf)("valgrind: m_ume.c: can't read interpreter\n");
386 return 1;
387 }
388 VG_(free)(buf);
389
390 baseaddr_set = 0;
391 for (j = 0; j < interp->e.e_phnum; j++) {
392 ESZ(Phdr) *iph = &interp->p[j];
393 ESZ(Addr) end;
394
395 if (iph->p_type != PT_LOAD || iph->p_memsz == 0)
396 continue;
397
398 if (!baseaddr_set) {
399 interp_addr = iph->p_vaddr;
400 /* interp_align = iph->p_align; */ /* UNUSED */
401 baseaddr_set = 1;
402 }
403
404 /* assumes that all segments in the interp are close */
405 end = (iph->p_vaddr - interp_addr) + iph->p_memsz;
406
407 if (end > interp_size)
408 interp_size = end;
409 }
410 break;
411
412 # if defined(PT_GNU_STACK)
413 /* Android's elf.h doesn't appear to have PT_GNU_STACK. */
414 case PT_GNU_STACK:
415 if ((ph->p_flags & PF_X) == 0) info->stack_prot &= ~VKI_PROT_EXEC;
416 if ((ph->p_flags & PF_W) == 0) info->stack_prot &= ~VKI_PROT_WRITE;
417 if ((ph->p_flags & PF_R) == 0) info->stack_prot &= ~VKI_PROT_READ;
418 break;
419 # endif
420
421 default:
422 // do nothing
423 break;
424 }
425 }
426 }
427
428 if (info->phdr == 0)
429 info->phdr = minaddr + ebase + e->e.e_phoff;
430
431 if (info->exe_base != info->exe_end) {
432 if (minaddr >= maxaddr ||
433 (minaddr + ebase < info->exe_base ||
434 maxaddr + ebase > info->exe_end)) {
435 VG_(printf)("Executable range %p-%p is outside the\n"
436 "acceptable range %p-%p\n",
437 (char *)minaddr + ebase, (char *)maxaddr + ebase,
438 (char *)info->exe_base, (char *)info->exe_end);
439 return VKI_ENOMEM;
440 }
441 }
442
443 info->brkbase = mapelf(e, ebase); /* map the executable */
444
445 if (info->brkbase == 0)
446 return VKI_ENOMEM;
447
448 if (interp != NULL) {
449 /* reserve a chunk of address space for interpreter */
450 MapRequest mreq;
451 Addr advised;
452 Bool ok;
453
454 /* Don't actually reserve the space. Just get an advisory
455 indicating where it would be allocated, and pass that to
456 mapelf(), which in turn asks aspacem to do some fixed maps at
457 the specified address. This is a bit of hack, but it should
458 work because there should be no intervening transactions with
459 aspacem which could cause those fixed maps to fail.
460
461 Placement policy is:
462
463 if the interpreter asks to be loaded at zero
464 ignore that and put it wherever we like (mappings at zero
465 are bad news)
466 else
467 try and put it where it asks for, but if that doesn't work,
468 just put it anywhere.
469 */
470 if (interp_addr == 0) {
471 mreq.rkind = MAny;
472 mreq.start = 0;
473 mreq.len = interp_size;
474 } else {
475 mreq.rkind = MHint;
476 mreq.start = interp_addr;
477 mreq.len = interp_size;
478 }
479
480 advised = VG_(am_get_advisory)( &mreq, True/*client*/, &ok );
481
482 if (!ok) {
483 /* bomb out */
484 SysRes res = VG_(mk_SysRes_Error)(VKI_EINVAL);
485 if (0) VG_(printf)("reserve for interp: failed\n");
486 check_mmap(res, (Addr)interp_addr, interp_size);
487 /*NOTREACHED*/
488 }
489
490 (void)mapelf(interp, (ESZ(Addr))advised - interp_addr);
491
492 VG_(close)(interp->fd);
493
494 entry = (void *)(advised - interp_addr + interp->e.e_entry);
495
496 info->interp_offset = advised - interp_addr;
497
498 VG_(free)(interp->p);
499 VG_(free)(interp);
500 } else
501 entry = (void *)(ebase + e->e.e_entry);
502
503 info->exe_base = minaddr + ebase;
504 info->exe_end = maxaddr + ebase;
505
506 #if defined(VGP_ppc64be_linux)
507 /* On PPC64BE, ELF ver 1, a func ptr is represented by a TOC entry ptr.
508 This TOC entry contains three words; the first word is the function
509 address, the second word is the TOC ptr (r2), and the third word
510 is the static chain value. */
511 info->init_ip = ((ULong*)entry)[0];
512 info->init_toc = ((ULong*)entry)[1];
513 info->init_ip += info->interp_offset;
514 info->init_toc += info->interp_offset;
515 #elif defined(VGP_ppc64le_linux)
516 /* On PPC64LE, ELF ver 2. API doesn't use a func ptr */
517 info->init_ip = (Addr)entry;
518 info->init_toc = 0; /* meaningless on this platform */
519 #else
520 info->init_ip = (Addr)entry;
521 info->init_toc = 0; /* meaningless on this platform */
522 #endif
523 VG_(free)(e->p);
524 VG_(free)(e);
525
526 return 0;
527 }
528
529 #endif // defined(VGO_linux)
530
531 /*--------------------------------------------------------------------*/
532 /*--- end ---*/
533 /*--------------------------------------------------------------------*/
534