1
2 /*--------------------------------------------------------------------*/
3 /*--- User-mode execve() for ELF executables m_ume_elf.c ---*/
4 /*--------------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2000-2012 Julian Seward
11 jseward@acm.org
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29 */
30
31 #if defined(VGO_linux)
32
33 #include "pub_core_basics.h"
34 #include "pub_core_vki.h"
35
36 #include "pub_core_aspacemgr.h" // various mapping fns
37 #include "pub_core_debuglog.h"
38 #include "pub_core_libcassert.h" // VG_(exit), vg_assert
39 #include "pub_core_libcbase.h" // VG_(memcmp), etc
40 #include "pub_core_libcprint.h"
41 #include "pub_core_libcfile.h" // VG_(open) et al
42 #include "pub_core_machine.h" // VG_ELF_CLASS (XXX: which should be moved)
43 #include "pub_core_mallocfree.h" // VG_(malloc), VG_(free)
44 #include "pub_core_syscall.h" // VG_(strerror)
45 #include "pub_core_ume.h" // self
46
47 #include "priv_ume.h"
48
49 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
50 #define _GNU_SOURCE
51 #define _FILE_OFFSET_BITS 64
52 /* This is for ELF types etc, and also the AT_ constants. */
53 #include <elf.h>
54 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
55
56
57 #if VG_WORDSIZE == 8
58 #define ESZ(x) Elf64_##x
59 #elif VG_WORDSIZE == 4
60 #define ESZ(x) Elf32_##x
61 #else
62 #error VG_WORDSIZE needs to ==4 or ==8
63 #endif
64
65 struct elfinfo
66 {
67 ESZ(Ehdr) e;
68 ESZ(Phdr) *p;
69 Int fd;
70 };
71
/* Abort the process if a mapping attempt failed.

   RES is the result of the mapping call; BASE and LEN are the address
   and length that were requested and are used only for the diagnostic.
   Returns normally when RES is not an error; otherwise prints a message
   and calls VG_(exit)(1). */
static void check_mmap(SysRes res, Addr base, SizeT len)
{
   /* Successful mapping: nothing to report. */
   if (!sr_isError(res)) {
      return;
   }

   VG_(printf)("valgrind: mmap(0x%llx, %lld) failed in UME "
               "with error %lu (%s).\n",
               (ULong)base, (Long)len,
               sr_Err(res), VG_(strerror)(sr_Err(res)));

   /* Give a hint for the one error code with a known likely cause. */
   if (sr_Err(res) == VKI_EINVAL) {
      VG_(printf)("valgrind: this can be caused by executables with "
                  "very large text, data or bss segments.\n");
   }

   VG_(exit)(1);
}
86
87 /*------------------------------------------------------------*/
88 /*--- Loading ELF files ---*/
89 /*------------------------------------------------------------*/
90
91 static
readelf(Int fd,const char * filename)92 struct elfinfo *readelf(Int fd, const char *filename)
93 {
94 SysRes sres;
95 struct elfinfo *e = VG_(malloc)("ume.re.1", sizeof(*e));
96 Int phsz;
97
98 vg_assert(e);
99 e->fd = fd;
100
101 sres = VG_(pread)(fd, &e->e, sizeof(e->e), 0);
102 if (sr_isError(sres) || sr_Res(sres) != sizeof(e->e)) {
103 VG_(printf)("valgrind: %s: can't read ELF header: %s\n",
104 filename, VG_(strerror)(sr_Err(sres)));
105 goto bad;
106 }
107
108 if (VG_(memcmp)(&e->e.e_ident[0], ELFMAG, SELFMAG) != 0) {
109 VG_(printf)("valgrind: %s: bad ELF magic number\n", filename);
110 goto bad;
111 }
112 if (e->e.e_ident[EI_CLASS] != VG_ELF_CLASS) {
113 VG_(printf)("valgrind: wrong ELF executable class "
114 "(eg. 32-bit instead of 64-bit)\n");
115 goto bad;
116 }
117 if (e->e.e_ident[EI_DATA] != VG_ELF_DATA2XXX) {
118 VG_(printf)("valgrind: executable has wrong endian-ness\n");
119 goto bad;
120 }
121 if (!(e->e.e_type == ET_EXEC || e->e.e_type == ET_DYN)) {
122 VG_(printf)("valgrind: this is not an executable\n");
123 goto bad;
124 }
125
126 if (e->e.e_machine != VG_ELF_MACHINE) {
127 VG_(printf)("valgrind: executable is not for "
128 "this architecture\n");
129 goto bad;
130 }
131
132 if (e->e.e_phentsize != sizeof(ESZ(Phdr))) {
133 VG_(printf)("valgrind: sizeof ELF Phdr wrong\n");
134 goto bad;
135 }
136
137 phsz = sizeof(ESZ(Phdr)) * e->e.e_phnum;
138 e->p = VG_(malloc)("ume.re.2", phsz);
139 vg_assert(e->p);
140
141 sres = VG_(pread)(fd, e->p, phsz, e->e.e_phoff);
142 if (sr_isError(sres) || sr_Res(sres) != phsz) {
143 VG_(printf)("valgrind: can't read phdr: %s\n",
144 VG_(strerror)(sr_Err(sres)));
145 VG_(free)(e->p);
146 goto bad;
147 }
148
149 return e;
150
151 bad:
152 VG_(free)(e);
153 return NULL;
154 }
155
156 /* Map an ELF file. Returns the brk address. */
157 static
mapelf(struct elfinfo * e,ESZ (Addr)base)158 ESZ(Addr) mapelf(struct elfinfo *e, ESZ(Addr) base)
159 {
160 Int i;
161 SysRes res;
162 ESZ(Addr) elfbrk = 0;
163
164 for (i = 0; i < e->e.e_phnum; i++) {
165 ESZ(Phdr) *ph = &e->p[i];
166 ESZ(Addr) addr, brkaddr;
167 ESZ(Word) memsz;
168
169 if (ph->p_type != PT_LOAD)
170 continue;
171
172 addr = ph->p_vaddr+base;
173 memsz = ph->p_memsz;
174 brkaddr = addr+memsz;
175
176 if (brkaddr > elfbrk)
177 elfbrk = brkaddr;
178 }
179
180 for (i = 0; i < e->e.e_phnum; i++) {
181 ESZ(Phdr) *ph = &e->p[i];
182 ESZ(Addr) addr, bss, brkaddr;
183 ESZ(Off) off;
184 ESZ(Word) filesz;
185 ESZ(Word) memsz;
186 unsigned prot = 0;
187
188 if (ph->p_type != PT_LOAD)
189 continue;
190
191 if (ph->p_flags & PF_X) prot |= VKI_PROT_EXEC;
192 if (ph->p_flags & PF_W) prot |= VKI_PROT_WRITE;
193 if (ph->p_flags & PF_R) prot |= VKI_PROT_READ;
194
195 addr = ph->p_vaddr+base;
196 off = ph->p_offset;
197 filesz = ph->p_filesz;
198 bss = addr+filesz;
199 memsz = ph->p_memsz;
200 brkaddr = addr+memsz;
201
202 // Tom says: In the following, do what the Linux kernel does and only
203 // map the pages that are required instead of rounding everything to
204 // the specified alignment (ph->p_align). (AMD64 doesn't work if you
205 // use ph->p_align -- part of stage2's memory gets trashed somehow.)
206 //
207 // The condition handles the case of a zero-length segment.
208 if (VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr) > 0) {
209 if (0) VG_(debugLog)(0,"ume","mmap_file_fixed_client #1\n");
210 res = VG_(am_mmap_file_fixed_client)(
211 VG_PGROUNDDN(addr),
212 VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr),
213 prot, /*VKI_MAP_FIXED|VKI_MAP_PRIVATE, */
214 e->fd, VG_PGROUNDDN(off)
215 );
216 if (0) VG_(am_show_nsegments)(0,"after #1");
217 check_mmap(res, VG_PGROUNDDN(addr),
218 VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr));
219 }
220
221 // if memsz > filesz, fill the remainder with zeroed pages
222 if (memsz > filesz) {
223 UInt bytes;
224
225 bytes = VG_PGROUNDUP(brkaddr)-VG_PGROUNDUP(bss);
226 if (bytes > 0) {
227 if (0) VG_(debugLog)(0,"ume","mmap_anon_fixed_client #2\n");
228 res = VG_(am_mmap_anon_fixed_client)(
229 VG_PGROUNDUP(bss), bytes,
230 prot
231 );
232 if (0) VG_(am_show_nsegments)(0,"after #2");
233 check_mmap(res, VG_PGROUNDUP(bss), bytes);
234 }
235
236 bytes = bss & (VKI_PAGE_SIZE - 1);
237
238 // The 'prot' condition allows for a read-only bss
239 if ((prot & VKI_PROT_WRITE) && (bytes > 0)) {
240 bytes = VKI_PAGE_SIZE - bytes;
241 VG_(memset)((char *)bss, 0, bytes);
242 }
243 }
244 }
245
246 return elfbrk;
247 }
248
VG_(match_ELF)249 Bool VG_(match_ELF)(Char *hdr, Int len)
250 {
251 ESZ(Ehdr) *e = (ESZ(Ehdr) *)hdr;
252 return (len > sizeof(*e)) && VG_(memcmp)(&e->e_ident[0], ELFMAG, SELFMAG) == 0;
253 }
254
255
256 /* load_ELF pulls an ELF executable into the address space, prepares
257 it for execution, and writes info about it into INFO. In
258 particular it fills in .init_eip, which is the starting point.
259
260 Returns zero on success, non-zero (a VKI_E.. value) on failure.
261
262 The sequence of activities is roughly as follows:
263
264 - use readelf() to extract program header info from the exe file.
265
266 - scan the program header, collecting info (not sure what all those
267 info-> fields are, or whether they are used, but still) and in
268 particular looking out fo the PT_INTERP header, which describes
269 the interpreter. If such a field is found, the space needed to
270 hold the interpreter is computed into interp_size.
271
272 - map the executable in, by calling mapelf(). This maps in all
273 loadable sections, and I _think_ also creates any .bss areas
274 required. mapelf() returns the address just beyond the end of
275 the furthest-along mapping it creates. The executable is mapped
276 starting at EBASE, which is usually read from it (eg, 0x8048000
277 etc) except if it's a PIE, in which case I'm not sure what
278 happens.
279
280 The returned address is recorded in info->brkbase as the start
281 point of the brk (data) segment, as it is traditional to place
282 the data segment just after the executable. Neither load_ELF nor
283 mapelf creates the brk segment, though: that is for the caller of
284 load_ELF to attend to.
285
286 - If the initial phdr scan didn't find any mention of an
287 interpreter (interp == NULL), this must be a statically linked
288 executable, and we're pretty much done.
289
290 - Otherwise, we need to use mapelf() a second time to load the
291 interpreter. The interpreter can go anywhere, but mapelf() wants
292 to be told a specific address to put it at. So an advisory query
293 is passed to aspacem, asking where it would put an anonymous
294 client mapping of size INTERP_SIZE. That address is then used
295 as the mapping address for the interpreter.
296
297 - The entry point in INFO is set to the interpreter's entry point,
298 and we're done. */
VG_(load_ELF)299 Int VG_(load_ELF)(Int fd, const HChar* name, /*MOD*/ExeInfo* info)
300 {
301 SysRes sres;
302 struct elfinfo *e;
303 struct elfinfo *interp = NULL;
304 ESZ(Addr) minaddr = ~0; /* lowest mapped address */
305 ESZ(Addr) maxaddr = 0; /* highest mapped address */
306 ESZ(Addr) interp_addr = 0; /* interpreter (ld.so) address */
307 ESZ(Word) interp_size = 0; /* interpreter size */
308 /* ESZ(Word) interp_align = VKI_PAGE_SIZE; */ /* UNUSED */
309 Int i;
310 void *entry;
311 ESZ(Addr) ebase = 0;
312
313 /* The difference between where the interpreter got mapped and
314 where it asked to be mapped. Needed for computing the ppc64 ELF
315 entry point and initial tocptr (R2) value. */
316 ESZ(Word) interp_offset = 0;
317
318 #ifdef HAVE_PIE
319 ebase = info->exe_base;
320 #endif
321
322 e = readelf(fd, name);
323
324 if (e == NULL)
325 return VKI_ENOEXEC;
326
327 /* The kernel maps position-independent executables at TASK_SIZE*2/3;
328 duplicate this behavior as close as we can. */
329 if (e->e.e_type == ET_DYN && ebase == 0) {
330 ebase = VG_PGROUNDDN(info->exe_base
331 + (info->exe_end - info->exe_base) * 2 / 3);
332 /* We really don't want to load PIEs at zero or too close. It
333 works, but it's unrobust (NULL pointer reads and writes
334 become legit, which is really bad) and causes problems for
335 exp-ptrcheck, which assumes all numbers below 1MB are
336 nonpointers. So, hackily, move it above 1MB. */
337 /* Later .. is appears ppc32-linux tries to put [vdso] at 1MB,
338 which totally screws things up, because nothing else can go
339 there. So bump the hacky load addess along by 0x8000, to
340 0x108000. */
341 if (ebase < 0x108000)
342 ebase = 0x108000;
343 }
344
345 info->phnum = e->e.e_phnum;
346 info->entry = e->e.e_entry + ebase;
347 info->phdr = 0;
348
349 for (i = 0; i < e->e.e_phnum; i++) {
350 ESZ(Phdr) *ph = &e->p[i];
351
352 switch(ph->p_type) {
353 case PT_PHDR:
354 info->phdr = ph->p_vaddr + ebase;
355 break;
356
357 case PT_LOAD:
358 if (ph->p_vaddr < minaddr)
359 minaddr = ph->p_vaddr;
360 if (ph->p_vaddr+ph->p_memsz > maxaddr)
361 maxaddr = ph->p_vaddr+ph->p_memsz;
362 break;
363
364 case PT_INTERP: {
365 HChar *buf = VG_(malloc)("ume.LE.1", ph->p_filesz+1);
366 Int j;
367 Int intfd;
368 Int baseaddr_set;
369
370 vg_assert(buf);
371 VG_(pread)(fd, buf, ph->p_filesz, ph->p_offset);
372 buf[ph->p_filesz] = '\0';
373
374 sres = VG_(open)(buf, VKI_O_RDONLY, 0);
375 if (sr_isError(sres)) {
376 VG_(printf)("valgrind: m_ume.c: can't open interpreter\n");
377 VG_(exit)(1);
378 }
379 intfd = sr_Res(sres);
380
381 interp = readelf(intfd, buf);
382 if (interp == NULL) {
383 VG_(printf)("valgrind: m_ume.c: can't read interpreter\n");
384 return 1;
385 }
386 VG_(free)(buf);
387
388 baseaddr_set = 0;
389 for (j = 0; j < interp->e.e_phnum; j++) {
390 ESZ(Phdr) *iph = &interp->p[j];
391 ESZ(Addr) end;
392
393 if (iph->p_type != PT_LOAD || iph->p_memsz == 0)
394 continue;
395
396 if (!baseaddr_set) {
397 interp_addr = iph->p_vaddr;
398 /* interp_align = iph->p_align; */ /* UNUSED */
399 baseaddr_set = 1;
400 }
401
402 /* assumes that all segments in the interp are close */
403 end = (iph->p_vaddr - interp_addr) + iph->p_memsz;
404
405 if (end > interp_size)
406 interp_size = end;
407 }
408 break;
409
410 default:
411 // do nothing
412 break;
413 }
414 }
415 }
416
417 if (info->phdr == 0)
418 info->phdr = minaddr + ebase + e->e.e_phoff;
419
420 if (info->exe_base != info->exe_end) {
421 if (minaddr >= maxaddr ||
422 (minaddr + ebase < info->exe_base ||
423 maxaddr + ebase > info->exe_end)) {
424 VG_(printf)("Executable range %p-%p is outside the\n"
425 "acceptable range %p-%p\n",
426 (char *)minaddr + ebase, (char *)maxaddr + ebase,
427 (char *)info->exe_base, (char *)info->exe_end);
428 return VKI_ENOMEM;
429 }
430 }
431
432 info->brkbase = mapelf(e, ebase); /* map the executable */
433
434 if (info->brkbase == 0)
435 return VKI_ENOMEM;
436
437 if (interp != NULL) {
438 /* reserve a chunk of address space for interpreter */
439 MapRequest mreq;
440 Addr advised;
441 Bool ok;
442
443 /* Don't actually reserve the space. Just get an advisory
444 indicating where it would be allocated, and pass that to
445 mapelf(), which in turn asks aspacem to do some fixed maps at
446 the specified address. This is a bit of hack, but it should
447 work because there should be no intervening transactions with
448 aspacem which could cause those fixed maps to fail.
449
450 Placement policy is:
451
452 if the interpreter asks to be loaded at zero
453 ignore that and put it wherever we like (mappings at zero
454 are bad news)
455 else
456 try and put it where it asks for, but if that doesn't work,
457 just put it anywhere.
458 */
459 if (interp_addr == 0) {
460 mreq.rkind = MAny;
461 mreq.start = 0;
462 mreq.len = interp_size;
463 } else {
464 mreq.rkind = MHint;
465 mreq.start = interp_addr;
466 mreq.len = interp_size;
467 }
468
469 advised = VG_(am_get_advisory)( &mreq, True/*client*/, &ok );
470
471 if (!ok) {
472 /* bomb out */
473 SysRes res = VG_(mk_SysRes_Error)(VKI_EINVAL);
474 if (0) VG_(printf)("reserve for interp: failed\n");
475 check_mmap(res, (Addr)interp_addr, interp_size);
476 /*NOTREACHED*/
477 }
478
479 (void)mapelf(interp, (ESZ(Addr))advised - interp_addr);
480
481 VG_(close)(interp->fd);
482
483 entry = (void *)(advised - interp_addr + interp->e.e_entry);
484 info->interp_base = (ESZ(Addr))advised;
485 interp_offset = advised - interp_addr;
486
487 VG_(free)(interp->p);
488 VG_(free)(interp);
489 } else
490 entry = (void *)(ebase + e->e.e_entry);
491
492 info->exe_base = minaddr + ebase;
493 info->exe_end = maxaddr + ebase;
494
495 #if defined(VGP_ppc64_linux)
496 /* On PPC64, a func ptr is represented by a TOC entry ptr. This
497 TOC entry contains three words; the first word is the function
498 address, the second word is the TOC ptr (r2), and the third word
499 is the static chain value. */
500 info->init_ip = ((ULong*)entry)[0];
501 info->init_toc = ((ULong*)entry)[1];
502 info->init_ip += interp_offset;
503 info->init_toc += interp_offset;
504 #else
505 info->init_ip = (Addr)entry;
506 info->init_toc = 0; /* meaningless on this platform */
507 (void) interp_offset; /* stop gcc complaining it is unused */
508 #endif
509 VG_(free)(e->p);
510 VG_(free)(e);
511
512 return 0;
513 }
514
515 #endif // defined(VGO_linux)
516
517 /*--------------------------------------------------------------------*/
518 /*--- end ---*/
519 /*--------------------------------------------------------------------*/
520