1 /*
2 * linux/fs/binfmt_elf.c
3 *
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7 * Tools".
8 *
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10 */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/utsname.h>
35 #include <linux/coredump.h>
36 #include <linux/sched.h>
37 #include <asm/uaccess.h>
38 #include <asm/param.h>
39 #include <asm/page.h>
40
41 #ifndef user_long_t
42 #define user_long_t long
43 #endif
44 #ifndef user_siginfo_t
45 #define user_siginfo_t siginfo_t
46 #endif
47
48 static int load_elf_binary(struct linux_binprm *bprm);
49 static int load_elf_library(struct file *);
50 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
51 int, int, unsigned long);
52
53 /*
54 * If we don't support core dumping, then supply a NULL so we
55 * don't even try.
56 */
57 #ifdef CONFIG_ELF_CORE
58 static int elf_core_dump(struct coredump_params *cprm);
59 #else
60 #define elf_core_dump NULL
61 #endif
62
63 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
64 #define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE
65 #else
66 #define ELF_MIN_ALIGN PAGE_SIZE
67 #endif
68
69 #ifndef ELF_CORE_EFLAGS
70 #define ELF_CORE_EFLAGS 0
71 #endif
72
73 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
74 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
75 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
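/*
 * Worked example (illustrative numbers): with ELF_MIN_ALIGN == 0x1000,
 *   ELF_PAGESTART(0x1234)  == 0x1000
 *   ELF_PAGEOFFSET(0x1234) == 0x234
 *   ELF_PAGEALIGN(0x1234)  == 0x2000
 */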
76
77 static struct linux_binfmt elf_format = {
78 .module = THIS_MODULE,
79 .load_binary = load_elf_binary,
80 .load_shlib = load_elf_library,
81 .core_dump = elf_core_dump,
82 .min_coredump = ELF_EXEC_PAGESIZE,
83 };
84
85 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
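/*
 * Note: negative error values returned by vm_mmap()/vm_brk() also read as
 * "bad" addresses here, since cast to unsigned long they are >= TASK_SIZE.
 */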
86
87 static int set_brk(unsigned long start, unsigned long end)
88 {
89 start = ELF_PAGEALIGN(start);
90 end = ELF_PAGEALIGN(end);
91 if (end > start) {
92 unsigned long addr;
93 addr = vm_brk(start, end - start);
94 if (BAD_ADDR(addr))
95 return addr;
96 }
97 current->mm->start_brk = current->mm->brk = end;
98 return 0;
99 }
100
101 /* We need to explicitly zero any fractional pages
102 after the data section (i.e. bss). These would
103 otherwise contain junk from the file that should
104 not be in memory.
105 */
106 static int padzero(unsigned long elf_bss)
107 {
108 unsigned long nbyte;
109
110 nbyte = ELF_PAGEOFFSET(elf_bss);
111 if (nbyte) {
112 nbyte = ELF_MIN_ALIGN - nbyte;
113 if (clear_user((void __user *) elf_bss, nbyte))
114 return -EFAULT;
115 }
116 return 0;
117 }
118
119 /* Let's use some macros to make this stack manipulation a little clearer */
120 #ifdef CONFIG_STACK_GROWSUP
121 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
122 #define STACK_ROUND(sp, items) \
123 ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
124 #define STACK_ALLOC(sp, len) ({ \
125 elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
126 old_sp; })
127 #else
128 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
129 #define STACK_ROUND(sp, items) \
130 (((unsigned long) (sp - items)) &~ 15UL)
131 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
132 #endif
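/*
 * On the common grows-down stack, STACK_ALLOC(sp, len) simply lowers sp by
 * len and yields the new (lower) address, STACK_ADD(sp, items) steps sp down
 * by that many elf_addr_t slots, and STACK_ROUND() rounds the result down to
 * a 16-byte boundary.
 */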
133
134 #ifndef ELF_BASE_PLATFORM
135 /*
136 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
137 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
138 * will be copied to the user stack in the same manner as AT_PLATFORM.
139 */
140 #define ELF_BASE_PLATFORM NULL
141 #endif
142
143 static int
144 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
145 unsigned long load_addr, unsigned long interp_load_addr)
146 {
147 unsigned long p = bprm->p;
148 int argc = bprm->argc;
149 int envc = bprm->envc;
150 elf_addr_t __user *argv;
151 elf_addr_t __user *envp;
152 elf_addr_t __user *sp;
153 elf_addr_t __user *u_platform;
154 elf_addr_t __user *u_base_platform;
155 elf_addr_t __user *u_rand_bytes;
156 const char *k_platform = ELF_PLATFORM;
157 const char *k_base_platform = ELF_BASE_PLATFORM;
158 unsigned char k_rand_bytes[16];
159 int items;
160 elf_addr_t *elf_info;
161 int ei_index = 0;
162 const struct cred *cred = current_cred();
163 struct vm_area_struct *vma;
164
165 /*
166 * In some cases (e.g. Hyper-Threading), we want to avoid L1
167 * evictions by the processes running on the same package. One
168 * thing we can do is to shuffle the initial stack for them.
169 */
170
171 p = arch_align_stack(p);
172
173 /*
174 * If this architecture has a platform capability string, copy it
175 * to userspace. In some cases (Sparc), this info is impossible
176 * for userspace to get any other way, in others (i386) it is
177 * merely difficult.
178 */
179 u_platform = NULL;
180 if (k_platform) {
181 size_t len = strlen(k_platform) + 1;
182
183 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
184 if (__copy_to_user(u_platform, k_platform, len))
185 return -EFAULT;
186 }
187
188 /*
189 * If this architecture has a "base" platform capability
190 * string, copy it to userspace.
191 */
192 u_base_platform = NULL;
193 if (k_base_platform) {
194 size_t len = strlen(k_base_platform) + 1;
195
196 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
197 if (__copy_to_user(u_base_platform, k_base_platform, len))
198 return -EFAULT;
199 }
200
201 /*
202 * Generate 16 random bytes for userspace PRNG seeding.
203 */
204 get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
205 u_rand_bytes = (elf_addr_t __user *)
206 STACK_ALLOC(p, sizeof(k_rand_bytes));
207 if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
208 return -EFAULT;
209
210 /* Create the ELF interpreter info */
211 elf_info = (elf_addr_t *)current->mm->saved_auxv;
212 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
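/*
 * The vector is assembled in mm->saved_auxv, so besides being copied onto
 * the user stack below it also remains available afterwards (this is what
 * /proc/<pid>/auxv reports).
 */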
213 #define NEW_AUX_ENT(id, val) \
214 do { \
215 elf_info[ei_index++] = id; \
216 elf_info[ei_index++] = val; \
217 } while (0)
218
219 #ifdef ARCH_DLINFO
220 /*
221 * ARCH_DLINFO must come first so PPC can do its special alignment of
222 * AUXV.
223 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
224 * ARCH_DLINFO changes
225 */
226 ARCH_DLINFO;
227 #endif
228 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
229 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
230 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
231 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
232 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
233 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
234 NEW_AUX_ENT(AT_BASE, interp_load_addr);
235 NEW_AUX_ENT(AT_FLAGS, 0);
236 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
237 NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
238 NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
239 NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
240 NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
241 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
242 NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
243 #ifdef ELF_HWCAP2
244 NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
245 #endif
246 NEW_AUX_ENT(AT_EXECFN, bprm->exec);
247 if (k_platform) {
248 NEW_AUX_ENT(AT_PLATFORM,
249 (elf_addr_t)(unsigned long)u_platform);
250 }
251 if (k_base_platform) {
252 NEW_AUX_ENT(AT_BASE_PLATFORM,
253 (elf_addr_t)(unsigned long)u_base_platform);
254 }
255 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
256 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
257 }
258 #undef NEW_AUX_ENT
259 /* AT_NULL is zero; clear the rest too */
260 memset(&elf_info[ei_index], 0,
261 sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
262
263 /* And advance past the AT_NULL entry. */
264 ei_index += 2;
265
266 sp = STACK_ADD(p, ei_index);
267
268 items = (argc + 1) + (envc + 1) + 1;
269 bprm->p = STACK_ROUND(sp, items);
270
271 /* Point sp at the lowest address on the stack */
272 #ifdef CONFIG_STACK_GROWSUP
273 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
274 bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
275 #else
276 sp = (elf_addr_t __user *)bprm->p;
277 #endif
278
279
280 /*
281 * Grow the stack manually; some architectures have a limit on how
282 * far ahead a user-space access may be in order to grow the stack.
283 */
284 vma = find_extend_vma(current->mm, bprm->p);
285 if (!vma)
286 return -EFAULT;
287
288 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
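/*
 * The layout being built here, from the final stack pointer upwards, is
 * roughly:
 *
 *	argc
 *	argv[0] ... argv[argc - 1], NULL
 *	envp[0] ... envp[envc - 1], NULL
 *	auxv pairs (elf_info), terminated by AT_NULL
 *
 * The argument/environment strings themselves, the platform strings and
 * the random bytes live higher up, where they were placed earlier.
 */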
289 if (__put_user(argc, sp++))
290 return -EFAULT;
291 argv = sp;
292 envp = argv + argc + 1;
293
294 /* Populate argv and envp */
295 p = current->mm->arg_end = current->mm->arg_start;
296 while (argc-- > 0) {
297 size_t len;
298 if (__put_user((elf_addr_t)p, argv++))
299 return -EFAULT;
300 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
301 if (!len || len > MAX_ARG_STRLEN)
302 return -EINVAL;
303 p += len;
304 }
305 if (__put_user(0, argv))
306 return -EFAULT;
307 current->mm->arg_end = current->mm->env_start = p;
308 while (envc-- > 0) {
309 size_t len;
310 if (__put_user((elf_addr_t)p, envp++))
311 return -EFAULT;
312 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
313 if (!len || len > MAX_ARG_STRLEN)
314 return -EINVAL;
315 p += len;
316 }
317 if (__put_user(0, envp))
318 return -EFAULT;
319 current->mm->env_end = p;
320
321 /* Put the elf_info on the stack in the right place. */
322 sp = (elf_addr_t __user *)envp + 1;
323 if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
324 return -EFAULT;
325 return 0;
326 }
327
328 #ifndef elf_map
329
330 static unsigned long elf_map(struct file *filep, unsigned long addr,
331 struct elf_phdr *eppnt, int prot, int type,
332 unsigned long total_size)
333 {
334 unsigned long map_addr;
335 unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
336 unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
337 addr = ELF_PAGESTART(addr);
338 size = ELF_PAGEALIGN(size);
339
340 /* mmap() will return -EINVAL if given a zero size, but a
341 * segment with zero filesize is perfectly valid */
342 if (!size)
343 return addr;
344
345 /*
346 * total_size is the size of the ELF (interpreter) image.
347 * The _first_ mmap needs to know the full size, otherwise
348 * randomization might put this image into an overlapping
349 * position with the ELF binary image. (since size < total_size)
350 * So we first map the 'big' image - and unmap the remainder at
351 * the end. (which unmap is needed for ELF images with holes.)
352 */
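/*
 * Illustrative numbers only: with total_size == 0x5000 and this segment's
 * page-aligned size == 0x2000, we map 0x5000 at map_addr and then unmap
 * the trailing 0x3000 starting at map_addr + 0x2000.
 */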
353 if (total_size) {
354 total_size = ELF_PAGEALIGN(total_size);
355 map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
356 if (!BAD_ADDR(map_addr))
357 vm_munmap(map_addr+size, total_size-size);
358 } else
359 map_addr = vm_mmap(filep, addr, size, prot, type, off);
360
361 return(map_addr);
362 }
363
364 #endif /* !elf_map */
365
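/*
 * Illustrative example: for PT_LOAD segments at p_vaddr 0x400000 and
 * 0x600000, the latter with p_memsz 0x2348, this returns
 * 0x600000 + 0x2348 - 0x400000 == 0x202348.
 */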
366 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
367 {
368 int i, first_idx = -1, last_idx = -1;
369
370 for (i = 0; i < nr; i++) {
371 if (cmds[i].p_type == PT_LOAD) {
372 last_idx = i;
373 if (first_idx == -1)
374 first_idx = i;
375 }
376 }
377 if (first_idx == -1)
378 return 0;
379
380 return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
381 ELF_PAGESTART(cmds[first_idx].p_vaddr);
382 }
383
384 /**
385 * load_elf_phdrs() - load ELF program headers
386 * @elf_ex: ELF header of the binary whose program headers should be loaded
387 * @elf_file: the opened ELF binary file
388 *
389 * Loads ELF program headers from the binary file elf_file, which has the ELF
390 * header pointed to by elf_ex, into a newly allocated array. The caller is
391 * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
392 */
393 static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
394 struct file *elf_file)
395 {
396 struct elf_phdr *elf_phdata = NULL;
397 int retval, size, err = -1;
398
399 /*
400 * If the size of this structure has changed, then punt, since
401 * we will be doing the wrong thing.
402 */
403 if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
404 goto out;
405
406 /* Sanity check the number of program headers... */
407 if (elf_ex->e_phnum < 1 ||
408 elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
409 goto out;
410
411 /* ...and their total size. */
412 size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
413 if (size > ELF_MIN_ALIGN)
414 goto out;
415
416 elf_phdata = kmalloc(size, GFP_KERNEL);
417 if (!elf_phdata)
418 goto out;
419
420 /* Read in the program headers */
421 retval = kernel_read(elf_file, elf_ex->e_phoff,
422 (char *)elf_phdata, size);
423 if (retval != size) {
424 err = (retval < 0) ? retval : -EIO;
425 goto out;
426 }
427
428 /* Success! */
429 err = 0;
430 out:
431 if (err) {
432 kfree(elf_phdata);
433 elf_phdata = NULL;
434 }
435 return elf_phdata;
436 }
437
438 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
439
440 /**
441 * struct arch_elf_state - arch-specific ELF loading state
442 *
443 * This structure is used to preserve architecture specific data during
444 * the loading of an ELF file, throughout the checking of architecture
445 * specific ELF headers & through to the point where the ELF load is
446 * known to be proceeding (ie. SET_PERSONALITY).
447 *
448 * This implementation is a dummy for architectures which require no
449 * specific state.
450 */
451 struct arch_elf_state {
452 };
453
454 #define INIT_ARCH_ELF_STATE {}
455
456 /**
457 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
458 * @ehdr: The main ELF header
459 * @phdr: The program header to check
460 * @elf: The open ELF file
461 * @is_interp: True if the phdr is from the interpreter of the ELF being
462 * loaded, else false.
463 * @state: Architecture-specific state preserved throughout the process
464 * of loading the ELF.
465 *
466 * Inspects the program header phdr to validate its correctness and/or
467 * suitability for the system. Called once per ELF program header in the
468 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
469 * interpreter.
470 *
471 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
472 * with that return code.
473 */
474 static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
475 struct elf_phdr *phdr,
476 struct file *elf, bool is_interp,
477 struct arch_elf_state *state)
478 {
479 /* Dummy implementation, always proceed */
480 return 0;
481 }
482
483 /**
484 * arch_check_elf() - check a PT_LOPROC..PT_HIPROC ELF program header
485 * @ehdr: The main ELF header
486 * @has_interp: True if the ELF has an interpreter, else false.
487 * @state: Architecture-specific state preserved throughout the process
488 * of loading the ELF.
489 *
490 * Provides a final opportunity for architecture code to reject the loading
491 * of the ELF & cause an exec syscall to return an error. This is called after
492 * all program headers to be checked by arch_elf_pt_proc have been.
493 *
494 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
495 * with that return code.
496 */
497 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
498 struct arch_elf_state *state)
499 {
500 /* Dummy implementation, always proceed */
501 return 0;
502 }
503
504 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
505
506 /* This is much more generalized than the library routine read function,
507 so we keep this separate. Technically the library read function
508 is only provided so that we can read a.out libraries that have
509 an ELF header. */
510
511 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
512 struct file *interpreter, unsigned long *interp_map_addr,
513 unsigned long no_base, struct elf_phdr *interp_elf_phdata)
514 {
515 struct elf_phdr *eppnt;
516 unsigned long load_addr = 0;
517 int load_addr_set = 0;
518 unsigned long last_bss = 0, elf_bss = 0;
519 unsigned long error = ~0UL;
520 unsigned long total_size;
521 int i;
522
523 /* First of all, some simple consistency checks */
524 if (interp_elf_ex->e_type != ET_EXEC &&
525 interp_elf_ex->e_type != ET_DYN)
526 goto out;
527 if (!elf_check_arch(interp_elf_ex))
528 goto out;
529 if (!interpreter->f_op || !interpreter->f_op->mmap)
530 goto out;
531
532 total_size = total_mapping_size(interp_elf_phdata,
533 interp_elf_ex->e_phnum);
534 if (!total_size) {
535 error = -EINVAL;
536 goto out;
537 }
538
539 eppnt = interp_elf_phdata;
540 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
541 if (eppnt->p_type == PT_LOAD) {
542 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
543 int elf_prot = 0;
544 unsigned long vaddr = 0;
545 unsigned long k, map_addr;
546
547 if (eppnt->p_flags & PF_R)
548 elf_prot = PROT_READ;
549 if (eppnt->p_flags & PF_W)
550 elf_prot |= PROT_WRITE;
551 if (eppnt->p_flags & PF_X)
552 elf_prot |= PROT_EXEC;
553 vaddr = eppnt->p_vaddr;
554 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
555 elf_type |= MAP_FIXED;
556 else if (no_base && interp_elf_ex->e_type == ET_DYN)
557 load_addr = -vaddr;
558
559 map_addr = elf_map(interpreter, load_addr + vaddr,
560 eppnt, elf_prot, elf_type, total_size);
561 total_size = 0;
562 if (!*interp_map_addr)
563 *interp_map_addr = map_addr;
564 error = map_addr;
565 if (BAD_ADDR(map_addr))
566 goto out;
567
568 if (!load_addr_set &&
569 interp_elf_ex->e_type == ET_DYN) {
570 load_addr = map_addr - ELF_PAGESTART(vaddr);
571 load_addr_set = 1;
572 }
573
574 /*
575 * Check to see if the section's size will overflow the
576 * allowed task size. Note that p_filesz must always be
577 * <= p_memsz so it's only necessary to check p_memsz.
578 */
579 k = load_addr + eppnt->p_vaddr;
580 if (BAD_ADDR(k) ||
581 eppnt->p_filesz > eppnt->p_memsz ||
582 eppnt->p_memsz > TASK_SIZE ||
583 TASK_SIZE - eppnt->p_memsz < k) {
584 error = -ENOMEM;
585 goto out;
586 }
587
588 /*
589 * Find the end of the file mapping for this phdr, and
590 * keep track of the largest address we see for this.
591 */
592 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
593 if (k > elf_bss)
594 elf_bss = k;
595
596 /*
597 * Do the same thing for the memory mapping - between
598 * elf_bss and last_bss is the bss section.
599 */
600 k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
601 if (k > last_bss)
602 last_bss = k;
603 }
604 }
605
606 if (last_bss > elf_bss) {
607 /*
608 * Now fill out the bss section. First pad the last page up
609 * to the page boundary, and then perform a mmap to make sure
610 * that there are zero-mapped pages up to and including the
611 * last bss page.
612 */
613 if (padzero(elf_bss)) {
614 error = -EFAULT;
615 goto out;
616 }
617
618 /* What we have mapped so far */
619 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
620
621 /* Map the last of the bss segment */
622 error = vm_brk(elf_bss, last_bss - elf_bss);
623 if (BAD_ADDR(error))
624 goto out;
625 }
626
627 error = load_addr;
628 out:
629 return error;
630 }
631
632 /*
633 * These are the functions used to load ELF style executables and shared
634 * libraries. There is no binary dependent code anywhere else.
635 */
636
637 #define INTERPRETER_NONE 0
638 #define INTERPRETER_ELF 2
639
640 #ifndef STACK_RND_MASK
641 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
642 #endif
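/*
 * With 4 KiB pages the default mask is 0x7ff, so randomize_stack_top()
 * below shifts the stack top by at most 0x7ff << 12 = 8 MiB - 4 KiB.
 */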
643
644 static unsigned long randomize_stack_top(unsigned long stack_top)
645 {
646 unsigned int random_variable = 0;
647
648 if ((current->flags & PF_RANDOMIZE) &&
649 !(current->personality & ADDR_NO_RANDOMIZE)) {
650 random_variable = get_random_int() & STACK_RND_MASK;
651 random_variable <<= PAGE_SHIFT;
652 }
653 #ifdef CONFIG_STACK_GROWSUP
654 return PAGE_ALIGN(stack_top) + random_variable;
655 #else
656 return PAGE_ALIGN(stack_top) - random_variable;
657 #endif
658 }
659
660 static int load_elf_binary(struct linux_binprm *bprm)
661 {
662 struct file *interpreter = NULL; /* to shut gcc up */
663 unsigned long load_addr = 0, load_bias = 0;
664 int load_addr_set = 0;
665 char * elf_interpreter = NULL;
666 unsigned long error;
667 struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
668 unsigned long elf_bss, elf_brk;
669 int retval, i;
670 unsigned long elf_entry;
671 unsigned long interp_load_addr = 0;
672 unsigned long start_code, end_code, start_data, end_data;
673 unsigned long reloc_func_desc __maybe_unused = 0;
674 int executable_stack = EXSTACK_DEFAULT;
675 unsigned long def_flags = 0;
676 struct pt_regs *regs = current_pt_regs();
677 struct {
678 struct elfhdr elf_ex;
679 struct elfhdr interp_elf_ex;
680 } *loc;
681 struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
682
683 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
684 if (!loc) {
685 retval = -ENOMEM;
686 goto out_ret;
687 }
688
689 /* Get the exec-header */
690 loc->elf_ex = *((struct elfhdr *)bprm->buf);
691
692 retval = -ENOEXEC;
693 /* First of all, some simple consistency checks */
694 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
695 goto out;
696
697 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
698 goto out;
699 if (!elf_check_arch(&loc->elf_ex))
700 goto out;
701 if (!bprm->file->f_op || !bprm->file->f_op->mmap)
702 goto out;
703
704 elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
705 if (!elf_phdata)
706 goto out;
707
708 elf_ppnt = elf_phdata;
709 elf_bss = 0;
710 elf_brk = 0;
711
712 start_code = ~0UL;
713 end_code = 0;
714 start_data = 0;
715 end_data = 0;
716
717 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
718 if (elf_ppnt->p_type == PT_INTERP) {
719 /* This is the program interpreter used for
720 * shared libraries - for now assume that this
721 * is an a.out format binary
722 */
723 retval = -ENOEXEC;
724 if (elf_ppnt->p_filesz > PATH_MAX ||
725 elf_ppnt->p_filesz < 2)
726 goto out_free_ph;
727
728 retval = -ENOMEM;
729 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
730 GFP_KERNEL);
731 if (!elf_interpreter)
732 goto out_free_ph;
733
734 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
735 elf_interpreter,
736 elf_ppnt->p_filesz);
737 if (retval != elf_ppnt->p_filesz) {
738 if (retval >= 0)
739 retval = -EIO;
740 goto out_free_interp;
741 }
742 /* make sure path is NULL terminated */
743 retval = -ENOEXEC;
744 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
745 goto out_free_interp;
746
747 interpreter = open_exec(elf_interpreter);
748 retval = PTR_ERR(interpreter);
749 if (IS_ERR(interpreter))
750 goto out_free_interp;
751
752 /*
753 * If the binary is not readable then enforce
754 * mm->dumpable = 0 regardless of the interpreter's
755 * permissions.
756 */
757 would_dump(bprm, interpreter);
758
759 retval = kernel_read(interpreter, 0, bprm->buf,
760 BINPRM_BUF_SIZE);
761 if (retval != BINPRM_BUF_SIZE) {
762 if (retval >= 0)
763 retval = -EIO;
764 goto out_free_dentry;
765 }
766
767 /* Get the exec headers */
768 loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
769 break;
770 }
771 elf_ppnt++;
772 }
773
774 elf_ppnt = elf_phdata;
775 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
776 switch (elf_ppnt->p_type) {
777 case PT_GNU_STACK:
778 if (elf_ppnt->p_flags & PF_X)
779 executable_stack = EXSTACK_ENABLE_X;
780 else
781 executable_stack = EXSTACK_DISABLE_X;
782 break;
783
784 case PT_LOPROC ... PT_HIPROC:
785 retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
786 bprm->file, false,
787 &arch_state);
788 if (retval)
789 goto out_free_dentry;
790 break;
791 }
792
793 /* Some simple consistency checks for the interpreter */
794 if (elf_interpreter) {
795 retval = -ELIBBAD;
796 /* Not an ELF interpreter */
797 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
798 goto out_free_dentry;
799 /* Verify the interpreter has a valid arch */
800 if (!elf_check_arch(&loc->interp_elf_ex))
801 goto out_free_dentry;
802
803 /* Load the interpreter program headers */
804 interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
805 interpreter);
806 if (!interp_elf_phdata)
807 goto out_free_dentry;
808
809 /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
810 elf_ppnt = interp_elf_phdata;
811 for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
812 switch (elf_ppnt->p_type) {
813 case PT_LOPROC ... PT_HIPROC:
814 retval = arch_elf_pt_proc(&loc->interp_elf_ex,
815 elf_ppnt, interpreter,
816 true, &arch_state);
817 if (retval)
818 goto out_free_dentry;
819 break;
820 }
821 }
822
823 /*
824 * Allow arch code to reject the ELF at this point, whilst it's
825 * still possible to return an error to the code that invoked
826 * the exec syscall.
827 */
828 retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
829 if (retval)
830 goto out_free_dentry;
831
832 /* Flush all traces of the currently running executable */
833 retval = flush_old_exec(bprm);
834 if (retval)
835 goto out_free_dentry;
836
837 /* OK, This is the point of no return */
838 current->mm->def_flags = def_flags;
839
840 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
841 may depend on the personality. */
842 SET_PERSONALITY2(loc->elf_ex, &arch_state);
843 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
844 current->personality |= READ_IMPLIES_EXEC;
845
846 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
847 current->flags |= PF_RANDOMIZE;
848
849 setup_new_exec(bprm);
850
851 /* Do this so that we can load the interpreter, if need be. We will
852 change some of these later */
853 current->mm->free_area_cache = current->mm->mmap_base;
854 current->mm->cached_hole_size = 0;
855 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
856 executable_stack);
857 if (retval < 0) {
858 send_sig(SIGKILL, current, 0);
859 goto out_free_dentry;
860 }
861
862 current->mm->start_stack = bprm->p;
863
864 /* Now we do a little grungy work by mmapping the ELF image into
865 the correct location in memory. */
866 for(i = 0, elf_ppnt = elf_phdata;
867 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
868 int elf_prot = 0, elf_flags;
869 unsigned long k, vaddr;
870
871 if (elf_ppnt->p_type != PT_LOAD)
872 continue;
873
874 if (unlikely (elf_brk > elf_bss)) {
875 unsigned long nbyte;
876
877 /* There was a PT_LOAD segment with p_memsz > p_filesz
878 before this one. Map anonymous pages, if needed,
879 and clear the area. */
880 retval = set_brk(elf_bss + load_bias,
881 elf_brk + load_bias);
882 if (retval) {
883 send_sig(SIGKILL, current, 0);
884 goto out_free_dentry;
885 }
886 nbyte = ELF_PAGEOFFSET(elf_bss);
887 if (nbyte) {
888 nbyte = ELF_MIN_ALIGN - nbyte;
889 if (nbyte > elf_brk - elf_bss)
890 nbyte = elf_brk - elf_bss;
891 if (clear_user((void __user *)elf_bss +
892 load_bias, nbyte)) {
893 /*
894 * This bss-zeroing can fail if the ELF
895 * file specifies odd protections. So
896 * we don't check the return value.
897 */
898 }
899 }
900 }
901
902 if (elf_ppnt->p_flags & PF_R)
903 elf_prot |= PROT_READ;
904 if (elf_ppnt->p_flags & PF_W)
905 elf_prot |= PROT_WRITE;
906 if (elf_ppnt->p_flags & PF_X)
907 elf_prot |= PROT_EXEC;
908
909 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
910
911 vaddr = elf_ppnt->p_vaddr;
912 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
913 elf_flags |= MAP_FIXED;
914 } else if (loc->elf_ex.e_type == ET_DYN) {
915 /* Try and get dynamic programs out of the way of the
916 * default mmap base, as well as whatever program they
917 * might try to exec. This is because the brk will
918 * follow the loader, and is not movable. */
919 #ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
920 /* Memory randomization might have been switched off
921 * in runtime via sysctl or explicit setting of
922 * personality flags.
923 * If that is the case, retain the original non-zero
924 * load_bias value in order to establish proper
925 * non-randomized mappings.
926 */
927 if (current->flags & PF_RANDOMIZE)
928 load_bias = 0;
929 else
930 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
931 #else
932 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
933 #endif
934 }
935
936 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
937 elf_prot, elf_flags, 0);
938 if (BAD_ADDR(error)) {
939 send_sig(SIGKILL, current, 0);
940 retval = IS_ERR((void *)error) ?
941 PTR_ERR((void*)error) : -EINVAL;
942 goto out_free_dentry;
943 }
944
945 if (!load_addr_set) {
946 load_addr_set = 1;
947 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
948 if (loc->elf_ex.e_type == ET_DYN) {
949 load_bias += error -
950 ELF_PAGESTART(load_bias + vaddr);
951 load_addr += load_bias;
952 reloc_func_desc = load_bias;
953 }
954 }
955 k = elf_ppnt->p_vaddr;
956 if (k < start_code)
957 start_code = k;
958 if (start_data < k)
959 start_data = k;
960
961 /*
962 * Check to see if the section's size will overflow the
963 * allowed task size. Note that p_filesz must always be
964 * <= p_memsz so it is only necessary to check p_memsz.
965 */
966 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
967 elf_ppnt->p_memsz > TASK_SIZE ||
968 TASK_SIZE - elf_ppnt->p_memsz < k) {
969 /* set_brk can never work. Avoid overflows. */
970 send_sig(SIGKILL, current, 0);
971 retval = -EINVAL;
972 goto out_free_dentry;
973 }
974
975 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
976
977 if (k > elf_bss)
978 elf_bss = k;
979 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
980 end_code = k;
981 if (end_data < k)
982 end_data = k;
983 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
984 if (k > elf_brk)
985 elf_brk = k;
986 }
987
988 loc->elf_ex.e_entry += load_bias;
989 elf_bss += load_bias;
990 elf_brk += load_bias;
991 start_code += load_bias;
992 end_code += load_bias;
993 start_data += load_bias;
994 end_data += load_bias;
995
996 /* Calling set_brk effectively mmaps the pages that we need
997 * for the bss and break sections. We must do this before
998 * mapping in the interpreter, to make sure it doesn't wind
999 * up getting placed where the bss needs to go.
1000 */
1001 retval = set_brk(elf_bss, elf_brk);
1002 if (retval) {
1003 send_sig(SIGKILL, current, 0);
1004 goto out_free_dentry;
1005 }
1006 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
1007 send_sig(SIGSEGV, current, 0);
1008 retval = -EFAULT; /* Nobody gets to see this, but.. */
1009 goto out_free_dentry;
1010 }
1011
1012 if (elf_interpreter) {
1013 unsigned long interp_map_addr = 0;
1014
1015 elf_entry = load_elf_interp(&loc->interp_elf_ex,
1016 interpreter,
1017 &interp_map_addr,
1018 load_bias, interp_elf_phdata);
1019 if (!IS_ERR((void *)elf_entry)) {
1020 /*
1021 * load_elf_interp() returns relocation
1022 * adjustment
1023 */
1024 interp_load_addr = elf_entry;
1025 elf_entry += loc->interp_elf_ex.e_entry;
1026 }
1027 if (BAD_ADDR(elf_entry)) {
1028 force_sig(SIGSEGV, current);
1029 retval = IS_ERR((void *)elf_entry) ?
1030 (int)elf_entry : -EINVAL;
1031 goto out_free_dentry;
1032 }
1033 reloc_func_desc = interp_load_addr;
1034
1035 allow_write_access(interpreter);
1036 fput(interpreter);
1037 kfree(elf_interpreter);
1038 } else {
1039 elf_entry = loc->elf_ex.e_entry;
1040 if (BAD_ADDR(elf_entry)) {
1041 force_sig(SIGSEGV, current);
1042 retval = -EINVAL;
1043 goto out_free_dentry;
1044 }
1045 }
1046
1047 kfree(interp_elf_phdata);
1048 kfree(elf_phdata);
1049
1050 set_binfmt(&elf_format);
1051
1052 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1053 retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1054 if (retval < 0) {
1055 send_sig(SIGKILL, current, 0);
1056 goto out;
1057 }
1058 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1059
1060 install_exec_creds(bprm);
1061 retval = create_elf_tables(bprm, &loc->elf_ex,
1062 load_addr, interp_load_addr);
1063 if (retval < 0) {
1064 send_sig(SIGKILL, current, 0);
1065 goto out;
1066 }
1067 /* N.B. passed_fileno might not be initialized? */
1068 current->mm->end_code = end_code;
1069 current->mm->start_code = start_code;
1070 current->mm->start_data = start_data;
1071 current->mm->end_data = end_data;
1072 current->mm->start_stack = bprm->p;
1073
1074 #ifdef arch_randomize_brk
1075 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1076 current->mm->brk = current->mm->start_brk =
1077 arch_randomize_brk(current->mm);
1078 #ifdef CONFIG_COMPAT_BRK
1079 current->brk_randomized = 1;
1080 #endif
1081 }
1082 #endif
1083
1084 if (current->personality & MMAP_PAGE_ZERO) {
1085 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
1086 and some applications "depend" upon this behavior.
1087 Since we do not have the power to recompile these, we
1088 emulate the SVr4 behavior. Sigh. */
1089 error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1090 MAP_FIXED | MAP_PRIVATE, 0);
1091 }
1092
1093 #ifdef ELF_PLAT_INIT
1094 /*
1095 * The ABI may specify that certain registers be set up in special
1096 * ways (on i386 %edx is the address of a DT_FINI function, for
1097 * example). In addition, it may also specify (eg, PowerPC64 ELF)
1098 * that the e_entry field is the address of the function descriptor
1099 * for the startup routine, rather than the address of the startup
1100 * routine itself. This macro performs whatever initialization to
1101 * the regs structure is required as well as any relocations to the
1102 * function descriptor entries when executing dynamically linked apps.
1103 */
1104 ELF_PLAT_INIT(regs, reloc_func_desc);
1105 #endif
1106
1107 start_thread(regs, elf_entry, bprm->p);
1108 retval = 0;
1109 out:
1110 kfree(loc);
1111 out_ret:
1112 return retval;
1113
1114 /* error cleanup */
1115 out_free_dentry:
1116 kfree(interp_elf_phdata);
1117 allow_write_access(interpreter);
1118 if (interpreter)
1119 fput(interpreter);
1120 out_free_interp:
1121 kfree(elf_interpreter);
1122 out_free_ph:
1123 kfree(elf_phdata);
1124 goto out;
1125 }
1126
1127 /* This is really simpleminded and specialized - we are loading an
1128 a.out library that is given an ELF header. */
1129 static int load_elf_library(struct file *file)
1130 {
1131 struct elf_phdr *elf_phdata;
1132 struct elf_phdr *eppnt;
1133 unsigned long elf_bss, bss, len;
1134 int retval, error, i, j;
1135 struct elfhdr elf_ex;
1136
1137 error = -ENOEXEC;
1138 retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1139 if (retval != sizeof(elf_ex))
1140 goto out;
1141
1142 if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1143 goto out;
1144
1145 /* First of all, some simple consistency checks */
1146 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1147 !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1148 goto out;
1149
1150 /* Now read in all of the header information */
1151
1152 j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1153 /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1154
1155 error = -ENOMEM;
1156 elf_phdata = kmalloc(j, GFP_KERNEL);
1157 if (!elf_phdata)
1158 goto out;
1159
1160 eppnt = elf_phdata;
1161 error = -ENOEXEC;
1162 retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1163 if (retval != j)
1164 goto out_free_ph;
1165
1166 for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1167 if ((eppnt + i)->p_type == PT_LOAD)
1168 j++;
1169 if (j != 1)
1170 goto out_free_ph;
1171
1172 while (eppnt->p_type != PT_LOAD)
1173 eppnt++;
1174
1175 /* Now use mmap to map the library into memory. */
1176 error = vm_mmap(file,
1177 ELF_PAGESTART(eppnt->p_vaddr),
1178 (eppnt->p_filesz +
1179 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1180 PROT_READ | PROT_WRITE | PROT_EXEC,
1181 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1182 (eppnt->p_offset -
1183 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1184 if (error != ELF_PAGESTART(eppnt->p_vaddr))
1185 goto out_free_ph;
1186
1187 elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1188 if (padzero(elf_bss)) {
1189 error = -EFAULT;
1190 goto out_free_ph;
1191 }
1192
1193 len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1194 ELF_MIN_ALIGN - 1);
1195 bss = eppnt->p_memsz + eppnt->p_vaddr;
1196 if (bss > len)
1197 vm_brk(len, bss - len);
1198 error = 0;
1199
1200 out_free_ph:
1201 kfree(elf_phdata);
1202 out:
1203 return error;
1204 }
1205
1206 #ifdef CONFIG_ELF_CORE
1207 /*
1208 * ELF core dumper
1209 *
1210 * Modelled on fs/exec.c:aout_core_dump()
1211 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1212 */
1213
1214 /*
1215 * The purpose of always_dump_vma() is to make sure that special kernel mappings
1216 * that are useful for post-mortem analysis are included in every core dump.
1217 * In that way we ensure that the core dump is fully interpretable later
1218 * without matching up the same kernel and hardware config to see what PC values
1219 * meant. These special mappings include the vDSO, vsyscall, and other
1220 * architecture-specific mappings.
1221 */
1222 static bool always_dump_vma(struct vm_area_struct *vma)
1223 {
1224 /* Any vsyscall mappings? */
1225 if (vma == get_gate_vma(vma->vm_mm))
1226 return true;
1227 /*
1228 * arch_vma_name() returns non-NULL for special architecture mappings,
1229 * such as vDSO sections.
1230 */
1231 if (arch_vma_name(vma))
1232 return true;
1233
1234 return false;
1235 }
1236
1237 /*
1238 * Decide how much of a segment to dump: all, part, or none.
1239 */
1240 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1241 unsigned long mm_flags)
1242 {
1243 #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1244
1245 /* always dump the vdso and vsyscall sections */
1246 if (always_dump_vma(vma))
1247 goto whole;
1248
1249 if (vma->vm_flags & VM_DONTDUMP)
1250 return 0;
1251
1252 /* Hugetlb memory check */
1253 if (vma->vm_flags & VM_HUGETLB) {
1254 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1255 goto whole;
1256 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1257 goto whole;
1258 return 0;
1259 }
1260
1261 /* Do not dump I/O mapped devices or special mappings */
1262 if (vma->vm_flags & VM_IO)
1263 return 0;
1264
1265 /* By default, dump shared memory if mapped from an anonymous file. */
1266 if (vma->vm_flags & VM_SHARED) {
1267 if (file_inode(vma->vm_file)->i_nlink == 0 ?
1268 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1269 goto whole;
1270 return 0;
1271 }
1272
1273 /* Dump segments that have been written to. */
1274 if (vma->anon_vma && FILTER(ANON_PRIVATE))
1275 goto whole;
1276 if (vma->vm_file == NULL)
1277 return 0;
1278
1279 if (FILTER(MAPPED_PRIVATE))
1280 goto whole;
1281
1282 /*
1283 * If this looks like the beginning of a DSO or executable mapping,
1284 * check for an ELF header. If we find one, dump the first page to
1285 * aid in determining what was mapped here.
1286 */
1287 if (FILTER(ELF_HEADERS) &&
1288 vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1289 u32 __user *header = (u32 __user *) vma->vm_start;
1290 u32 word;
1291 mm_segment_t fs = get_fs();
1292 /*
1293 * Doing it this way gets the constant folded by GCC.
1294 */
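/*
 * That is, the four ELFMAG bytes are assembled into a single u32 so that
 * the check below is one integer compare against the first word of the
 * mapping, with both sides built the same way.
 */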
1295 union {
1296 u32 cmp;
1297 char elfmag[SELFMAG];
1298 } magic;
1299 BUILD_BUG_ON(SELFMAG != sizeof word);
1300 magic.elfmag[EI_MAG0] = ELFMAG0;
1301 magic.elfmag[EI_MAG1] = ELFMAG1;
1302 magic.elfmag[EI_MAG2] = ELFMAG2;
1303 magic.elfmag[EI_MAG3] = ELFMAG3;
1304 /*
1305 * Switch to the user "segment" for get_user(),
1306 * then put back what elf_core_dump() had in place.
1307 */
1308 set_fs(USER_DS);
1309 if (unlikely(get_user(word, header)))
1310 word = 0;
1311 set_fs(fs);
1312 if (word == magic.cmp)
1313 return PAGE_SIZE;
1314 }
1315
1316 #undef FILTER
1317
1318 return 0;
1319
1320 whole:
1321 return vma->vm_end - vma->vm_start;
1322 }
1323
1324 /* An ELF note in memory */
1325 struct memelfnote
1326 {
1327 const char *name;
1328 int type;
1329 unsigned int datasz;
1330 void *data;
1331 };
1332
1333 static int notesize(struct memelfnote *en)
1334 {
1335 int sz;
1336
1337 sz = sizeof(struct elf_note);
1338 sz += roundup(strlen(en->name) + 1, 4);
1339 sz += roundup(en->datasz, 4);
1340
1341 return sz;
1342 }
1343
1344 #define DUMP_WRITE(addr, nr, foffset) \
1345 do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1346
1347 static int alignfile(struct file *file, loff_t *foffset)
1348 {
1349 static const char buf[4] = { 0, };
1350 DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1351 return 1;
1352 }
1353
1354 static int writenote(struct memelfnote *men, struct file *file,
1355 loff_t *foffset)
1356 {
1357 struct elf_note en;
1358 en.n_namesz = strlen(men->name) + 1;
1359 en.n_descsz = men->datasz;
1360 en.n_type = men->type;
1361
1362 DUMP_WRITE(&en, sizeof(en), foffset);
1363 DUMP_WRITE(men->name, en.n_namesz, foffset);
1364 if (!alignfile(file, foffset))
1365 return 0;
1366 DUMP_WRITE(men->data, men->datasz, foffset);
1367 if (!alignfile(file, foffset))
1368 return 0;
1369
1370 return 1;
1371 }
1372 #undef DUMP_WRITE
1373
1374 static void fill_elf_header(struct elfhdr *elf, int segs,
1375 u16 machine, u32 flags)
1376 {
1377 memset(elf, 0, sizeof(*elf));
1378
1379 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1380 elf->e_ident[EI_CLASS] = ELF_CLASS;
1381 elf->e_ident[EI_DATA] = ELF_DATA;
1382 elf->e_ident[EI_VERSION] = EV_CURRENT;
1383 elf->e_ident[EI_OSABI] = ELF_OSABI;
1384
1385 elf->e_type = ET_CORE;
1386 elf->e_machine = machine;
1387 elf->e_version = EV_CURRENT;
1388 elf->e_phoff = sizeof(struct elfhdr);
1389 elf->e_flags = flags;
1390 elf->e_ehsize = sizeof(struct elfhdr);
1391 elf->e_phentsize = sizeof(struct elf_phdr);
1392 elf->e_phnum = segs;
1393
1394 return;
1395 }
1396
1397 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1398 {
1399 phdr->p_type = PT_NOTE;
1400 phdr->p_offset = offset;
1401 phdr->p_vaddr = 0;
1402 phdr->p_paddr = 0;
1403 phdr->p_filesz = sz;
1404 phdr->p_memsz = 0;
1405 phdr->p_flags = 0;
1406 phdr->p_align = 0;
1407 return;
1408 }
1409
1410 static void fill_note(struct memelfnote *note, const char *name, int type,
1411 unsigned int sz, void *data)
1412 {
1413 note->name = name;
1414 note->type = type;
1415 note->datasz = sz;
1416 note->data = data;
1417 return;
1418 }
1419
1420 /*
1421 * fill up all the fields in prstatus from the given task struct, except
1422 * registers which need to be filled up separately.
1423 */
1424 static void fill_prstatus(struct elf_prstatus *prstatus,
1425 struct task_struct *p, long signr)
1426 {
1427 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1428 prstatus->pr_sigpend = p->pending.signal.sig[0];
1429 prstatus->pr_sighold = p->blocked.sig[0];
1430 rcu_read_lock();
1431 prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1432 rcu_read_unlock();
1433 prstatus->pr_pid = task_pid_vnr(p);
1434 prstatus->pr_pgrp = task_pgrp_vnr(p);
1435 prstatus->pr_sid = task_session_vnr(p);
1436 if (thread_group_leader(p)) {
1437 struct task_cputime cputime;
1438
1439 /*
1440 * This is the record for the group leader. It shows the
1441 * group-wide total, not its individual thread total.
1442 */
1443 thread_group_cputime(p, &cputime);
1444 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1445 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1446 } else {
1447 cputime_t utime, stime;
1448
1449 task_cputime(p, &utime, &stime);
1450 cputime_to_timeval(utime, &prstatus->pr_utime);
1451 cputime_to_timeval(stime, &prstatus->pr_stime);
1452 }
1453 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1454 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1455 }
1456
1457 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1458 struct mm_struct *mm)
1459 {
1460 const struct cred *cred;
1461 unsigned int i, len;
1462
1463 /* first copy the parameters from user space */
1464 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1465
1466 len = mm->arg_end - mm->arg_start;
1467 if (len >= ELF_PRARGSZ)
1468 len = ELF_PRARGSZ-1;
1469 if (copy_from_user(&psinfo->pr_psargs,
1470 (const char __user *)mm->arg_start, len))
1471 return -EFAULT;
1472 for(i = 0; i < len; i++)
1473 if (psinfo->pr_psargs[i] == 0)
1474 psinfo->pr_psargs[i] = ' ';
1475 psinfo->pr_psargs[len] = 0;
1476
1477 rcu_read_lock();
1478 psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1479 rcu_read_unlock();
1480 psinfo->pr_pid = task_pid_vnr(p);
1481 psinfo->pr_pgrp = task_pgrp_vnr(p);
1482 psinfo->pr_sid = task_session_vnr(p);
1483
1484 i = p->state ? ffz(~p->state) + 1 : 0;
1485 psinfo->pr_state = i;
1486 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1487 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1488 psinfo->pr_nice = task_nice(p);
1489 psinfo->pr_flag = p->flags;
1490 rcu_read_lock();
1491 cred = __task_cred(p);
1492 SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1493 SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1494 rcu_read_unlock();
1495 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1496
1497 return 0;
1498 }
1499
1500 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1501 {
1502 elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1503 int i = 0;
1504 do
1505 i += 2;
1506 while (auxv[i - 2] != AT_NULL);
1507 fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1508 }
1509
1510 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1511 siginfo_t *siginfo)
1512 {
1513 mm_segment_t old_fs = get_fs();
1514 set_fs(KERNEL_DS);
1515 copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1516 set_fs(old_fs);
1517 fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1518 }
1519
1520 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1521 /*
1522 * Format of NT_FILE note:
1523 *
1524 * long count -- how many files are mapped
1525 * long page_size -- units for file_ofs
1526 * array of [COUNT] elements of
1527 * long start
1528 * long end
1529 * long file_ofs
1530 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1531 */
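/*
 * For example (illustrative values), a note covering two mappings of
 * /lib/ld.so would contain:
 *	2, 4096,
 *	start0, end0, file_ofs0,
 *	start1, end1, file_ofs1,
 *	"/lib/ld.so\0/lib/ld.so\0"
 */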
1532 static void fill_files_note(struct memelfnote *note)
1533 {
1534 struct vm_area_struct *vma;
1535 unsigned count, size, names_ofs, remaining, n;
1536 user_long_t *data;
1537 user_long_t *start_end_ofs;
1538 char *name_base, *name_curpos;
1539
1540 /* *Estimated* file count and total data size needed */
1541 count = current->mm->map_count;
1542 size = count * 64;
1543
1544 names_ofs = (2 + 3 * count) * sizeof(data[0]);
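/*
 * The 64 bytes per mapping above is only an initial guess: three
 * user_long_t's for start/end/file_ofs plus some room for the pathname.
 * If d_path() runs out of room below, the buffer is grown by 25% and the
 * scan retried.
 */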
1545 alloc:
1546 if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1547 goto err;
1548 size = round_up(size, PAGE_SIZE);
1549 data = vmalloc(size);
1550 if (!data)
1551 goto err;
1552
1553 start_end_ofs = data + 2;
1554 name_base = name_curpos = ((char *)data) + names_ofs;
1555 remaining = size - names_ofs;
1556 count = 0;
1557 for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1558 struct file *file;
1559 const char *filename;
1560
1561 file = vma->vm_file;
1562 if (!file)
1563 continue;
1564 filename = d_path(&file->f_path, name_curpos, remaining);
1565 if (IS_ERR(filename)) {
1566 if (PTR_ERR(filename) == -ENAMETOOLONG) {
1567 vfree(data);
1568 size = size * 5 / 4;
1569 goto alloc;
1570 }
1571 continue;
1572 }
1573
1574 /* d_path() fills at the end, move name down */
1575 /* n = strlen(filename) + 1: */
1576 n = (name_curpos + remaining) - filename;
1577 remaining = filename - name_curpos;
1578 memmove(name_curpos, filename, n);
1579 name_curpos += n;
1580
1581 *start_end_ofs++ = vma->vm_start;
1582 *start_end_ofs++ = vma->vm_end;
1583 *start_end_ofs++ = vma->vm_pgoff;
1584 count++;
1585 }
1586
1587 /* Now we know exact count of files, can store it */
1588 data[0] = count;
1589 data[1] = PAGE_SIZE;
1590 /*
1591 * Count is usually less than current->mm->map_count,
1592 * so we need to move the filenames down.
1593 */
1594 n = current->mm->map_count - count;
1595 if (n != 0) {
1596 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1597 memmove(name_base - shift_bytes, name_base,
1598 name_curpos - name_base);
1599 name_curpos -= shift_bytes;
1600 }
1601
1602 size = name_curpos - (char *)data;
1603 fill_note(note, "CORE", NT_FILE, size, data);
1604 err: ;
1605 }
1606
1607 #ifdef CORE_DUMP_USE_REGSET
1608 #include <linux/regset.h>
1609
1610 struct elf_thread_core_info {
1611 struct elf_thread_core_info *next;
1612 struct task_struct *task;
1613 struct elf_prstatus prstatus;
1614 struct memelfnote notes[0];
1615 };
1616
1617 struct elf_note_info {
1618 struct elf_thread_core_info *thread;
1619 struct memelfnote psinfo;
1620 struct memelfnote signote;
1621 struct memelfnote auxv;
1622 struct memelfnote files;
1623 user_siginfo_t csigdata;
1624 size_t size;
1625 int thread_notes;
1626 };
1627
1628 /*
1629 * When a regset has a writeback hook, we call it on each thread before
1630 * dumping user memory. On register window machines, this makes sure the
1631 * user memory backing the register data is up to date before we read it.
1632 */
1633 static void do_thread_regset_writeback(struct task_struct *task,
1634 const struct user_regset *regset)
1635 {
1636 if (regset->writeback)
1637 regset->writeback(task, regset, 1);
1638 }
1639
1640 #ifndef PR_REG_SIZE
1641 #define PR_REG_SIZE(S) sizeof(S)
1642 #endif
1643
1644 #ifndef PRSTATUS_SIZE
1645 #define PRSTATUS_SIZE(S) sizeof(S)
1646 #endif
1647
1648 #ifndef PR_REG_PTR
1649 #define PR_REG_PTR(S) (&((S)->pr_reg))
1650 #endif
1651
1652 #ifndef SET_PR_FPVALID
1653 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1654 #endif
1655
1656 static int fill_thread_core_info(struct elf_thread_core_info *t,
1657 const struct user_regset_view *view,
1658 long signr, size_t *total)
1659 {
1660 unsigned int i;
1661
1662 /*
1663 * NT_PRSTATUS is the one special case, because the regset data
1664 * goes into the pr_reg field inside the note contents, rather
1665 * than being the whole note contents. We fill the rest in here.
1666 * We assume that regset 0 is NT_PRSTATUS.
1667 */
1668 fill_prstatus(&t->prstatus, t->task, signr);
1669 (void) view->regsets[0].get(t->task, &view->regsets[0],
1670 0, PR_REG_SIZE(t->prstatus.pr_reg),
1671 PR_REG_PTR(&t->prstatus), NULL);
1672
1673 fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1674 PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1675 *total += notesize(&t->notes[0]);
1676
1677 do_thread_regset_writeback(t->task, &view->regsets[0]);
1678
1679 /*
1680 * Each other regset might generate a note too. For each regset
1681 * that has no core_note_type or is inactive, we leave t->notes[i]
1682 * all zero and we'll know to skip writing it later.
1683 */
1684 for (i = 1; i < view->n; ++i) {
1685 const struct user_regset *regset = &view->regsets[i];
1686 do_thread_regset_writeback(t->task, regset);
1687 if (regset->core_note_type && regset->get &&
1688 (!regset->active || regset->active(t->task, regset))) {
1689 int ret;
1690 size_t size = regset->n * regset->size;
1691 void *data = kmalloc(size, GFP_KERNEL);
1692 if (unlikely(!data))
1693 return 0;
1694 ret = regset->get(t->task, regset,
1695 0, size, data, NULL);
1696 if (unlikely(ret))
1697 kfree(data);
1698 else {
1699 if (regset->core_note_type != NT_PRFPREG)
1700 fill_note(&t->notes[i], "LINUX",
1701 regset->core_note_type,
1702 size, data);
1703 else {
1704 SET_PR_FPVALID(&t->prstatus, 1);
1705 fill_note(&t->notes[i], "CORE",
1706 NT_PRFPREG, size, data);
1707 }
1708 *total += notesize(&t->notes[i]);
1709 }
1710 }
1711 }
1712
1713 return 1;
1714 }
1715
1716 static int fill_note_info(struct elfhdr *elf, int phdrs,
1717 struct elf_note_info *info,
1718 siginfo_t *siginfo, struct pt_regs *regs)
1719 {
1720 struct task_struct *dump_task = current;
1721 const struct user_regset_view *view = task_user_regset_view(dump_task);
1722 struct elf_thread_core_info *t;
1723 struct elf_prpsinfo *psinfo;
1724 struct core_thread *ct;
1725 unsigned int i;
1726
1727 info->size = 0;
1728 info->thread = NULL;
1729
1730 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1731 if (psinfo == NULL) {
1732 info->psinfo.data = NULL; /* So we don't free this wrongly */
1733 return 0;
1734 }
1735
1736 fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1737
1738 /*
1739 * Figure out how many notes we're going to need for each thread.
1740 */
1741 info->thread_notes = 0;
1742 for (i = 0; i < view->n; ++i)
1743 if (view->regsets[i].core_note_type != 0)
1744 ++info->thread_notes;
1745
1746 /*
1747 * Sanity check. We rely on regset 0 being NT_PRSTATUS,
1748 * since it is our one special case.
1749 */
1750 if (unlikely(info->thread_notes == 0) ||
1751 unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1752 WARN_ON(1);
1753 return 0;
1754 }
1755
1756 /*
1757 * Initialize the ELF file header.
1758 */
1759 fill_elf_header(elf, phdrs,
1760 view->e_machine, view->e_flags);
1761
1762 /*
1763 * Allocate a structure for each thread.
1764 */
1765 for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1766 t = kzalloc(offsetof(struct elf_thread_core_info,
1767 notes[info->thread_notes]),
1768 GFP_KERNEL);
1769 if (unlikely(!t))
1770 return 0;
1771
1772 t->task = ct->task;
1773 if (ct->task == dump_task || !info->thread) {
1774 t->next = info->thread;
1775 info->thread = t;
1776 } else {
1777 /*
1778 * Make sure to keep the original task at
1779 * the head of the list.
1780 */
1781 t->next = info->thread->next;
1782 info->thread->next = t;
1783 }
1784 }
1785
1786 /*
1787 * Now fill in each thread's information.
1788 */
1789 for (t = info->thread; t != NULL; t = t->next)
1790 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1791 return 0;
1792
1793 /*
1794 * Fill in the two process-wide notes.
1795 */
1796 fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1797 info->size += notesize(&info->psinfo);
1798
1799 fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1800 info->size += notesize(&info->signote);
1801
1802 fill_auxv_note(&info->auxv, current->mm);
1803 info->size += notesize(&info->auxv);
1804
1805 fill_files_note(&info->files);
1806 info->size += notesize(&info->files);
1807
1808 return 1;
1809 }
1810
1811 static size_t get_note_info_size(struct elf_note_info *info)
1812 {
1813 return info->size;
1814 }
1815
1816 /*
1817 * Write all the notes for each thread. When writing the first thread, the
1818 * process-wide notes are interleaved after the first thread-specific note.
1819 */
1820 static int write_note_info(struct elf_note_info *info,
1821 struct file *file, loff_t *foffset)
1822 {
1823 bool first = true;
1824 struct elf_thread_core_info *t = info->thread;
1825
1826 do {
1827 int i;
1828
1829 if (!writenote(&t->notes[0], file, foffset))
1830 return 0;
1831
1832 if (first && !writenote(&info->psinfo, file, foffset))
1833 return 0;
1834 if (first && !writenote(&info->signote, file, foffset))
1835 return 0;
1836 if (first && !writenote(&info->auxv, file, foffset))
1837 return 0;
1838 if (first && !writenote(&info->files, file, foffset))
1839 return 0;
1840
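	/*
	 * notes[0] (NT_PRSTATUS) was written above; now emit the remaining
	 * regset notes gathered for this thread, skipping any slots that
	 * fill_thread_core_info() left empty.
	 */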
1841 for (i = 1; i < info->thread_notes; ++i)
1842 if (t->notes[i].data &&
1843 !writenote(&t->notes[i], file, foffset))
1844 return 0;
1845
1846 first = 0;
1847 t = t->next;
1848 } while (t);
1849
1850 return 1;
1851 }
1852
1853 static void free_note_info(struct elf_note_info *info)
1854 {
1855 struct elf_thread_core_info *threads = info->thread;
1856 while (threads) {
1857 unsigned int i;
1858 struct elf_thread_core_info *t = threads;
1859 threads = t->next;
1860 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
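		/*
		 * notes[0].data points at t->prstatus embedded in this
		 * structure (checked by the WARN_ON above), so only the
		 * separately allocated buffers behind notes[1..] are freed.
		 */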
1861 for (i = 1; i < info->thread_notes; ++i)
1862 kfree(t->notes[i].data);
1863 kfree(t);
1864 }
1865 kfree(info->psinfo.data);
1866 vfree(info->files.data);
1867 }
1868
1869 #else
1870
1871 /* Here is the structure in which status of each thread is captured. */
1872 struct elf_thread_status
1873 {
1874 struct list_head list;
1875 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1876 elf_fpregset_t fpu; /* NT_PRFPREG */
1877 struct task_struct *thread;
1878 #ifdef ELF_CORE_COPY_XFPREGS
1879 elf_fpxregset_t xfpu; /* ELF_CORE_XFPREG_TYPE */
1880 #endif
1881 struct memelfnote notes[3];
1882 int num_notes;
1883 };
1884
1885 /*
1886  * In order to add the specific thread information for the ELF file format,
1887  * we need to keep a linked list of every thread's pr_status and then create
1888 * a single section for them in the final core file.
1889 */
1890 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1891 {
1892 int sz = 0;
1893 struct task_struct *p = t->thread;
1894 t->num_notes = 0;
1895
1896 fill_prstatus(&t->prstatus, p, signr);
1897 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1898
1899 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1900 &(t->prstatus));
1901 t->num_notes++;
1902 sz += notesize(&t->notes[0]);
1903
1904 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1905 &t->fpu))) {
1906 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1907 &(t->fpu));
1908 t->num_notes++;
1909 sz += notesize(&t->notes[1]);
1910 }
1911
1912 #ifdef ELF_CORE_COPY_XFPREGS
1913 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1914 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1915 sizeof(t->xfpu), &t->xfpu);
1916 t->num_notes++;
1917 sz += notesize(&t->notes[2]);
1918 }
1919 #endif
1920 return sz;
1921 }
1922
1923 struct elf_note_info {
1924 struct memelfnote *notes;
1925 struct elf_prstatus *prstatus; /* NT_PRSTATUS */
1926 struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */
1927 struct list_head thread_list;
1928 elf_fpregset_t *fpu;
1929 #ifdef ELF_CORE_COPY_XFPREGS
1930 elf_fpxregset_t *xfpu;
1931 #endif
1932 user_siginfo_t csigdata;
1933 int thread_status_size;
1934 int numnote;
1935 };
1936
1937 static int elf_note_info_init(struct elf_note_info *info)
1938 {
1939 memset(info, 0, sizeof(*info));
1940 INIT_LIST_HEAD(&info->thread_list);
1941
1942 /* Allocate space for ELF notes */
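	/*
	 * Eight slots are enough for every note filled in below: PRSTATUS,
	 * PRPSINFO, SIGINFO, AUXV, FILES, the optional FPU note and, where
	 * supported, the extended-FPU note.
	 */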
1943 info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1944 if (!info->notes)
1945 return 0;
1946 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1947 if (!info->psinfo)
1948 return 0;
1949 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1950 if (!info->prstatus)
1951 return 0;
1952 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1953 if (!info->fpu)
1954 return 0;
1955 #ifdef ELF_CORE_COPY_XFPREGS
1956 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1957 if (!info->xfpu)
1958 return 0;
1959 #endif
1960 return 1;
1961 }
1962
1963 static int fill_note_info(struct elfhdr *elf, int phdrs,
1964 struct elf_note_info *info,
1965 siginfo_t *siginfo, struct pt_regs *regs)
1966 {
1967 struct list_head *t;
1968
1969 if (!elf_note_info_init(info))
1970 return 0;
1971
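	/*
	 * Per-thread status notes for the other threads are only collected
	 * when the dump was triggered by a signal; otherwise only the
	 * current thread is described below.
	 */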
1972 if (siginfo->si_signo) {
1973 struct core_thread *ct;
1974 struct elf_thread_status *ets;
1975
1976 for (ct = current->mm->core_state->dumper.next;
1977 ct; ct = ct->next) {
1978 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1979 if (!ets)
1980 return 0;
1981
1982 ets->thread = ct->task;
1983 list_add(&ets->list, &info->thread_list);
1984 }
1985
1986 list_for_each(t, &info->thread_list) {
1987 int sz;
1988
1989 ets = list_entry(t, struct elf_thread_status, list);
1990 sz = elf_dump_thread_status(siginfo->si_signo, ets);
1991 info->thread_status_size += sz;
1992 }
1993 }
1994 	/* now collect the dump for the current task */
1995 memset(info->prstatus, 0, sizeof(*info->prstatus));
1996 fill_prstatus(info->prstatus, current, siginfo->si_signo);
1997 elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1998
1999 /* Set up header */
2000 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
2001
2002 /*
2003 * Set up the notes in similar form to SVR4 core dumps made
2004 * with info from their /proc.
2005 */
2006
2007 fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2008 sizeof(*info->prstatus), info->prstatus);
2009 fill_psinfo(info->psinfo, current->group_leader, current->mm);
2010 fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2011 sizeof(*info->psinfo), info->psinfo);
2012
2013 fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2014 fill_auxv_note(info->notes + 3, current->mm);
2015 fill_files_note(info->notes + 4);
2016
2017 info->numnote = 5;
2018
2019 /* Try to dump the FPU. */
2020 info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2021 info->fpu);
2022 if (info->prstatus->pr_fpvalid)
2023 fill_note(info->notes + info->numnote++,
2024 "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2025 #ifdef ELF_CORE_COPY_XFPREGS
2026 if (elf_core_copy_task_xfpregs(current, info->xfpu))
2027 fill_note(info->notes + info->numnote++,
2028 "LINUX", ELF_CORE_XFPREG_TYPE,
2029 sizeof(*info->xfpu), info->xfpu);
2030 #endif
2031
2032 return 1;
2033 }
2034
2035 static size_t get_note_info_size(struct elf_note_info *info)
2036 {
2037 int sz = 0;
2038 int i;
2039
2040 for (i = 0; i < info->numnote; i++)
2041 sz += notesize(info->notes + i);
2042
2043 sz += info->thread_status_size;
2044
2045 return sz;
2046 }
2047
2048 static int write_note_info(struct elf_note_info *info,
2049 struct file *file, loff_t *foffset)
2050 {
2051 int i;
2052 struct list_head *t;
2053
2054 for (i = 0; i < info->numnote; i++)
2055 if (!writenote(info->notes + i, file, foffset))
2056 return 0;
2057
2058 /* write out the thread status notes section */
2059 list_for_each(t, &info->thread_list) {
2060 struct elf_thread_status *tmp =
2061 list_entry(t, struct elf_thread_status, list);
2062
2063 for (i = 0; i < tmp->num_notes; i++)
2064 if (!writenote(&tmp->notes[i], file, foffset))
2065 return 0;
2066 }
2067
2068 return 1;
2069 }
2070
2071 static void free_note_info(struct elf_note_info *info)
2072 {
2073 while (!list_empty(&info->thread_list)) {
2074 struct list_head *tmp = info->thread_list.next;
2075 list_del(tmp);
2076 kfree(list_entry(tmp, struct elf_thread_status, list));
2077 }
2078
2079 /* Free data allocated by fill_files_note(): */
2080 vfree(info->notes[4].data);
2081
2082 kfree(info->prstatus);
2083 kfree(info->psinfo);
2084 kfree(info->notes);
2085 kfree(info->fpu);
2086 #ifdef ELF_CORE_COPY_XFPREGS
2087 kfree(info->xfpu);
2088 #endif
2089 }
2090
2091 #endif
2092
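/*
 * Pick the starting vma for the dump walk: the first mapped vma if there
 * is one, otherwise fall back to the gate vma (e.g. the vsyscall mapping
 * on some architectures).
 */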
2093 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2094 struct vm_area_struct *gate_vma)
2095 {
2096 struct vm_area_struct *ret = tsk->mm->mmap;
2097
2098 if (ret)
2099 return ret;
2100 return gate_vma;
2101 }
2102 /*
2103 * Helper function for iterating across a vma list. It ensures that the caller
2104 * will visit `gate_vma' prior to terminating the search.
2105 */
2106 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2107 struct vm_area_struct *gate_vma)
2108 {
2109 struct vm_area_struct *ret;
2110
2111 ret = this_vma->vm_next;
2112 if (ret)
2113 return ret;
2114 if (this_vma == gate_vma)
2115 return NULL;
2116 return gate_vma;
2117 }
2118
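/*
 * ELF extended numbering: when the true number of program headers does not
 * fit in e_phnum (which the caller then sets to PN_XNUM), the real count is
 * carried in sh_info of a single SHT_NULL section header.
 */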
2119 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2120 elf_addr_t e_shoff, int segs)
2121 {
2122 elf->e_shoff = e_shoff;
2123 elf->e_shentsize = sizeof(*shdr4extnum);
2124 elf->e_shnum = 1;
2125 elf->e_shstrndx = SHN_UNDEF;
2126
2127 memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2128
2129 shdr4extnum->sh_type = SHT_NULL;
2130 shdr4extnum->sh_size = elf->e_shnum;
2131 shdr4extnum->sh_link = elf->e_shstrndx;
2132 shdr4extnum->sh_info = segs;
2133 }
2134
2135 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2136 unsigned long mm_flags)
2137 {
2138 struct vm_area_struct *vma;
2139 size_t size = 0;
2140
2141 for (vma = first_vma(current, gate_vma); vma != NULL;
2142 vma = next_vma(vma, gate_vma))
2143 size += vma_dump_size(vma, mm_flags);
2144 return size;
2145 }
2146
2147 /*
2148 * Actual dumper
2149 *
2150 * This is a two-pass process; first we find the offsets of the bits,
2151 * and then they are actually written out. If we run out of core limit
2152 * we just truncate.
2153 */
2154 static int elf_core_dump(struct coredump_params *cprm)
2155 {
2156 int has_dumped = 0;
2157 mm_segment_t fs;
2158 int segs;
2159 size_t size = 0;
2160 struct vm_area_struct *vma, *gate_vma;
2161 struct elfhdr *elf = NULL;
2162 loff_t offset = 0, dataoff, foffset;
2163 struct elf_note_info info;
2164 struct elf_phdr *phdr4note = NULL;
2165 struct elf_shdr *shdr4extnum = NULL;
2166 Elf_Half e_phnum;
2167 elf_addr_t e_shoff;
2168
2169 /*
2170 * We no longer stop all VM operations.
2171 *
2172 	 * This is because those processes that could possibly change map_count
2173 * or the mmap / vma pages are now blocked in do_exit on current
2174 * finishing this core dump.
2175 *
2176 * Only ptrace can touch these memory addresses, but it doesn't change
2177 * the map_count or the pages allocated. So no possibility of crashing
2178 * exists while dumping the mm->vm_next areas to the core file.
2179 */
2180
2181 /* alloc memory for large data structures: too large to be on stack */
2182 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2183 if (!elf)
2184 goto out;
2185 /*
2186 	 * The number of segs is recorded in the ELF header as a 16-bit value.
2187 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2188 */
2189 segs = current->mm->map_count;
2190 segs += elf_core_extra_phdrs();
2191
2192 gate_vma = get_gate_vma(current->mm);
2193 if (gate_vma != NULL)
2194 segs++;
2195
2196 /* for notes section */
2197 segs++;
2198
2199 /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2200 	 * this, the kernel supports extended numbering. Have a look at
2201 * include/linux/elf.h for further information. */
2202 e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2203
2204 /*
2205 * Collect all the non-memory information about the process for the
2206 * notes. This also sets up the file header.
2207 */
2208 if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2209 goto cleanup;
2210
2211 has_dumped = 1;
2212
2213 fs = get_fs();
2214 set_fs(KERNEL_DS);
2215
2216 offset += sizeof(*elf); /* Elf header */
2217 offset += segs * sizeof(struct elf_phdr); /* Program headers */
2218 foffset = offset;
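	/*
	 * foffset tracks the current file position while the notes are
	 * written out, so the data area can be page-aligned afterwards.
	 */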
2219
2220 /* Write notes phdr entry */
2221 {
2222 size_t sz = get_note_info_size(&info);
2223
2224 sz += elf_coredump_extra_notes_size();
2225
2226 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2227 if (!phdr4note)
2228 goto end_coredump;
2229
2230 fill_elf_note_phdr(phdr4note, sz, offset);
2231 offset += sz;
2232 }
2233
2234 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2235
2236 offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
2237 offset += elf_core_extra_data_size();
2238 e_shoff = offset;
2239
2240 if (e_phnum == PN_XNUM) {
2241 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2242 if (!shdr4extnum)
2243 goto end_coredump;
2244 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2245 }
2246
2247 offset = dataoff;
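	/*
	 * From here on 'offset' records where each segment's data will live
	 * in the file, while 'size' counts what has actually been emitted so
	 * the dump can stop at cprm->limit.
	 */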
2248
2249 size += sizeof(*elf);
2250 if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
2251 goto end_coredump;
2252
2253 size += sizeof(*phdr4note);
2254 if (size > cprm->limit
2255 || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2256 goto end_coredump;
2257
2258 /* Write program headers for segments dump */
2259 for (vma = first_vma(current, gate_vma); vma != NULL;
2260 vma = next_vma(vma, gate_vma)) {
2261 struct elf_phdr phdr;
2262
2263 phdr.p_type = PT_LOAD;
2264 phdr.p_offset = offset;
2265 phdr.p_vaddr = vma->vm_start;
2266 phdr.p_paddr = 0;
2267 phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2268 phdr.p_memsz = vma->vm_end - vma->vm_start;
2269 offset += phdr.p_filesz;
2270 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2271 if (vma->vm_flags & VM_WRITE)
2272 phdr.p_flags |= PF_W;
2273 if (vma->vm_flags & VM_EXEC)
2274 phdr.p_flags |= PF_X;
2275 phdr.p_align = ELF_EXEC_PAGESIZE;
2276
2277 size += sizeof(phdr);
2278 if (size > cprm->limit
2279 || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2280 goto end_coredump;
2281 }
2282
2283 if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2284 goto end_coredump;
2285
2286 /* write out the notes section */
2287 if (!write_note_info(&info, cprm->file, &foffset))
2288 goto end_coredump;
2289
2290 if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2291 goto end_coredump;
2292
2293 /* Align to page */
2294 if (!dump_seek(cprm->file, dataoff - foffset))
2295 goto end_coredump;
2296
2297 for (vma = first_vma(current, gate_vma); vma != NULL;
2298 vma = next_vma(vma, gate_vma)) {
2299 unsigned long addr;
2300 unsigned long end;
2301
2302 end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2303
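		/*
		 * Dump the vma one page at a time: pages that can be grabbed
		 * are copied out, anything else (e.g. holes) is skipped with
		 * a seek so later file offsets still line up.
		 */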
2304 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2305 struct page *page;
2306 int stop;
2307
2308 page = get_dump_page(addr);
2309 if (page) {
2310 void *kaddr = kmap(page);
2311 stop = ((size += PAGE_SIZE) > cprm->limit) ||
2312 !dump_write(cprm->file, kaddr,
2313 PAGE_SIZE);
2314 kunmap(page);
2315 page_cache_release(page);
2316 } else
2317 stop = !dump_seek(cprm->file, PAGE_SIZE);
2318 if (stop)
2319 goto end_coredump;
2320 }
2321 }
2322
2323 if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2324 goto end_coredump;
2325
2326 if (e_phnum == PN_XNUM) {
2327 size += sizeof(*shdr4extnum);
2328 if (size > cprm->limit
2329 || !dump_write(cprm->file, shdr4extnum,
2330 sizeof(*shdr4extnum)))
2331 goto end_coredump;
2332 }
2333
2334 end_coredump:
2335 set_fs(fs);
2336
2337 cleanup:
2338 free_note_info(&info);
2339 kfree(shdr4extnum);
2340 kfree(phdr4note);
2341 kfree(elf);
2342 out:
2343 return has_dumped;
2344 }
2345
2346 #endif /* CONFIG_ELF_CORE */
2347
2348 static int __init init_elf_binfmt(void)
2349 {
2350 register_binfmt(&elf_format);
2351 return 0;
2352 }
2353
2354 static void __exit exit_elf_binfmt(void)
2355 {
2356 	/* Remove the ELF loader. */
2357 unregister_binfmt(&elf_format);
2358 }
2359
2360 core_initcall(init_elf_binfmt);
2361 module_exit(exit_elf_binfmt);
2362 MODULE_LICENSE("GPL");
2363