1 /*
2 * linux/fs/binfmt_elf.c
3 *
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7 * Tools".
8 *
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10 */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/elf-randomize.h>
35 #include <linux/utsname.h>
36 #include <linux/coredump.h>
37 #include <linux/sched.h>
38 #include <linux/dax.h>
39 #include <asm/uaccess.h>
40 #include <asm/param.h>
41 #include <asm/page.h>
42
43 #ifndef user_long_t
44 #define user_long_t long
45 #endif
46 #ifndef user_siginfo_t
47 #define user_siginfo_t siginfo_t
48 #endif
49
50 static int load_elf_binary(struct linux_binprm *bprm);
51 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
52 int, int, unsigned long);
53
54 #ifdef CONFIG_USELIB
55 static int load_elf_library(struct file *);
56 #else
57 #define load_elf_library NULL
58 #endif
59
60 /*
61 * If we don't support core dumping, then supply a NULL so we
62 * don't even try.
63 */
64 #ifdef CONFIG_ELF_CORE
65 static int elf_core_dump(struct coredump_params *cprm);
66 #else
67 #define elf_core_dump NULL
68 #endif
69
70 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
71 #define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE
72 #else
73 #define ELF_MIN_ALIGN PAGE_SIZE
74 #endif
75
76 #ifndef ELF_CORE_EFLAGS
77 #define ELF_CORE_EFLAGS 0
78 #endif
79
80 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
81 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
82 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
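/*
 * Editor's note, illustrative only (not part of the original source):
 * assuming ELF_MIN_ALIGN == 0x1000 (4 KiB pages), the macros above split a
 * virtual address as follows:
 *
 *	ELF_PAGESTART(0x0804a123)  == 0x0804a000   // round down to page start
 *	ELF_PAGEOFFSET(0x0804a123) == 0x00000123   // offset within the page
 *	ELF_PAGEALIGN(0x0804a123)  == 0x0804b000   // round up to next page
 */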
83
84 static struct linux_binfmt elf_format = {
85 .module = THIS_MODULE,
86 .load_binary = load_elf_binary,
87 .load_shlib = load_elf_library,
88 .core_dump = elf_core_dump,
89 .min_coredump = ELF_EXEC_PAGESIZE,
90 };
91
92 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
93
94 static int set_brk(unsigned long start, unsigned long end)
95 {
96 start = ELF_PAGEALIGN(start);
97 end = ELF_PAGEALIGN(end);
98 if (end > start) {
99 unsigned long addr;
100 addr = vm_brk(start, end - start);
101 if (BAD_ADDR(addr))
102 return addr;
103 }
104 current->mm->start_brk = current->mm->brk = end;
105 return 0;
106 }
107
108 /* We need to explicitly zero any fractional pages
109 after the data section (i.e. bss). These would
110 otherwise contain junk from the file that should
111 not be in memory
112 */
113 static int padzero(unsigned long elf_bss)
114 {
115 unsigned long nbyte;
116
117 nbyte = ELF_PAGEOFFSET(elf_bss);
118 if (nbyte) {
119 nbyte = ELF_MIN_ALIGN - nbyte;
120 if (clear_user((void __user *) elf_bss, nbyte))
121 return -EFAULT;
122 }
123 return 0;
124 }
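/*
 * Editor's note, worked example (illustrative, assuming
 * ELF_MIN_ALIGN == 0x1000): if the file-backed part of the last segment ends
 * at elf_bss == 0x0804a123, then ELF_PAGEOFFSET(elf_bss) == 0x123, so
 * padzero() clears the remaining 0x1000 - 0x123 == 0xedd bytes, i.e. user
 * addresses 0x0804a123..0x0804afff, leaving the tail of that page zeroed as
 * the start of the bss.
 */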
125
126 /* Let's use some macros to make this stack manipulation a little clearer */
127 #ifdef CONFIG_STACK_GROWSUP
128 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
129 #define STACK_ROUND(sp, items) \
130 ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
131 #define STACK_ALLOC(sp, len) ({ \
132 elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
133 old_sp; })
134 #else
135 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
136 #define STACK_ROUND(sp, items) \
137 (((unsigned long) (sp - items)) &~ 15UL)
138 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
139 #endif
140
141 #ifndef ELF_BASE_PLATFORM
142 /*
143 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
144 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
145 * will be copied to the user stack in the same manner as AT_PLATFORM.
146 */
147 #define ELF_BASE_PLATFORM NULL
148 #endif
149
150 static int
151 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
152 unsigned long load_addr, unsigned long interp_load_addr)
153 {
154 unsigned long p = bprm->p;
155 int argc = bprm->argc;
156 int envc = bprm->envc;
157 elf_addr_t __user *argv;
158 elf_addr_t __user *envp;
159 elf_addr_t __user *sp;
160 elf_addr_t __user *u_platform;
161 elf_addr_t __user *u_base_platform;
162 elf_addr_t __user *u_rand_bytes;
163 const char *k_platform = ELF_PLATFORM;
164 const char *k_base_platform = ELF_BASE_PLATFORM;
165 unsigned char k_rand_bytes[16];
166 int items;
167 elf_addr_t *elf_info;
168 int ei_index = 0;
169 const struct cred *cred = current_cred();
170 struct vm_area_struct *vma;
171
172 /*
173 * In some cases (e.g. Hyper-Threading), we want to avoid L1
174 * evictions by the processes running on the same package. One
175 * thing we can do is to shuffle the initial stack for them.
176 */
177
178 p = arch_align_stack(p);
179
180 /*
181 * If this architecture has a platform capability string, copy it
182 * to userspace. In some cases (Sparc), this info is impossible
183 * for userspace to get any other way, in others (i386) it is
184 * merely difficult.
185 */
186 u_platform = NULL;
187 if (k_platform) {
188 size_t len = strlen(k_platform) + 1;
189
190 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
191 if (__copy_to_user(u_platform, k_platform, len))
192 return -EFAULT;
193 }
194
195 /*
196 * If this architecture has a "base" platform capability
197 * string, copy it to userspace.
198 */
199 u_base_platform = NULL;
200 if (k_base_platform) {
201 size_t len = strlen(k_base_platform) + 1;
202
203 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
204 if (__copy_to_user(u_base_platform, k_base_platform, len))
205 return -EFAULT;
206 }
207
208 /*
209 * Generate 16 random bytes for userspace PRNG seeding.
210 */
211 get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
212 u_rand_bytes = (elf_addr_t __user *)
213 STACK_ALLOC(p, sizeof(k_rand_bytes));
214 if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
215 return -EFAULT;
216
217 /* Create the ELF interpreter info */
218 elf_info = (elf_addr_t *)current->mm->saved_auxv;
219 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
220 #define NEW_AUX_ENT(id, val) \
221 do { \
222 elf_info[ei_index++] = id; \
223 elf_info[ei_index++] = val; \
224 } while (0)
225
226 #ifdef ARCH_DLINFO
227 /*
228 * ARCH_DLINFO must come first so PPC can do its special alignment of
229 * AUXV.
230 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
231 * ARCH_DLINFO changes
232 */
233 ARCH_DLINFO;
234 #endif
235 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
236 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
237 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
238 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
239 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
240 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
241 NEW_AUX_ENT(AT_BASE, interp_load_addr);
242 NEW_AUX_ENT(AT_FLAGS, 0);
243 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
244 NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
245 NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
246 NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
247 NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
248 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
249 NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
250 #ifdef ELF_HWCAP2
251 NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
252 #endif
253 NEW_AUX_ENT(AT_EXECFN, bprm->exec);
254 if (k_platform) {
255 NEW_AUX_ENT(AT_PLATFORM,
256 (elf_addr_t)(unsigned long)u_platform);
257 }
258 if (k_base_platform) {
259 NEW_AUX_ENT(AT_BASE_PLATFORM,
260 (elf_addr_t)(unsigned long)u_base_platform);
261 }
262 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
263 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
264 }
265 #undef NEW_AUX_ENT
266 /* AT_NULL is zero; clear the rest too */
267 memset(&elf_info[ei_index], 0,
268 sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
269
270 /* And advance past the AT_NULL entry. */
271 ei_index += 2;
272
273 sp = STACK_ADD(p, ei_index);
274
275 items = (argc + 1) + (envc + 1) + 1;
276 bprm->p = STACK_ROUND(sp, items);
277
278 /* Point sp at the lowest address on the stack */
279 #ifdef CONFIG_STACK_GROWSUP
280 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
281 bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
282 #else
283 sp = (elf_addr_t __user *)bprm->p;
284 #endif
285
286
287 /*
288 * Grow the stack manually; some architectures have a limit on how
289 * far ahead a user-space access may be in order to grow the stack.
290 */
291 vma = find_extend_vma(current->mm, bprm->p);
292 if (!vma)
293 return -EFAULT;
294
295 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
296 if (__put_user(argc, sp++))
297 return -EFAULT;
298 argv = sp;
299 envp = argv + argc + 1;
300
301 /* Populate argv and envp */
302 p = current->mm->arg_end = current->mm->arg_start;
303 while (argc-- > 0) {
304 size_t len;
305 if (__put_user((elf_addr_t)p, argv++))
306 return -EFAULT;
307 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
308 if (!len || len > MAX_ARG_STRLEN)
309 return -EINVAL;
310 p += len;
311 }
312 if (__put_user(0, argv))
313 return -EFAULT;
314 current->mm->arg_end = current->mm->env_start = p;
315 while (envc-- > 0) {
316 size_t len;
317 if (__put_user((elf_addr_t)p, envp++))
318 return -EFAULT;
319 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
320 if (!len || len > MAX_ARG_STRLEN)
321 return -EINVAL;
322 p += len;
323 }
324 if (__put_user(0, envp))
325 return -EFAULT;
326 current->mm->env_end = p;
327
328 /* Put the elf_info on the stack in the right place. */
329 sp = (elf_addr_t __user *)envp + 1;
330 if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
331 return -EFAULT;
332 return 0;
333 }
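/*
 * Editor's note, illustrative sketch (not part of the original source) of
 * the initial stack image that create_elf_tables() leaves behind on a
 * stack-grows-down architecture, from bprm->p towards higher addresses:
 *
 *	bprm->p -> [ argc                                ]
 *	           [ argv[0] ... argv[argc-1], NULL      ]
 *	           [ envp[0] ... envp[envc-1], NULL      ]
 *	           [ auxv {AT_*, value} pairs, AT_NULL   ]
 *	           [ 16-byte rounding / alignment gap    ]
 *	           [ AT_RANDOM bytes, platform strings   ]
 *	           [ argument and environment strings    ]
 *	                                       <- near STACK_TOP
 */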
334
335 #ifndef elf_map
336
337 static unsigned long elf_map(struct file *filep, unsigned long addr,
338 struct elf_phdr *eppnt, int prot, int type,
339 unsigned long total_size)
340 {
341 unsigned long map_addr;
342 unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
343 unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
344 addr = ELF_PAGESTART(addr);
345 size = ELF_PAGEALIGN(size);
346
347 /* mmap() will return -EINVAL if given a zero size, but a
348 * segment with zero filesize is perfectly valid */
349 if (!size)
350 return addr;
351
352 /*
353 * total_size is the size of the ELF (interpreter) image.
354 * The _first_ mmap needs to know the full size, otherwise
355 * randomization might put this image into an overlapping
356 * position with the ELF binary image. (since size < total_size)
357 * So we first map the 'big' image - and unmap the remainder at
358 * the end. (which unmap is needed for ELF images with holes.)
359 */
360 if (total_size) {
361 total_size = ELF_PAGEALIGN(total_size);
362 map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
363 if (!BAD_ADDR(map_addr))
364 vm_munmap(map_addr+size, total_size-size);
365 } else
366 map_addr = vm_mmap(filep, addr, size, prot, type, off);
367
368 return(map_addr);
369 }
370
371 #endif /* !elf_map */
372
373 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
374 {
375 int i, first_idx = -1, last_idx = -1;
376
377 for (i = 0; i < nr; i++) {
378 if (cmds[i].p_type == PT_LOAD) {
379 last_idx = i;
380 if (first_idx == -1)
381 first_idx = i;
382 }
383 }
384 if (first_idx == -1)
385 return 0;
386
387 return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
388 ELF_PAGESTART(cmds[first_idx].p_vaddr);
389 }
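/*
 * Editor's note, worked example (illustrative only): given two PT_LOAD
 * headers such as
 *
 *	p_vaddr = 0x0000, p_memsz = 0x0800   (text)
 *	p_vaddr = 0x2000, p_memsz = 0x0400   (data + bss)
 *
 * total_mapping_size() returns 0x2000 + 0x0400 - ELF_PAGESTART(0x0000)
 * == 0x2400, i.e. the span from the first mappable page to the end of the
 * last segment, including any hole between the segments.
 */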
390
391 /**
392 * load_elf_phdrs() - load ELF program headers
393 * @elf_ex: ELF header of the binary whose program headers should be loaded
394 * @elf_file: the opened ELF binary file
395 *
396 * Loads ELF program headers from the binary file elf_file, which has the ELF
397 * header pointed to by elf_ex, into a newly allocated array. The caller is
398 * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
399 */
400 static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
401 struct file *elf_file)
402 {
403 struct elf_phdr *elf_phdata = NULL;
404 int retval, size, err = -1;
405
406 /*
407 * If the size of this structure has changed, then punt, since
408 * we will be doing the wrong thing.
409 */
410 if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
411 goto out;
412
413 /* Sanity check the number of program headers... */
414 if (elf_ex->e_phnum < 1 ||
415 elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
416 goto out;
417
418 /* ...and their total size. */
419 size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
420 if (size > ELF_MIN_ALIGN)
421 goto out;
422
423 elf_phdata = kmalloc(size, GFP_KERNEL);
424 if (!elf_phdata)
425 goto out;
426
427 /* Read in the program headers */
428 retval = kernel_read(elf_file, elf_ex->e_phoff,
429 (char *)elf_phdata, size);
430 if (retval != size) {
431 err = (retval < 0) ? retval : -EIO;
432 goto out;
433 }
434
435 /* Success! */
436 err = 0;
437 out:
438 if (err) {
439 kfree(elf_phdata);
440 elf_phdata = NULL;
441 }
442 return elf_phdata;
443 }
444
445 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
446
447 /**
448 * struct arch_elf_state - arch-specific ELF loading state
449 *
450 * This structure is used to preserve architecture specific data during
451 * the loading of an ELF file, throughout the checking of architecture
452 * specific ELF headers & through to the point where the ELF load is
453 * known to be proceeding (i.e. SET_PERSONALITY).
454 *
455 * This implementation is a dummy for architectures which require no
456 * specific state.
457 */
458 struct arch_elf_state {
459 };
460
461 #define INIT_ARCH_ELF_STATE {}
462
463 /**
464 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
465 * @ehdr: The main ELF header
466 * @phdr: The program header to check
467 * @elf: The open ELF file
468 * @is_interp: True if the phdr is from the interpreter of the ELF being
469 * loaded, else false.
470 * @state: Architecture-specific state preserved throughout the process
471 * of loading the ELF.
472 *
473 * Inspects the program header phdr to validate its correctness and/or
474 * suitability for the system. Called once per ELF program header in the
475 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
476 * interpreter.
477 *
478 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
479 * with that return code.
480 */
481 static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
482 struct elf_phdr *phdr,
483 struct file *elf, bool is_interp,
484 struct arch_elf_state *state)
485 {
486 /* Dummy implementation, always proceed */
487 return 0;
488 }
489
490 /**
491 * arch_check_elf() - check an ELF executable
492 * @ehdr: The main ELF header
493 * @has_interp: True if the ELF has an interpreter, else false.
494 * @interp_ehdr: The interpreter's ELF header
495 * @state: Architecture-specific state preserved throughout the process
496 * of loading the ELF.
497 *
498 * Provides a final opportunity for architecture code to reject the loading
499 * of the ELF & cause an exec syscall to return an error. This is called after
500 * all program headers to be checked by arch_elf_pt_proc have been.
501 *
502 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
503 * with that return code.
504 */
505 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
506 struct elfhdr *interp_ehdr,
507 struct arch_elf_state *state)
508 {
509 /* Dummy implementation, always proceed */
510 return 0;
511 }
512
513 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
514
515 /* This is much more generalized than the library routine read function,
516 so we keep this separate. Technically the library read function
517 is only provided so that we can read a.out libraries that have
518 an ELF header */
519
520 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
521 struct file *interpreter, unsigned long *interp_map_addr,
522 unsigned long no_base, struct elf_phdr *interp_elf_phdata)
523 {
524 struct elf_phdr *eppnt;
525 unsigned long load_addr = 0;
526 int load_addr_set = 0;
527 unsigned long last_bss = 0, elf_bss = 0;
528 unsigned long error = ~0UL;
529 unsigned long total_size;
530 int i;
531
532 /* First of all, some simple consistency checks */
533 if (interp_elf_ex->e_type != ET_EXEC &&
534 interp_elf_ex->e_type != ET_DYN)
535 goto out;
536 if (!elf_check_arch(interp_elf_ex))
537 goto out;
538 if (!interpreter->f_op->mmap)
539 goto out;
540
541 total_size = total_mapping_size(interp_elf_phdata,
542 interp_elf_ex->e_phnum);
543 if (!total_size) {
544 error = -EINVAL;
545 goto out;
546 }
547
548 eppnt = interp_elf_phdata;
549 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
550 if (eppnt->p_type == PT_LOAD) {
551 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
552 int elf_prot = 0;
553 unsigned long vaddr = 0;
554 unsigned long k, map_addr;
555
556 if (eppnt->p_flags & PF_R)
557 elf_prot = PROT_READ;
558 if (eppnt->p_flags & PF_W)
559 elf_prot |= PROT_WRITE;
560 if (eppnt->p_flags & PF_X)
561 elf_prot |= PROT_EXEC;
562 vaddr = eppnt->p_vaddr;
563 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
564 elf_type |= MAP_FIXED;
565 else if (no_base && interp_elf_ex->e_type == ET_DYN)
566 load_addr = -vaddr;
567
568 map_addr = elf_map(interpreter, load_addr + vaddr,
569 eppnt, elf_prot, elf_type, total_size);
570 total_size = 0;
571 if (!*interp_map_addr)
572 *interp_map_addr = map_addr;
573 error = map_addr;
574 if (BAD_ADDR(map_addr))
575 goto out;
576
577 if (!load_addr_set &&
578 interp_elf_ex->e_type == ET_DYN) {
579 load_addr = map_addr - ELF_PAGESTART(vaddr);
580 load_addr_set = 1;
581 }
582
583 /*
584 * Check to see if the section's size will overflow the
585 * allowed task size. Note that p_filesz must always be
586 * <= p_memsz so it's only necessary to check p_memsz.
587 */
588 k = load_addr + eppnt->p_vaddr;
589 if (BAD_ADDR(k) ||
590 eppnt->p_filesz > eppnt->p_memsz ||
591 eppnt->p_memsz > TASK_SIZE ||
592 TASK_SIZE - eppnt->p_memsz < k) {
593 error = -ENOMEM;
594 goto out;
595 }
596
597 /*
598 * Find the end of the file mapping for this phdr, and
599 * keep track of the largest address we see for this.
600 */
601 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
602 if (k > elf_bss)
603 elf_bss = k;
604
605 /*
606 * Do the same thing for the memory mapping - between
607 * elf_bss and last_bss is the bss section.
608 */
609 k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
610 if (k > last_bss)
611 last_bss = k;
612 }
613 }
614
615 /*
616 * Now fill out the bss section: first pad the last page from
617 * the file up to the page boundary, and zero it from elf_bss
618 * up to the end of the page.
619 */
620 if (padzero(elf_bss)) {
621 error = -EFAULT;
622 goto out;
623 }
624 /*
625 * Next, align both the file and mem bss up to the page size,
626 * since this is where elf_bss was just zeroed up to, and where
627 * last_bss will end after the vm_brk() below.
628 */
629 elf_bss = ELF_PAGEALIGN(elf_bss);
630 last_bss = ELF_PAGEALIGN(last_bss);
631 /* Finally, if there is still more bss to allocate, do it. */
632 if (last_bss > elf_bss) {
633 error = vm_brk(elf_bss, last_bss - elf_bss);
634 if (BAD_ADDR(error))
635 goto out;
636 }
637
638 error = load_addr;
639 out:
640 return error;
641 }
642
643 /*
644 * These are the functions used to load ELF style executables and shared
645 * libraries. There is no binary dependent code anywhere else.
646 */
647
648 #ifndef STACK_RND_MASK
649 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
650 #endif
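/*
 * Editor's note, illustrative arithmetic (assuming PAGE_SHIFT == 12, i.e.
 * 4 KiB pages): the default mask is 0x7ff, so randomize_stack_top() below
 * may move the stack top by up to 0x7ff << 12 == 0x7ff000 bytes, just under
 * 8 MiB of virtual address space, matching the comment above.
 */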
651
652 static unsigned long randomize_stack_top(unsigned long stack_top)
653 {
654 unsigned long random_variable = 0;
655
656 if ((current->flags & PF_RANDOMIZE) &&
657 !(current->personality & ADDR_NO_RANDOMIZE)) {
658 random_variable = get_random_long();
659 random_variable &= STACK_RND_MASK;
660 random_variable <<= PAGE_SHIFT;
661 }
662 #ifdef CONFIG_STACK_GROWSUP
663 return PAGE_ALIGN(stack_top) + random_variable;
664 #else
665 return PAGE_ALIGN(stack_top) - random_variable;
666 #endif
667 }
668
669 static int load_elf_binary(struct linux_binprm *bprm)
670 {
671 struct file *interpreter = NULL; /* to shut gcc up */
672 unsigned long load_addr = 0, load_bias = 0;
673 int load_addr_set = 0;
674 char * elf_interpreter = NULL;
675 unsigned long error;
676 struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
677 unsigned long elf_bss, elf_brk;
678 int retval, i;
679 unsigned long elf_entry;
680 unsigned long interp_load_addr = 0;
681 unsigned long start_code, end_code, start_data, end_data;
682 unsigned long reloc_func_desc __maybe_unused = 0;
683 int executable_stack = EXSTACK_DEFAULT;
684 struct pt_regs *regs = current_pt_regs();
685 struct {
686 struct elfhdr elf_ex;
687 struct elfhdr interp_elf_ex;
688 } *loc;
689 struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
690
691 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
692 if (!loc) {
693 retval = -ENOMEM;
694 goto out_ret;
695 }
696
697 /* Get the exec-header */
698 loc->elf_ex = *((struct elfhdr *)bprm->buf);
699
700 retval = -ENOEXEC;
701 /* First of all, some simple consistency checks */
702 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
703 goto out;
704
705 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
706 goto out;
707 if (!elf_check_arch(&loc->elf_ex))
708 goto out;
709 if (!bprm->file->f_op->mmap)
710 goto out;
711
712 elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
713 if (!elf_phdata)
714 goto out;
715
716 elf_ppnt = elf_phdata;
717 elf_bss = 0;
718 elf_brk = 0;
719
720 start_code = ~0UL;
721 end_code = 0;
722 start_data = 0;
723 end_data = 0;
724
725 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
726 if (elf_ppnt->p_type == PT_INTERP) {
727 /* This is the program interpreter used for
728 * shared libraries - for now assume that this
729 * is an a.out format binary
730 */
731 retval = -ENOEXEC;
732 if (elf_ppnt->p_filesz > PATH_MAX ||
733 elf_ppnt->p_filesz < 2)
734 goto out_free_ph;
735
736 retval = -ENOMEM;
737 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
738 GFP_KERNEL);
739 if (!elf_interpreter)
740 goto out_free_ph;
741
742 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
743 elf_interpreter,
744 elf_ppnt->p_filesz);
745 if (retval != elf_ppnt->p_filesz) {
746 if (retval >= 0)
747 retval = -EIO;
748 goto out_free_interp;
749 }
750 /* make sure path is NULL terminated */
751 retval = -ENOEXEC;
752 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
753 goto out_free_interp;
754
755 interpreter = open_exec(elf_interpreter);
756 retval = PTR_ERR(interpreter);
757 if (IS_ERR(interpreter))
758 goto out_free_interp;
759
760 /*
761 * If the binary is not readable then enforce
762 * mm->dumpable = 0 regardless of the interpreter's
763 * permissions.
764 */
765 would_dump(bprm, interpreter);
766
767 /* Get the exec headers */
768 retval = kernel_read(interpreter, 0,
769 (void *)&loc->interp_elf_ex,
770 sizeof(loc->interp_elf_ex));
771 if (retval != sizeof(loc->interp_elf_ex)) {
772 if (retval >= 0)
773 retval = -EIO;
774 goto out_free_dentry;
775 }
776
777 break;
778 }
779 elf_ppnt++;
780 }
781
782 elf_ppnt = elf_phdata;
783 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
784 switch (elf_ppnt->p_type) {
785 case PT_GNU_STACK:
786 if (elf_ppnt->p_flags & PF_X)
787 executable_stack = EXSTACK_ENABLE_X;
788 else
789 executable_stack = EXSTACK_DISABLE_X;
790 break;
791
792 case PT_LOPROC ... PT_HIPROC:
793 retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
794 bprm->file, false,
795 &arch_state);
796 if (retval)
797 goto out_free_dentry;
798 break;
799 }
800
801 /* Some simple consistency checks for the interpreter */
802 if (elf_interpreter) {
803 retval = -ELIBBAD;
804 /* Not an ELF interpreter */
805 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
806 goto out_free_dentry;
807 /* Verify the interpreter has a valid arch */
808 if (!elf_check_arch(&loc->interp_elf_ex))
809 goto out_free_dentry;
810
811 /* Load the interpreter program headers */
812 interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
813 interpreter);
814 if (!interp_elf_phdata)
815 goto out_free_dentry;
816
817 /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
818 elf_ppnt = interp_elf_phdata;
819 for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
820 switch (elf_ppnt->p_type) {
821 case PT_LOPROC ... PT_HIPROC:
822 retval = arch_elf_pt_proc(&loc->interp_elf_ex,
823 elf_ppnt, interpreter,
824 true, &arch_state);
825 if (retval)
826 goto out_free_dentry;
827 break;
828 }
829 }
830
831 /*
832 * Allow arch code to reject the ELF at this point, whilst it's
833 * still possible to return an error to the code that invoked
834 * the exec syscall.
835 */
836 retval = arch_check_elf(&loc->elf_ex,
837 !!interpreter, &loc->interp_elf_ex,
838 &arch_state);
839 if (retval)
840 goto out_free_dentry;
841
842 /* Flush all traces of the currently running executable */
843 retval = flush_old_exec(bprm);
844 if (retval)
845 goto out_free_dentry;
846
847 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
848 may depend on the personality. */
849 SET_PERSONALITY2(loc->elf_ex, &arch_state);
850 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
851 current->personality |= READ_IMPLIES_EXEC;
852
853 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
854 current->flags |= PF_RANDOMIZE;
855
856 setup_new_exec(bprm);
857 install_exec_creds(bprm);
858
859 /* Do this so that we can load the interpreter, if need be. We will
860 change some of these later */
861 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
862 executable_stack);
863 if (retval < 0)
864 goto out_free_dentry;
865
866 current->mm->start_stack = bprm->p;
867
868 /* Now we do a little grungy work by mmapping the ELF image into
869 the correct location in memory. */
870 for(i = 0, elf_ppnt = elf_phdata;
871 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
872 int elf_prot = 0, elf_flags;
873 unsigned long k, vaddr;
874 unsigned long total_size = 0;
875
876 if (elf_ppnt->p_type != PT_LOAD)
877 continue;
878
879 if (unlikely (elf_brk > elf_bss)) {
880 unsigned long nbyte;
881
882 /* There was a PT_LOAD segment with p_memsz > p_filesz
883 before this one. Map anonymous pages, if needed,
884 and clear the area. */
885 retval = set_brk(elf_bss + load_bias,
886 elf_brk + load_bias);
887 if (retval)
888 goto out_free_dentry;
889 nbyte = ELF_PAGEOFFSET(elf_bss);
890 if (nbyte) {
891 nbyte = ELF_MIN_ALIGN - nbyte;
892 if (nbyte > elf_brk - elf_bss)
893 nbyte = elf_brk - elf_bss;
894 if (clear_user((void __user *)elf_bss +
895 load_bias, nbyte)) {
896 /*
897 * This bss-zeroing can fail if the ELF
898 * file specifies odd protections. So
899 * we don't check the return value
900 */
901 }
902 }
903 }
904
905 if (elf_ppnt->p_flags & PF_R)
906 elf_prot |= PROT_READ;
907 if (elf_ppnt->p_flags & PF_W)
908 elf_prot |= PROT_WRITE;
909 if (elf_ppnt->p_flags & PF_X)
910 elf_prot |= PROT_EXEC;
911
912 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
913
914 vaddr = elf_ppnt->p_vaddr;
915 /*
916 * If we are loading ET_EXEC or we have already performed
917 * the ET_DYN load_addr calculations, proceed normally.
918 */
919 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
920 elf_flags |= MAP_FIXED;
921 } else if (loc->elf_ex.e_type == ET_DYN) {
922 /*
923 * This logic is run once for the first LOAD Program
924 * Header for ET_DYN binaries to calculate the
925 * randomization (load_bias) for all the LOAD
926 * Program Headers, and to calculate the entire
927 * size of the ELF mapping (total_size). (Note that
928 * load_addr_set is set to true later once the
929 * initial mapping is performed.)
930 *
931 * There are effectively two types of ET_DYN
932 * binaries: programs (i.e. PIE: ET_DYN with INTERP)
933 * and loaders (ET_DYN without INTERP, since they
934 * _are_ the ELF interpreter). The loaders must
935 * be loaded away from programs since the program
936 * may otherwise collide with the loader (especially
937 * for ET_EXEC which does not have a randomized
938 * position). For example to handle invocations of
939 * "./ld.so someprog" to test out a new version of
940 * the loader, the subsequent program that the
941 * loader loads must avoid the loader itself, so
942 * they cannot share the same load range. Sufficient
943 * room for the brk must be allocated with the
944 * loader as well, since brk must be available with
945 * the loader.
946 *
947 * Therefore, programs are loaded offset from
948 * ELF_ET_DYN_BASE and loaders are loaded into the
949 * independently randomized mmap region (0 load_bias
950 * without MAP_FIXED).
951 */
952 if (elf_interpreter) {
953 load_bias = ELF_ET_DYN_BASE;
954 if (current->flags & PF_RANDOMIZE)
955 load_bias += arch_mmap_rnd();
956 elf_flags |= MAP_FIXED;
957 } else
958 load_bias = 0;
959
960 /*
961 * Since load_bias is used for all subsequent loading
962 * calculations, we must lower it by the first vaddr
963 * so that the remaining calculations based on the
964 * ELF vaddrs will be correctly offset. The result
965 * is then page aligned.
966 */
967 load_bias = ELF_PAGESTART(load_bias - vaddr);
968
969 total_size = total_mapping_size(elf_phdata,
970 loc->elf_ex.e_phnum);
971 if (!total_size) {
972 retval = -EINVAL;
973 goto out_free_dentry;
974 }
975 }
976
977 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
978 elf_prot, elf_flags, total_size);
979 if (BAD_ADDR(error)) {
980 retval = IS_ERR((void *)error) ?
981 PTR_ERR((void*)error) : -EINVAL;
982 goto out_free_dentry;
983 }
984
985 if (!load_addr_set) {
986 load_addr_set = 1;
987 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
988 if (loc->elf_ex.e_type == ET_DYN) {
989 load_bias += error -
990 ELF_PAGESTART(load_bias + vaddr);
991 load_addr += load_bias;
992 reloc_func_desc = load_bias;
993 }
994 }
995 k = elf_ppnt->p_vaddr;
996 if (k < start_code)
997 start_code = k;
998 if (start_data < k)
999 start_data = k;
1000
1001 /*
1002 * Check to see if the section's size will overflow the
1003 * allowed task size. Note that p_filesz must always be
1004 * <= p_memsz so it is only necessary to check p_memsz.
1005 */
1006 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
1007 elf_ppnt->p_memsz > TASK_SIZE ||
1008 TASK_SIZE - elf_ppnt->p_memsz < k) {
1009 /* set_brk can never work. Avoid overflows. */
1010 retval = -EINVAL;
1011 goto out_free_dentry;
1012 }
1013
1014 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
1015
1016 if (k > elf_bss)
1017 elf_bss = k;
1018 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
1019 end_code = k;
1020 if (end_data < k)
1021 end_data = k;
1022 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
1023 if (k > elf_brk)
1024 elf_brk = k;
1025 }
1026
1027 loc->elf_ex.e_entry += load_bias;
1028 elf_bss += load_bias;
1029 elf_brk += load_bias;
1030 start_code += load_bias;
1031 end_code += load_bias;
1032 start_data += load_bias;
1033 end_data += load_bias;
1034
1035 /* Calling set_brk effectively mmaps the pages that we need
1036 * for the bss and break sections. We must do this before
1037 * mapping in the interpreter, to make sure it doesn't wind
1038 * up getting placed where the bss needs to go.
1039 */
1040 retval = set_brk(elf_bss, elf_brk);
1041 if (retval)
1042 goto out_free_dentry;
1043 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
1044 retval = -EFAULT; /* Nobody gets to see this, but.. */
1045 goto out_free_dentry;
1046 }
1047
1048 if (elf_interpreter) {
1049 unsigned long interp_map_addr = 0;
1050
1051 elf_entry = load_elf_interp(&loc->interp_elf_ex,
1052 interpreter,
1053 &interp_map_addr,
1054 load_bias, interp_elf_phdata);
1055 if (!IS_ERR((void *)elf_entry)) {
1056 /*
1057 * load_elf_interp() returns relocation
1058 * adjustment
1059 */
1060 interp_load_addr = elf_entry;
1061 elf_entry += loc->interp_elf_ex.e_entry;
1062 }
1063 if (BAD_ADDR(elf_entry)) {
1064 retval = IS_ERR((void *)elf_entry) ?
1065 (int)elf_entry : -EINVAL;
1066 goto out_free_dentry;
1067 }
1068 reloc_func_desc = interp_load_addr;
1069
1070 allow_write_access(interpreter);
1071 fput(interpreter);
1072 kfree(elf_interpreter);
1073 } else {
1074 elf_entry = loc->elf_ex.e_entry;
1075 if (BAD_ADDR(elf_entry)) {
1076 retval = -EINVAL;
1077 goto out_free_dentry;
1078 }
1079 }
1080
1081 kfree(interp_elf_phdata);
1082 kfree(elf_phdata);
1083
1084 set_binfmt(&elf_format);
1085
1086 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1087 retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1088 if (retval < 0)
1089 goto out;
1090 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1091
1092 retval = create_elf_tables(bprm, &loc->elf_ex,
1093 load_addr, interp_load_addr);
1094 if (retval < 0)
1095 goto out;
1096 /* N.B. passed_fileno might not be initialized? */
1097 current->mm->end_code = end_code;
1098 current->mm->start_code = start_code;
1099 current->mm->start_data = start_data;
1100 current->mm->end_data = end_data;
1101 current->mm->start_stack = bprm->p;
1102
1103 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1104 /*
1105 * For architectures with ELF randomization, when executing
1106 * a loader directly (i.e. no interpreter listed in ELF
1107 * headers), move the brk area out of the mmap region
1108 * (since it grows up, and may collide early with the stack
1109 * growing down), and into the unused ELF_ET_DYN_BASE region.
1110 */
1111 if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
1112 loc->elf_ex.e_type == ET_DYN && !interpreter)
1113 current->mm->brk = current->mm->start_brk =
1114 ELF_ET_DYN_BASE;
1115
1116 current->mm->brk = current->mm->start_brk =
1117 arch_randomize_brk(current->mm);
1118 #ifdef compat_brk_randomized
1119 current->brk_randomized = 1;
1120 #endif
1121 }
1122
1123 if (current->personality & MMAP_PAGE_ZERO) {
1124 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
1125 and some applications "depend" upon this behavior.
1126 Since we do not have the power to recompile these, we
1127 emulate the SVr4 behavior. Sigh. */
1128 error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1129 MAP_FIXED | MAP_PRIVATE, 0);
1130 }
1131
1132 #ifdef ELF_PLAT_INIT
1133 /*
1134 * The ABI may specify that certain registers be set up in special
1135 * ways (on i386 %edx is the address of a DT_FINI function, for
1136 * example). In addition, it may also specify (e.g. PowerPC64 ELF)
1137 * that the e_entry field is the address of the function descriptor
1138 * for the startup routine, rather than the address of the startup
1139 * routine itself. This macro performs whatever initialization to
1140 * the regs structure is required as well as any relocations to the
1141 * function descriptor entries when executing dynamically linked apps.
1142 */
1143 ELF_PLAT_INIT(regs, reloc_func_desc);
1144 #endif
1145
1146 start_thread(regs, elf_entry, bprm->p);
1147 retval = 0;
1148 out:
1149 kfree(loc);
1150 out_ret:
1151 return retval;
1152
1153 /* error cleanup */
1154 out_free_dentry:
1155 kfree(interp_elf_phdata);
1156 allow_write_access(interpreter);
1157 if (interpreter)
1158 fput(interpreter);
1159 out_free_interp:
1160 kfree(elf_interpreter);
1161 out_free_ph:
1162 kfree(elf_phdata);
1163 goto out;
1164 }
1165
1166 #ifdef CONFIG_USELIB
1167 /* This is really simpleminded and specialized - we are loading an
1168 a.out library that is given an ELF header. */
1169 static int load_elf_library(struct file *file)
1170 {
1171 struct elf_phdr *elf_phdata;
1172 struct elf_phdr *eppnt;
1173 unsigned long elf_bss, bss, len;
1174 int retval, error, i, j;
1175 struct elfhdr elf_ex;
1176
1177 error = -ENOEXEC;
1178 retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1179 if (retval != sizeof(elf_ex))
1180 goto out;
1181
1182 if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1183 goto out;
1184
1185 /* First of all, some simple consistency checks */
1186 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1187 !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1188 goto out;
1189
1190 /* Now read in all of the header information */
1191
1192 j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1193 /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1194
1195 error = -ENOMEM;
1196 elf_phdata = kmalloc(j, GFP_KERNEL);
1197 if (!elf_phdata)
1198 goto out;
1199
1200 eppnt = elf_phdata;
1201 error = -ENOEXEC;
1202 retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1203 if (retval != j)
1204 goto out_free_ph;
1205
1206 for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1207 if ((eppnt + i)->p_type == PT_LOAD)
1208 j++;
1209 if (j != 1)
1210 goto out_free_ph;
1211
1212 while (eppnt->p_type != PT_LOAD)
1213 eppnt++;
1214
1215 /* Now use mmap to map the library into memory. */
1216 error = vm_mmap(file,
1217 ELF_PAGESTART(eppnt->p_vaddr),
1218 (eppnt->p_filesz +
1219 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1220 PROT_READ | PROT_WRITE | PROT_EXEC,
1221 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1222 (eppnt->p_offset -
1223 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1224 if (error != ELF_PAGESTART(eppnt->p_vaddr))
1225 goto out_free_ph;
1226
1227 elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1228 if (padzero(elf_bss)) {
1229 error = -EFAULT;
1230 goto out_free_ph;
1231 }
1232
1233 len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
1234 bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
1235 if (bss > len) {
1236 error = vm_brk(len, bss - len);
1237 if (BAD_ADDR(error))
1238 goto out_free_ph;
1239 }
1240 error = 0;
1241
1242 out_free_ph:
1243 kfree(elf_phdata);
1244 out:
1245 return error;
1246 }
1247 #endif /* #ifdef CONFIG_USELIB */
1248
1249 #ifdef CONFIG_ELF_CORE
1250 /*
1251 * ELF core dumper
1252 *
1253 * Modelled on fs/exec.c:aout_core_dump()
1254 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1255 */
1256
1257 /*
1258 * The purpose of always_dump_vma() is to make sure that special kernel mappings
1259 * that are useful for post-mortem analysis are included in every core dump.
1260 * In that way we ensure that the core dump is fully interpretable later
1261 * without matching up the same kernel and hardware config to see what PC values
1262 * meant. These special mappings include - vDSO, vsyscall, and other
1263 * architecture specific mappings
1264 */
1265 static bool always_dump_vma(struct vm_area_struct *vma)
1266 {
1267 /* Any vsyscall mappings? */
1268 if (vma == get_gate_vma(vma->vm_mm))
1269 return true;
1270
1271 /*
1272 * Assume that all vmas with a .name op should always be dumped.
1273 * If this changes, a new vm_ops field can easily be added.
1274 */
1275 if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1276 return true;
1277
1278 /*
1279 * arch_vma_name() returns non-NULL for special architecture mappings,
1280 * such as vDSO sections.
1281 */
1282 if (arch_vma_name(vma))
1283 return true;
1284
1285 return false;
1286 }
1287
1288 /*
1289 * Decide what to dump of a segment, part, all or none.
1290 */
1291 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1292 unsigned long mm_flags)
1293 {
1294 #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1295
1296 /* always dump the vdso and vsyscall sections */
1297 if (always_dump_vma(vma))
1298 goto whole;
1299
1300 if (vma->vm_flags & VM_DONTDUMP)
1301 return 0;
1302
1303 /* support for DAX */
1304 if (vma_is_dax(vma)) {
1305 if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1306 goto whole;
1307 if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1308 goto whole;
1309 return 0;
1310 }
1311
1312 /* Hugetlb memory check */
1313 if (vma->vm_flags & VM_HUGETLB) {
1314 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1315 goto whole;
1316 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1317 goto whole;
1318 return 0;
1319 }
1320
1321 /* Do not dump I/O mapped devices or special mappings */
1322 if (vma->vm_flags & VM_IO)
1323 return 0;
1324
1325 /* By default, dump shared memory if mapped from an anonymous file. */
1326 if (vma->vm_flags & VM_SHARED) {
1327 if (file_inode(vma->vm_file)->i_nlink == 0 ?
1328 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1329 goto whole;
1330 return 0;
1331 }
1332
1333 /* Dump segments that have been written to. */
1334 if (vma->anon_vma && FILTER(ANON_PRIVATE))
1335 goto whole;
1336 if (vma->vm_file == NULL)
1337 return 0;
1338
1339 if (FILTER(MAPPED_PRIVATE))
1340 goto whole;
1341
1342 /*
1343 * If this looks like the beginning of a DSO or executable mapping,
1344 * check for an ELF header. If we find one, dump the first page to
1345 * aid in determining what was mapped here.
1346 */
1347 if (FILTER(ELF_HEADERS) &&
1348 vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1349 u32 __user *header = (u32 __user *) vma->vm_start;
1350 u32 word;
1351 mm_segment_t fs = get_fs();
1352 /*
1353 * Doing it this way gets the constant folded by GCC.
1354 */
1355 union {
1356 u32 cmp;
1357 char elfmag[SELFMAG];
1358 } magic;
1359 BUILD_BUG_ON(SELFMAG != sizeof word);
1360 magic.elfmag[EI_MAG0] = ELFMAG0;
1361 magic.elfmag[EI_MAG1] = ELFMAG1;
1362 magic.elfmag[EI_MAG2] = ELFMAG2;
1363 magic.elfmag[EI_MAG3] = ELFMAG3;
1364 /*
1365 * Switch to the user "segment" for get_user(),
1366 * then put back what elf_core_dump() had in place.
1367 */
1368 set_fs(USER_DS);
1369 if (unlikely(get_user(word, header)))
1370 word = 0;
1371 set_fs(fs);
1372 if (word == magic.cmp)
1373 return PAGE_SIZE;
1374 }
1375
1376 #undef FILTER
1377
1378 return 0;
1379
1380 whole:
1381 return vma->vm_end - vma->vm_start;
1382 }
1383
1384 /* An ELF note in memory */
1385 struct memelfnote
1386 {
1387 const char *name;
1388 int type;
1389 unsigned int datasz;
1390 void *data;
1391 };
1392
1393 static int notesize(struct memelfnote *en)
1394 {
1395 int sz;
1396
1397 sz = sizeof(struct elf_note);
1398 sz += roundup(strlen(en->name) + 1, 4);
1399 sz += roundup(en->datasz, 4);
1400
1401 return sz;
1402 }
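/*
 * Editor's note, worked example (illustrative only): for a note named
 * "CORE", the header is sizeof(struct elf_note) == 12 bytes, the name
 * "CORE\0" (5 bytes) rounds up to 8, and a descriptor of, say, 150 bytes
 * rounds up to 152, so notesize() returns 12 + 8 + 152 == 172.
 */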
1403
1404 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1405 {
1406 struct elf_note en;
1407 en.n_namesz = strlen(men->name) + 1;
1408 en.n_descsz = men->datasz;
1409 en.n_type = men->type;
1410
1411 return dump_emit(cprm, &en, sizeof(en)) &&
1412 dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1413 dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1414 }
1415
1416 static void fill_elf_header(struct elfhdr *elf, int segs,
1417 u16 machine, u32 flags)
1418 {
1419 memset(elf, 0, sizeof(*elf));
1420
1421 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1422 elf->e_ident[EI_CLASS] = ELF_CLASS;
1423 elf->e_ident[EI_DATA] = ELF_DATA;
1424 elf->e_ident[EI_VERSION] = EV_CURRENT;
1425 elf->e_ident[EI_OSABI] = ELF_OSABI;
1426
1427 elf->e_type = ET_CORE;
1428 elf->e_machine = machine;
1429 elf->e_version = EV_CURRENT;
1430 elf->e_phoff = sizeof(struct elfhdr);
1431 elf->e_flags = flags;
1432 elf->e_ehsize = sizeof(struct elfhdr);
1433 elf->e_phentsize = sizeof(struct elf_phdr);
1434 elf->e_phnum = segs;
1435
1436 return;
1437 }
1438
1439 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1440 {
1441 phdr->p_type = PT_NOTE;
1442 phdr->p_offset = offset;
1443 phdr->p_vaddr = 0;
1444 phdr->p_paddr = 0;
1445 phdr->p_filesz = sz;
1446 phdr->p_memsz = 0;
1447 phdr->p_flags = 0;
1448 phdr->p_align = 0;
1449 return;
1450 }
1451
1452 static void fill_note(struct memelfnote *note, const char *name, int type,
1453 unsigned int sz, void *data)
1454 {
1455 note->name = name;
1456 note->type = type;
1457 note->datasz = sz;
1458 note->data = data;
1459 return;
1460 }
1461
1462 /*
1463 * fill up all the fields in prstatus from the given task struct, except
1464 * registers which need to be filled up separately.
1465 */
1466 static void fill_prstatus(struct elf_prstatus *prstatus,
1467 struct task_struct *p, long signr)
1468 {
1469 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1470 prstatus->pr_sigpend = p->pending.signal.sig[0];
1471 prstatus->pr_sighold = p->blocked.sig[0];
1472 rcu_read_lock();
1473 prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1474 rcu_read_unlock();
1475 prstatus->pr_pid = task_pid_vnr(p);
1476 prstatus->pr_pgrp = task_pgrp_vnr(p);
1477 prstatus->pr_sid = task_session_vnr(p);
1478 if (thread_group_leader(p)) {
1479 struct task_cputime cputime;
1480
1481 /*
1482 * This is the record for the group leader. It shows the
1483 * group-wide total, not its individual thread total.
1484 */
1485 thread_group_cputime(p, &cputime);
1486 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1487 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1488 } else {
1489 cputime_t utime, stime;
1490
1491 task_cputime(p, &utime, &stime);
1492 cputime_to_timeval(utime, &prstatus->pr_utime);
1493 cputime_to_timeval(stime, &prstatus->pr_stime);
1494 }
1495 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1496 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1497 }
1498
1499 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1500 struct mm_struct *mm)
1501 {
1502 const struct cred *cred;
1503 unsigned int i, len;
1504
1505 /* first copy the parameters from user space */
1506 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1507
1508 len = mm->arg_end - mm->arg_start;
1509 if (len >= ELF_PRARGSZ)
1510 len = ELF_PRARGSZ-1;
1511 if (copy_from_user(&psinfo->pr_psargs,
1512 (const char __user *)mm->arg_start, len))
1513 return -EFAULT;
1514 for(i = 0; i < len; i++)
1515 if (psinfo->pr_psargs[i] == 0)
1516 psinfo->pr_psargs[i] = ' ';
1517 psinfo->pr_psargs[len] = 0;
1518
1519 rcu_read_lock();
1520 psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1521 rcu_read_unlock();
1522 psinfo->pr_pid = task_pid_vnr(p);
1523 psinfo->pr_pgrp = task_pgrp_vnr(p);
1524 psinfo->pr_sid = task_session_vnr(p);
1525
1526 i = p->state ? ffz(~p->state) + 1 : 0;
1527 psinfo->pr_state = i;
1528 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1529 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1530 psinfo->pr_nice = task_nice(p);
1531 psinfo->pr_flag = p->flags;
1532 rcu_read_lock();
1533 cred = __task_cred(p);
1534 SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1535 SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1536 rcu_read_unlock();
1537 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1538
1539 return 0;
1540 }
1541
1542 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1543 {
1544 elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1545 int i = 0;
1546 do
1547 i += 2;
1548 while (auxv[i - 2] != AT_NULL);
1549 fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1550 }
1551
1552 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1553 const siginfo_t *siginfo)
1554 {
1555 mm_segment_t old_fs = get_fs();
1556 set_fs(KERNEL_DS);
1557 copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1558 set_fs(old_fs);
1559 fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1560 }
1561
1562 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1563 /*
1564 * Format of NT_FILE note:
1565 *
1566 * long count -- how many files are mapped
1567 * long page_size -- units for file_ofs
1568 * array of [COUNT] elements of
1569 * long start
1570 * long end
1571 * long file_ofs
1572 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1573 */
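/*
 * Editor's note, concrete example (illustrative only, not from the original
 * source): a process with two file-backed mappings of "/bin/cat" might
 * produce an NT_FILE descriptor laid out as
 *
 *	count      = 2
 *	page_size  = 4096
 *	[0] start = 0x400000, end = 0x401000, file_ofs = 0
 *	[1] start = 0x600000, end = 0x601000, file_ofs = 0
 *	"/bin/cat\0" "/bin/cat\0"
 *
 * where start, end and file_ofs are user_long_t values and file_ofs is
 * expressed in page_size units (it is vma->vm_pgoff below).
 */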
1574 static int fill_files_note(struct memelfnote *note)
1575 {
1576 struct vm_area_struct *vma;
1577 unsigned count, size, names_ofs, remaining, n;
1578 user_long_t *data;
1579 user_long_t *start_end_ofs;
1580 char *name_base, *name_curpos;
1581
1582 /* *Estimated* file count and total data size needed */
1583 count = current->mm->map_count;
1584 size = count * 64;
1585
1586 names_ofs = (2 + 3 * count) * sizeof(data[0]);
1587 alloc:
1588 if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1589 return -EINVAL;
1590 size = round_up(size, PAGE_SIZE);
1591 data = vmalloc(size);
1592 if (!data)
1593 return -ENOMEM;
1594
1595 start_end_ofs = data + 2;
1596 name_base = name_curpos = ((char *)data) + names_ofs;
1597 remaining = size - names_ofs;
1598 count = 0;
1599 for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1600 struct file *file;
1601 const char *filename;
1602
1603 file = vma->vm_file;
1604 if (!file)
1605 continue;
1606 filename = file_path(file, name_curpos, remaining);
1607 if (IS_ERR(filename)) {
1608 if (PTR_ERR(filename) == -ENAMETOOLONG) {
1609 vfree(data);
1610 size = size * 5 / 4;
1611 goto alloc;
1612 }
1613 continue;
1614 }
1615
1616 /* file_path() fills at the end, move name down */
1617 /* n = strlen(filename) + 1: */
1618 n = (name_curpos + remaining) - filename;
1619 remaining = filename - name_curpos;
1620 memmove(name_curpos, filename, n);
1621 name_curpos += n;
1622
1623 *start_end_ofs++ = vma->vm_start;
1624 *start_end_ofs++ = vma->vm_end;
1625 *start_end_ofs++ = vma->vm_pgoff;
1626 count++;
1627 }
1628
1629 /* Now we know exact count of files, can store it */
1630 data[0] = count;
1631 data[1] = PAGE_SIZE;
1632 /*
1633 * The count is usually less than current->mm->map_count,
1634 * so we need to move the filenames down.
1635 */
1636 n = current->mm->map_count - count;
1637 if (n != 0) {
1638 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1639 memmove(name_base - shift_bytes, name_base,
1640 name_curpos - name_base);
1641 name_curpos -= shift_bytes;
1642 }
1643
1644 size = name_curpos - (char *)data;
1645 fill_note(note, "CORE", NT_FILE, size, data);
1646 return 0;
1647 }
1648
1649 #ifdef CORE_DUMP_USE_REGSET
1650 #include <linux/regset.h>
1651
1652 struct elf_thread_core_info {
1653 struct elf_thread_core_info *next;
1654 struct task_struct *task;
1655 struct elf_prstatus prstatus;
1656 struct memelfnote notes[0];
1657 };
1658
1659 struct elf_note_info {
1660 struct elf_thread_core_info *thread;
1661 struct memelfnote psinfo;
1662 struct memelfnote signote;
1663 struct memelfnote auxv;
1664 struct memelfnote files;
1665 user_siginfo_t csigdata;
1666 size_t size;
1667 int thread_notes;
1668 };
1669
1670 /*
1671 * When a regset has a writeback hook, we call it on each thread before
1672 * dumping user memory. On register window machines, this makes sure the
1673 * user memory backing the register data is up to date before we read it.
1674 */
1675 static void do_thread_regset_writeback(struct task_struct *task,
1676 const struct user_regset *regset)
1677 {
1678 if (regset->writeback)
1679 regset->writeback(task, regset, 1);
1680 }
1681
1682 #ifndef PR_REG_SIZE
1683 #define PR_REG_SIZE(S) sizeof(S)
1684 #endif
1685
1686 #ifndef PRSTATUS_SIZE
1687 #define PRSTATUS_SIZE(S) sizeof(S)
1688 #endif
1689
1690 #ifndef PR_REG_PTR
1691 #define PR_REG_PTR(S) (&((S)->pr_reg))
1692 #endif
1693
1694 #ifndef SET_PR_FPVALID
1695 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1696 #endif
1697
1698 static int fill_thread_core_info(struct elf_thread_core_info *t,
1699 const struct user_regset_view *view,
1700 long signr, size_t *total)
1701 {
1702 unsigned int i;
1703
1704 /*
1705 * NT_PRSTATUS is the one special case, because the regset data
1706 * goes into the pr_reg field inside the note contents, rather
1707 * than being the whole note contents. We fill the rest in here.
1708 * We assume that regset 0 is NT_PRSTATUS.
1709 */
1710 fill_prstatus(&t->prstatus, t->task, signr);
1711 (void) view->regsets[0].get(t->task, &view->regsets[0],
1712 0, PR_REG_SIZE(t->prstatus.pr_reg),
1713 PR_REG_PTR(&t->prstatus), NULL);
1714
1715 fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1716 PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1717 *total += notesize(&t->notes[0]);
1718
1719 do_thread_regset_writeback(t->task, &view->regsets[0]);
1720
1721 /*
1722 * Each other regset might generate a note too. For each regset
1723 * that has no core_note_type or is inactive, we leave t->notes[i]
1724 * all zero and we'll know to skip writing it later.
1725 */
1726 for (i = 1; i < view->n; ++i) {
1727 const struct user_regset *regset = &view->regsets[i];
1728 do_thread_regset_writeback(t->task, regset);
1729 if (regset->core_note_type && regset->get &&
1730 (!regset->active || regset->active(t->task, regset) > 0)) {
1731 int ret;
1732 size_t size = regset->n * regset->size;
1733 void *data = kzalloc(size, GFP_KERNEL);
1734 if (unlikely(!data))
1735 return 0;
1736 ret = regset->get(t->task, regset,
1737 0, size, data, NULL);
1738 if (unlikely(ret))
1739 kfree(data);
1740 else {
1741 if (regset->core_note_type != NT_PRFPREG)
1742 fill_note(&t->notes[i], "LINUX",
1743 regset->core_note_type,
1744 size, data);
1745 else {
1746 SET_PR_FPVALID(&t->prstatus, 1);
1747 fill_note(&t->notes[i], "CORE",
1748 NT_PRFPREG, size, data);
1749 }
1750 *total += notesize(&t->notes[i]);
1751 }
1752 }
1753 }
1754
1755 return 1;
1756 }
1757
1758 static int fill_note_info(struct elfhdr *elf, int phdrs,
1759 struct elf_note_info *info,
1760 const siginfo_t *siginfo, struct pt_regs *regs)
1761 {
1762 struct task_struct *dump_task = current;
1763 const struct user_regset_view *view = task_user_regset_view(dump_task);
1764 struct elf_thread_core_info *t;
1765 struct elf_prpsinfo *psinfo;
1766 struct core_thread *ct;
1767 unsigned int i;
1768
1769 info->size = 0;
1770 info->thread = NULL;
1771
1772 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1773 if (psinfo == NULL) {
1774 info->psinfo.data = NULL; /* So we don't free this wrongly */
1775 return 0;
1776 }
1777
1778 fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1779
1780 /*
1781 * Figure out how many notes we're going to need for each thread.
1782 */
1783 info->thread_notes = 0;
1784 for (i = 0; i < view->n; ++i)
1785 if (view->regsets[i].core_note_type != 0)
1786 ++info->thread_notes;
1787
1788 /*
1789 * Sanity check. We rely on regset 0 being in NT_PRSTATUS,
1790 * since it is our one special case.
1791 */
1792 if (unlikely(info->thread_notes == 0) ||
1793 unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1794 WARN_ON(1);
1795 return 0;
1796 }
1797
1798 /*
1799 * Initialize the ELF file header.
1800 */
1801 fill_elf_header(elf, phdrs,
1802 view->e_machine, view->e_flags);
1803
1804 /*
1805 * Allocate a structure for each thread.
1806 */
1807 for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1808 t = kzalloc(offsetof(struct elf_thread_core_info,
1809 notes[info->thread_notes]),
1810 GFP_KERNEL);
1811 if (unlikely(!t))
1812 return 0;
1813
1814 t->task = ct->task;
1815 if (ct->task == dump_task || !info->thread) {
1816 t->next = info->thread;
1817 info->thread = t;
1818 } else {
1819 /*
1820 * Make sure to keep the original task at
1821 * the head of the list.
1822 */
1823 t->next = info->thread->next;
1824 info->thread->next = t;
1825 }
1826 }
1827
1828 /*
1829 * Now fill in each thread's information.
1830 */
1831 for (t = info->thread; t != NULL; t = t->next)
1832 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1833 return 0;
1834
1835 /*
1836 * Fill in the two process-wide notes.
1837 */
1838 fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1839 info->size += notesize(&info->psinfo);
1840
1841 fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1842 info->size += notesize(&info->signote);
1843
1844 fill_auxv_note(&info->auxv, current->mm);
1845 info->size += notesize(&info->auxv);
1846
1847 if (fill_files_note(&info->files) == 0)
1848 info->size += notesize(&info->files);
1849
1850 return 1;
1851 }
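
/*
 * Editor's note: keeping the dumping task at the head of info->thread above
 * is what lets write_note_info() below interleave the process-wide notes
 * right after that task's NT_PRSTATUS note.
 */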
1852
1853 static size_t get_note_info_size(struct elf_note_info *info)
1854 {
1855 return info->size;
1856 }
1857
1858 /*
1859 * Write all the notes for each thread. When writing the first thread, the
1860 * process-wide notes are interleaved after the first thread-specific note.
1861 */
1862 static int write_note_info(struct elf_note_info *info,
1863 struct coredump_params *cprm)
1864 {
1865 bool first = true;
1866 struct elf_thread_core_info *t = info->thread;
1867
1868 do {
1869 int i;
1870
1871 if (!writenote(&t->notes[0], cprm))
1872 return 0;
1873
1874 if (first && !writenote(&info->psinfo, cprm))
1875 return 0;
1876 if (first && !writenote(&info->signote, cprm))
1877 return 0;
1878 if (first && !writenote(&info->auxv, cprm))
1879 return 0;
1880 if (first && info->files.data &&
1881 !writenote(&info->files, cprm))
1882 return 0;
1883
1884 for (i = 1; i < info->thread_notes; ++i)
1885 if (t->notes[i].data &&
1886 !writenote(&t->notes[i], cprm))
1887 return 0;
1888
1889 first = false;
1890 t = t->next;
1891 } while (t);
1892
1893 return 1;
1894 }
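
/*
 * Editor's note: for a two-thread process, the loop above lays out the
 * PT_NOTE segment roughly as follows (regset-based path; NT_FILE only when
 * fill_files_note() succeeded):
 *
 *	thread 1 (dumping thread):	NT_PRSTATUS
 *	process-wide:			NT_PRPSINFO, NT_SIGINFO, NT_AUXV, NT_FILE
 *	thread 1, other regsets:	NT_PRFPREG, ...
 *	thread 2:			NT_PRSTATUS, NT_PRFPREG, ...
 */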
1895
1896 static void free_note_info(struct elf_note_info *info)
1897 {
1898 struct elf_thread_core_info *threads = info->thread;
1899 while (threads) {
1900 unsigned int i;
1901 struct elf_thread_core_info *t = threads;
1902 threads = t->next;
1903 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1904 for (i = 1; i < info->thread_notes; ++i)
1905 kfree(t->notes[i].data);
1906 kfree(t);
1907 }
1908 kfree(info->psinfo.data);
1909 vfree(info->files.data);
1910 }
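
/*
 * Editor's note: notes[0].data points at the prstatus embedded in each
 * elf_thread_core_info (hence the WARN_ON above), so only notes[1..] hold
 * separately allocated buffers and the kfree() loop starts at i = 1.
 */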
1911
1912 #else
1913
1914 /* Here is the structure in which the status of each thread is captured. */
1915 struct elf_thread_status
1916 {
1917 struct list_head list;
1918 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1919 elf_fpregset_t fpu; /* NT_PRFPREG */
1920 struct task_struct *thread;
1921 #ifdef ELF_CORE_COPY_XFPREGS
1922 elf_fpxregset_t xfpu; /* ELF_CORE_XFPREG_TYPE */
1923 #endif
1924 struct memelfnote notes[3];
1925 int num_notes;
1926 };
1927
1928 /*
1929 * In order to add per-thread status information to the ELF core file,
1930 * we need to keep a linked list of every thread's pr_status and then create
1931 * a single note section for them in the final core file.
1932 */
1933 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1934 {
1935 int sz = 0;
1936 struct task_struct *p = t->thread;
1937 t->num_notes = 0;
1938
1939 fill_prstatus(&t->prstatus, p, signr);
1940 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1941
1942 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1943 &(t->prstatus));
1944 t->num_notes++;
1945 sz += notesize(&t->notes[0]);
1946
1947 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1948 &t->fpu))) {
1949 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1950 &(t->fpu));
1951 t->num_notes++;
1952 sz += notesize(&t->notes[1]);
1953 }
1954
1955 #ifdef ELF_CORE_COPY_XFPREGS
1956 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1957 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1958 sizeof(t->xfpu), &t->xfpu);
1959 t->num_notes++;
1960 sz += notesize(&t->notes[2]);
1961 }
1962 #endif
1963 return sz;
1964 }
1965
1966 struct elf_note_info {
1967 struct memelfnote *notes;
1968 struct memelfnote *notes_files;
1969 struct elf_prstatus *prstatus; /* NT_PRSTATUS */
1970 struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */
1971 struct list_head thread_list;
1972 elf_fpregset_t *fpu;
1973 #ifdef ELF_CORE_COPY_XFPREGS
1974 elf_fpxregset_t *xfpu;
1975 #endif
1976 user_siginfo_t csigdata;
1977 int thread_status_size;
1978 int numnote;
1979 };
1980
1981 static int elf_note_info_init(struct elf_note_info *info)
1982 {
1983 memset(info, 0, sizeof(*info));
1984 INIT_LIST_HEAD(&info->thread_list);
1985
1986 /* Allocate space for ELF notes */
1987 info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1988 if (!info->notes)
1989 return 0;
1990 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1991 if (!info->psinfo)
1992 return 0;
1993 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1994 if (!info->prstatus)
1995 return 0;
1996 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1997 if (!info->fpu)
1998 return 0;
1999 #ifdef ELF_CORE_COPY_XFPREGS
2000 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
2001 if (!info->xfpu)
2002 return 0;
2003 #endif
2004 return 1;
2005 }
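
/*
 * Editor's note: if any allocation above fails we return 0 with *info only
 * partially filled in. That is still safe, because elf_core_dump() goes to
 * its cleanup label and calls free_note_info(), where kfree()/vfree() of the
 * remaining NULL pointers are no-ops.
 */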
2006
2007 static int fill_note_info(struct elfhdr *elf, int phdrs,
2008 struct elf_note_info *info,
2009 const siginfo_t *siginfo, struct pt_regs *regs)
2010 {
2011 struct list_head *t;
2012 struct core_thread *ct;
2013 struct elf_thread_status *ets;
2014
2015 if (!elf_note_info_init(info))
2016 return 0;
2017
2018 for (ct = current->mm->core_state->dumper.next;
2019 ct; ct = ct->next) {
2020 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
2021 if (!ets)
2022 return 0;
2023
2024 ets->thread = ct->task;
2025 list_add(&ets->list, &info->thread_list);
2026 }
2027
2028 list_for_each(t, &info->thread_list) {
2029 int sz;
2030
2031 ets = list_entry(t, struct elf_thread_status, list);
2032 sz = elf_dump_thread_status(siginfo->si_signo, ets);
2033 info->thread_status_size += sz;
2034 }
2035 /* now collect the dump for the current task */
2036 memset(info->prstatus, 0, sizeof(*info->prstatus));
2037 fill_prstatus(info->prstatus, current, siginfo->si_signo);
2038 elf_core_copy_regs(&info->prstatus->pr_reg, regs);
2039
2040 /* Set up header */
2041 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
2042
2043 /*
2044 * Set up the notes in similar form to SVR4 core dumps made
2045 * with info from their /proc.
2046 */
2047
2048 fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2049 sizeof(*info->prstatus), info->prstatus);
2050 fill_psinfo(info->psinfo, current->group_leader, current->mm);
2051 fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2052 sizeof(*info->psinfo), info->psinfo);
2053
2054 fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2055 fill_auxv_note(info->notes + 3, current->mm);
2056 info->numnote = 4;
2057
2058 if (fill_files_note(info->notes + info->numnote) == 0) {
2059 info->notes_files = info->notes + info->numnote;
2060 info->numnote++;
2061 }
2062
2063 /* Try to dump the FPU. */
2064 info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2065 info->fpu);
2066 if (info->prstatus->pr_fpvalid)
2067 fill_note(info->notes + info->numnote++,
2068 "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2069 #ifdef ELF_CORE_COPY_XFPREGS
2070 if (elf_core_copy_task_xfpregs(current, info->xfpu))
2071 fill_note(info->notes + info->numnote++,
2072 "LINUX", ELF_CORE_XFPREG_TYPE,
2073 sizeof(*info->xfpu), info->xfpu);
2074 #endif
2075
2076 return 1;
2077 }
2078
2079 static size_t get_note_info_size(struct elf_note_info *info)
2080 {
2081 int sz = 0;
2082 int i;
2083
2084 for (i = 0; i < info->numnote; i++)
2085 sz += notesize(info->notes + i);
2086
2087 sz += info->thread_status_size;
2088
2089 return sz;
2090 }
2091
2092 static int write_note_info(struct elf_note_info *info,
2093 struct coredump_params *cprm)
2094 {
2095 int i;
2096 struct list_head *t;
2097
2098 for (i = 0; i < info->numnote; i++)
2099 if (!writenote(info->notes + i, cprm))
2100 return 0;
2101
2102 /* write out the thread status notes section */
2103 list_for_each(t, &info->thread_list) {
2104 struct elf_thread_status *tmp =
2105 list_entry(t, struct elf_thread_status, list);
2106
2107 for (i = 0; i < tmp->num_notes; i++)
2108 if (!writenote(&tmp->notes[i], cprm))
2109 return 0;
2110 }
2111
2112 return 1;
2113 }
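
/*
 * Editor's note: unlike the regset-based variant above, this path writes the
 * current thread's NT_PRSTATUS and the process-wide notes (PRPSINFO, SIGINFO,
 * AUXV, FILE if present, FPU) first, and only then appends the status notes
 * of the other threads collected on thread_list.
 */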
2114
2115 static void free_note_info(struct elf_note_info *info)
2116 {
2117 while (!list_empty(&info->thread_list)) {
2118 struct list_head *tmp = info->thread_list.next;
2119 list_del(tmp);
2120 kfree(list_entry(tmp, struct elf_thread_status, list));
2121 }
2122
2123 /* Free data possibly allocated by fill_files_note(): */
2124 if (info->notes_files)
2125 vfree(info->notes_files->data);
2126
2127 kfree(info->prstatus);
2128 kfree(info->psinfo);
2129 kfree(info->notes);
2130 kfree(info->fpu);
2131 #ifdef ELF_CORE_COPY_XFPREGS
2132 kfree(info->xfpu);
2133 #endif
2134 }
2135
2136 #endif
2137
2138 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2139 struct vm_area_struct *gate_vma)
2140 {
2141 struct vm_area_struct *ret = tsk->mm->mmap;
2142
2143 if (ret)
2144 return ret;
2145 return gate_vma;
2146 }
2147 /*
2148 * Helper function for iterating across a vma list. It ensures that the caller
2149 * will visit `gate_vma' prior to terminating the search.
2150 */
2151 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2152 struct vm_area_struct *gate_vma)
2153 {
2154 struct vm_area_struct *ret;
2155
2156 ret = this_vma->vm_next;
2157 if (ret)
2158 return ret;
2159 if (this_vma == gate_vma)
2160 return NULL;
2161 return gate_vma;
2162 }
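
/*
 * Editor's note: first_vma()/next_vma() are meant to be used as a plain
 * iteration idiom, exactly as elf_core_dump() does below:
 *
 *	for (vma = first_vma(current, gate_vma); vma != NULL;
 *	     vma = next_vma(vma, gate_vma))
 *		...;
 */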
2163
2164 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2165 elf_addr_t e_shoff, int segs)
2166 {
2167 elf->e_shoff = e_shoff;
2168 elf->e_shentsize = sizeof(*shdr4extnum);
2169 elf->e_shnum = 1;
2170 elf->e_shstrndx = SHN_UNDEF;
2171
2172 memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2173
2174 shdr4extnum->sh_type = SHT_NULL;
2175 shdr4extnum->sh_size = elf->e_shnum;
2176 shdr4extnum->sh_link = elf->e_shstrndx;
2177 shdr4extnum->sh_info = segs;
2178 }
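
/*
 * Editor's note: a minimal sketch of the consumer side of this encoding.
 * When e_phnum is PN_XNUM, the real segment count is parked in sh_info of
 * the single section header written above, so a core-file reader recovers
 * it roughly like this (illustrative only, never compiled):
 */
#if 0
static unsigned int example_real_phnum(const struct elfhdr *ehdr,
				       const struct elf_shdr *shdr0)
{
	return ehdr->e_phnum == PN_XNUM ? shdr0->sh_info : ehdr->e_phnum;
}
#endif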
2179
2180 /*
2181 * Actual dumper
2182 *
2183 * This is a two-pass process; first we find the offsets of the bits,
2184 * and then they are actually written out. If we run out of core limit
2185 * we just truncate.
2186 */
2187 static int elf_core_dump(struct coredump_params *cprm)
2188 {
2189 int has_dumped = 0;
2190 mm_segment_t fs;
2191 int segs, i;
2192 size_t vma_data_size = 0;
2193 struct vm_area_struct *vma, *gate_vma;
2194 struct elfhdr *elf = NULL;
2195 loff_t offset = 0, dataoff;
2196 struct elf_note_info info = { };
2197 struct elf_phdr *phdr4note = NULL;
2198 struct elf_shdr *shdr4extnum = NULL;
2199 Elf_Half e_phnum;
2200 elf_addr_t e_shoff;
2201 elf_addr_t *vma_filesz = NULL;
2202
2203 /*
2204 * We no longer stop all VM operations.
2205 *
2206 * This is because those processes that could possibly change map_count
2207 * or the mmap / vma pages are now blocked in do_exit on current
2208 * finishing this core dump.
2209 *
2210 * Only ptrace can touch these memory addresses, but it doesn't change
2211 * the map_count or the pages allocated. So no possibility of crashing
2212 * exists while dumping the mm->vm_next areas to the core file.
2213 */
2214
2215 /* alloc memory for large data structures: too large to be on stack */
2216 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2217 if (!elf)
2218 goto out;
2219 /*
2220 * The number of segs is recorded in the ELF header as a 16-bit value.
2221 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2222 */
2223 segs = current->mm->map_count;
2224 segs += elf_core_extra_phdrs();
2225
2226 gate_vma = get_gate_vma(current->mm);
2227 if (gate_vma != NULL)
2228 segs++;
2229
2230 /* for notes section */
2231 segs++;
2232
2233 /* If segs > PN_XNUM (0xffff), then e_phnum overflows. To avoid
2234 * this, the kernel supports extended numbering. Have a look at
2235 * include/linux/elf.h for further information. */
2236 e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2237
2238 /*
2239 * Collect all the non-memory information about the process for the
2240 * notes. This also sets up the file header.
2241 */
2242 if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2243 goto cleanup;
2244
2245 has_dumped = 1;
2246
2247 fs = get_fs();
2248 set_fs(KERNEL_DS);
2249
2250 offset += sizeof(*elf); /* Elf header */
2251 offset += segs * sizeof(struct elf_phdr); /* Program headers */
2252
2253 /* Write notes phdr entry */
2254 {
2255 size_t sz = get_note_info_size(&info);
2256
2257 sz += elf_coredump_extra_notes_size();
2258
2259 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2260 if (!phdr4note)
2261 goto end_coredump;
2262
2263 fill_elf_note_phdr(phdr4note, sz, offset);
2264 offset += sz;
2265 }
2266
2267 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2268
2269 vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL);
2270 if (!vma_filesz)
2271 goto end_coredump;
2272
2273 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2274 vma = next_vma(vma, gate_vma)) {
2275 unsigned long dump_size;
2276
2277 dump_size = vma_dump_size(vma, cprm->mm_flags);
2278 vma_filesz[i++] = dump_size;
2279 vma_data_size += dump_size;
2280 }
2281
2282 offset += vma_data_size;
2283 offset += elf_core_extra_data_size();
2284 e_shoff = offset;
2285
2286 if (e_phnum == PN_XNUM) {
2287 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2288 if (!shdr4extnum)
2289 goto end_coredump;
2290 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2291 }
2292
2293 offset = dataoff;
2294
2295 if (!dump_emit(cprm, elf, sizeof(*elf)))
2296 goto end_coredump;
2297
2298 if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2299 goto end_coredump;
2300
2301 /* Write program headers for segments dump */
2302 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2303 vma = next_vma(vma, gate_vma)) {
2304 struct elf_phdr phdr;
2305
2306 phdr.p_type = PT_LOAD;
2307 phdr.p_offset = offset;
2308 phdr.p_vaddr = vma->vm_start;
2309 phdr.p_paddr = 0;
2310 phdr.p_filesz = vma_filesz[i++];
2311 phdr.p_memsz = vma->vm_end - vma->vm_start;
2312 offset += phdr.p_filesz;
2313 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2314 if (vma->vm_flags & VM_WRITE)
2315 phdr.p_flags |= PF_W;
2316 if (vma->vm_flags & VM_EXEC)
2317 phdr.p_flags |= PF_X;
2318 phdr.p_align = ELF_EXEC_PAGESIZE;
2319
2320 if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2321 goto end_coredump;
2322 }
2323
2324 if (!elf_core_write_extra_phdrs(cprm, offset))
2325 goto end_coredump;
2326
2327 /* write out the notes section */
2328 if (!write_note_info(&info, cprm))
2329 goto end_coredump;
2330
2331 if (elf_coredump_extra_notes_write(cprm))
2332 goto end_coredump;
2333
2334 /* Align to page */
2335 if (!dump_skip(cprm, dataoff - cprm->written))
2336 goto end_coredump;
2337
2338 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2339 vma = next_vma(vma, gate_vma)) {
2340 unsigned long addr;
2341 unsigned long end;
2342
2343 end = vma->vm_start + vma_filesz[i++];
2344
2345 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2346 struct page *page;
2347 int stop;
2348
2349 page = get_dump_page(addr);
2350 if (page) {
2351 void *kaddr = kmap(page);
2352 stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2353 kunmap(page);
2354 page_cache_release(page);
2355 } else
2356 stop = !dump_skip(cprm, PAGE_SIZE);
2357 if (stop)
2358 goto end_coredump;
2359 }
2360 }
2361 dump_truncate(cprm);
2362
2363 if (!elf_core_write_extra_data(cprm))
2364 goto end_coredump;
2365
2366 if (e_phnum == PN_XNUM) {
2367 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2368 goto end_coredump;
2369 }
2370
2371 end_coredump:
2372 set_fs(fs);
2373
2374 cleanup:
2375 free_note_info(&info);
2376 kfree(shdr4extnum);
2377 kfree(vma_filesz);
2378 kfree(phdr4note);
2379 kfree(elf);
2380 out:
2381 return has_dumped;
2382 }
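
/*
 * Editor's note: the finished core file therefore looks roughly like this,
 * with the offsets computed in the first pass above:
 *
 *	ELF header
 *	program headers (one PT_NOTE, one PT_LOAD per vma, plus any extras)
 *	note data (see write_note_info())
 *	padding up to ELF_EXEC_PAGESIZE
 *	per-vma memory contents, emitted PAGE_SIZE at a time
 *	arch-specific extra data, if any
 *	a single section header, only when e_phnum == PN_XNUM
 *
 * The result can be sanity-checked with "readelf -l" (program headers) and
 * "readelf -n" (notes).
 */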
2383
2384 #endif /* CONFIG_ELF_CORE */
2385
2386 static int __init init_elf_binfmt(void)
2387 {
2388 register_binfmt(&elf_format);
2389 return 0;
2390 }
2391
2392 static void __exit exit_elf_binfmt(void)
2393 {
2394 /* Remove the ELF loader. */
2395 unregister_binfmt(&elf_format);
2396 }
2397
2398 core_initcall(init_elf_binfmt);
2399 module_exit(exit_elf_binfmt);
2400 MODULE_LICENSE("GPL");
2401