/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

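/*
 * Fallback types for the core-dump note writers below; the compat ELF
 * loader (fs/compat_binfmt_elf.c) overrides these with compat-sized
 * types before including this file.
 */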
#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

static int load_elf_binary(struct linux_binprm *bprm);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
				int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump	NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

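/*
 * Round addresses to ELF_MIN_ALIGN, which may be larger than the MMU
 * page size: PAGESTART rounds down to a segment page boundary,
 * PAGEOFFSET gives the offset within that page, PAGEALIGN rounds up.
 */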
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
	.load_shlib	= load_elf_library,
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,
};

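/* Any address at or above TASK_SIZE cannot belong to user space. */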
#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
		unsigned long addr;
		addr = vm_brk(start, end - start);
		if (BAD_ADDR(addr))
			return addr;
	}
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  This would
   contain the junk from the file that should not
   be in memory.
 */
static int padzero(unsigned long elf_bss)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
			return -EFAULT;
	}
	return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len; sp; })
#endif

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

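/*
 * Build the final userspace stack image: argc, the argv and envp
 * pointer arrays, and the ELF auxiliary vector, along with the
 * platform strings and random bytes they point at.
 */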
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
		unsigned long load_addr, unsigned long interp_load_addr)
{
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *argv;
	elf_addr_t __user *envp;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	elf_addr_t __user *u_base_platform;
	elf_addr_t __user *u_rand_bytes;
	const char *k_platform = ELF_PLATFORM;
	const char *k_base_platform = ELF_BASE_PLATFORM;
	unsigned char k_rand_bytes[16];
	int items;
	elf_addr_t *elf_info;
	int ei_index = 0;
	const struct cred *cred = current_cred();
	struct vm_area_struct *vma;

	/*
	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
	 * evictions by the processes running on the same package. One
	 * thing we can do is to shuffle the initial stack for them.
	 */

	p = arch_align_stack(p);

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/*
	 * If this architecture has a "base" platform capability
	 * string, copy it to userspace.
	 */
	u_base_platform = NULL;
	if (k_base_platform) {
		size_t len = strlen(k_base_platform) + 1;

		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_base_platform, k_base_platform, len))
			return -EFAULT;
	}

	/*
	 * Generate 16 random bytes for userspace PRNG seeding.
	 */
	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
	u_rand_bytes = (elf_addr_t __user *)
		       STACK_ALLOC(p, sizeof(k_rand_bytes));
	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
		return -EFAULT;

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)current->mm->saved_auxv;
	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
	do { \
		elf_info[ei_index++] = id; \
		elf_info[ei_index++] = val; \
	} while (0)

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
	 * ARCH_DLINFO changes
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (k_base_platform) {
		NEW_AUX_ENT(AT_BASE_PLATFORM,
			    (elf_addr_t)(unsigned long)u_base_platform);
	}
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(&elf_info[ei_index], 0,
	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

	/* And advance past the AT_NULL entry.  */
	ei_index += 2;

	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1) + 1;
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif

	/*
	 * Grow the stack manually; some architectures have a limit on how
	 * far ahead a user-space access may be in order to grow the stack.
	 */
	vma = find_extend_vma(current->mm, bprm->p);
	if (!vma)
		return -EFAULT;

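	/*
	 * Final layout, from the lowest address up: argc, then argc argv
	 * pointers and a terminating NULL, then envc envp pointers and a
	 * NULL, then the auxiliary vector.  The strings themselves were
	 * already copied higher up the stack during bprm setup.
	 */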
	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (__put_user(argc, sp++))
		return -EFAULT;
	argv = sp;
	envp = argv + argc + 1;

	/* Populate argv and envp */
	p = current->mm->arg_end = current->mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, argv++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, argv))
		return -EFAULT;
	current->mm->arg_end = current->mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, envp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, envp))
		return -EFAULT;
	current->mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	sp = (elf_addr_t __user *)envp + 1;
	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
		struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long map_addr;
	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
	addr = ELF_PAGESTART(addr);
	size = ELF_PAGEALIGN(size);

	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (!size)
		return addr;

	/*
	 * total_size is the size of the ELF (interpreter) image.
	 * The _first_ mmap needs to know the full size, otherwise
	 * randomization might put this image into an overlapping
	 * position with the ELF binary image. (since size < total_size)
	 * So we first map the 'big' image - and unmap the remainder at
	 * the end. (which unmap is needed for ELF images with holes.)
	 */
	if (total_size) {
		total_size = ELF_PAGEALIGN(total_size);
		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
		if (!BAD_ADDR(map_addr))
			vm_munmap(map_addr+size, total_size-size);
	} else
		map_addr = vm_mmap(filep, addr, size, prot, type, off);

	return map_addr;
}

#endif /* !elf_map */

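/*
 * Size of the address range spanned by all PT_LOAD segments: from the
 * page containing the first segment's start to the end of the last
 * segment's p_memsz.
 */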
static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
	int i, first_idx = -1, last_idx = -1;

	for (i = 0; i < nr; i++) {
		if (cmds[i].p_type == PT_LOAD) {
			last_idx = i;
			if (first_idx == -1)
				first_idx = i;
		}
	}
	if (first_idx == -1)
		return 0;

	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
				ELF_PAGESTART(cmds[first_idx].p_vaddr);
}

/**
 * load_elf_phdrs() - load ELF program headers
 * @elf_ex:   ELF header of the binary whose program headers should be loaded
 * @elf_file: the opened ELF binary file
 *
 * Loads ELF program headers from the binary file elf_file, which has the ELF
 * header pointed to by elf_ex, into a newly allocated array. The caller is
 * responsible for freeing the allocated data. Returns NULL upon failure.
 */
static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
				       struct file *elf_file)
{
	struct elf_phdr *elf_phdata = NULL;
	int retval, size, err = -1;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;

	/* Sanity check the number of program headers... */
	if (elf_ex->e_phnum < 1 ||
		elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;

	/* ...and their total size. */
	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
	if (size > ELF_MIN_ALIGN)
		goto out;

	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	/* Read in the program headers */
	retval = kernel_read(elf_file, elf_ex->e_phoff,
			     (char *)elf_phdata, size);
	if (retval != size) {
		err = (retval < 0) ? retval : -EIO;
		goto out;
	}

	/* Success! */
	err = 0;
out:
	if (err) {
		kfree(elf_phdata);
		elf_phdata = NULL;
	}
	return elf_phdata;
}

#ifndef CONFIG_ARCH_BINFMT_ELF_STATE

/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * This structure is used to preserve architecture specific data during
 * the loading of an ELF file, throughout the checking of architecture
 * specific ELF headers & through to the point where the ELF load is
 * known to be proceeding (ie. SET_PERSONALITY).
 *
 * This implementation is a dummy for architectures which require no
 * specific state.
 */
struct arch_elf_state {
};

#define INIT_ARCH_ELF_STATE {}

/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr:	The main ELF header
 * @phdr:	The program header to check
 * @elf:	The open ELF file
 * @is_interp:	True if the phdr is from the interpreter of the ELF being
 *		loaded, else false.
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Inspects the program header phdr to validate its correctness and/or
 * suitability for the system. Called once per ELF program header in the
 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
 * interpreter.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
				   struct elf_phdr *phdr,
				   struct file *elf, bool is_interp,
				   struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}

/**
 * arch_check_elf() - check an ELF executable
 * @ehdr:	The main ELF header
 * @has_interp:	True if the ELF has an interpreter, else false.
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Provides a final opportunity for architecture code to reject the loading
 * of the ELF & cause an exec syscall to return an error. This is called after
 * all program headers to be checked by arch_elf_pt_proc have been.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
				 struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}

#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter, unsigned long *interp_map_addr,
		unsigned long no_base, struct elf_phdr *interp_elf_phdata)
{
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	unsigned long error = ~0UL;
	unsigned long total_size;
	int i;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex))
		goto out;
	if (!interpreter->f_op || !interpreter->f_op->mmap)
		goto out;

	total_size = total_mapping_size(interp_elf_phdata,
					interp_elf_ex->e_phnum);
	if (!total_size) {
		error = -EINVAL;
		goto out;
	}

	eppnt = interp_elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
			int elf_prot = 0;
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			if (eppnt->p_flags & PF_R)
				elf_prot = PROT_READ;
			if (eppnt->p_flags & PF_W)
				elf_prot |= PROT_WRITE;
			if (eppnt->p_flags & PF_X)
				elf_prot |= PROT_EXEC;
			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;
			else if (no_base && interp_elf_ex->e_type == ET_DYN)
				load_addr = -vaddr;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					eppnt, elf_prot, elf_type, total_size);
			total_size = 0;
			if (!*interp_map_addr)
				*interp_map_addr = map_addr;
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
			if (k > last_bss)
				last_bss = k;
		}
	}

	if (last_bss > elf_bss) {
		/*
		 * Now fill out the bss section.  First pad the last page up
		 * to the page boundary, and then perform a mmap to make sure
		 * that there are zero-mapped pages up to and including the
		 * last bss page.
		 */
		if (padzero(elf_bss)) {
			error = -EFAULT;
			goto out;
		}

		/* What we have mapped so far */
		elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

		/* Map the last of the bss segment */
		error = vm_brk(elf_bss, last_bss - elf_bss);
		if (BAD_ADDR(error))
			goto out;
	}

	error = load_addr;
out:
	return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
#endif

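/*
 * Offset the stack top by a random number of pages, up to
 * STACK_RND_MASK pages (8MB with 4K pages), unless randomization is
 * disabled for this task.
 */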
static unsigned long randomize_stack_top(unsigned long stack_top)
{
	unsigned int random_variable = 0;

	if ((current->flags & PF_RANDOMIZE) &&
		!(current->personality & ADDR_NO_RANDOMIZE)) {
		random_variable = get_random_int() & STACK_RND_MASK;
		random_variable <<= PAGE_SHIFT;
	}
#ifdef CONFIG_STACK_GROWSUP
	return PAGE_ALIGN(stack_top) + random_variable;
#else
	return PAGE_ALIGN(stack_top) - random_variable;
#endif
}

static int load_elf_binary(struct linux_binprm *bprm)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	char *elf_interpreter = NULL;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
	unsigned long elf_bss, elf_brk;
	int retval, i;
	unsigned long elf_entry;
	unsigned long interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc __maybe_unused = 0;
	int executable_stack = EXSTACK_DEFAULT;
	unsigned long def_flags = 0;
	struct pt_regs *regs = current_pt_regs();
	struct {
		struct elfhdr elf_ex;
		struct elfhdr interp_elf_ex;
	} *loc;
	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;

	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
	if (!loc) {
		retval = -ENOMEM;
		goto out_ret;
	}

	/* Get the exec-header */
	loc->elf_ex = *((struct elfhdr *)bprm->buf);

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(&loc->elf_ex))
		goto out;
	if (!bprm->file->f_op || !bprm->file->f_op->mmap)
		goto out;

	elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
	if (!elf_phdata)
		goto out;

	elf_ppnt = elf_phdata;
	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
		if (elf_ppnt->p_type == PT_INTERP) {
			/* This is the program interpreter used for
			 * shared libraries - for now assume that this
			 * is an a.out format binary
			 */
			retval = -ENOEXEC;
			if (elf_ppnt->p_filesz > PATH_MAX ||
			    elf_ppnt->p_filesz < 2)
				goto out_free_ph;

			retval = -ENOMEM;
			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
						  GFP_KERNEL);
			if (!elf_interpreter)
				goto out_free_ph;

			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
					     elf_interpreter,
					     elf_ppnt->p_filesz);
			if (retval != elf_ppnt->p_filesz) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_interp;
			}
			/* make sure path is NULL terminated */
			retval = -ENOEXEC;
			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
				goto out_free_interp;

			interpreter = open_exec(elf_interpreter);
			retval = PTR_ERR(interpreter);
			if (IS_ERR(interpreter))
				goto out_free_interp;

			/*
			 * If the binary is not readable then enforce
			 * mm->dumpable = 0 regardless of the interpreter's
			 * permissions.
			 */
			would_dump(bprm, interpreter);

			retval = kernel_read(interpreter, 0, bprm->buf,
					     BINPRM_BUF_SIZE);
			if (retval != BINPRM_BUF_SIZE) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_dentry;
			}

			/* Get the exec headers */
			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
			break;
		}
		elf_ppnt++;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
		switch (elf_ppnt->p_type) {
		case PT_GNU_STACK:
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;

		case PT_LOPROC ... PT_HIPROC:
			retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
						  bprm->file, false,
						  &arch_state);
			if (retval)
				goto out_free_dentry;
			break;
		}

	/* Some simple consistency checks for the interpreter */
	if (elf_interpreter) {
		retval = -ELIBBAD;
		/* Not an ELF interpreter */
		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
			goto out_free_dentry;
		/* Verify the interpreter has a valid arch */
		if (!elf_check_arch(&loc->interp_elf_ex))
			goto out_free_dentry;

		/* Load the interpreter program headers */
		interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
						   interpreter);
		if (!interp_elf_phdata)
			goto out_free_dentry;

		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
		elf_ppnt = interp_elf_phdata;
		for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
			switch (elf_ppnt->p_type) {
			case PT_LOPROC ... PT_HIPROC:
				retval = arch_elf_pt_proc(&loc->interp_elf_ex,
							  elf_ppnt, interpreter,
							  true, &arch_state);
				if (retval)
					goto out_free_dentry;
				break;
			}
	}

	/*
	 * Allow arch code to reject the ELF at this point, whilst it's
	 * still possible to return an error to the code that invoked
	 * the exec syscall.
	 */
	retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
	if (retval)
		goto out_free_dentry;

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* OK, This is the point of no return */
	current->mm->def_flags = def_flags;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY2(loc->elf_ex, &arch_state);
	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;

	setup_new_exec(bprm);

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	current->mm->free_area_cache = current->mm->mmap_base;
	current->mm->cached_hole_size = 0;
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}

	current->mm->start_stack = bprm->p;

	/* Now we do a little grungy work by mmapping the ELF image into
	   the correct location in memory. */
	for (i = 0, elf_ppnt = elf_phdata;
	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
		int elf_prot = 0, elf_flags;
		unsigned long k, vaddr;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely(elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk(elf_bss + load_bias,
					 elf_brk + load_bias);
			if (retval) {
				send_sig(SIGKILL, current, 0);
				goto out_free_dentry;
			}
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value
					 */
				}
			}
		}

		if (elf_ppnt->p_flags & PF_R)
			elf_prot |= PROT_READ;
		if (elf_ppnt->p_flags & PF_W)
			elf_prot |= PROT_WRITE;
		if (elf_ppnt->p_flags & PF_X)
			elf_prot |= PROT_EXEC;

		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

		vaddr = elf_ppnt->p_vaddr;
		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
			elf_flags |= MAP_FIXED;
		} else if (loc->elf_ex.e_type == ET_DYN) {
			/* Try and get dynamic programs out of the way of the
			 * default mmap base, as well as whatever program they
			 * might try to exec.  This is because the brk will
			 * follow the loader, and is not movable.  */
#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
			/* Memory randomization might have been switched off
			 * in runtime via sysctl or explicit setting of
			 * personality flags.
			 * If that is the case, retain the original non-zero
			 * load_bias value in order to establish proper
			 * non-randomized mappings.
			 */
			if (current->flags & PF_RANDOMIZE)
				load_bias = 0;
			else
				load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
		}

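		/*
		 * load_bias is the delta added to every p_vaddr when
		 * mapping: it stays zero for ET_EXEC, and for randomized
		 * ET_DYN it is fixed up after the first elf_map() below,
		 * once the kernel has chosen an actual base address.
		 */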
		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags, 0);
		if (BAD_ADDR(error)) {
			send_sig(SIGKILL, current, 0);
			retval = IS_ERR((void *)error) ?
				PTR_ERR((void*)error) : -EINVAL;
			goto out_free_dentry;
		}

		if (!load_addr_set) {
			load_addr_set = 1;
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (loc->elf_ex.e_type == ET_DYN) {
				load_bias += error -
				             ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
			}
		}
		k = elf_ppnt->p_vaddr;
		if (k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			send_sig(SIGKILL, current, 0);
			retval = -EINVAL;
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk)
			elf_brk = k;
	}

	loc->elf_ex.e_entry += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk);
	if (retval) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		send_sig(SIGSEGV, current, 0);
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (elf_interpreter) {
		unsigned long interp_map_addr = 0;

		elf_entry = load_elf_interp(&loc->interp_elf_ex,
					    interpreter,
					    &interp_map_addr,
					    load_bias, interp_elf_phdata);
		if (!IS_ERR((void *)elf_entry)) {
			/*
			 * load_elf_interp() returns relocation
			 * adjustment
			 */
			interp_load_addr = elf_entry;
			elf_entry += loc->interp_elf_ex.e_entry;
		}
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = IS_ERR((void *)elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);
		kfree(elf_interpreter);
	} else {
		elf_entry = loc->elf_ex.e_entry;
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(interp_elf_phdata);
	kfree(elf_phdata);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	install_exec_creds(bprm);
	retval = create_elf_tables(bprm, &loc->elf_ex,
			  load_addr, interp_load_addr);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
	/* N.B. passed_fileno might not be initialized? */
	current->mm->end_code = end_code;
	current->mm->start_code = start_code;
	current->mm->start_data = start_data;
	current->mm->end_data = end_data;
	current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
		current->mm->brk = current->mm->start_brk =
			arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
		current->brk_randomized = 1;
#endif
	}
#endif

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
	}

#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	start_thread(regs, elf_entry, bprm->p);
	retval = 0;
out:
	kfree(loc);
out_ret:
	return retval;

	/* error cleanup */
out_free_dentry:
	kfree(interp_elf_phdata);
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_interp:
	kfree(elf_interpreter);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
	if (retval != sizeof(elf_ex))
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
	if (retval != j)
		goto out_free_ph;

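	/* The library must consist of exactly one PT_LOAD segment. */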
	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	error = vm_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
			    ELF_MIN_ALIGN - 1);
	bss = eppnt->p_memsz + eppnt->p_vaddr;
	if (bss > len)
		vm_brk(len, bss - len);
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel mappings
 * that are useful for post-mortem analysis are included in every core dump.
 * In that way we ensure that the core dump is fully interpretable later
 * without matching up the same kernel and hardware config to see what PC values
 * meant. These special mappings include - vDSO, vsyscall, and other
 * architecture specific mappings
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
	/* Any vsyscall mappings? */
	if (vma == get_gate_vma(vma->vm_mm))
		return true;
	/*
	 * arch_vma_name() returns non-NULL for special architecture mappings,
	 * such as vDSO sections.
	 */
	if (arch_vma_name(vma))
		return true;

	return false;
}

/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
				   unsigned long mm_flags)
{
#define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))

	/* always dump the vdso and vsyscall sections */
	if (always_dump_vma(vma))
		goto whole;

	if (vma->vm_flags & VM_DONTDUMP)
		return 0;

	/* Hugetlb memory check */
	if (vma->vm_flags & VM_HUGETLB) {
		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
			goto whole;
		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
			goto whole;
		return 0;
	}

	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & VM_IO)
		return 0;

	/* By default, dump shared memory if mapped from an anonymous file. */
	if (vma->vm_flags & VM_SHARED) {
		if (file_inode(vma->vm_file)->i_nlink == 0 ?
		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
			goto whole;
		return 0;
	}

	/* Dump segments that have been written to.  */
	if (vma->anon_vma && FILTER(ANON_PRIVATE))
		goto whole;
	if (vma->vm_file == NULL)
		return 0;

	if (FILTER(MAPPED_PRIVATE))
		goto whole;

	/*
	 * If this looks like the beginning of a DSO or executable mapping,
	 * check for an ELF header.  If we find one, dump the first page to
	 * aid in determining what was mapped here.
	 */
	if (FILTER(ELF_HEADERS) &&
	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
		u32 __user *header = (u32 __user *) vma->vm_start;
		u32 word;
		mm_segment_t fs = get_fs();
		/*
		 * Doing it this way gets the constant folded by GCC.
		 */
		union {
			u32 cmp;
			char elfmag[SELFMAG];
		} magic;
		BUILD_BUG_ON(SELFMAG != sizeof word);
		magic.elfmag[EI_MAG0] = ELFMAG0;
		magic.elfmag[EI_MAG1] = ELFMAG1;
		magic.elfmag[EI_MAG2] = ELFMAG2;
		magic.elfmag[EI_MAG3] = ELFMAG3;
		/*
		 * Switch to the user "segment" for get_user(),
		 * then put back what elf_core_dump() had in place.
		 */
		set_fs(USER_DS);
		if (unlikely(get_user(word, header)))
			word = 0;
		set_fs(fs);
		if (word == magic.cmp)
			return PAGE_SIZE;
	}

#undef	FILTER

	return 0;

whole:
	return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

	return sz;
}

#define DUMP_WRITE(addr, nr, foffset)	\
	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
	static const char buf[4] = { 0, };
	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
	return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
			loff_t *foffset)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	DUMP_WRITE(&en, sizeof(en), foffset);
	DUMP_WRITE(men->name, en.n_namesz, foffset);
	if (!alignfile(file, foffset))
		return 0;
	DUMP_WRITE(men->data, men->datasz, foffset);
	if (!alignfile(file, foffset))
		return 0;

	return 1;
}
#undef DUMP_WRITE

static void fill_elf_header(struct elfhdr *elf, int segs,
			    u16 machine, u32 flags)
{
	memset(elf, 0, sizeof(*elf));

	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;

	elf->e_type = ET_CORE;
	elf->e_machine = machine;
	elf->e_version = EV_CURRENT;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_flags = flags;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;

	return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 0;
	return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
	return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	rcu_read_lock();
	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	prstatus->pr_pid = task_pid_vnr(p);
	prstatus->pr_pgrp = task_pgrp_vnr(p);
	prstatus->pr_sid = task_session_vnr(p);
	if (thread_group_leader(p)) {
		struct task_cputime cputime;

		/*
		 * This is the record for the group leader.  It shows the
		 * group-wide total, not its individual thread total.
		 */
		thread_group_cputime(p, &cputime);
		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
	} else {
		cputime_t utime, stime;

		task_cputime(p, &utime, &stime);
		cputime_to_timeval(utime, &prstatus->pr_utime);
		cputime_to_timeval(stime, &prstatus->pr_stime);
	}
	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	const struct cred *cred;
	unsigned int i, len;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ-1;
	if (copy_from_user(&psinfo->pr_psargs,
			   (const char __user *)mm->arg_start, len))
		return -EFAULT;
	for (i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	rcu_read_lock();
	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	psinfo->pr_pid = task_pid_vnr(p);
	psinfo->pr_pgrp = task_pgrp_vnr(p);
	psinfo->pr_sid = task_session_vnr(p);

	i = p->state ? ffz(~p->state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	rcu_read_lock();
	cred = __task_cred(p);
	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
	rcu_read_unlock();
	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

	return 0;
}

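/*
 * The saved auxv is a list of (id, value) pairs terminated by an
 * AT_NULL entry; count the entries, including the terminator, so the
 * whole vector lands in the note.
 */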
static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
	int i = 0;
	do
		i += 2;
	while (auxv[i - 2] != AT_NULL);
	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}

static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
		siginfo_t *siginfo)
{
	mm_segment_t old_fs = get_fs();
	set_fs(KERNEL_DS);
	copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
	set_fs(old_fs);
	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
}

#define MAX_FILE_NOTE_SIZE (4*1024*1024)
/*
 * Format of NT_FILE note:
 *
 * long count     -- how many files are mapped
 * long page_size -- units for file_ofs
 * array of [COUNT] elements of
 *   long start
 *   long end
 *   long file_ofs
 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
 */
static void fill_files_note(struct memelfnote *note)
{
	struct vm_area_struct *vma;
	unsigned count, size, names_ofs, remaining, n;
	user_long_t *data;
	user_long_t *start_end_ofs;
	char *name_base, *name_curpos;

	/* *Estimated* file count and total data size needed */
	count = current->mm->map_count;
	size = count * 64;

	names_ofs = (2 + 3 * count) * sizeof(data[0]);
 alloc:
	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
		goto err;
	size = round_up(size, PAGE_SIZE);
	data = vmalloc(size);
	if (!data)
		goto err;

	start_end_ofs = data + 2;
	name_base = name_curpos = ((char *)data) + names_ofs;
	remaining = size - names_ofs;
	count = 0;
	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
		struct file *file;
		const char *filename;

		file = vma->vm_file;
		if (!file)
			continue;
		filename = d_path(&file->f_path, name_curpos, remaining);
		if (IS_ERR(filename)) {
			if (PTR_ERR(filename) == -ENAMETOOLONG) {
				vfree(data);
				size = size * 5 / 4;
				goto alloc;
			}
			continue;
		}

		/* d_path() fills at the end, move name down */
		/* n = strlen(filename) + 1: */
		n = (name_curpos + remaining) - filename;
		remaining = filename - name_curpos;
		memmove(name_curpos, filename, n);
		name_curpos += n;

		*start_end_ofs++ = vma->vm_start;
		*start_end_ofs++ = vma->vm_end;
		*start_end_ofs++ = vma->vm_pgoff;
		count++;
	}

	/* Now we know the exact count of files, so we can store it */
	data[0] = count;
	data[1] = PAGE_SIZE;
	/*
	 * The final count is usually less than current->mm->map_count,
	 * so we need to move the filenames down to close the gap.
	 */
	n = current->mm->map_count - count;
	if (n != 0) {
		unsigned shift_bytes = n * 3 * sizeof(data[0]);
		memmove(name_base - shift_bytes, name_base,
			name_curpos - name_base);
		name_curpos -= shift_bytes;
	}

	size = name_curpos - (char *)data;
	fill_note(note, "CORE", NT_FILE, size, data);
 err: ;
}

#ifdef CORE_DUMP_USE_REGSET
#include <linux/regset.h>

struct elf_thread_core_info {
	struct elf_thread_core_info *next;
	struct task_struct *task;
	struct elf_prstatus prstatus;
	struct memelfnote notes[0];
};

struct elf_note_info {
	struct elf_thread_core_info *thread;
	struct memelfnote psinfo;
	struct memelfnote signote;
	struct memelfnote auxv;
	struct memelfnote files;
	user_siginfo_t csigdata;
	size_t size;
	int thread_notes;
};

/*
 * When a regset has a writeback hook, we call it on each thread before
 * dumping user memory.  On register window machines, this makes sure the
 * user memory backing the register data is up to date before we read it.
 */
static void do_thread_regset_writeback(struct task_struct *task,
				       const struct user_regset *regset)
{
	if (regset->writeback)
		regset->writeback(task, regset, 1);
}

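/*
 * These may be overridden by the arch for compat tasks whose prstatus
 * layout differs from the native one.
 */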
1640 #ifndef PR_REG_SIZE
1641 #define PR_REG_SIZE(S) sizeof(S)
1642 #endif
1643 
1644 #ifndef PRSTATUS_SIZE
1645 #define PRSTATUS_SIZE(S) sizeof(S)
1646 #endif
1647 
1648 #ifndef PR_REG_PTR
1649 #define PR_REG_PTR(S) (&((S)->pr_reg))
1650 #endif
1651 
1652 #ifndef SET_PR_FPVALID
1653 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1654 #endif
1655 
fill_thread_core_info(struct elf_thread_core_info * t,const struct user_regset_view * view,long signr,size_t * total)1656 static int fill_thread_core_info(struct elf_thread_core_info *t,
1657 				 const struct user_regset_view *view,
1658 				 long signr, size_t *total)
1659 {
1660 	unsigned int i;
1661 
1662 	/*
1663 	 * NT_PRSTATUS is the one special case, because the regset data
1664 	 * goes into the pr_reg field inside the note contents, rather
1665 	 * than being the whole note contents.  We fill the reset in here.
1666 	 * We assume that regset 0 is NT_PRSTATUS.
1667 	 */
1668 	fill_prstatus(&t->prstatus, t->task, signr);
1669 	(void) view->regsets[0].get(t->task, &view->regsets[0],
1670 				    0, PR_REG_SIZE(t->prstatus.pr_reg),
1671 				    PR_REG_PTR(&t->prstatus), NULL);
1672 
1673 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1674 		  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1675 	*total += notesize(&t->notes[0]);
1676 
1677 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1678 
1679 	/*
1680 	 * Each other regset might generate a note too.  For each regset
1681 	 * that has no core_note_type or is inactive, we leave t->notes[i]
1682 	 * all zero and we'll know to skip writing it later.
1683 	 */
1684 	for (i = 1; i < view->n; ++i) {
1685 		const struct user_regset *regset = &view->regsets[i];
1686 		do_thread_regset_writeback(t->task, regset);
1687 		if (regset->core_note_type && regset->get &&
1688 		    (!regset->active || regset->active(t->task, regset))) {
1689 			int ret;
1690 			size_t size = regset->n * regset->size;
1691 			void *data = kmalloc(size, GFP_KERNEL);
1692 			if (unlikely(!data))
1693 				return 0;
1694 			ret = regset->get(t->task, regset,
1695 					  0, size, data, NULL);
1696 			if (unlikely(ret))
1697 				kfree(data);
1698 			else {
1699 				if (regset->core_note_type != NT_PRFPREG)
1700 					fill_note(&t->notes[i], "LINUX",
1701 						  regset->core_note_type,
1702 						  size, data);
1703 				else {
1704 					SET_PR_FPVALID(&t->prstatus, 1);
1705 					fill_note(&t->notes[i], "CORE",
1706 						  NT_PRFPREG, size, data);
1707 				}
1708 				*total += notesize(&t->notes[i]);
1709 			}
1710 		}
1711 	}
1712 
1713 	return 1;
1714 }
1715 
fill_note_info(struct elfhdr * elf,int phdrs,struct elf_note_info * info,siginfo_t * siginfo,struct pt_regs * regs)1716 static int fill_note_info(struct elfhdr *elf, int phdrs,
1717 			  struct elf_note_info *info,
1718 			  siginfo_t *siginfo, struct pt_regs *regs)
1719 {
1720 	struct task_struct *dump_task = current;
1721 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1722 	struct elf_thread_core_info *t;
1723 	struct elf_prpsinfo *psinfo;
1724 	struct core_thread *ct;
1725 	unsigned int i;
1726 
1727 	info->size = 0;
1728 	info->thread = NULL;
1729 
1730 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1731 	if (psinfo == NULL) {
1732 		info->psinfo.data = NULL; /* So we don't free this wrongly */
1733 		return 0;
1734 	}
1735 
1736 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1737 
1738 	/*
1739 	 * Figure out how many notes we're going to need for each thread.
1740 	 */
1741 	info->thread_notes = 0;
1742 	for (i = 0; i < view->n; ++i)
1743 		if (view->regsets[i].core_note_type != 0)
1744 			++info->thread_notes;
1745 
1746 	/*
1747 	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1748 	 * since it is our one special case.
1749 	 */
1750 	if (unlikely(info->thread_notes == 0) ||
1751 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1752 		WARN_ON(1);
1753 		return 0;
1754 	}
1755 
1756 	/*
1757 	 * Initialize the ELF file header.
1758 	 */
1759 	fill_elf_header(elf, phdrs,
1760 			view->e_machine, view->e_flags);
1761 
1762 	/*
1763 	 * Allocate a structure for each thread.
1764 	 */
1765 	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1766 		t = kzalloc(offsetof(struct elf_thread_core_info,
1767 				     notes[info->thread_notes]),
1768 			    GFP_KERNEL);
1769 		if (unlikely(!t))
1770 			return 0;
1771 
1772 		t->task = ct->task;
1773 		if (ct->task == dump_task || !info->thread) {
1774 			t->next = info->thread;
1775 			info->thread = t;
1776 		} else {
1777 			/*
1778 			 * Make sure to keep the original task at
1779 			 * the head of the list.
1780 			 */
1781 			t->next = info->thread->next;
1782 			info->thread->next = t;
1783 		}
1784 	}
1785 
1786 	/*
1787 	 * Now fill in each thread's information.
1788 	 */
1789 	for (t = info->thread; t != NULL; t = t->next)
1790 		if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1791 			return 0;
1792 
1793 	/*
1794 	 * Fill in the two process-wide notes.
1795 	 */
1796 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1797 	info->size += notesize(&info->psinfo);
1798 
1799 	fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1800 	info->size += notesize(&info->signote);
1801 
1802 	fill_auxv_note(&info->auxv, current->mm);
1803 	info->size += notesize(&info->auxv);
1804 
1805 	fill_files_note(&info->files);
1806 	info->size += notesize(&info->files);
1807 
1808 	return 1;
1809 }
1810 
1811 static size_t get_note_info_size(struct elf_note_info *info)
1812 {
1813 	return info->size;
1814 }
1815 
1816 /*
1817  * Write all the notes for each thread.  When writing the first thread, the
1818  * process-wide notes are interleaved after the first thread-specific note.
1819  */
1820 static int write_note_info(struct elf_note_info *info,
1821 			   struct file *file, loff_t *foffset)
1822 {
1823 	bool first = true;
1824 	struct elf_thread_core_info *t = info->thread;
1825 
1826 	do {
1827 		int i;
1828 
1829 		if (!writenote(&t->notes[0], file, foffset))
1830 			return 0;
1831 
1832 		if (first && !writenote(&info->psinfo, file, foffset))
1833 			return 0;
1834 		if (first && !writenote(&info->signote, file, foffset))
1835 			return 0;
1836 		if (first && !writenote(&info->auxv, file, foffset))
1837 			return 0;
1838 		if (first && !writenote(&info->files, file, foffset))
1839 			return 0;
1840 
1841 		for (i = 1; i < info->thread_notes; ++i)
1842 			if (t->notes[i].data &&
1843 			    !writenote(&t->notes[i], file, foffset))
1844 				return 0;
1845 
1846 		first = 0;
1847 		t = t->next;
1848 	} while (t);
1849 
1850 	return 1;
1851 }
1852 
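/*
 * notes[0].data points into t->prstatus (freed along with t itself),
 * which is why the loop below starts at 1 and the WARN_ON checks that
 * invariant.
 */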
1853 static void free_note_info(struct elf_note_info *info)
1854 {
1855 	struct elf_thread_core_info *threads = info->thread;
1856 	while (threads) {
1857 		unsigned int i;
1858 		struct elf_thread_core_info *t = threads;
1859 		threads = t->next;
1860 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1861 		for (i = 1; i < info->thread_notes; ++i)
1862 			kfree(t->notes[i].data);
1863 		kfree(t);
1864 	}
1865 	kfree(info->psinfo.data);
1866 	vfree(info->files.data);
1867 }
1868 
1869 #else
1870 
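/*
 * Fallback path for architectures that do not dump via user_regset
 * views: the notes are assembled by hand with the elf_core_copy_*()
 * helpers instead.
 */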
1871 /* Here is the structure in which the status of each thread is captured. */
1872 struct elf_thread_status
1873 {
1874 	struct list_head list;
1875 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1876 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1877 	struct task_struct *thread;
1878 #ifdef ELF_CORE_COPY_XFPREGS
1879 	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1880 #endif
1881 	struct memelfnote notes[3];
1882 	int num_notes;
1883 };
1884 
1885 /*
1886  * In order to add the specific thread information for the ELF file format,
1887  * we need to keep a linked list of every thread's pr_status and then create
1888  * a single section for them in the final core file.
1889  */
1890 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1891 {
1892 	int sz = 0;
1893 	struct task_struct *p = t->thread;
1894 	t->num_notes = 0;
1895 
1896 	fill_prstatus(&t->prstatus, p, signr);
1897 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1898 
1899 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1900 		  &(t->prstatus));
1901 	t->num_notes++;
1902 	sz += notesize(&t->notes[0]);
1903 
1904 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1905 								&t->fpu))) {
1906 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1907 			  &(t->fpu));
1908 		t->num_notes++;
1909 		sz += notesize(&t->notes[1]);
1910 	}
1911 
1912 #ifdef ELF_CORE_COPY_XFPREGS
1913 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1914 		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1915 			  sizeof(t->xfpu), &t->xfpu);
1916 		t->num_notes++;
1917 		sz += notesize(&t->notes[2]);
1918 	}
1919 #endif
1920 	return sz;
1921 }
1922 
1923 struct elf_note_info {
1924 	struct memelfnote *notes;
1925 	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1926 	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1927 	struct list_head thread_list;
1928 	elf_fpregset_t *fpu;
1929 #ifdef ELF_CORE_COPY_XFPREGS
1930 	elf_fpxregset_t *xfpu;
1931 #endif
1932 	user_siginfo_t csigdata;
1933 	int thread_status_size;
1934 	int numnote;
1935 };
1936 
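/*
 * Allocate the fixed buffers for the non-regset dump path.  info is
 * zeroed first (and the notes array below is zero-allocated), so an
 * allocation failure part-way through can still be cleaned up by
 * free_note_info(): kfree(NULL)/vfree(NULL) are no-ops.
 */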
1937 static int elf_note_info_init(struct elf_note_info *info)
1938 {
1939 	memset(info, 0, sizeof(*info));
1940 	INIT_LIST_HEAD(&info->thread_list);
1941 
1942 	/* Allocate space for ELF notes */
1943 	info->notes = kzalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1944 	if (!info->notes)
1945 		return 0;
1946 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1947 	if (!info->psinfo)
1948 		return 0;
1949 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1950 	if (!info->prstatus)
1951 		return 0;
1952 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1953 	if (!info->fpu)
1954 		return 0;
1955 #ifdef ELF_CORE_COPY_XFPREGS
1956 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1957 	if (!info->xfpu)
1958 		return 0;
1959 #endif
1960 	return 1;
1961 }
1962 
1963 static int fill_note_info(struct elfhdr *elf, int phdrs,
1964 			  struct elf_note_info *info,
1965 			  siginfo_t *siginfo, struct pt_regs *regs)
1966 {
1967 	struct list_head *t;
1968 
1969 	if (!elf_note_info_init(info))
1970 		return 0;
1971 
1972 	if (siginfo->si_signo) {
1973 		struct core_thread *ct;
1974 		struct elf_thread_status *ets;
1975 
1976 		for (ct = current->mm->core_state->dumper.next;
1977 						ct; ct = ct->next) {
1978 			ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1979 			if (!ets)
1980 				return 0;
1981 
1982 			ets->thread = ct->task;
1983 			list_add(&ets->list, &info->thread_list);
1984 		}
1985 
1986 		list_for_each(t, &info->thread_list) {
1987 			int sz;
1988 
1989 			ets = list_entry(t, struct elf_thread_status, list);
1990 			sz = elf_dump_thread_status(siginfo->si_signo, ets);
1991 			info->thread_status_size += sz;
1992 		}
1993 	}
1994 	/* now collect the dump for the current task */
1995 	memset(info->prstatus, 0, sizeof(*info->prstatus));
1996 	fill_prstatus(info->prstatus, current, siginfo->si_signo);
1997 	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1998 
1999 	/* Set up header */
2000 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
2001 
2002 	/*
2003 	 * Set up the notes in similar form to SVR4 core dumps made
2004 	 * with info from their /proc.
2005 	 */
2006 
2007 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2008 		  sizeof(*info->prstatus), info->prstatus);
2009 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
2010 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2011 		  sizeof(*info->psinfo), info->psinfo);
2012 
2013 	fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2014 	fill_auxv_note(info->notes + 3, current->mm);
2015 	fill_files_note(info->notes + 4);
2016 
2017 	info->numnote = 5;
2018 
2019 	/* Try to dump the FPU. */
2020 	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2021 							       info->fpu);
2022 	if (info->prstatus->pr_fpvalid)
2023 		fill_note(info->notes + info->numnote++,
2024 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2025 #ifdef ELF_CORE_COPY_XFPREGS
2026 	if (elf_core_copy_task_xfpregs(current, info->xfpu))
2027 		fill_note(info->notes + info->numnote++,
2028 			  "LINUX", ELF_CORE_XFPREG_TYPE,
2029 			  sizeof(*info->xfpu), info->xfpu);
2030 #endif
2031 
2032 	return 1;
2033 }
2034 
2035 static size_t get_note_info_size(struct elf_note_info *info)
2036 {
2037 	int sz = 0;
2038 	int i;
2039 
2040 	for (i = 0; i < info->numnote; i++)
2041 		sz += notesize(info->notes + i);
2042 
2043 	sz += info->thread_status_size;
2044 
2045 	return sz;
2046 }
2047 
2048 static int write_note_info(struct elf_note_info *info,
2049 			   struct file *file, loff_t *foffset)
2050 {
2051 	int i;
2052 	struct list_head *t;
2053 
2054 	for (i = 0; i < info->numnote; i++)
2055 		if (!writenote(info->notes + i, file, foffset))
2056 			return 0;
2057 
2058 	/* write out the thread status notes section */
2059 	list_for_each(t, &info->thread_list) {
2060 		struct elf_thread_status *tmp =
2061 				list_entry(t, struct elf_thread_status, list);
2062 
2063 		for (i = 0; i < tmp->num_notes; i++)
2064 			if (!writenote(&tmp->notes[i], file, foffset))
2065 				return 0;
2066 	}
2067 
2068 	return 1;
2069 }
2070 
2071 static void free_note_info(struct elf_note_info *info)
2072 {
2073 	while (!list_empty(&info->thread_list)) {
2074 		struct list_head *tmp = info->thread_list.next;
2075 		list_del(tmp);
2076 		kfree(list_entry(tmp, struct elf_thread_status, list));
2077 	}
2078 
2079 	/* Free data allocated by fill_files_note(), if we got that far: */
2080 	if (info->notes)
		vfree(info->notes[4].data);
2081 
2082 	kfree(info->prstatus);
2083 	kfree(info->psinfo);
2084 	kfree(info->notes);
2085 	kfree(info->fpu);
2086 #ifdef ELF_CORE_COPY_XFPREGS
2087 	kfree(info->xfpu);
2088 #endif
2089 }
2090 
2091 #endif
2092 
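/*
 * Start a vma walk that also covers the "gate" vma (e.g. the x86
 * vsyscall page), which is not linked into mm->mmap.
 */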
2093 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2094 					struct vm_area_struct *gate_vma)
2095 {
2096 	struct vm_area_struct *ret = tsk->mm->mmap;
2097 
2098 	if (ret)
2099 		return ret;
2100 	return gate_vma;
2101 }
2102 /*
2103  * Helper function for iterating across a vma list.  It ensures that the caller
2104  * will visit `gate_vma' prior to terminating the search.
2105  */
2106 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2107 					struct vm_area_struct *gate_vma)
2108 {
2109 	struct vm_area_struct *ret;
2110 
2111 	ret = this_vma->vm_next;
2112 	if (ret)
2113 		return ret;
2114 	if (this_vma == gate_vma)
2115 		return NULL;
2116 	return gate_vma;
2117 }
2118 
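/*
 * ELF extended numbering: when the real segment count does not fit in
 * the 16-bit e_phnum field, e_phnum is set to PN_XNUM and the real
 * count is stashed in the sh_info field of section header 0.  A
 * consumer then effectively does:
 *
 *	phnum = ehdr.e_phnum == PN_XNUM ? shdr0.sh_info : ehdr.e_phnum;
 */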
2119 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2120 			     elf_addr_t e_shoff, int segs)
2121 {
2122 	elf->e_shoff = e_shoff;
2123 	elf->e_shentsize = sizeof(*shdr4extnum);
2124 	elf->e_shnum = 1;
2125 	elf->e_shstrndx = SHN_UNDEF;
2126 
2127 	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2128 
2129 	shdr4extnum->sh_type = SHT_NULL;
2130 	shdr4extnum->sh_size = elf->e_shnum;
2131 	shdr4extnum->sh_link = elf->e_shstrndx;
2132 	shdr4extnum->sh_info = segs;
2133 }
2134 
2135 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2136 				     unsigned long mm_flags)
2137 {
2138 	struct vm_area_struct *vma;
2139 	size_t size = 0;
2140 
2141 	for (vma = first_vma(current, gate_vma); vma != NULL;
2142 	     vma = next_vma(vma, gate_vma))
2143 		size += vma_dump_size(vma, mm_flags);
2144 	return size;
2145 }
2146 
2147 /*
2148  * Actual dumper
2149  *
2150  * This is a two-pass process; first we find the offsets of the bits,
2151  * and then they are actually written out.  If we exceed the core file
2152  * size limit, the dump is simply truncated.
2153  */
2154 static int elf_core_dump(struct coredump_params *cprm)
2155 {
2156 	int has_dumped = 0;
2157 	mm_segment_t fs;
2158 	int segs;
2159 	size_t size = 0;
2160 	struct vm_area_struct *vma, *gate_vma;
2161 	struct elfhdr *elf = NULL;
2162 	loff_t offset = 0, dataoff, foffset;
2163 	struct elf_note_info info = { };	/* zeroed: safe to free on early failure */
2164 	struct elf_phdr *phdr4note = NULL;
2165 	struct elf_shdr *shdr4extnum = NULL;
2166 	Elf_Half e_phnum;
2167 	elf_addr_t e_shoff;
2168 
2169 	/*
2170 	 * We no longer stop all VM operations.
2171 	 *
2172 	 * This is because those processes that could possibly change map_count
2173 	 * or the mmap / vma pages are now blocked in do_exit on current
2174 	 * finishing this core dump.
2175 	 *
2176 	 * Only ptrace can touch these memory addresses, but it doesn't change
2177 	 * the map_count or the pages allocated. So no possibility of crashing
2178 	 * exists while dumping the mm->vm_next areas to the core file.
2179 	 */
2180 
2181 	/* alloc memory for large data structures: too large to be on stack */
2182 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2183 	if (!elf)
2184 		goto out;
2185 	/*
2186 	 * The number of segs is recorded in the ELF header as a 16-bit value.
2187 	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2188 	 */
2189 	segs = current->mm->map_count;
2190 	segs += elf_core_extra_phdrs();
2191 
2192 	gate_vma = get_gate_vma(current->mm);
2193 	if (gate_vma != NULL)
2194 		segs++;
2195 
2196 	/* for notes section */
2197 	segs++;
2198 
2199 	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2200 	 * this, the kernel supports extended numbering. Have a look at
2201 	 * include/linux/elf.h for further information. */
2202 	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2203 
2204 	/*
2205 	 * Collect all the non-memory information about the process for the
2206 	 * notes.  This also sets up the file header.
2207 	 */
2208 	if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2209 		goto cleanup;
2210 
2211 	has_dumped = 1;
2212 
2213 	fs = get_fs();
2214 	set_fs(KERNEL_DS);
2215 
2216 	offset += sizeof(*elf);				/* Elf header */
2217 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2218 	foffset = offset;
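	/*
	 * From here on, 'offset' lays out the file as a whole (headers,
	 * notes, then page-aligned segment data) while 'foffset' tracks
	 * how far the note writer has actually advanced, so we can pad
	 * up to 'dataoff' before dumping memory.
	 */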
2219 
2220 	/* Write notes phdr entry */
2221 	{
2222 		size_t sz = get_note_info_size(&info);
2223 
2224 		sz += elf_coredump_extra_notes_size();
2225 
2226 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2227 		if (!phdr4note)
2228 			goto end_coredump;
2229 
2230 		fill_elf_note_phdr(phdr4note, sz, offset);
2231 		offset += sz;
2232 	}
2233 
2234 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2235 
2236 	offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
2237 	offset += elf_core_extra_data_size();
2238 	e_shoff = offset;
2239 
2240 	if (e_phnum == PN_XNUM) {
2241 		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2242 		if (!shdr4extnum)
2243 			goto end_coredump;
2244 		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2245 	}
2246 
2247 	offset = dataoff;
2248 
2249 	size += sizeof(*elf);
2250 	if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
2251 		goto end_coredump;
2252 
2253 	size += sizeof(*phdr4note);
2254 	if (size > cprm->limit
2255 	    || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2256 		goto end_coredump;
2257 
2258 	/* Write program headers for segments dump */
2259 	for (vma = first_vma(current, gate_vma); vma != NULL;
2260 			vma = next_vma(vma, gate_vma)) {
2261 		struct elf_phdr phdr;
2262 
2263 		phdr.p_type = PT_LOAD;
2264 		phdr.p_offset = offset;
2265 		phdr.p_vaddr = vma->vm_start;
2266 		phdr.p_paddr = 0;
2267 		phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2268 		phdr.p_memsz = vma->vm_end - vma->vm_start;
2269 		offset += phdr.p_filesz;
2270 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2271 		if (vma->vm_flags & VM_WRITE)
2272 			phdr.p_flags |= PF_W;
2273 		if (vma->vm_flags & VM_EXEC)
2274 			phdr.p_flags |= PF_X;
2275 		phdr.p_align = ELF_EXEC_PAGESIZE;
2276 
2277 		size += sizeof(phdr);
2278 		if (size > cprm->limit
2279 		    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2280 			goto end_coredump;
2281 	}
2282 
2283 	if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2284 		goto end_coredump;
2285 
2286 	/* write out the notes section */
2287 	if (!write_note_info(&info, cprm->file, &foffset))
2288 		goto end_coredump;
2289 
2290 	if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2291 		goto end_coredump;
2292 
2293 	/* Align to page */
2294 	if (!dump_seek(cprm->file, dataoff - foffset))
2295 		goto end_coredump;
2296 
2297 	for (vma = first_vma(current, gate_vma); vma != NULL;
2298 			vma = next_vma(vma, gate_vma)) {
2299 		unsigned long addr;
2300 		unsigned long end;
2301 
2302 		end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2303 
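		/*
		 * vma_dump_size() may be smaller than the vma (or zero);
		 * the tail is then omitted from the file, and readers see
		 * it as zeros because p_memsz exceeds p_filesz.
		 */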
2304 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2305 			struct page *page;
2306 			int stop;
2307 
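			/*
			 * get_dump_page() returns NULL for unmapped
			 * addresses (and the zero page); those become
			 * a seek, i.e. a hole in the core file, rather
			 * than written data.
			 */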
2308 			page = get_dump_page(addr);
2309 			if (page) {
2310 				void *kaddr = kmap(page);
2311 				stop = ((size += PAGE_SIZE) > cprm->limit) ||
2312 					!dump_write(cprm->file, kaddr,
2313 						    PAGE_SIZE);
2314 				kunmap(page);
2315 				page_cache_release(page);
2316 			} else
2317 				stop = !dump_seek(cprm->file, PAGE_SIZE);
2318 			if (stop)
2319 				goto end_coredump;
2320 		}
2321 	}
2322 
2323 	if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2324 		goto end_coredump;
2325 
2326 	if (e_phnum == PN_XNUM) {
2327 		size += sizeof(*shdr4extnum);
2328 		if (size > cprm->limit
2329 		    || !dump_write(cprm->file, shdr4extnum,
2330 				   sizeof(*shdr4extnum)))
2331 			goto end_coredump;
2332 	}
2333 
2334 end_coredump:
2335 	set_fs(fs);
2336 
2337 cleanup:
2338 	free_note_info(&info);
2339 	kfree(shdr4extnum);
2340 	kfree(phdr4note);
2341 	kfree(elf);
2342 out:
2343 	return has_dumped;
2344 }
2345 
2346 #endif		/* CONFIG_ELF_CORE */
2347 
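/*
 * Registered with core_initcall() so the ELF loader is in place before
 * the first user-space program is exec'd during boot.
 */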
2348 static int __init init_elf_binfmt(void)
2349 {
2350 	register_binfmt(&elf_format);
2351 	return 0;
2352 }
2353 
2354 static void __exit exit_elf_binfmt(void)
2355 {
2356 	/* Remove the ELF loader. */
2357 	unregister_binfmt(&elf_format);
2358 }
2359 
2360 core_initcall(init_elf_binfmt);
2361 module_exit(exit_elf_binfmt);
2362 MODULE_LICENSE("GPL");
2363