1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11 
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/utsname.h>
35 #include <linux/coredump.h>
36 #include <linux/sched.h>
37 #include <asm/uaccess.h>
38 #include <asm/param.h>
39 #include <asm/page.h>
40 
41 #ifndef user_long_t
42 #define user_long_t long
43 #endif
44 #ifndef user_siginfo_t
45 #define user_siginfo_t siginfo_t
46 #endif
47 
48 static int load_elf_binary(struct linux_binprm *bprm);
49 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
50 				int, int, unsigned long);
51 
52 #ifdef CONFIG_USELIB
53 static int load_elf_library(struct file *);
54 #else
55 #define load_elf_library NULL
56 #endif
57 
58 /*
59  * If we don't support core dumping, then supply a NULL so we
60  * don't even try.
61  */
62 #ifdef CONFIG_ELF_CORE
63 static int elf_core_dump(struct coredump_params *cprm);
64 #else
65 #define elf_core_dump	NULL
66 #endif
67 
68 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
69 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
70 #else
71 #define ELF_MIN_ALIGN	PAGE_SIZE
72 #endif
73 
74 #ifndef ELF_CORE_EFLAGS
75 #define ELF_CORE_EFLAGS	0
76 #endif
77 
78 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
79 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
80 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
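/*
 * Worked example, assuming ELF_MIN_ALIGN == 4096 (4 KiB pages):
 *   ELF_PAGESTART(0x1234)  == 0x1000   (round down to the page start)
 *   ELF_PAGEOFFSET(0x1234) == 0x234    (offset within the page)
 *   ELF_PAGEALIGN(0x1234)  == 0x2000   (round up to the next page)
 */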
81 
82 static struct linux_binfmt elf_format = {
83 	.module		= THIS_MODULE,
84 	.load_binary	= load_elf_binary,
85 	.load_shlib	= load_elf_library,
86 	.core_dump	= elf_core_dump,
87 	.min_coredump	= ELF_EXEC_PAGESIZE,
88 };
89 
90 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
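/*
 * Note that BAD_ADDR() catches both addresses in the kernel range and the
 * negative errno values returned by vm_mmap()/vm_brk(), since those also
 * exceed TASK_SIZE once cast to unsigned long.
 */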
91 
92 static int set_brk(unsigned long start, unsigned long end)
93 {
94 	start = ELF_PAGEALIGN(start);
95 	end = ELF_PAGEALIGN(end);
96 	if (end > start) {
97 		unsigned long addr;
98 		addr = vm_brk(start, end - start);
99 		if (BAD_ADDR(addr))
100 			return addr;
101 	}
102 	current->mm->start_brk = current->mm->brk = end;
103 	return 0;
104 }
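/*
 * Illustrative call, assuming ELF_MIN_ALIGN == 4096: set_brk(0x601234,
 * 0x603000) page-aligns both bounds, maps the gap with
 * vm_brk(0x602000, 0x1000), and sets start_brk and brk to 0x603000.
 * The fractional page below 0x602000 is handled by padzero() below.
 */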
105 
106 /* We need to explicitly zero any fractional pages
107    after the data section (i.e. bss).  These would
108    otherwise contain junk from the file that should
109    not be in memory.
110  */
111 static int padzero(unsigned long elf_bss)
112 {
113 	unsigned long nbyte;
114 
115 	nbyte = ELF_PAGEOFFSET(elf_bss);
116 	if (nbyte) {
117 		nbyte = ELF_MIN_ALIGN - nbyte;
118 		if (clear_user((void __user *) elf_bss, nbyte))
119 			return -EFAULT;
120 	}
121 	return 0;
122 }
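/*
 * E.g. with ELF_MIN_ALIGN == 4096 and elf_bss == 0x601234, padzero()
 * clears the 0xdcc bytes from 0x601234 up to the 0x602000 page
 * boundary, so stale file contents in that final partial page are not
 * left visible to the process.
 */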
123 
124 /* Let's use some macros to make this stack manipulation a little clearer */
125 #ifdef CONFIG_STACK_GROWSUP
126 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
127 #define STACK_ROUND(sp, items) \
128 	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
129 #define STACK_ALLOC(sp, len) ({ \
130 	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
131 	old_sp; })
132 #else
133 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
134 #define STACK_ROUND(sp, items) \
135 	(((unsigned long) (sp - items)) &~ 15UL)
136 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
137 #endif
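/*
 * Example on a conventional grow-down stack: STACK_ALLOC(sp, 16) with
 * sp == 0x7fff1010 moves sp to 0x7fff1000 and returns that address for
 * a 16-byte item; STACK_ROUND() then keeps the final stack pointer
 * 16-byte aligned, as most ABIs require (illustrative addresses).
 */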
138 
139 #ifndef ELF_BASE_PLATFORM
140 /*
141  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
142  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
143  * will be copied to the user stack in the same manner as AT_PLATFORM.
144  */
145 #define ELF_BASE_PLATFORM NULL
146 #endif
147 
148 static int
149 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
150 		unsigned long load_addr, unsigned long interp_load_addr)
151 {
152 	unsigned long p = bprm->p;
153 	int argc = bprm->argc;
154 	int envc = bprm->envc;
155 	elf_addr_t __user *argv;
156 	elf_addr_t __user *envp;
157 	elf_addr_t __user *sp;
158 	elf_addr_t __user *u_platform;
159 	elf_addr_t __user *u_base_platform;
160 	elf_addr_t __user *u_rand_bytes;
161 	const char *k_platform = ELF_PLATFORM;
162 	const char *k_base_platform = ELF_BASE_PLATFORM;
163 	unsigned char k_rand_bytes[16];
164 	int items;
165 	elf_addr_t *elf_info;
166 	int ei_index = 0;
167 	const struct cred *cred = current_cred();
168 	struct vm_area_struct *vma;
169 
170 	/*
171 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
172 	 * evictions by the processes running on the same package. One
173 	 * thing we can do is to shuffle the initial stack for them.
174 	 */
175 
176 	p = arch_align_stack(p);
177 
178 	/*
179 	 * If this architecture has a platform capability string, copy it
180 	 * to userspace.  In some cases (Sparc), this info is impossible
181 	 * for userspace to get any other way, in others (i386) it is
182 	 * merely difficult.
183 	 */
184 	u_platform = NULL;
185 	if (k_platform) {
186 		size_t len = strlen(k_platform) + 1;
187 
188 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
189 		if (__copy_to_user(u_platform, k_platform, len))
190 			return -EFAULT;
191 	}
192 
193 	/*
194 	 * If this architecture has a "base" platform capability
195 	 * string, copy it to userspace.
196 	 */
197 	u_base_platform = NULL;
198 	if (k_base_platform) {
199 		size_t len = strlen(k_base_platform) + 1;
200 
201 		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
202 		if (__copy_to_user(u_base_platform, k_base_platform, len))
203 			return -EFAULT;
204 	}
205 
206 	/*
207 	 * Generate 16 random bytes for userspace PRNG seeding.
208 	 */
209 	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
210 	u_rand_bytes = (elf_addr_t __user *)
211 		       STACK_ALLOC(p, sizeof(k_rand_bytes));
212 	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
213 		return -EFAULT;
214 
215 	/* Create the ELF interpreter info */
216 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
217 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
218 #define NEW_AUX_ENT(id, val) \
219 	do { \
220 		elf_info[ei_index++] = id; \
221 		elf_info[ei_index++] = val; \
222 	} while (0)
223 
224 #ifdef ARCH_DLINFO
225 	/*
226 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
227 	 * AUXV.
228 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
229 	 * ARCH_DLINFO changes
230 	 */
231 	ARCH_DLINFO;
232 #endif
233 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
234 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
235 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
236 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
237 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
238 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
239 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
240 	NEW_AUX_ENT(AT_FLAGS, 0);
241 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
242 	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
243 	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
244 	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
245 	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
246 	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
247 	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
248 #ifdef ELF_HWCAP2
249 	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
250 #endif
251 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
252 	if (k_platform) {
253 		NEW_AUX_ENT(AT_PLATFORM,
254 			    (elf_addr_t)(unsigned long)u_platform);
255 	}
256 	if (k_base_platform) {
257 		NEW_AUX_ENT(AT_BASE_PLATFORM,
258 			    (elf_addr_t)(unsigned long)u_base_platform);
259 	}
260 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
261 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
262 	}
263 #undef NEW_AUX_ENT
264 	/* AT_NULL is zero; clear the rest too */
265 	memset(&elf_info[ei_index], 0,
266 	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
267 
268 	/* And advance past the AT_NULL entry.  */
269 	ei_index += 2;
270 
271 	sp = STACK_ADD(p, ei_index);
272 
273 	items = (argc + 1) + (envc + 1) + 1;
274 	bprm->p = STACK_ROUND(sp, items);
275 
276 	/* Point sp at the lowest address on the stack */
277 #ifdef CONFIG_STACK_GROWSUP
278 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
279 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
280 #else
281 	sp = (elf_addr_t __user *)bprm->p;
282 #endif
283 
284 
285 	/*
286 	 * Grow the stack manually; some architectures have a limit on how
287 	 * far ahead a user-space access may be in order to grow the stack.
288 	 */
289 	vma = find_extend_vma(current->mm, bprm->p);
290 	if (!vma)
291 		return -EFAULT;
292 
293 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
294 	if (__put_user(argc, sp++))
295 		return -EFAULT;
296 	argv = sp;
297 	envp = argv + argc + 1;
298 
299 	/* Populate argv and envp */
300 	p = current->mm->arg_end = current->mm->arg_start;
301 	while (argc-- > 0) {
302 		size_t len;
303 		if (__put_user((elf_addr_t)p, argv++))
304 			return -EFAULT;
305 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
306 		if (!len || len > MAX_ARG_STRLEN)
307 			return -EINVAL;
308 		p += len;
309 	}
310 	if (__put_user(0, argv))
311 		return -EFAULT;
312 	current->mm->arg_end = current->mm->env_start = p;
313 	while (envc-- > 0) {
314 		size_t len;
315 		if (__put_user((elf_addr_t)p, envp++))
316 			return -EFAULT;
317 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
318 		if (!len || len > MAX_ARG_STRLEN)
319 			return -EINVAL;
320 		p += len;
321 	}
322 	if (__put_user(0, envp))
323 		return -EFAULT;
324 	current->mm->env_end = p;
325 
326 	/* Put the elf_info on the stack in the right place.  */
327 	sp = (elf_addr_t __user *)envp + 1;
328 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
329 		return -EFAULT;
330 	return 0;
331 }
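/*
 * Sketch of the resulting initial stack on a grow-down architecture,
 * following the usual System V ABI layout (lowest address first):
 *
 *   sp -> argc
 *         argv[0] ... argv[argc - 1], NULL
 *         envp[0] ... envp[envc - 1], NULL
 *         auxv pairs (id, value) ... AT_NULL, 0
 *         (higher up: random bytes, platform strings, and the
 *          argument/environment strings themselves)
 */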
332 
333 #ifndef elf_map
334 
335 static unsigned long elf_map(struct file *filep, unsigned long addr,
336 		struct elf_phdr *eppnt, int prot, int type,
337 		unsigned long total_size)
338 {
339 	unsigned long map_addr;
340 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
341 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
342 	addr = ELF_PAGESTART(addr);
343 	size = ELF_PAGEALIGN(size);
344 
345 	/* mmap() will return -EINVAL if given a zero size, but a
346 	 * segment with zero filesize is perfectly valid */
347 	if (!size)
348 		return addr;
349 
350 	/*
351 	 * total_size is the size of the ELF (interpreter) image.
352 	 * The _first_ mmap needs to know the full size, otherwise
353 	 * randomization might put this image into an overlapping
354 	 * position with the ELF binary image (since size < total_size).
355 	 * So we first map the 'big' image, and then unmap the remainder
356 	 * at the end (this unmapping is needed for ELF images with holes).
357 	 */
358 	if (total_size) {
359 		total_size = ELF_PAGEALIGN(total_size);
360 		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
361 		if (!BAD_ADDR(map_addr))
362 			vm_munmap(map_addr+size, total_size-size);
363 	} else
364 		map_addr = vm_mmap(filep, addr, size, prot, type, off);
365 
366 	return(map_addr);
367 }
368 
369 #endif /* !elf_map */
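/*
 * For instance (illustrative values, 4 KiB pages): a PT_LOAD with
 * p_vaddr == 0x400123, p_offset == 0x123 and p_filesz == 0x2000 maps
 * page-aligned with addr == 0x400000, off == 0 and size rounded up to
 * 0x3000, covering the whole [0x400123, 0x402123) range.
 */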
370 
371 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
372 {
373 	int i, first_idx = -1, last_idx = -1;
374 
375 	for (i = 0; i < nr; i++) {
376 		if (cmds[i].p_type == PT_LOAD) {
377 			last_idx = i;
378 			if (first_idx == -1)
379 				first_idx = i;
380 		}
381 	}
382 	if (first_idx == -1)
383 		return 0;
384 
385 	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
386 				ELF_PAGESTART(cmds[first_idx].p_vaddr);
387 }
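/*
 * Example: given two PT_LOAD headers, the first at p_vaddr 0x0 and the
 * last at p_vaddr 0x200000 with p_memsz 0x5000, this returns 0x205000,
 * the span a single reservation must cover for the whole image to fit
 * at one randomized base (illustrative values).
 */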
388 
389 /**
390  * load_elf_phdrs() - load ELF program headers
391  * @elf_ex:   ELF header of the binary whose program headers should be loaded
392  * @elf_file: the opened ELF binary file
393  *
394  * Loads ELF program headers from the binary file elf_file, which has the ELF
395  * header pointed to by elf_ex, into a newly allocated array. The caller is
396  * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
397  */
398 static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
399 				       struct file *elf_file)
400 {
401 	struct elf_phdr *elf_phdata = NULL;
402 	int retval, size, err = -1;
403 
404 	/*
405 	 * If the size of this structure has changed, then punt, since
406 	 * we will be doing the wrong thing.
407 	 */
408 	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
409 		goto out;
410 
411 	/* Sanity check the number of program headers... */
412 	if (elf_ex->e_phnum < 1 ||
413 		elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
414 		goto out;
415 
416 	/* ...and their total size. */
417 	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
418 	if (size > ELF_MIN_ALIGN)
419 		goto out;
420 
421 	elf_phdata = kmalloc(size, GFP_KERNEL);
422 	if (!elf_phdata)
423 		goto out;
424 
425 	/* Read in the program headers */
426 	retval = kernel_read(elf_file, elf_ex->e_phoff,
427 			     (char *)elf_phdata, size);
428 	if (retval != size) {
429 		err = (retval < 0) ? retval : -EIO;
430 		goto out;
431 	}
432 
433 	/* Success! */
434 	err = 0;
435 out:
436 	if (err) {
437 		kfree(elf_phdata);
438 		elf_phdata = NULL;
439 	}
440 	return elf_phdata;
441 }
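/*
 * The e_phnum bound above caps the table at 64 KiB of headers; with a
 * 56-byte elf_phdr (typical for 64-bit) that allows at most 1170
 * program headers, and the ELF_MIN_ALIGN size check usually tightens
 * this further (e.g. to 73 headers with 4 KiB pages).
 */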
442 
443 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
444 
445 /**
446  * struct arch_elf_state - arch-specific ELF loading state
447  *
448  * This structure is used to preserve architecture specific data during
449  * the loading of an ELF file, throughout the checking of architecture
450  * specific ELF headers & through to the point where the ELF load is
451  * known to be proceeding (ie. SET_PERSONALITY).
452  *
453  * This implementation is a dummy for architectures which require no
454  * specific state.
455  */
456 struct arch_elf_state {
457 };
458 
459 #define INIT_ARCH_ELF_STATE {}
460 
461 /**
462  * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
463  * @ehdr:	The main ELF header
464  * @phdr:	The program header to check
465  * @elf:	The open ELF file
466  * @is_interp:	True if the phdr is from the interpreter of the ELF being
467  *		loaded, else false.
468  * @state:	Architecture-specific state preserved throughout the process
469  *		of loading the ELF.
470  *
471  * Inspects the program header phdr to validate its correctness and/or
472  * suitability for the system. Called once per ELF program header in the
473  * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
474  * interpreter.
475  *
476  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
477  *         with that return code.
478  */
479 static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
480 				   struct elf_phdr *phdr,
481 				   struct file *elf, bool is_interp,
482 				   struct arch_elf_state *state)
483 {
484 	/* Dummy implementation, always proceed */
485 	return 0;
486 }
487 
488 /**
489  * arch_check_elf() - check a PT_LOPROC..PT_HIPROC ELF program header
490  * @ehdr:	The main ELF header
491  * @has_interp:	True if the ELF has an interpreter, else false.
492  * @state:	Architecture-specific state preserved throughout the process
493  *		of loading the ELF.
494  *
495  * Provides a final opportunity for architecture code to reject the loading
496  * of the ELF & cause an exec syscall to return an error. This is called after
497  * all program headers to be checked by arch_elf_pt_proc have been.
498  *
499  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
500  *         with that return code.
501  */
502 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
503 				 struct arch_elf_state *state)
504 {
505 	/* Dummy implementation, always proceed */
506 	return 0;
507 }
508 
509 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
510 
511 /* This is much more generalized than the library routine read function,
512    so we keep this separate.  Technically the library read function
513    is only provided so that we can read a.out libraries that have
514    an ELF header. */
515 
516 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
517 		struct file *interpreter, unsigned long *interp_map_addr,
518 		unsigned long no_base, struct elf_phdr *interp_elf_phdata)
519 {
520 	struct elf_phdr *eppnt;
521 	unsigned long load_addr = 0;
522 	int load_addr_set = 0;
523 	unsigned long last_bss = 0, elf_bss = 0;
524 	unsigned long error = ~0UL;
525 	unsigned long total_size;
526 	int i;
527 
528 	/* First of all, some simple consistency checks */
529 	if (interp_elf_ex->e_type != ET_EXEC &&
530 	    interp_elf_ex->e_type != ET_DYN)
531 		goto out;
532 	if (!elf_check_arch(interp_elf_ex))
533 		goto out;
534 	if (!interpreter->f_op->mmap)
535 		goto out;
536 
537 	total_size = total_mapping_size(interp_elf_phdata,
538 					interp_elf_ex->e_phnum);
539 	if (!total_size) {
540 		error = -EINVAL;
541 		goto out;
542 	}
543 
544 	eppnt = interp_elf_phdata;
545 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
546 		if (eppnt->p_type == PT_LOAD) {
547 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
548 			int elf_prot = 0;
549 			unsigned long vaddr = 0;
550 			unsigned long k, map_addr;
551 
552 			if (eppnt->p_flags & PF_R)
553 				elf_prot = PROT_READ;
554 			if (eppnt->p_flags & PF_W)
555 				elf_prot |= PROT_WRITE;
556 			if (eppnt->p_flags & PF_X)
557 				elf_prot |= PROT_EXEC;
558 			vaddr = eppnt->p_vaddr;
559 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
560 				elf_type |= MAP_FIXED;
561 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
562 				load_addr = -vaddr;
563 
564 			map_addr = elf_map(interpreter, load_addr + vaddr,
565 					eppnt, elf_prot, elf_type, total_size);
566 			total_size = 0;
567 			if (!*interp_map_addr)
568 				*interp_map_addr = map_addr;
569 			error = map_addr;
570 			if (BAD_ADDR(map_addr))
571 				goto out;
572 
573 			if (!load_addr_set &&
574 			    interp_elf_ex->e_type == ET_DYN) {
575 				load_addr = map_addr - ELF_PAGESTART(vaddr);
576 				load_addr_set = 1;
577 			}
578 
579 			/*
580 			 * Check to see if the section's size will overflow the
581 			 * allowed task size. Note that p_filesz must always be
582 			 * <= p_memsz, so it's only necessary to check p_memsz.
583 			 */
584 			k = load_addr + eppnt->p_vaddr;
585 			if (BAD_ADDR(k) ||
586 			    eppnt->p_filesz > eppnt->p_memsz ||
587 			    eppnt->p_memsz > TASK_SIZE ||
588 			    TASK_SIZE - eppnt->p_memsz < k) {
589 				error = -ENOMEM;
590 				goto out;
591 			}
592 
593 			/*
594 			 * Find the end of the file mapping for this phdr, and
595 			 * keep track of the largest address we see for this.
596 			 */
597 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
598 			if (k > elf_bss)
599 				elf_bss = k;
600 
601 			/*
602 			 * Do the same thing for the memory mapping - between
603 			 * elf_bss and last_bss is the bss section.
604 			 */
605 			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
606 			if (k > last_bss)
607 				last_bss = k;
608 		}
609 	}
610 
611 	if (last_bss > elf_bss) {
612 		/*
613 		 * Now fill out the bss section.  First pad the last page up
614 		 * to the page boundary, and then perform a mmap to make sure
615 		 * that there are zero-mapped pages up to and including the
616 		 * last bss page.
617 		 */
618 		if (padzero(elf_bss)) {
619 			error = -EFAULT;
620 			goto out;
621 		}
622 
623 		/* What we have mapped so far */
624 		elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
625 
626 		/* Map the last of the bss segment */
627 		error = vm_brk(elf_bss, last_bss - elf_bss);
628 		if (BAD_ADDR(error))
629 			goto out;
630 	}
631 
632 	error = load_addr;
633 out:
634 	return error;
635 }
636 
637 /*
638  * These are the functions used to load ELF style executables and shared
639  * libraries.  There is no binary dependent code anywhere else.
640  */
641 
642 #ifndef STACK_RND_MASK
643 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
644 #endif
645 
646 static unsigned long randomize_stack_top(unsigned long stack_top)
647 {
648 	unsigned long random_variable = 0;
649 
650 	if ((current->flags & PF_RANDOMIZE) &&
651 		!(current->personality & ADDR_NO_RANDOMIZE)) {
652 		random_variable = (unsigned long) get_random_int();
653 		random_variable &= STACK_RND_MASK;
654 		random_variable <<= PAGE_SHIFT;
655 	}
656 #ifdef CONFIG_STACK_GROWSUP
657 	return PAGE_ALIGN(stack_top) + random_variable;
658 #else
659 	return PAGE_ALIGN(stack_top) - random_variable;
660 #endif
661 }
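/*
 * With 4 KiB pages the default STACK_RND_MASK is 0x7ff, so the stack
 * base moves by a page-aligned offset of up to 0x7ff000 bytes, i.e.
 * just under the 8MB of VA noted above (architectures may override
 * the mask).
 */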
662 
663 static int load_elf_binary(struct linux_binprm *bprm)
664 {
665 	struct file *interpreter = NULL; /* to shut gcc up */
666 	unsigned long load_addr = 0, load_bias = 0;
667 	int load_addr_set = 0;
668 	char * elf_interpreter = NULL;
669 	unsigned long error;
670 	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
671 	unsigned long elf_bss, elf_brk;
672 	int retval, i;
673 	unsigned long elf_entry;
674 	unsigned long interp_load_addr = 0;
675 	unsigned long start_code, end_code, start_data, end_data;
676 	unsigned long reloc_func_desc __maybe_unused = 0;
677 	int executable_stack = EXSTACK_DEFAULT;
678 	struct pt_regs *regs = current_pt_regs();
679 	struct {
680 		struct elfhdr elf_ex;
681 		struct elfhdr interp_elf_ex;
682 	} *loc;
683 	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
684 
685 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
686 	if (!loc) {
687 		retval = -ENOMEM;
688 		goto out_ret;
689 	}
690 
691 	/* Get the exec-header */
692 	loc->elf_ex = *((struct elfhdr *)bprm->buf);
693 
694 	retval = -ENOEXEC;
695 	/* First of all, some simple consistency checks */
696 	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
697 		goto out;
698 
699 	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
700 		goto out;
701 	if (!elf_check_arch(&loc->elf_ex))
702 		goto out;
703 	if (!bprm->file->f_op->mmap)
704 		goto out;
705 
706 	elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
707 	if (!elf_phdata)
708 		goto out;
709 
710 	elf_ppnt = elf_phdata;
711 	elf_bss = 0;
712 	elf_brk = 0;
713 
714 	start_code = ~0UL;
715 	end_code = 0;
716 	start_data = 0;
717 	end_data = 0;
718 
719 	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
720 		if (elf_ppnt->p_type == PT_INTERP) {
721 			/* This is the program interpreter used for
722 			 * shared libraries - for now assume that this
723 			 * is an a.out format binary
724 			 */
725 			retval = -ENOEXEC;
726 			if (elf_ppnt->p_filesz > PATH_MAX ||
727 			    elf_ppnt->p_filesz < 2)
728 				goto out_free_ph;
729 
730 			retval = -ENOMEM;
731 			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
732 						  GFP_KERNEL);
733 			if (!elf_interpreter)
734 				goto out_free_ph;
735 
736 			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
737 					     elf_interpreter,
738 					     elf_ppnt->p_filesz);
739 			if (retval != elf_ppnt->p_filesz) {
740 				if (retval >= 0)
741 					retval = -EIO;
742 				goto out_free_interp;
743 			}
744 			/* make sure the path is NUL-terminated */
745 			retval = -ENOEXEC;
746 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
747 				goto out_free_interp;
748 
749 			interpreter = open_exec(elf_interpreter);
750 			retval = PTR_ERR(interpreter);
751 			if (IS_ERR(interpreter))
752 				goto out_free_interp;
753 
754 			/*
755 			 * If the binary is not readable then enforce
756 			 * mm->dumpable = 0 regardless of the interpreter's
757 			 * permissions.
758 			 */
759 			would_dump(bprm, interpreter);
760 
761 			retval = kernel_read(interpreter, 0, bprm->buf,
762 					     BINPRM_BUF_SIZE);
763 			if (retval != BINPRM_BUF_SIZE) {
764 				if (retval >= 0)
765 					retval = -EIO;
766 				goto out_free_dentry;
767 			}
768 
769 			/* Get the exec headers */
770 			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
771 			break;
772 		}
773 		elf_ppnt++;
774 	}
775 
776 	elf_ppnt = elf_phdata;
777 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
778 		switch (elf_ppnt->p_type) {
779 		case PT_GNU_STACK:
780 			if (elf_ppnt->p_flags & PF_X)
781 				executable_stack = EXSTACK_ENABLE_X;
782 			else
783 				executable_stack = EXSTACK_DISABLE_X;
784 			break;
785 
786 		case PT_LOPROC ... PT_HIPROC:
787 			retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
788 						  bprm->file, false,
789 						  &arch_state);
790 			if (retval)
791 				goto out_free_dentry;
792 			break;
793 		}
794 
795 	/* Some simple consistency checks for the interpreter */
796 	if (elf_interpreter) {
797 		retval = -ELIBBAD;
798 		/* Not an ELF interpreter */
799 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
800 			goto out_free_dentry;
801 		/* Verify the interpreter has a valid arch */
802 		if (!elf_check_arch(&loc->interp_elf_ex))
803 			goto out_free_dentry;
804 
805 		/* Load the interpreter program headers */
806 		interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
807 						   interpreter);
808 		if (!interp_elf_phdata)
809 			goto out_free_dentry;
810 
811 		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
812 		elf_ppnt = interp_elf_phdata;
813 		for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
814 			switch (elf_ppnt->p_type) {
815 			case PT_LOPROC ... PT_HIPROC:
816 				retval = arch_elf_pt_proc(&loc->interp_elf_ex,
817 							  elf_ppnt, interpreter,
818 							  true, &arch_state);
819 				if (retval)
820 					goto out_free_dentry;
821 				break;
822 			}
823 	}
824 
825 	/*
826 	 * Allow arch code to reject the ELF at this point, whilst it's
827 	 * still possible to return an error to the code that invoked
828 	 * the exec syscall.
829 	 */
830 	retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
831 	if (retval)
832 		goto out_free_dentry;
833 
834 	/* Flush all traces of the currently running executable */
835 	retval = flush_old_exec(bprm);
836 	if (retval)
837 		goto out_free_dentry;
838 
839 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
840 	   may depend on the personality.  */
841 	SET_PERSONALITY2(loc->elf_ex, &arch_state);
842 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
843 		current->personality |= READ_IMPLIES_EXEC;
844 
845 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
846 		current->flags |= PF_RANDOMIZE;
847 
848 	setup_new_exec(bprm);
849 
850 	/* Do this so that we can load the interpreter, if need be.  We will
851 	   change some of these later */
852 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
853 				 executable_stack);
854 	if (retval < 0)
855 		goto out_free_dentry;
856 
857 	current->mm->start_stack = bprm->p;
858 
859 	/* Now we do a little grungy work by mmapping the ELF image into
860 	   the correct location in memory. */
861 	for (i = 0, elf_ppnt = elf_phdata;
862 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
863 		int elf_prot = 0, elf_flags;
864 		unsigned long k, vaddr;
865 		unsigned long total_size = 0;
866 
867 		if (elf_ppnt->p_type != PT_LOAD)
868 			continue;
869 
870 		if (unlikely(elf_brk > elf_bss)) {
871 			unsigned long nbyte;
872 
873 			/* There was a PT_LOAD segment with p_memsz > p_filesz
874 			   before this one. Map anonymous pages, if needed,
875 			   and clear the area.  */
876 			retval = set_brk(elf_bss + load_bias,
877 					 elf_brk + load_bias);
878 			if (retval)
879 				goto out_free_dentry;
880 			nbyte = ELF_PAGEOFFSET(elf_bss);
881 			if (nbyte) {
882 				nbyte = ELF_MIN_ALIGN - nbyte;
883 				if (nbyte > elf_brk - elf_bss)
884 					nbyte = elf_brk - elf_bss;
885 				if (clear_user((void __user *)elf_bss +
886 							load_bias, nbyte)) {
887 					/*
888 					 * This bss-zeroing can fail if the ELF
889 					 * file specifies odd protections. So
890 					 * we don't check the return value.
891 					 */
892 				}
893 			}
894 		}
895 
896 		if (elf_ppnt->p_flags & PF_R)
897 			elf_prot |= PROT_READ;
898 		if (elf_ppnt->p_flags & PF_W)
899 			elf_prot |= PROT_WRITE;
900 		if (elf_ppnt->p_flags & PF_X)
901 			elf_prot |= PROT_EXEC;
902 
903 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
904 
905 		vaddr = elf_ppnt->p_vaddr;
906 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
907 			elf_flags |= MAP_FIXED;
908 		} else if (loc->elf_ex.e_type == ET_DYN) {
909 			/* Try and get dynamic programs out of the way of the
910 			 * default mmap base, as well as whatever program they
911 			 * might try to exec.  This is because the brk will
912 			 * follow the loader, and is not movable.  */
913 #ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
914 			/* Memory randomization might have been switched off
915 			 * at runtime via sysctl or explicit setting of
916 			 * personality flags.
917 			 * If that is the case, retain the original non-zero
918 			 * load_bias value in order to establish proper
919 			 * non-randomized mappings.
920 			 */
921 			if (current->flags & PF_RANDOMIZE)
922 				load_bias = 0;
923 			else
924 				load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
925 #else
926 			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
927 #endif
928 			total_size = total_mapping_size(elf_phdata,
929 							loc->elf_ex.e_phnum);
930 			if (!total_size) {
931 				retval = -EINVAL;
932 				goto out_free_dentry;
933 			}
934 		}
935 
936 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
937 				elf_prot, elf_flags, total_size);
938 		if (BAD_ADDR(error)) {
939 			retval = IS_ERR((void *)error) ?
940 				PTR_ERR((void *)error) : -EINVAL;
941 			goto out_free_dentry;
942 		}
943 
944 		if (!load_addr_set) {
945 			load_addr_set = 1;
946 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
947 			if (loc->elf_ex.e_type == ET_DYN) {
948 				load_bias += error -
949 				             ELF_PAGESTART(load_bias + vaddr);
950 				load_addr += load_bias;
951 				reloc_func_desc = load_bias;
952 			}
953 		}
954 		k = elf_ppnt->p_vaddr;
955 		if (k < start_code)
956 			start_code = k;
957 		if (start_data < k)
958 			start_data = k;
959 
960 		/*
961 		 * Check to see if the section's size will overflow the
962 		 * allowed task size. Note that p_filesz must always be
963 		 * <= p_memsz so it is only necessary to check p_memsz.
964 		 */
965 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
966 		    elf_ppnt->p_memsz > TASK_SIZE ||
967 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
968 			/* set_brk can never work. Avoid overflows. */
969 			retval = -EINVAL;
970 			goto out_free_dentry;
971 		}
972 
973 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
974 
975 		if (k > elf_bss)
976 			elf_bss = k;
977 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
978 			end_code = k;
979 		if (end_data < k)
980 			end_data = k;
981 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
982 		if (k > elf_brk)
983 			elf_brk = k;
984 	}
985 
986 	loc->elf_ex.e_entry += load_bias;
987 	elf_bss += load_bias;
988 	elf_brk += load_bias;
989 	start_code += load_bias;
990 	end_code += load_bias;
991 	start_data += load_bias;
992 	end_data += load_bias;
993 
994 	/* Calling set_brk effectively mmaps the pages that we need
995 	 * for the bss and break sections.  We must do this before
996 	 * mapping in the interpreter, to make sure it doesn't wind
997 	 * up getting placed where the bss needs to go.
998 	 */
999 	retval = set_brk(elf_bss, elf_brk);
1000 	if (retval)
1001 		goto out_free_dentry;
1002 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
1003 		retval = -EFAULT; /* Nobody gets to see this, but.. */
1004 		goto out_free_dentry;
1005 	}
1006 
1007 	if (elf_interpreter) {
1008 		unsigned long interp_map_addr = 0;
1009 
1010 		elf_entry = load_elf_interp(&loc->interp_elf_ex,
1011 					    interpreter,
1012 					    &interp_map_addr,
1013 					    load_bias, interp_elf_phdata);
1014 		if (!IS_ERR((void *)elf_entry)) {
1015 			/*
1016 			 * load_elf_interp() returns the relocation
1017 			 * adjustment.
1018 			 */
1019 			interp_load_addr = elf_entry;
1020 			elf_entry += loc->interp_elf_ex.e_entry;
1021 		}
1022 		if (BAD_ADDR(elf_entry)) {
1023 			retval = IS_ERR((void *)elf_entry) ?
1024 					(int)elf_entry : -EINVAL;
1025 			goto out_free_dentry;
1026 		}
1027 		reloc_func_desc = interp_load_addr;
1028 
1029 		allow_write_access(interpreter);
1030 		fput(interpreter);
1031 		kfree(elf_interpreter);
1032 	} else {
1033 		elf_entry = loc->elf_ex.e_entry;
1034 		if (BAD_ADDR(elf_entry)) {
1035 			retval = -EINVAL;
1036 			goto out_free_dentry;
1037 		}
1038 	}
1039 
1040 	kfree(interp_elf_phdata);
1041 	kfree(elf_phdata);
1042 
1043 	set_binfmt(&elf_format);
1044 
1045 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1046 	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1047 	if (retval < 0)
1048 		goto out;
1049 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1050 
1051 	install_exec_creds(bprm);
1052 	retval = create_elf_tables(bprm, &loc->elf_ex,
1053 			  load_addr, interp_load_addr);
1054 	if (retval < 0)
1055 		goto out;
1056 	/* N.B. passed_fileno might not be initialized? */
1057 	current->mm->end_code = end_code;
1058 	current->mm->start_code = start_code;
1059 	current->mm->start_data = start_data;
1060 	current->mm->end_data = end_data;
1061 	current->mm->start_stack = bprm->p;
1062 
1063 #ifdef arch_randomize_brk
1064 	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1065 		current->mm->brk = current->mm->start_brk =
1066 			arch_randomize_brk(current->mm);
1067 #ifdef CONFIG_COMPAT_BRK
1068 		current->brk_randomized = 1;
1069 #endif
1070 	}
1071 #endif
1072 
1073 	if (current->personality & MMAP_PAGE_ZERO) {
1074 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1075 		   and some applications "depend" upon this behavior.
1076 		   Since we do not have the power to recompile these, we
1077 		   emulate the SVr4 behavior. Sigh. */
1078 		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1079 				MAP_FIXED | MAP_PRIVATE, 0);
1080 	}
1081 
1082 #ifdef ELF_PLAT_INIT
1083 	/*
1084 	 * The ABI may specify that certain registers be set up in special
1085 	 * ways (on i386 %edx is the address of a DT_FINI function, for
1086 	 * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
1087 	 * that the e_entry field is the address of the function descriptor
1088 	 * for the startup routine, rather than the address of the startup
1089 	 * routine itself.  This macro performs whatever initialization to
1090 	 * the regs structure is required as well as any relocations to the
1091 	 * function descriptor entries when executing dynamically linked apps.
1092 	 */
1093 	ELF_PLAT_INIT(regs, reloc_func_desc);
1094 #endif
1095 
1096 	start_thread(regs, elf_entry, bprm->p);
1097 	retval = 0;
1098 out:
1099 	kfree(loc);
1100 out_ret:
1101 	return retval;
1102 
1103 	/* error cleanup */
1104 out_free_dentry:
1105 	kfree(interp_elf_phdata);
1106 	allow_write_access(interpreter);
1107 	if (interpreter)
1108 		fput(interpreter);
1109 out_free_interp:
1110 	kfree(elf_interpreter);
1111 out_free_ph:
1112 	kfree(elf_phdata);
1113 	goto out;
1114 }
1115 
1116 #ifdef CONFIG_USELIB
1117 /* This is really simpleminded and specialized - we are loading an
1118    a.out library that is given an ELF header. */
1119 static int load_elf_library(struct file *file)
1120 {
1121 	struct elf_phdr *elf_phdata;
1122 	struct elf_phdr *eppnt;
1123 	unsigned long elf_bss, bss, len;
1124 	int retval, error, i, j;
1125 	struct elfhdr elf_ex;
1126 
1127 	error = -ENOEXEC;
1128 	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1129 	if (retval != sizeof(elf_ex))
1130 		goto out;
1131 
1132 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1133 		goto out;
1134 
1135 	/* First of all, some simple consistency checks */
1136 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1137 	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1138 		goto out;
1139 
1140 	/* Now read in all of the header information */
1141 
1142 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1143 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1144 
1145 	error = -ENOMEM;
1146 	elf_phdata = kmalloc(j, GFP_KERNEL);
1147 	if (!elf_phdata)
1148 		goto out;
1149 
1150 	eppnt = elf_phdata;
1151 	error = -ENOEXEC;
1152 	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1153 	if (retval != j)
1154 		goto out_free_ph;
1155 
1156 	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
1157 		if ((eppnt + i)->p_type == PT_LOAD)
1158 			j++;
1159 	if (j != 1)
1160 		goto out_free_ph;
1161 
1162 	while (eppnt->p_type != PT_LOAD)
1163 		eppnt++;
1164 
1165 	/* Now use mmap to map the library into memory. */
1166 	error = vm_mmap(file,
1167 			ELF_PAGESTART(eppnt->p_vaddr),
1168 			(eppnt->p_filesz +
1169 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1170 			PROT_READ | PROT_WRITE | PROT_EXEC,
1171 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1172 			(eppnt->p_offset -
1173 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1174 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1175 		goto out_free_ph;
1176 
1177 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1178 	if (padzero(elf_bss)) {
1179 		error = -EFAULT;
1180 		goto out_free_ph;
1181 	}
1182 
1183 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1184 			    ELF_MIN_ALIGN - 1);
1185 	bss = eppnt->p_memsz + eppnt->p_vaddr;
1186 	if (bss > len)
1187 		vm_brk(len, bss - len);
1188 	error = 0;
1189 
1190 out_free_ph:
1191 	kfree(elf_phdata);
1192 out:
1193 	return error;
1194 }
1195 #endif /* #ifdef CONFIG_USELIB */
1196 
1197 #ifdef CONFIG_ELF_CORE
1198 /*
1199  * ELF core dumper
1200  *
1201  * Modelled on fs/exec.c:aout_core_dump()
1202  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1203  */
1204 
1205 /*
1206  * The purpose of always_dump_vma() is to make sure that special kernel mappings
1207  * that are useful for post-mortem analysis are included in every core dump.
1208  * In that way we ensure that the core dump is fully interpretable later
1209  * without matching up the same kernel and hardware config to see what PC values
1210  * meant.  These special mappings include the vDSO, vsyscall, and other
1211  * architecture-specific mappings.
1212  */
1213 static bool always_dump_vma(struct vm_area_struct *vma)
1214 {
1215 	/* Any vsyscall mappings? */
1216 	if (vma == get_gate_vma(vma->vm_mm))
1217 		return true;
1218 
1219 	/*
1220 	 * Assume that all vmas with a .name op should always be dumped.
1221 	 * If this changes, a new vm_ops field can easily be added.
1222 	 */
1223 	if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1224 		return true;
1225 
1226 	/*
1227 	 * arch_vma_name() returns non-NULL for special architecture mappings,
1228 	 * such as vDSO sections.
1229 	 */
1230 	if (arch_vma_name(vma))
1231 		return true;
1232 
1233 	return false;
1234 }
1235 
1236 /*
1237  * Decide how much of a segment to dump: none, part, or all.
1238  */
1239 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1240 				   unsigned long mm_flags)
1241 {
1242 #define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
1243 
1244 	/* always dump the vdso and vsyscall sections */
1245 	if (always_dump_vma(vma))
1246 		goto whole;
1247 
1248 	if (vma->vm_flags & VM_DONTDUMP)
1249 		return 0;
1250 
1251 	/* Hugetlb memory check */
1252 	if (vma->vm_flags & VM_HUGETLB) {
1253 		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1254 			goto whole;
1255 		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1256 			goto whole;
1257 		return 0;
1258 	}
1259 
1260 	/* Do not dump I/O mapped devices or special mappings */
1261 	if (vma->vm_flags & VM_IO)
1262 		return 0;
1263 
1264 	/* By default, dump shared memory if mapped from an anonymous file. */
1265 	if (vma->vm_flags & VM_SHARED) {
1266 		if (file_inode(vma->vm_file)->i_nlink == 0 ?
1267 		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1268 			goto whole;
1269 		return 0;
1270 	}
1271 
1272 	/* Dump segments that have been written to.  */
1273 	if (vma->anon_vma && FILTER(ANON_PRIVATE))
1274 		goto whole;
1275 	if (vma->vm_file == NULL)
1276 		return 0;
1277 
1278 	if (FILTER(MAPPED_PRIVATE))
1279 		goto whole;
1280 
1281 	/*
1282 	 * If this looks like the beginning of a DSO or executable mapping,
1283 	 * check for an ELF header.  If we find one, dump the first page to
1284 	 * aid in determining what was mapped here.
1285 	 */
1286 	if (FILTER(ELF_HEADERS) &&
1287 	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1288 		u32 __user *header = (u32 __user *) vma->vm_start;
1289 		u32 word;
1290 		mm_segment_t fs = get_fs();
1291 		/*
1292 		 * Doing it this way gets the constant folded by GCC.
1293 		 */
1294 		union {
1295 			u32 cmp;
1296 			char elfmag[SELFMAG];
1297 		} magic;
1298 		BUILD_BUG_ON(SELFMAG != sizeof word);
1299 		magic.elfmag[EI_MAG0] = ELFMAG0;
1300 		magic.elfmag[EI_MAG1] = ELFMAG1;
1301 		magic.elfmag[EI_MAG2] = ELFMAG2;
1302 		magic.elfmag[EI_MAG3] = ELFMAG3;
1303 		/*
1304 		 * Switch to the user "segment" for get_user(),
1305 		 * then put back what elf_core_dump() had in place.
1306 		 */
1307 		set_fs(USER_DS);
1308 		if (unlikely(get_user(word, header)))
1309 			word = 0;
1310 		set_fs(fs);
1311 		if (word == magic.cmp)
1312 			return PAGE_SIZE;
1313 	}
1314 
1315 #undef	FILTER
1316 
1317 	return 0;
1318 
1319 whole:
1320 	return vma->vm_end - vma->vm_start;
1321 }
1322 
1323 /* An ELF note in memory */
1324 struct memelfnote
1325 {
1326 	const char *name;
1327 	int type;
1328 	unsigned int datasz;
1329 	void *data;
1330 };
1331 
1332 static int notesize(struct memelfnote *en)
1333 {
1334 	int sz;
1335 
1336 	sz = sizeof(struct elf_note);
1337 	sz += roundup(strlen(en->name) + 1, 4);
1338 	sz += roundup(en->datasz, 4);
1339 
1340 	return sz;
1341 }
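/*
 * E.g. a note named "CORE" (5 bytes including the NUL, padded to 8)
 * with a 20-byte payload totals 12 + 8 + 20 = 40 bytes: the elf_note
 * header is three 4-byte words, and both the name and the descriptor
 * are padded to 4-byte boundaries (illustrative sizes).
 */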
1342 
1343 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1344 {
1345 	struct elf_note en;
1346 	en.n_namesz = strlen(men->name) + 1;
1347 	en.n_descsz = men->datasz;
1348 	en.n_type = men->type;
1349 
1350 	return dump_emit(cprm, &en, sizeof(en)) &&
1351 	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1352 	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1353 }
1354 
1355 static void fill_elf_header(struct elfhdr *elf, int segs,
1356 			    u16 machine, u32 flags)
1357 {
1358 	memset(elf, 0, sizeof(*elf));
1359 
1360 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1361 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1362 	elf->e_ident[EI_DATA] = ELF_DATA;
1363 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1364 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1365 
1366 	elf->e_type = ET_CORE;
1367 	elf->e_machine = machine;
1368 	elf->e_version = EV_CURRENT;
1369 	elf->e_phoff = sizeof(struct elfhdr);
1370 	elf->e_flags = flags;
1371 	elf->e_ehsize = sizeof(struct elfhdr);
1372 	elf->e_phentsize = sizeof(struct elf_phdr);
1373 	elf->e_phnum = segs;
1374 
1375 	return;
1376 }
1377 
1378 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1379 {
1380 	phdr->p_type = PT_NOTE;
1381 	phdr->p_offset = offset;
1382 	phdr->p_vaddr = 0;
1383 	phdr->p_paddr = 0;
1384 	phdr->p_filesz = sz;
1385 	phdr->p_memsz = 0;
1386 	phdr->p_flags = 0;
1387 	phdr->p_align = 0;
1388 	return;
1389 }
1390 
1391 static void fill_note(struct memelfnote *note, const char *name, int type,
1392 		unsigned int sz, void *data)
1393 {
1394 	note->name = name;
1395 	note->type = type;
1396 	note->datasz = sz;
1397 	note->data = data;
1398 	return;
1399 }
1400 
1401 /*
1402  * Fill in all the fields in prstatus from the given task struct, except
1403  * the registers, which need to be filled in separately.
1404  */
1405 static void fill_prstatus(struct elf_prstatus *prstatus,
1406 		struct task_struct *p, long signr)
1407 {
1408 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1409 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1410 	prstatus->pr_sighold = p->blocked.sig[0];
1411 	rcu_read_lock();
1412 	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1413 	rcu_read_unlock();
1414 	prstatus->pr_pid = task_pid_vnr(p);
1415 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1416 	prstatus->pr_sid = task_session_vnr(p);
1417 	if (thread_group_leader(p)) {
1418 		struct task_cputime cputime;
1419 
1420 		/*
1421 		 * This is the record for the group leader.  It shows the
1422 		 * group-wide total, not its individual thread total.
1423 		 */
1424 		thread_group_cputime(p, &cputime);
1425 		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1426 		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1427 	} else {
1428 		cputime_t utime, stime;
1429 
1430 		task_cputime(p, &utime, &stime);
1431 		cputime_to_timeval(utime, &prstatus->pr_utime);
1432 		cputime_to_timeval(stime, &prstatus->pr_stime);
1433 	}
1434 	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1435 	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1436 }
1437 
1438 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1439 		       struct mm_struct *mm)
1440 {
1441 	const struct cred *cred;
1442 	unsigned int i, len;
1443 
1444 	/* first copy the parameters from user space */
1445 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1446 
1447 	len = mm->arg_end - mm->arg_start;
1448 	if (len >= ELF_PRARGSZ)
1449 		len = ELF_PRARGSZ-1;
1450 	if (copy_from_user(&psinfo->pr_psargs,
1451 		           (const char __user *)mm->arg_start, len))
1452 		return -EFAULT;
1453 	for (i = 0; i < len; i++)
1454 		if (psinfo->pr_psargs[i] == 0)
1455 			psinfo->pr_psargs[i] = ' ';
1456 	psinfo->pr_psargs[len] = 0;
1457 
1458 	rcu_read_lock();
1459 	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1460 	rcu_read_unlock();
1461 	psinfo->pr_pid = task_pid_vnr(p);
1462 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1463 	psinfo->pr_sid = task_session_vnr(p);
1464 
1465 	i = p->state ? ffz(~p->state) + 1 : 0;
1466 	psinfo->pr_state = i;
1467 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1468 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1469 	psinfo->pr_nice = task_nice(p);
1470 	psinfo->pr_flag = p->flags;
1471 	rcu_read_lock();
1472 	cred = __task_cred(p);
1473 	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1474 	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1475 	rcu_read_unlock();
1476 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1477 
1478 	return 0;
1479 }
1480 
1481 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1482 {
1483 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1484 	int i = 0;
1485 	do
1486 		i += 2;
1487 	while (auxv[i - 2] != AT_NULL);
1488 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1489 }
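/*
 * The loop above counts saved_auxv in (id, value) pairs and stops just
 * past the terminating AT_NULL pair, so the terminator itself is
 * included in the note.
 */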
1490 
1491 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1492 		const siginfo_t *siginfo)
1493 {
1494 	mm_segment_t old_fs = get_fs();
1495 	set_fs(KERNEL_DS);
1496 	copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1497 	set_fs(old_fs);
1498 	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1499 }
1500 
1501 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1502 /*
1503  * Format of NT_FILE note:
1504  *
1505  * long count     -- how many files are mapped
1506  * long page_size -- units for file_ofs
1507  * array of [COUNT] elements of
1508  *   long start
1509  *   long end
1510  *   long file_ofs
1511  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1512  */
1513 static int fill_files_note(struct memelfnote *note)
1514 {
1515 	struct vm_area_struct *vma;
1516 	unsigned count, size, names_ofs, remaining, n;
1517 	user_long_t *data;
1518 	user_long_t *start_end_ofs;
1519 	char *name_base, *name_curpos;
1520 
1521 	/* *Estimated* file count and total data size needed */
1522 	count = current->mm->map_count;
1523 	size = count * 64;
1524 
1525 	names_ofs = (2 + 3 * count) * sizeof(data[0]);
1526  alloc:
1527 	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1528 		return -EINVAL;
1529 	size = round_up(size, PAGE_SIZE);
1530 	data = vmalloc(size);
1531 	if (!data)
1532 		return -ENOMEM;
1533 
1534 	start_end_ofs = data + 2;
1535 	name_base = name_curpos = ((char *)data) + names_ofs;
1536 	remaining = size - names_ofs;
1537 	count = 0;
1538 	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1539 		struct file *file;
1540 		const char *filename;
1541 
1542 		file = vma->vm_file;
1543 		if (!file)
1544 			continue;
1545 		filename = d_path(&file->f_path, name_curpos, remaining);
1546 		if (IS_ERR(filename)) {
1547 			if (PTR_ERR(filename) == -ENAMETOOLONG) {
1548 				vfree(data);
1549 				size = size * 5 / 4;
1550 				goto alloc;
1551 			}
1552 			continue;
1553 		}
1554 
1555 		/* d_path() fills at the end, move name down */
1556 		/* n = strlen(filename) + 1: */
1557 		n = (name_curpos + remaining) - filename;
1558 		remaining = filename - name_curpos;
1559 		memmove(name_curpos, filename, n);
1560 		name_curpos += n;
1561 
1562 		*start_end_ofs++ = vma->vm_start;
1563 		*start_end_ofs++ = vma->vm_end;
1564 		*start_end_ofs++ = vma->vm_pgoff;
1565 		count++;
1566 	}
1567 
1568 	/* Now we know the exact count of files, so we can store it */
1569 	data[0] = count;
1570 	data[1] = PAGE_SIZE;
1571 	/*
1572 	 * The count is usually less than current->mm->map_count,
1573 	 * so we need to move the filenames down.
1574 	 */
1575 	n = current->mm->map_count - count;
1576 	if (n != 0) {
1577 		unsigned shift_bytes = n * 3 * sizeof(data[0]);
1578 		memmove(name_base - shift_bytes, name_base,
1579 			name_curpos - name_base);
1580 		name_curpos -= shift_bytes;
1581 	}
1582 
1583 	size = name_curpos - (char *)data;
1584 	fill_note(note, "CORE", NT_FILE, size, data);
1585 	return 0;
1586 }
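/*
 * E.g. if map_count was 10 but only 7 vmas turned out to be
 * file-backed, three unused (start, end, file_ofs) triples would sit
 * between the header and the names, so the names are shifted down by
 * 3 * 3 * sizeof(data[0]) bytes (illustrative counts).
 */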
1587 
1588 #ifdef CORE_DUMP_USE_REGSET
1589 #include <linux/regset.h>
1590 
1591 struct elf_thread_core_info {
1592 	struct elf_thread_core_info *next;
1593 	struct task_struct *task;
1594 	struct elf_prstatus prstatus;
1595 	struct memelfnote notes[0];
1596 };
1597 
1598 struct elf_note_info {
1599 	struct elf_thread_core_info *thread;
1600 	struct memelfnote psinfo;
1601 	struct memelfnote signote;
1602 	struct memelfnote auxv;
1603 	struct memelfnote files;
1604 	user_siginfo_t csigdata;
1605 	size_t size;
1606 	int thread_notes;
1607 };
1608 
1609 /*
1610  * When a regset has a writeback hook, we call it on each thread before
1611  * dumping user memory.  On register window machines, this makes sure the
1612  * user memory backing the register data is up to date before we read it.
1613  */
1614 static void do_thread_regset_writeback(struct task_struct *task,
1615 				       const struct user_regset *regset)
1616 {
1617 	if (regset->writeback)
1618 		regset->writeback(task, regset, 1);
1619 }
1620 
1621 #ifndef PR_REG_SIZE
1622 #define PR_REG_SIZE(S) sizeof(S)
1623 #endif
1624 
1625 #ifndef PRSTATUS_SIZE
1626 #define PRSTATUS_SIZE(S) sizeof(S)
1627 #endif
1628 
1629 #ifndef PR_REG_PTR
1630 #define PR_REG_PTR(S) (&((S)->pr_reg))
1631 #endif
1632 
1633 #ifndef SET_PR_FPVALID
1634 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1635 #endif
1636 
1637 static int fill_thread_core_info(struct elf_thread_core_info *t,
1638 				 const struct user_regset_view *view,
1639 				 long signr, size_t *total)
1640 {
1641 	unsigned int i;
1642 
1643 	/*
1644 	 * NT_PRSTATUS is the one special case, because the regset data
1645 	 * goes into the pr_reg field inside the note contents, rather
1646 	 * than being the whole note contents.  We fill the rest in here.
1647 	 * We assume that regset 0 is NT_PRSTATUS.
1648 	 */
1649 	fill_prstatus(&t->prstatus, t->task, signr);
1650 	(void) view->regsets[0].get(t->task, &view->regsets[0],
1651 				    0, PR_REG_SIZE(t->prstatus.pr_reg),
1652 				    PR_REG_PTR(&t->prstatus), NULL);
1653 
1654 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1655 		  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1656 	*total += notesize(&t->notes[0]);
1657 
1658 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1659 
1660 	/*
1661 	 * Each other regset might generate a note too.  For each regset
1662 	 * that has no core_note_type or is inactive, we leave t->notes[i]
1663 	 * all zero and we'll know to skip writing it later.
1664 	 */
1665 	for (i = 1; i < view->n; ++i) {
1666 		const struct user_regset *regset = &view->regsets[i];
1667 		do_thread_regset_writeback(t->task, regset);
1668 		if (regset->core_note_type && regset->get &&
1669 		    (!regset->active || regset->active(t->task, regset))) {
1670 			int ret;
1671 			size_t size = regset->n * regset->size;
1672 			void *data = kmalloc(size, GFP_KERNEL);
1673 			if (unlikely(!data))
1674 				return 0;
1675 			ret = regset->get(t->task, regset,
1676 					  0, size, data, NULL);
1677 			if (unlikely(ret))
1678 				kfree(data);
1679 			else {
1680 				if (regset->core_note_type != NT_PRFPREG)
1681 					fill_note(&t->notes[i], "LINUX",
1682 						  regset->core_note_type,
1683 						  size, data);
1684 				else {
1685 					SET_PR_FPVALID(&t->prstatus, 1);
1686 					fill_note(&t->notes[i], "CORE",
1687 						  NT_PRFPREG, size, data);
1688 				}
1689 				*total += notesize(&t->notes[i]);
1690 			}
1691 		}
1692 	}
1693 
1694 	return 1;
1695 }
1696 
1697 static int fill_note_info(struct elfhdr *elf, int phdrs,
1698 			  struct elf_note_info *info,
1699 			  const siginfo_t *siginfo, struct pt_regs *regs)
1700 {
1701 	struct task_struct *dump_task = current;
1702 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1703 	struct elf_thread_core_info *t;
1704 	struct elf_prpsinfo *psinfo;
1705 	struct core_thread *ct;
1706 	unsigned int i;
1707 
1708 	info->size = 0;
1709 	info->thread = NULL;
1710 
1711 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1712 	if (psinfo == NULL) {
1713 		info->psinfo.data = NULL; /* So we don't free this wrongly */
1714 		return 0;
1715 	}
1716 
1717 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1718 
1719 	/*
1720 	 * Figure out how many notes we're going to need for each thread.
1721 	 */
1722 	info->thread_notes = 0;
1723 	for (i = 0; i < view->n; ++i)
1724 		if (view->regsets[i].core_note_type != 0)
1725 			++info->thread_notes;
1726 
1727 	/*
1728 	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1729 	 * since it is our one special case.
1730 	 */
1731 	if (unlikely(info->thread_notes == 0) ||
1732 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1733 		WARN_ON(1);
1734 		return 0;
1735 	}
1736 
1737 	/*
1738 	 * Initialize the ELF file header.
1739 	 */
1740 	fill_elf_header(elf, phdrs,
1741 			view->e_machine, view->e_flags);
1742 
1743 	/*
1744 	 * Allocate a structure for each thread.
1745 	 */
1746 	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1747 		t = kzalloc(offsetof(struct elf_thread_core_info,
1748 				     notes[info->thread_notes]),
1749 			    GFP_KERNEL);
1750 		if (unlikely(!t))
1751 			return 0;
1752 
1753 		t->task = ct->task;
1754 		if (ct->task == dump_task || !info->thread) {
1755 			t->next = info->thread;
1756 			info->thread = t;
1757 		} else {
1758 			/*
1759 			 * Make sure to keep the original task at
1760 			 * the head of the list.
1761 			 */
1762 			t->next = info->thread->next;
1763 			info->thread->next = t;
1764 		}
1765 	}
1766 
1767 	/*
1768 	 * Now fill in each thread's information.
1769 	 */
1770 	for (t = info->thread; t != NULL; t = t->next)
1771 		if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1772 			return 0;
1773 
1774 	/*
1775 	 * Fill in the two process-wide notes.
1776 	 */
1777 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1778 	info->size += notesize(&info->psinfo);
1779 
1780 	fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1781 	info->size += notesize(&info->signote);
1782 
1783 	fill_auxv_note(&info->auxv, current->mm);
1784 	info->size += notesize(&info->auxv);
1785 
1786 	if (fill_files_note(&info->files) == 0)
1787 		info->size += notesize(&info->files);
1788 
1789 	return 1;
1790 }
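
/*
 * info->size now holds the exact byte count of the PT_NOTE payload,
 * accumulated note by note above; get_note_info_size() below simply
 * reports it.
 */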

static size_t get_note_info_size(struct elf_note_info *info)
{
	return info->size;
}

/*
 * Write all the notes for each thread.  When writing the first thread, the
 * process-wide notes are interleaved after the first thread-specific note.
 */
static int write_note_info(struct elf_note_info *info,
			   struct coredump_params *cprm)
{
	bool first = true;
	struct elf_thread_core_info *t = info->thread;

	do {
		int i;

		if (!writenote(&t->notes[0], cprm))
			return 0;

		if (first && !writenote(&info->psinfo, cprm))
			return 0;
		if (first && !writenote(&info->signote, cprm))
			return 0;
		if (first && !writenote(&info->auxv, cprm))
			return 0;
		if (first && info->files.data &&
				!writenote(&info->files, cprm))
			return 0;

		for (i = 1; i < info->thread_notes; ++i)
			if (t->notes[i].data &&
			    !writenote(&t->notes[i], cprm))
				return 0;

		first = false;
		t = t->next;
	} while (t);

	return 1;
}
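
/*
 * For a two-thread process the emitted order is thus, in effect:
 * T0 NT_PRSTATUS, NT_PRPSINFO, NT_SIGINFO, NT_AUXV, [NT_FILE],
 * T0 per-regset notes, then T1 NT_PRSTATUS and T1 per-regset notes.
 */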

static void free_note_info(struct elf_note_info *info)
{
	struct elf_thread_core_info *threads = info->thread;
	while (threads) {
		unsigned int i;
		struct elf_thread_core_info *t = threads;
		threads = t->next;
		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
		for (i = 1; i < info->thread_notes; ++i)
			kfree(t->notes[i].data);
		kfree(t);
	}
	kfree(info->psinfo.data);
	vfree(info->files.data);
}

#else

/* Here is the structure in which the status of each thread is captured. */
struct elf_thread_status
{
	struct list_head list;
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	elf_fpregset_t fpu;		/* NT_PRFPREG */
	struct task_struct *thread;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
#endif
	struct memelfnote notes[3];
	int num_notes;
};

/*
 * In order to add the specific thread information for the elf file format,
 * we need to keep a linked list of every thread's pr_status and then create
 * a single section for them in the final core file.
 */
static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
{
	int sz = 0;
	struct task_struct *p = t->thread;
	t->num_notes = 0;

	fill_prstatus(&t->prstatus, p, signr);
	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
		  &(t->prstatus));
	t->num_notes++;
	sz += notesize(&t->notes[0]);

	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
								&t->fpu))) {
		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
			  &(t->fpu));
		t->num_notes++;
		sz += notesize(&t->notes[1]);
	}

#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
			  sizeof(t->xfpu), &t->xfpu);
		t->num_notes++;
		sz += notesize(&t->notes[2]);
	}
#endif
	return sz;
}
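
/*
 * The value returned is the total, padded on-disk size of this thread's
 * notes; fill_note_info() below accumulates it into thread_status_size
 * so the PT_NOTE program header can be sized before anything is written.
 */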

struct elf_note_info {
	struct memelfnote *notes;
	struct memelfnote *notes_files;
	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
	struct list_head thread_list;
	elf_fpregset_t *fpu;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t *xfpu;
#endif
	user_siginfo_t csigdata;
	int thread_status_size;
	int numnote;
};

static int elf_note_info_init(struct elf_note_info *info)
{
	memset(info, 0, sizeof(*info));
	INIT_LIST_HEAD(&info->thread_list);

	/* Allocate space for ELF notes */
	info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
	if (!info->notes)
		return 0;
	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
	if (!info->psinfo)
		return 0;
	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
	if (!info->prstatus)
		return 0;
	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
	if (!info->fpu)
		return 0;
#ifdef ELF_CORE_COPY_XFPREGS
	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
	if (!info->xfpu)
		return 0;
#endif
	return 1;
}
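
/*
 * On any allocation failure we return 0 with the remaining pointers
 * still NULL from the memset above; kfree(NULL) is a no-op, so the
 * free_note_info() cleanup path stays safe.
 */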

static int fill_note_info(struct elfhdr *elf, int phdrs,
			  struct elf_note_info *info,
			  const siginfo_t *siginfo, struct pt_regs *regs)
{
	struct list_head *t;
	struct core_thread *ct;
	struct elf_thread_status *ets;

	if (!elf_note_info_init(info))
		return 0;

	for (ct = current->mm->core_state->dumper.next;
					ct; ct = ct->next) {
		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
		if (!ets)
			return 0;

		ets->thread = ct->task;
		list_add(&ets->list, &info->thread_list);
	}

	list_for_each(t, &info->thread_list) {
		int sz;

		ets = list_entry(t, struct elf_thread_status, list);
		sz = elf_dump_thread_status(siginfo->si_signo, ets);
		info->thread_status_size += sz;
	}
	/* now collect the dump for the current task */
	memset(info->prstatus, 0, sizeof(*info->prstatus));
	fill_prstatus(info->prstatus, current, siginfo->si_signo);
	elf_core_copy_regs(&info->prstatus->pr_reg, regs);

	/* Set up header */
	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);

	/*
	 * Set up the notes in similar form to SVR4 core dumps made
	 * with info from their /proc.
	 */

	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
		  sizeof(*info->prstatus), info->prstatus);
	fill_psinfo(info->psinfo, current->group_leader, current->mm);
	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
		  sizeof(*info->psinfo), info->psinfo);

	fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
	fill_auxv_note(info->notes + 3, current->mm);
	info->numnote = 4;

	if (fill_files_note(info->notes + info->numnote) == 0) {
		info->notes_files = info->notes + info->numnote;
		info->numnote++;
	}

	/* Try to dump the FPU. */
	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
							       info->fpu);
	if (info->prstatus->pr_fpvalid)
		fill_note(info->notes + info->numnote++,
			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(current, info->xfpu))
		fill_note(info->notes + info->numnote++,
			  "LINUX", ELF_CORE_XFPREG_TYPE,
			  sizeof(*info->xfpu), info->xfpu);
#endif

	return 1;
}
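
/*
 * At this point info->notes[] holds, in order: NT_PRSTATUS, NT_PRPSINFO,
 * NT_SIGINFO, NT_AUXV, then optionally NT_FILE, NT_PRFPREG and the arch
 * xfpregs note; the eight slots allocated in elf_note_info_init() leave
 * room for all of them.
 */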

static size_t get_note_info_size(struct elf_note_info *info)
{
	int sz = 0;
	int i;

	for (i = 0; i < info->numnote; i++)
		sz += notesize(info->notes + i);

	sz += info->thread_status_size;

	return sz;
}

static int write_note_info(struct elf_note_info *info,
			   struct coredump_params *cprm)
{
	int i;
	struct list_head *t;

	for (i = 0; i < info->numnote; i++)
		if (!writenote(info->notes + i, cprm))
			return 0;

	/* write out the thread status notes section */
	list_for_each(t, &info->thread_list) {
		struct elf_thread_status *tmp =
				list_entry(t, struct elf_thread_status, list);

		for (i = 0; i < tmp->num_notes; i++)
			if (!writenote(&tmp->notes[i], cprm))
				return 0;
	}

	return 1;
}
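
/*
 * Unlike the regset-based variant above, this writes all process-wide
 * notes first, then each additional thread's NT_PRSTATUS (and FPU)
 * notes, one thread after another.
 */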

static void free_note_info(struct elf_note_info *info)
{
	while (!list_empty(&info->thread_list)) {
		struct list_head *tmp = info->thread_list.next;
		list_del(tmp);
		kfree(list_entry(tmp, struct elf_thread_status, list));
	}

	/* Free data possibly allocated by fill_files_note(): */
	if (info->notes_files)
		vfree(info->notes_files->data);

	kfree(info->prstatus);
	kfree(info->psinfo);
	kfree(info->notes);
	kfree(info->fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	kfree(info->xfpu);
#endif
}

#endif

static struct vm_area_struct *first_vma(struct task_struct *tsk,
					struct vm_area_struct *gate_vma)
{
	struct vm_area_struct *ret = tsk->mm->mmap;

	if (ret)
		return ret;
	return gate_vma;
}

/*
 * Helper function for iterating across a vma list.  It ensures that the caller
 * will visit `gate_vma' prior to terminating the search.
 */
static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
					struct vm_area_struct *gate_vma)
{
	struct vm_area_struct *ret;

	ret = this_vma->vm_next;
	if (ret)
		return ret;
	if (this_vma == gate_vma)
		return NULL;
	return gate_vma;
}

static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
			     elf_addr_t e_shoff, int segs)
{
	elf->e_shoff = e_shoff;
	elf->e_shentsize = sizeof(*shdr4extnum);
	elf->e_shnum = 1;
	elf->e_shstrndx = SHN_UNDEF;

	memset(shdr4extnum, 0, sizeof(*shdr4extnum));

	shdr4extnum->sh_type = SHT_NULL;
	shdr4extnum->sh_size = elf->e_shnum;
	shdr4extnum->sh_link = elf->e_shstrndx;
	shdr4extnum->sh_info = segs;
}
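
/*
 * With extended numbering the ELF header's e_phnum stays at PN_XNUM
 * (0xffff) and the real segment count is parked in sh_info of this lone
 * SHT_NULL section header, as the ELF gABI specifies.
 */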

static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
				     unsigned long mm_flags)
{
	struct vm_area_struct *vma;
	size_t size = 0;

	for (vma = first_vma(current, gate_vma); vma != NULL;
	     vma = next_vma(vma, gate_vma))
		size += vma_dump_size(vma, mm_flags);
	return size;
}

/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we run out of core limit,
 * we just truncate.
 */
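
/*
 * The resulting file layout is, roughly:
 *
 *	ELF header | program headers | PT_NOTE data | <page align>
 *	| PT_LOAD segment data ... | extra data | extended shdr, if any
 */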
static int elf_core_dump(struct coredump_params *cprm)
{
	int has_dumped = 0;
	mm_segment_t fs;
	int segs;
	struct vm_area_struct *vma, *gate_vma;
	struct elfhdr *elf = NULL;
	loff_t offset = 0, dataoff;
	struct elf_note_info info = { };
	struct elf_phdr *phdr4note = NULL;
	struct elf_shdr *shdr4extnum = NULL;
	Elf_Half e_phnum;
	elf_addr_t e_shoff;

	/*
	 * We no longer stop all VM operations.
	 *
	 * This is because those processes that could possibly change map_count
	 * or the mmap / vma pages are now blocked in do_exit on current
	 * finishing this core dump.
	 *
	 * Only ptrace can touch these memory addresses, but it doesn't change
	 * the map_count or the pages allocated. So no possibility of crashing
	 * exists while dumping the mm->vm_next areas to the core file.
	 */

	/* alloc memory for large data structures: too large to be on stack */
	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
	if (!elf)
		goto out;
	/*
	 * The number of segs is recorded in the ELF header as a 16-bit value.
	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
	 */
	segs = current->mm->map_count;
	segs += elf_core_extra_phdrs();

	gate_vma = get_gate_vma(current->mm);
	if (gate_vma != NULL)
		segs++;

	/* for notes section */
	segs++;

	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
	 * this, the kernel supports extended numbering. Have a look at
	 * include/linux/elf.h for further information. */
	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;

	/*
	 * Collect all the non-memory information about the process for the
	 * notes.  This also sets up the file header.
	 */
	if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
		goto cleanup;

	has_dumped = 1;

	fs = get_fs();
	set_fs(KERNEL_DS);

	offset += sizeof(*elf);				/* Elf header */
	offset += segs * sizeof(struct elf_phdr);	/* Program headers */

	/* Write notes phdr entry */
	{
		size_t sz = get_note_info_size(&info);

		sz += elf_coredump_extra_notes_size();

		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
		if (!phdr4note)
			goto end_coredump;

		fill_elf_note_phdr(phdr4note, sz, offset);
		offset += sz;
	}

	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

	offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
	offset += elf_core_extra_data_size();
	e_shoff = offset;

	if (e_phnum == PN_XNUM) {
		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
		if (!shdr4extnum)
			goto end_coredump;
		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
	}

	offset = dataoff;

	if (!dump_emit(cprm, elf, sizeof(*elf)))
		goto end_coredump;

	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
		goto end_coredump;

	/* Write program headers for segments dump */
	for (vma = first_vma(current, gate_vma); vma != NULL;
			vma = next_vma(vma, gate_vma)) {
		struct elf_phdr phdr;

		phdr.p_type = PT_LOAD;
		phdr.p_offset = offset;
		phdr.p_vaddr = vma->vm_start;
		phdr.p_paddr = 0;
		phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
		phdr.p_memsz = vma->vm_end - vma->vm_start;
		offset += phdr.p_filesz;
		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
		if (vma->vm_flags & VM_WRITE)
			phdr.p_flags |= PF_W;
		if (vma->vm_flags & VM_EXEC)
			phdr.p_flags |= PF_X;
		phdr.p_align = ELF_EXEC_PAGESIZE;

		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
			goto end_coredump;
	}
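
	/*
	 * p_filesz may be smaller than p_memsz when vma_dump_size() trims
	 * a vma (e.g. to zero for areas filtered out via coredump_filter);
	 * consumers treat the missing tail as if it read back as zeroes.
	 */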

	if (!elf_core_write_extra_phdrs(cprm, offset))
		goto end_coredump;

	/* write out the notes section */
	if (!write_note_info(&info, cprm))
		goto end_coredump;

	if (elf_coredump_extra_notes_write(cprm))
		goto end_coredump;

	/* Align to page */
	if (!dump_skip(cprm, dataoff - cprm->written))
		goto end_coredump;

	for (vma = first_vma(current, gate_vma); vma != NULL;
			vma = next_vma(vma, gate_vma)) {
		unsigned long addr;
		unsigned long end;

		end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);

		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
			struct page *page;
			int stop;

			page = get_dump_page(addr);
			if (page) {
				void *kaddr = kmap(page);
				stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
				kunmap(page);
				page_cache_release(page);
			} else
				stop = !dump_skip(cprm, PAGE_SIZE);
			if (stop)
				goto end_coredump;
		}
	}

	if (!elf_core_write_extra_data(cprm))
		goto end_coredump;

	if (e_phnum == PN_XNUM) {
		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
			goto end_coredump;
	}

end_coredump:
	set_fs(fs);

cleanup:
	free_note_info(&info);
	kfree(shdr4extnum);
	kfree(phdr4note);
	kfree(elf);
out:
	return has_dumped;
}

#endif		/* CONFIG_ELF_CORE */

static int __init init_elf_binfmt(void)
{
	register_binfmt(&elf_format);
	return 0;
}

static void __exit exit_elf_binfmt(void)
{
	/* Remove the ELF loader. */
	unregister_binfmt(&elf_format);
}

core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");