1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11 
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/elf-randomize.h>
35 #include <linux/utsname.h>
36 #include <linux/coredump.h>
37 #include <linux/sched.h>
38 #include <linux/dax.h>
39 #include <asm/uaccess.h>
40 #include <asm/param.h>
41 #include <asm/page.h>
42 
43 #ifndef user_long_t
44 #define user_long_t long
45 #endif
46 #ifndef user_siginfo_t
47 #define user_siginfo_t siginfo_t
48 #endif
49 
50 static int load_elf_binary(struct linux_binprm *bprm);
51 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
52 				int, int, unsigned long);
53 
54 #ifdef CONFIG_USELIB
55 static int load_elf_library(struct file *);
56 #else
57 #define load_elf_library NULL
58 #endif
59 
60 /*
61  * If we don't support core dumping, then supply a NULL so we
62  * don't even try.
63  */
64 #ifdef CONFIG_ELF_CORE
65 static int elf_core_dump(struct coredump_params *cprm);
66 #else
67 #define elf_core_dump	NULL
68 #endif
69 
70 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
71 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
72 #else
73 #define ELF_MIN_ALIGN	PAGE_SIZE
74 #endif
75 
76 #ifndef ELF_CORE_EFLAGS
77 #define ELF_CORE_EFLAGS	0
78 #endif
79 
80 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
81 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
82 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
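/*
 * A quick worked example of these macros (assuming ELF_MIN_ALIGN is
 * 4 KiB, i.e. 0x1000): for a segment virtual address of 0x400123,
 * ELF_PAGESTART(0x400123)  == 0x400000  (round down to the page base),
 * ELF_PAGEOFFSET(0x400123) == 0x123     (offset within that page),
 * ELF_PAGEALIGN(0x400123)  == 0x401000  (round up to the next page).
 */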
83 
84 static struct linux_binfmt elf_format = {
85 	.module		= THIS_MODULE,
86 	.load_binary	= load_elf_binary,
87 	.load_shlib	= load_elf_library,
88 	.core_dump	= elf_core_dump,
89 	.min_coredump	= ELF_EXEC_PAGESIZE,
90 };
91 
92 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
93 
94 static int set_brk(unsigned long start, unsigned long end)
95 {
96 	start = ELF_PAGEALIGN(start);
97 	end = ELF_PAGEALIGN(end);
98 	if (end > start) {
99 		unsigned long addr;
100 		addr = vm_brk(start, end - start);
101 		if (BAD_ADDR(addr))
102 			return addr;
103 	}
104 	current->mm->start_brk = current->mm->brk = end;
105 	return 0;
106 }
107 
108 /* We need to explicitly zero any fractional pages
109    after the data section (i.e. bss).  This would
110    contain the junk from the file that should not
111    be in memory
112  */
113 static int padzero(unsigned long elf_bss)
114 {
115 	unsigned long nbyte;
116 
117 	nbyte = ELF_PAGEOFFSET(elf_bss);
118 	if (nbyte) {
119 		nbyte = ELF_MIN_ALIGN - nbyte;
120 		if (clear_user((void __user *) elf_bss, nbyte))
121 			return -EFAULT;
122 	}
123 	return 0;
124 }
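/*
 * For illustration, assuming 4 KiB pages: if elf_bss ends at 0x601234,
 * padzero() computes nbyte = 0x1000 - 0x234 = 0xdcc and clears user
 * memory from 0x601234 up to 0x601fff, so stale file contents in the
 * final partial page never show through as bss.
 */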
125 
126 /* Let's use some macros to make this stack manipulation a little clearer */
127 #ifdef CONFIG_STACK_GROWSUP
128 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
129 #define STACK_ROUND(sp, items) \
130 	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
131 #define STACK_ALLOC(sp, len) ({ \
132 	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
133 	old_sp; })
134 #else
135 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
136 #define STACK_ROUND(sp, items) \
137 	(((unsigned long) (sp - items)) &~ 15UL)
138 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
139 #endif
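/*
 * Example of the (more common) downward-growing case: with p at
 * 0x7ffffffff000, STACK_ALLOC(p, 16) lowers p to 0x7fffffffeff0 and
 * yields that new value as the allocation's address, while
 * STACK_ROUND(sp, items) rounds the final pointer down so the initial
 * stack stays 16-byte aligned.
 */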
140 
141 #ifndef ELF_BASE_PLATFORM
142 /*
143  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
144  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
145  * will be copied to the user stack in the same manner as AT_PLATFORM.
146  */
147 #define ELF_BASE_PLATFORM NULL
148 #endif
149 
150 static int
151 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
152 		unsigned long load_addr, unsigned long interp_load_addr)
153 {
154 	unsigned long p = bprm->p;
155 	int argc = bprm->argc;
156 	int envc = bprm->envc;
157 	elf_addr_t __user *argv;
158 	elf_addr_t __user *envp;
159 	elf_addr_t __user *sp;
160 	elf_addr_t __user *u_platform;
161 	elf_addr_t __user *u_base_platform;
162 	elf_addr_t __user *u_rand_bytes;
163 	const char *k_platform = ELF_PLATFORM;
164 	const char *k_base_platform = ELF_BASE_PLATFORM;
165 	unsigned char k_rand_bytes[16];
166 	int items;
167 	elf_addr_t *elf_info;
168 	int ei_index = 0;
169 	const struct cred *cred = current_cred();
170 	struct vm_area_struct *vma;
171 
172 	/*
173 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
174 	 * evictions by the processes running on the same package. One
175 	 * thing we can do is to shuffle the initial stack for them.
176 	 */
177 
178 	p = arch_align_stack(p);
179 
180 	/*
181 	 * If this architecture has a platform capability string, copy it
182 	 * to userspace.  In some cases (Sparc), this info is impossible
183 	 * for userspace to get any other way, in others (i386) it is
184 	 * merely difficult.
185 	 */
186 	u_platform = NULL;
187 	if (k_platform) {
188 		size_t len = strlen(k_platform) + 1;
189 
190 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
191 		if (__copy_to_user(u_platform, k_platform, len))
192 			return -EFAULT;
193 	}
194 
195 	/*
196 	 * If this architecture has a "base" platform capability
197 	 * string, copy it to userspace.
198 	 */
199 	u_base_platform = NULL;
200 	if (k_base_platform) {
201 		size_t len = strlen(k_base_platform) + 1;
202 
203 		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
204 		if (__copy_to_user(u_base_platform, k_base_platform, len))
205 			return -EFAULT;
206 	}
207 
208 	/*
209 	 * Generate 16 random bytes for userspace PRNG seeding.
210 	 */
211 	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
212 	u_rand_bytes = (elf_addr_t __user *)
213 		       STACK_ALLOC(p, sizeof(k_rand_bytes));
214 	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
215 		return -EFAULT;
216 
217 	/* Create the ELF interpreter info */
218 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
219 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
220 #define NEW_AUX_ENT(id, val) \
221 	do { \
222 		elf_info[ei_index++] = id; \
223 		elf_info[ei_index++] = val; \
224 	} while (0)
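	/*
	 * Each NEW_AUX_ENT() call below stores one auxiliary-vector entry as
	 * an (id, value) pair of elf_addr_t words, e.g. { AT_PAGESZ, 4096 }
	 * on a 4 KiB-page machine; the vector is terminated by an
	 * { AT_NULL, 0 } pair left in place by the memset() below.
	 */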
225 
226 #ifdef ARCH_DLINFO
227 	/*
228 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
229 	 * AUXV.
230 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
231 	 * ARCH_DLINFO changes
232 	 */
233 	ARCH_DLINFO;
234 #endif
235 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
236 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
237 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
238 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
239 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
240 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
241 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
242 	NEW_AUX_ENT(AT_FLAGS, 0);
243 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
244 	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
245 	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
246 	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
247 	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
248  	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
249 	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
250 #ifdef ELF_HWCAP2
251 	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
252 #endif
253 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
254 	if (k_platform) {
255 		NEW_AUX_ENT(AT_PLATFORM,
256 			    (elf_addr_t)(unsigned long)u_platform);
257 	}
258 	if (k_base_platform) {
259 		NEW_AUX_ENT(AT_BASE_PLATFORM,
260 			    (elf_addr_t)(unsigned long)u_base_platform);
261 	}
262 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
263 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
264 	}
265 #undef NEW_AUX_ENT
266 	/* AT_NULL is zero; clear the rest too */
267 	memset(&elf_info[ei_index], 0,
268 	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
269 
270 	/* And advance past the AT_NULL entry.  */
271 	ei_index += 2;
272 
273 	sp = STACK_ADD(p, ei_index);
274 
275 	items = (argc + 1) + (envc + 1) + 1;
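	/*
	 * Illustrative breakdown of the count above: one slot for argc
	 * itself, argc argv pointers plus their NULL terminator, and envc
	 * envp pointers plus their NULL terminator; the auxv words were
	 * already accounted for via ei_index when computing sp.
	 */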
276 	bprm->p = STACK_ROUND(sp, items);
277 
278 	/* Point sp at the lowest address on the stack */
279 #ifdef CONFIG_STACK_GROWSUP
280 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
281 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
282 #else
283 	sp = (elf_addr_t __user *)bprm->p;
284 #endif
285 
286 
287 	/*
288 	 * Grow the stack manually; some architectures have a limit on how
289 	 * far ahead a user-space access may be in order to grow the stack.
290 	 */
291 	vma = find_extend_vma(current->mm, bprm->p);
292 	if (!vma)
293 		return -EFAULT;
294 
295 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
296 	if (__put_user(argc, sp++))
297 		return -EFAULT;
298 	argv = sp;
299 	envp = argv + argc + 1;
300 
301 	/* Populate argv and envp */
302 	p = current->mm->arg_end = current->mm->arg_start;
303 	while (argc-- > 0) {
304 		size_t len;
305 		if (__put_user((elf_addr_t)p, argv++))
306 			return -EFAULT;
307 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
308 		if (!len || len > MAX_ARG_STRLEN)
309 			return -EINVAL;
310 		p += len;
311 	}
312 	if (__put_user(0, argv))
313 		return -EFAULT;
314 	current->mm->arg_end = current->mm->env_start = p;
315 	while (envc-- > 0) {
316 		size_t len;
317 		if (__put_user((elf_addr_t)p, envp++))
318 			return -EFAULT;
319 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
320 		if (!len || len > MAX_ARG_STRLEN)
321 			return -EINVAL;
322 		p += len;
323 	}
324 	if (__put_user(0, envp))
325 		return -EFAULT;
326 	current->mm->env_end = p;
327 
328 	/* Put the elf_info on the stack in the right place.  */
329 	sp = (elf_addr_t __user *)envp + 1;
330 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
331 		return -EFAULT;
332 	return 0;
333 }
334 
335 #ifndef elf_map
336 
337 static unsigned long elf_map(struct file *filep, unsigned long addr,
338 		struct elf_phdr *eppnt, int prot, int type,
339 		unsigned long total_size)
340 {
341 	unsigned long map_addr;
342 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
343 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
344 	addr = ELF_PAGESTART(addr);
345 	size = ELF_PAGEALIGN(size);
346 
347 	/* mmap() will return -EINVAL if given a zero size, but a
348 	 * segment with zero filesize is perfectly valid */
349 	if (!size)
350 		return addr;
351 
352 	/*
353 	* total_size is the size of the ELF (interpreter) image.
354 	* The _first_ mmap needs to know the full size, otherwise
355 	* randomization might put this image into an overlapping
356 	* position with the ELF binary image. (since size < total_size)
357 	* So we first map the 'big' image - and unmap the remainder at
358 	* the end. (which unmap is needed for ELF images with holes.)
359 	*/
360 	if (total_size) {
361 		total_size = ELF_PAGEALIGN(total_size);
362 		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
363 		if (!BAD_ADDR(map_addr))
364 			vm_munmap(map_addr+size, total_size-size);
365 	} else
366 		map_addr = vm_mmap(filep, addr, size, prot, type, off);
367 
368 	return(map_addr);
369 }
370 
371 #endif /* !elf_map */
372 
373 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
374 {
375 	int i, first_idx = -1, last_idx = -1;
376 
377 	for (i = 0; i < nr; i++) {
378 		if (cmds[i].p_type == PT_LOAD) {
379 			last_idx = i;
380 			if (first_idx == -1)
381 				first_idx = i;
382 		}
383 	}
384 	if (first_idx == -1)
385 		return 0;
386 
387 	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
388 				ELF_PAGESTART(cmds[first_idx].p_vaddr);
389 }
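/*
 * For example, given a first PT_LOAD at p_vaddr 0x400000 and a last
 * PT_LOAD at p_vaddr 0x600000 with p_memsz 0x800, total_mapping_size()
 * returns 0x600800 - 0x400000 = 0x200800: the span the whole image
 * occupies once mapped at a single base address.
 */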
390 
391 /**
392  * load_elf_phdrs() - load ELF program headers
393  * @elf_ex:   ELF header of the binary whose program headers should be loaded
394  * @elf_file: the opened ELF binary file
395  *
396  * Loads ELF program headers from the binary file elf_file, which has the ELF
397  * header pointed to by elf_ex, into a newly allocated array. The caller is
398  * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
399  */
400 static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
401 				       struct file *elf_file)
402 {
403 	struct elf_phdr *elf_phdata = NULL;
404 	int retval, size, err = -1;
405 
406 	/*
407 	 * If the size of this structure has changed, then punt, since
408 	 * we will be doing the wrong thing.
409 	 */
410 	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
411 		goto out;
412 
413 	/* Sanity check the number of program headers... */
414 	if (elf_ex->e_phnum < 1 ||
415 		elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
416 		goto out;
417 
418 	/* ...and their total size. */
419 	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
420 	if (size > ELF_MIN_ALIGN)
421 		goto out;
422 
423 	elf_phdata = kmalloc(size, GFP_KERNEL);
424 	if (!elf_phdata)
425 		goto out;
426 
427 	/* Read in the program headers */
428 	retval = kernel_read(elf_file, elf_ex->e_phoff,
429 			     (char *)elf_phdata, size);
430 	if (retval != size) {
431 		err = (retval < 0) ? retval : -EIO;
432 		goto out;
433 	}
434 
435 	/* Success! */
436 	err = 0;
437 out:
438 	if (err) {
439 		kfree(elf_phdata);
440 		elf_phdata = NULL;
441 	}
442 	return elf_phdata;
443 }
444 
445 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
446 
447 /**
448  * struct arch_elf_state - arch-specific ELF loading state
449  *
450  * This structure is used to preserve architecture specific data during
451  * the loading of an ELF file, throughout the checking of architecture
452  * specific ELF headers & through to the point where the ELF load is
453  * known to be proceeding (ie. SET_PERSONALITY).
454  *
455  * This implementation is a dummy for architectures which require no
456  * specific state.
457  */
458 struct arch_elf_state {
459 };
460 
461 #define INIT_ARCH_ELF_STATE {}
462 
463 /**
464  * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
465  * @ehdr:	The main ELF header
466  * @phdr:	The program header to check
467  * @elf:	The open ELF file
468  * @is_interp:	True if the phdr is from the interpreter of the ELF being
469  *		loaded, else false.
470  * @state:	Architecture-specific state preserved throughout the process
471  *		of loading the ELF.
472  *
473  * Inspects the program header phdr to validate its correctness and/or
474  * suitability for the system. Called once per ELF program header in the
475  * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
476  * interpreter.
477  *
478  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
479  *         with that return code.
480  */
481 static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
482 				   struct elf_phdr *phdr,
483 				   struct file *elf, bool is_interp,
484 				   struct arch_elf_state *state)
485 {
486 	/* Dummy implementation, always proceed */
487 	return 0;
488 }
489 
490 /**
491  * arch_check_elf() - check an ELF executable
492  * @ehdr:	The main ELF header
493  * @has_interp:	True if the ELF has an interpreter, else false.
494  * @interp_ehdr: The interpreter's ELF header
495  * @state:	Architecture-specific state preserved throughout the process
496  *		of loading the ELF.
497  *
498  * Provides a final opportunity for architecture code to reject the loading
499  * of the ELF & cause an exec syscall to return an error. This is called after
500  * all program headers to be checked by arch_elf_pt_proc have been.
501  *
502  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
503  *         with that return code.
504  */
505 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
506 				 struct elfhdr *interp_ehdr,
507 				 struct arch_elf_state *state)
508 {
509 	/* Dummy implementation, always proceed */
510 	return 0;
511 }
512 
513 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
514 
515 /* This is much more generalized than the library routine read function,
516    so we keep this separate.  Technically the library read function
517    is only provided so that we can read a.out libraries that have
518    an ELF header */
519 
520 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
521 		struct file *interpreter, unsigned long *interp_map_addr,
522 		unsigned long no_base, struct elf_phdr *interp_elf_phdata)
523 {
524 	struct elf_phdr *eppnt;
525 	unsigned long load_addr = 0;
526 	int load_addr_set = 0;
527 	unsigned long last_bss = 0, elf_bss = 0;
528 	unsigned long error = ~0UL;
529 	unsigned long total_size;
530 	int i;
531 
532 	/* First of all, some simple consistency checks */
533 	if (interp_elf_ex->e_type != ET_EXEC &&
534 	    interp_elf_ex->e_type != ET_DYN)
535 		goto out;
536 	if (!elf_check_arch(interp_elf_ex))
537 		goto out;
538 	if (!interpreter->f_op->mmap)
539 		goto out;
540 
541 	total_size = total_mapping_size(interp_elf_phdata,
542 					interp_elf_ex->e_phnum);
543 	if (!total_size) {
544 		error = -EINVAL;
545 		goto out;
546 	}
547 
548 	eppnt = interp_elf_phdata;
549 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
550 		if (eppnt->p_type == PT_LOAD) {
551 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
552 			int elf_prot = 0;
553 			unsigned long vaddr = 0;
554 			unsigned long k, map_addr;
555 
556 			if (eppnt->p_flags & PF_R)
557 		    		elf_prot = PROT_READ;
558 			if (eppnt->p_flags & PF_W)
559 				elf_prot |= PROT_WRITE;
560 			if (eppnt->p_flags & PF_X)
561 				elf_prot |= PROT_EXEC;
562 			vaddr = eppnt->p_vaddr;
563 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
564 				elf_type |= MAP_FIXED;
565 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
566 				load_addr = -vaddr;
567 
568 			map_addr = elf_map(interpreter, load_addr + vaddr,
569 					eppnt, elf_prot, elf_type, total_size);
570 			total_size = 0;
571 			if (!*interp_map_addr)
572 				*interp_map_addr = map_addr;
573 			error = map_addr;
574 			if (BAD_ADDR(map_addr))
575 				goto out;
576 
577 			if (!load_addr_set &&
578 			    interp_elf_ex->e_type == ET_DYN) {
579 				load_addr = map_addr - ELF_PAGESTART(vaddr);
580 				load_addr_set = 1;
581 			}
582 
583 			/*
584 			 * Check to see if the section's size will overflow the
585 			 * allowed task size. Note that p_filesz must always be
586 			 * <= p_memsz so it's only necessary to check p_memsz.
587 			 */
588 			k = load_addr + eppnt->p_vaddr;
589 			if (BAD_ADDR(k) ||
590 			    eppnt->p_filesz > eppnt->p_memsz ||
591 			    eppnt->p_memsz > TASK_SIZE ||
592 			    TASK_SIZE - eppnt->p_memsz < k) {
593 				error = -ENOMEM;
594 				goto out;
595 			}
596 
597 			/*
598 			 * Find the end of the file mapping for this phdr, and
599 			 * keep track of the largest address we see for this.
600 			 */
601 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
602 			if (k > elf_bss)
603 				elf_bss = k;
604 
605 			/*
606 			 * Do the same thing for the memory mapping - between
607 			 * elf_bss and last_bss is the bss section.
608 			 */
609 			k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
610 			if (k > last_bss)
611 				last_bss = k;
612 		}
613 	}
614 
615 	/*
616 	 * Now fill out the bss section: first pad the last page from
617 	 * the file up to the page boundary, and zero it from elf_bss
618 	 * up to the end of the page.
619 	 */
620 	if (padzero(elf_bss)) {
621 		error = -EFAULT;
622 		goto out;
623 	}
624 	/*
625 	 * Next, align both the file and mem bss up to the page size,
626 	 * since this is where elf_bss was just zeroed up to, and where
627 	 * last_bss will end after the vm_brk() below.
628 	 */
629 	elf_bss = ELF_PAGEALIGN(elf_bss);
630 	last_bss = ELF_PAGEALIGN(last_bss);
631 	/* Finally, if there is still more bss to allocate, do it. */
632 	if (last_bss > elf_bss) {
633 		error = vm_brk(elf_bss, last_bss - elf_bss);
634 		if (BAD_ADDR(error))
635 			goto out;
636 	}
637 
638 	error = load_addr;
639 out:
640 	return error;
641 }
642 
643 /*
644  * These are the functions used to load ELF style executables and shared
645  * libraries.  There is no binary dependent code anywhere else.
646  */
647 
648 #ifndef STACK_RND_MASK
649 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
650 #endif
651 
652 static unsigned long randomize_stack_top(unsigned long stack_top)
653 {
654 	unsigned long random_variable = 0;
655 
656 	if ((current->flags & PF_RANDOMIZE) &&
657 		!(current->personality & ADDR_NO_RANDOMIZE)) {
658 		random_variable = get_random_long();
659 		random_variable &= STACK_RND_MASK;
660 		random_variable <<= PAGE_SHIFT;
661 	}
662 #ifdef CONFIG_STACK_GROWSUP
663 	return PAGE_ALIGN(stack_top) + random_variable;
664 #else
665 	return PAGE_ALIGN(stack_top) - random_variable;
666 #endif
667 }
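/*
 * With the default mask and 4 KiB pages, random_variable above can take
 * any page-sized multiple up to 0x7ff << 12 = 0x7ff000 bytes, i.e. just
 * under 8 MiB of stack-top randomization, matching the "8MB of VA" note
 * on STACK_RND_MASK.
 */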
668 
669 static int load_elf_binary(struct linux_binprm *bprm)
670 {
671 	struct file *interpreter = NULL; /* to shut gcc up */
672  	unsigned long load_addr = 0, load_bias = 0;
673 	int load_addr_set = 0;
674 	char * elf_interpreter = NULL;
675 	unsigned long error;
676 	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
677 	unsigned long elf_bss, elf_brk;
678 	int retval, i;
679 	unsigned long elf_entry;
680 	unsigned long interp_load_addr = 0;
681 	unsigned long start_code, end_code, start_data, end_data;
682 	unsigned long reloc_func_desc __maybe_unused = 0;
683 	int executable_stack = EXSTACK_DEFAULT;
684 	struct pt_regs *regs = current_pt_regs();
685 	struct {
686 		struct elfhdr elf_ex;
687 		struct elfhdr interp_elf_ex;
688 	} *loc;
689 	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
690 
691 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
692 	if (!loc) {
693 		retval = -ENOMEM;
694 		goto out_ret;
695 	}
696 
697 	/* Get the exec-header */
698 	loc->elf_ex = *((struct elfhdr *)bprm->buf);
699 
700 	retval = -ENOEXEC;
701 	/* First of all, some simple consistency checks */
702 	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
703 		goto out;
704 
705 	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
706 		goto out;
707 	if (!elf_check_arch(&loc->elf_ex))
708 		goto out;
709 	if (!bprm->file->f_op->mmap)
710 		goto out;
711 
712 	elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
713 	if (!elf_phdata)
714 		goto out;
715 
716 	elf_ppnt = elf_phdata;
717 	elf_bss = 0;
718 	elf_brk = 0;
719 
720 	start_code = ~0UL;
721 	end_code = 0;
722 	start_data = 0;
723 	end_data = 0;
724 
725 	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
726 		if (elf_ppnt->p_type == PT_INTERP) {
727 			/* This is the program interpreter used for
728 			 * shared libraries - for now assume that this
729 			 * is an a.out format binary
730 			 */
731 			retval = -ENOEXEC;
732 			if (elf_ppnt->p_filesz > PATH_MAX ||
733 			    elf_ppnt->p_filesz < 2)
734 				goto out_free_ph;
735 
736 			retval = -ENOMEM;
737 			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
738 						  GFP_KERNEL);
739 			if (!elf_interpreter)
740 				goto out_free_ph;
741 
742 			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
743 					     elf_interpreter,
744 					     elf_ppnt->p_filesz);
745 			if (retval != elf_ppnt->p_filesz) {
746 				if (retval >= 0)
747 					retval = -EIO;
748 				goto out_free_interp;
749 			}
750 			/* make sure path is NULL terminated */
751 			retval = -ENOEXEC;
752 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
753 				goto out_free_interp;
754 
755 			interpreter = open_exec(elf_interpreter);
756 			retval = PTR_ERR(interpreter);
757 			if (IS_ERR(interpreter))
758 				goto out_free_interp;
759 
760 			/*
761 			 * If the binary is not readable then enforce
762 			 * mm->dumpable = 0 regardless of the interpreter's
763 			 * permissions.
764 			 */
765 			would_dump(bprm, interpreter);
766 
767 			/* Get the exec headers */
768 			retval = kernel_read(interpreter, 0,
769 					     (void *)&loc->interp_elf_ex,
770 					     sizeof(loc->interp_elf_ex));
771 			if (retval != sizeof(loc->interp_elf_ex)) {
772 				if (retval >= 0)
773 					retval = -EIO;
774 				goto out_free_dentry;
775 			}
776 
777 			break;
778 		}
779 		elf_ppnt++;
780 	}
781 
782 	elf_ppnt = elf_phdata;
783 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
784 		switch (elf_ppnt->p_type) {
785 		case PT_GNU_STACK:
786 			if (elf_ppnt->p_flags & PF_X)
787 				executable_stack = EXSTACK_ENABLE_X;
788 			else
789 				executable_stack = EXSTACK_DISABLE_X;
790 			break;
791 
792 		case PT_LOPROC ... PT_HIPROC:
793 			retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
794 						  bprm->file, false,
795 						  &arch_state);
796 			if (retval)
797 				goto out_free_dentry;
798 			break;
799 		}
800 
801 	/* Some simple consistency checks for the interpreter */
802 	if (elf_interpreter) {
803 		retval = -ELIBBAD;
804 		/* Not an ELF interpreter */
805 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
806 			goto out_free_dentry;
807 		/* Verify the interpreter has a valid arch */
808 		if (!elf_check_arch(&loc->interp_elf_ex))
809 			goto out_free_dentry;
810 
811 		/* Load the interpreter program headers */
812 		interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
813 						   interpreter);
814 		if (!interp_elf_phdata)
815 			goto out_free_dentry;
816 
817 		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
818 		elf_ppnt = interp_elf_phdata;
819 		for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
820 			switch (elf_ppnt->p_type) {
821 			case PT_LOPROC ... PT_HIPROC:
822 				retval = arch_elf_pt_proc(&loc->interp_elf_ex,
823 							  elf_ppnt, interpreter,
824 							  true, &arch_state);
825 				if (retval)
826 					goto out_free_dentry;
827 				break;
828 			}
829 	}
830 
831 	/*
832 	 * Allow arch code to reject the ELF at this point, whilst it's
833 	 * still possible to return an error to the code that invoked
834 	 * the exec syscall.
835 	 */
836 	retval = arch_check_elf(&loc->elf_ex,
837 				!!interpreter, &loc->interp_elf_ex,
838 				&arch_state);
839 	if (retval)
840 		goto out_free_dentry;
841 
842 	/* Flush all traces of the currently running executable */
843 	retval = flush_old_exec(bprm);
844 	if (retval)
845 		goto out_free_dentry;
846 
847 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
848 	   may depend on the personality.  */
849 	SET_PERSONALITY2(loc->elf_ex, &arch_state);
850 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
851 		current->personality |= READ_IMPLIES_EXEC;
852 
853 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
854 		current->flags |= PF_RANDOMIZE;
855 
856 	setup_new_exec(bprm);
857 	install_exec_creds(bprm);
858 
859 	/* Do this so that we can load the interpreter, if need be.  We will
860 	   change some of these later */
861 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
862 				 executable_stack);
863 	if (retval < 0)
864 		goto out_free_dentry;
865 
866 	current->mm->start_stack = bprm->p;
867 
868 	/* Now we do a little grungy work by mmapping the ELF image into
869 	   the correct location in memory. */
870 	for(i = 0, elf_ppnt = elf_phdata;
871 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
872 		int elf_prot = 0, elf_flags;
873 		unsigned long k, vaddr;
874 		unsigned long total_size = 0;
875 
876 		if (elf_ppnt->p_type != PT_LOAD)
877 			continue;
878 
879 		if (unlikely (elf_brk > elf_bss)) {
880 			unsigned long nbyte;
881 
882 			/* There was a PT_LOAD segment with p_memsz > p_filesz
883 			   before this one. Map anonymous pages, if needed,
884 			   and clear the area.  */
885 			retval = set_brk(elf_bss + load_bias,
886 					 elf_brk + load_bias);
887 			if (retval)
888 				goto out_free_dentry;
889 			nbyte = ELF_PAGEOFFSET(elf_bss);
890 			if (nbyte) {
891 				nbyte = ELF_MIN_ALIGN - nbyte;
892 				if (nbyte > elf_brk - elf_bss)
893 					nbyte = elf_brk - elf_bss;
894 				if (clear_user((void __user *)elf_bss +
895 							load_bias, nbyte)) {
896 					/*
897 					 * This bss-zeroing can fail if the ELF
898 					 * file specifies odd protections. So
899 					 * we don't check the return value
900 					 */
901 				}
902 			}
903 		}
904 
905 		if (elf_ppnt->p_flags & PF_R)
906 			elf_prot |= PROT_READ;
907 		if (elf_ppnt->p_flags & PF_W)
908 			elf_prot |= PROT_WRITE;
909 		if (elf_ppnt->p_flags & PF_X)
910 			elf_prot |= PROT_EXEC;
911 
912 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
913 
914 		vaddr = elf_ppnt->p_vaddr;
915 		/*
916 		 * If we are loading ET_EXEC or we have already performed
917 		 * the ET_DYN load_addr calculations, proceed normally.
918 		 */
919 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
920 			elf_flags |= MAP_FIXED;
921 		} else if (loc->elf_ex.e_type == ET_DYN) {
922 			/*
923 			 * This logic is run once for the first LOAD Program
924 			 * Header for ET_DYN binaries to calculate the
925 			 * randomization (load_bias) for all the LOAD
926 			 * Program Headers, and to calculate the entire
927 			 * size of the ELF mapping (total_size). (Note that
928 			 * load_addr_set is set to true later once the
929 			 * initial mapping is performed.)
930 			 *
931 			 * There are effectively two types of ET_DYN
932 			 * binaries: programs (i.e. PIE: ET_DYN with INTERP)
933 			 * and loaders (ET_DYN without INTERP, since they
934 			 * _are_ the ELF interpreter). The loaders must
935 			 * be loaded away from programs since the program
936 			 * may otherwise collide with the loader (especially
937 			 * for ET_EXEC which does not have a randomized
938 			 * position). For example to handle invocations of
939 			 * "./ld.so someprog" to test out a new version of
940 			 * the loader, the subsequent program that the
941 			 * loader loads must avoid the loader itself, so
942 			 * they cannot share the same load range. Sufficient
943 			 * room for the brk must be allocated with the
944 			 * loader as well, since brk must be available with
945 			 * the loader.
946 			 *
947 			 * Therefore, programs are loaded offset from
948 			 * ELF_ET_DYN_BASE and loaders are loaded into the
949 			 * independently randomized mmap region (0 load_bias
950 			 * without MAP_FIXED).
951 			 */
952 			if (elf_interpreter) {
953 				load_bias = ELF_ET_DYN_BASE;
954 				if (current->flags & PF_RANDOMIZE)
955 					load_bias += arch_mmap_rnd();
956 				elf_flags |= MAP_FIXED;
957 			} else
958 				load_bias = 0;
959 
960 			/*
961 			 * Since load_bias is used for all subsequent loading
962 			 * calculations, we must lower it by the first vaddr
963 			 * so that the remaining calculations based on the
964 			 * ELF vaddrs will be correctly offset. The result
965 			 * is then page aligned.
966 			 */
967 			load_bias = ELF_PAGESTART(load_bias - vaddr);
968 
969 			total_size = total_mapping_size(elf_phdata,
970 							loc->elf_ex.e_phnum);
971 			if (!total_size) {
972 				retval = -EINVAL;
973 				goto out_free_dentry;
974 			}
975 		}
976 
977 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
978 				elf_prot, elf_flags, total_size);
979 		if (BAD_ADDR(error)) {
980 			retval = IS_ERR((void *)error) ?
981 				PTR_ERR((void*)error) : -EINVAL;
982 			goto out_free_dentry;
983 		}
984 
985 		if (!load_addr_set) {
986 			load_addr_set = 1;
987 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
988 			if (loc->elf_ex.e_type == ET_DYN) {
989 				load_bias += error -
990 				             ELF_PAGESTART(load_bias + vaddr);
991 				load_addr += load_bias;
992 				reloc_func_desc = load_bias;
993 			}
994 		}
995 		k = elf_ppnt->p_vaddr;
996 		if (k < start_code)
997 			start_code = k;
998 		if (start_data < k)
999 			start_data = k;
1000 
1001 		/*
1002 		 * Check to see if the section's size will overflow the
1003 		 * allowed task size. Note that p_filesz must always be
1004 		 * <= p_memsz so it is only necessary to check p_memsz.
1005 		 */
1006 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
1007 		    elf_ppnt->p_memsz > TASK_SIZE ||
1008 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
1009 			/* set_brk can never work. Avoid overflows. */
1010 			retval = -EINVAL;
1011 			goto out_free_dentry;
1012 		}
1013 
1014 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
1015 
1016 		if (k > elf_bss)
1017 			elf_bss = k;
1018 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
1019 			end_code = k;
1020 		if (end_data < k)
1021 			end_data = k;
1022 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
1023 		if (k > elf_brk)
1024 			elf_brk = k;
1025 	}
1026 
1027 	loc->elf_ex.e_entry += load_bias;
1028 	elf_bss += load_bias;
1029 	elf_brk += load_bias;
1030 	start_code += load_bias;
1031 	end_code += load_bias;
1032 	start_data += load_bias;
1033 	end_data += load_bias;
1034 
1035 	/* Calling set_brk effectively mmaps the pages that we need
1036 	 * for the bss and break sections.  We must do this before
1037 	 * mapping in the interpreter, to make sure it doesn't wind
1038 	 * up getting placed where the bss needs to go.
1039 	 */
1040 	retval = set_brk(elf_bss, elf_brk);
1041 	if (retval)
1042 		goto out_free_dentry;
1043 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
1044 		retval = -EFAULT; /* Nobody gets to see this, but.. */
1045 		goto out_free_dentry;
1046 	}
1047 
1048 	if (elf_interpreter) {
1049 		unsigned long interp_map_addr = 0;
1050 
1051 		elf_entry = load_elf_interp(&loc->interp_elf_ex,
1052 					    interpreter,
1053 					    &interp_map_addr,
1054 					    load_bias, interp_elf_phdata);
1055 		if (!IS_ERR((void *)elf_entry)) {
1056 			/*
1057 			 * load_elf_interp() returns relocation
1058 			 * adjustment
1059 			 */
1060 			interp_load_addr = elf_entry;
1061 			elf_entry += loc->interp_elf_ex.e_entry;
1062 		}
1063 		if (BAD_ADDR(elf_entry)) {
1064 			retval = IS_ERR((void *)elf_entry) ?
1065 					(int)elf_entry : -EINVAL;
1066 			goto out_free_dentry;
1067 		}
1068 		reloc_func_desc = interp_load_addr;
1069 
1070 		allow_write_access(interpreter);
1071 		fput(interpreter);
1072 		kfree(elf_interpreter);
1073 	} else {
1074 		elf_entry = loc->elf_ex.e_entry;
1075 		if (BAD_ADDR(elf_entry)) {
1076 			retval = -EINVAL;
1077 			goto out_free_dentry;
1078 		}
1079 	}
1080 
1081 	kfree(interp_elf_phdata);
1082 	kfree(elf_phdata);
1083 
1084 	set_binfmt(&elf_format);
1085 
1086 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1087 	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1088 	if (retval < 0)
1089 		goto out;
1090 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1091 
1092 	retval = create_elf_tables(bprm, &loc->elf_ex,
1093 			  load_addr, interp_load_addr);
1094 	if (retval < 0)
1095 		goto out;
1096 	/* N.B. passed_fileno might not be initialized? */
1097 	current->mm->end_code = end_code;
1098 	current->mm->start_code = start_code;
1099 	current->mm->start_data = start_data;
1100 	current->mm->end_data = end_data;
1101 	current->mm->start_stack = bprm->p;
1102 
1103 	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1104 		/*
1105 		 * For architectures with ELF randomization, when executing
1106 		 * a loader directly (i.e. no interpreter listed in ELF
1107 		 * headers), move the brk area out of the mmap region
1108 		 * (since it grows up, and may collide early with the stack
1109 		 * growing down), and into the unused ELF_ET_DYN_BASE region.
1110 		 */
1111 		if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
1112 		    loc->elf_ex.e_type == ET_DYN && !interpreter)
1113 			current->mm->brk = current->mm->start_brk =
1114 				ELF_ET_DYN_BASE;
1115 
1116 		current->mm->brk = current->mm->start_brk =
1117 			arch_randomize_brk(current->mm);
1118 #ifdef compat_brk_randomized
1119 		current->brk_randomized = 1;
1120 #endif
1121 	}
1122 
1123 	if (current->personality & MMAP_PAGE_ZERO) {
1124 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1125 		   and some applications "depend" upon this behavior.
1126 		   Since we do not have the power to recompile these, we
1127 		   emulate the SVr4 behavior. Sigh. */
1128 		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1129 				MAP_FIXED | MAP_PRIVATE, 0);
1130 	}
1131 
1132 #ifdef ELF_PLAT_INIT
1133 	/*
1134 	 * The ABI may specify that certain registers be set up in special
1135 	 * ways (on i386 %edx is the address of a DT_FINI function, for
1136 	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
1137 	 * that the e_entry field is the address of the function descriptor
1138 	 * for the startup routine, rather than the address of the startup
1139 	 * routine itself.  This macro performs whatever initialization to
1140 	 * the regs structure is required as well as any relocations to the
1141 	 * function descriptor entries when executing dynamically linked apps.
1142 	 */
1143 	ELF_PLAT_INIT(regs, reloc_func_desc);
1144 #endif
1145 
1146 	start_thread(regs, elf_entry, bprm->p);
1147 	retval = 0;
1148 out:
1149 	kfree(loc);
1150 out_ret:
1151 	return retval;
1152 
1153 	/* error cleanup */
1154 out_free_dentry:
1155 	kfree(interp_elf_phdata);
1156 	allow_write_access(interpreter);
1157 	if (interpreter)
1158 		fput(interpreter);
1159 out_free_interp:
1160 	kfree(elf_interpreter);
1161 out_free_ph:
1162 	kfree(elf_phdata);
1163 	goto out;
1164 }
1165 
1166 #ifdef CONFIG_USELIB
1167 /* This is really simpleminded and specialized - we are loading an
1168    a.out library that is given an ELF header. */
1169 static int load_elf_library(struct file *file)
1170 {
1171 	struct elf_phdr *elf_phdata;
1172 	struct elf_phdr *eppnt;
1173 	unsigned long elf_bss, bss, len;
1174 	int retval, error, i, j;
1175 	struct elfhdr elf_ex;
1176 
1177 	error = -ENOEXEC;
1178 	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1179 	if (retval != sizeof(elf_ex))
1180 		goto out;
1181 
1182 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1183 		goto out;
1184 
1185 	/* First of all, some simple consistency checks */
1186 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1187 	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1188 		goto out;
1189 
1190 	/* Now read in all of the header information */
1191 
1192 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1193 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1194 
1195 	error = -ENOMEM;
1196 	elf_phdata = kmalloc(j, GFP_KERNEL);
1197 	if (!elf_phdata)
1198 		goto out;
1199 
1200 	eppnt = elf_phdata;
1201 	error = -ENOEXEC;
1202 	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1203 	if (retval != j)
1204 		goto out_free_ph;
1205 
1206 	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1207 		if ((eppnt + i)->p_type == PT_LOAD)
1208 			j++;
1209 	if (j != 1)
1210 		goto out_free_ph;
1211 
1212 	while (eppnt->p_type != PT_LOAD)
1213 		eppnt++;
1214 
1215 	/* Now use mmap to map the library into memory. */
1216 	error = vm_mmap(file,
1217 			ELF_PAGESTART(eppnt->p_vaddr),
1218 			(eppnt->p_filesz +
1219 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1220 			PROT_READ | PROT_WRITE | PROT_EXEC,
1221 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1222 			(eppnt->p_offset -
1223 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1224 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1225 		goto out_free_ph;
1226 
1227 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1228 	if (padzero(elf_bss)) {
1229 		error = -EFAULT;
1230 		goto out_free_ph;
1231 	}
1232 
1233 	len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
1234 	bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
1235 	if (bss > len) {
1236 		error = vm_brk(len, bss - len);
1237 		if (BAD_ADDR(error))
1238 			goto out_free_ph;
1239 	}
1240 	error = 0;
1241 
1242 out_free_ph:
1243 	kfree(elf_phdata);
1244 out:
1245 	return error;
1246 }
1247 #endif /* #ifdef CONFIG_USELIB */
1248 
1249 #ifdef CONFIG_ELF_CORE
1250 /*
1251  * ELF core dumper
1252  *
1253  * Modelled on fs/exec.c:aout_core_dump()
1254  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1255  */
1256 
1257 /*
1258  * The purpose of always_dump_vma() is to make sure that special kernel mappings
1259  * that are useful for post-mortem analysis are included in every core dump.
1260  * In that way we ensure that the core dump is fully interpretable later
1261  * without matching up the same kernel and hardware config to see what PC values
1262  * meant. These special mappings include - vDSO, vsyscall, and other
1263  * architecture specific mappings
1264  */
1265 static bool always_dump_vma(struct vm_area_struct *vma)
1266 {
1267 	/* Any vsyscall mappings? */
1268 	if (vma == get_gate_vma(vma->vm_mm))
1269 		return true;
1270 
1271 	/*
1272 	 * Assume that all vmas with a .name op should always be dumped.
1273 	 * If this changes, a new vm_ops field can easily be added.
1274 	 */
1275 	if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1276 		return true;
1277 
1278 	/*
1279 	 * arch_vma_name() returns non-NULL for special architecture mappings,
1280 	 * such as vDSO sections.
1281 	 */
1282 	if (arch_vma_name(vma))
1283 		return true;
1284 
1285 	return false;
1286 }
1287 
1288 /*
1289  * Decide what to dump of a segment, part, all or none.
1290  */
1291 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1292 				   unsigned long mm_flags)
1293 {
1294 #define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
1295 
1296 	/* always dump the vdso and vsyscall sections */
1297 	if (always_dump_vma(vma))
1298 		goto whole;
1299 
1300 	if (vma->vm_flags & VM_DONTDUMP)
1301 		return 0;
1302 
1303 	/* support for DAX */
1304 	if (vma_is_dax(vma)) {
1305 		if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1306 			goto whole;
1307 		if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1308 			goto whole;
1309 		return 0;
1310 	}
1311 
1312 	/* Hugetlb memory check */
1313 	if (vma->vm_flags & VM_HUGETLB) {
1314 		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1315 			goto whole;
1316 		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1317 			goto whole;
1318 		return 0;
1319 	}
1320 
1321 	/* Do not dump I/O mapped devices or special mappings */
1322 	if (vma->vm_flags & VM_IO)
1323 		return 0;
1324 
1325 	/* By default, dump shared memory if mapped from an anonymous file. */
1326 	if (vma->vm_flags & VM_SHARED) {
1327 		if (file_inode(vma->vm_file)->i_nlink == 0 ?
1328 		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1329 			goto whole;
1330 		return 0;
1331 	}
1332 
1333 	/* Dump segments that have been written to.  */
1334 	if (vma->anon_vma && FILTER(ANON_PRIVATE))
1335 		goto whole;
1336 	if (vma->vm_file == NULL)
1337 		return 0;
1338 
1339 	if (FILTER(MAPPED_PRIVATE))
1340 		goto whole;
1341 
1342 	/*
1343 	 * If this looks like the beginning of a DSO or executable mapping,
1344 	 * check for an ELF header.  If we find one, dump the first page to
1345 	 * aid in determining what was mapped here.
1346 	 */
1347 	if (FILTER(ELF_HEADERS) &&
1348 	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1349 		u32 __user *header = (u32 __user *) vma->vm_start;
1350 		u32 word;
1351 		mm_segment_t fs = get_fs();
1352 		/*
1353 		 * Doing it this way gets the constant folded by GCC.
1354 		 */
1355 		union {
1356 			u32 cmp;
1357 			char elfmag[SELFMAG];
1358 		} magic;
1359 		BUILD_BUG_ON(SELFMAG != sizeof word);
1360 		magic.elfmag[EI_MAG0] = ELFMAG0;
1361 		magic.elfmag[EI_MAG1] = ELFMAG1;
1362 		magic.elfmag[EI_MAG2] = ELFMAG2;
1363 		magic.elfmag[EI_MAG3] = ELFMAG3;
1364 		/*
1365 		 * Switch to the user "segment" for get_user(),
1366 		 * then put back what elf_core_dump() had in place.
1367 		 */
1368 		set_fs(USER_DS);
1369 		if (unlikely(get_user(word, header)))
1370 			word = 0;
1371 		set_fs(fs);
1372 		if (word == magic.cmp)
1373 			return PAGE_SIZE;
1374 	}
1375 
1376 #undef	FILTER
1377 
1378 	return 0;
1379 
1380 whole:
1381 	return vma->vm_end - vma->vm_start;
1382 }
1383 
1384 /* An ELF note in memory */
1385 struct memelfnote
1386 {
1387 	const char *name;
1388 	int type;
1389 	unsigned int datasz;
1390 	void *data;
1391 };
1392 
1393 static int notesize(struct memelfnote *en)
1394 {
1395 	int sz;
1396 
1397 	sz = sizeof(struct elf_note);
1398 	sz += roundup(strlen(en->name) + 1, 4);
1399 	sz += roundup(en->datasz, 4);
1400 
1401 	return sz;
1402 }
1403 
1404 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1405 {
1406 	struct elf_note en;
1407 	en.n_namesz = strlen(men->name) + 1;
1408 	en.n_descsz = men->datasz;
1409 	en.n_type = men->type;
1410 
1411 	return dump_emit(cprm, &en, sizeof(en)) &&
1412 	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1413 	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1414 }
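/*
 * The resulting on-disk layout of each note is: a struct elf_note
 * header (n_namesz, n_descsz, n_type), the name padded to a 4-byte
 * boundary, then the descriptor data padded to a 4-byte boundary.
 * For instance a "CORE" note emits the 5-byte name "CORE\0" followed
 * by 3 bytes of padding, exactly as notesize() accounts for above.
 */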
1415 
1416 static void fill_elf_header(struct elfhdr *elf, int segs,
1417 			    u16 machine, u32 flags)
1418 {
1419 	memset(elf, 0, sizeof(*elf));
1420 
1421 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1422 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1423 	elf->e_ident[EI_DATA] = ELF_DATA;
1424 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1425 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1426 
1427 	elf->e_type = ET_CORE;
1428 	elf->e_machine = machine;
1429 	elf->e_version = EV_CURRENT;
1430 	elf->e_phoff = sizeof(struct elfhdr);
1431 	elf->e_flags = flags;
1432 	elf->e_ehsize = sizeof(struct elfhdr);
1433 	elf->e_phentsize = sizeof(struct elf_phdr);
1434 	elf->e_phnum = segs;
1435 
1436 	return;
1437 }
1438 
1439 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1440 {
1441 	phdr->p_type = PT_NOTE;
1442 	phdr->p_offset = offset;
1443 	phdr->p_vaddr = 0;
1444 	phdr->p_paddr = 0;
1445 	phdr->p_filesz = sz;
1446 	phdr->p_memsz = 0;
1447 	phdr->p_flags = 0;
1448 	phdr->p_align = 0;
1449 	return;
1450 }
1451 
1452 static void fill_note(struct memelfnote *note, const char *name, int type,
1453 		unsigned int sz, void *data)
1454 {
1455 	note->name = name;
1456 	note->type = type;
1457 	note->datasz = sz;
1458 	note->data = data;
1459 	return;
1460 }
1461 
1462 /*
1463  * fill up all the fields in prstatus from the given task struct, except
1464  * registers which need to be filled up separately.
1465  */
1466 static void fill_prstatus(struct elf_prstatus *prstatus,
1467 		struct task_struct *p, long signr)
1468 {
1469 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1470 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1471 	prstatus->pr_sighold = p->blocked.sig[0];
1472 	rcu_read_lock();
1473 	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1474 	rcu_read_unlock();
1475 	prstatus->pr_pid = task_pid_vnr(p);
1476 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1477 	prstatus->pr_sid = task_session_vnr(p);
1478 	if (thread_group_leader(p)) {
1479 		struct task_cputime cputime;
1480 
1481 		/*
1482 		 * This is the record for the group leader.  It shows the
1483 		 * group-wide total, not its individual thread total.
1484 		 */
1485 		thread_group_cputime(p, &cputime);
1486 		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1487 		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1488 	} else {
1489 		cputime_t utime, stime;
1490 
1491 		task_cputime(p, &utime, &stime);
1492 		cputime_to_timeval(utime, &prstatus->pr_utime);
1493 		cputime_to_timeval(stime, &prstatus->pr_stime);
1494 	}
1495 	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1496 	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1497 }
1498 
1499 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1500 		       struct mm_struct *mm)
1501 {
1502 	const struct cred *cred;
1503 	unsigned int i, len;
1504 
1505 	/* first copy the parameters from user space */
1506 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1507 
1508 	len = mm->arg_end - mm->arg_start;
1509 	if (len >= ELF_PRARGSZ)
1510 		len = ELF_PRARGSZ-1;
1511 	if (copy_from_user(&psinfo->pr_psargs,
1512 		           (const char __user *)mm->arg_start, len))
1513 		return -EFAULT;
1514 	for(i = 0; i < len; i++)
1515 		if (psinfo->pr_psargs[i] == 0)
1516 			psinfo->pr_psargs[i] = ' ';
1517 	psinfo->pr_psargs[len] = 0;
1518 
1519 	rcu_read_lock();
1520 	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1521 	rcu_read_unlock();
1522 	psinfo->pr_pid = task_pid_vnr(p);
1523 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1524 	psinfo->pr_sid = task_session_vnr(p);
1525 
1526 	i = p->state ? ffz(~p->state) + 1 : 0;
1527 	psinfo->pr_state = i;
1528 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1529 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1530 	psinfo->pr_nice = task_nice(p);
1531 	psinfo->pr_flag = p->flags;
1532 	rcu_read_lock();
1533 	cred = __task_cred(p);
1534 	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1535 	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1536 	rcu_read_unlock();
1537 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1538 
1539 	return 0;
1540 }
1541 
1542 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1543 {
1544 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1545 	int i = 0;
1546 	do
1547 		i += 2;
1548 	while (auxv[i - 2] != AT_NULL);
1549 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1550 }
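/*
 * Note that this simply reuses the saved_auxv array that was built in
 * create_elf_tables(); the loop above counts (id, value) pairs up to
 * and including the terminating AT_NULL entry.
 */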
1551 
1552 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1553 		const siginfo_t *siginfo)
1554 {
1555 	mm_segment_t old_fs = get_fs();
1556 	set_fs(KERNEL_DS);
1557 	copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1558 	set_fs(old_fs);
1559 	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1560 }
1561 
1562 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1563 /*
1564  * Format of NT_FILE note:
1565  *
1566  * long count     -- how many files are mapped
1567  * long page_size -- units for file_ofs
1568  * array of [COUNT] elements of
1569  *   long start
1570  *   long end
1571  *   long file_ofs
1572  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1573  */
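/*
 * A hypothetical two-file example of that layout, with 4 KiB pages:
 *   count = 2, page_size = 4096,
 *   { start, end, file_ofs } for mapping 1, then for mapping 2
 *   (file_ofs is the vma's vm_pgoff, i.e. the offset in pages),
 *   followed by "/lib/ld-linux.so.2" NUL "/usr/bin/app" NUL.
 */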
1574 static int fill_files_note(struct memelfnote *note)
1575 {
1576 	struct vm_area_struct *vma;
1577 	unsigned count, size, names_ofs, remaining, n;
1578 	user_long_t *data;
1579 	user_long_t *start_end_ofs;
1580 	char *name_base, *name_curpos;
1581 
1582 	/* *Estimated* file count and total data size needed */
1583 	count = current->mm->map_count;
1584 	size = count * 64;
1585 
1586 	names_ofs = (2 + 3 * count) * sizeof(data[0]);
1587  alloc:
1588 	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1589 		return -EINVAL;
1590 	size = round_up(size, PAGE_SIZE);
1591 	data = vmalloc(size);
1592 	if (!data)
1593 		return -ENOMEM;
1594 
1595 	start_end_ofs = data + 2;
1596 	name_base = name_curpos = ((char *)data) + names_ofs;
1597 	remaining = size - names_ofs;
1598 	count = 0;
1599 	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1600 		struct file *file;
1601 		const char *filename;
1602 
1603 		file = vma->vm_file;
1604 		if (!file)
1605 			continue;
1606 		filename = file_path(file, name_curpos, remaining);
1607 		if (IS_ERR(filename)) {
1608 			if (PTR_ERR(filename) == -ENAMETOOLONG) {
1609 				vfree(data);
1610 				size = size * 5 / 4;
1611 				goto alloc;
1612 			}
1613 			continue;
1614 		}
1615 
1616 		/* file_path() fills at the end, move name down */
1617 		/* n = strlen(filename) + 1: */
1618 		n = (name_curpos + remaining) - filename;
1619 		remaining = filename - name_curpos;
1620 		memmove(name_curpos, filename, n);
1621 		name_curpos += n;
1622 
1623 		*start_end_ofs++ = vma->vm_start;
1624 		*start_end_ofs++ = vma->vm_end;
1625 		*start_end_ofs++ = vma->vm_pgoff;
1626 		count++;
1627 	}
1628 
1629 	/* Now we know exact count of files, can store it */
1630 	data[0] = count;
1631 	data[1] = PAGE_SIZE;
1632 	/*
1633 	 * Count usually is less than current->mm->map_count,
1634 	 * we need to move filenames down.
1635 	 */
1636 	n = current->mm->map_count - count;
1637 	if (n != 0) {
1638 		unsigned shift_bytes = n * 3 * sizeof(data[0]);
1639 		memmove(name_base - shift_bytes, name_base,
1640 			name_curpos - name_base);
1641 		name_curpos -= shift_bytes;
1642 	}
1643 
1644 	size = name_curpos - (char *)data;
1645 	fill_note(note, "CORE", NT_FILE, size, data);
1646 	return 0;
1647 }
1648 
1649 #ifdef CORE_DUMP_USE_REGSET
1650 #include <linux/regset.h>
1651 
1652 struct elf_thread_core_info {
1653 	struct elf_thread_core_info *next;
1654 	struct task_struct *task;
1655 	struct elf_prstatus prstatus;
1656 	struct memelfnote notes[0];
1657 };
1658 
1659 struct elf_note_info {
1660 	struct elf_thread_core_info *thread;
1661 	struct memelfnote psinfo;
1662 	struct memelfnote signote;
1663 	struct memelfnote auxv;
1664 	struct memelfnote files;
1665 	user_siginfo_t csigdata;
1666 	size_t size;
1667 	int thread_notes;
1668 };
1669 
1670 /*
1671  * When a regset has a writeback hook, we call it on each thread before
1672  * dumping user memory.  On register window machines, this makes sure the
1673  * user memory backing the register data is up to date before we read it.
1674  */
1675 static void do_thread_regset_writeback(struct task_struct *task,
1676 				       const struct user_regset *regset)
1677 {
1678 	if (regset->writeback)
1679 		regset->writeback(task, regset, 1);
1680 }
1681 
1682 #ifndef PR_REG_SIZE
1683 #define PR_REG_SIZE(S) sizeof(S)
1684 #endif
1685 
1686 #ifndef PRSTATUS_SIZE
1687 #define PRSTATUS_SIZE(S) sizeof(S)
1688 #endif
1689 
1690 #ifndef PR_REG_PTR
1691 #define PR_REG_PTR(S) (&((S)->pr_reg))
1692 #endif
1693 
1694 #ifndef SET_PR_FPVALID
1695 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1696 #endif
1697 
1698 static int fill_thread_core_info(struct elf_thread_core_info *t,
1699 				 const struct user_regset_view *view,
1700 				 long signr, size_t *total)
1701 {
1702 	unsigned int i;
1703 
1704 	/*
1705 	 * NT_PRSTATUS is the one special case, because the regset data
1706 	 * goes into the pr_reg field inside the note contents, rather
1707 	 * than being the whole note contents.  We fill the rest in here.
1708 	 * We assume that regset 0 is NT_PRSTATUS.
1709 	 */
1710 	fill_prstatus(&t->prstatus, t->task, signr);
1711 	(void) view->regsets[0].get(t->task, &view->regsets[0],
1712 				    0, PR_REG_SIZE(t->prstatus.pr_reg),
1713 				    PR_REG_PTR(&t->prstatus), NULL);
1714 
1715 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1716 		  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1717 	*total += notesize(&t->notes[0]);
1718 
1719 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1720 
1721 	/*
1722 	 * Each other regset might generate a note too.  For each regset
1723 	 * that has no core_note_type or is inactive, we leave t->notes[i]
1724 	 * all zero and we'll know to skip writing it later.
1725 	 */
1726 	for (i = 1; i < view->n; ++i) {
1727 		const struct user_regset *regset = &view->regsets[i];
1728 		do_thread_regset_writeback(t->task, regset);
1729 		if (regset->core_note_type && regset->get &&
1730 		    (!regset->active || regset->active(t->task, regset) > 0)) {
1731 			int ret;
1732 			size_t size = regset->n * regset->size;
1733 			void *data = kzalloc(size, GFP_KERNEL);
1734 			if (unlikely(!data))
1735 				return 0;
1736 			ret = regset->get(t->task, regset,
1737 					  0, size, data, NULL);
1738 			if (unlikely(ret))
1739 				kfree(data);
1740 			else {
1741 				if (regset->core_note_type != NT_PRFPREG)
1742 					fill_note(&t->notes[i], "LINUX",
1743 						  regset->core_note_type,
1744 						  size, data);
1745 				else {
1746 					SET_PR_FPVALID(&t->prstatus, 1);
1747 					fill_note(&t->notes[i], "CORE",
1748 						  NT_PRFPREG, size, data);
1749 				}
1750 				*total += notesize(&t->notes[i]);
1751 			}
1752 		}
1753 	}
1754 
1755 	return 1;
1756 }
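/*
 * Sketch of the per-thread note array that fill_thread_core_info() leaves
 * behind (derived from the code above):
 *
 *	notes[0]  NT_PRSTATUS ("CORE"), with regset 0 copied into pr_reg
 *	notes[i]  one note per further regset that is active and has a
 *	          core_note_type; NT_PRFPREG keeps the "CORE" name, every
 *	          other regset is emitted under the "LINUX" name
 *
 * Entries left all zero are recognised and skipped by write_note_info().
 */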
1757 
1758 static int fill_note_info(struct elfhdr *elf, int phdrs,
1759 			  struct elf_note_info *info,
1760 			  const siginfo_t *siginfo, struct pt_regs *regs)
1761 {
1762 	struct task_struct *dump_task = current;
1763 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1764 	struct elf_thread_core_info *t;
1765 	struct elf_prpsinfo *psinfo;
1766 	struct core_thread *ct;
1767 	unsigned int i;
1768 
1769 	info->size = 0;
1770 	info->thread = NULL;
1771 
1772 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1773 	if (psinfo == NULL) {
1774 		info->psinfo.data = NULL; /* So we don't free this wrongly */
1775 		return 0;
1776 	}
1777 
1778 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1779 
1780 	/*
1781 	 * Figure out how many notes we're going to need for each thread.
1782 	 */
1783 	info->thread_notes = 0;
1784 	for (i = 0; i < view->n; ++i)
1785 		if (view->regsets[i].core_note_type != 0)
1786 			++info->thread_notes;
1787 
1788 	/*
1789 	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1790 	 * since it is our one special case.
1791 	 */
1792 	if (unlikely(info->thread_notes == 0) ||
1793 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1794 		WARN_ON(1);
1795 		return 0;
1796 	}
1797 
1798 	/*
1799 	 * Initialize the ELF file header.
1800 	 */
1801 	fill_elf_header(elf, phdrs,
1802 			view->e_machine, view->e_flags);
1803 
1804 	/*
1805 	 * Allocate a structure for each thread.
1806 	 */
1807 	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1808 		t = kzalloc(offsetof(struct elf_thread_core_info,
1809 				     notes[info->thread_notes]),
1810 			    GFP_KERNEL);
1811 		if (unlikely(!t))
1812 			return 0;
1813 
1814 		t->task = ct->task;
1815 		if (ct->task == dump_task || !info->thread) {
1816 			t->next = info->thread;
1817 			info->thread = t;
1818 		} else {
1819 			/*
1820 			 * Make sure to keep the original task at
1821 			 * the head of the list.
1822 			 */
1823 			t->next = info->thread->next;
1824 			info->thread->next = t;
1825 		}
1826 	}
1827 
1828 	/*
1829 	 * Now fill in each thread's information.
1830 	 */
1831 	for (t = info->thread; t != NULL; t = t->next)
1832 		if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1833 			return 0;
1834 
1835 	/*
1836 	 * Fill in the two process-wide notes.
1837 	 */
1838 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1839 	info->size += notesize(&info->psinfo);
1840 
1841 	fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1842 	info->size += notesize(&info->signote);
1843 
1844 	fill_auxv_note(&info->auxv, current->mm);
1845 	info->size += notesize(&info->auxv);
1846 
1847 	if (fill_files_note(&info->files) == 0)
1848 		info->size += notesize(&info->files);
1849 
1850 	return 1;
1851 }
1852 
1853 static size_t get_note_info_size(struct elf_note_info *info)
1854 {
1855 	return info->size;
1856 }
1857 
1858 /*
1859  * Write all the notes for each thread.  When writing the first thread, the
1860  * process-wide notes are interleaved after the first thread-specific note.
1861  */
1862 static int write_note_info(struct elf_note_info *info,
1863 			   struct coredump_params *cprm)
1864 {
1865 	bool first = true;
1866 	struct elf_thread_core_info *t = info->thread;
1867 
1868 	do {
1869 		int i;
1870 
1871 		if (!writenote(&t->notes[0], cprm))
1872 			return 0;
1873 
1874 		if (first && !writenote(&info->psinfo, cprm))
1875 			return 0;
1876 		if (first && !writenote(&info->signote, cprm))
1877 			return 0;
1878 		if (first && !writenote(&info->auxv, cprm))
1879 			return 0;
1880 		if (first && info->files.data &&
1881 				!writenote(&info->files, cprm))
1882 			return 0;
1883 
1884 		for (i = 1; i < info->thread_notes; ++i)
1885 			if (t->notes[i].data &&
1886 			    !writenote(&t->notes[i], cprm))
1887 				return 0;
1888 
1889 		first = false;
1890 		t = t->next;
1891 	} while (t);
1892 
1893 	return 1;
1894 }
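/*
 * Resulting note order in the PT_NOTE segment (a sketch of what the loop
 * above emits): the first thread's NT_PRSTATUS, then the process-wide
 * NT_PRPSINFO, NT_SIGINFO, NT_AUXV and, if present, NT_FILE, then the first
 * thread's remaining regset notes, followed by each further thread's
 * NT_PRSTATUS and regset notes in turn.
 */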
1895 
1896 static void free_note_info(struct elf_note_info *info)
1897 {
1898 	struct elf_thread_core_info *threads = info->thread;
1899 	while (threads) {
1900 		unsigned int i;
1901 		struct elf_thread_core_info *t = threads;
1902 		threads = t->next;
1903 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1904 		for (i = 1; i < info->thread_notes; ++i)
1905 			kfree(t->notes[i].data);
1906 		kfree(t);
1907 	}
1908 	kfree(info->psinfo.data);
1909 	vfree(info->files.data);
1910 }
1911 
1912 #else
1913 
1914 /* Here is the structure in which status of each thread is captured. */
1915 struct elf_thread_status
1916 {
1917 	struct list_head list;
1918 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1919 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1920 	struct task_struct *thread;
1921 #ifdef ELF_CORE_COPY_XFPREGS
1922 	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1923 #endif
1924 	struct memelfnote notes[3];
1925 	int num_notes;
1926 };
1927 
1928 /*
1929  * In order to add the specific thread information for the ELF file format,
1930  * we need to keep a linked list of every thread's pr_status and then create
1931  * a single section for them in the final core file.
1932  */
1933 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1934 {
1935 	int sz = 0;
1936 	struct task_struct *p = t->thread;
1937 	t->num_notes = 0;
1938 
1939 	fill_prstatus(&t->prstatus, p, signr);
1940 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1941 
1942 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1943 		  &(t->prstatus));
1944 	t->num_notes++;
1945 	sz += notesize(&t->notes[0]);
1946 
1947 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1948 								&t->fpu))) {
1949 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1950 			  &(t->fpu));
1951 		t->num_notes++;
1952 		sz += notesize(&t->notes[1]);
1953 	}
1954 
1955 #ifdef ELF_CORE_COPY_XFPREGS
1956 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1957 		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1958 			  sizeof(t->xfpu), &t->xfpu);
1959 		t->num_notes++;
1960 		sz += notesize(&t->notes[2]);
1961 	}
1962 #endif
1963 	return sz;
1964 }
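/*
 * elf_dump_thread_status() thus fills up to three notes per thread, in the
 * fixed slots of t->notes[]: NT_PRSTATUS, NT_PRFPREG when the FPU state was
 * copied, and the arch extra FP note when ELF_CORE_COPY_XFPREGS is defined.
 * The return value is the total on-disk size of those notes.
 */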
1965 
1966 struct elf_note_info {
1967 	struct memelfnote *notes;
1968 	struct memelfnote *notes_files;
1969 	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1970 	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1971 	struct list_head thread_list;
1972 	elf_fpregset_t *fpu;
1973 #ifdef ELF_CORE_COPY_XFPREGS
1974 	elf_fpxregset_t *xfpu;
1975 #endif
1976 	user_siginfo_t csigdata;
1977 	int thread_status_size;
1978 	int numnote;
1979 };
1980 
1981 static int elf_note_info_init(struct elf_note_info *info)
1982 {
1983 	memset(info, 0, sizeof(*info));
1984 	INIT_LIST_HEAD(&info->thread_list);
1985 
1986 	/* Allocate space for ELF notes */
1987 	info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1988 	if (!info->notes)
1989 		return 0;
1990 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1991 	if (!info->psinfo)
1992 		return 0;
1993 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1994 	if (!info->prstatus)
1995 		return 0;
1996 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1997 	if (!info->fpu)
1998 		return 0;
1999 #ifdef ELF_CORE_COPY_XFPREGS
2000 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
2001 	if (!info->xfpu)
2002 		return 0;
2003 #endif
2004 	return 1;
2005 }
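/*
 * The eight memelfnote slots allocated above are enough for NT_PRSTATUS,
 * NT_PRPSINFO, NT_SIGINFO, NT_AUXV, the optional NT_FILE, NT_PRFPREG and,
 * with ELF_CORE_COPY_XFPREGS, one extra FP note, leaving one slot spare.
 */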
2006 
2007 static int fill_note_info(struct elfhdr *elf, int phdrs,
2008 			  struct elf_note_info *info,
2009 			  const siginfo_t *siginfo, struct pt_regs *regs)
2010 {
2011 	struct list_head *t;
2012 	struct core_thread *ct;
2013 	struct elf_thread_status *ets;
2014 
2015 	if (!elf_note_info_init(info))
2016 		return 0;
2017 
2018 	for (ct = current->mm->core_state->dumper.next;
2019 					ct; ct = ct->next) {
2020 		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
2021 		if (!ets)
2022 			return 0;
2023 
2024 		ets->thread = ct->task;
2025 		list_add(&ets->list, &info->thread_list);
2026 	}
2027 
2028 	list_for_each(t, &info->thread_list) {
2029 		int sz;
2030 
2031 		ets = list_entry(t, struct elf_thread_status, list);
2032 		sz = elf_dump_thread_status(siginfo->si_signo, ets);
2033 		info->thread_status_size += sz;
2034 	}
2035 	/* now collect the dump for the current task */
2036 	memset(info->prstatus, 0, sizeof(*info->prstatus));
2037 	fill_prstatus(info->prstatus, current, siginfo->si_signo);
2038 	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
2039 
2040 	/* Set up header */
2041 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
2042 
2043 	/*
2044 	 * Set up the notes in similar form to SVR4 core dumps made
2045 	 * with info from their /proc.
2046 	 */
2047 
2048 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2049 		  sizeof(*info->prstatus), info->prstatus);
2050 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
2051 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2052 		  sizeof(*info->psinfo), info->psinfo);
2053 
2054 	fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2055 	fill_auxv_note(info->notes + 3, current->mm);
2056 	info->numnote = 4;
2057 
2058 	if (fill_files_note(info->notes + info->numnote) == 0) {
2059 		info->notes_files = info->notes + info->numnote;
2060 		info->numnote++;
2061 	}
2062 
2063 	/* Try to dump the FPU. */
2064 	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2065 							       info->fpu);
2066 	if (info->prstatus->pr_fpvalid)
2067 		fill_note(info->notes + info->numnote++,
2068 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2069 #ifdef ELF_CORE_COPY_XFPREGS
2070 	if (elf_core_copy_task_xfpregs(current, info->xfpu))
2071 		fill_note(info->notes + info->numnote++,
2072 			  "LINUX", ELF_CORE_XFPREG_TYPE,
2073 			  sizeof(*info->xfpu), info->xfpu);
2074 #endif
2075 
2076 	return 1;
2077 }
2078 
2079 static size_t get_note_info_size(struct elf_note_info *info)
2080 {
2081 	int sz = 0;
2082 	int i;
2083 
2084 	for (i = 0; i < info->numnote; i++)
2085 		sz += notesize(info->notes + i);
2086 
2087 	sz += info->thread_status_size;
2088 
2089 	return sz;
2090 }
2091 
2092 static int write_note_info(struct elf_note_info *info,
2093 			   struct coredump_params *cprm)
2094 {
2095 	int i;
2096 	struct list_head *t;
2097 
2098 	for (i = 0; i < info->numnote; i++)
2099 		if (!writenote(info->notes + i, cprm))
2100 			return 0;
2101 
2102 	/* write out the thread status notes section */
2103 	list_for_each(t, &info->thread_list) {
2104 		struct elf_thread_status *tmp =
2105 				list_entry(t, struct elf_thread_status, list);
2106 
2107 		for (i = 0; i < tmp->num_notes; i++)
2108 			if (!writenote(&tmp->notes[i], cprm))
2109 				return 0;
2110 	}
2111 
2112 	return 1;
2113 }
2114 
2115 static void free_note_info(struct elf_note_info *info)
2116 {
2117 	while (!list_empty(&info->thread_list)) {
2118 		struct list_head *tmp = info->thread_list.next;
2119 		list_del(tmp);
2120 		kfree(list_entry(tmp, struct elf_thread_status, list));
2121 	}
2122 
2123 	/* Free data possibly allocated by fill_files_note(): */
2124 	if (info->notes_files)
2125 		vfree(info->notes_files->data);
2126 
2127 	kfree(info->prstatus);
2128 	kfree(info->psinfo);
2129 	kfree(info->notes);
2130 	kfree(info->fpu);
2131 #ifdef ELF_CORE_COPY_XFPREGS
2132 	kfree(info->xfpu);
2133 #endif
2134 }
2135 
2136 #endif
2137 
2138 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2139 					struct vm_area_struct *gate_vma)
2140 {
2141 	struct vm_area_struct *ret = tsk->mm->mmap;
2142 
2143 	if (ret)
2144 		return ret;
2145 	return gate_vma;
2146 }
2147 /*
2148  * Helper function for iterating across a vma list.  It ensures that the caller
2149  * will visit `gate_vma' prior to terminating the search.
2150  */
2151 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2152 					struct vm_area_struct *gate_vma)
2153 {
2154 	struct vm_area_struct *ret;
2155 
2156 	ret = this_vma->vm_next;
2157 	if (ret)
2158 		return ret;
2159 	if (this_vma == gate_vma)
2160 		return NULL;
2161 	return gate_vma;
2162 }
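/*
 * Typical use of the two helpers above, as in elf_core_dump() below:
 *
 *	for (vma = first_vma(current, gate_vma); vma != NULL;
 *			vma = next_vma(vma, gate_vma))
 *		...;
 *
 * which walks mm->mmap and then visits gate_vma exactly once, if there is one.
 */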
2163 
2164 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2165 			     elf_addr_t e_shoff, int segs)
2166 {
2167 	elf->e_shoff = e_shoff;
2168 	elf->e_shentsize = sizeof(*shdr4extnum);
2169 	elf->e_shnum = 1;
2170 	elf->e_shstrndx = SHN_UNDEF;
2171 
2172 	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2173 
2174 	shdr4extnum->sh_type = SHT_NULL;
2175 	shdr4extnum->sh_size = elf->e_shnum;
2176 	shdr4extnum->sh_link = elf->e_shstrndx;
2177 	shdr4extnum->sh_info = segs;
2178 }
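/*
 * Extended program header numbering, as set up above: when the real segment
 * count does not fit into the 16-bit e_phnum, e_phnum is set to PN_XNUM and
 * the true count is stored in sh_info of section header 0.  A reader would
 * recover it roughly like this (sketch):
 *
 *	int phnum = ehdr->e_phnum;
 *	if (phnum == PN_XNUM)
 *		phnum = shdr0->sh_info;
 */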
2179 
2180 /*
2181  * Actual dumper
2182  *
2183  * This is a two-pass process; first we find the offsets of the bits,
2184  * and then they are actually written out.  If we run out of core limit
2185  * we just truncate.
2186  */
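/*
 * Rough layout of the file written below, mirroring the offset bookkeeping
 * in elf_core_dump():
 *
 *	ELF header
 *	program headers: one PT_NOTE, one PT_LOAD per vma, plus extra phdrs
 *	note data (see write_note_info)
 *	padding up to the next ELF_EXEC_PAGESIZE boundary
 *	the vma contents, each truncated to vma_dump_size()
 *	arch-specific extra data, if any
 *	one extended-numbering section header, only when e_phnum == PN_XNUM
 */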
2187 static int elf_core_dump(struct coredump_params *cprm)
2188 {
2189 	int has_dumped = 0;
2190 	mm_segment_t fs;
2191 	int segs, i;
2192 	size_t vma_data_size = 0;
2193 	struct vm_area_struct *vma, *gate_vma;
2194 	struct elfhdr *elf = NULL;
2195 	loff_t offset = 0, dataoff;
2196 	struct elf_note_info info = { };
2197 	struct elf_phdr *phdr4note = NULL;
2198 	struct elf_shdr *shdr4extnum = NULL;
2199 	Elf_Half e_phnum;
2200 	elf_addr_t e_shoff;
2201 	elf_addr_t *vma_filesz = NULL;
2202 
2203 	/*
2204 	 * We no longer stop all VM operations.
2205 	 *
2206 	 * This is because those processes that could possibly change map_count
2207 	 * or the mmap / vma pages are now blocked in do_exit on current
2208 	 * finishing this core dump.
2209 	 *
2210 	 * Only ptrace can touch these memory addresses, but it doesn't change
2211 	 * the map_count or the pages allocated. So no possibility of crashing
2212 	 * exists while dumping the mm->vm_next areas to the core file.
2213 	 */
2214 
2215 	/* alloc memory for large data structures: too large to be on stack */
2216 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2217 	if (!elf)
2218 		goto out;
2219 	/*
2220 	 * The number of segs is recorded in the ELF header as a 16-bit value.
2221 	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2222 	 */
2223 	segs = current->mm->map_count;
2224 	segs += elf_core_extra_phdrs();
2225 
2226 	gate_vma = get_gate_vma(current->mm);
2227 	if (gate_vma != NULL)
2228 		segs++;
2229 
2230 	/* for notes section */
2231 	segs++;
2232 
2233 	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2234 	 * this, the kernel supports extended numbering. Have a look at
2235 	 * include/linux/elf.h for further information. */
2236 	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2237 
2238 	/*
2239 	 * Collect all the non-memory information about the process for the
2240 	 * notes.  This also sets up the file header.
2241 	 */
2242 	if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2243 		goto cleanup;
2244 
2245 	has_dumped = 1;
2246 
2247 	fs = get_fs();
2248 	set_fs(KERNEL_DS);
2249 
2250 	offset += sizeof(*elf);				/* Elf header */
2251 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2252 
2253 	/* Write notes phdr entry */
2254 	{
2255 		size_t sz = get_note_info_size(&info);
2256 
2257 		sz += elf_coredump_extra_notes_size();
2258 
2259 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2260 		if (!phdr4note)
2261 			goto end_coredump;
2262 
2263 		fill_elf_note_phdr(phdr4note, sz, offset);
2264 		offset += sz;
2265 	}
2266 
2267 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2268 
2269 	vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL);
2270 	if (!vma_filesz)
2271 		goto end_coredump;
2272 
2273 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2274 			vma = next_vma(vma, gate_vma)) {
2275 		unsigned long dump_size;
2276 
2277 		dump_size = vma_dump_size(vma, cprm->mm_flags);
2278 		vma_filesz[i++] = dump_size;
2279 		vma_data_size += dump_size;
2280 	}
2281 
2282 	offset += vma_data_size;
2283 	offset += elf_core_extra_data_size();
2284 	e_shoff = offset;
2285 
2286 	if (e_phnum == PN_XNUM) {
2287 		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2288 		if (!shdr4extnum)
2289 			goto end_coredump;
2290 		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2291 	}
2292 
2293 	offset = dataoff;
2294 
2295 	if (!dump_emit(cprm, elf, sizeof(*elf)))
2296 		goto end_coredump;
2297 
2298 	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2299 		goto end_coredump;
2300 
2301 	/* Write program headers for segments dump */
2302 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2303 			vma = next_vma(vma, gate_vma)) {
2304 		struct elf_phdr phdr;
2305 
2306 		phdr.p_type = PT_LOAD;
2307 		phdr.p_offset = offset;
2308 		phdr.p_vaddr = vma->vm_start;
2309 		phdr.p_paddr = 0;
2310 		phdr.p_filesz = vma_filesz[i++];
2311 		phdr.p_memsz = vma->vm_end - vma->vm_start;
2312 		offset += phdr.p_filesz;
2313 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2314 		if (vma->vm_flags & VM_WRITE)
2315 			phdr.p_flags |= PF_W;
2316 		if (vma->vm_flags & VM_EXEC)
2317 			phdr.p_flags |= PF_X;
2318 		phdr.p_align = ELF_EXEC_PAGESIZE;
2319 
2320 		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2321 			goto end_coredump;
2322 	}
2323 
2324 	if (!elf_core_write_extra_phdrs(cprm, offset))
2325 		goto end_coredump;
2326 
2327 	/* write out the notes section */
2328 	if (!write_note_info(&info, cprm))
2329 		goto end_coredump;
2330 
2331 	if (elf_coredump_extra_notes_write(cprm))
2332 		goto end_coredump;
2333 
2334 	/* Align to page */
2335 	if (!dump_skip(cprm, dataoff - cprm->written))
2336 		goto end_coredump;
2337 
2338 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2339 			vma = next_vma(vma, gate_vma)) {
2340 		unsigned long addr;
2341 		unsigned long end;
2342 
2343 		end = vma->vm_start + vma_filesz[i++];
2344 
2345 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2346 			struct page *page;
2347 			int stop;
2348 
2349 			page = get_dump_page(addr);
2350 			if (page) {
2351 				void *kaddr = kmap(page);
2352 				stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2353 				kunmap(page);
2354 				page_cache_release(page);
2355 			} else
2356 				stop = !dump_skip(cprm, PAGE_SIZE);
2357 			if (stop)
2358 				goto end_coredump;
2359 		}
2360 	}
2361 	dump_truncate(cprm);
2362 
2363 	if (!elf_core_write_extra_data(cprm))
2364 		goto end_coredump;
2365 
2366 	if (e_phnum == PN_XNUM) {
2367 		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2368 			goto end_coredump;
2369 	}
2370 
2371 end_coredump:
2372 	set_fs(fs);
2373 
2374 cleanup:
2375 	free_note_info(&info);
2376 	kfree(shdr4extnum);
2377 	kfree(vma_filesz);
2378 	kfree(phdr4note);
2379 	kfree(elf);
2380 out:
2381 	return has_dumped;
2382 }
2383 
2384 #endif		/* CONFIG_ELF_CORE */
2385 
2386 static int __init init_elf_binfmt(void)
2387 {
2388 	register_binfmt(&elf_format);
2389 	return 0;
2390 }
2391 
2392 static void __exit exit_elf_binfmt(void)
2393 {
2394 	/* Remove the ELF loader. */
2395 	unregister_binfmt(&elf_format);
2396 }
2397 
2398 core_initcall(init_elf_binfmt);
2399 module_exit(exit_elf_binfmt);
2400 MODULE_LICENSE("GPL");
2401