• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * kexec.c - kexec system call core code.
3  * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
4  *
5  * This source code is licensed under the GNU General Public License,
6  * Version 2.  See the file COPYING for more details.
7  */
8 
9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10 
11 #include <linux/capability.h>
12 #include <linux/mm.h>
13 #include <linux/file.h>
14 #include <linux/slab.h>
15 #include <linux/fs.h>
16 #include <linux/kexec.h>
17 #include <linux/mutex.h>
18 #include <linux/list.h>
19 #include <linux/highmem.h>
20 #include <linux/syscalls.h>
21 #include <linux/reboot.h>
22 #include <linux/ioport.h>
23 #include <linux/hardirq.h>
24 #include <linux/elf.h>
25 #include <linux/elfcore.h>
26 #include <linux/utsname.h>
27 #include <linux/numa.h>
28 #include <linux/suspend.h>
29 #include <linux/device.h>
30 #include <linux/freezer.h>
31 #include <linux/pm.h>
32 #include <linux/cpu.h>
33 #include <linux/uaccess.h>
34 #include <linux/io.h>
35 #include <linux/console.h>
36 #include <linux/vmalloc.h>
37 #include <linux/swap.h>
38 #include <linux/syscore_ops.h>
39 #include <linux/compiler.h>
40 #include <linux/hugetlb.h>
41 
42 #include <asm/page.h>
43 #include <asm/sections.h>
44 
45 #include <crypto/hash.h>
46 #include <crypto/sha.h>
47 #include "kexec_internal.h"
48 
/*
 * Serializes users of the crash image and the crash kernel reservation:
 * crash_kexec() try-locks it, crash_get_memory_size() and
 * crash_shrink_memory() take it around their accesses to crashk_res.
 */
DEFINE_MUTEX(kexec_mutex);

/*
 * Per cpu memory for storing cpu states in case of system crash.
 * Allocated by crash_notes_memory_init() and filled by crash_save_cpu().
 */
note_buf_t __percpu *crash_notes;

/*
 * vmcoreinfo stuff.  vmcoreinfo_max_size is the capacity of
 * vmcoreinfo_data; vmcoreinfo_size starts out as zero.
 */
static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
size_t vmcoreinfo_size;
size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);

/* Flag to indicate we are going to kexec a new kernel */
bool kexec_in_progress = false;


/*
 * Location of the reserved area for the crash kernel.  Crash image
 * segments and crash control pages must lie inside this range.
 */
struct resource crashk_res = {
	.name  = "Crash kernel",
	.start = 0,
	.end   = 0,
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
};
/*
 * Second "Crash kernel" resource.
 * NOTE(review): presumably an additional low-memory reservation; it is
 * not referenced in this part of the file - confirm against arch code.
 */
struct resource crashk_low_res = {
	.name  = "Crash kernel",
	.start = 0,
	.end   = 0,
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
};
77 
kexec_should_crash(struct task_struct * p)78 int kexec_should_crash(struct task_struct *p)
79 {
80 	/*
81 	 * If crash_kexec_post_notifiers is enabled, don't run
82 	 * crash_kexec() here yet, which must be run after panic
83 	 * notifiers in panic().
84 	 */
85 	if (crash_kexec_post_notifiers)
86 		return 0;
87 	/*
88 	 * There are 4 panic() calls in do_exit() path, each of which
89 	 * corresponds to each of these 4 conditions.
90 	 */
91 	if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
92 		return 1;
93 	return 0;
94 }
95 
96 /*
97  * When kexec transitions to the new kernel there is a one-to-one
98  * mapping between physical and virtual addresses.  On processors
99  * where you can disable the MMU this is trivial, and easy.  For
100  * others it is still a simple predictable page table to setup.
101  *
102  * In that environment kexec copies the new kernel to its final
103  * resting place.  This means I can only support memory whose
104  * physical address can fit in an unsigned long.  In particular
105  * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
106  * If the assembly stub has more restrictive requirements
107  * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
108  * defined more restrictively in <asm/kexec.h>.
109  *
110  * The code for the transition from the current kernel to the
111  * new kernel is placed in the control_code_buffer, whose size
112  * is given by KEXEC_CONTROL_PAGE_SIZE.  In the best case only a single
113  * page of memory is necessary, but some architectures require more.
114  * Because this memory must be identity mapped in the transition from
115  * virtual to physical addresses it must live in the range
116  * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
117  * modifiable.
118  *
119  * The assembly stub in the control code buffer is passed a linked list
120  * of descriptor pages detailing the source pages of the new kernel,
121  * and the destination addresses of those source pages.  As this data
122  * structure is not used in the context of the current OS, it must
123  * be self-contained.
124  *
125  * The code has been made to work with highmem pages and will use a
126  * destination page in its final resting place (if it happens
127  * to allocate it).  The end product of this is that most of the
128  * physical address space, and most of RAM can be used.
129  *
130  * Future directions include:
131  *  - allocating a page table with the control code buffer identity
132  *    mapped, to simplify machine_kexec and make kexec_on_panic more
133  *    reliable.
134  */
135 
136 /*
137  * KIMAGE_NO_DEST is an impossible destination address..., for
138  * allocating pages whose destination address we do not care about.
139  */
#define KIMAGE_NO_DEST (-1UL)

/*
 * Forward declaration: kimage_add_entry() allocates indirection pages
 * through kimage_alloc_page() before the latter is defined.
 */
static struct page *kimage_alloc_page(struct kimage *image,
				       gfp_t gfp_mask,
				       unsigned long dest);
145 
sanity_check_segment_list(struct kimage * image)146 int sanity_check_segment_list(struct kimage *image)
147 {
148 	int result, i;
149 	unsigned long nr_segments = image->nr_segments;
150 
151 	/*
152 	 * Verify we have good destination addresses.  The caller is
153 	 * responsible for making certain we don't attempt to load
154 	 * the new image into invalid or reserved areas of RAM.  This
155 	 * just verifies it is an address we can use.
156 	 *
157 	 * Since the kernel does everything in page size chunks ensure
158 	 * the destination addresses are page aligned.  Too many
159 	 * special cases crop of when we don't do this.  The most
160 	 * insidious is getting overlapping destination addresses
161 	 * simply because addresses are changed to page size
162 	 * granularity.
163 	 */
164 	result = -EADDRNOTAVAIL;
165 	for (i = 0; i < nr_segments; i++) {
166 		unsigned long mstart, mend;
167 
168 		mstart = image->segment[i].mem;
169 		mend   = mstart + image->segment[i].memsz;
170 		if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
171 			return result;
172 		if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
173 			return result;
174 	}
175 
176 	/* Verify our destination addresses do not overlap.
177 	 * If we alloed overlapping destination addresses
178 	 * through very weird things can happen with no
179 	 * easy explanation as one segment stops on another.
180 	 */
181 	result = -EINVAL;
182 	for (i = 0; i < nr_segments; i++) {
183 		unsigned long mstart, mend;
184 		unsigned long j;
185 
186 		mstart = image->segment[i].mem;
187 		mend   = mstart + image->segment[i].memsz;
188 		for (j = 0; j < i; j++) {
189 			unsigned long pstart, pend;
190 
191 			pstart = image->segment[j].mem;
192 			pend   = pstart + image->segment[j].memsz;
193 			/* Do the segments overlap ? */
194 			if ((mend > pstart) && (mstart < pend))
195 				return result;
196 		}
197 	}
198 
199 	/* Ensure our buffer sizes are strictly less than
200 	 * our memory sizes.  This should always be the case,
201 	 * and it is easier to check up front than to be surprised
202 	 * later on.
203 	 */
204 	result = -EINVAL;
205 	for (i = 0; i < nr_segments; i++) {
206 		if (image->segment[i].bufsz > image->segment[i].memsz)
207 			return result;
208 	}
209 
210 	/*
211 	 * Verify we have good destination addresses.  Normally
212 	 * the caller is responsible for making certain we don't
213 	 * attempt to load the new image into invalid or reserved
214 	 * areas of RAM.  But crash kernels are preloaded into a
215 	 * reserved area of ram.  We must ensure the addresses
216 	 * are in the reserved area otherwise preloading the
217 	 * kernel could corrupt things.
218 	 */
219 
220 	if (image->type == KEXEC_TYPE_CRASH) {
221 		result = -EADDRNOTAVAIL;
222 		for (i = 0; i < nr_segments; i++) {
223 			unsigned long mstart, mend;
224 
225 			mstart = image->segment[i].mem;
226 			mend = mstart + image->segment[i].memsz - 1;
227 			/* Ensure we are within the crash kernel limits */
228 			if ((mstart < crashk_res.start) ||
229 			    (mend > crashk_res.end))
230 				return result;
231 		}
232 	}
233 
234 	return 0;
235 }
236 
do_kimage_alloc_init(void)237 struct kimage *do_kimage_alloc_init(void)
238 {
239 	struct kimage *image;
240 
241 	/* Allocate a controlling structure */
242 	image = kzalloc(sizeof(*image), GFP_KERNEL);
243 	if (!image)
244 		return NULL;
245 
246 	image->head = 0;
247 	image->entry = &image->head;
248 	image->last_entry = &image->head;
249 	image->control_page = ~0; /* By default this does not apply */
250 	image->type = KEXEC_TYPE_DEFAULT;
251 
252 	/* Initialize the list of control pages */
253 	INIT_LIST_HEAD(&image->control_pages);
254 
255 	/* Initialize the list of destination pages */
256 	INIT_LIST_HEAD(&image->dest_pages);
257 
258 	/* Initialize the list of unusable pages */
259 	INIT_LIST_HEAD(&image->unusable_pages);
260 
261 	return image;
262 }
263 
kimage_is_destination_range(struct kimage * image,unsigned long start,unsigned long end)264 int kimage_is_destination_range(struct kimage *image,
265 					unsigned long start,
266 					unsigned long end)
267 {
268 	unsigned long i;
269 
270 	for (i = 0; i < image->nr_segments; i++) {
271 		unsigned long mstart, mend;
272 
273 		mstart = image->segment[i].mem;
274 		mend = mstart + image->segment[i].memsz;
275 		if ((end > mstart) && (start < mend))
276 			return 1;
277 	}
278 
279 	return 0;
280 }
281 
/*
 * Allocate 2^@order pages with @gfp_mask for use by kexec.
 *
 * The order is stored in the first page's private field so that
 * kimage_free_pages() can recover it, and every page of the block is
 * marked reserved.  Returns the first page, or NULL on failure.
 */
static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
{
	struct page *pages = alloc_pages(gfp_mask, order);
	unsigned int nr, n;

	if (!pages)
		return NULL;

	pages->mapping = NULL;
	set_page_private(pages, order);

	nr = 1 << order;
	for (n = 0; n < nr; n++)
		SetPageReserved(&pages[n]);

	return pages;
}
299 
kimage_free_pages(struct page * page)300 static void kimage_free_pages(struct page *page)
301 {
302 	unsigned int order, count, i;
303 
304 	order = page_private(page);
305 	count = 1 << order;
306 	for (i = 0; i < count; i++)
307 		ClearPageReserved(page + i);
308 	__free_pages(page, order);
309 }
310 
kimage_free_page_list(struct list_head * list)311 void kimage_free_page_list(struct list_head *list)
312 {
313 	struct list_head *pos, *next;
314 
315 	list_for_each_safe(pos, next, list) {
316 		struct page *page;
317 
318 		page = list_entry(pos, struct page, lru);
319 		list_del(&page->lru);
320 		kimage_free_pages(page);
321 	}
322 }
323 
kimage_alloc_normal_control_pages(struct kimage * image,unsigned int order)324 static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
325 							unsigned int order)
326 {
327 	/* Control pages are special, they are the intermediaries
328 	 * that are needed while we copy the rest of the pages
329 	 * to their final resting place.  As such they must
330 	 * not conflict with either the destination addresses
331 	 * or memory the kernel is already using.
332 	 *
333 	 * The only case where we really need more than one of
334 	 * these are for architectures where we cannot disable
335 	 * the MMU and must instead generate an identity mapped
336 	 * page table for all of the memory.
337 	 *
338 	 * At worst this runs in O(N) of the image size.
339 	 */
340 	struct list_head extra_pages;
341 	struct page *pages;
342 	unsigned int count;
343 
344 	count = 1 << order;
345 	INIT_LIST_HEAD(&extra_pages);
346 
347 	/* Loop while I can allocate a page and the page allocated
348 	 * is a destination page.
349 	 */
350 	do {
351 		unsigned long pfn, epfn, addr, eaddr;
352 
353 		pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order);
354 		if (!pages)
355 			break;
356 		pfn   = page_to_pfn(pages);
357 		epfn  = pfn + count;
358 		addr  = pfn << PAGE_SHIFT;
359 		eaddr = epfn << PAGE_SHIFT;
360 		if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
361 			      kimage_is_destination_range(image, addr, eaddr)) {
362 			list_add(&pages->lru, &extra_pages);
363 			pages = NULL;
364 		}
365 	} while (!pages);
366 
367 	if (pages) {
368 		/* Remember the allocated page... */
369 		list_add(&pages->lru, &image->control_pages);
370 
371 		/* Because the page is already in it's destination
372 		 * location we will never allocate another page at
373 		 * that address.  Therefore kimage_alloc_pages
374 		 * will not return it (again) and we don't need
375 		 * to give it an entry in image->segment[].
376 		 */
377 	}
378 	/* Deal with the destination pages I have inadvertently allocated.
379 	 *
380 	 * Ideally I would convert multi-page allocations into single
381 	 * page allocations, and add everything to image->dest_pages.
382 	 *
383 	 * For now it is simpler to just free the pages.
384 	 */
385 	kimage_free_page_list(&extra_pages);
386 
387 	return pages;
388 }
389 
kimage_alloc_crash_control_pages(struct kimage * image,unsigned int order)390 static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
391 						      unsigned int order)
392 {
393 	/* Control pages are special, they are the intermediaries
394 	 * that are needed while we copy the rest of the pages
395 	 * to their final resting place.  As such they must
396 	 * not conflict with either the destination addresses
397 	 * or memory the kernel is already using.
398 	 *
399 	 * Control pages are also the only pags we must allocate
400 	 * when loading a crash kernel.  All of the other pages
401 	 * are specified by the segments and we just memcpy
402 	 * into them directly.
403 	 *
404 	 * The only case where we really need more than one of
405 	 * these are for architectures where we cannot disable
406 	 * the MMU and must instead generate an identity mapped
407 	 * page table for all of the memory.
408 	 *
409 	 * Given the low demand this implements a very simple
410 	 * allocator that finds the first hole of the appropriate
411 	 * size in the reserved memory region, and allocates all
412 	 * of the memory up to and including the hole.
413 	 */
414 	unsigned long hole_start, hole_end, size;
415 	struct page *pages;
416 
417 	pages = NULL;
418 	size = (1 << order) << PAGE_SHIFT;
419 	hole_start = (image->control_page + (size - 1)) & ~(size - 1);
420 	hole_end   = hole_start + size - 1;
421 	while (hole_end <= crashk_res.end) {
422 		unsigned long i;
423 
424 		if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT)
425 			break;
426 		/* See if I overlap any of the segments */
427 		for (i = 0; i < image->nr_segments; i++) {
428 			unsigned long mstart, mend;
429 
430 			mstart = image->segment[i].mem;
431 			mend   = mstart + image->segment[i].memsz - 1;
432 			if ((hole_end >= mstart) && (hole_start <= mend)) {
433 				/* Advance the hole to the end of the segment */
434 				hole_start = (mend + (size - 1)) & ~(size - 1);
435 				hole_end   = hole_start + size - 1;
436 				break;
437 			}
438 		}
439 		/* If I don't overlap any segments I have found my hole! */
440 		if (i == image->nr_segments) {
441 			pages = pfn_to_page(hole_start >> PAGE_SHIFT);
442 			image->control_page = hole_end;
443 			break;
444 		}
445 	}
446 
447 	return pages;
448 }
449 
450 
kimage_alloc_control_pages(struct kimage * image,unsigned int order)451 struct page *kimage_alloc_control_pages(struct kimage *image,
452 					 unsigned int order)
453 {
454 	struct page *pages = NULL;
455 
456 	switch (image->type) {
457 	case KEXEC_TYPE_DEFAULT:
458 		pages = kimage_alloc_normal_control_pages(image, order);
459 		break;
460 	case KEXEC_TYPE_CRASH:
461 		pages = kimage_alloc_crash_control_pages(image, order);
462 		break;
463 	}
464 
465 	return pages;
466 }
467 
/*
 * Append @entry to the entry list of @image.
 *
 * When the current indirection page is full, a fresh one is allocated
 * and chained in through an IND_INDIRECTION entry first.  The slot
 * following the new entry is always zeroed so the list stays
 * terminated.
 *
 * Returns 0 on success, -ENOMEM if no indirection page could be had.
 */
static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
{
	/* Step past the most recently written entry, if there is one */
	if (*image->entry != 0)
		image->entry++;

	if (image->entry == image->last_entry) {
		struct page *ind;
		kimage_entry_t *table;

		ind = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
		if (!ind)
			return -ENOMEM;

		table = page_address(ind);
		/* The last slot of the old page links to the new page */
		*image->entry = virt_to_phys(table) | IND_INDIRECTION;
		image->entry = table;
		/* Keep the final slot of the new page free for the next link */
		image->last_entry = table +
				      ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
	}

	*image->entry++ = entry;
	*image->entry = 0;

	return 0;
}
493 
kimage_set_destination(struct kimage * image,unsigned long destination)494 static int kimage_set_destination(struct kimage *image,
495 				   unsigned long destination)
496 {
497 	int result;
498 
499 	destination &= PAGE_MASK;
500 	result = kimage_add_entry(image, destination | IND_DESTINATION);
501 
502 	return result;
503 }
504 
505 
kimage_add_page(struct kimage * image,unsigned long page)506 static int kimage_add_page(struct kimage *image, unsigned long page)
507 {
508 	int result;
509 
510 	page &= PAGE_MASK;
511 	result = kimage_add_entry(image, page | IND_SOURCE);
512 
513 	return result;
514 }
515 
516 
kimage_free_extra_pages(struct kimage * image)517 static void kimage_free_extra_pages(struct kimage *image)
518 {
519 	/* Walk through and free any extra destination pages I may have */
520 	kimage_free_page_list(&image->dest_pages);
521 
522 	/* Walk through and free any unusable pages I have cached */
523 	kimage_free_page_list(&image->unusable_pages);
524 
525 }
kimage_terminate(struct kimage * image)526 void kimage_terminate(struct kimage *image)
527 {
528 	if (*image->entry != 0)
529 		image->entry++;
530 
531 	*image->entry = IND_DONE;
532 }
533 
/*
 * Iterate over the entry list of @image, starting at image->head.
 * @ptr points at the current slot and @entry holds its value.  The walk
 * stops at a zero entry or at an entry carrying IND_DONE.  After an
 * IND_INDIRECTION entry has been visited the walk continues in the page
 * it names (via phys_to_virt()); otherwise it moves to the next slot.
 */
#define for_each_kimage_entry(image, ptr, entry) \
	for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
		ptr = (entry & IND_INDIRECTION) ? \
			phys_to_virt((entry & PAGE_MASK)) : ptr + 1)
538 
/* Free the page whose physical address is stored in @entry. */
static void kimage_free_entry(kimage_entry_t entry)
{
	kimage_free_pages(pfn_to_page(entry >> PAGE_SHIFT));
}
546 
kimage_free(struct kimage * image)547 void kimage_free(struct kimage *image)
548 {
549 	kimage_entry_t *ptr, entry;
550 	kimage_entry_t ind = 0;
551 
552 	if (!image)
553 		return;
554 
555 	kimage_free_extra_pages(image);
556 	for_each_kimage_entry(image, ptr, entry) {
557 		if (entry & IND_INDIRECTION) {
558 			/* Free the previous indirection page */
559 			if (ind & IND_INDIRECTION)
560 				kimage_free_entry(ind);
561 			/* Save this indirection page until we are
562 			 * done with it.
563 			 */
564 			ind = entry;
565 		} else if (entry & IND_SOURCE)
566 			kimage_free_entry(entry);
567 	}
568 	/* Free the final indirection page */
569 	if (ind & IND_INDIRECTION)
570 		kimage_free_entry(ind);
571 
572 	/* Handle any machine specific cleanup */
573 	machine_kexec_cleanup(image);
574 
575 	/* Free the kexec control pages... */
576 	kimage_free_page_list(&image->control_pages);
577 
578 	/*
579 	 * Free up any temporary buffers allocated. This might hit if
580 	 * error occurred much later after buffer allocation.
581 	 */
582 	if (image->file_mode)
583 		kimage_file_post_load_cleanup(image);
584 
585 	kfree(image);
586 }
587 
kimage_dst_used(struct kimage * image,unsigned long page)588 static kimage_entry_t *kimage_dst_used(struct kimage *image,
589 					unsigned long page)
590 {
591 	kimage_entry_t *ptr, entry;
592 	unsigned long destination = 0;
593 
594 	for_each_kimage_entry(image, ptr, entry) {
595 		if (entry & IND_DESTINATION)
596 			destination = entry & PAGE_MASK;
597 		else if (entry & IND_SOURCE) {
598 			if (page == destination)
599 				return ptr;
600 			destination += PAGE_SIZE;
601 		}
602 	}
603 
604 	return NULL;
605 }
606 
/*
 * Allocate one page to hold data bound for @destination.
 *
 * A source page must not be overwritten (as somebody's destination)
 * before its own data has been copied out.  The invariant kept here is
 * that a source page is either its own destination page or not a
 * destination page at all.  That is slightly stronger than required,
 * but it is trivially correct and simple to verify.
 *
 * Allocating all pages normally runs in O(N) time; the worst case is
 * O(N^2).  If that becomes a problem the data structures can be fixed.
 *
 * Returns the page, or NULL when the allocator runs out of memory.
 */
static struct page *kimage_alloc_page(struct kimage *image,
					gfp_t gfp_mask,
					unsigned long destination)
{
	struct page *page;
	unsigned long addr;

	/* A previously cached page may already sit at the destination */
	list_for_each_entry(page, &image->dest_pages, lru) {
		addr = page_to_pfn(page) << PAGE_SHIFT;
		if (addr != destination)
			continue;

		list_del(&page->lru);
		return page;
	}

	for (;;) {
		kimage_entry_t *slot;
		unsigned long prev_addr;
		struct page *prev_page;

		/* Out of memory: give up */
		page = kimage_alloc_pages(gfp_mask, 0);
		if (!page)
			return NULL;

		/* Pages above the source limit are filed away unused */
		if (page_to_pfn(page) >
				(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
			list_add(&page->lru, &image->unusable_pages);
			continue;
		}
		addr = page_to_pfn(page) << PAGE_SHIFT;

		/* Exactly the destination we are after */
		if (addr == destination)
			return page;

		/* Not a destination of anybody: free to use */
		if (!kimage_is_destination_range(image, addr,
						  addr + PAGE_SIZE))
			return page;

		/*
		 * The page is somebody's destination.  If no source page
		 * has been assigned to that destination yet, cache the
		 * page so it can be handed out when that time comes.
		 */
		slot = kimage_dst_used(image, addr);
		if (!slot) {
			list_add(&page->lru, &image->dest_pages);
			continue;
		}

		/*
		 * A source page for this destination exists: move its
		 * contents into the new page (which is its own
		 * destination) and repoint the entry, keeping its flags.
		 */
		prev_addr = *slot & PAGE_MASK;
		prev_page = pfn_to_page(prev_addr >> PAGE_SHIFT);
		copy_highpage(page, prev_page);
		*slot = addr | (*slot & ~PAGE_MASK);

		/*
		 * The displaced page cannot be a destination page, so hand
		 * it out - unless it is a highmem page and the caller did
		 * not ask for highmem.
		 */
		if (!(gfp_mask & __GFP_HIGHMEM) &&
		    PageHighMem(prev_page)) {
			kimage_free_pages(prev_page);
			continue;
		}

		return prev_page;
	}
}
703 
kimage_load_normal_segment(struct kimage * image,struct kexec_segment * segment)704 static int kimage_load_normal_segment(struct kimage *image,
705 					 struct kexec_segment *segment)
706 {
707 	unsigned long maddr;
708 	size_t ubytes, mbytes;
709 	int result;
710 	unsigned char __user *buf = NULL;
711 	unsigned char *kbuf = NULL;
712 
713 	result = 0;
714 	if (image->file_mode)
715 		kbuf = segment->kbuf;
716 	else
717 		buf = segment->buf;
718 	ubytes = segment->bufsz;
719 	mbytes = segment->memsz;
720 	maddr = segment->mem;
721 
722 	result = kimage_set_destination(image, maddr);
723 	if (result < 0)
724 		goto out;
725 
726 	while (mbytes) {
727 		struct page *page;
728 		char *ptr;
729 		size_t uchunk, mchunk;
730 
731 		page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
732 		if (!page) {
733 			result  = -ENOMEM;
734 			goto out;
735 		}
736 		result = kimage_add_page(image, page_to_pfn(page)
737 								<< PAGE_SHIFT);
738 		if (result < 0)
739 			goto out;
740 
741 		ptr = kmap(page);
742 		/* Start with a clear page */
743 		clear_page(ptr);
744 		ptr += maddr & ~PAGE_MASK;
745 		mchunk = min_t(size_t, mbytes,
746 				PAGE_SIZE - (maddr & ~PAGE_MASK));
747 		uchunk = min(ubytes, mchunk);
748 
749 		/* For file based kexec, source pages are in kernel memory */
750 		if (image->file_mode)
751 			memcpy(ptr, kbuf, uchunk);
752 		else
753 			result = copy_from_user(ptr, buf, uchunk);
754 		kunmap(page);
755 		if (result) {
756 			result = -EFAULT;
757 			goto out;
758 		}
759 		ubytes -= uchunk;
760 		maddr  += mchunk;
761 		if (image->file_mode)
762 			kbuf += mchunk;
763 		else
764 			buf += mchunk;
765 		mbytes -= mchunk;
766 	}
767 out:
768 	return result;
769 }
770 
kimage_load_crash_segment(struct kimage * image,struct kexec_segment * segment)771 static int kimage_load_crash_segment(struct kimage *image,
772 					struct kexec_segment *segment)
773 {
774 	/* For crash dumps kernels we simply copy the data from
775 	 * user space to it's destination.
776 	 * We do things a page at a time for the sake of kmap.
777 	 */
778 	unsigned long maddr;
779 	size_t ubytes, mbytes;
780 	int result;
781 	unsigned char __user *buf = NULL;
782 	unsigned char *kbuf = NULL;
783 
784 	result = 0;
785 	if (image->file_mode)
786 		kbuf = segment->kbuf;
787 	else
788 		buf = segment->buf;
789 	ubytes = segment->bufsz;
790 	mbytes = segment->memsz;
791 	maddr = segment->mem;
792 	while (mbytes) {
793 		struct page *page;
794 		char *ptr;
795 		size_t uchunk, mchunk;
796 
797 		page = pfn_to_page(maddr >> PAGE_SHIFT);
798 		if (!page) {
799 			result  = -ENOMEM;
800 			goto out;
801 		}
802 		ptr = kmap(page);
803 		ptr += maddr & ~PAGE_MASK;
804 		mchunk = min_t(size_t, mbytes,
805 				PAGE_SIZE - (maddr & ~PAGE_MASK));
806 		uchunk = min(ubytes, mchunk);
807 		if (mchunk > uchunk) {
808 			/* Zero the trailing part of the page */
809 			memset(ptr + uchunk, 0, mchunk - uchunk);
810 		}
811 
812 		/* For file based kexec, source pages are in kernel memory */
813 		if (image->file_mode)
814 			memcpy(ptr, kbuf, uchunk);
815 		else
816 			result = copy_from_user(ptr, buf, uchunk);
817 		kexec_flush_icache_page(page);
818 		kunmap(page);
819 		if (result) {
820 			result = -EFAULT;
821 			goto out;
822 		}
823 		ubytes -= uchunk;
824 		maddr  += mchunk;
825 		if (image->file_mode)
826 			kbuf += mchunk;
827 		else
828 			buf += mchunk;
829 		mbytes -= mchunk;
830 	}
831 out:
832 	return result;
833 }
834 
kimage_load_segment(struct kimage * image,struct kexec_segment * segment)835 int kimage_load_segment(struct kimage *image,
836 				struct kexec_segment *segment)
837 {
838 	int result = -ENOMEM;
839 
840 	switch (image->type) {
841 	case KEXEC_TYPE_DEFAULT:
842 		result = kimage_load_normal_segment(image, segment);
843 		break;
844 	case KEXEC_TYPE_CRASH:
845 		result = kimage_load_crash_segment(image, segment);
846 		break;
847 	}
848 
849 	return result;
850 }
851 
/*
 * kexec_crash_image is the image crash_kexec() hands to machine_kexec();
 * while it is set crash_shrink_memory() refuses to shrink the reserved
 * area.
 * NOTE(review): kexec_image and kexec_load_disabled are not used in this
 * part of the file - presumably the regular image and a switch that
 * disables loading; confirm against their users.
 */
struct kimage *kexec_image;
struct kimage *kexec_crash_image;
int kexec_load_disabled;
855 
crash_kexec(struct pt_regs * regs)856 void crash_kexec(struct pt_regs *regs)
857 {
858 	/* Take the kexec_mutex here to prevent sys_kexec_load
859 	 * running on one cpu from replacing the crash kernel
860 	 * we are using after a panic on a different cpu.
861 	 *
862 	 * If the crash kernel was not located in a fixed area
863 	 * of memory the xchg(&kexec_crash_image) would be
864 	 * sufficient.  But since I reuse the memory...
865 	 */
866 	if (mutex_trylock(&kexec_mutex)) {
867 		if (kexec_crash_image) {
868 			struct pt_regs fixed_regs;
869 
870 			crash_setup_regs(&fixed_regs, regs);
871 			crash_save_vmcoreinfo();
872 			machine_crash_shutdown(&fixed_regs);
873 			machine_kexec(kexec_crash_image);
874 		}
875 		mutex_unlock(&kexec_mutex);
876 	}
877 }
878 
crash_get_memory_size(void)879 size_t crash_get_memory_size(void)
880 {
881 	size_t size = 0;
882 
883 	mutex_lock(&kexec_mutex);
884 	if (crashk_res.end != crashk_res.start)
885 		size = resource_size(&crashk_res);
886 	mutex_unlock(&kexec_mutex);
887 	return size;
888 }
889 
crash_free_reserved_phys_range(unsigned long begin,unsigned long end)890 void __weak crash_free_reserved_phys_range(unsigned long begin,
891 					   unsigned long end)
892 {
893 	unsigned long addr;
894 
895 	for (addr = begin; addr < end; addr += PAGE_SIZE)
896 		free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
897 }
898 
crash_shrink_memory(unsigned long new_size)899 int crash_shrink_memory(unsigned long new_size)
900 {
901 	int ret = 0;
902 	unsigned long start, end;
903 	unsigned long old_size;
904 	struct resource *ram_res;
905 
906 	mutex_lock(&kexec_mutex);
907 
908 	if (kexec_crash_image) {
909 		ret = -ENOENT;
910 		goto unlock;
911 	}
912 	start = crashk_res.start;
913 	end = crashk_res.end;
914 	old_size = (end == 0) ? 0 : end - start + 1;
915 	if (new_size >= old_size) {
916 		ret = (new_size == old_size) ? 0 : -EINVAL;
917 		goto unlock;
918 	}
919 
920 	ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
921 	if (!ram_res) {
922 		ret = -ENOMEM;
923 		goto unlock;
924 	}
925 
926 	start = roundup(start, KEXEC_CRASH_MEM_ALIGN);
927 	end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN);
928 
929 	crash_map_reserved_pages();
930 	crash_free_reserved_phys_range(end, crashk_res.end);
931 
932 	if ((start == end) && (crashk_res.parent != NULL))
933 		release_resource(&crashk_res);
934 
935 	ram_res->start = end;
936 	ram_res->end = crashk_res.end;
937 	ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
938 	ram_res->name = "System RAM";
939 
940 	crashk_res.end = end - 1;
941 
942 	insert_resource(&iomem_resource, ram_res);
943 	crash_unmap_reserved_pages();
944 
945 unlock:
946 	mutex_unlock(&kexec_mutex);
947 	return ret;
948 }
949 
/*
 * Write one ELF note at @buf: the note header, then @name (including
 * its terminating NUL), then @data_len bytes of @data.  Each of the
 * three parts advances the cursor by its size rounded up to a whole
 * number of 32-bit words.
 *
 * Returns the position right after the note.
 */
static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
			    size_t data_len)
{
	struct elf_note hdr;
	u32 *cursor = buf;

	hdr.n_namesz = strlen(name) + 1;
	hdr.n_descsz = data_len;
	hdr.n_type   = type;

	memcpy(cursor, &hdr, sizeof(hdr));
	cursor += (sizeof(hdr) + 3)/4;

	memcpy(cursor, name, hdr.n_namesz);
	cursor += (hdr.n_namesz + 3)/4;

	memcpy(cursor, data, hdr.n_descsz);
	cursor += (hdr.n_descsz + 3)/4;

	return cursor;
}
967 
/*
 * Terminate a sequence of notes by writing a note header at @buf whose
 * name size, descriptor size and type are all zero.
 */
static void final_note(u32 *buf)
{
	struct elf_note terminator;

	terminator.n_type   = 0;
	terminator.n_descsz = 0;
	terminator.n_namesz = 0;

	memcpy(buf, &terminator, sizeof(terminator));
}
977 
crash_save_cpu(struct pt_regs * regs,int cpu)978 void crash_save_cpu(struct pt_regs *regs, int cpu)
979 {
980 	struct elf_prstatus prstatus;
981 	u32 *buf;
982 
983 	if ((cpu < 0) || (cpu >= nr_cpu_ids))
984 		return;
985 
986 	/* Using ELF notes here is opportunistic.
987 	 * I need a well defined structure format
988 	 * for the data I pass, and I need tags
989 	 * on the data to indicate what information I have
990 	 * squirrelled away.  ELF notes happen to provide
991 	 * all of that, so there is no need to invent something new.
992 	 */
993 	buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
994 	if (!buf)
995 		return;
996 	memset(&prstatus, 0, sizeof(prstatus));
997 	prstatus.pr_pid = current->pid;
998 	elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
999 	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
1000 			      &prstatus, sizeof(prstatus));
1001 	final_note(buf);
1002 }
1003 
crash_notes_memory_init(void)1004 static int __init crash_notes_memory_init(void)
1005 {
1006 	/* Allocate memory for saving cpu registers. */
1007 	size_t size, align;
1008 
1009 	/*
1010 	 * crash_notes could be allocated across 2 vmalloc pages when percpu
1011 	 * is vmalloc based . vmalloc doesn't guarantee 2 continuous vmalloc
1012 	 * pages are also on 2 continuous physical pages. In this case the
1013 	 * 2nd part of crash_notes in 2nd page could be lost since only the
1014 	 * starting address and size of crash_notes are exported through sysfs.
1015 	 * Here round up the size of crash_notes to the nearest power of two
1016 	 * and pass it to __alloc_percpu as align value. This can make sure
1017 	 * crash_notes is allocated inside one physical page.
1018 	 */
1019 	size = sizeof(note_buf_t);
1020 	align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE);
1021 
1022 	/*
1023 	 * Break compile if size is bigger than PAGE_SIZE since crash_notes
1024 	 * definitely will be in 2 pages with that.
1025 	 */
1026 	BUILD_BUG_ON(size > PAGE_SIZE);
1027 
1028 	crash_notes = __alloc_percpu(size, align);
1029 	if (!crash_notes) {
1030 		pr_warn("Memory allocation for saving cpu register states failed\n");
1031 		return -ENOMEM;
1032 	}
1033 	return 0;
1034 }
1035 subsys_initcall(crash_notes_memory_init);
1036 
1037 
/*
 * Parsing of the "crashkernel" command line option.
 *
 * This code is intended to be called from architecture-specific code.
 */
1043 
1044 
1045 /*
1046  * This function parses command lines in the format
1047  *
1048  *   crashkernel=ramsize-range:size[,...][@offset]
1049  *
1050  * The function returns 0 on success and -EINVAL on failure.
1051  */
parse_crashkernel_mem(char * cmdline,unsigned long long system_ram,unsigned long long * crash_size,unsigned long long * crash_base)1052 static int __init parse_crashkernel_mem(char *cmdline,
1053 					unsigned long long system_ram,
1054 					unsigned long long *crash_size,
1055 					unsigned long long *crash_base)
1056 {
1057 	char *cur = cmdline, *tmp;
1058 
1059 	/* for each entry of the comma-separated list */
1060 	do {
1061 		unsigned long long start, end = ULLONG_MAX, size;
1062 
1063 		/* get the start of the range */
1064 		start = memparse(cur, &tmp);
1065 		if (cur == tmp) {
1066 			pr_warn("crashkernel: Memory value expected\n");
1067 			return -EINVAL;
1068 		}
1069 		cur = tmp;
1070 		if (*cur != '-') {
1071 			pr_warn("crashkernel: '-' expected\n");
1072 			return -EINVAL;
1073 		}
1074 		cur++;
1075 
1076 		/* if no ':' is here, than we read the end */
1077 		if (*cur != ':') {
1078 			end = memparse(cur, &tmp);
1079 			if (cur == tmp) {
1080 				pr_warn("crashkernel: Memory value expected\n");
1081 				return -EINVAL;
1082 			}
1083 			cur = tmp;
1084 			if (end <= start) {
1085 				pr_warn("crashkernel: end <= start\n");
1086 				return -EINVAL;
1087 			}
1088 		}
1089 
1090 		if (*cur != ':') {
1091 			pr_warn("crashkernel: ':' expected\n");
1092 			return -EINVAL;
1093 		}
1094 		cur++;
1095 
1096 		size = memparse(cur, &tmp);
1097 		if (cur == tmp) {
1098 			pr_warn("Memory value expected\n");
1099 			return -EINVAL;
1100 		}
1101 		cur = tmp;
1102 		if (size >= system_ram) {
1103 			pr_warn("crashkernel: invalid size\n");
1104 			return -EINVAL;
1105 		}
1106 
1107 		/* match ? */
1108 		if (system_ram >= start && system_ram < end) {
1109 			*crash_size = size;
1110 			break;
1111 		}
1112 	} while (*cur++ == ',');
1113 
1114 	if (*crash_size > 0) {
1115 		while (*cur && *cur != ' ' && *cur != '@')
1116 			cur++;
1117 		if (*cur == '@') {
1118 			cur++;
1119 			*crash_base = memparse(cur, &tmp);
1120 			if (cur == tmp) {
1121 				pr_warn("Memory value expected after '@'\n");
1122 				return -EINVAL;
1123 			}
1124 		}
1125 	}
1126 
1127 	return 0;
1128 }
1129 
1130 /*
1131  * That function parses "simple" (old) crashkernel command lines like
1132  *
1133  *	crashkernel=size[@offset]
1134  *
1135  * It returns 0 on success and -EINVAL on failure.
1136  */
parse_crashkernel_simple(char * cmdline,unsigned long long * crash_size,unsigned long long * crash_base)1137 static int __init parse_crashkernel_simple(char *cmdline,
1138 					   unsigned long long *crash_size,
1139 					   unsigned long long *crash_base)
1140 {
1141 	char *cur = cmdline;
1142 
1143 	*crash_size = memparse(cmdline, &cur);
1144 	if (cmdline == cur) {
1145 		pr_warn("crashkernel: memory value expected\n");
1146 		return -EINVAL;
1147 	}
1148 
1149 	if (*cur == '@')
1150 		*crash_base = memparse(cur+1, &cur);
1151 	else if (*cur != ' ' && *cur != '\0') {
1152 		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
1153 		return -EINVAL;
1154 	}
1155 
1156 	return 0;
1157 }
1158 
/* Indices into suffix_tbl[] below. */
#define SUFFIX_HIGH 0
#define SUFFIX_LOW  1
#define SUFFIX_NULL 2
/*
 * Suffixes that may follow the size in a "crashkernel=" option.  The NULL
 * entry ends the table: get_last_crashkernel() walks it until it reaches
 * a NULL pointer.
 */
static __initdata char *suffix_tbl[] = {
	[SUFFIX_HIGH] = ",high",
	[SUFFIX_LOW]  = ",low",
	[SUFFIX_NULL] = NULL,
};
1167 
1168 /*
1169  * That function parses "suffix"  crashkernel command lines like
1170  *
1171  *	crashkernel=size,[high|low]
1172  *
1173  * It returns 0 on success and -EINVAL on failure.
1174  */
parse_crashkernel_suffix(char * cmdline,unsigned long long * crash_size,const char * suffix)1175 static int __init parse_crashkernel_suffix(char *cmdline,
1176 					   unsigned long long	*crash_size,
1177 					   const char *suffix)
1178 {
1179 	char *cur = cmdline;
1180 
1181 	*crash_size = memparse(cmdline, &cur);
1182 	if (cmdline == cur) {
1183 		pr_warn("crashkernel: memory value expected\n");
1184 		return -EINVAL;
1185 	}
1186 
1187 	/* check with suffix */
1188 	if (strncmp(cur, suffix, strlen(suffix))) {
1189 		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
1190 		return -EINVAL;
1191 	}
1192 	cur += strlen(suffix);
1193 	if (*cur != ' ' && *cur != '\0') {
1194 		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
1195 		return -EINVAL;
1196 	}
1197 
1198 	return 0;
1199 }
1200 
get_last_crashkernel(char * cmdline,const char * name,const char * suffix)1201 static __init char *get_last_crashkernel(char *cmdline,
1202 			     const char *name,
1203 			     const char *suffix)
1204 {
1205 	char *p = cmdline, *ck_cmdline = NULL;
1206 
1207 	/* find crashkernel and use the last one if there are more */
1208 	p = strstr(p, name);
1209 	while (p) {
1210 		char *end_p = strchr(p, ' ');
1211 		char *q;
1212 
1213 		if (!end_p)
1214 			end_p = p + strlen(p);
1215 
1216 		if (!suffix) {
1217 			int i;
1218 
1219 			/* skip the one with any known suffix */
1220 			for (i = 0; suffix_tbl[i]; i++) {
1221 				q = end_p - strlen(suffix_tbl[i]);
1222 				if (!strncmp(q, suffix_tbl[i],
1223 					     strlen(suffix_tbl[i])))
1224 					goto next;
1225 			}
1226 			ck_cmdline = p;
1227 		} else {
1228 			q = end_p - strlen(suffix);
1229 			if (!strncmp(q, suffix, strlen(suffix)))
1230 				ck_cmdline = p;
1231 		}
1232 next:
1233 		p = strstr(p+1, name);
1234 	}
1235 
1236 	if (!ck_cmdline)
1237 		return NULL;
1238 
1239 	return ck_cmdline;
1240 }
1241 
__parse_crashkernel(char * cmdline,unsigned long long system_ram,unsigned long long * crash_size,unsigned long long * crash_base,const char * name,const char * suffix)1242 static int __init __parse_crashkernel(char *cmdline,
1243 			     unsigned long long system_ram,
1244 			     unsigned long long *crash_size,
1245 			     unsigned long long *crash_base,
1246 			     const char *name,
1247 			     const char *suffix)
1248 {
1249 	char	*first_colon, *first_space;
1250 	char	*ck_cmdline;
1251 
1252 	BUG_ON(!crash_size || !crash_base);
1253 	*crash_size = 0;
1254 	*crash_base = 0;
1255 
1256 	ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
1257 
1258 	if (!ck_cmdline)
1259 		return -EINVAL;
1260 
1261 	ck_cmdline += strlen(name);
1262 
1263 	if (suffix)
1264 		return parse_crashkernel_suffix(ck_cmdline, crash_size,
1265 				suffix);
1266 	/*
1267 	 * if the commandline contains a ':', then that's the extended
1268 	 * syntax -- if not, it must be the classic syntax
1269 	 */
1270 	first_colon = strchr(ck_cmdline, ':');
1271 	first_space = strchr(ck_cmdline, ' ');
1272 	if (first_colon && (!first_space || first_colon < first_space))
1273 		return parse_crashkernel_mem(ck_cmdline, system_ram,
1274 				crash_size, crash_base);
1275 
1276 	return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
1277 }
1278 
1279 /*
1280  * That function is the entry point for command line parsing and should be
1281  * called from the arch-specific code.
1282  */
parse_crashkernel(char * cmdline,unsigned long long system_ram,unsigned long long * crash_size,unsigned long long * crash_base)1283 int __init parse_crashkernel(char *cmdline,
1284 			     unsigned long long system_ram,
1285 			     unsigned long long *crash_size,
1286 			     unsigned long long *crash_base)
1287 {
1288 	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1289 					"crashkernel=", NULL);
1290 }
1291 
parse_crashkernel_high(char * cmdline,unsigned long long system_ram,unsigned long long * crash_size,unsigned long long * crash_base)1292 int __init parse_crashkernel_high(char *cmdline,
1293 			     unsigned long long system_ram,
1294 			     unsigned long long *crash_size,
1295 			     unsigned long long *crash_base)
1296 {
1297 	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1298 				"crashkernel=", suffix_tbl[SUFFIX_HIGH]);
1299 }
1300 
parse_crashkernel_low(char * cmdline,unsigned long long system_ram,unsigned long long * crash_size,unsigned long long * crash_base)1301 int __init parse_crashkernel_low(char *cmdline,
1302 			     unsigned long long system_ram,
1303 			     unsigned long long *crash_size,
1304 			     unsigned long long *crash_base)
1305 {
1306 	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1307 				"crashkernel=", suffix_tbl[SUFFIX_LOW]);
1308 }
1309 
update_vmcoreinfo_note(void)1310 static void update_vmcoreinfo_note(void)
1311 {
1312 	u32 *buf = vmcoreinfo_note;
1313 
1314 	if (!vmcoreinfo_size)
1315 		return;
1316 	buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
1317 			      vmcoreinfo_size);
1318 	final_note(buf);
1319 }
1320 
/*
 * Append a "CRASHTIME=<value of get_seconds()>" line to the vmcoreinfo
 * data and rebuild the vmcoreinfo ELF note so that it includes the new
 * line.
 */
void crash_save_vmcoreinfo(void)
{
	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
	update_vmcoreinfo_note();
}
1326 
vmcoreinfo_append_str(const char * fmt,...)1327 void vmcoreinfo_append_str(const char *fmt, ...)
1328 {
1329 	va_list args;
1330 	char buf[0x50];
1331 	size_t r;
1332 
1333 	va_start(args, fmt);
1334 	r = vscnprintf(buf, sizeof(buf), fmt, args);
1335 	va_end(args);
1336 
1337 	r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
1338 
1339 	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
1340 
1341 	vmcoreinfo_size += r;
1342 }
1343 
/*
 * Hook called by crash_save_vmcoreinfo_init() just before the vmcoreinfo
 * note is built.  Provide an empty default implementation here --
 * architecture code may override this weak symbol.
 */
void __weak arch_crash_save_vmcoreinfo(void)
{}
1350 
paddr_vmcoreinfo_note(void)1351 unsigned long __weak paddr_vmcoreinfo_note(void)
1352 {
1353 	return __pa((unsigned long)(char *)&vmcoreinfo_note);
1354 }
1355 
/*
 * Collect the kernel's vmcoreinfo at boot (registered as a subsys
 * initcall) and build the vmcoreinfo ELF note from it.  Always returns 0.
 *
 * The VMCOREINFO_*() macros are defined outside this file section;
 * presumably each one appends one entry through vmcoreinfo_append_str()
 * -- confirm in <linux/kexec.h>.  The entries are emitted in the order
 * the statements appear below, so do not reorder them casually.
 */
static int __init crash_save_vmcoreinfo_init(void)
{
	/* kernel release string and page size */
	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
	VMCOREINFO_PAGESIZE(PAGE_SIZE);

	/* global symbols */
	VMCOREINFO_SYMBOL(init_uts_ns);
	VMCOREINFO_SYMBOL(node_online_map);
#ifdef CONFIG_MMU
	VMCOREINFO_SYMBOL(swapper_pg_dir);
#endif
	VMCOREINFO_SYMBOL(_stext);
	VMCOREINFO_SYMBOL(vmap_area_list);

	/* memory model specific symbols */
#ifndef CONFIG_NEED_MULTIPLE_NODES
	VMCOREINFO_SYMBOL(mem_map);
	VMCOREINFO_SYMBOL(contig_page_data);
#endif
#ifdef CONFIG_SPARSEMEM
	VMCOREINFO_SYMBOL(mem_section);
	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
	VMCOREINFO_STRUCT_SIZE(mem_section);
	VMCOREINFO_OFFSET(mem_section, section_mem_map);
#endif
	/* sizes of core memory-management structures */
	VMCOREINFO_STRUCT_SIZE(page);
	VMCOREINFO_STRUCT_SIZE(pglist_data);
	VMCOREINFO_STRUCT_SIZE(zone);
	VMCOREINFO_STRUCT_SIZE(free_area);
	VMCOREINFO_STRUCT_SIZE(list_head);
	VMCOREINFO_SIZE(nodemask_t);
	/* member offsets within those structures */
	VMCOREINFO_OFFSET(page, flags);
	VMCOREINFO_OFFSET(page, _count);
	VMCOREINFO_OFFSET(page, mapping);
	VMCOREINFO_OFFSET(page, lru);
	VMCOREINFO_OFFSET(page, _mapcount);
	VMCOREINFO_OFFSET(page, private);
	VMCOREINFO_OFFSET(pglist_data, node_zones);
	VMCOREINFO_OFFSET(pglist_data, nr_zones);
#ifdef CONFIG_FLAT_NODE_MEM_MAP
	VMCOREINFO_OFFSET(pglist_data, node_mem_map);
#endif
	VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
	VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
	VMCOREINFO_OFFSET(pglist_data, node_id);
	VMCOREINFO_OFFSET(zone, free_area);
	VMCOREINFO_OFFSET(zone, vm_stat);
	VMCOREINFO_OFFSET(zone, spanned_pages);
	VMCOREINFO_OFFSET(free_area, free_list);
	VMCOREINFO_OFFSET(list_head, next);
	VMCOREINFO_OFFSET(list_head, prev);
	VMCOREINFO_OFFSET(vmap_area, va_start);
	VMCOREINFO_OFFSET(vmap_area, list);
	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
	/*
	 * NOTE(review): log_buf_kexec_setup() is defined elsewhere;
	 * presumably it adds the printk log buffer details -- confirm.
	 */
	log_buf_kexec_setup();
	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
	/* numeric constants */
	VMCOREINFO_NUMBER(NR_FREE_PAGES);
	VMCOREINFO_NUMBER(PG_lru);
	VMCOREINFO_NUMBER(PG_private);
	VMCOREINFO_NUMBER(PG_swapcache);
	VMCOREINFO_NUMBER(PG_slab);
#ifdef CONFIG_MEMORY_FAILURE
	VMCOREINFO_NUMBER(PG_hwpoison);
#endif
	VMCOREINFO_NUMBER(PG_head_mask);
	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
#ifdef CONFIG_X86
	VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
#endif
#ifdef CONFIG_HUGETLBFS
	VMCOREINFO_SYMBOL(free_huge_page);
#endif

	/* let the architecture add its own entries, then build the note */
	arch_crash_save_vmcoreinfo();
	update_vmcoreinfo_note();

	return 0;
}

subsys_initcall(crash_save_vmcoreinfo_init);
1434 
/*
 * Move into place and start executing a preloaded standalone
 * executable.  If nothing was preloaded return an error.
 *
 * Returns -EBUSY when kexec_mutex cannot be taken or (with
 * CONFIG_KEXEC_JUMP and a preserve_context image) freeze_processes()
 * fails, -EINVAL when no image is loaded (kexec_image is NULL), the error
 * of the failing suspend step otherwise, or 0.
 */
int kernel_kexec(void)
{
	int error = 0;

	/* do not wait for a concurrent kexec user, just report busy */
	if (!mutex_trylock(&kexec_mutex))
		return -EBUSY;
	if (!kexec_image) {
		error = -EINVAL;
		goto Unlock;
	}

#ifdef CONFIG_KEXEC_JUMP
	if (kexec_image->preserve_context) {
		/*
		 * Suspend-like preparation.  Every failure below jumps to
		 * the label (after machine_kexec()) that undoes exactly the
		 * steps already taken.
		 */
		lock_system_sleep();
		pm_prepare_console();
		error = freeze_processes();
		if (error) {
			/* the specific error code is replaced by -EBUSY */
			error = -EBUSY;
			goto Restore_console;
		}
		suspend_console();
		error = dpm_suspend_start(PMSG_FREEZE);
		if (error)
			goto Resume_console;
		/* At this point, dpm_suspend_start() has been called,
		 * but *not* dpm_suspend_end(). We *must* call
		 * dpm_suspend_end() now.  Otherwise, drivers for
		 * some devices (e.g. interrupt controllers) become
		 * desynchronized with the actual state of the
		 * hardware at resume time, and evil weirdness ensues.
		 */
		error = dpm_suspend_end(PMSG_FREEZE);
		if (error)
			goto Resume_devices;
		error = disable_nonboot_cpus();
		if (error)
			goto Enable_cpus;
		local_irq_disable();
		error = syscore_suspend();
		if (error)
			goto Enable_irqs;
	} else
#endif
	{
		/* ordinary kexec: go through the restart preparation */
		kexec_in_progress = true;
		kernel_restart_prepare(NULL);
		migrate_to_reboot_cpu();

		/*
		 * migrate_to_reboot_cpu() disables CPU hotplug assuming that
		 * no further code needs to use CPU hotplug (which is true in
		 * the reboot case). However, the kexec path depends on using
		 * CPU hotplug again; so re-enable it here.
		 */
		cpu_hotplug_enable();
		pr_emerg("Starting new kernel\n");
		machine_shutdown();
	}

	machine_kexec(kexec_image);

#ifdef CONFIG_KEXEC_JUMP
	/*
	 * Reached after machine_kexec() returned for a preserve_context
	 * image, or via one of the error gotos above: resume in the reverse
	 * order of the preparation.
	 */
	if (kexec_image->preserve_context) {
		syscore_resume();
 Enable_irqs:
		local_irq_enable();
 Enable_cpus:
		enable_nonboot_cpus();
		dpm_resume_start(PMSG_RESTORE);
 Resume_devices:
		dpm_resume_end(PMSG_RESTORE);
 Resume_console:
		resume_console();
		thaw_processes();
 Restore_console:
		pm_restore_console();
		unlock_system_sleep();
	}
#endif

 Unlock:
	mutex_unlock(&kexec_mutex);
	return error;
}
1523 
/*
 * Add and remove page tables for crashkernel memory
 *
 * Provide empty default implementations here -- architecture
 * code may override these weak symbols.
 */
void __weak crash_map_reserved_pages(void)
{}
1532 
/* Empty weak default; architecture code may override it. */
void __weak crash_unmap_reserved_pages(void)
{}
1535