// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Firmware Assisted dump: A robust mechanism to get reliable kernel crash
 * dump with assistance from firmware. This approach does not use kexec;
 * instead, firmware assists in booting the kdump kernel while preserving
 * memory contents. Most of the code has been adapted from the phyp-assisted
 * dump implementation written by Linas Vepstas and Manish Ahuja.
 *
 * Copyright 2011 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "fadump: " fmt

#include <linux/string.h>
#include <linux/memblock.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
#include <linux/crash_dump.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/cma.h>
#include <linux/hugetlb.h>

#include <asm/debugfs.h>
#include <asm/page.h>
#include <asm/prom.h>
#include <asm/fadump.h>
#include <asm/fadump-internal.h>
#include <asm/setup.h>

static struct fw_dump fw_dump;

static void __init fadump_reserve_crash_area(u64 base);

#ifndef CONFIG_PRESERVE_FA_DUMP
static DEFINE_MUTEX(fadump_mutex);
struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false };

#define RESERVED_RNGS_SZ	16384 /* 16K - 128 entries */
#define RESERVED_RNGS_CNT	(RESERVED_RNGS_SZ / \
				 sizeof(struct fadump_memory_range))
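/*
 * Note: reserved ranges are scanned from the flattened device tree at
 * early boot, before dynamic allocation is available, so they are
 * collected in the static array below; the is_static flag marks the
 * info as non-resizable (see fadump_add_mem_range()).
 */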
static struct fadump_memory_range rngs[RESERVED_RNGS_CNT];
struct fadump_mrange_info reserved_mrange_info = { "reserved", rngs,
						   RESERVED_RNGS_SZ, 0,
						   RESERVED_RNGS_CNT, true };

static void __init early_init_dt_scan_reserved_ranges(unsigned long node);

#ifdef CONFIG_CMA
static struct cma *fadump_cma;

/*
 * fadump_cma_init() - Initialize a CMA area from the fadump reserved memory
 *
 * The total fadump reserved memory covers the boot memory size + CPU state
 * data size + HPTE region size and metadata. Only the area equivalent to
 * the boot memory size is handed over for CMA use; the remaining portion
 * of the fadump reserved memory is not given to CMA and those pages stay
 * reserved. The boot memory size is aligned per CMA requirements to
 * satisfy the cma_init_reserved_mem() call. Even if that call fails, the
 * memory reservation is still in place, so fadump can continue regardless.
 */
int __init fadump_cma_init(void)
{
	unsigned long long base, size;
	int rc;

	if (!fw_dump.fadump_enabled)
		return 0;

	/*
	 * Do not use CMA if user has provided fadump=nocma kernel parameter.
	 * Return 1 to continue with fadump old behaviour.
	 */
	if (fw_dump.nocma)
		return 1;

	base = fw_dump.reserve_dump_area_start;
	size = fw_dump.boot_memory_size;

	if (!size)
		return 0;

	rc = cma_init_reserved_mem(base, size, 0, "fadump_cma", &fadump_cma);
	if (rc) {
		pr_err("Failed to init cma area for firmware-assisted dump, %d\n", rc);
		/*
		 * Though the CMA init has failed, we still have the memory
		 * reservation with us. The reserved memory will be blocked
		 * from production system usage. Hence return 1, so that we
		 * can continue with fadump.
		 */
		return 1;
	}

	/* The CMA area for fadump is now successfully initialized. */
	pr_info("Initialized 0x%lx bytes cma area at %ldMB from 0x%lx "
		"bytes of memory reserved for firmware-assisted dump\n",
		cma_get_size(fadump_cma),
		(unsigned long)cma_get_base(fadump_cma) >> 20,
		fw_dump.reserve_dump_area_size);
	return 1;
}
#else
static int __init fadump_cma_init(void) { return 1; }
#endif /* CONFIG_CMA */

/* Scan the Firmware Assisted dump configuration details. */
int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
				      int depth, void *data)
{
	if (depth == 0) {
		early_init_dt_scan_reserved_ranges(node);
		return 0;
	}

	if (depth != 1)
		return 0;

	if (strcmp(uname, "rtas") == 0) {
		rtas_fadump_dt_scan(&fw_dump, node);
		return 1;
	}

	if (strcmp(uname, "ibm,opal") == 0) {
		opal_fadump_dt_scan(&fw_dump, node);
		return 1;
	}

	return 0;
}

/*
 * If fadump is registered, check if the memory provided
 * falls within the boot memory area or the reserved memory area.
 */
int is_fadump_memory_area(u64 addr, unsigned long size)
{
	u64 d_start, d_end;

	if (!fw_dump.dump_registered)
		return 0;

	if (!size)
		return 0;

	d_start = fw_dump.reserve_dump_area_start;
	d_end = d_start + fw_dump.reserve_dump_area_size;
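	/* Does [addr, addr + size) intersect the reserved dump area? */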
	if (((addr + size) > d_start) && (addr <= d_end))
		return 1;

	return (addr <= fw_dump.boot_mem_top);
}

int should_fadump_crash(void)
{
	if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
		return 0;
	return 1;
}

int is_fadump_active(void)
{
	return fw_dump.dump_active;
}

/*
 * Returns true if there are no holes in the memory area between d_start
 * and d_end, false otherwise.
 */
static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end)
{
	struct memblock_region *reg;
	bool ret = false;
	u64 start, end;

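	/*
	 * memblock regions are sorted and non-overlapping, so each pass
	 * either finds a hole before d_start (not contiguous) or advances
	 * d_start past the covered region until d_end is reached.
	 */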
	for_each_memblock(memory, reg) {
		start = max_t(u64, d_start, reg->base);
		end = min_t(u64, d_end, (reg->base + reg->size));
		if (d_start < end) {
			/* Memory hole from d_start to start */
			if (start > d_start)
				break;

			if (end == d_end) {
				ret = true;
				break;
			}

			d_start = end + 1;
		}
	}

	return ret;
}

/*
 * Returns true if there are no holes in the boot memory area,
 * false otherwise.
 */
bool is_fadump_boot_mem_contiguous(void)
{
	unsigned long d_start, d_end;
	bool ret = false;
	int i;

	for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
		d_start = fw_dump.boot_mem_addr[i];
		d_end   = d_start + fw_dump.boot_mem_sz[i];

		ret = is_fadump_mem_area_contiguous(d_start, d_end);
		if (!ret)
			break;
	}

	return ret;
}

/*
 * Returns true if there are no holes in the reserved memory area,
 * false otherwise.
 */
bool is_fadump_reserved_mem_contiguous(void)
{
	u64 d_start, d_end;

	d_start	= fw_dump.reserve_dump_area_start;
	d_end	= d_start + fw_dump.reserve_dump_area_size;
	return is_fadump_mem_area_contiguous(d_start, d_end);
}

/* Print firmware assisted dump configuration for debugging purposes. */
static void fadump_show_config(void)
{
	int i;

	pr_debug("Support for firmware-assisted dump (fadump): %s\n",
			(fw_dump.fadump_supported ? "present" : "no support"));

	if (!fw_dump.fadump_supported)
		return;

	pr_debug("Fadump enabled    : %s\n",
				(fw_dump.fadump_enabled ? "yes" : "no"));
	pr_debug("Dump Active       : %s\n",
				(fw_dump.dump_active ? "yes" : "no"));
	pr_debug("Dump section sizes:\n");
	pr_debug("    CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
	pr_debug("    HPTE region size   : %lx\n", fw_dump.hpte_region_size);
	pr_debug("    Boot memory size   : %lx\n", fw_dump.boot_memory_size);
	pr_debug("    Boot memory top    : %llx\n", fw_dump.boot_mem_top);
	pr_debug("Boot memory regions cnt: %llx\n", fw_dump.boot_mem_regs_cnt);
	for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
		pr_debug("[%03d] base = %llx, size = %llx\n", i,
			 fw_dump.boot_mem_addr[i], fw_dump.boot_mem_sz[i]);
	}
}

/**
 * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM
 *
 * Function to find the largest memory size we need to reserve during the
 * early boot process. This will be the size of the memory that is required
 * for a kernel to boot successfully.
 *
 * This function has been taken from the phyp-assisted dump feature
 * implementation.
 *
 * Returns the larger of 256MB or 5% of system RAM, rounded down to a
 * multiple of 256MB.
 *
 * TODO: Come up with a better approach to find out a more accurate memory
 * size that is required for a kernel to boot successfully.
 */
static __init u64 fadump_calculate_reserve_size(void)
{
	u64 base, size, bootmem_min;
	int ret;

	if (fw_dump.reserve_bootvar)
		pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n");

	/*
	 * Check if the size is specified through crashkernel= cmdline
	 * option. If yes, then use that but ignore base as fadump reserves
	 * memory at a predefined offset.
	 */
	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
				&size, &base);
	if (ret == 0 && size > 0) {
		unsigned long max_size;

		if (fw_dump.reserve_bootvar)
			pr_info("Using 'crashkernel=' parameter for memory reservation.\n");

		fw_dump.reserve_bootvar = (unsigned long)size;

		/*
		 * Adjust if the boot memory size specified is above
		 * the upper limit.
		 */
		max_size = memblock_phys_mem_size() / MAX_BOOT_MEM_RATIO;
		if (fw_dump.reserve_bootvar > max_size) {
			fw_dump.reserve_bootvar = max_size;
			pr_info("Adjusted boot memory size to %luMB\n",
				(fw_dump.reserve_bootvar >> 20));
		}

		return fw_dump.reserve_bootvar;
	} else if (fw_dump.reserve_bootvar) {
		/*
		 * 'fadump_reserve_mem=' is being used to reserve memory
		 * for firmware-assisted dump.
		 */
		return fw_dump.reserve_bootvar;
	}

	/* divide by 20 to get 5% of value */
	size = memblock_phys_mem_size() / 20;

	/* round it down to a multiple of 256MB */
	size = size & ~0x0FFFFFFFUL;
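	/*
	 * Illustrative example: with 64GB of RAM, 5% is ~3.2GB, which the
	 * mask above rounds down to 3GB (12 x 256MB).
	 */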

	/* Truncate to memory_limit. We don't want to over-reserve memory. */
	if (memory_limit && size > memory_limit)
		size = memory_limit;

	bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
	return (size > bootmem_min ? size : bootmem_min);
}

/*
 * Calculate the total memory size required to be reserved for
 * firmware-assisted dump registration.
 */
static unsigned long get_fadump_area_size(void)
{
	unsigned long size = 0;

	size += fw_dump.cpu_state_data_size;
	size += fw_dump.hpte_region_size;
	size += fw_dump.boot_memory_size;
	size += sizeof(struct fadump_crash_info_header);
	size += sizeof(struct elfhdr); /* ELF core header */
	size += sizeof(struct elf_phdr); /* placeholder for cpu notes */
	/* Program headers for crash memory regions. */
	size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);

	size = PAGE_ALIGN(size);

	/* This is to hold kernel metadata on platforms that support it */
	size += (fw_dump.ops->fadump_get_metadata_size ?
		 fw_dump.ops->fadump_get_metadata_size() : 0);
	return size;
}

static int __init add_boot_mem_region(unsigned long rstart,
				      unsigned long rsize)
{
	int i = fw_dump.boot_mem_regs_cnt++;

	if (fw_dump.boot_mem_regs_cnt > FADUMP_MAX_MEM_REGS) {
		fw_dump.boot_mem_regs_cnt = FADUMP_MAX_MEM_REGS;
		return 0;
	}

	pr_debug("Added boot memory range[%d] [%#016lx-%#016lx)\n",
		 i, rstart, (rstart + rsize));
	fw_dump.boot_mem_addr[i] = rstart;
	fw_dump.boot_mem_sz[i] = rsize;
	return 1;
}

/*
 * Firmware usually has a hard limit on the data it can copy per region.
 * Honour that by splitting a memory range into multiple regions.
 */
static int __init add_boot_mem_regions(unsigned long mstart,
				       unsigned long msize)
{
	unsigned long rstart, rsize, max_size;
	int ret = 1;

	rstart = mstart;
	max_size = fw_dump.max_copy_size ? fw_dump.max_copy_size : msize;
	while (msize) {
		if (msize > max_size)
			rsize = max_size;
		else
			rsize = msize;

		ret = add_boot_mem_region(rstart, rsize);
		if (!ret)
			break;

		msize -= rsize;
		rstart += rsize;
	}

	return ret;
}

static int __init fadump_get_boot_mem_regions(void)
{
	unsigned long base, size, cur_size, hole_size, last_end;
	unsigned long mem_size = fw_dump.boot_memory_size;
	struct memblock_region *reg;
	int ret = 1;

	fw_dump.boot_mem_regs_cnt = 0;

	last_end = 0;
	hole_size = 0;
	cur_size = 0;
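	/*
	 * Add regions in ascending address order until boot_memory_size
	 * worth of RAM is covered; holes between regions are tracked so
	 * that boot_mem_top reflects the end address including holes.
	 */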
	for_each_memblock(memory, reg) {
		base = reg->base;
		size = reg->size;
		hole_size += (base - last_end);

		if ((cur_size + size) >= mem_size) {
			size = (mem_size - cur_size);
			ret = add_boot_mem_regions(base, size);
			break;
		}

		mem_size -= size;
		cur_size += size;
		ret = add_boot_mem_regions(base, size);
		if (!ret)
			break;

		last_end = base + size;
	}
	fw_dump.boot_mem_top = PAGE_ALIGN(fw_dump.boot_memory_size + hole_size);

	return ret;
}

/*
 * Returns true if the given range overlaps a reserved memory range at or
 * after index *idx, updating *idx to the index of the overlapping range;
 * false otherwise.
 */
static bool overlaps_reserved_ranges(u64 base, u64 end, int *idx)
{
	bool ret = false;
	int i;

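	/*
	 * The ranges are kept sorted (see sort_and_merge_mem_ranges()), so
	 * the scan can stop at the first range starting at or after end.
	 */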
	for (i = *idx; i < reserved_mrange_info.mem_range_cnt; i++) {
		u64 rbase = reserved_mrange_info.mem_ranges[i].base;
		u64 rend = rbase + reserved_mrange_info.mem_ranges[i].size;

		if (end <= rbase)
			break;

		if ((end > rbase) && (base < rend)) {
			*idx = i;
			ret = true;
			break;
		}
	}

	return ret;
}

/*
 * Locate a suitable memory area to reserve memory for FADump. While at it,
 * look up reserved-ranges and avoid overlapping them, as they are in use
 * by firmware.
 */
static u64 __init fadump_locate_reserve_mem(u64 base, u64 size)
{
	struct fadump_memory_range *mrngs;
	phys_addr_t mstart, mend;
	int idx = 0;
	u64 i, ret = 0;

	mrngs = reserved_mrange_info.mem_ranges;
	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
				&mstart, &mend, NULL) {
		pr_debug("%llu) mstart: %llx, mend: %llx, base: %llx\n",
			 i, mstart, mend, base);

		if (mstart > base)
			base = PAGE_ALIGN(mstart);

		while ((mend > base) && ((mend - base) >= size)) {
			if (!overlaps_reserved_ranges(base, base + size, &idx)) {
				ret = base;
				goto out;
			}

			base = mrngs[idx].base + mrngs[idx].size;
			base = PAGE_ALIGN(base);
		}
	}

out:
	return ret;
}

int __init fadump_reserve_mem(void)
{
	u64 base, size, mem_boundary, bootmem_min;
	int ret = 1;

	if (!fw_dump.fadump_enabled)
		return 0;

	if (!fw_dump.fadump_supported) {
		pr_info("Firmware-Assisted Dump is not supported on this hardware\n");
		goto error_out;
	}

	/*
	 * Initialize boot memory size.
	 * If dump is active then we have already calculated the size during
	 * the first kernel's boot.
	 */
	if (!fw_dump.dump_active) {
		fw_dump.boot_memory_size =
			PAGE_ALIGN(fadump_calculate_reserve_size());
#ifdef CONFIG_CMA
		if (!fw_dump.nocma) {
			fw_dump.boot_memory_size =
				ALIGN(fw_dump.boot_memory_size,
				      FADUMP_CMA_ALIGNMENT);
		}
#endif

		bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
		if (fw_dump.boot_memory_size < bootmem_min) {
			pr_err("Can't enable fadump with boot memory size (0x%lx) less than 0x%llx\n",
			       fw_dump.boot_memory_size, bootmem_min);
			goto error_out;
		}

		if (!fadump_get_boot_mem_regions()) {
			pr_err("Too many holes in boot memory area to enable fadump\n");
			goto error_out;
		}
	}

	/*
	 * Calculate the memory boundary.
	 * If memory_limit is less than the actual memory boundary then
	 * reserve the memory for fadump beyond the memory_limit and adjust
	 * the memory_limit accordingly, so that the running kernel can run
	 * with the specified memory_limit.
	 */
	if (memory_limit && memory_limit < memblock_end_of_DRAM()) {
		size = get_fadump_area_size();
		if ((memory_limit + size) < memblock_end_of_DRAM())
			memory_limit += size;
		else
			memory_limit = memblock_end_of_DRAM();
		printk(KERN_INFO "Adjusted memory_limit for firmware-assisted"
				" dump, now %#016llx\n", memory_limit);
	}
	if (memory_limit)
		mem_boundary = memory_limit;
	else
		mem_boundary = memblock_end_of_DRAM();

	base = fw_dump.boot_mem_top;
	size = get_fadump_area_size();
	fw_dump.reserve_dump_area_size = size;
	if (fw_dump.dump_active) {
		pr_info("Firmware-assisted dump is active.\n");

#ifdef CONFIG_HUGETLB_PAGE
		/*
		 * The FADump capture kernel doesn't care much about
		 * hugepages. In fact, handling hugepages in the capture
		 * kernel is asking for trouble. So, disable HugeTLB support
		 * when fadump is active.
		 */
		hugetlb_disabled = true;
#endif
		/*
		 * If the last boot has crashed then reserve all the memory
		 * above the boot memory size so that we don't touch it until
		 * the dump is written to disk by the userspace tool. This
		 * memory can be released for general use by invalidating
		 * fadump.
		 */
		fadump_reserve_crash_area(base);

		pr_debug("fadumphdr_addr = %#016lx\n", fw_dump.fadumphdr_addr);
		pr_debug("Reserve dump area start address: 0x%lx\n",
			 fw_dump.reserve_dump_area_start);
	} else {
		/*
		 * Reserve memory at an offset closer to the bottom of RAM to
		 * minimize the impact of memory hot-remove operations.
		 */
		base = fadump_locate_reserve_mem(base, size);

		if (!base || (base + size > mem_boundary)) {
			pr_err("Failed to find memory chunk for reservation!\n");
			goto error_out;
		}
		fw_dump.reserve_dump_area_start = base;

		/*
		 * Calculate the kernel metadata address and register it with
		 * f/w if the platform supports it.
		 */
		if (fw_dump.ops->fadump_setup_metadata &&
		    (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
			goto error_out;

		if (memblock_reserve(base, size)) {
			pr_err("Failed to reserve memory!\n");
			goto error_out;
		}

		pr_info("Reserved %lldMB of memory at %#016llx (System RAM: %lldMB)\n",
			(size >> 20), base, (memblock_phys_mem_size() >> 20));

		ret = fadump_cma_init();
	}

	return ret;
error_out:
	fw_dump.fadump_enabled = 0;
	fw_dump.reserve_dump_area_size = 0;
	return 0;
}

/* Look for the fadump= cmdline option. */
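/* Supported values: fadump=on | off | nocma ("nocma" implies fadump on). */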
static int __init early_fadump_param(char *p)
{
	if (!p)
		return 1;

	if (strncmp(p, "on", 2) == 0)
		fw_dump.fadump_enabled = 1;
	else if (strncmp(p, "off", 3) == 0)
		fw_dump.fadump_enabled = 0;
	else if (strncmp(p, "nocma", 5) == 0) {
		fw_dump.fadump_enabled = 1;
		fw_dump.nocma = 1;
	}

	return 0;
}
early_param("fadump", early_fadump_param);

/*
 * Look for the fadump_reserve_mem= cmdline option.
 * TODO: Remove references to the 'fadump_reserve_mem=' parameter once
 *       users have become accustomed to the 'crashkernel=' parameter.
 */
static int __init early_fadump_reserve_mem(char *p)
{
	if (p)
		fw_dump.reserve_bootvar = memparse(p, &p);
	return 0;
}
early_param("fadump_reserve_mem", early_fadump_reserve_mem);

void crash_fadump(struct pt_regs *regs, const char *str)
{
	struct fadump_crash_info_header *fdh = NULL;
	int old_cpu, this_cpu;

	if (!should_fadump_crash())
		return;

	/*
	 * old_cpu == -1 means this is the first CPU which has come here,
	 * go ahead and trigger fadump.
	 *
	 * old_cpu != -1 means some other CPU is already on its way
	 * to trigger fadump, just keep looping here.
	 */
	this_cpu = smp_processor_id();
	old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu);

	if (old_cpu != -1) {
		/*
		 * We can't loop here indefinitely. Wait as long as fadump
		 * is in force. If we race with fadump un-registration this
		 * loop will break and then we go down the normal panic path
		 * and reboot. If fadump is in force the first crashing
		 * cpu will definitely trigger fadump.
		 */
		while (fw_dump.dump_registered)
			cpu_relax();
		return;
	}

	fdh = __va(fw_dump.fadumphdr_addr);
	fdh->crashing_cpu = crashing_cpu;
	crash_save_vmcoreinfo();

	if (regs)
		fdh->regs = *regs;
	else
		ppc_save_regs(&fdh->regs);

	fdh->online_mask = *cpu_online_mask;

	fw_dump.ops->fadump_trigger(fdh, str);
}

u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
{
	struct elf_prstatus prstatus;

	memset(&prstatus, 0, sizeof(prstatus));
	/*
	 * FIXME: How do I get the PID? Do I really need it?
	 * prstatus.pr_pid = ????
	 */
	elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
	buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
			      &prstatus, sizeof(prstatus));
	return buf;
}

void fadump_update_elfcore_header(char *bufp)
{
	struct elfhdr *elf;
	struct elf_phdr *phdr;

	elf = (struct elfhdr *)bufp;
	bufp += sizeof(struct elfhdr);

	/* The first note is a placeholder for cpu notes info. */
	phdr = (struct elf_phdr *)bufp;

	if (phdr->p_type == PT_NOTE) {
		phdr->p_paddr	= __pa(fw_dump.cpu_notes_buf_vaddr);
		phdr->p_offset	= phdr->p_paddr;
		phdr->p_filesz	= fw_dump.cpu_notes_buf_size;
		phdr->p_memsz	= fw_dump.cpu_notes_buf_size;
	}
	return;
}

static void *fadump_alloc_buffer(unsigned long size)
{
	unsigned long count, i;
	struct page *page;
	void *vaddr;

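	/*
	 * The backing pages are marked reserved so the buffer is treated as
	 * in-use kernel memory; fadump_free_buffer() undoes this via
	 * free_reserved_area().
	 */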
	vaddr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
	if (!vaddr)
		return NULL;

	count = PAGE_ALIGN(size) / PAGE_SIZE;
	page = virt_to_page(vaddr);
	for (i = 0; i < count; i++)
		mark_page_reserved(page + i);
	return vaddr;
}

static void fadump_free_buffer(unsigned long vaddr, unsigned long size)
{
	free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL);
}

s32 fadump_setup_cpu_notes_buf(u32 num_cpus)
{
	/* Allocate buffer to hold cpu crash notes. */
	fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
	fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
	fw_dump.cpu_notes_buf_vaddr =
		(unsigned long)fadump_alloc_buffer(fw_dump.cpu_notes_buf_size);
	if (!fw_dump.cpu_notes_buf_vaddr) {
		pr_err("Failed to allocate %ld bytes for CPU notes buffer\n",
		       fw_dump.cpu_notes_buf_size);
		return -ENOMEM;
	}

	pr_debug("Allocated buffer for cpu notes of size %ld at 0x%lx\n",
		 fw_dump.cpu_notes_buf_size,
		 fw_dump.cpu_notes_buf_vaddr);
	return 0;
}

void fadump_free_cpu_notes_buf(void)
{
	if (!fw_dump.cpu_notes_buf_vaddr)
		return;

	fadump_free_buffer(fw_dump.cpu_notes_buf_vaddr,
			   fw_dump.cpu_notes_buf_size);
	fw_dump.cpu_notes_buf_vaddr = 0;
	fw_dump.cpu_notes_buf_size = 0;
}

static void fadump_free_mem_ranges(struct fadump_mrange_info *mrange_info)
{
	if (mrange_info->is_static) {
		mrange_info->mem_range_cnt = 0;
		return;
	}

	kfree(mrange_info->mem_ranges);
	memset((void *)((u64)mrange_info + RNG_NAME_SZ), 0,
	       (sizeof(struct fadump_mrange_info) - RNG_NAME_SZ));
}

/*
 * Allocate or reallocate the mem_ranges array in incremental units
 * of PAGE_SIZE.
 */
static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info)
{
	struct fadump_memory_range *new_array;
	u64 new_size;

	new_size = mrange_info->mem_ranges_sz + PAGE_SIZE;
	pr_debug("Allocating %llu bytes of memory for %s memory ranges\n",
		 new_size, mrange_info->name);

	new_array = krealloc(mrange_info->mem_ranges, new_size, GFP_KERNEL);
	if (new_array == NULL) {
		pr_err("Insufficient memory for setting up %s memory ranges\n",
		       mrange_info->name);
		fadump_free_mem_ranges(mrange_info);
		return -ENOMEM;
	}

	mrange_info->mem_ranges = new_array;
	mrange_info->mem_ranges_sz = new_size;
	mrange_info->max_mem_ranges = (new_size /
				       sizeof(struct fadump_memory_range));
	return 0;
}

static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
				       u64 base, u64 end)
{
	struct fadump_memory_range *mem_ranges = mrange_info->mem_ranges;
	bool is_adjacent = false;
	u64 start, size;

	if (base == end)
		return 0;

	/*
	 * Fold adjacent memory ranges to bring down the memory ranges/
	 * PT_LOAD segments count.
	 */
	if (mrange_info->mem_range_cnt) {
		start = mem_ranges[mrange_info->mem_range_cnt - 1].base;
		size  = mem_ranges[mrange_info->mem_range_cnt - 1].size;

		/*
		 * The boot memory area needs separate PT_LOAD segment(s) as
		 * it is moved to a different location at the time of crash.
		 * So, fold only if the region is not the boot memory area.
		 */
		if ((start + size) == base && start >= fw_dump.boot_mem_top)
			is_adjacent = true;
	}
	if (!is_adjacent) {
		/* resize the array on reaching the limit */
		if (mrange_info->mem_range_cnt == mrange_info->max_mem_ranges) {
			int ret;

			if (mrange_info->is_static) {
				pr_err("Reached array size limit for %s memory ranges\n",
				       mrange_info->name);
				return -ENOSPC;
			}

			ret = fadump_alloc_mem_ranges(mrange_info);
			if (ret)
				return ret;

			/* Update to the new resized array */
			mem_ranges = mrange_info->mem_ranges;
		}

		start = base;
		mem_ranges[mrange_info->mem_range_cnt].base = start;
		mrange_info->mem_range_cnt++;
	}

	mem_ranges[mrange_info->mem_range_cnt - 1].size = (end - start);
	pr_debug("%s_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
		 mrange_info->name, (mrange_info->mem_range_cnt - 1),
		 start, end - 1, (end - start));
	return 0;
}

static int fadump_exclude_reserved_area(u64 start, u64 end)
{
	u64 ra_start, ra_end;
	int ret = 0;

	ra_start = fw_dump.reserve_dump_area_start;
	ra_end = ra_start + fw_dump.reserve_dump_area_size;

	if ((ra_start < end) && (ra_end > start)) {
		if ((start < ra_start) && (end > ra_end)) {
			ret = fadump_add_mem_range(&crash_mrange_info,
						   start, ra_start);
			if (ret)
				return ret;

			ret = fadump_add_mem_range(&crash_mrange_info,
						   ra_end, end);
		} else if (start < ra_start) {
			ret = fadump_add_mem_range(&crash_mrange_info,
						   start, ra_start);
		} else if (ra_end < end) {
			ret = fadump_add_mem_range(&crash_mrange_info,
						   ra_end, end);
		}
	} else
		ret = fadump_add_mem_range(&crash_mrange_info, start, end);

	return ret;
}

static int fadump_init_elfcore_header(char *bufp)
{
	struct elfhdr *elf;

	elf = (struct elfhdr *) bufp;
	bufp += sizeof(struct elfhdr);
	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;
	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
	elf->e_type = ET_CORE;
	elf->e_machine = ELF_ARCH;
	elf->e_version = EV_CURRENT;
	elf->e_entry = 0;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_shoff = 0;
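	/*
	 * On powerpc64, _CALL_ELF (when defined) gives the ELF ABI version
	 * in use (e.g. 2 for ELFv2); recording it in e_flags lets tools
	 * reading the core interpret it against the right ABI.
	 */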
#if defined(_CALL_ELF)
	elf->e_flags = _CALL_ELF;
#else
	elf->e_flags = 0;
#endif
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = 0;
	elf->e_shentsize = 0;
	elf->e_shnum = 0;
	elf->e_shstrndx = 0;

	return 0;
}

/*
 * Traverse the memblock structure and set up crash memory ranges. These
 * ranges will be used to create PT_LOAD program headers in the elfcore
 * header.
 */
static int fadump_setup_crash_memory_ranges(void)
{
	struct memblock_region *reg;
	u64 start, end;
	int i, ret;

	pr_debug("Setup crash memory ranges.\n");
	crash_mrange_info.mem_range_cnt = 0;

	/*
	 * Boot memory region(s) registered with firmware are moved to a
	 * different location at the time of crash. Create separate program
	 * header(s) for these memory chunks with the correct offset.
	 */
	for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
		start = fw_dump.boot_mem_addr[i];
		end = start + fw_dump.boot_mem_sz[i];
		ret = fadump_add_mem_range(&crash_mrange_info, start, end);
		if (ret)
			return ret;
	}

	for_each_memblock(memory, reg) {
		start = (u64)reg->base;
		end = start + (u64)reg->size;

		/*
		 * skip the memory chunk that is already added
		 * (0 through boot_mem_top).
		 */
		if (start < fw_dump.boot_mem_top) {
			if (end > fw_dump.boot_mem_top)
				start = fw_dump.boot_mem_top;
			else
				continue;
		}

		/* add this range excluding the reserved dump area. */
		ret = fadump_exclude_reserved_area(start, end);
		if (ret)
			return ret;
	}

	return 0;
}

/*
 * If the given physical address falls within the boot memory region then
 * return the relocated address that points to the dump region reserved
 * for saving initial boot memory contents.
 */
static inline unsigned long fadump_relocate(unsigned long paddr)
{
	unsigned long raddr, rstart, rend, rlast, hole_size;
	int i;

	hole_size = 0;
	rlast = 0;
	raddr = paddr;
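	/*
	 * Boot memory regions are copied contiguously to boot_mem_dest_addr
	 * by firmware, so subtract the cumulative hole size to map paddr to
	 * its offset within the copied data.
	 */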
	for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
		rstart = fw_dump.boot_mem_addr[i];
		rend = rstart + fw_dump.boot_mem_sz[i];
		hole_size += (rstart - rlast);

		if (paddr >= rstart && paddr < rend) {
			raddr += fw_dump.boot_mem_dest_addr - hole_size;
			break;
		}

		rlast = rend;
	}

	pr_debug("vmcoreinfo: paddr = 0x%lx, raddr = 0x%lx\n", paddr, raddr);
	return raddr;
}

static int fadump_create_elfcore_headers(char *bufp)
{
	unsigned long long raddr, offset;
	struct elf_phdr *phdr;
	struct elfhdr *elf;
	int i, j;

	fadump_init_elfcore_header(bufp);
	elf = (struct elfhdr *)bufp;
	bufp += sizeof(struct elfhdr);

	/*
	 * Set up an ELF PT_NOTE as a placeholder for cpu notes info. The
	 * notes info will be populated during the second kernel boot after
	 * crash. Hence this PT_NOTE will always be the first elf note.
	 *
	 * NOTE: Any new ELF note addition should be placed after this note.
	 */
	phdr = (struct elf_phdr *)bufp;
	bufp += sizeof(struct elf_phdr);
	phdr->p_type = PT_NOTE;
	phdr->p_flags = 0;
	phdr->p_vaddr = 0;
	phdr->p_align = 0;

	phdr->p_offset = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = 0;
	phdr->p_memsz = 0;

	(elf->e_phnum)++;

	/* setup ELF PT_NOTE for vmcoreinfo */
	phdr = (struct elf_phdr *)bufp;
	bufp += sizeof(struct elf_phdr);
	phdr->p_type	= PT_NOTE;
	phdr->p_flags	= 0;
	phdr->p_vaddr	= 0;
	phdr->p_align	= 0;

	phdr->p_paddr	= fadump_relocate(paddr_vmcoreinfo_note());
	phdr->p_offset	= phdr->p_paddr;
	phdr->p_memsz	= phdr->p_filesz = VMCOREINFO_NOTE_SIZE;

	/* Increment number of program headers. */
	(elf->e_phnum)++;

	/* setup PT_LOAD sections. */
	j = 0;
	offset = 0;
	raddr = fw_dump.boot_mem_addr[0];
	for (i = 0; i < crash_mrange_info.mem_range_cnt; i++) {
		u64 mbase, msize;

		mbase = crash_mrange_info.mem_ranges[i].base;
		msize = crash_mrange_info.mem_ranges[i].size;
		if (!msize)
			continue;

		phdr = (struct elf_phdr *)bufp;
		bufp += sizeof(struct elf_phdr);
		phdr->p_type	= PT_LOAD;
		phdr->p_flags	= PF_R|PF_W|PF_X;
		phdr->p_offset	= mbase;

		if (mbase == raddr) {
			/*
			 * The entire real memory region will be moved by
			 * firmware to the specified destination_address.
			 * Hence set the correct offset.
			 */
			phdr->p_offset = fw_dump.boot_mem_dest_addr + offset;
			if (j < (fw_dump.boot_mem_regs_cnt - 1)) {
				offset += fw_dump.boot_mem_sz[j];
				raddr = fw_dump.boot_mem_addr[++j];
			}
		}

		phdr->p_paddr = mbase;
		phdr->p_vaddr = (unsigned long)__va(mbase);
		phdr->p_filesz = msize;
		phdr->p_memsz = msize;
		phdr->p_align = 0;

		/* Increment number of program headers. */
		(elf->e_phnum)++;
	}
	return 0;
}

static unsigned long init_fadump_header(unsigned long addr)
{
	struct fadump_crash_info_header *fdh;

	if (!addr)
		return 0;

	fdh = __va(addr);
	addr += sizeof(struct fadump_crash_info_header);

	memset(fdh, 0, sizeof(struct fadump_crash_info_header));
	fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
	fdh->elfcorehdr_addr = addr;
	/* We will set the crashing cpu id in crash_fadump() during crash. */
	fdh->crashing_cpu = FADUMP_CPU_UNKNOWN;

	return addr;
}

static int register_fadump(void)
{
	unsigned long addr;
	void *vaddr;
	int ret;

	/*
	 * If no memory is reserved then we cannot register for firmware-
	 * assisted dump.
	 */
	if (!fw_dump.reserve_dump_area_size)
		return -ENODEV;

	ret = fadump_setup_crash_memory_ranges();
	if (ret)
		return ret;

	addr = fw_dump.fadumphdr_addr;

	/* Initialize fadump crash info header. */
	addr = init_fadump_header(addr);
	vaddr = __va(addr);

	pr_debug("Creating ELF core headers at %#016lx\n", addr);
	fadump_create_elfcore_headers(vaddr);

	/* Register the future kernel dump with firmware. */
	pr_debug("Registering for firmware-assisted kernel dump...\n");
	return fw_dump.ops->fadump_register(&fw_dump);
}

void fadump_cleanup(void)
{
	if (!fw_dump.fadump_supported)
		return;

	/* Invalidate the registration only if dump is active. */
	if (fw_dump.dump_active) {
		pr_debug("Invalidating firmware-assisted dump registration\n");
		fw_dump.ops->fadump_invalidate(&fw_dump);
	} else if (fw_dump.dump_registered) {
		/* Un-register firmware-assisted dump if it was registered. */
		fw_dump.ops->fadump_unregister(&fw_dump);
		fadump_free_mem_ranges(&crash_mrange_info);
	}

	if (fw_dump.ops->fadump_cleanup)
		fw_dump.ops->fadump_cleanup(&fw_dump);
}

static void fadump_free_reserved_memory(unsigned long start_pfn,
					unsigned long end_pfn)
{
	unsigned long pfn;
	unsigned long time_limit = jiffies + HZ;

	pr_info("freeing reserved memory (0x%llx - 0x%llx)\n",
		PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));

	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		free_reserved_page(pfn_to_page(pfn));

		if (time_after(jiffies, time_limit)) {
			cond_resched();
			time_limit = jiffies + HZ;
		}
	}
}

/*
 * Skip memory holes and free memory that was actually reserved.
 */
static void fadump_release_reserved_area(u64 start, u64 end)
{
	u64 tstart, tend, spfn, epfn;
	struct memblock_region *reg;

	spfn = PHYS_PFN(start);
	epfn = PHYS_PFN(end);
	for_each_memblock(memory, reg) {
		tstart = max_t(u64, spfn, memblock_region_memory_base_pfn(reg));
		tend   = min_t(u64, epfn, memblock_region_memory_end_pfn(reg));
		if (tstart < tend) {
			fadump_free_reserved_memory(tstart, tend);

			if (tend == epfn)
				break;

			spfn = tend;
		}
	}
}

/*
 * Sort the mem ranges in-place and merge adjacent ranges
 * to minimize the memory ranges count.
 */
static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info)
{
	struct fadump_memory_range *mem_ranges;
	struct fadump_memory_range tmp_range;
	u64 base, size;
	int i, j, idx;

	if (!reserved_mrange_info.mem_range_cnt)
		return;

	/* Sort the memory ranges (simple selection sort; counts are small) */
	mem_ranges = mrange_info->mem_ranges;
	for (i = 0; i < mrange_info->mem_range_cnt; i++) {
		idx = i;
		for (j = (i + 1); j < mrange_info->mem_range_cnt; j++) {
			if (mem_ranges[idx].base > mem_ranges[j].base)
				idx = j;
		}
		if (idx != i) {
			tmp_range = mem_ranges[idx];
			mem_ranges[idx] = mem_ranges[i];
			mem_ranges[i] = tmp_range;
		}
	}

	/* Merge adjacent reserved ranges */
	idx = 0;
	for (i = 1; i < mrange_info->mem_range_cnt; i++) {
		base = mem_ranges[i-1].base;
		size = mem_ranges[i-1].size;
		if (mem_ranges[i].base == (base + size))
			mem_ranges[idx].size += mem_ranges[i].size;
		else {
			idx++;
			if (i == idx)
				continue;

			mem_ranges[idx] = mem_ranges[i];
		}
	}
	mrange_info->mem_range_cnt = idx + 1;
}

/*
 * Scan reserved-ranges to consider them while reserving/releasing
 * memory for FADump.
 */
static void __init early_init_dt_scan_reserved_ranges(unsigned long node)
{
	const __be32 *prop;
	int len, ret = -1;
	unsigned long i;

	/* reserved-ranges already scanned */
	if (reserved_mrange_info.mem_range_cnt != 0)
		return;

	prop = of_get_flat_dt_prop(node, "reserved-ranges", &len);
	if (!prop)
		return;

	/*
	 * Each reserved range is an (address, size) pair, 2 cells each,
	 * totalling 4 cells per range.
	 */
	for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
		u64 base, size;

		base = of_read_number(prop + (i * 4) + 0, 2);
		size = of_read_number(prop + (i * 4) + 2, 2);

		if (size) {
			ret = fadump_add_mem_range(&reserved_mrange_info,
						   base, base + size);
			if (ret < 0) {
				pr_warn("some reserved ranges are ignored!\n");
				break;
			}
		}
	}

	/* Compact reserved ranges */
	sort_and_merge_mem_ranges(&reserved_mrange_info);
}

/*
 * Release the memory that was reserved during early boot to preserve the
 * crashed kernel's memory contents, except the reserved dump area (a
 * permanent reservation) and the reserved ranges used by F/W. The released
 * memory will be available for general use.
 */
static void fadump_release_memory(u64 begin, u64 end)
{
	u64 ra_start, ra_end, tstart;
	int i, ret;

	ra_start = fw_dump.reserve_dump_area_start;
	ra_end = ra_start + fw_dump.reserve_dump_area_size;

	/*
	 * If the reserved ranges array limit is hit, overwrite the last
	 * reserved memory range with the reserved dump area to ensure it is
	 * excluded from the memory being released (and reused for the next
	 * FADump registration).
	 */
	if (reserved_mrange_info.mem_range_cnt ==
	    reserved_mrange_info.max_mem_ranges)
		reserved_mrange_info.mem_range_cnt--;

	ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end);
	if (ret != 0)
		return;

	/* Get the reserved ranges list in order first. */
	sort_and_merge_mem_ranges(&reserved_mrange_info);

	/* Exclude reserved ranges and release remaining memory */
	tstart = begin;
	for (i = 0; i < reserved_mrange_info.mem_range_cnt; i++) {
		ra_start = reserved_mrange_info.mem_ranges[i].base;
		ra_end = ra_start + reserved_mrange_info.mem_ranges[i].size;

		if (tstart >= ra_end)
			continue;

		if (tstart < ra_start)
			fadump_release_reserved_area(tstart, ra_start);
		tstart = ra_end;
	}

	if (tstart < end)
		fadump_release_reserved_area(tstart, end);
}

static void fadump_invalidate_release_mem(void)
{
	mutex_lock(&fadump_mutex);
	if (!fw_dump.dump_active) {
		mutex_unlock(&fadump_mutex);
		return;
	}

	fadump_cleanup();
	mutex_unlock(&fadump_mutex);

	fadump_release_memory(fw_dump.boot_mem_top, memblock_end_of_DRAM());
	fadump_free_cpu_notes_buf();

	/*
	 * Setup kernel metadata and initialize the kernel dump
	 * memory structure for FADump re-registration.
	 */
	if (fw_dump.ops->fadump_setup_metadata &&
	    (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
		pr_warn("Failed to setup kernel metadata!\n");
	fw_dump.ops->fadump_init_mem_struct(&fw_dump);
}

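/*
 * Userspace interface: after the dump has been saved, writing 1 to
 * /sys/kernel/fadump_release_mem invalidates the dump and frees the
 * reserved memory for general use.
 */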
static ssize_t fadump_release_memory_store(struct kobject *kobj,
					struct kobj_attribute *attr,
					const char *buf, size_t count)
{
	int input = -1;

	if (!fw_dump.dump_active)
		return -EPERM;

	if (kstrtoint(buf, 0, &input))
		return -EINVAL;

	if (input == 1) {
		/*
		 * Take away '/proc/vmcore'. We are releasing the dump
		 * memory, hence it will not be valid anymore.
		 */
#ifdef CONFIG_PROC_VMCORE
		vmcore_cleanup();
#endif
		fadump_invalidate_release_mem();

	} else
		return -EINVAL;
	return count;
}

static ssize_t fadump_enabled_show(struct kobject *kobj,
					struct kobj_attribute *attr,
					char *buf)
{
	return sprintf(buf, "%d\n", fw_dump.fadump_enabled);
}

static ssize_t fadump_register_show(struct kobject *kobj,
					struct kobj_attribute *attr,
					char *buf)
{
	return sprintf(buf, "%d\n", fw_dump.dump_registered);
}

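/*
 * Userspace interface, e.g.:
 *   echo 1 > /sys/kernel/fadump_registered	# (re-)register for dump
 *   echo 0 > /sys/kernel/fadump_registered	# un-register
 */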
static ssize_t fadump_register_store(struct kobject *kobj,
					struct kobj_attribute *attr,
					const char *buf, size_t count)
{
	int ret = 0;
	int input = -1;

	if (!fw_dump.fadump_enabled || fw_dump.dump_active)
		return -EPERM;

	if (kstrtoint(buf, 0, &input))
		return -EINVAL;

	mutex_lock(&fadump_mutex);

	switch (input) {
	case 0:
		if (fw_dump.dump_registered == 0)
			goto unlock_out;

		/* Un-register firmware-assisted dump */
		pr_debug("Un-register firmware-assisted dump\n");
		fw_dump.ops->fadump_unregister(&fw_dump);
		break;
	case 1:
		if (fw_dump.dump_registered == 1) {
			/* Un-register firmware-assisted dump first */
			fw_dump.ops->fadump_unregister(&fw_dump);
		}
		/* Register firmware-assisted dump */
		ret = register_fadump();
		break;
	default:
		ret = -EINVAL;
		break;
	}

unlock_out:
	mutex_unlock(&fadump_mutex);
	return ret < 0 ? ret : count;
}

static int fadump_region_show(struct seq_file *m, void *private)
{
	if (!fw_dump.fadump_enabled)
		return 0;

	mutex_lock(&fadump_mutex);
	fw_dump.ops->fadump_region_show(&fw_dump, m);
	mutex_unlock(&fadump_mutex);
	return 0;
}

static struct kobj_attribute fadump_release_attr = __ATTR(fadump_release_mem,
						0200, NULL,
						fadump_release_memory_store);
static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled,
						0444, fadump_enabled_show,
						NULL);
static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered,
						0644, fadump_register_show,
						fadump_register_store);

DEFINE_SHOW_ATTRIBUTE(fadump_region);

static void fadump_init_files(void)
{
	struct dentry *debugfs_file;
	int rc = 0;

	rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr);
	if (rc)
		pr_err("unable to create sysfs file fadump_enabled (%d)\n", rc);

	rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr);
	if (rc)
		pr_err("unable to create sysfs file fadump_registered (%d)\n", rc);

	debugfs_file = debugfs_create_file("fadump_region", 0444,
					powerpc_debugfs_root, NULL,
					&fadump_region_fops);
	if (!debugfs_file)
		pr_err("unable to create debugfs file fadump_region\n");

	if (fw_dump.dump_active) {
		rc = sysfs_create_file(kernel_kobj, &fadump_release_attr.attr);
		if (rc)
			pr_err("unable to create sysfs file fadump_release_mem (%d)\n", rc);
	}
	return;
}

/*
 * Prepare for firmware-assisted dump.
 */
int __init setup_fadump(void)
{
	if (!fw_dump.fadump_enabled)
		return 0;

	if (!fw_dump.fadump_supported) {
		printk(KERN_ERR "Firmware-assisted dump is not supported on"
			" this hardware\n");
		return 0;
	}

	fadump_show_config();
	/*
	 * If dump data is available then see if it is valid and prepare for
	 * saving it to the disk.
	 */
	if (fw_dump.dump_active) {
		/*
		 * If the dump process fails then invalidate the registration
		 * and release memory before proceeding for re-registration.
		 */
		if (fw_dump.ops->fadump_process(&fw_dump) < 0)
			fadump_invalidate_release_mem();
	}
	/* Initialize the kernel dump memory structure for FADump registration. */
	else if (fw_dump.reserve_dump_area_size)
		fw_dump.ops->fadump_init_mem_struct(&fw_dump);

	fadump_init_files();

	return 1;
}
subsys_initcall(setup_fadump);
#else /* !CONFIG_PRESERVE_FA_DUMP */

/* Scan the Firmware Assisted dump configuration details. */
int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
				      int depth, void *data)
{
	if ((depth != 1) || (strcmp(uname, "ibm,opal") != 0))
		return 0;

	opal_fadump_dt_scan(&fw_dump, node);
	return 1;
}

/*
 * When a dump is active but PRESERVE_FA_DUMP is enabled in the kernel,
 * preserve the crash data. The subsequent memory-preserving kernel boot
 * is likely to process this crash data.
 */
int __init fadump_reserve_mem(void)
{
	if (fw_dump.dump_active) {
		/*
		 * If the last boot has crashed then reserve all the memory
		 * above boot memory to preserve crash data.
		 */
		pr_info("Preserving crash data for processing in next boot.\n");
		fadump_reserve_crash_area(fw_dump.boot_mem_top);
	} else
		pr_debug("FADump-aware kernel..\n");

	return 1;
}
#endif /* CONFIG_PRESERVE_FA_DUMP */

/* Preserve everything above the base address */
static void __init fadump_reserve_crash_area(u64 base)
{
	struct memblock_region *reg;
	u64 mstart, msize;

	for_each_memblock(memory, reg) {
		mstart = reg->base;
		msize  = reg->size;

		if ((mstart + msize) < base)
			continue;

		if (mstart < base) {
			msize -= (base - mstart);
			mstart = base;
		}

		pr_info("Reserving %lluMB of memory at %#016llx for preserving crash data",
			(msize >> 20), mstart);
		memblock_reserve(mstart, msize);
	}
}

unsigned long __init arch_reserved_kernel_pages(void)
{
	return memblock_reserved_size() / PAGE_SIZE;
}