• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  S390 version
4  *    Copyright IBM Corp. 1999, 2012
5  *    Author(s): Hartmut Penner (hp@de.ibm.com),
6  *               Martin Schwidefsky (schwidefsky@de.ibm.com)
7  *
8  *  Derived from "arch/i386/kernel/setup.c"
9  *    Copyright (C) 1995, Linus Torvalds
10  */
11 
12 /*
13  * This file handles the architecture-dependent parts of initialization
14  */
15 
16 #define KMSG_COMPONENT "setup"
17 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
18 
19 #include <linux/errno.h>
20 #include <linux/export.h>
21 #include <linux/sched.h>
22 #include <linux/sched/task.h>
23 #include <linux/cpu.h>
24 #include <linux/kernel.h>
25 #include <linux/memblock.h>
26 #include <linux/mm.h>
27 #include <linux/stddef.h>
28 #include <linux/unistd.h>
29 #include <linux/ptrace.h>
30 #include <linux/random.h>
31 #include <linux/user.h>
32 #include <linux/tty.h>
33 #include <linux/ioport.h>
34 #include <linux/delay.h>
35 #include <linux/init.h>
36 #include <linux/initrd.h>
37 #include <linux/bootmem.h>
38 #include <linux/root_dev.h>
39 #include <linux/console.h>
40 #include <linux/kernel_stat.h>
41 #include <linux/dma-contiguous.h>
42 #include <linux/device.h>
43 #include <linux/notifier.h>
44 #include <linux/pfn.h>
45 #include <linux/ctype.h>
46 #include <linux/reboot.h>
47 #include <linux/topology.h>
48 #include <linux/kexec.h>
49 #include <linux/crash_dump.h>
50 #include <linux/memory.h>
51 #include <linux/compat.h>
52 
53 #include <asm/ipl.h>
54 #include <asm/facility.h>
55 #include <asm/smp.h>
56 #include <asm/mmu_context.h>
57 #include <asm/cpcmd.h>
58 #include <asm/lowcore.h>
59 #include <asm/nmi.h>
60 #include <asm/irq.h>
61 #include <asm/page.h>
62 #include <asm/ptrace.h>
63 #include <asm/sections.h>
64 #include <asm/ebcdic.h>
65 #include <asm/diag.h>
66 #include <asm/os_info.h>
67 #include <asm/sclp.h>
68 #include <asm/sysinfo.h>
69 #include <asm/numa.h>
70 #include <asm/alternative.h>
71 #include <asm/nospec-branch.h>
72 #include "entry.h"
73 
74 /*
75  * Machine setup..
76  */
/*
 * Selected console mode; starts out as 0.
 * NOTE(review): presumably written by the SET_CONSOLE_* macros - confirm
 * in <asm/setup.h>.
 */
unsigned int console_mode = 0;
EXPORT_SYMBOL(console_mode);

/* Console device number; -1 until set by condev= or conmode_default(). */
unsigned int console_devno = -1;
EXPORT_SYMBOL(console_devno);

/* Console subchannel number; -1 until conmode_default() parses it on z/VM. */
unsigned int console_irq = -1;
EXPORT_SYMBOL(console_irq);

/* ELF hardware capability bits and platform name, filled by setup_hwcaps(). */
unsigned long elf_hwcap __read_mostly = 0;
char elf_platform[ELF_PLATFORM_SIZE];

/* Kernel-internal capability bits, filled by setup_hwcaps(). */
unsigned long int_hwcap = 0;

/* memory_end_set is 1 once memory_end was fixed by mem= or for zfcpdump. */
int __initdata memory_end_set;
unsigned long __initdata memory_end;
/* Fallback for memory_end in setup_memory_end(); not assigned in this file. */
unsigned long __initdata max_physmem_end;

/* Kernel virtual address space layout, computed by setup_memory_end(). */
unsigned long VMALLOC_START;
EXPORT_SYMBOL(VMALLOC_START);

unsigned long VMALLOC_END;
EXPORT_SYMBOL(VMALLOC_END);

struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);

unsigned long MODULES_VADDR;
unsigned long MODULES_END;

/* An array with a pointer to the lowcore of every CPU. */
struct lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);
110 
111 /*
112  * This is set up by the setup-routine at boot-time
113  * for S390 need to find out, what we have to setup
114  * using address 0x10400 ...
115  */
116 
117 #include <asm/setup.h>
118 
119 /*
120  * condev= and conmode= setup parameter.
121  */
122 
condev_setup(char * str)123 static int __init condev_setup(char *str)
124 {
125 	int vdev;
126 
127 	vdev = simple_strtoul(str, &str, 0);
128 	if (vdev >= 0 && vdev < 65536) {
129 		console_devno = vdev;
130 		console_irq = -1;
131 	}
132 	return 1;
133 }
134 
135 __setup("condev=", condev_setup);
136 
set_preferred_console(void)137 static void __init set_preferred_console(void)
138 {
139 	if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
140 		add_preferred_console("ttyS", 0, NULL);
141 	else if (CONSOLE_IS_3270)
142 		add_preferred_console("tty3270", 0, NULL);
143 	else if (CONSOLE_IS_VT220)
144 		add_preferred_console("ttyS", 1, NULL);
145 	else if (CONSOLE_IS_HVC)
146 		add_preferred_console("hvc", 0, NULL);
147 }
148 
conmode_setup(char * str)149 static int __init conmode_setup(char *str)
150 {
151 #if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
152 	if (strncmp(str, "hwc", 4) == 0 || strncmp(str, "sclp", 5) == 0)
153                 SET_CONSOLE_SCLP;
154 #endif
155 #if defined(CONFIG_TN3215_CONSOLE)
156 	if (strncmp(str, "3215", 5) == 0)
157 		SET_CONSOLE_3215;
158 #endif
159 #if defined(CONFIG_TN3270_CONSOLE)
160 	if (strncmp(str, "3270", 5) == 0)
161 		SET_CONSOLE_3270;
162 #endif
163 	set_preferred_console();
164         return 1;
165 }
166 
167 __setup("conmode=", conmode_setup);
168 
conmode_default(void)169 static void __init conmode_default(void)
170 {
171 	char query_buffer[1024];
172 	char *ptr;
173 
174         if (MACHINE_IS_VM) {
175 		cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL);
176 		console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
177 		ptr = strstr(query_buffer, "SUBCHANNEL =");
178 		console_irq = simple_strtoul(ptr + 13, NULL, 16);
179 		cpcmd("QUERY TERM", query_buffer, 1024, NULL);
180 		ptr = strstr(query_buffer, "CONMODE");
181 		/*
182 		 * Set the conmode to 3215 so that the device recognition
183 		 * will set the cu_type of the console to 3215. If the
184 		 * conmode is 3270 and we don't set it back then both
185 		 * 3215 and the 3270 driver will try to access the console
186 		 * device (3215 as console and 3270 as normal tty).
187 		 */
188 		cpcmd("TERM CONMODE 3215", NULL, 0, NULL);
189 		if (ptr == NULL) {
190 #if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
191 			SET_CONSOLE_SCLP;
192 #endif
193 			return;
194 		}
195 		if (strncmp(ptr + 8, "3270", 4) == 0) {
196 #if defined(CONFIG_TN3270_CONSOLE)
197 			SET_CONSOLE_3270;
198 #elif defined(CONFIG_TN3215_CONSOLE)
199 			SET_CONSOLE_3215;
200 #elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
201 			SET_CONSOLE_SCLP;
202 #endif
203 		} else if (strncmp(ptr + 8, "3215", 4) == 0) {
204 #if defined(CONFIG_TN3215_CONSOLE)
205 			SET_CONSOLE_3215;
206 #elif defined(CONFIG_TN3270_CONSOLE)
207 			SET_CONSOLE_3270;
208 #elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
209 			SET_CONSOLE_SCLP;
210 #endif
211 		}
212 	} else if (MACHINE_IS_KVM) {
213 		if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
214 			SET_CONSOLE_VT220;
215 		else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
216 			SET_CONSOLE_SCLP;
217 		else
218 			SET_CONSOLE_HVC;
219 	} else {
220 #if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
221 		SET_CONSOLE_SCLP;
222 #endif
223 	}
224 	if (IS_ENABLED(CONFIG_VT) && IS_ENABLED(CONFIG_DUMMY_CONSOLE))
225 		conswitchp = &dummy_con;
226 }
227 
#ifdef CONFIG_CRASH_DUMP
/*
 * Prepare the kernel for running as zfcpdump kernel: when IPLed as FCP
 * dump and not running as kdump kernel (OLDMEM_BASE is zero), restrict
 * device recognition to the IPL and console devices via the command line
 * and lower the console log level to 2.
 */
static void __init setup_zfcpdump(void)
{
	if (ipl_info.type != IPL_TYPE_FCP_DUMP || OLDMEM_BASE)
		return;

	strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
	console_loglevel = 2;
}
#else
/* Without CONFIG_CRASH_DUMP there is nothing to set up. */
static inline void setup_zfcpdump(void)
{
}
#endif /* CONFIG_CRASH_DUMP */
241 
242  /*
243  * Reboot, halt and power_off stubs. They just call _machine_restart,
244  * _machine_halt or _machine_power_off.
245  */
246 
machine_restart(char * command)247 void machine_restart(char *command)
248 {
249 	if ((!in_interrupt() && !in_atomic()) || oops_in_progress)
250 		/*
251 		 * Only unblank the console if we are called in enabled
252 		 * context or a bust_spinlocks cleared the way for us.
253 		 */
254 		console_unblank();
255 	_machine_restart(command);
256 }
257 
machine_halt(void)258 void machine_halt(void)
259 {
260 	if (!in_interrupt() || oops_in_progress)
261 		/*
262 		 * Only unblank the console if we are called in enabled
263 		 * context or a bust_spinlocks cleared the way for us.
264 		 */
265 		console_unblank();
266 	_machine_halt();
267 }
268 
machine_power_off(void)269 void machine_power_off(void)
270 {
271 	if (!in_interrupt() || oops_in_progress)
272 		/*
273 		 * Only unblank the console if we are called in enabled
274 		 * context or a bust_spinlocks cleared the way for us.
275 		 */
276 		console_unblank();
277 	_machine_power_off();
278 }
279 
/*
 * Dummy power off function: the power-off hook initially points
 * at machine_power_off().
 */
void (*pm_power_off)(void) = machine_power_off;
EXPORT_SYMBOL_GPL(pm_power_off);
285 
early_parse_mem(char * p)286 static int __init early_parse_mem(char *p)
287 {
288 	memory_end = memparse(p, &p);
289 	memory_end &= PAGE_MASK;
290 	memory_end_set = 1;
291 	return 0;
292 }
293 early_param("mem", early_parse_mem);
294 
parse_vmalloc(char * arg)295 static int __init parse_vmalloc(char *arg)
296 {
297 	if (!arg)
298 		return -EINVAL;
299 	VMALLOC_END = (memparse(arg, &arg) + PAGE_SIZE - 1) & PAGE_MASK;
300 	return 0;
301 }
302 early_param("vmalloc", parse_vmalloc);
303 
/*
 * Upper end of the stack area allocated for PSW restart; assigned in
 * setup_lowcore_dat_off(). Explicitly placed in the .data section.
 */
void *restart_stack __section(.data);
305 
/*
 * Allocate and initialize the lowcore of the boot CPU and make it the
 * active one by setting the prefix register to it. Values that were
 * already established in the current lowcore (machine flags, facility
 * lists, timers, ...) are carried over. The new PSWs are set up without
 * PSW_MASK_DAT; setup_lowcore_dat_on() adds that bit later.
 */
static void __init setup_lowcore_dat_off(void)
{
	struct lowcore *lc;

	/*
	 * Setup lowcore for boot cpu
	 */
	BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE);
	/* The lowcore is aligned to its own size */
	lc = memblock_virt_alloc_low(sizeof(*lc), sizeof(*lc));
	/* New PSWs: the interrupt/exception entry points */
	lc->restart_psw.mask = PSW_KERNEL_BITS;
	lc->restart_psw.addr = (unsigned long) restart_int_handler;
	lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
	lc->external_new_psw.addr = (unsigned long) ext_int_handler;
	lc->svc_new_psw.mask = PSW_KERNEL_BITS |
		PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
	lc->svc_new_psw.addr = (unsigned long) system_call;
	lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
	lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
	lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
	lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
	lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
	lc->io_new_psw.addr = (unsigned long) io_int_handler;
	lc->clock_comparator = clock_comparator_max;
	/*
	 * Stack pointers point to the upper end of each stack, leaving room
	 * for one stack frame and a struct pt_regs.
	 */
	lc->kernel_stack = ((unsigned long) &init_thread_union)
		+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
	lc->async_stack = (unsigned long)
		memblock_virt_alloc(ASYNC_SIZE, ASYNC_SIZE)
		+ ASYNC_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
	lc->panic_stack = (unsigned long)
		memblock_virt_alloc(PAGE_SIZE, PAGE_SIZE)
		+ PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
	lc->current_task = (unsigned long)&init_task;
	lc->lpp = LPP_MAGIC;
	/* Carry over state already present in the currently active lowcore */
	lc->machine_flags = S390_lowcore.machine_flags;
	lc->preempt_count = S390_lowcore.preempt_count;
	lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
	memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(lc->stfle_fac_list));
	memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list,
	       sizeof(lc->alt_stfle_fac_list));
	nmi_alloc_boot_cpu(lc);
	vdso_alloc_boot_cpu(lc);
	lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
	lc->async_enter_timer = S390_lowcore.async_enter_timer;
	lc->exit_timer = S390_lowcore.exit_timer;
	lc->user_timer = S390_lowcore.user_timer;
	lc->system_timer = S390_lowcore.system_timer;
	lc->steal_timer = S390_lowcore.steal_timer;
	lc->last_update_timer = S390_lowcore.last_update_timer;
	lc->last_update_clock = S390_lowcore.last_update_clock;

	/* restart_stack points to the upper end of the allocated area */
	restart_stack = memblock_virt_alloc(ASYNC_SIZE, ASYNC_SIZE);
	restart_stack += ASYNC_SIZE;

	/*
	 * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
	 * restart data to the absolute zero lowcore. This is necessary if
	 * PSW restart is done on an offline CPU that has lowcore zero.
	 */
	lc->restart_stack = (unsigned long) restart_stack;
	lc->restart_fn = (unsigned long) do_restart;
	lc->restart_data = 0;
	lc->restart_source = -1UL;

	/* Setup absolute zero lowcore */
	mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack);
	mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn);
	mem_assign_absolute(S390_lowcore.restart_data, lc->restart_data);
	mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source);
	mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw);

#ifdef CONFIG_SMP
	/* Spinlock setup for the boot CPU, which is CPU 0 */
	lc->spinlock_lockval = arch_spin_lockval(0);
	lc->spinlock_index = 0;
	arch_spin_lock_setup(0);
#endif
	lc->br_r1_trampoline = 0x07f1;	/* br %r1 */

	/* Switch to the new lowcore and publish it as the one of CPU 0 */
	set_prefix((u32)(unsigned long) lc);
	lowcore_ptr[0] = lc;
}
387 
/*
 * Enable DAT in the external, svc, program and I/O new PSWs of the
 * current lowcore. Called from setup_arch() after paging_init().
 */
static void __init setup_lowcore_dat_on(void)
{
	/*
	 * Bit 28 of control register 0 is cleared around the lowcore update.
	 * NOTE(review): presumably this is low-address protection, which
	 * would otherwise prevent the stores below - confirm.
	 */
	__ctl_clear_bit(0, 28);
	S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
	S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT;
	S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT;
	S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
	__ctl_set_bit(0, 28);
}
397 
/*
 * Resources describing the kernel image. The start and end addresses
 * are filled in by setup_resources().
 */
static struct resource code_resource = {
	.name  = "Kernel code",
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource data_resource = {
	.name = "Kernel data",
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource bss_resource = {
	.name = "Kernel bss",
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

/* The kernel image resources that setup_resources() nests below System RAM */
static struct resource __initdata *standard_resources[] = {
	&code_resource,
	&data_resource,
	&bss_resource,
};
418 
/*
 * Build the iomem resource tree: one "System RAM" resource per memblock
 * memory region, with the kernel code/data/bss resources nested below the
 * region(s) they are located in. With CONFIG_CRASH_DUMP the crash kernel
 * memory is added back to memblock as reserved memory and inserted as a
 * separate resource.
 */
static void __init setup_resources(void)
{
	struct resource *res, *std_res, *sub_res;
	struct memblock_region *reg;
	int j;

	code_resource.start = (unsigned long) _text;
	code_resource.end = (unsigned long) _etext - 1;
	data_resource.start = (unsigned long) _etext;
	data_resource.end = (unsigned long) _edata - 1;
	bss_resource.start = (unsigned long) __bss_start;
	bss_resource.end = (unsigned long) __bss_stop - 1;

	for_each_memblock(memory, reg) {
		res = memblock_virt_alloc(sizeof(*res), 8);
		res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;

		res->name = "System RAM";
		res->start = reg->base;
		res->end = reg->base + reg->size - 1;
		request_resource(&iomem_resource, res);

		for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
			std_res = standard_resources[j];
			/* Skip resources that do not start in this region */
			if (std_res->start < res->start ||
			    std_res->start > res->end)
				continue;
			if (std_res->end > res->end) {
				/*
				 * The resource crosses the end of this region:
				 * register the part inside the region as a
				 * copy and let the original resource continue
				 * behind the region.
				 */
				sub_res = memblock_virt_alloc(sizeof(*sub_res), 8);
				*sub_res = *std_res;
				sub_res->end = res->end;
				std_res->start = res->end + 1;
				request_resource(res, sub_res);
			} else {
				request_resource(res, std_res);
			}
		}
	}
#ifdef CONFIG_CRASH_DUMP
	/*
	 * Re-add removed crash kernel memory as reserved memory. This makes
	 * sure it will be mapped with the identity mapping and struct pages
	 * will be created, so it can be resized later on.
	 * However add it later since the crash kernel resource should not be
	 * part of the System RAM resource.
	 */
	if (crashk_res.end) {
		memblock_add_node(crashk_res.start, resource_size(&crashk_res), 0);
		memblock_reserve(crashk_res.start, resource_size(&crashk_res));
		insert_resource(&iomem_resource, &crashk_res);
	}
#endif
}
472 
/*
 * Determine the layout of the kernel address space (modules area, vmalloc
 * area, vmemmap array, identity mapping) and derive the final memory_end
 * from it. Memory above memory_end is removed from memblock.
 */
static void __init setup_memory_end(void)
{
	unsigned long vmax, vmalloc_size, tmp;

	/* Choose kernel address space layout: 2, 3, or 4 levels. */
	/*
	 * At this point VMALLOC_END is non-zero only if vmalloc= was given,
	 * in which case it holds the requested size (see parse_vmalloc()).
	 */
	vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN;
	/* Space needed for the 1:1 mapping plus one struct page per page */
	tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE;
	tmp = tmp * (sizeof(struct page) + PAGE_SIZE);
	if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE)
		vmax = _REGION2_SIZE; /* 3-level kernel page table */
	else
		vmax = _REGION1_SIZE; /* 4-level kernel page table */
	/* module area is at the end of the kernel address space. */
	MODULES_END = vmax;
	MODULES_VADDR = MODULES_END - MODULES_LEN;
	VMALLOC_END = MODULES_VADDR;
	VMALLOC_START = vmax - vmalloc_size;

	/* Split remaining virtual space between 1:1 mapping & vmemmap array */
	tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
	/* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
	tmp = SECTION_ALIGN_UP(tmp);
	tmp = VMALLOC_START - tmp * sizeof(struct page);
	tmp &= ~((vmax >> 11) - 1);	/* align to page table level */
	tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS);
	vmemmap = (struct page *) tmp;

	/* Take care that memory_end is set and <= vmemmap */
	memory_end = min(memory_end ?: max_physmem_end, tmp);
	max_pfn = max_low_pfn = PFN_DOWN(memory_end);
	memblock_remove(memory_end, ULONG_MAX);

	pr_notice("The maximum memory size is %luMB\n", memory_end >> 20);
}
507 
508 #ifdef CONFIG_CRASH_DUMP
509 
510 /*
511  * When kdump is enabled, we have to ensure that no memory from
512  * the area [0 - crashkernel memory size] and
513  * [crashk_res.start - crashk_res.end] is set offline.
514  */
kdump_mem_notifier(struct notifier_block * nb,unsigned long action,void * data)515 static int kdump_mem_notifier(struct notifier_block *nb,
516 			      unsigned long action, void *data)
517 {
518 	struct memory_notify *arg = data;
519 
520 	if (action != MEM_GOING_OFFLINE)
521 		return NOTIFY_OK;
522 	if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
523 		return NOTIFY_BAD;
524 	if (arg->start_pfn > PFN_DOWN(crashk_res.end))
525 		return NOTIFY_OK;
526 	if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start))
527 		return NOTIFY_OK;
528 	return NOTIFY_BAD;
529 }
530 
531 static struct notifier_block kdump_mem_nb = {
532 	.notifier_call = kdump_mem_notifier,
533 };
534 
535 #endif
536 
537 /*
538  * Make sure that the area behind memory_end is protected
539  */
reserve_memory_end(void)540 static void __init reserve_memory_end(void)
541 {
542 #ifdef CONFIG_CRASH_DUMP
543 	if (ipl_info.type == IPL_TYPE_FCP_DUMP &&
544 	    !OLDMEM_BASE && sclp.hsa_size) {
545 		memory_end = sclp.hsa_size;
546 		memory_end &= PAGE_MASK;
547 		memory_end_set = 1;
548 	}
549 #endif
550 	if (!memory_end_set)
551 		return;
552 	memblock_reserve(memory_end, ULONG_MAX);
553 }
554 
555 /*
556  * Make sure that oldmem, where the dump is stored, is protected
557  */
reserve_oldmem(void)558 static void __init reserve_oldmem(void)
559 {
560 #ifdef CONFIG_CRASH_DUMP
561 	if (OLDMEM_BASE)
562 		/* Forget all memory above the running kdump system */
563 		memblock_reserve(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX);
564 #endif
565 }
566 
/*
 * Make sure that oldmem, where the dump is stored, is removed from memblock
 */
remove_oldmem(void)570 static void __init remove_oldmem(void)
571 {
572 #ifdef CONFIG_CRASH_DUMP
573 	if (OLDMEM_BASE)
574 		/* Forget all memory above the running kdump system */
575 		memblock_remove(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX);
576 #endif
577 }
578 
579 /*
580  * Reserve memory for kdump kernel to be loaded with kexec
581  */
/*
 * Parse the crashkernel= parameter and set aside the requested memory
 * area for a kdump kernel. On success the area is removed from memblock,
 * recorded in crashk_res and passed to os_info_crashkernel_add(). On any
 * failure the function returns without reserving anything.
 */
static void __init reserve_crashkernel(void)
{
#ifdef CONFIG_CRASH_DUMP
	unsigned long long crash_base, crash_size;
	phys_addr_t low, high;
	int rc;

	rc = parse_crashkernel(boot_command_line, memory_end, &crash_size,
			       &crash_base);

	crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
	crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
	/* Nothing to do on parse errors or if no crashkernel was requested */
	if (rc || crash_size == 0)
		return;

	if (memblock.memory.regions[0].size < crash_size) {
		pr_info("crashkernel reservation failed: %s\n",
			"first memory chunk must be at least crashkernel size");
		return;
	}

	/* Without an explicit base, try to place the area at OLDMEM_BASE */
	low = crash_base ?: OLDMEM_BASE;
	high = low + crash_size;
	if (low >= OLDMEM_BASE && high <= OLDMEM_BASE + OLDMEM_SIZE) {
		/* The crashkernel fits into OLDMEM, reuse OLDMEM */
		crash_base = low;
	} else {
		/* Find suitable area in free memory */
		/* Lowest acceptable base: the larger of crash_size and HSA size */
		low = max_t(unsigned long, crash_size, sclp.hsa_size);
		high = crash_base ? crash_base + crash_size : ULONG_MAX;

		if (crash_base && crash_base < low) {
			pr_info("crashkernel reservation failed: %s\n",
				"crash_base too low");
			return;
		}
		/* An explicit base restricts the search to exactly that area */
		low = crash_base ?: low;
		crash_base = memblock_find_in_range(low, high, crash_size,
						    KEXEC_CRASH_MEM_ALIGN);
	}

	if (!crash_base) {
		pr_info("crashkernel reservation failed: %s\n",
			"no suitable area found");
		return;
	}

	/* kdump_mem_notifier() vetoes offlining of memory needed for kdump */
	if (register_memory_notifier(&kdump_mem_nb))
		return;

	if (!OLDMEM_BASE && MACHINE_IS_VM)
		diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
	crashk_res.start = crash_base;
	crashk_res.end = crash_base + crash_size - 1;
	memblock_remove(crash_base, crash_size);
	pr_info("Reserving %lluMB of memory at %lluMB "
		"for crashkernel (System RAM: %luMB)\n",
		crash_size >> 20, crash_base >> 20,
		(unsigned long)memblock.memory.total_size >> 20);
	os_info_crashkernel_add(crash_base, crash_size);
#endif
}
644 
645 /*
646  * Reserve the initrd from being used by memblock
647  */
reserve_initrd(void)648 static void __init reserve_initrd(void)
649 {
650 #ifdef CONFIG_BLK_DEV_INITRD
651 	if (!INITRD_START || !INITRD_SIZE)
652 		return;
653 	initrd_start = INITRD_START;
654 	initrd_end = initrd_start + INITRD_SIZE;
655 	memblock_reserve(INITRD_START, INITRD_SIZE);
656 #endif
657 }
658 
659 /*
660  * Check for initrd being in usable memory
661  */
check_initrd(void)662 static void __init check_initrd(void)
663 {
664 #ifdef CONFIG_BLK_DEV_INITRD
665 	if (INITRD_START && INITRD_SIZE &&
666 	    !memblock_is_region_memory(INITRD_START, INITRD_SIZE)) {
667 		pr_err("The initial RAM disk does not fit into the memory\n");
668 		memblock_free(INITRD_START, INITRD_SIZE);
669 		initrd_start = initrd_end = 0;
670 	}
671 #endif
672 }
673 
674 /*
675  * Reserve memory used for lowcore/command line/kernel image.
676  */
reserve_kernel(void)677 static void __init reserve_kernel(void)
678 {
679 	unsigned long start_pfn = PFN_UP(__pa(_end));
680 
681 #ifdef CONFIG_DMA_API_DEBUG
682 	/*
683 	 * DMA_API_DEBUG code stumbles over addresses from the
684 	 * range [PARMAREA_END, _stext]. Mark the memory as reserved
685 	 * so it is not used for CONFIG_DMA_API_DEBUG=y.
686 	 */
687 	memblock_reserve(0, PFN_PHYS(start_pfn));
688 #else
689 	memblock_reserve(0, PARMAREA_END);
690 	memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn)
691 			 - (unsigned long)_stext);
692 #endif
693 }
694 
setup_memory(void)695 static void __init setup_memory(void)
696 {
697 	struct memblock_region *reg;
698 
699 	/*
700 	 * Init storage key for present memory
701 	 */
702 	for_each_memblock(memory, reg) {
703 		storage_key_init_range(reg->base, reg->base + reg->size);
704 	}
705 	psw_set_key(PAGE_DEFAULT_KEY);
706 
707 	/* Only cosmetics */
708 	memblock_enforce_memory_limit(memblock_end_of_DRAM());
709 }
710 
711 /*
712  * Setup hardware capabilities.
713  */
setup_hwcaps(void)714 static int __init setup_hwcaps(void)
715 {
716 	static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 };
717 	struct cpuid cpu_id;
718 	int i;
719 
720 	/*
721 	 * The store facility list bits numbers as found in the principles
722 	 * of operation are numbered with bit 1UL<<31 as number 0 to
723 	 * bit 1UL<<0 as number 31.
724 	 *   Bit 0: instructions named N3, "backported" to esa-mode
725 	 *   Bit 2: z/Architecture mode is active
726 	 *   Bit 7: the store-facility-list-extended facility is installed
727 	 *   Bit 17: the message-security assist is installed
728 	 *   Bit 19: the long-displacement facility is installed
729 	 *   Bit 21: the extended-immediate facility is installed
730 	 *   Bit 22: extended-translation facility 3 is installed
731 	 *   Bit 30: extended-translation facility 3 enhancement facility
732 	 * These get translated to:
733 	 *   HWCAP_S390_ESAN3 bit 0, HWCAP_S390_ZARCH bit 1,
734 	 *   HWCAP_S390_STFLE bit 2, HWCAP_S390_MSA bit 3,
735 	 *   HWCAP_S390_LDISP bit 4, HWCAP_S390_EIMM bit 5 and
736 	 *   HWCAP_S390_ETF3EH bit 8 (22 && 30).
737 	 */
738 	for (i = 0; i < 6; i++)
739 		if (test_facility(stfl_bits[i]))
740 			elf_hwcap |= 1UL << i;
741 
742 	if (test_facility(22) && test_facility(30))
743 		elf_hwcap |= HWCAP_S390_ETF3EH;
744 
745 	/*
746 	 * Check for additional facilities with store-facility-list-extended.
747 	 * stfle stores doublewords (8 byte) with bit 1ULL<<63 as bit 0
748 	 * and 1ULL<<0 as bit 63. Bits 0-31 contain the same information
749 	 * as stored by stfl, bits 32-xxx contain additional facilities.
750 	 * How many facility words are stored depends on the number of
751 	 * doublewords passed to the instruction. The additional facilities
752 	 * are:
753 	 *   Bit 42: decimal floating point facility is installed
754 	 *   Bit 44: perform floating point operation facility is installed
755 	 * translated to:
756 	 *   HWCAP_S390_DFP bit 6 (42 && 44).
757 	 */
758 	if ((elf_hwcap & (1UL << 2)) && test_facility(42) && test_facility(44))
759 		elf_hwcap |= HWCAP_S390_DFP;
760 
761 	/*
762 	 * Huge page support HWCAP_S390_HPAGE is bit 7.
763 	 */
764 	if (MACHINE_HAS_EDAT1)
765 		elf_hwcap |= HWCAP_S390_HPAGE;
766 
767 	/*
768 	 * 64-bit register support for 31-bit processes
769 	 * HWCAP_S390_HIGH_GPRS is bit 9.
770 	 */
771 	elf_hwcap |= HWCAP_S390_HIGH_GPRS;
772 
773 	/*
774 	 * Transactional execution support HWCAP_S390_TE is bit 10.
775 	 */
776 	if (MACHINE_HAS_TE)
777 		elf_hwcap |= HWCAP_S390_TE;
778 
779 	/*
780 	 * Vector extension HWCAP_S390_VXRS is bit 11. The Vector extension
781 	 * can be disabled with the "novx" parameter. Use MACHINE_HAS_VX
782 	 * instead of facility bit 129.
783 	 */
784 	if (MACHINE_HAS_VX) {
785 		elf_hwcap |= HWCAP_S390_VXRS;
786 		if (test_facility(134))
787 			elf_hwcap |= HWCAP_S390_VXRS_EXT;
788 		if (test_facility(135))
789 			elf_hwcap |= HWCAP_S390_VXRS_BCD;
790 	}
791 
792 	/*
793 	 * Guarded storage support HWCAP_S390_GS is bit 12.
794 	 */
795 	if (MACHINE_HAS_GS)
796 		elf_hwcap |= HWCAP_S390_GS;
797 
798 	get_cpu_id(&cpu_id);
799 	add_device_randomness(&cpu_id, sizeof(cpu_id));
800 	switch (cpu_id.machine) {
801 	case 0x2064:
802 	case 0x2066:
803 	default:	/* Use "z900" as default for 64 bit kernels. */
804 		strcpy(elf_platform, "z900");
805 		break;
806 	case 0x2084:
807 	case 0x2086:
808 		strcpy(elf_platform, "z990");
809 		break;
810 	case 0x2094:
811 	case 0x2096:
812 		strcpy(elf_platform, "z9-109");
813 		break;
814 	case 0x2097:
815 	case 0x2098:
816 		strcpy(elf_platform, "z10");
817 		break;
818 	case 0x2817:
819 	case 0x2818:
820 		strcpy(elf_platform, "z196");
821 		break;
822 	case 0x2827:
823 	case 0x2828:
824 		strcpy(elf_platform, "zEC12");
825 		break;
826 	case 0x2964:
827 	case 0x2965:
828 		strcpy(elf_platform, "z13");
829 		break;
830 	case 0x3906:
831 	case 0x3907:
832 		strcpy(elf_platform, "z14");
833 		break;
834 	}
835 
836 	/*
837 	 * Virtualization support HWCAP_INT_SIE is bit 0.
838 	 */
839 	if (sclp.has_sief2)
840 		int_hwcap |= HWCAP_INT_SIE;
841 
842 	return 0;
843 }
844 arch_initcall(setup_hwcaps);
845 
846 /*
847  * Add system information as device randomness
848  */
setup_randomness(void)849 static void __init setup_randomness(void)
850 {
851 	struct sysinfo_3_2_2 *vmms;
852 
853 	vmms = (struct sysinfo_3_2_2 *) memblock_alloc(PAGE_SIZE, PAGE_SIZE);
854 	if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
855 		add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
856 	memblock_free((unsigned long) vmms, PAGE_SIZE);
857 }
858 
859 /*
860  * Find the correct size for the task_struct. This depends on
861  * the size of the struct fpu at the end of the thread_struct
862  * which is embedded in the task_struct.
863  */
setup_task_size(void)864 static void __init setup_task_size(void)
865 {
866 	int task_size = sizeof(struct task_struct);
867 
868 	if (!MACHINE_HAS_VX) {
869 		task_size -= sizeof(__vector128) * __NUM_VXRS;
870 		task_size += sizeof(freg_t) * __NUM_FPRS;
871 	}
872 	arch_task_struct_size = task_size;
873 }
874 
875 /*
876  * Setup function called from init/main.c just after the banner
877  * was printed.
878  */
879 
/*
 * Architecture specific boot time setup.
 *
 * @cmdline_p: set to point at boot_command_line.
 *
 * The steps below are order dependent: memory reservations are done
 * before memory is detected, the address space layout is chosen before
 * further reservations, and the lowcore is set up with DAT off before
 * paging_init() and switched to DAT on afterwards.
 */
void __init setup_arch(char **cmdline_p)
{
        /*
         * print what head.S has found out about the machine
         */
	if (MACHINE_IS_VM)
		pr_info("Linux is running as a z/VM "
			"guest operating system in 64-bit mode\n");
	else if (MACHINE_IS_KVM)
		pr_info("Linux is running under KVM in 64-bit mode\n");
	else if (MACHINE_IS_LPAR)
		pr_info("Linux is running natively in 64-bit mode\n");
	else
		pr_info("Linux is running as a guest in 64-bit mode\n");

	/* Have one command line that is parsed and saved in /proc/cmdline */
	/* boot_command_line has been already set up in early.c */
	*cmdline_p = boot_command_line;

        ROOT_DEV = Root_RAM0;

	/* Is init_mm really needed? */
	init_mm.start_code = PAGE_OFFSET;
	init_mm.end_code = (unsigned long) _etext;
	init_mm.end_data = (unsigned long) _edata;
	init_mm.brk = (unsigned long) _end;

	if (IS_ENABLED(CONFIG_EXPOLINE_AUTO))
		nospec_auto_detect();

	/* Runs the early_param() handlers, e.g. mem= and vmalloc= */
	parse_early_param();
#ifdef CONFIG_CRASH_DUMP
	/* Deactivate elfcorehdr= kernel parameter */
	elfcorehdr_addr = ELFCORE_ADDR_MAX;
#endif

	os_info_init();
	setup_ipl();
	setup_task_size();

	/* Do some memory reservations *before* memory is added to memblock */
	reserve_memory_end();
	reserve_oldmem();
	reserve_kernel();
	reserve_initrd();
	memblock_allow_resize();

	/* Get information about *all* installed memory */
	detect_memory_memblock();

	remove_oldmem();

	/*
	 * Make sure all chunks are MAX_ORDER aligned so we don't need the
	 * extra checks that HOLES_IN_ZONE would require.
	 *
	 * Is this still required?
	 */
	memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT));

	/* Fix the address space layout and the final memory_end */
	setup_memory_end();
	setup_memory();
	dma_contiguous_reserve(memory_end);
	vmcp_cma_reserve();

	check_initrd();
	reserve_crashkernel();
#ifdef CONFIG_CRASH_DUMP
	/*
	 * Be aware that smp_save_dump_cpus() triggers a system reset.
	 * Therefore CPU and device initialization should be done afterwards.
	 */
	smp_save_dump_cpus();
#endif

	setup_resources();
	setup_lowcore_dat_off();
	smp_fill_possible_mask();
	cpu_detect_mhz_feature();
        cpu_init();
	numa_setup();
	smp_detect_cpus();
	topology_init_early();

	/*
	 * Create kernel page tables and switch to virtual addressing.
	 */
        paging_init();

	/*
	 * After paging_init created the kernel page table, the new PSWs
	 * in lowcore can now run with DAT enabled.
	 */
	setup_lowcore_dat_on();

        /* Setup default console */
	conmode_default();
	set_preferred_console();

	apply_alternative_instructions();
	if (IS_ENABLED(CONFIG_EXPOLINE))
		nospec_init_branches();

	/* Setup zfcpdump support */
	setup_zfcpdump();

	/* Add system specific data to the random pool */
	setup_randomness();
}
989