1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2020
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
35 
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include <asm/fpu/api.h>
49 #include "kvm-s390.h"
50 #include "gaccess.h"
51 
52 #define CREATE_TRACE_POINTS
53 #include "trace.h"
54 #include "trace-s390.h"
55 
56 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
57 #define LOCAL_IRQS 32
58 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
59 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
60 
61 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
62 	KVM_GENERIC_VM_STATS(),
63 	STATS_DESC_COUNTER(VM, inject_io),
64 	STATS_DESC_COUNTER(VM, inject_float_mchk),
65 	STATS_DESC_COUNTER(VM, inject_pfault_done),
66 	STATS_DESC_COUNTER(VM, inject_service_signal),
67 	STATS_DESC_COUNTER(VM, inject_virtio)
68 };
69 
70 const struct kvm_stats_header kvm_vm_stats_header = {
71 	.name_size = KVM_STATS_NAME_SIZE,
72 	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
73 	.id_offset = sizeof(struct kvm_stats_header),
74 	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
75 	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
76 		       sizeof(kvm_vm_stats_desc),
77 };
78 
79 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
80 	KVM_GENERIC_VCPU_STATS(),
81 	STATS_DESC_COUNTER(VCPU, exit_userspace),
82 	STATS_DESC_COUNTER(VCPU, exit_null),
83 	STATS_DESC_COUNTER(VCPU, exit_external_request),
84 	STATS_DESC_COUNTER(VCPU, exit_io_request),
85 	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
86 	STATS_DESC_COUNTER(VCPU, exit_stop_request),
87 	STATS_DESC_COUNTER(VCPU, exit_validity),
88 	STATS_DESC_COUNTER(VCPU, exit_instruction),
89 	STATS_DESC_COUNTER(VCPU, exit_pei),
90 	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
91 	STATS_DESC_COUNTER(VCPU, instruction_lctl),
92 	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
93 	STATS_DESC_COUNTER(VCPU, instruction_stctl),
94 	STATS_DESC_COUNTER(VCPU, instruction_stctg),
95 	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
96 	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
97 	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
98 	STATS_DESC_COUNTER(VCPU, deliver_ckc),
99 	STATS_DESC_COUNTER(VCPU, deliver_cputm),
100 	STATS_DESC_COUNTER(VCPU, deliver_external_call),
101 	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
102 	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
103 	STATS_DESC_COUNTER(VCPU, deliver_virtio),
104 	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
105 	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
106 	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
107 	STATS_DESC_COUNTER(VCPU, deliver_program),
108 	STATS_DESC_COUNTER(VCPU, deliver_io),
109 	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
110 	STATS_DESC_COUNTER(VCPU, exit_wait_state),
111 	STATS_DESC_COUNTER(VCPU, inject_ckc),
112 	STATS_DESC_COUNTER(VCPU, inject_cputm),
113 	STATS_DESC_COUNTER(VCPU, inject_external_call),
114 	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
115 	STATS_DESC_COUNTER(VCPU, inject_mchk),
116 	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
117 	STATS_DESC_COUNTER(VCPU, inject_program),
118 	STATS_DESC_COUNTER(VCPU, inject_restart),
119 	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
120 	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
121 	STATS_DESC_COUNTER(VCPU, instruction_epsw),
122 	STATS_DESC_COUNTER(VCPU, instruction_gs),
123 	STATS_DESC_COUNTER(VCPU, instruction_io_other),
124 	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
125 	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
126 	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
127 	STATS_DESC_COUNTER(VCPU, instruction_ptff),
128 	STATS_DESC_COUNTER(VCPU, instruction_sck),
129 	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
130 	STATS_DESC_COUNTER(VCPU, instruction_stidp),
131 	STATS_DESC_COUNTER(VCPU, instruction_spx),
132 	STATS_DESC_COUNTER(VCPU, instruction_stpx),
133 	STATS_DESC_COUNTER(VCPU, instruction_stap),
134 	STATS_DESC_COUNTER(VCPU, instruction_iske),
135 	STATS_DESC_COUNTER(VCPU, instruction_ri),
136 	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
137 	STATS_DESC_COUNTER(VCPU, instruction_sske),
138 	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
139 	STATS_DESC_COUNTER(VCPU, instruction_stsi),
140 	STATS_DESC_COUNTER(VCPU, instruction_stfl),
141 	STATS_DESC_COUNTER(VCPU, instruction_tb),
142 	STATS_DESC_COUNTER(VCPU, instruction_tpi),
143 	STATS_DESC_COUNTER(VCPU, instruction_tprot),
144 	STATS_DESC_COUNTER(VCPU, instruction_tsch),
145 	STATS_DESC_COUNTER(VCPU, instruction_sie),
146 	STATS_DESC_COUNTER(VCPU, instruction_essa),
147 	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
148 	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
149 	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
150 	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
151 	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
152 	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
153 	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
154 	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
155 	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
156 	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
157 	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
158 	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
159 	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
160 	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
161 	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
162 	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
163 	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
164 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
165 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
166 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
167 	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
168 	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
169 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
170 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
171 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
172 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
173 	STATS_DESC_COUNTER(VCPU, pfault_sync)
174 };
175 
176 const struct kvm_stats_header kvm_vcpu_stats_header = {
177 	.name_size = KVM_STATS_NAME_SIZE,
178 	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
179 	.id_offset = sizeof(struct kvm_stats_header),
180 	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
181 	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
182 		       sizeof(kvm_vcpu_stats_desc),
183 };
184 
185 /* allow nested virtualization in KVM (if enabled by user space) */
186 static int nested;
187 module_param(nested, int, S_IRUGO);
188 MODULE_PARM_DESC(nested, "Nested virtualization support");
189 
190 /* allow 1m huge page guest backing, if !nested */
191 static int hpage;
192 module_param(hpage, int, 0444);
193 MODULE_PARM_DESC(hpage, "1m huge page backing support");
194 
195 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
196 static u8 halt_poll_max_steal = 10;
197 module_param(halt_poll_max_steal, byte, 0644);
198 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
199 
200 /* if set to true, the GISA will be initialized and used if available */
201 static bool use_gisa  = true;
202 module_param(use_gisa, bool, 0644);
203 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
204 
205 /* maximum diag9c forwarding per second */
206 unsigned int diag9c_forwarding_hz;
207 module_param(diag9c_forwarding_hz, uint, 0644);
208 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
209 
210 /*
211  * For now we handle at most 16 double words as this is what the s390 base
212  * kernel handles and stores in the prefix page. If we ever need to go beyond
213  * this, the code needs to be changed, but the external uapi can stay.
214  */
215 #define SIZE_INTERNAL 16
216 
217 /*
218  * Base feature mask that defines default mask for facilities. Consists of the
219  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
220  */
221 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
222 /*
223  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
224  * and defines the facilities that can be enabled via a cpu model.
225  */
226 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
227 
228 static unsigned long kvm_s390_fac_size(void)
229 {
230 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
231 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
232 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
233 		sizeof(stfle_fac_list));
234 
235 	return SIZE_INTERNAL;
236 }
237 
238 /* available cpu features supported by kvm */
239 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
240 /* available subfunctions indicated via query / "test bit" */
241 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
242 
243 static struct gmap_notifier gmap_notifier;
244 static struct gmap_notifier vsie_gmap_notifier;
245 debug_info_t *kvm_s390_dbf;
246 debug_info_t *kvm_s390_dbf_uv;
247 
248 /* Section: not file related */
249 int kvm_arch_hardware_enable(void)
250 {
251 	/* every s390 is virtualization enabled ;-) */
252 	return 0;
253 }
254 
255 int kvm_arch_check_processor_compat(void *opaque)
256 {
257 	return 0;
258 }
259 
260 /* forward declarations */
261 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
262 			      unsigned long end);
263 static int sca_switch_to_extended(struct kvm *kvm);
264 
265 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
266 {
267 	u8 delta_idx = 0;
268 
269 	/*
270 	 * The TOD jumps by delta, we have to compensate this by adding
271 	 * -delta to the epoch.
272 	 */
273 	delta = -delta;
274 
275 	/* sign-extension - we're adding to signed values below */
276 	if ((s64)delta < 0)
277 		delta_idx = -1;
278 
279 	scb->epoch += delta;
280 	if (scb->ecd & ECD_MEF) {
281 		scb->epdx += delta_idx;
282 		if (scb->epoch < delta)
283 			scb->epdx += 1;
284 	}
285 }
286 
287 /*
288  * This callback is executed during stop_machine(). All CPUs are therefore
289  * temporarily stopped. In order not to change guest behavior, we have to
290  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
291  * so a CPU won't be stopped while calculating with the epoch.
292  */
293 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
294 			  void *v)
295 {
296 	struct kvm *kvm;
297 	struct kvm_vcpu *vcpu;
298 	int i;
299 	unsigned long long *delta = v;
300 
301 	list_for_each_entry(kvm, &vm_list, vm_list) {
302 		kvm_for_each_vcpu(i, vcpu, kvm) {
303 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
304 			if (i == 0) {
305 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
306 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
307 			}
308 			if (vcpu->arch.cputm_enabled)
309 				vcpu->arch.cputm_start += *delta;
310 			if (vcpu->arch.vsie_block)
311 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
312 						   *delta);
313 		}
314 	}
315 	return NOTIFY_OK;
316 }
317 
318 static struct notifier_block kvm_clock_notifier = {
319 	.notifier_call = kvm_clock_sync,
320 };
321 
322 int kvm_arch_hardware_setup(void *opaque)
323 {
324 	gmap_notifier.notifier_call = kvm_gmap_notifier;
325 	gmap_register_pte_notifier(&gmap_notifier);
326 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
327 	gmap_register_pte_notifier(&vsie_gmap_notifier);
328 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
329 				       &kvm_clock_notifier);
330 	return 0;
331 }
332 
333 void kvm_arch_hardware_unsetup(void)
334 {
335 	gmap_unregister_pte_notifier(&gmap_notifier);
336 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
337 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
338 					 &kvm_clock_notifier);
339 }
340 
341 static void allow_cpu_feat(unsigned long nr)
342 {
343 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
344 }
345 
346 static inline int plo_test_bit(unsigned char nr)
347 {
348 	unsigned long function = (unsigned long)nr | 0x100;
349 	int cc;
350 
351 	asm volatile(
352 		"	lgr	0,%[function]\n"
353 		/* Parameter registers are ignored for "test bit" */
354 		"	plo	0,0,0,0(0)\n"
355 		"	ipm	%0\n"
356 		"	srl	%0,28\n"
357 		: "=d" (cc)
358 		: [function] "d" (function)
359 		: "cc", "0");
360 	return cc == 0;
361 }
362 
363 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
364 {
365 	asm volatile(
366 		"	lghi	0,0\n"
367 		"	lgr	1,%[query]\n"
368 		/* Parameter registers are ignored */
369 		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
370 		:
371 		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
372 		: "cc", "memory", "0", "1");
373 }
374 
375 #define INSN_SORTL 0xb938
376 #define INSN_DFLTCC 0xb939
377 
378 static void kvm_s390_cpu_feat_init(void)
379 {
380 	int i;
381 
382 	for (i = 0; i < 256; ++i) {
383 		if (plo_test_bit(i))
384 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
385 	}
386 
387 	if (test_facility(28)) /* TOD-clock steering */
388 		ptff(kvm_s390_available_subfunc.ptff,
389 		     sizeof(kvm_s390_available_subfunc.ptff),
390 		     PTFF_QAF);
391 
392 	if (test_facility(17)) { /* MSA */
393 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
394 			      kvm_s390_available_subfunc.kmac);
395 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
396 			      kvm_s390_available_subfunc.kmc);
397 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
398 			      kvm_s390_available_subfunc.km);
399 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
400 			      kvm_s390_available_subfunc.kimd);
401 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
402 			      kvm_s390_available_subfunc.klmd);
403 	}
404 	if (test_facility(76)) /* MSA3 */
405 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
406 			      kvm_s390_available_subfunc.pckmo);
407 	if (test_facility(77)) { /* MSA4 */
408 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
409 			      kvm_s390_available_subfunc.kmctr);
410 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
411 			      kvm_s390_available_subfunc.kmf);
412 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
413 			      kvm_s390_available_subfunc.kmo);
414 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
415 			      kvm_s390_available_subfunc.pcc);
416 	}
417 	if (test_facility(57)) /* MSA5 */
418 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
419 			      kvm_s390_available_subfunc.ppno);
420 
421 	if (test_facility(146)) /* MSA8 */
422 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
423 			      kvm_s390_available_subfunc.kma);
424 
425 	if (test_facility(155)) /* MSA9 */
426 		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
427 			      kvm_s390_available_subfunc.kdsa);
428 
429 	if (test_facility(150)) /* SORTL */
430 		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
431 
432 	if (test_facility(151)) /* DFLTCC */
433 		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
434 
435 	if (MACHINE_HAS_ESOP)
436 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
437 	/*
438 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
439 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
440 	 */
441 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
442 	    !test_facility(3) || !nested)
443 		return;
444 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
445 	if (sclp.has_64bscao)
446 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
447 	if (sclp.has_siif)
448 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
449 	if (sclp.has_gpere)
450 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
451 	if (sclp.has_gsls)
452 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
453 	if (sclp.has_ib)
454 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
455 	if (sclp.has_cei)
456 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
457 	if (sclp.has_ibs)
458 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
459 	if (sclp.has_kss)
460 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
461 	/*
462 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
463 	 * all skey handling functions read/set the skey from the PGSTE
464 	 * instead of the real storage key.
465 	 *
466 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
467 	 * pages being detected as preserved although they are resident.
468 	 *
469 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
470 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
471 	 *
472 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
473 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
474 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
475 	 *
476 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
477 	 * cannot easily shadow the SCA because of the ipte lock.
478 	 */
479 }
480 
481 int kvm_arch_init(void *opaque)
482 {
483 	int rc = -ENOMEM;
484 
485 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
486 	if (!kvm_s390_dbf)
487 		return -ENOMEM;
488 
489 	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
490 	if (!kvm_s390_dbf_uv)
491 		goto out;
492 
493 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
494 	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
495 		goto out;
496 
497 	kvm_s390_cpu_feat_init();
498 
499 	/* Register floating interrupt controller interface. */
500 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
501 	if (rc) {
502 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
503 		goto out;
504 	}
505 
506 	rc = kvm_s390_gib_init(GAL_ISC);
507 	if (rc)
508 		goto out;
509 
510 	return 0;
511 
512 out:
513 	kvm_arch_exit();
514 	return rc;
515 }
516 
517 void kvm_arch_exit(void)
518 {
519 	kvm_s390_gib_destroy();
520 	debug_unregister(kvm_s390_dbf);
521 	debug_unregister(kvm_s390_dbf_uv);
522 }
523 
524 /* Section: device related */
525 long kvm_arch_dev_ioctl(struct file *filp,
526 			unsigned int ioctl, unsigned long arg)
527 {
528 	if (ioctl == KVM_S390_ENABLE_SIE)
529 		return s390_enable_sie();
530 	return -EINVAL;
531 }
532 
533 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
534 {
535 	int r;
536 
537 	switch (ext) {
538 	case KVM_CAP_S390_PSW:
539 	case KVM_CAP_S390_GMAP:
540 	case KVM_CAP_SYNC_MMU:
541 #ifdef CONFIG_KVM_S390_UCONTROL
542 	case KVM_CAP_S390_UCONTROL:
543 #endif
544 	case KVM_CAP_ASYNC_PF:
545 	case KVM_CAP_SYNC_REGS:
546 	case KVM_CAP_ONE_REG:
547 	case KVM_CAP_ENABLE_CAP:
548 	case KVM_CAP_S390_CSS_SUPPORT:
549 	case KVM_CAP_IOEVENTFD:
550 	case KVM_CAP_DEVICE_CTRL:
551 	case KVM_CAP_S390_IRQCHIP:
552 	case KVM_CAP_VM_ATTRIBUTES:
553 	case KVM_CAP_MP_STATE:
554 	case KVM_CAP_IMMEDIATE_EXIT:
555 	case KVM_CAP_S390_INJECT_IRQ:
556 	case KVM_CAP_S390_USER_SIGP:
557 	case KVM_CAP_S390_USER_STSI:
558 	case KVM_CAP_S390_SKEYS:
559 	case KVM_CAP_S390_IRQ_STATE:
560 	case KVM_CAP_S390_USER_INSTR0:
561 	case KVM_CAP_S390_CMMA_MIGRATION:
562 	case KVM_CAP_S390_AIS:
563 	case KVM_CAP_S390_AIS_MIGRATION:
564 	case KVM_CAP_S390_VCPU_RESETS:
565 	case KVM_CAP_SET_GUEST_DEBUG:
566 	case KVM_CAP_S390_DIAG318:
567 		r = 1;
568 		break;
569 	case KVM_CAP_SET_GUEST_DEBUG2:
570 		r = KVM_GUESTDBG_VALID_MASK;
571 		break;
572 	case KVM_CAP_S390_HPAGE_1M:
573 		r = 0;
574 		if (hpage && !kvm_is_ucontrol(kvm))
575 			r = 1;
576 		break;
577 	case KVM_CAP_S390_MEM_OP:
578 		r = MEM_OP_MAX_SIZE;
579 		break;
580 	case KVM_CAP_NR_VCPUS:
581 	case KVM_CAP_MAX_VCPUS:
582 	case KVM_CAP_MAX_VCPU_ID:
583 		r = KVM_S390_BSCA_CPU_SLOTS;
584 		if (!kvm_s390_use_sca_entries())
585 			r = KVM_MAX_VCPUS;
586 		else if (sclp.has_esca && sclp.has_64bscao)
587 			r = KVM_S390_ESCA_CPU_SLOTS;
588 		break;
589 	case KVM_CAP_S390_COW:
590 		r = MACHINE_HAS_ESOP;
591 		break;
592 	case KVM_CAP_S390_VECTOR_REGISTERS:
593 		r = MACHINE_HAS_VX;
594 		break;
595 	case KVM_CAP_S390_RI:
596 		r = test_facility(64);
597 		break;
598 	case KVM_CAP_S390_GS:
599 		r = test_facility(133);
600 		break;
601 	case KVM_CAP_S390_BPB:
602 		r = test_facility(82);
603 		break;
604 	case KVM_CAP_S390_PROTECTED:
605 		r = is_prot_virt_host();
606 		break;
607 	default:
608 		r = 0;
609 	}
610 	return r;
611 }
612 
613 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
614 {
615 	int i;
616 	gfn_t cur_gfn, last_gfn;
617 	unsigned long gaddr, vmaddr;
618 	struct gmap *gmap = kvm->arch.gmap;
619 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
620 
621 	/* Loop over all guest segments */
622 	cur_gfn = memslot->base_gfn;
623 	last_gfn = memslot->base_gfn + memslot->npages;
624 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
625 		gaddr = gfn_to_gpa(cur_gfn);
626 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
627 		if (kvm_is_error_hva(vmaddr))
628 			continue;
629 
630 		bitmap_zero(bitmap, _PAGE_ENTRIES);
631 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
632 		for (i = 0; i < _PAGE_ENTRIES; i++) {
633 			if (test_bit(i, bitmap))
634 				mark_page_dirty(kvm, cur_gfn + i);
635 		}
636 
637 		if (fatal_signal_pending(current))
638 			return;
639 		cond_resched();
640 	}
641 }
642 
643 /* Section: vm related */
644 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
645 
646 /*
647  * Get (and clear) the dirty memory log for a memory slot.
648  */
649 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
650 			       struct kvm_dirty_log *log)
651 {
652 	int r;
653 	unsigned long n;
654 	struct kvm_memory_slot *memslot;
655 	int is_dirty;
656 
657 	if (kvm_is_ucontrol(kvm))
658 		return -EINVAL;
659 
660 	mutex_lock(&kvm->slots_lock);
661 
662 	r = -EINVAL;
663 	if (log->slot >= KVM_USER_MEM_SLOTS)
664 		goto out;
665 
666 	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
667 	if (r)
668 		goto out;
669 
670 	/* Clear the dirty log */
671 	if (is_dirty) {
672 		n = kvm_dirty_bitmap_bytes(memslot);
673 		memset(memslot->dirty_bitmap, 0, n);
674 	}
675 	r = 0;
676 out:
677 	mutex_unlock(&kvm->slots_lock);
678 	return r;
679 }
680 
681 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
682 {
683 	unsigned int i;
684 	struct kvm_vcpu *vcpu;
685 
686 	kvm_for_each_vcpu(i, vcpu, kvm) {
687 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
688 	}
689 }
690 
691 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
692 {
693 	int r;
694 
695 	if (cap->flags)
696 		return -EINVAL;
697 
698 	switch (cap->cap) {
699 	case KVM_CAP_S390_IRQCHIP:
700 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
701 		kvm->arch.use_irqchip = 1;
702 		r = 0;
703 		break;
704 	case KVM_CAP_S390_USER_SIGP:
705 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
706 		kvm->arch.user_sigp = 1;
707 		r = 0;
708 		break;
709 	case KVM_CAP_S390_VECTOR_REGISTERS:
710 		mutex_lock(&kvm->lock);
711 		if (kvm->created_vcpus) {
712 			r = -EBUSY;
713 		} else if (MACHINE_HAS_VX) {
714 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
715 			set_kvm_facility(kvm->arch.model.fac_list, 129);
716 			if (test_facility(134)) {
717 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
718 				set_kvm_facility(kvm->arch.model.fac_list, 134);
719 			}
720 			if (test_facility(135)) {
721 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
722 				set_kvm_facility(kvm->arch.model.fac_list, 135);
723 			}
724 			if (test_facility(148)) {
725 				set_kvm_facility(kvm->arch.model.fac_mask, 148);
726 				set_kvm_facility(kvm->arch.model.fac_list, 148);
727 			}
728 			if (test_facility(152)) {
729 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
730 				set_kvm_facility(kvm->arch.model.fac_list, 152);
731 			}
732 			if (test_facility(192)) {
733 				set_kvm_facility(kvm->arch.model.fac_mask, 192);
734 				set_kvm_facility(kvm->arch.model.fac_list, 192);
735 			}
736 			r = 0;
737 		} else
738 			r = -EINVAL;
739 		mutex_unlock(&kvm->lock);
740 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
741 			 r ? "(not available)" : "(success)");
742 		break;
743 	case KVM_CAP_S390_RI:
744 		r = -EINVAL;
745 		mutex_lock(&kvm->lock);
746 		if (kvm->created_vcpus) {
747 			r = -EBUSY;
748 		} else if (test_facility(64)) {
749 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
750 			set_kvm_facility(kvm->arch.model.fac_list, 64);
751 			r = 0;
752 		}
753 		mutex_unlock(&kvm->lock);
754 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
755 			 r ? "(not available)" : "(success)");
756 		break;
757 	case KVM_CAP_S390_AIS:
758 		mutex_lock(&kvm->lock);
759 		if (kvm->created_vcpus) {
760 			r = -EBUSY;
761 		} else {
762 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
763 			set_kvm_facility(kvm->arch.model.fac_list, 72);
764 			r = 0;
765 		}
766 		mutex_unlock(&kvm->lock);
767 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
768 			 r ? "(not available)" : "(success)");
769 		break;
770 	case KVM_CAP_S390_GS:
771 		r = -EINVAL;
772 		mutex_lock(&kvm->lock);
773 		if (kvm->created_vcpus) {
774 			r = -EBUSY;
775 		} else if (test_facility(133)) {
776 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
777 			set_kvm_facility(kvm->arch.model.fac_list, 133);
778 			r = 0;
779 		}
780 		mutex_unlock(&kvm->lock);
781 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
782 			 r ? "(not available)" : "(success)");
783 		break;
784 	case KVM_CAP_S390_HPAGE_1M:
785 		mutex_lock(&kvm->lock);
786 		if (kvm->created_vcpus)
787 			r = -EBUSY;
788 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
789 			r = -EINVAL;
790 		else {
791 			r = 0;
792 			mmap_write_lock(kvm->mm);
793 			kvm->mm->context.allow_gmap_hpage_1m = 1;
794 			mmap_write_unlock(kvm->mm);
795 			/*
796 			 * We might have to create fake 4k page
797 			 * tables. To avoid that the hardware works on
798 			 * stale PGSTEs, we emulate these instructions.
799 			 */
800 			kvm->arch.use_skf = 0;
801 			kvm->arch.use_pfmfi = 0;
802 		}
803 		mutex_unlock(&kvm->lock);
804 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
805 			 r ? "(not available)" : "(success)");
806 		break;
807 	case KVM_CAP_S390_USER_STSI:
808 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
809 		kvm->arch.user_stsi = 1;
810 		r = 0;
811 		break;
812 	case KVM_CAP_S390_USER_INSTR0:
813 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
814 		kvm->arch.user_instr0 = 1;
815 		icpt_operexc_on_all_vcpus(kvm);
816 		r = 0;
817 		break;
818 	default:
819 		r = -EINVAL;
820 		break;
821 	}
822 	return r;
823 }
824 
825 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
826 {
827 	int ret;
828 
829 	switch (attr->attr) {
830 	case KVM_S390_VM_MEM_LIMIT_SIZE:
831 		ret = 0;
832 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
833 			 kvm->arch.mem_limit);
834 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
835 			ret = -EFAULT;
836 		break;
837 	default:
838 		ret = -ENXIO;
839 		break;
840 	}
841 	return ret;
842 }
843 
844 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
845 {
846 	int ret;
847 	unsigned int idx;
848 	switch (attr->attr) {
849 	case KVM_S390_VM_MEM_ENABLE_CMMA:
850 		ret = -ENXIO;
851 		if (!sclp.has_cmma)
852 			break;
853 
854 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
855 		mutex_lock(&kvm->lock);
856 		if (kvm->created_vcpus)
857 			ret = -EBUSY;
858 		else if (kvm->mm->context.allow_gmap_hpage_1m)
859 			ret = -EINVAL;
860 		else {
861 			kvm->arch.use_cmma = 1;
862 			/* Not compatible with cmma. */
863 			kvm->arch.use_pfmfi = 0;
864 			ret = 0;
865 		}
866 		mutex_unlock(&kvm->lock);
867 		break;
868 	case KVM_S390_VM_MEM_CLR_CMMA:
869 		ret = -ENXIO;
870 		if (!sclp.has_cmma)
871 			break;
872 		ret = -EINVAL;
873 		if (!kvm->arch.use_cmma)
874 			break;
875 
876 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
877 		mutex_lock(&kvm->lock);
878 		idx = srcu_read_lock(&kvm->srcu);
879 		s390_reset_cmma(kvm->arch.gmap->mm);
880 		srcu_read_unlock(&kvm->srcu, idx);
881 		mutex_unlock(&kvm->lock);
882 		ret = 0;
883 		break;
884 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
885 		unsigned long new_limit;
886 
887 		if (kvm_is_ucontrol(kvm))
888 			return -EINVAL;
889 
890 		if (get_user(new_limit, (u64 __user *)attr->addr))
891 			return -EFAULT;
892 
893 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
894 		    new_limit > kvm->arch.mem_limit)
895 			return -E2BIG;
896 
897 		if (!new_limit)
898 			return -EINVAL;
899 
900 		/* gmap_create takes last usable address */
901 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
902 			new_limit -= 1;
903 
904 		ret = -EBUSY;
905 		mutex_lock(&kvm->lock);
906 		if (!kvm->created_vcpus) {
907 			/* gmap_create will round the limit up */
908 			struct gmap *new = gmap_create(current->mm, new_limit);
909 
910 			if (!new) {
911 				ret = -ENOMEM;
912 			} else {
913 				gmap_remove(kvm->arch.gmap);
914 				new->private = kvm;
915 				kvm->arch.gmap = new;
916 				ret = 0;
917 			}
918 		}
919 		mutex_unlock(&kvm->lock);
920 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
921 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
922 			 (void *) kvm->arch.gmap->asce);
923 		break;
924 	}
925 	default:
926 		ret = -ENXIO;
927 		break;
928 	}
929 	return ret;
930 }
931 
932 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
933 
934 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
935 {
936 	struct kvm_vcpu *vcpu;
937 	int i;
938 
939 	kvm_s390_vcpu_block_all(kvm);
940 
941 	kvm_for_each_vcpu(i, vcpu, kvm) {
942 		kvm_s390_vcpu_crypto_setup(vcpu);
943 		/* recreate the shadow crycb by leaving the VSIE handler */
944 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
945 	}
946 
947 	kvm_s390_vcpu_unblock_all(kvm);
948 }
949 
950 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
951 {
952 	mutex_lock(&kvm->lock);
953 	switch (attr->attr) {
954 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
955 		if (!test_kvm_facility(kvm, 76)) {
956 			mutex_unlock(&kvm->lock);
957 			return -EINVAL;
958 		}
959 		get_random_bytes(
960 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
961 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
962 		kvm->arch.crypto.aes_kw = 1;
963 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
964 		break;
965 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
966 		if (!test_kvm_facility(kvm, 76)) {
967 			mutex_unlock(&kvm->lock);
968 			return -EINVAL;
969 		}
970 		get_random_bytes(
971 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
972 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
973 		kvm->arch.crypto.dea_kw = 1;
974 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
975 		break;
976 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
977 		if (!test_kvm_facility(kvm, 76)) {
978 			mutex_unlock(&kvm->lock);
979 			return -EINVAL;
980 		}
981 		kvm->arch.crypto.aes_kw = 0;
982 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
983 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
984 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
985 		break;
986 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
987 		if (!test_kvm_facility(kvm, 76)) {
988 			mutex_unlock(&kvm->lock);
989 			return -EINVAL;
990 		}
991 		kvm->arch.crypto.dea_kw = 0;
992 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
993 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
994 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
995 		break;
996 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
997 		if (!ap_instructions_available()) {
998 			mutex_unlock(&kvm->lock);
999 			return -EOPNOTSUPP;
1000 		}
1001 		kvm->arch.crypto.apie = 1;
1002 		break;
1003 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1004 		if (!ap_instructions_available()) {
1005 			mutex_unlock(&kvm->lock);
1006 			return -EOPNOTSUPP;
1007 		}
1008 		kvm->arch.crypto.apie = 0;
1009 		break;
1010 	default:
1011 		mutex_unlock(&kvm->lock);
1012 		return -ENXIO;
1013 	}
1014 
1015 	kvm_s390_vcpu_crypto_reset_all(kvm);
1016 	mutex_unlock(&kvm->lock);
1017 	return 0;
1018 }
1019 
1020 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1021 {
1022 	int cx;
1023 	struct kvm_vcpu *vcpu;
1024 
1025 	kvm_for_each_vcpu(cx, vcpu, kvm)
1026 		kvm_s390_sync_request(req, vcpu);
1027 }
1028 
1029 /*
1030  * Must be called with kvm->srcu held to avoid races on memslots, and with
1031  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1032  */
1033 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1034 {
1035 	struct kvm_memory_slot *ms;
1036 	struct kvm_memslots *slots;
1037 	unsigned long ram_pages = 0;
1038 	int slotnr;
1039 
1040 	/* migration mode already enabled */
1041 	if (kvm->arch.migration_mode)
1042 		return 0;
1043 	slots = kvm_memslots(kvm);
1044 	if (!slots || !slots->used_slots)
1045 		return -EINVAL;
1046 
1047 	if (!kvm->arch.use_cmma) {
1048 		kvm->arch.migration_mode = 1;
1049 		return 0;
1050 	}
1051 	/* mark all the pages in active slots as dirty */
1052 	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1053 		ms = slots->memslots + slotnr;
1054 		if (!ms->dirty_bitmap)
1055 			return -EINVAL;
1056 		/*
1057 		 * The second half of the bitmap is only used on x86,
1058 		 * and would be wasted otherwise, so we put it to good
1059 		 * use here to keep track of the state of the storage
1060 		 * attributes.
1061 		 */
1062 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1063 		ram_pages += ms->npages;
1064 	}
1065 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1066 	kvm->arch.migration_mode = 1;
1067 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1068 	return 0;
1069 }
1070 
1071 /*
1072  * Must be called with kvm->slots_lock to avoid races with ourselves and
1073  * kvm_s390_vm_start_migration.
1074  */
1075 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1076 {
1077 	/* migration mode already disabled */
1078 	if (!kvm->arch.migration_mode)
1079 		return 0;
1080 	kvm->arch.migration_mode = 0;
1081 	if (kvm->arch.use_cmma)
1082 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1083 	return 0;
1084 }
1085 
1086 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1087 				     struct kvm_device_attr *attr)
1088 {
1089 	int res = -ENXIO;
1090 
1091 	mutex_lock(&kvm->slots_lock);
1092 	switch (attr->attr) {
1093 	case KVM_S390_VM_MIGRATION_START:
1094 		res = kvm_s390_vm_start_migration(kvm);
1095 		break;
1096 	case KVM_S390_VM_MIGRATION_STOP:
1097 		res = kvm_s390_vm_stop_migration(kvm);
1098 		break;
1099 	default:
1100 		break;
1101 	}
1102 	mutex_unlock(&kvm->slots_lock);
1103 
1104 	return res;
1105 }
1106 
1107 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1108 				     struct kvm_device_attr *attr)
1109 {
1110 	u64 mig = kvm->arch.migration_mode;
1111 
1112 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1113 		return -ENXIO;
1114 
1115 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1116 		return -EFAULT;
1117 	return 0;
1118 }
1119 
1120 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
1121 
1122 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1123 {
1124 	struct kvm_s390_vm_tod_clock gtod;
1125 
1126 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1127 		return -EFAULT;
1128 
1129 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1130 		return -EINVAL;
1131 	__kvm_s390_set_tod_clock(kvm, &gtod);
1132 
1133 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1134 		gtod.epoch_idx, gtod.tod);
1135 
1136 	return 0;
1137 }
1138 
1139 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1140 {
1141 	u8 gtod_high;
1142 
1143 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1144 					   sizeof(gtod_high)))
1145 		return -EFAULT;
1146 
1147 	if (gtod_high != 0)
1148 		return -EINVAL;
1149 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1150 
1151 	return 0;
1152 }
1153 
1154 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1155 {
1156 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1157 
1158 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1159 			   sizeof(gtod.tod)))
1160 		return -EFAULT;
1161 
1162 	__kvm_s390_set_tod_clock(kvm, &gtod);
1163 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1164 	return 0;
1165 }
1166 
1167 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1168 {
1169 	int ret;
1170 
1171 	if (attr->flags)
1172 		return -EINVAL;
1173 
1174 	mutex_lock(&kvm->lock);
1175 	/*
1176 	 * For protected guests, the TOD is managed by the ultravisor, so trying
1177 	 * to change it will never bring the expected results.
1178 	 */
1179 	if (kvm_s390_pv_is_protected(kvm)) {
1180 		ret = -EOPNOTSUPP;
1181 		goto out_unlock;
1182 	}
1183 
1184 	switch (attr->attr) {
1185 	case KVM_S390_VM_TOD_EXT:
1186 		ret = kvm_s390_set_tod_ext(kvm, attr);
1187 		break;
1188 	case KVM_S390_VM_TOD_HIGH:
1189 		ret = kvm_s390_set_tod_high(kvm, attr);
1190 		break;
1191 	case KVM_S390_VM_TOD_LOW:
1192 		ret = kvm_s390_set_tod_low(kvm, attr);
1193 		break;
1194 	default:
1195 		ret = -ENXIO;
1196 		break;
1197 	}
1198 
1199 out_unlock:
1200 	mutex_unlock(&kvm->lock);
1201 	return ret;
1202 }
1203 
1204 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1205 				   struct kvm_s390_vm_tod_clock *gtod)
1206 {
1207 	union tod_clock clk;
1208 
1209 	preempt_disable();
1210 
1211 	store_tod_clock_ext(&clk);
1212 
1213 	gtod->tod = clk.tod + kvm->arch.epoch;
1214 	gtod->epoch_idx = 0;
1215 	if (test_kvm_facility(kvm, 139)) {
1216 		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1217 		if (gtod->tod < clk.tod)
1218 			gtod->epoch_idx += 1;
1219 	}
1220 
1221 	preempt_enable();
1222 }
1223 
1224 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1225 {
1226 	struct kvm_s390_vm_tod_clock gtod;
1227 
1228 	memset(&gtod, 0, sizeof(gtod));
1229 	kvm_s390_get_tod_clock(kvm, &gtod);
1230 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1231 		return -EFAULT;
1232 
1233 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1234 		gtod.epoch_idx, gtod.tod);
1235 	return 0;
1236 }
1237 
1238 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1239 {
1240 	u8 gtod_high = 0;
1241 
1242 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1243 					 sizeof(gtod_high)))
1244 		return -EFAULT;
1245 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1246 
1247 	return 0;
1248 }
1249 
1250 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1251 {
1252 	u64 gtod;
1253 
1254 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1255 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1256 		return -EFAULT;
1257 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1258 
1259 	return 0;
1260 }
1261 
1262 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1263 {
1264 	int ret;
1265 
1266 	if (attr->flags)
1267 		return -EINVAL;
1268 
1269 	switch (attr->attr) {
1270 	case KVM_S390_VM_TOD_EXT:
1271 		ret = kvm_s390_get_tod_ext(kvm, attr);
1272 		break;
1273 	case KVM_S390_VM_TOD_HIGH:
1274 		ret = kvm_s390_get_tod_high(kvm, attr);
1275 		break;
1276 	case KVM_S390_VM_TOD_LOW:
1277 		ret = kvm_s390_get_tod_low(kvm, attr);
1278 		break;
1279 	default:
1280 		ret = -ENXIO;
1281 		break;
1282 	}
1283 	return ret;
1284 }
1285 
1286 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1287 {
1288 	struct kvm_s390_vm_cpu_processor *proc;
1289 	u16 lowest_ibc, unblocked_ibc;
1290 	int ret = 0;
1291 
1292 	mutex_lock(&kvm->lock);
1293 	if (kvm->created_vcpus) {
1294 		ret = -EBUSY;
1295 		goto out;
1296 	}
1297 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1298 	if (!proc) {
1299 		ret = -ENOMEM;
1300 		goto out;
1301 	}
1302 	if (!copy_from_user(proc, (void __user *)attr->addr,
1303 			    sizeof(*proc))) {
1304 		kvm->arch.model.cpuid = proc->cpuid;
1305 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1306 		unblocked_ibc = sclp.ibc & 0xfff;
1307 		if (lowest_ibc && proc->ibc) {
1308 			if (proc->ibc > unblocked_ibc)
1309 				kvm->arch.model.ibc = unblocked_ibc;
1310 			else if (proc->ibc < lowest_ibc)
1311 				kvm->arch.model.ibc = lowest_ibc;
1312 			else
1313 				kvm->arch.model.ibc = proc->ibc;
1314 		}
1315 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1316 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1317 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1318 			 kvm->arch.model.ibc,
1319 			 kvm->arch.model.cpuid);
1320 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1321 			 kvm->arch.model.fac_list[0],
1322 			 kvm->arch.model.fac_list[1],
1323 			 kvm->arch.model.fac_list[2]);
1324 	} else
1325 		ret = -EFAULT;
1326 	kfree(proc);
1327 out:
1328 	mutex_unlock(&kvm->lock);
1329 	return ret;
1330 }
1331 
1332 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1333 				       struct kvm_device_attr *attr)
1334 {
1335 	struct kvm_s390_vm_cpu_feat data;
1336 
1337 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1338 		return -EFAULT;
1339 	if (!bitmap_subset((unsigned long *) data.feat,
1340 			   kvm_s390_available_cpu_feat,
1341 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1342 		return -EINVAL;
1343 
1344 	mutex_lock(&kvm->lock);
1345 	if (kvm->created_vcpus) {
1346 		mutex_unlock(&kvm->lock);
1347 		return -EBUSY;
1348 	}
1349 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1350 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1351 	mutex_unlock(&kvm->lock);
1352 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1353 			 data.feat[0],
1354 			 data.feat[1],
1355 			 data.feat[2]);
1356 	return 0;
1357 }
1358 
1359 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1360 					  struct kvm_device_attr *attr)
1361 {
1362 	mutex_lock(&kvm->lock);
1363 	if (kvm->created_vcpus) {
1364 		mutex_unlock(&kvm->lock);
1365 		return -EBUSY;
1366 	}
1367 
1368 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1369 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1370 		mutex_unlock(&kvm->lock);
1371 		return -EFAULT;
1372 	}
1373 	mutex_unlock(&kvm->lock);
1374 
1375 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1376 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1377 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1378 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1379 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1380 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1381 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1382 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1383 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1384 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1385 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1386 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1387 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1388 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1389 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1390 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1391 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1392 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1393 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1394 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1395 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1396 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1397 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1398 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1399 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1400 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1401 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1402 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1403 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1404 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1405 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1406 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1407 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1408 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1409 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1410 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1411 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1412 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1413 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1414 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1415 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1416 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1417 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1418 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1419 	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1420 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1421 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1422 	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1423 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1424 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1425 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1426 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1427 	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1428 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1429 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1430 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1431 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1432 
1433 	return 0;
1434 }
1435 
1436 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1437 {
1438 	int ret = -ENXIO;
1439 
1440 	switch (attr->attr) {
1441 	case KVM_S390_VM_CPU_PROCESSOR:
1442 		ret = kvm_s390_set_processor(kvm, attr);
1443 		break;
1444 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1445 		ret = kvm_s390_set_processor_feat(kvm, attr);
1446 		break;
1447 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1448 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1449 		break;
1450 	}
1451 	return ret;
1452 }
1453 
1454 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1455 {
1456 	struct kvm_s390_vm_cpu_processor *proc;
1457 	int ret = 0;
1458 
1459 	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1460 	if (!proc) {
1461 		ret = -ENOMEM;
1462 		goto out;
1463 	}
1464 	proc->cpuid = kvm->arch.model.cpuid;
1465 	proc->ibc = kvm->arch.model.ibc;
1466 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1467 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1468 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1469 		 kvm->arch.model.ibc,
1470 		 kvm->arch.model.cpuid);
1471 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1472 		 kvm->arch.model.fac_list[0],
1473 		 kvm->arch.model.fac_list[1],
1474 		 kvm->arch.model.fac_list[2]);
1475 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1476 		ret = -EFAULT;
1477 	kfree(proc);
1478 out:
1479 	return ret;
1480 }
1481 
1482 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1483 {
1484 	struct kvm_s390_vm_cpu_machine *mach;
1485 	int ret = 0;
1486 
1487 	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1488 	if (!mach) {
1489 		ret = -ENOMEM;
1490 		goto out;
1491 	}
1492 	get_cpu_id((struct cpuid *) &mach->cpuid);
1493 	mach->ibc = sclp.ibc;
1494 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1495 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1496 	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1497 	       sizeof(stfle_fac_list));
1498 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1499 		 kvm->arch.model.ibc,
1500 		 kvm->arch.model.cpuid);
1501 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1502 		 mach->fac_mask[0],
1503 		 mach->fac_mask[1],
1504 		 mach->fac_mask[2]);
1505 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1506 		 mach->fac_list[0],
1507 		 mach->fac_list[1],
1508 		 mach->fac_list[2]);
1509 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1510 		ret = -EFAULT;
1511 	kfree(mach);
1512 out:
1513 	return ret;
1514 }
1515 
1516 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1517 				       struct kvm_device_attr *attr)
1518 {
1519 	struct kvm_s390_vm_cpu_feat data;
1520 
1521 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1522 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1523 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1524 		return -EFAULT;
1525 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1526 			 data.feat[0],
1527 			 data.feat[1],
1528 			 data.feat[2]);
1529 	return 0;
1530 }
1531 
1532 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1533 				     struct kvm_device_attr *attr)
1534 {
1535 	struct kvm_s390_vm_cpu_feat data;
1536 
1537 	bitmap_copy((unsigned long *) data.feat,
1538 		    kvm_s390_available_cpu_feat,
1539 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1540 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1541 		return -EFAULT;
1542 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1543 			 data.feat[0],
1544 			 data.feat[1],
1545 			 data.feat[2]);
1546 	return 0;
1547 }
1548 
1549 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1550 					  struct kvm_device_attr *attr)
1551 {
1552 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1553 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1554 		return -EFAULT;
1555 
1556 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1557 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1558 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1559 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1560 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1561 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1562 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1563 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1564 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1565 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1566 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1567 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1568 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1569 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1570 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1571 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1572 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1573 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1574 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1575 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1576 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1577 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1578 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1579 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1580 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1581 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1582 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1583 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1584 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1585 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1586 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1587 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1588 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1589 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1590 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1591 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1592 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1593 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1594 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1595 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1596 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1597 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1598 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1599 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1600 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1601 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1602 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1603 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1604 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1605 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1606 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1607 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1608 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1609 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1610 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1611 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1612 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1613 
1614 	return 0;
1615 }
1616 
1617 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1618 					struct kvm_device_attr *attr)
1619 {
1620 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1621 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1622 		return -EFAULT;
1623 
1624 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1625 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1626 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1627 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1628 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1629 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1630 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1631 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1632 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1633 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1634 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1635 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1636 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1637 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1638 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1639 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1640 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1641 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1642 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1643 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1644 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1645 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1646 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1647 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1648 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1649 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1650 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1651 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1652 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1653 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1654 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1655 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1656 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1657 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1658 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1659 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1660 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1661 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1662 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1663 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1664 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1665 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1666 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1667 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1668 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1669 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1670 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1671 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1672 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1673 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1674 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1675 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1676 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1677 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1678 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1679 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1680 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1681 
1682 	return 0;
1683 }
1684 
1685 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1686 {
1687 	int ret = -ENXIO;
1688 
1689 	switch (attr->attr) {
1690 	case KVM_S390_VM_CPU_PROCESSOR:
1691 		ret = kvm_s390_get_processor(kvm, attr);
1692 		break;
1693 	case KVM_S390_VM_CPU_MACHINE:
1694 		ret = kvm_s390_get_machine(kvm, attr);
1695 		break;
1696 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1697 		ret = kvm_s390_get_processor_feat(kvm, attr);
1698 		break;
1699 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1700 		ret = kvm_s390_get_machine_feat(kvm, attr);
1701 		break;
1702 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1703 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1704 		break;
1705 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1706 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1707 		break;
1708 	}
1709 	return ret;
1710 }
1711 
1712 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1713 {
1714 	int ret;
1715 
1716 	switch (attr->group) {
1717 	case KVM_S390_VM_MEM_CTRL:
1718 		ret = kvm_s390_set_mem_control(kvm, attr);
1719 		break;
1720 	case KVM_S390_VM_TOD:
1721 		ret = kvm_s390_set_tod(kvm, attr);
1722 		break;
1723 	case KVM_S390_VM_CPU_MODEL:
1724 		ret = kvm_s390_set_cpu_model(kvm, attr);
1725 		break;
1726 	case KVM_S390_VM_CRYPTO:
1727 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1728 		break;
1729 	case KVM_S390_VM_MIGRATION:
1730 		ret = kvm_s390_vm_set_migration(kvm, attr);
1731 		break;
1732 	default:
1733 		ret = -ENXIO;
1734 		break;
1735 	}
1736 
1737 	return ret;
1738 }
1739 
1740 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1741 {
1742 	int ret;
1743 
1744 	switch (attr->group) {
1745 	case KVM_S390_VM_MEM_CTRL:
1746 		ret = kvm_s390_get_mem_control(kvm, attr);
1747 		break;
1748 	case KVM_S390_VM_TOD:
1749 		ret = kvm_s390_get_tod(kvm, attr);
1750 		break;
1751 	case KVM_S390_VM_CPU_MODEL:
1752 		ret = kvm_s390_get_cpu_model(kvm, attr);
1753 		break;
1754 	case KVM_S390_VM_MIGRATION:
1755 		ret = kvm_s390_vm_get_migration(kvm, attr);
1756 		break;
1757 	default:
1758 		ret = -ENXIO;
1759 		break;
1760 	}
1761 
1762 	return ret;
1763 }
1764 
1765 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1766 {
1767 	int ret;
1768 
1769 	switch (attr->group) {
1770 	case KVM_S390_VM_MEM_CTRL:
1771 		switch (attr->attr) {
1772 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1773 		case KVM_S390_VM_MEM_CLR_CMMA:
1774 			ret = sclp.has_cmma ? 0 : -ENXIO;
1775 			break;
1776 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1777 			ret = 0;
1778 			break;
1779 		default:
1780 			ret = -ENXIO;
1781 			break;
1782 		}
1783 		break;
1784 	case KVM_S390_VM_TOD:
1785 		switch (attr->attr) {
1786 		case KVM_S390_VM_TOD_LOW:
1787 		case KVM_S390_VM_TOD_HIGH:
1788 			ret = 0;
1789 			break;
1790 		default:
1791 			ret = -ENXIO;
1792 			break;
1793 		}
1794 		break;
1795 	case KVM_S390_VM_CPU_MODEL:
1796 		switch (attr->attr) {
1797 		case KVM_S390_VM_CPU_PROCESSOR:
1798 		case KVM_S390_VM_CPU_MACHINE:
1799 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1800 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1801 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1802 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1803 			ret = 0;
1804 			break;
1805 		default:
1806 			ret = -ENXIO;
1807 			break;
1808 		}
1809 		break;
1810 	case KVM_S390_VM_CRYPTO:
1811 		switch (attr->attr) {
1812 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1813 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1814 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1815 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1816 			ret = 0;
1817 			break;
1818 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1819 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1820 			ret = ap_instructions_available() ? 0 : -ENXIO;
1821 			break;
1822 		default:
1823 			ret = -ENXIO;
1824 			break;
1825 		}
1826 		break;
1827 	case KVM_S390_VM_MIGRATION:
1828 		ret = 0;
1829 		break;
1830 	default:
1831 		ret = -ENXIO;
1832 		break;
1833 	}
1834 
1835 	return ret;
1836 }
1837 
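/*
 * Read the guest storage keys for args->count consecutive pages starting at
 * args->start_gfn and copy them to the user buffer at args->skeydata_addr.
 */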
1838 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1839 {
1840 	uint8_t *keys;
1841 	uint64_t hva;
1842 	int srcu_idx, i, r = 0;
1843 
1844 	if (args->flags != 0)
1845 		return -EINVAL;
1846 
1847 	/* Is this guest using storage keys? */
1848 	if (!mm_uses_skeys(current->mm))
1849 		return KVM_S390_GET_SKEYS_NONE;
1850 
1851 	/* Enforce sane limit on memory allocation */
1852 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1853 		return -EINVAL;
1854 
1855 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1856 	if (!keys)
1857 		return -ENOMEM;
1858 
1859 	mmap_read_lock(current->mm);
1860 	srcu_idx = srcu_read_lock(&kvm->srcu);
1861 	for (i = 0; i < args->count; i++) {
1862 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1863 		if (kvm_is_error_hva(hva)) {
1864 			r = -EFAULT;
1865 			break;
1866 		}
1867 
1868 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1869 		if (r)
1870 			break;
1871 	}
1872 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1873 	mmap_read_unlock(current->mm);
1874 
1875 	if (!r) {
1876 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1877 				 sizeof(uint8_t) * args->count);
1878 		if (r)
1879 			r = -EFAULT;
1880 	}
1881 
1882 	kvfree(keys);
1883 	return r;
1884 }
1885 
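/*
 * Set the guest storage keys for args->count consecutive pages starting at
 * args->start_gfn from the user buffer at args->skeydata_addr, enabling
 * storage key handling for the guest first if necessary.
 */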
1886 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1887 {
1888 	uint8_t *keys;
1889 	uint64_t hva;
1890 	int srcu_idx, i, r = 0;
1891 	bool unlocked;
1892 
1893 	if (args->flags != 0)
1894 		return -EINVAL;
1895 
1896 	/* Enforce sane limit on memory allocation */
1897 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1898 		return -EINVAL;
1899 
1900 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1901 	if (!keys)
1902 		return -ENOMEM;
1903 
1904 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1905 			   sizeof(uint8_t) * args->count);
1906 	if (r) {
1907 		r = -EFAULT;
1908 		goto out;
1909 	}
1910 
1911 	/* Enable storage key handling for the guest */
1912 	r = s390_enable_skey();
1913 	if (r)
1914 		goto out;
1915 
1916 	i = 0;
1917 	mmap_read_lock(current->mm);
1918 	srcu_idx = srcu_read_lock(&kvm->srcu);
1919 	while (i < args->count) {
1920 		unlocked = false;
1921 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1922 		if (kvm_is_error_hva(hva)) {
1923 			r = -EFAULT;
1924 			break;
1925 		}
1926 
1927 		/* Lowest order bit is reserved */
1928 		if (keys[i] & 0x01) {
1929 			r = -EINVAL;
1930 			break;
1931 		}
1932 
1933 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1934 		if (r) {
1935 			r = fixup_user_fault(current->mm, hva,
1936 					     FAULT_FLAG_WRITE, &unlocked);
1937 			if (r)
1938 				break;
1939 		}
1940 		if (!r)
1941 			i++;
1942 	}
1943 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1944 	mmap_read_unlock(current->mm);
1945 out:
1946 	kvfree(keys);
1947 	return r;
1948 }
1949 
1950 /*
1951  * Base address and length must be sent at the start of each block, therefore
1952  * it's cheaper to send some clean data, as long as it's less than the size of
1953  * two longs.
1954  */
1955 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1956 /* use the same limit as for storage keys, for consistency */
1957 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1958 
1959 /*
1960  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1961  * address falls in a hole. In that case the index of one of the memslots
1962  * bordering the hole is returned.
1963  */
1964 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1965 {
1966 	int start = 0, end = slots->used_slots;
1967 	int slot = atomic_read(&slots->last_used_slot);
1968 	struct kvm_memory_slot *memslots = slots->memslots;
1969 
1970 	if (gfn >= memslots[slot].base_gfn &&
1971 	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
1972 		return slot;
1973 
1974 	while (start < end) {
1975 		slot = start + (end - start) / 2;
1976 
1977 		if (gfn >= memslots[slot].base_gfn)
1978 			end = slot;
1979 		else
1980 			start = slot + 1;
1981 	}
1982 
1983 	if (start >= slots->used_slots)
1984 		return slots->used_slots - 1;
1985 
1986 	if (gfn >= memslots[start].base_gfn &&
1987 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
1988 		atomic_set(&slots->last_used_slot, start);
1989 	}
1990 
1991 	return start;
1992 }
1993 
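/*
 * Peek mode: read the CMMA values of consecutive pages starting at
 * args->start_gfn without touching the CMMA dirty bitmap. Returns -EFAULT
 * only if the very first page is not mapped; otherwise the values gathered
 * so far are returned.
 */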
1994 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1995 			      u8 *res, unsigned long bufsize)
1996 {
1997 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1998 
1999 	args->count = 0;
2000 	while (args->count < bufsize) {
2001 		hva = gfn_to_hva(kvm, cur_gfn);
2002 		/*
2003 		 * We return an error if the first value was invalid, but we
2004 		 * return successfully if at least one value was copied.
2005 		 */
2006 		if (kvm_is_error_hva(hva))
2007 			return args->count ? 0 : -EFAULT;
2008 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2009 			pgstev = 0;
2010 		res[args->count++] = (pgstev >> 24) & 0x43;
2011 		cur_gfn++;
2012 	}
2013 
2014 	return 0;
2015 }
2016 
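/*
 * Find the guest frame number of the next page with a set bit in the CMMA
 * dirty bitmap, starting the search at cur_gfn.
 */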
2017 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2018 					      unsigned long cur_gfn)
2019 {
2020 	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
2021 	struct kvm_memory_slot *ms = slots->memslots + slotidx;
2022 	unsigned long ofs = cur_gfn - ms->base_gfn;
2023 
2024 	if (ms->base_gfn + ms->npages <= cur_gfn) {
2025 		slotidx--;
2026 		/* If we are above the highest slot, wrap around */
2027 		if (slotidx < 0)
2028 			slotidx = slots->used_slots - 1;
2029 
2030 		ms = slots->memslots + slotidx;
2031 		ofs = 0;
2032 	}
2033 
2034 	if (cur_gfn < ms->base_gfn)
2035 		ofs = 0;
2036 
2037 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2038 	while ((slotidx > 0) && (ofs >= ms->npages)) {
2039 		slotidx--;
2040 		ms = slots->memslots + slotidx;
2041 		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2042 	}
2043 	return ms->base_gfn + ofs;
2044 }
2045 
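/*
 * Gather CMMA values starting at the next dirty page at or after
 * args->start_gfn, clearing the corresponding bits in the CMMA dirty bitmap
 * as the values are consumed.
 */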
2046 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2047 			     u8 *res, unsigned long bufsize)
2048 {
2049 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2050 	struct kvm_memslots *slots = kvm_memslots(kvm);
2051 	struct kvm_memory_slot *ms;
2052 
2053 	if (unlikely(!slots->used_slots))
2054 		return 0;
2055 
2056 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2057 	ms = gfn_to_memslot(kvm, cur_gfn);
2058 	args->count = 0;
2059 	args->start_gfn = cur_gfn;
2060 	if (!ms)
2061 		return 0;
2062 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2063 	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2064 
2065 	while (args->count < bufsize) {
2066 		hva = gfn_to_hva(kvm, cur_gfn);
2067 		if (kvm_is_error_hva(hva))
2068 			return 0;
2069 		/* Decrement only if we actually flipped the bit to 0 */
2070 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2071 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2072 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2073 			pgstev = 0;
2074 		/* Save the value */
2075 		res[args->count++] = (pgstev >> 24) & 0x43;
2076 		/* If the next bit is too far away, stop. */
2077 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2078 			return 0;
2079 		/* If we reached the previous "next", find the next one */
2080 		if (cur_gfn == next_gfn)
2081 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2082 		/* Reached the end of memory or of the buffer, stop */
2083 		if ((next_gfn >= mem_end) ||
2084 		    (next_gfn - args->start_gfn >= bufsize))
2085 			return 0;
2086 		cur_gfn++;
2087 		/* Reached the end of the current memslot, take the next one. */
2088 		if (cur_gfn - ms->base_gfn >= ms->npages) {
2089 			ms = gfn_to_memslot(kvm, cur_gfn);
2090 			if (!ms)
2091 				return 0;
2092 		}
2093 	}
2094 	return 0;
2095 }
2096 
2097 /*
2098  * This function searches for the next page with dirty CMMA attributes, and
2099  * saves the attributes in the buffer up to either the end of the buffer or
2100  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2101  * no trailing clean bytes are saved.
2102  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2103  * output buffer will indicate 0 as length.
2104  */
2105 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2106 				  struct kvm_s390_cmma_log *args)
2107 {
2108 	unsigned long bufsize;
2109 	int srcu_idx, peek, ret;
2110 	u8 *values;
2111 
2112 	if (!kvm->arch.use_cmma)
2113 		return -ENXIO;
2114 	/* Invalid/unsupported flags were specified */
2115 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2116 		return -EINVAL;
2117 	/* Migration mode query, and we are not doing a migration */
2118 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2119 	if (!peek && !kvm->arch.migration_mode)
2120 		return -EINVAL;
2121 	/* CMMA is disabled or was not used, or the buffer has length zero */
2122 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2123 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2124 		memset(args, 0, sizeof(*args));
2125 		return 0;
2126 	}
2127 	/* We are not peeking, and there are no dirty pages */
2128 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2129 		memset(args, 0, sizeof(*args));
2130 		return 0;
2131 	}
2132 
2133 	values = vmalloc(bufsize);
2134 	if (!values)
2135 		return -ENOMEM;
2136 
2137 	mmap_read_lock(kvm->mm);
2138 	srcu_idx = srcu_read_lock(&kvm->srcu);
2139 	if (peek)
2140 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2141 	else
2142 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2143 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2144 	mmap_read_unlock(kvm->mm);
2145 
2146 	if (kvm->arch.migration_mode)
2147 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2148 	else
2149 		args->remaining = 0;
2150 
2151 	if (copy_to_user((void __user *)args->values, values, args->count))
2152 		ret = -EFAULT;
2153 
2154 	vfree(values);
2155 	return ret;
2156 }
2157 
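/*
 * Illustrative userspace flow (a sketch, not ABI documentation): a migration
 * tool would typically call the KVM_S390_GET_CMMA_BITS vm ioctl in a loop
 * with a struct kvm_s390_cmma_log, process the args->count values reported
 * for the returned args->start_gfn, advance start_gfn past them and repeat
 * until args->remaining reaches zero. Setting KVM_S390_CMMA_PEEK in
 * args->flags reads the values without consuming dirty bits.
 */
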
2158 /*
2159  * This function sets the CMMA attributes for the given pages. If the input
2160  * buffer has zero length, no action is taken, otherwise the attributes are
2161  * set and the mm->context.uses_cmm flag is set.
2162  */
2163 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2164 				  const struct kvm_s390_cmma_log *args)
2165 {
2166 	unsigned long hva, mask, pgstev, i;
2167 	uint8_t *bits;
2168 	int srcu_idx, r = 0;
2169 
2170 	mask = args->mask;
2171 
2172 	if (!kvm->arch.use_cmma)
2173 		return -ENXIO;
2174 	/* invalid/unsupported flags */
2175 	if (args->flags != 0)
2176 		return -EINVAL;
2177 	/* Enforce sane limit on memory allocation */
2178 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2179 		return -EINVAL;
2180 	/* Nothing to do */
2181 	if (args->count == 0)
2182 		return 0;
2183 
2184 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2185 	if (!bits)
2186 		return -ENOMEM;
2187 
2188 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2189 	if (r) {
2190 		r = -EFAULT;
2191 		goto out;
2192 	}
2193 
2194 	mmap_read_lock(kvm->mm);
2195 	srcu_idx = srcu_read_lock(&kvm->srcu);
2196 	for (i = 0; i < args->count; i++) {
2197 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2198 		if (kvm_is_error_hva(hva)) {
2199 			r = -EFAULT;
2200 			break;
2201 		}
2202 
2203 		pgstev = bits[i];
2204 		pgstev = pgstev << 24;
2205 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2206 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2207 	}
2208 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2209 	mmap_read_unlock(kvm->mm);
2210 
2211 	if (!kvm->mm->context.uses_cmm) {
2212 		mmap_write_lock(kvm->mm);
2213 		kvm->mm->context.uses_cmm = 1;
2214 		mmap_write_unlock(kvm->mm);
2215 	}
2216 out:
2217 	vfree(bits);
2218 	return r;
2219 }
2220 
2221 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2222 {
2223 	struct kvm_vcpu *vcpu;
2224 	u16 rc, rrc;
2225 	int ret = 0;
2226 	int i;
2227 
2228 	/*
2229 	 * We ignore failures and try to destroy as many CPUs as possible.
2230 	 * At the same time we must not free the assigned resources when
2231 	 * this fails, as the ultravisor still has access to that memory.
2232 	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2233 	 * behind.
2234 	 * We want to return the first failure rc and rrc, though.
2235 	 */
2236 	kvm_for_each_vcpu(i, vcpu, kvm) {
2237 		mutex_lock(&vcpu->mutex);
2238 		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2239 			*rcp = rc;
2240 			*rrcp = rrc;
2241 			ret = -EIO;
2242 		}
2243 		mutex_unlock(&vcpu->mutex);
2244 	}
2245 	return ret;
2246 }
2247 
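/*
 * Convert all vcpus of this guest to protected (secure) execution. If the
 * creation of any secure CPU fails, the CPUs converted so far are destroyed
 * again.
 */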
2248 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2249 {
2250 	int i, r = 0;
2251 	u16 dummy;
2252 
2253 	struct kvm_vcpu *vcpu;
2254 
2255 	kvm_for_each_vcpu(i, vcpu, kvm) {
2256 		mutex_lock(&vcpu->mutex);
2257 		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2258 		mutex_unlock(&vcpu->mutex);
2259 		if (r)
2260 			break;
2261 	}
2262 	if (r)
2263 		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2264 	return r;
2265 }
2266 
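/*
 * Dispatch the KVM_PV_* subcommands of the KVM_S390_PV_COMMAND ioctl.
 * Called with kvm->lock held.
 */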
2267 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2268 {
2269 	int r = 0;
2270 	u16 dummy;
2271 	void __user *argp = (void __user *)cmd->data;
2272 
2273 	switch (cmd->cmd) {
2274 	case KVM_PV_ENABLE: {
2275 		r = -EINVAL;
2276 		if (kvm_s390_pv_is_protected(kvm))
2277 			break;
2278 
2279 		/*
2280 		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
2281 		 *  esca, we need no cleanup in the error cases below
2282 		 */
2283 		r = sca_switch_to_extended(kvm);
2284 		if (r)
2285 			break;
2286 
2287 		mmap_write_lock(current->mm);
2288 		r = gmap_mark_unmergeable();
2289 		mmap_write_unlock(current->mm);
2290 		if (r)
2291 			break;
2292 
2293 		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2294 		if (r)
2295 			break;
2296 
2297 		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2298 		if (r)
2299 			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2300 
2301 		/* we need to block service interrupts from now on */
2302 		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2303 		break;
2304 	}
2305 	case KVM_PV_DISABLE: {
2306 		r = -EINVAL;
2307 		if (!kvm_s390_pv_is_protected(kvm))
2308 			break;
2309 
2310 		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2311 		/*
2312 		 * If a CPU could not be destroyed, destroy VM will also fail.
2313 		 * There is no point in trying to destroy it. Instead return
2314 		 * the rc and rrc from the first CPU that failed destroying.
2315 		 */
2316 		if (r)
2317 			break;
2318 		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2319 
2320 		/* no need to block service interrupts any more */
2321 		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2322 		break;
2323 	}
2324 	case KVM_PV_SET_SEC_PARMS: {
2325 		struct kvm_s390_pv_sec_parm parms = {};
2326 		void *hdr;
2327 
2328 		r = -EINVAL;
2329 		if (!kvm_s390_pv_is_protected(kvm))
2330 			break;
2331 
2332 		r = -EFAULT;
2333 		if (copy_from_user(&parms, argp, sizeof(parms)))
2334 			break;
2335 
2336 		/* Currently restricted to 8KB */
2337 		r = -EINVAL;
2338 		if (parms.length > PAGE_SIZE * 2)
2339 			break;
2340 
2341 		r = -ENOMEM;
2342 		hdr = vmalloc(parms.length);
2343 		if (!hdr)
2344 			break;
2345 
2346 		r = -EFAULT;
2347 		if (!copy_from_user(hdr, (void __user *)parms.origin,
2348 				    parms.length))
2349 			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2350 						      &cmd->rc, &cmd->rrc);
2351 
2352 		vfree(hdr);
2353 		break;
2354 	}
2355 	case KVM_PV_UNPACK: {
2356 		struct kvm_s390_pv_unp unp = {};
2357 
2358 		r = -EINVAL;
2359 		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2360 			break;
2361 
2362 		r = -EFAULT;
2363 		if (copy_from_user(&unp, argp, sizeof(unp)))
2364 			break;
2365 
2366 		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2367 				       &cmd->rc, &cmd->rrc);
2368 		break;
2369 	}
2370 	case KVM_PV_VERIFY: {
2371 		r = -EINVAL;
2372 		if (!kvm_s390_pv_is_protected(kvm))
2373 			break;
2374 
2375 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2376 				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2377 		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2378 			     cmd->rrc);
2379 		break;
2380 	}
2381 	case KVM_PV_PREP_RESET: {
2382 		r = -EINVAL;
2383 		if (!kvm_s390_pv_is_protected(kvm))
2384 			break;
2385 
2386 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2387 				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2388 		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2389 			     cmd->rc, cmd->rrc);
2390 		break;
2391 	}
2392 	case KVM_PV_UNSHARE_ALL: {
2393 		r = -EINVAL;
2394 		if (!kvm_s390_pv_is_protected(kvm))
2395 			break;
2396 
2397 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2398 				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2399 		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2400 			     cmd->rc, cmd->rrc);
2401 		break;
2402 	}
2403 	default:
2404 		r = -ENOTTY;
2405 	}
2406 	return r;
2407 }
2408 
2409 long kvm_arch_vm_ioctl(struct file *filp,
2410 		       unsigned int ioctl, unsigned long arg)
2411 {
2412 	struct kvm *kvm = filp->private_data;
2413 	void __user *argp = (void __user *)arg;
2414 	struct kvm_device_attr attr;
2415 	int r;
2416 
2417 	switch (ioctl) {
2418 	case KVM_S390_INTERRUPT: {
2419 		struct kvm_s390_interrupt s390int;
2420 
2421 		r = -EFAULT;
2422 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2423 			break;
2424 		r = kvm_s390_inject_vm(kvm, &s390int);
2425 		break;
2426 	}
2427 	case KVM_CREATE_IRQCHIP: {
2428 		struct kvm_irq_routing_entry routing;
2429 
2430 		r = -EINVAL;
2431 		if (kvm->arch.use_irqchip) {
2432 			/* Set up dummy routing. */
2433 			memset(&routing, 0, sizeof(routing));
2434 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2435 		}
2436 		break;
2437 	}
2438 	case KVM_SET_DEVICE_ATTR: {
2439 		r = -EFAULT;
2440 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2441 			break;
2442 		r = kvm_s390_vm_set_attr(kvm, &attr);
2443 		break;
2444 	}
2445 	case KVM_GET_DEVICE_ATTR: {
2446 		r = -EFAULT;
2447 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2448 			break;
2449 		r = kvm_s390_vm_get_attr(kvm, &attr);
2450 		break;
2451 	}
2452 	case KVM_HAS_DEVICE_ATTR: {
2453 		r = -EFAULT;
2454 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2455 			break;
2456 		r = kvm_s390_vm_has_attr(kvm, &attr);
2457 		break;
2458 	}
2459 	case KVM_S390_GET_SKEYS: {
2460 		struct kvm_s390_skeys args;
2461 
2462 		r = -EFAULT;
2463 		if (copy_from_user(&args, argp,
2464 				   sizeof(struct kvm_s390_skeys)))
2465 			break;
2466 		r = kvm_s390_get_skeys(kvm, &args);
2467 		break;
2468 	}
2469 	case KVM_S390_SET_SKEYS: {
2470 		struct kvm_s390_skeys args;
2471 
2472 		r = -EFAULT;
2473 		if (copy_from_user(&args, argp,
2474 				   sizeof(struct kvm_s390_skeys)))
2475 			break;
2476 		r = kvm_s390_set_skeys(kvm, &args);
2477 		break;
2478 	}
2479 	case KVM_S390_GET_CMMA_BITS: {
2480 		struct kvm_s390_cmma_log args;
2481 
2482 		r = -EFAULT;
2483 		if (copy_from_user(&args, argp, sizeof(args)))
2484 			break;
2485 		mutex_lock(&kvm->slots_lock);
2486 		r = kvm_s390_get_cmma_bits(kvm, &args);
2487 		mutex_unlock(&kvm->slots_lock);
2488 		if (!r) {
2489 			r = copy_to_user(argp, &args, sizeof(args));
2490 			if (r)
2491 				r = -EFAULT;
2492 		}
2493 		break;
2494 	}
2495 	case KVM_S390_SET_CMMA_BITS: {
2496 		struct kvm_s390_cmma_log args;
2497 
2498 		r = -EFAULT;
2499 		if (copy_from_user(&args, argp, sizeof(args)))
2500 			break;
2501 		mutex_lock(&kvm->slots_lock);
2502 		r = kvm_s390_set_cmma_bits(kvm, &args);
2503 		mutex_unlock(&kvm->slots_lock);
2504 		break;
2505 	}
2506 	case KVM_S390_PV_COMMAND: {
2507 		struct kvm_pv_cmd args;
2508 
2509 		/* protvirt means user sigp */
2510 		kvm->arch.user_cpu_state_ctrl = 1;
2511 		r = 0;
2512 		if (!is_prot_virt_host()) {
2513 			r = -EINVAL;
2514 			break;
2515 		}
2516 		if (copy_from_user(&args, argp, sizeof(args))) {
2517 			r = -EFAULT;
2518 			break;
2519 		}
2520 		if (args.flags) {
2521 			r = -EINVAL;
2522 			break;
2523 		}
2524 		mutex_lock(&kvm->lock);
2525 		r = kvm_s390_handle_pv(kvm, &args);
2526 		mutex_unlock(&kvm->lock);
2527 		if (copy_to_user(argp, &args, sizeof(args))) {
2528 			r = -EFAULT;
2529 			break;
2530 		}
2531 		break;
2532 	}
2533 	default:
2534 		r = -ENOTTY;
2535 	}
2536 
2537 	return r;
2538 }
2539 
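/*
 * Return whether the AP extended addressing (APXA) facility is installed,
 * as reported by ap_qci(); returns 0 if AP instructions are not available.
 */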
2540 static int kvm_s390_apxa_installed(void)
2541 {
2542 	struct ap_config_info info;
2543 
2544 	if (ap_instructions_available()) {
2545 		if (ap_qci(&info) == 0)
2546 			return info.apxa;
2547 	}
2548 
2549 	return 0;
2550 }
2551 
2552 /*
2553  * The format of the crypto control block (CRYCB) is specified in the 3 low
2554  * order bits of the CRYCB designation (CRYCBD) field as follows:
2555  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2556  *	     AP extended addressing (APXA) facility are installed.
2557  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2558  * Format 2: Both the APXA and MSAX3 facilities are installed.
2559  */
2560 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2561 {
2562 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2563 
2564 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2565 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2566 
2567 	/* Check whether MSAX3 is installed */
2568 	if (!test_kvm_facility(kvm, 76))
2569 		return;
2570 
2571 	if (kvm_s390_apxa_installed())
2572 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2573 	else
2574 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2575 }
2576 
2577 /*
2578  * kvm_arch_crypto_set_masks
2579  *
2580  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2581  *	 to be set.
2582  * @apm: the mask identifying the accessible AP adapters
2583  * @aqm: the mask identifying the accessible AP domains
2584  * @adm: the mask identifying the accessible AP control domains
2585  *
2586  * Set the masks that identify the adapters, domains and control domains to
2587  * which the KVM guest is granted access.
2588  *
2589  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2590  *	 function.
2591  */
2592 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2593 			       unsigned long *aqm, unsigned long *adm)
2594 {
2595 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2596 
2597 	kvm_s390_vcpu_block_all(kvm);
2598 
2599 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2600 	case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2601 		memcpy(crycb->apcb1.apm, apm, 32);
2602 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2603 			 apm[0], apm[1], apm[2], apm[3]);
2604 		memcpy(crycb->apcb1.aqm, aqm, 32);
2605 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2606 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2607 		memcpy(crycb->apcb1.adm, adm, 32);
2608 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2609 			 adm[0], adm[1], adm[2], adm[3]);
2610 		break;
2611 	case CRYCB_FORMAT1:
2612 	case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
2613 		memcpy(crycb->apcb0.apm, apm, 8);
2614 		memcpy(crycb->apcb0.aqm, aqm, 2);
2615 		memcpy(crycb->apcb0.adm, adm, 2);
2616 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2617 			 apm[0], *((unsigned short *)aqm),
2618 			 *((unsigned short *)adm));
2619 		break;
2620 	default:	/* Cannot happen */
2621 		break;
2622 	}
2623 
2624 	/* recreate the shadow crycb for each vcpu */
2625 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2626 	kvm_s390_vcpu_unblock_all(kvm);
2627 }
2628 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2629 
2630 /*
2631  * kvm_arch_crypto_clear_masks
2632  *
2633  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2634  *	 to be cleared.
2635  *
2636  * Clear the masks that identify the adapters, domains and control domains to
2637  * which the KVM guest is granted access.
2638  *
2639  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2640  *	 function.
2641  */
2642 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2643 {
2644 	kvm_s390_vcpu_block_all(kvm);
2645 
2646 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2647 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2648 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2649 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2650 
2651 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2652 	/* recreate the shadow crycb for each vcpu */
2653 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2654 	kvm_s390_vcpu_unblock_all(kvm);
2655 }
2656 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2657 
2658 static u64 kvm_s390_get_initial_cpuid(void)
2659 {
2660 	struct cpuid cpuid;
2661 
2662 	get_cpu_id(&cpuid);
2663 	cpuid.version = 0xff;
2664 	return *((u64 *) &cpuid);
2665 }
2666 
2667 static void kvm_s390_crypto_init(struct kvm *kvm)
2668 {
2669 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2670 	kvm_s390_set_crycb_format(kvm);
2671 	init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
2672 
2673 	if (!test_kvm_facility(kvm, 76))
2674 		return;
2675 
2676 	/* Enable AES/DEA protected key functions by default */
2677 	kvm->arch.crypto.aes_kw = 1;
2678 	kvm->arch.crypto.dea_kw = 1;
2679 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2680 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2681 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2682 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2683 }
2684 
2685 static void sca_dispose(struct kvm *kvm)
2686 {
2687 	if (kvm->arch.use_esca)
2688 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2689 	else
2690 		free_page((unsigned long)(kvm->arch.sca));
2691 	kvm->arch.sca = NULL;
2692 }
2693 
2694 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2695 {
2696 	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2697 	int i, rc;
2698 	char debug_name[16];
2699 	static unsigned long sca_offset;
2700 
2701 	rc = -EINVAL;
2702 #ifdef CONFIG_KVM_S390_UCONTROL
2703 	if (type & ~KVM_VM_S390_UCONTROL)
2704 		goto out_err;
2705 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2706 		goto out_err;
2707 #else
2708 	if (type)
2709 		goto out_err;
2710 #endif
2711 
2712 	rc = s390_enable_sie();
2713 	if (rc)
2714 		goto out_err;
2715 
2716 	rc = -ENOMEM;
2717 
2718 	if (!sclp.has_64bscao)
2719 		alloc_flags |= GFP_DMA;
2720 	rwlock_init(&kvm->arch.sca_lock);
2721 	/* start with basic SCA */
2722 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2723 	if (!kvm->arch.sca)
2724 		goto out_err;
2725 	mutex_lock(&kvm_lock);
2726 	sca_offset += 16;
2727 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2728 		sca_offset = 0;
2729 	kvm->arch.sca = (struct bsca_block *)
2730 			((char *) kvm->arch.sca + sca_offset);
2731 	mutex_unlock(&kvm_lock);
2732 
2733 	sprintf(debug_name, "kvm-%u", current->pid);
2734 
2735 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2736 	if (!kvm->arch.dbf)
2737 		goto out_err;
2738 
2739 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2740 	kvm->arch.sie_page2 =
2741 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2742 	if (!kvm->arch.sie_page2)
2743 		goto out_err;
2744 
2745 	kvm->arch.sie_page2->kvm = kvm;
2746 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2747 
2748 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2749 		kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2750 					      (kvm_s390_fac_base[i] |
2751 					       kvm_s390_fac_ext[i]);
2752 		kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2753 					      kvm_s390_fac_base[i];
2754 	}
2755 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2756 
2757 	/* we are always in czam mode - even on pre z14 machines */
2758 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2759 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2760 	/* we emulate STHYI in kvm */
2761 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2762 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2763 	if (MACHINE_HAS_TLB_GUEST) {
2764 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2765 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2766 	}
2767 
2768 	if (css_general_characteristics.aiv && test_facility(65))
2769 		set_kvm_facility(kvm->arch.model.fac_mask, 65);
2770 
2771 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2772 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2773 
2774 	kvm_s390_crypto_init(kvm);
2775 
2776 	mutex_init(&kvm->arch.float_int.ais_lock);
2777 	spin_lock_init(&kvm->arch.float_int.lock);
2778 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2779 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2780 	init_waitqueue_head(&kvm->arch.ipte_wq);
2781 	mutex_init(&kvm->arch.ipte_mutex);
2782 
2783 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2784 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2785 
2786 	if (type & KVM_VM_S390_UCONTROL) {
2787 		kvm->arch.gmap = NULL;
2788 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2789 	} else {
2790 		if (sclp.hamax == U64_MAX)
2791 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2792 		else
2793 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2794 						    sclp.hamax + 1);
2795 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2796 		if (!kvm->arch.gmap)
2797 			goto out_err;
2798 		kvm->arch.gmap->private = kvm;
2799 		kvm->arch.gmap->pfault_enabled = 0;
2800 	}
2801 
2802 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2803 	kvm->arch.use_skf = sclp.has_skey;
2804 	spin_lock_init(&kvm->arch.start_stop_lock);
2805 	kvm_s390_vsie_init(kvm);
2806 	if (use_gisa)
2807 		kvm_s390_gisa_init(kvm);
2808 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2809 
2810 	return 0;
2811 out_err:
2812 	free_page((unsigned long)kvm->arch.sie_page2);
2813 	debug_unregister(kvm->arch.dbf);
2814 	sca_dispose(kvm);
2815 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2816 	return rc;
2817 }
2818 
2819 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2820 {
2821 	u16 rc, rrc;
2822 
2823 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2824 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2825 	kvm_s390_clear_local_irqs(vcpu);
2826 	kvm_clear_async_pf_completion_queue(vcpu);
2827 	if (!kvm_is_ucontrol(vcpu->kvm))
2828 		sca_del_vcpu(vcpu);
2829 
2830 	if (kvm_is_ucontrol(vcpu->kvm))
2831 		gmap_remove(vcpu->arch.gmap);
2832 
2833 	if (vcpu->kvm->arch.use_cmma)
2834 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2835 	/* We cannot hold the vcpu mutex here; we are already dying */
2836 	if (kvm_s390_pv_cpu_get_handle(vcpu))
2837 		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2838 	free_page((unsigned long)(vcpu->arch.sie_block));
2839 }
2840 
2841 static void kvm_free_vcpus(struct kvm *kvm)
2842 {
2843 	unsigned int i;
2844 	struct kvm_vcpu *vcpu;
2845 
2846 	kvm_for_each_vcpu(i, vcpu, kvm)
2847 		kvm_vcpu_destroy(vcpu);
2848 
2849 	mutex_lock(&kvm->lock);
2850 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2851 		kvm->vcpus[i] = NULL;
2852 
2853 	atomic_set(&kvm->online_vcpus, 0);
2854 	mutex_unlock(&kvm->lock);
2855 }
2856 
2857 void kvm_arch_destroy_vm(struct kvm *kvm)
2858 {
2859 	u16 rc, rrc;
2860 
2861 	kvm_free_vcpus(kvm);
2862 	sca_dispose(kvm);
2863 	kvm_s390_gisa_destroy(kvm);
2864 	/*
2865 	 * We are already at the end of life and kvm->lock is not taken.
2866 	 * This is ok as the file descriptor is closed by now and nobody
2867 	 * can mess with the pv state. To avoid lockdep_assert_held from
2868 	 * complaining we do not use kvm_s390_pv_is_protected.
2869 	 */
2870 	if (kvm_s390_pv_get_handle(kvm))
2871 		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2872 	debug_unregister(kvm->arch.dbf);
2873 	free_page((unsigned long)kvm->arch.sie_page2);
2874 	if (!kvm_is_ucontrol(kvm))
2875 		gmap_remove(kvm->arch.gmap);
2876 	kvm_s390_destroy_adapters(kvm);
2877 	kvm_s390_clear_float_irqs(kvm);
2878 	kvm_s390_vsie_destroy(kvm);
2879 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2880 }
2881 
2882 /* Section: vcpu related */
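/* Allocate and attach a per-vcpu gmap for user-controlled (ucontrol) guests. */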
2883 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2884 {
2885 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2886 	if (!vcpu->arch.gmap)
2887 		return -ENOMEM;
2888 	vcpu->arch.gmap->private = vcpu->kvm;
2889 
2890 	return 0;
2891 }
2892 
2893 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2894 {
2895 	if (!kvm_s390_use_sca_entries())
2896 		return;
2897 	read_lock(&vcpu->kvm->arch.sca_lock);
2898 	if (vcpu->kvm->arch.use_esca) {
2899 		struct esca_block *sca = vcpu->kvm->arch.sca;
2900 
2901 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2902 		sca->cpu[vcpu->vcpu_id].sda = 0;
2903 	} else {
2904 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2905 
2906 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2907 		sca->cpu[vcpu->vcpu_id].sda = 0;
2908 	}
2909 	read_unlock(&vcpu->kvm->arch.sca_lock);
2910 }
2911 
2912 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2913 {
2914 	if (!kvm_s390_use_sca_entries()) {
2915 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2916 
2917 		/* we still need the basic sca for the ipte control */
2918 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2919 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2920 		return;
2921 	}
2922 	read_lock(&vcpu->kvm->arch.sca_lock);
2923 	if (vcpu->kvm->arch.use_esca) {
2924 		struct esca_block *sca = vcpu->kvm->arch.sca;
2925 
2926 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2927 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2928 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2929 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2930 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2931 	} else {
2932 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2933 
2934 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2935 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2936 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2937 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2938 	}
2939 	read_unlock(&vcpu->kvm->arch.sca_lock);
2940 }
2941 
2942 /* Basic SCA to Extended SCA data copy routines */
2943 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2944 {
2945 	d->sda = s->sda;
2946 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2947 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2948 }
2949 
2950 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2951 {
2952 	int i;
2953 
2954 	d->ipte_control = s->ipte_control;
2955 	d->mcn[0] = s->mcn;
2956 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2957 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2958 }
2959 
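/*
 * Replace the basic SCA with an extended SCA and rewire all existing vcpus
 * to it. There is no way back to a basic SCA afterwards.
 */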
2960 static int sca_switch_to_extended(struct kvm *kvm)
2961 {
2962 	struct bsca_block *old_sca = kvm->arch.sca;
2963 	struct esca_block *new_sca;
2964 	struct kvm_vcpu *vcpu;
2965 	unsigned int vcpu_idx;
2966 	u32 scaol, scaoh;
2967 
2968 	if (kvm->arch.use_esca)
2969 		return 0;
2970 
2971 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2972 	if (!new_sca)
2973 		return -ENOMEM;
2974 
2975 	scaoh = (u32)((u64)(new_sca) >> 32);
2976 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2977 
2978 	kvm_s390_vcpu_block_all(kvm);
2979 	write_lock(&kvm->arch.sca_lock);
2980 
2981 	sca_copy_b_to_e(new_sca, old_sca);
2982 
2983 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2984 		vcpu->arch.sie_block->scaoh = scaoh;
2985 		vcpu->arch.sie_block->scaol = scaol;
2986 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2987 	}
2988 	kvm->arch.sca = new_sca;
2989 	kvm->arch.use_esca = 1;
2990 
2991 	write_unlock(&kvm->arch.sca_lock);
2992 	kvm_s390_vcpu_unblock_all(kvm);
2993 
2994 	free_page((unsigned long)old_sca);
2995 
2996 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2997 		 old_sca, kvm->arch.sca);
2998 	return 0;
2999 }
3000 
3001 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
3002 {
3003 	int rc;
3004 
3005 	if (!kvm_s390_use_sca_entries()) {
3006 		if (id < KVM_MAX_VCPUS)
3007 			return true;
3008 		return false;
3009 	}
3010 	if (id < KVM_S390_BSCA_CPU_SLOTS)
3011 		return true;
3012 	if (!sclp.has_esca || !sclp.has_64bscao)
3013 		return false;
3014 
3015 	mutex_lock(&kvm->lock);
3016 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3017 	mutex_unlock(&kvm->lock);
3018 
3019 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3020 }
3021 
3022 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3023 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3024 {
3025 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3026 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3027 	vcpu->arch.cputm_start = get_tod_clock_fast();
3028 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3029 }
3030 
3031 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3032 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3033 {
3034 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3035 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3036 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3037 	vcpu->arch.cputm_start = 0;
3038 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3039 }
3040 
3041 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3042 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3043 {
3044 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3045 	vcpu->arch.cputm_enabled = true;
3046 	__start_cpu_timer_accounting(vcpu);
3047 }
3048 
3049 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3050 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3051 {
3052 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3053 	__stop_cpu_timer_accounting(vcpu);
3054 	vcpu->arch.cputm_enabled = false;
3055 }
3056 
3057 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3058 {
3059 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3060 	__enable_cpu_timer_accounting(vcpu);
3061 	preempt_enable();
3062 }
3063 
3064 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3065 {
3066 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3067 	__disable_cpu_timer_accounting(vcpu);
3068 	preempt_enable();
3069 }
3070 
3071 /* set the cpu timer - may only be called from the VCPU thread itself */
3072 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3073 {
3074 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3075 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3076 	if (vcpu->arch.cputm_enabled)
3077 		vcpu->arch.cputm_start = get_tod_clock_fast();
3078 	vcpu->arch.sie_block->cputm = cputm;
3079 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3080 	preempt_enable();
3081 }
3082 
3083 /* update and get the cpu timer - can also be called from other VCPU threads */
3084 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3085 {
3086 	unsigned int seq;
3087 	__u64 value;
3088 
3089 	if (unlikely(!vcpu->arch.cputm_enabled))
3090 		return vcpu->arch.sie_block->cputm;
3091 
3092 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3093 	do {
3094 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3095 		/*
3096 		 * If the writer would ever execute a read in the critical
3097 		 * section, e.g. in irq context, we have a deadlock.
3098 		 */
3099 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3100 		value = vcpu->arch.sie_block->cputm;
3101 		/* if cputm_start is 0, accounting is being started/stopped */
3102 		if (likely(vcpu->arch.cputm_start))
3103 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3104 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3105 	preempt_enable();
3106 	return value;
3107 }
3108 
3109 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3110 {
3112 	gmap_enable(vcpu->arch.enabled_gmap);
3113 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3114 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3115 		__start_cpu_timer_accounting(vcpu);
3116 	vcpu->cpu = cpu;
3117 }
3118 
3119 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3120 {
3121 	vcpu->cpu = -1;
3122 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3123 		__stop_cpu_timer_accounting(vcpu);
3124 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3125 	vcpu->arch.enabled_gmap = gmap_get_enabled();
3126 	gmap_disable(vcpu->arch.enabled_gmap);
3128 }
3129 
3130 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3131 {
3132 	mutex_lock(&vcpu->kvm->lock);
3133 	preempt_disable();
3134 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3135 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3136 	preempt_enable();
3137 	mutex_unlock(&vcpu->kvm->lock);
3138 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3139 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3140 		sca_add_vcpu(vcpu);
3141 	}
3142 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3143 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3144 	/* make vcpu_load load the right gmap on the first trigger */
3145 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3146 }
3147 
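/*
 * A PCKMO subfunction is usable only if it is part of the guest cpu model
 * and also available on the host.
 */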
3148 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3149 {
3150 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3151 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3152 		return true;
3153 	return false;
3154 }
3155 
3156 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3157 {
3158 	/* At least one ECC subfunction must be present */
3159 	return kvm_has_pckmo_subfunc(kvm, 32) ||
3160 	       kvm_has_pckmo_subfunc(kvm, 33) ||
3161 	       kvm_has_pckmo_subfunc(kvm, 34) ||
3162 	       kvm_has_pckmo_subfunc(kvm, 40) ||
3163 	       kvm_has_pckmo_subfunc(kvm, 41);
3165 }
3166 
3167 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3168 {
3169 	/*
3170 	 * If the AP instructions are not being interpreted and the MSAX3
3171 	 * facility is not configured for the guest, there is nothing to set up.
3172 	 */
3173 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3174 		return;
3175 
3176 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3177 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3178 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3179 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3180 
3181 	if (vcpu->kvm->arch.crypto.apie)
3182 		vcpu->arch.sie_block->eca |= ECA_APIE;
3183 
3184 	/* Set up protected key support */
3185 	if (vcpu->kvm->arch.crypto.aes_kw) {
3186 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3187 		/* ecc is also wrapped with AES key */
3188 		if (kvm_has_pckmo_ecc(vcpu->kvm))
3189 			vcpu->arch.sie_block->ecd |= ECD_ECC;
3190 	}
3191 
3192 	if (vcpu->kvm->arch.crypto.dea_kw)
3193 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3194 }
3195 
3196 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3197 {
3198 	free_page(vcpu->arch.sie_block->cbrlo);
3199 	vcpu->arch.sie_block->cbrlo = 0;
3200 }
3201 
3202 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3203 {
3204 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3205 	if (!vcpu->arch.sie_block->cbrlo)
3206 		return -ENOMEM;
3207 	return 0;
3208 }
3209 
3210 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3211 {
3212 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3213 
3214 	vcpu->arch.sie_block->ibc = model->ibc;
3215 	if (test_kvm_facility(vcpu->kvm, 7))
3216 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3217 }
3218 
3219 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3220 {
3221 	int rc = 0;
3222 	u16 uvrc, uvrrc;
3223 
3224 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3225 						    CPUSTAT_SM |
3226 						    CPUSTAT_STOPPED);
3227 
3228 	if (test_kvm_facility(vcpu->kvm, 78))
3229 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3230 	else if (test_kvm_facility(vcpu->kvm, 8))
3231 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3232 
3233 	kvm_s390_vcpu_setup_model(vcpu);
3234 
3235 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3236 	if (MACHINE_HAS_ESOP)
3237 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3238 	if (test_kvm_facility(vcpu->kvm, 9))
3239 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
3240 	if (test_kvm_facility(vcpu->kvm, 73))
3241 		vcpu->arch.sie_block->ecb |= ECB_TE;
3242 	if (!kvm_is_ucontrol(vcpu->kvm))
3243 		vcpu->arch.sie_block->ecb |= ECB_SPECI;
3244 
3245 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3246 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3247 	if (test_kvm_facility(vcpu->kvm, 130))
3248 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3249 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3250 	if (sclp.has_cei)
3251 		vcpu->arch.sie_block->eca |= ECA_CEI;
3252 	if (sclp.has_ib)
3253 		vcpu->arch.sie_block->eca |= ECA_IB;
3254 	if (sclp.has_siif)
3255 		vcpu->arch.sie_block->eca |= ECA_SII;
3256 	if (sclp.has_sigpif)
3257 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3258 	if (test_kvm_facility(vcpu->kvm, 129)) {
3259 		vcpu->arch.sie_block->eca |= ECA_VX;
3260 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3261 	}
3262 	if (test_kvm_facility(vcpu->kvm, 139))
3263 		vcpu->arch.sie_block->ecd |= ECD_MEF;
3264 	if (test_kvm_facility(vcpu->kvm, 156))
3265 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3266 	if (vcpu->arch.sie_block->gd) {
3267 		vcpu->arch.sie_block->eca |= ECA_AIV;
3268 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3269 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3270 	}
3271 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3272 					| SDNXC;
3273 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3274 
3275 	if (sclp.has_kss)
3276 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3277 	else
3278 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3279 
3280 	if (vcpu->kvm->arch.use_cmma) {
3281 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3282 		if (rc)
3283 			return rc;
3284 	}
3285 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3286 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3287 
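	/* The host program id marks this SIE block as driven directly by KVM (not by vSIE). */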
3288 	vcpu->arch.sie_block->hpid = HPID_KVM;
3289 
3290 	kvm_s390_vcpu_crypto_setup(vcpu);
3291 
3292 	mutex_lock(&vcpu->kvm->lock);
3293 	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3294 		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3295 		if (rc)
3296 			kvm_s390_vcpu_unsetup_cmma(vcpu);
3297 	}
3298 	mutex_unlock(&vcpu->kvm->lock);
3299 
3300 	return rc;
3301 }
3302 
3303 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3304 {
3305 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3306 		return -EINVAL;
3307 	return 0;
3308 }
3309 
3310 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3311 {
3312 	struct sie_page *sie_page;
3313 	int rc;
3314 
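	/*
	 * struct sie_page is allocated with get_zeroed_page() below,
	 * so it must fit exactly into one 4K page.
	 */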
3315 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3316 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3317 	if (!sie_page)
3318 		return -ENOMEM;
3319 
3320 	vcpu->arch.sie_block = &sie_page->sie_block;
3321 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3322 
3323 	/* the real guest size will always be smaller than msl */
3324 	vcpu->arch.sie_block->mso = 0;
3325 	vcpu->arch.sie_block->msl = sclp.hamax;
3326 
3327 	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3328 	spin_lock_init(&vcpu->arch.local_int.lock);
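	/* Wire up the GISA origin; use format-1 when the SCLP reports the GISA format facility. */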
3329 	vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3330 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3331 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3332 	seqcount_init(&vcpu->arch.cputm_seqcount);
3333 
3334 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3335 	kvm_clear_async_pf_completion_queue(vcpu);
3336 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3337 				    KVM_SYNC_GPRS |
3338 				    KVM_SYNC_ACRS |
3339 				    KVM_SYNC_CRS |
3340 				    KVM_SYNC_ARCH0 |
3341 				    KVM_SYNC_PFAULT |
3342 				    KVM_SYNC_DIAG318;
3343 	kvm_s390_set_prefix(vcpu, 0);
3344 	if (test_kvm_facility(vcpu->kvm, 64))
3345 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3346 	if (test_kvm_facility(vcpu->kvm, 82))
3347 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3348 	if (test_kvm_facility(vcpu->kvm, 133))
3349 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3350 	if (test_kvm_facility(vcpu->kvm, 156))
3351 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3352 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
3353 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3354 	 */
3355 	if (MACHINE_HAS_VX)
3356 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3357 	else
3358 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3359 
3360 	if (kvm_is_ucontrol(vcpu->kvm)) {
3361 		rc = __kvm_ucontrol_vcpu_init(vcpu);
3362 		if (rc)
3363 			goto out_free_sie_block;
3364 	}
3365 
3366 	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3367 		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3368 	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3369 
3370 	rc = kvm_s390_vcpu_setup(vcpu);
3371 	if (rc)
3372 		goto out_ucontrol_uninit;
3373 	return 0;
3374 
3375 out_ucontrol_uninit:
3376 	if (kvm_is_ucontrol(vcpu->kvm))
3377 		gmap_remove(vcpu->arch.gmap);
3378 out_free_sie_block:
3379 	free_page((unsigned long)(vcpu->arch.sie_block));
3380 	return rc;
3381 }
3382 
3383 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3384 {
3385 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3386 	return kvm_s390_vcpu_has_irq(vcpu, 0);
3387 }
3388 
3389 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3390 {
3391 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3392 }
3393 
3394 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3395 {
3396 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3397 	exit_sie(vcpu);
3398 }
3399 
3400 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3401 {
3402 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3403 }
3404 
3405 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3406 {
3407 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3408 	exit_sie(vcpu);
3409 }
3410 
3411 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3412 {
3413 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3414 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3415 }
3416 
3417 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3418 {
3419 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3420 }
3421 
3422 /*
3423  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3424  * If the CPU is not running (e.g. waiting as idle) the function will
3425  * return immediately. */
3426 void exit_sie(struct kvm_vcpu *vcpu)
3427 {
3428 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3429 	kvm_s390_vsie_kick(vcpu);
3430 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3431 		cpu_relax();
3432 }
3433 
3434 /* Kick a guest cpu out of SIE to process a request synchronously */
3435 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3436 {
3437 	kvm_make_request(req, vcpu);
3438 	kvm_s390_vcpu_request(vcpu);
3439 }
3440 
3441 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3442 			      unsigned long end)
3443 {
3444 	struct kvm *kvm = gmap->private;
3445 	struct kvm_vcpu *vcpu;
3446 	unsigned long prefix;
3447 	int i;
3448 
3449 	if (gmap_is_shadow(gmap))
3450 		return;
3451 	if (start >= 1UL << 31)
3452 		/* We are only interested in prefix pages */
3453 		return;
3454 	kvm_for_each_vcpu(i, vcpu, kvm) {
3455 		/* match against both prefix pages */
3456 		prefix = kvm_s390_get_prefix(vcpu);
3457 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3458 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3459 				   start, end);
3460 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3461 		}
3462 	}
3463 }
3464 
3465 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3466 {
3467 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3468 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3469 	    READ_ONCE(halt_poll_max_steal)) {
3470 		vcpu->stat.halt_no_poll_steal++;
3471 		return true;
3472 	}
3473 	return false;
3474 }
3475 
3476 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3477 {
3478 	/* kvm common code refers to this, but never calls it */
3479 	BUG();
3480 	return 0;
3481 }
3482 
3483 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3484 					   struct kvm_one_reg *reg)
3485 {
3486 	int r = -EINVAL;
3487 
3488 	switch (reg->id) {
3489 	case KVM_REG_S390_TODPR:
3490 		r = put_user(vcpu->arch.sie_block->todpr,
3491 			     (u32 __user *)reg->addr);
3492 		break;
3493 	case KVM_REG_S390_EPOCHDIFF:
3494 		r = put_user(vcpu->arch.sie_block->epoch,
3495 			     (u64 __user *)reg->addr);
3496 		break;
3497 	case KVM_REG_S390_CPU_TIMER:
3498 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3499 			     (u64 __user *)reg->addr);
3500 		break;
3501 	case KVM_REG_S390_CLOCK_COMP:
3502 		r = put_user(vcpu->arch.sie_block->ckc,
3503 			     (u64 __user *)reg->addr);
3504 		break;
3505 	case KVM_REG_S390_PFTOKEN:
3506 		r = put_user(vcpu->arch.pfault_token,
3507 			     (u64 __user *)reg->addr);
3508 		break;
3509 	case KVM_REG_S390_PFCOMPARE:
3510 		r = put_user(vcpu->arch.pfault_compare,
3511 			     (u64 __user *)reg->addr);
3512 		break;
3513 	case KVM_REG_S390_PFSELECT:
3514 		r = put_user(vcpu->arch.pfault_select,
3515 			     (u64 __user *)reg->addr);
3516 		break;
3517 	case KVM_REG_S390_PP:
3518 		r = put_user(vcpu->arch.sie_block->pp,
3519 			     (u64 __user *)reg->addr);
3520 		break;
3521 	case KVM_REG_S390_GBEA:
3522 		r = put_user(vcpu->arch.sie_block->gbea,
3523 			     (u64 __user *)reg->addr);
3524 		break;
3525 	default:
3526 		break;
3527 	}
3528 
3529 	return r;
3530 }
3531 
3532 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3533 					   struct kvm_one_reg *reg)
3534 {
3535 	int r = -EINVAL;
3536 	__u64 val;
3537 
3538 	switch (reg->id) {
3539 	case KVM_REG_S390_TODPR:
3540 		r = get_user(vcpu->arch.sie_block->todpr,
3541 			     (u32 __user *)reg->addr);
3542 		break;
3543 	case KVM_REG_S390_EPOCHDIFF:
3544 		r = get_user(vcpu->arch.sie_block->epoch,
3545 			     (u64 __user *)reg->addr);
3546 		break;
3547 	case KVM_REG_S390_CPU_TIMER:
3548 		r = get_user(val, (u64 __user *)reg->addr);
3549 		if (!r)
3550 			kvm_s390_set_cpu_timer(vcpu, val);
3551 		break;
3552 	case KVM_REG_S390_CLOCK_COMP:
3553 		r = get_user(vcpu->arch.sie_block->ckc,
3554 			     (u64 __user *)reg->addr);
3555 		break;
3556 	case KVM_REG_S390_PFTOKEN:
3557 		r = get_user(vcpu->arch.pfault_token,
3558 			     (u64 __user *)reg->addr);
3559 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3560 			kvm_clear_async_pf_completion_queue(vcpu);
3561 		break;
3562 	case KVM_REG_S390_PFCOMPARE:
3563 		r = get_user(vcpu->arch.pfault_compare,
3564 			     (u64 __user *)reg->addr);
3565 		break;
3566 	case KVM_REG_S390_PFSELECT:
3567 		r = get_user(vcpu->arch.pfault_select,
3568 			     (u64 __user *)reg->addr);
3569 		break;
3570 	case KVM_REG_S390_PP:
3571 		r = get_user(vcpu->arch.sie_block->pp,
3572 			     (u64 __user *)reg->addr);
3573 		break;
3574 	case KVM_REG_S390_GBEA:
3575 		r = get_user(vcpu->arch.sie_block->gbea,
3576 			     (u64 __user *)reg->addr);
3577 		break;
3578 	default:
3579 		break;
3580 	}
3581 
3582 	return r;
3583 }
3584 
3585 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3586 {
3587 	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3588 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3589 	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3590 
3591 	kvm_clear_async_pf_completion_queue(vcpu);
3592 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3593 		kvm_s390_vcpu_stop(vcpu);
3594 	kvm_s390_clear_local_irqs(vcpu);
3595 }
3596 
3597 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3598 {
3599 	/* Initial reset is a superset of the normal reset */
3600 	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3601 
3602 	/*
3603 	 * This equals the initial CPU reset described in the POP, but we don't
3604 	 * switch to ESA. We not only reset the internal data, but also ...
3605 	 */
3606 	vcpu->arch.sie_block->gpsw.mask = 0;
3607 	vcpu->arch.sie_block->gpsw.addr = 0;
3608 	kvm_s390_set_prefix(vcpu, 0);
3609 	kvm_s390_set_cpu_timer(vcpu, 0);
3610 	vcpu->arch.sie_block->ckc = 0;
3611 	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3612 	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3613 	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3614 
3615 	/* ... the data in sync regs */
3616 	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3617 	vcpu->run->s.regs.ckc = 0;
3618 	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3619 	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3620 	vcpu->run->psw_addr = 0;
3621 	vcpu->run->psw_mask = 0;
3622 	vcpu->run->s.regs.todpr = 0;
3623 	vcpu->run->s.regs.cputm = 0;
3624 	vcpu->run->s.regs.ckc = 0;
3625 	vcpu->run->s.regs.pp = 0;
3626 	vcpu->run->s.regs.gbea = 1;
3627 	vcpu->run->s.regs.fpc = 0;
3628 	/*
3629 	 * Do not reset these registers in the protected case, as some of
3630 	 * them are overlaid and they are not accessible in this case
3631 	 * anyway.
3632 	 */
3633 	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3634 		vcpu->arch.sie_block->gbea = 1;
3635 		vcpu->arch.sie_block->pp = 0;
3636 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3637 		vcpu->arch.sie_block->todpr = 0;
3638 	}
3639 }
3640 
3641 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3642 {
3643 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3644 
3645 	/* Clear reset is a superset of the initial reset */
3646 	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3647 
3648 	memset(&regs->gprs, 0, sizeof(regs->gprs));
3649 	memset(&regs->vrs, 0, sizeof(regs->vrs));
3650 	memset(&regs->acrs, 0, sizeof(regs->acrs));
3651 	memset(&regs->gscb, 0, sizeof(regs->gscb));
3652 
3653 	regs->etoken = 0;
3654 	regs->etoken_extension = 0;
3655 }
3656 
3657 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3658 {
3659 	vcpu_load(vcpu);
3660 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3661 	vcpu_put(vcpu);
3662 	return 0;
3663 }
3664 
3665 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3666 {
3667 	vcpu_load(vcpu);
3668 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3669 	vcpu_put(vcpu);
3670 	return 0;
3671 }
3672 
3673 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3674 				  struct kvm_sregs *sregs)
3675 {
3676 	vcpu_load(vcpu);
3677 
3678 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3679 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3680 
3681 	vcpu_put(vcpu);
3682 	return 0;
3683 }
3684 
3685 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3686 				  struct kvm_sregs *sregs)
3687 {
3688 	vcpu_load(vcpu);
3689 
3690 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3691 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3692 
3693 	vcpu_put(vcpu);
3694 	return 0;
3695 }
3696 
3697 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3698 {
3699 	int ret = 0;
3700 
3701 	vcpu_load(vcpu);
3702 
3703 	vcpu->run->s.regs.fpc = fpu->fpc;
3704 	if (MACHINE_HAS_VX)
3705 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3706 				 (freg_t *) fpu->fprs);
3707 	else
3708 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3709 
3710 	vcpu_put(vcpu);
3711 	return ret;
3712 }
3713 
3714 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3715 {
3716 	vcpu_load(vcpu);
3717 
3718 	/* make sure we have the latest values */
3719 	save_fpu_regs();
3720 	if (MACHINE_HAS_VX)
3721 		convert_vx_to_fp((freg_t *) fpu->fprs,
3722 				 (__vector128 *) vcpu->run->s.regs.vrs);
3723 	else
3724 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3725 	fpu->fpc = vcpu->run->s.regs.fpc;
3726 
3727 	vcpu_put(vcpu);
3728 	return 0;
3729 }
3730 
3731 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3732 {
3733 	int rc = 0;
3734 
3735 	if (!is_vcpu_stopped(vcpu))
3736 		rc = -EBUSY;
3737 	else {
3738 		vcpu->run->psw_mask = psw.mask;
3739 		vcpu->run->psw_addr = psw.addr;
3740 	}
3741 	return rc;
3742 }
3743 
3744 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3745 				  struct kvm_translation *tr)
3746 {
3747 	return -EINVAL; /* not implemented yet */
3748 }
3749 
3750 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3751 			      KVM_GUESTDBG_USE_HW_BP | \
3752 			      KVM_GUESTDBG_ENABLE)
3753 
3754 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3755 					struct kvm_guest_debug *dbg)
3756 {
3757 	int rc = 0;
3758 
3759 	vcpu_load(vcpu);
3760 
3761 	vcpu->guest_debug = 0;
3762 	kvm_s390_clear_bp_data(vcpu);
3763 
3764 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3765 		rc = -EINVAL;
3766 		goto out;
3767 	}
3768 	if (!sclp.has_gpere) {
3769 		rc = -EINVAL;
3770 		goto out;
3771 	}
3772 
3773 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3774 		vcpu->guest_debug = dbg->control;
3775 		/* enforce guest PER */
3776 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3777 
3778 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3779 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3780 	} else {
3781 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3782 		vcpu->arch.guestdbg.last_bp = 0;
3783 	}
3784 
3785 	if (rc) {
3786 		vcpu->guest_debug = 0;
3787 		kvm_s390_clear_bp_data(vcpu);
3788 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3789 	}
3790 
3791 out:
3792 	vcpu_put(vcpu);
3793 	return rc;
3794 }
3795 
3796 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3797 				    struct kvm_mp_state *mp_state)
3798 {
3799 	int ret;
3800 
3801 	vcpu_load(vcpu);
3802 
3803 	/* CHECK_STOP and LOAD are not supported yet */
3804 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3805 				      KVM_MP_STATE_OPERATING;
3806 
3807 	vcpu_put(vcpu);
3808 	return ret;
3809 }
3810 
3811 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3812 				    struct kvm_mp_state *mp_state)
3813 {
3814 	int rc = 0;
3815 
3816 	vcpu_load(vcpu);
3817 
3818 	/* user space knows about this interface - let it control the state */
3819 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3820 
3821 	switch (mp_state->mp_state) {
3822 	case KVM_MP_STATE_STOPPED:
3823 		rc = kvm_s390_vcpu_stop(vcpu);
3824 		break;
3825 	case KVM_MP_STATE_OPERATING:
3826 		rc = kvm_s390_vcpu_start(vcpu);
3827 		break;
3828 	case KVM_MP_STATE_LOAD:
3829 		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3830 			rc = -ENXIO;
3831 			break;
3832 		}
3833 		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3834 		break;
3835 	case KVM_MP_STATE_CHECK_STOP:
3836 		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
3837 	default:
3838 		rc = -ENXIO;
3839 	}
3840 
3841 	vcpu_put(vcpu);
3842 	return rc;
3843 }
3844 
3845 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3846 {
3847 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3848 }
3849 
3850 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3851 {
3852 retry:
3853 	kvm_s390_vcpu_request_handled(vcpu);
3854 	if (!kvm_request_pending(vcpu))
3855 		return 0;
3856 	/*
3857 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3858 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3859 	 * This ensures that the ipte instruction for this request has
3860 	 * already finished. We might race against a second unmapper that
3861 	 * wants to set the blocking bit. Let's just retry the request loop.
3862 	 */
3863 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3864 		int rc;
3865 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3866 					  kvm_s390_get_prefix(vcpu),
3867 					  PAGE_SIZE * 2, PROT_WRITE);
3868 		if (rc) {
3869 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3870 			return rc;
3871 		}
3872 		goto retry;
3873 	}
3874 
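	/* An invalid value in ihcpu causes SIE to purge the guest TLB on the next entry. */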
3875 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3876 		vcpu->arch.sie_block->ihcpu = 0xffff;
3877 		goto retry;
3878 	}
3879 
3880 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3881 		if (!ibs_enabled(vcpu)) {
3882 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3883 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3884 		}
3885 		goto retry;
3886 	}
3887 
3888 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3889 		if (ibs_enabled(vcpu)) {
3890 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3891 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3892 		}
3893 		goto retry;
3894 	}
3895 
3896 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3897 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3898 		goto retry;
3899 	}
3900 
3901 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3902 		/*
3903 		 * Disable CMM virtualization; we will emulate the ESSA
3904 		 * instruction manually, in order to provide additional
3905 		 * functionalities needed for live migration.
3906 		 */
3907 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3908 		goto retry;
3909 	}
3910 
3911 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3912 		/*
3913 		 * Re-enable CMM virtualization if CMMA is available and
3914 		 * CMM has been used.
3915 		 */
3916 		if ((vcpu->kvm->arch.use_cmma) &&
3917 		    (vcpu->kvm->mm->context.uses_cmm))
3918 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3919 		goto retry;
3920 	}
3921 
3922 	/* nothing to do, just clear the request */
3923 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3924 	/* we left the vsie handler, nothing to do, just clear the request */
3925 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3926 
3927 	return 0;
3928 }
3929 
3930 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3931 {
3932 	struct kvm_vcpu *vcpu;
3933 	union tod_clock clk;
3934 	int i;
3935 
3936 	preempt_disable();
3937 
3938 	store_tod_clock_ext(&clk);
3939 
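	/*
	 * The epoch is the offset SIE adds to the host TOD to form the guest
	 * TOD. With the multiple-epoch facility (139) the epoch index needs a
	 * borrow whenever the 64-bit epoch subtraction wraps around.
	 */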
3940 	kvm->arch.epoch = gtod->tod - clk.tod;
3941 	kvm->arch.epdx = 0;
3942 	if (test_kvm_facility(kvm, 139)) {
3943 		kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3944 		if (kvm->arch.epoch > gtod->tod)
3945 			kvm->arch.epdx -= 1;
3946 	}
3947 
3948 	kvm_s390_vcpu_block_all(kvm);
3949 	kvm_for_each_vcpu(i, vcpu, kvm) {
3950 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3951 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3952 	}
3953 
3954 	kvm_s390_vcpu_unblock_all(kvm);
3955 	preempt_enable();
3956 }
3957 
3958 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3959 {
3960 	if (!mutex_trylock(&kvm->lock))
3961 		return 0;
3962 	__kvm_s390_set_tod_clock(kvm, gtod);
3963 	mutex_unlock(&kvm->lock);
3964 	return 1;
3965 }
3966 
3967 /**
3968  * kvm_arch_fault_in_page - fault-in guest page if necessary
3969  * @vcpu: The corresponding virtual cpu
3970  * @gpa: Guest physical address
3971  * @writable: Whether the page should be writable or not
3972  *
3973  * Make sure that a guest page has been faulted-in on the host.
3974  *
3975  * Return: Zero on success, negative error code otherwise.
3976  */
3977 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3978 {
3979 	return gmap_fault(vcpu->arch.gmap, gpa,
3980 			  writable ? FAULT_FLAG_WRITE : 0);
3981 }
3982 
3983 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3984 				      unsigned long token)
3985 {
3986 	struct kvm_s390_interrupt inti;
3987 	struct kvm_s390_irq irq;
3988 
3989 	if (start_token) {
3990 		irq.u.ext.ext_params2 = token;
3991 		irq.type = KVM_S390_INT_PFAULT_INIT;
3992 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3993 	} else {
3994 		inti.type = KVM_S390_INT_PFAULT_DONE;
3995 		inti.parm64 = token;
3996 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3997 	}
3998 }
3999 
4000 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4001 				     struct kvm_async_pf *work)
4002 {
4003 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4004 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4005 
4006 	return true;
4007 }
4008 
4009 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4010 				 struct kvm_async_pf *work)
4011 {
4012 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4013 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4014 }
4015 
4016 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
4017 			       struct kvm_async_pf *work)
4018 {
4019 	/* s390 will always inject the page directly */
4020 }
4021 
4022 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4023 {
4024 	/*
4025 	 * s390 will always inject the page directly,
4026 	 * but we still want check_async_completion to clean up
4027 	 */
4028 	return true;
4029 }
4030 
4031 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4032 {
4033 	hva_t hva;
4034 	struct kvm_arch_async_pf arch;
4035 
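	/*
	 * Only use async page faults if the guest has pfault set up and can
	 * currently take the pfault-init external interrupt; otherwise the
	 * caller falls back to synchronous fault-in.
	 */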
4036 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4037 		return false;
4038 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4039 	    vcpu->arch.pfault_compare)
4040 		return false;
4041 	if (psw_extint_disabled(vcpu))
4042 		return false;
4043 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
4044 		return false;
4045 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4046 		return false;
4047 	if (!vcpu->arch.gmap->pfault_enabled)
4048 		return false;
4049 
4050 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4051 	hva += current->thread.gmap_addr & ~PAGE_MASK;
4052 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4053 		return false;
4054 
4055 	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4056 }
4057 
4058 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4059 {
4060 	int rc, cpuflags;
4061 
4062 	/*
4063 	 * On s390 notifications for arriving pages will be delivered directly
4064 	 * to the guest, but the housekeeping for completed pfaults is
4065 	 * handled outside the worker.
4066 	 */
4067 	kvm_check_async_pf_completion(vcpu);
4068 
4069 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4070 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4071 
4072 	if (need_resched())
4073 		schedule();
4074 
4075 	if (!kvm_is_ucontrol(vcpu->kvm)) {
4076 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
4077 		if (rc)
4078 			return rc;
4079 	}
4080 
4081 	rc = kvm_s390_handle_requests(vcpu);
4082 	if (rc)
4083 		return rc;
4084 
4085 	if (guestdbg_enabled(vcpu)) {
4086 		kvm_s390_backup_guest_per_regs(vcpu);
4087 		kvm_s390_patch_guest_per_regs(vcpu);
4088 	}
4089 
4090 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4091 
4092 	vcpu->arch.sie_block->icptcode = 0;
4093 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4094 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4095 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
4096 
4097 	return 0;
4098 }
4099 
4100 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4101 {
4102 	struct kvm_s390_pgm_info pgm_info = {
4103 		.code = PGM_ADDRESSING,
4104 	};
4105 	u8 opcode, ilen;
4106 	int rc;
4107 
4108 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4109 	trace_kvm_s390_sie_fault(vcpu);
4110 
4111 	/*
4112 	 * We want to inject an addressing exception, which is defined as a
4113 	 * suppressing or terminating exception. However, since we came here
4114 	 * by a DAT access exception, the PSW still points to the faulting
4115 	 * instruction since DAT exceptions are nullifying. So we've got
4116 	 * to look up the current opcode to get the length of the instruction
4117 	 * to be able to forward the PSW.
4118 	 */
4119 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4120 	ilen = insn_length(opcode);
4121 	if (rc < 0) {
4122 		return rc;
4123 	} else if (rc) {
4124 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
4125 		 * Forward by arbitrary ilc, injection will take care of
4126 		 * nullification if necessary.
4127 		 */
4128 		pgm_info = vcpu->arch.pgm;
4129 		ilen = 4;
4130 	}
4131 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4132 	kvm_s390_forward_psw(vcpu, ilen);
4133 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4134 }
4135 
4136 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4137 {
4138 	struct mcck_volatile_info *mcck_info;
4139 	struct sie_page *sie_page;
4140 
4141 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4142 		   vcpu->arch.sie_block->icptcode);
4143 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4144 
4145 	if (guestdbg_enabled(vcpu))
4146 		kvm_s390_restore_guest_per_regs(vcpu);
4147 
4148 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4149 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4150 
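	/* exit_reason == -EINTR: SIE was left due to a host machine check; forward it to the guest. */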
4151 	if (exit_reason == -EINTR) {
4152 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
4153 		sie_page = container_of(vcpu->arch.sie_block,
4154 					struct sie_page, sie_block);
4155 		mcck_info = &sie_page->mcck_info;
4156 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
4157 		return 0;
4158 	}
4159 
4160 	if (vcpu->arch.sie_block->icptcode > 0) {
4161 		int rc = kvm_handle_sie_intercept(vcpu);
4162 
4163 		if (rc != -EOPNOTSUPP)
4164 			return rc;
4165 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4166 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4167 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4168 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4169 		return -EREMOTE;
4170 	} else if (exit_reason != -EFAULT) {
4171 		vcpu->stat.exit_null++;
4172 		return 0;
4173 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
4174 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4175 		vcpu->run->s390_ucontrol.trans_exc_code =
4176 						current->thread.gmap_addr;
4177 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
4178 		return -EREMOTE;
4179 	} else if (current->thread.gmap_pfault) {
4180 		trace_kvm_s390_major_guest_pfault(vcpu);
4181 		current->thread.gmap_pfault = 0;
4182 		if (kvm_arch_setup_async_pf(vcpu))
4183 			return 0;
4184 		vcpu->stat.pfault_sync++;
4185 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4186 	}
4187 	return vcpu_post_run_fault_in_sie(vcpu);
4188 }
4189 
4190 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4191 static int __vcpu_run(struct kvm_vcpu *vcpu)
4192 {
4193 	int rc, exit_reason;
4194 	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4195 
4196 	/*
4197 	 * We try to hold kvm->srcu during most of vcpu_run (except when
4198 	 * running the guest), so that memslots (and other stuff) are protected
4199 	 */
4200 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4201 
4202 	do {
4203 		rc = vcpu_pre_run(vcpu);
4204 		if (rc)
4205 			break;
4206 
4207 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4208 		/*
4209 		 * As PF_VCPU will be used in the fault handler, there must be
4210 		 * no uaccess between guest_enter and guest_exit.
4211 		 */
4212 		local_irq_disable();
4213 		guest_enter_irqoff();
4214 		__disable_cpu_timer_accounting(vcpu);
4215 		local_irq_enable();
4216 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4217 			memcpy(sie_page->pv_grregs,
4218 			       vcpu->run->s.regs.gprs,
4219 			       sizeof(sie_page->pv_grregs));
4220 		}
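		/* Reload the guest FP/vector registers if lazy FPU handling left them unloaded. */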
4221 		if (test_cpu_flag(CIF_FPU))
4222 			load_fpu_regs();
4223 		exit_reason = sie64a(vcpu->arch.sie_block,
4224 				     vcpu->run->s.regs.gprs);
4225 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4226 			memcpy(vcpu->run->s.regs.gprs,
4227 			       sie_page->pv_grregs,
4228 			       sizeof(sie_page->pv_grregs));
4229 			/*
4230 			 * We're not allowed to inject interrupts on intercepts
4231 			 * that leave the guest state in an "in-between" state
4232 			 * where the next SIE entry will do a continuation.
4233 			 * Fence interrupts in our "internal" PSW.
4234 			 */
4235 			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4236 			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4237 				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4238 			}
4239 		}
4240 		local_irq_disable();
4241 		__enable_cpu_timer_accounting(vcpu);
4242 		guest_exit_irqoff();
4243 		local_irq_enable();
4244 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4245 
4246 		rc = vcpu_post_run(vcpu, exit_reason);
4247 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4248 
4249 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4250 	return rc;
4251 }
4252 
4253 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4254 {
4255 	struct kvm_run *kvm_run = vcpu->run;
4256 	struct runtime_instr_cb *riccb;
4257 	struct gs_cb *gscb;
4258 
4259 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4260 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4261 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4262 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4263 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4264 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4265 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4266 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4267 	}
4268 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4269 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4270 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4271 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4272 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4273 			kvm_clear_async_pf_completion_queue(vcpu);
4274 	}
4275 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4276 		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4277 		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4278 	}
4279 	/*
4280 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
4281 	 * we should enable RI here instead of doing the lazy enablement.
4282 	 */
4283 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4284 	    test_kvm_facility(vcpu->kvm, 64) &&
4285 	    riccb->v &&
4286 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4287 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4288 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4289 	}
4290 	/*
4291 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
4292 	 * we should enable GS here instead of doing the lazy enablement.
4293 	 */
4294 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4295 	    test_kvm_facility(vcpu->kvm, 133) &&
4296 	    gscb->gssm &&
4297 	    !vcpu->arch.gs_enabled) {
4298 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4299 		vcpu->arch.sie_block->ecb |= ECB_GS;
4300 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4301 		vcpu->arch.gs_enabled = 1;
4302 	}
4303 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4304 	    test_kvm_facility(vcpu->kvm, 82)) {
4305 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4306 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4307 	}
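	/* Save the host guarded-storage control block and install the guest one if the guest uses GS. */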
4308 	if (MACHINE_HAS_GS) {
4309 		preempt_disable();
4310 		__ctl_set_bit(2, 4);
4311 		if (current->thread.gs_cb) {
4312 			vcpu->arch.host_gscb = current->thread.gs_cb;
4313 			save_gs_cb(vcpu->arch.host_gscb);
4314 		}
4315 		if (vcpu->arch.gs_enabled) {
4316 			current->thread.gs_cb = (struct gs_cb *)
4317 						&vcpu->run->s.regs.gscb;
4318 			restore_gs_cb(current->thread.gs_cb);
4319 		}
4320 		preempt_enable();
4321 	}
4322 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
4323 }
4324 
4325 static void sync_regs(struct kvm_vcpu *vcpu)
4326 {
4327 	struct kvm_run *kvm_run = vcpu->run;
4328 
4329 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4330 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4331 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4332 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4333 		/* some control register changes require a tlb flush */
4334 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4335 	}
4336 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4337 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4338 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4339 	}
4340 	save_access_regs(vcpu->arch.host_acrs);
4341 	restore_access_regs(vcpu->run->s.regs.acrs);
4342 	/* save host (userspace) fprs/vrs */
4343 	save_fpu_regs();
4344 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4345 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4346 	if (MACHINE_HAS_VX)
4347 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4348 	else
4349 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4350 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4351 	if (test_fp_ctl(current->thread.fpu.fpc))
4352 		/* User space provided an invalid FPC, let's clear it */
4353 		current->thread.fpu.fpc = 0;
4354 
4355 	/* Sync fmt2 only data */
4356 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4357 		sync_regs_fmt2(vcpu);
4358 	} else {
4359 		/*
4360 		 * In several places we have to modify our internal view to
4361 		 * not do things that are disallowed by the ultravisor. For
4362 		 * example we must not inject interrupts after specific exits
4363 		 * (e.g. 112 prefix page not secure). We do this by turning
4364 		 * off the machine check, external and I/O interrupt bits
4365 		 * of our PSW copy. To avoid getting validity intercepts, we
4366 		 * only accept the condition code from userspace.
4367 		 */
4368 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4369 		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4370 						   PSW_MASK_CC;
4371 	}
4372 
4373 	kvm_run->kvm_dirty_regs = 0;
4374 }
4375 
4376 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4377 {
4378 	struct kvm_run *kvm_run = vcpu->run;
4379 
4380 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4381 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4382 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4383 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4384 	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4385 	if (MACHINE_HAS_GS) {
4386 		preempt_disable();
4387 		__ctl_set_bit(2, 4);
4388 		if (vcpu->arch.gs_enabled)
4389 			save_gs_cb(current->thread.gs_cb);
4390 		current->thread.gs_cb = vcpu->arch.host_gscb;
4391 		restore_gs_cb(vcpu->arch.host_gscb);
4392 		if (!vcpu->arch.host_gscb)
4393 			__ctl_clear_bit(2, 4);
4394 		vcpu->arch.host_gscb = NULL;
4395 		preempt_enable();
4396 	}
4397 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
4398 }
4399 
4400 static void store_regs(struct kvm_vcpu *vcpu)
4401 {
4402 	struct kvm_run *kvm_run = vcpu->run;
4403 
4404 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4405 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4406 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4407 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4408 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4409 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4410 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4411 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4412 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4413 	save_access_regs(vcpu->run->s.regs.acrs);
4414 	restore_access_regs(vcpu->arch.host_acrs);
4415 	/* Save guest register state */
4416 	save_fpu_regs();
4417 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4418 	/* Restore will be done lazily at return */
4419 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4420 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4421 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4422 		store_regs_fmt2(vcpu);
4423 }
4424 
4425 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4426 {
4427 	struct kvm_run *kvm_run = vcpu->run;
4428 	int rc;
4429 
4430 	if (kvm_run->immediate_exit)
4431 		return -EINTR;
4432 
4433 	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4434 	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4435 		return -EINVAL;
4436 
4437 	vcpu_load(vcpu);
4438 
4439 	if (guestdbg_exit_pending(vcpu)) {
4440 		kvm_s390_prepare_debug_exit(vcpu);
4441 		rc = 0;
4442 		goto out;
4443 	}
4444 
4445 	kvm_sigset_activate(vcpu);
4446 
4447 	/*
4448 	 * No need to check the return value of vcpu_start, as it can only fail
4449 	 * for protvirt, but protvirt implies user-controlled cpu state
4450 	 */
4451 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4452 		kvm_s390_vcpu_start(vcpu);
4453 	} else if (is_vcpu_stopped(vcpu)) {
4454 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4455 				   vcpu->vcpu_id);
4456 		rc = -EINVAL;
4457 		goto out;
4458 	}
4459 
4460 	sync_regs(vcpu);
4461 	enable_cpu_timer_accounting(vcpu);
4462 
4463 	might_fault();
4464 	rc = __vcpu_run(vcpu);
4465 
4466 	if (signal_pending(current) && !rc) {
4467 		kvm_run->exit_reason = KVM_EXIT_INTR;
4468 		rc = -EINTR;
4469 	}
4470 
4471 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4472 		kvm_s390_prepare_debug_exit(vcpu);
4473 		rc = 0;
4474 	}
4475 
4476 	if (rc == -EREMOTE) {
4477 		/* userspace support is needed, kvm_run has been prepared */
4478 		rc = 0;
4479 	}
4480 
4481 	disable_cpu_timer_accounting(vcpu);
4482 	store_regs(vcpu);
4483 
4484 	kvm_sigset_deactivate(vcpu);
4485 
4486 	vcpu->stat.exit_userspace++;
4487 out:
4488 	vcpu_put(vcpu);
4489 	return rc;
4490 }
4491 
4492 /*
4493  * store status at address
4494  * we have two special cases:
4495  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4496  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4497  */
4498 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4499 {
4500 	unsigned char archmode = 1;
4501 	freg_t fprs[NUM_FPRS];
4502 	unsigned int px;
4503 	u64 clkcomp, cputm;
4504 	int rc;
4505 
4506 	px = kvm_s390_get_prefix(vcpu);
4507 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4508 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4509 			return -EFAULT;
4510 		gpa = 0;
4511 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4512 		if (write_guest_real(vcpu, 163, &archmode, 1))
4513 			return -EFAULT;
4514 		gpa = px;
4515 	} else
4516 		gpa -= __LC_FPREGS_SAVE_AREA;
4517 
4518 	/* manually convert vector registers if necessary */
4519 	if (MACHINE_HAS_VX) {
4520 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4521 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4522 				     fprs, 128);
4523 	} else {
4524 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4525 				     vcpu->run->s.regs.fprs, 128);
4526 	}
4527 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4528 			      vcpu->run->s.regs.gprs, 128);
4529 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4530 			      &vcpu->arch.sie_block->gpsw, 16);
4531 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4532 			      &px, 4);
4533 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4534 			      &vcpu->run->s.regs.fpc, 4);
4535 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4536 			      &vcpu->arch.sie_block->todpr, 4);
4537 	cputm = kvm_s390_get_cpu_timer(vcpu);
4538 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4539 			      &cputm, 8);
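	/* The save area holds the clock comparator shifted right by 8 bits (bits 0-55). */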
4540 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4541 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4542 			      &clkcomp, 8);
4543 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4544 			      &vcpu->run->s.regs.acrs, 64);
4545 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4546 			      &vcpu->arch.sie_block->gcr, 128);
4547 	return rc ? -EFAULT : 0;
4548 }
4549 
4550 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4551 {
4552 	/*
4553 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4554 	 * switch in the run ioctl. Let's update our copies before we save
4555 	 * them into the save area.
4556 	 */
4557 	save_fpu_regs();
4558 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4559 	save_access_regs(vcpu->run->s.regs.acrs);
4560 
4561 	return kvm_s390_store_status_unloaded(vcpu, addr);
4562 }
4563 
4564 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4565 {
4566 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4567 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4568 }
4569 
4570 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4571 {
4572 	unsigned int i;
4573 	struct kvm_vcpu *vcpu;
4574 
4575 	kvm_for_each_vcpu(i, vcpu, kvm) {
4576 		__disable_ibs_on_vcpu(vcpu);
4577 	}
4578 }
4579 
4580 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4581 {
4582 	if (!sclp.has_ibs)
4583 		return;
4584 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4585 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4586 }
4587 
4588 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4589 {
4590 	int i, online_vcpus, r = 0, started_vcpus = 0;
4591 
4592 	if (!is_vcpu_stopped(vcpu))
4593 		return 0;
4594 
4595 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4596 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4597 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4598 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4599 
4600 	/* Let's tell the UV that we want to change into the operating state */
4601 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4602 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4603 		if (r) {
4604 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4605 			return r;
4606 		}
4607 	}
4608 
4609 	for (i = 0; i < online_vcpus; i++) {
4610 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4611 			started_vcpus++;
4612 	}
4613 
4614 	if (started_vcpus == 0) {
4615 		/* we're the only active VCPU -> speed it up */
4616 		__enable_ibs_on_vcpu(vcpu);
4617 	} else if (started_vcpus == 1) {
4618 		/*
4619 		 * As we are starting a second VCPU, we have to disable
4620 		 * the IBS facility on all VCPUs to remove potentially
4621 		 * outstanding ENABLE requests.
4622 		 */
4623 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4624 	}
4625 
4626 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4627 	/*
4628 	 * The real PSW might have changed due to a RESTART interpreted by the
4629 	 * ultravisor. We block all interrupts and let the next sie exit
4630 	 * refresh our view.
4631 	 */
4632 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4633 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4634 	/*
4635 	 * Another VCPU might have used IBS while we were offline.
4636 	 * Let's play safe and flush the VCPU at startup.
4637 	 */
4638 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4639 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4640 	return 0;
4641 }
4642 
4643 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4644 {
4645 	int i, online_vcpus, r = 0, started_vcpus = 0;
4646 	struct kvm_vcpu *started_vcpu = NULL;
4647 
4648 	if (is_vcpu_stopped(vcpu))
4649 		return 0;
4650 
4651 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4652 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4653 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4654 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4655 
4656 	/* Let's tell the UV that we want to change into the stopped state */
4657 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4658 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4659 		if (r) {
4660 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4661 			return r;
4662 		}
4663 	}
4664 
4665 	/*
4666 	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
4667 	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
4668 	 * have been fully processed. This will ensure that the VCPU
4669 	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
4670 	 */
4671 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4672 	kvm_s390_clear_stop_irq(vcpu);
4673 
4674 	__disable_ibs_on_vcpu(vcpu);
4675 
4676 	for (i = 0; i < online_vcpus; i++) {
4677 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4678 			started_vcpus++;
4679 			started_vcpu = vcpu->kvm->vcpus[i];
4680 		}
4681 	}
4682 
4683 	if (started_vcpus == 1) {
4684 		/*
4685 		 * As we only have one VCPU left, we want to enable the
4686 		 * IBS facility for that VCPU to speed it up.
4687 		 */
4688 		__enable_ibs_on_vcpu(started_vcpu);
4689 	}
4690 
4691 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4692 	return 0;
4693 }
4694 
4695 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4696 				     struct kvm_enable_cap *cap)
4697 {
4698 	int r;
4699 
4700 	if (cap->flags)
4701 		return -EINVAL;
4702 
4703 	switch (cap->cap) {
4704 	case KVM_CAP_S390_CSS_SUPPORT:
4705 		if (!vcpu->kvm->arch.css_support) {
4706 			vcpu->kvm->arch.css_support = 1;
4707 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4708 			trace_kvm_s390_enable_css(vcpu->kvm);
4709 		}
4710 		r = 0;
4711 		break;
4712 	default:
4713 		r = -EINVAL;
4714 		break;
4715 	}
4716 	return r;
4717 }
4718 
4719 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4720 				   struct kvm_s390_mem_op *mop)
4721 {
4722 	void __user *uaddr = (void __user *)mop->buf;
4723 	int r = 0;
4724 
4725 	if (mop->flags || !mop->size)
4726 		return -EINVAL;
4727 	if (mop->size + mop->sida_offset < mop->size)
4728 		return -EINVAL;
4729 	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4730 		return -E2BIG;
4731 	if (!kvm_s390_pv_cpu_is_protected(vcpu))
4732 		return -EINVAL;
4733 
4734 	switch (mop->op) {
4735 	case KVM_S390_MEMOP_SIDA_READ:
4736 		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4737 				 mop->sida_offset), mop->size))
4738 			r = -EFAULT;
4739 
4740 		break;
4741 	case KVM_S390_MEMOP_SIDA_WRITE:
4742 		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4743 				   mop->sida_offset), uaddr, mop->size))
4744 			r = -EFAULT;
4745 		break;
4746 	}
4747 	return r;
4748 }
4749 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4750 				  struct kvm_s390_mem_op *mop)
4751 {
4752 	void __user *uaddr = (void __user *)mop->buf;
4753 	void *tmpbuf = NULL;
4754 	int r = 0;
4755 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4756 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
4757 
4758 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4759 		return -EINVAL;
4760 
4761 	if (mop->size > MEM_OP_MAX_SIZE)
4762 		return -E2BIG;
4763 
4764 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4765 		return -EINVAL;
4766 
4767 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4768 		tmpbuf = vmalloc(mop->size);
4769 		if (!tmpbuf)
4770 			return -ENOMEM;
4771 	}
4772 
4773 	switch (mop->op) {
4774 	case KVM_S390_MEMOP_LOGICAL_READ:
4775 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4776 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4777 					    mop->size, GACC_FETCH);
4778 			break;
4779 		}
4780 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4781 		if (r == 0) {
4782 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4783 				r = -EFAULT;
4784 		}
4785 		break;
4786 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4787 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4788 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4789 					    mop->size, GACC_STORE);
4790 			break;
4791 		}
4792 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4793 			r = -EFAULT;
4794 			break;
4795 		}
4796 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4797 		break;
4798 	}
4799 
4800 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4801 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4802 
4803 	vfree(tmpbuf);
4804 	return r;
4805 }
4806 
4807 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4808 				      struct kvm_s390_mem_op *mop)
4809 {
4810 	int r, srcu_idx;
4811 
4812 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4813 
4814 	switch (mop->op) {
4815 	case KVM_S390_MEMOP_LOGICAL_READ:
4816 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4817 		r = kvm_s390_guest_mem_op(vcpu, mop);
4818 		break;
4819 	case KVM_S390_MEMOP_SIDA_READ:
4820 	case KVM_S390_MEMOP_SIDA_WRITE:
4821 		/* we are locked against sida going away by the vcpu->mutex */
4822 		r = kvm_s390_guest_sida_op(vcpu, mop);
4823 		break;
4824 	default:
4825 		r = -EINVAL;
4826 	}
4827 
4828 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4829 	return r;
4830 }
4831 
4832 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4833 			       unsigned int ioctl, unsigned long arg)
4834 {
4835 	struct kvm_vcpu *vcpu = filp->private_data;
4836 	void __user *argp = (void __user *)arg;
4837 
4838 	switch (ioctl) {
4839 	case KVM_S390_IRQ: {
4840 		struct kvm_s390_irq s390irq;
4841 
4842 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4843 			return -EFAULT;
4844 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4845 	}
4846 	case KVM_S390_INTERRUPT: {
4847 		struct kvm_s390_interrupt s390int;
4848 		struct kvm_s390_irq s390irq = {};
4849 
4850 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4851 			return -EFAULT;
4852 		if (s390int_to_s390irq(&s390int, &s390irq))
4853 			return -EINVAL;
4854 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4855 	}
4856 	}
4857 	return -ENOIOCTLCMD;
4858 }
4859 
4860 long kvm_arch_vcpu_ioctl(struct file *filp,
4861 			 unsigned int ioctl, unsigned long arg)
4862 {
4863 	struct kvm_vcpu *vcpu = filp->private_data;
4864 	void __user *argp = (void __user *)arg;
4865 	int idx;
4866 	long r;
4867 	u16 rc, rrc;
4868 
4869 	vcpu_load(vcpu);
4870 
4871 	switch (ioctl) {
4872 	case KVM_S390_STORE_STATUS:
4873 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4874 		r = kvm_s390_store_status_unloaded(vcpu, arg);
4875 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4876 		break;
4877 	case KVM_S390_SET_INITIAL_PSW: {
4878 		psw_t psw;
4879 
4880 		r = -EFAULT;
4881 		if (copy_from_user(&psw, argp, sizeof(psw)))
4882 			break;
4883 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4884 		break;
4885 	}
4886 	case KVM_S390_CLEAR_RESET:
4887 		r = 0;
4888 		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4889 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4890 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4891 					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4892 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4893 				   rc, rrc);
4894 		}
4895 		break;
4896 	case KVM_S390_INITIAL_RESET:
4897 		r = 0;
4898 		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4899 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4900 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4901 					  UVC_CMD_CPU_RESET_INITIAL,
4902 					  &rc, &rrc);
4903 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4904 				   rc, rrc);
4905 		}
4906 		break;
4907 	case KVM_S390_NORMAL_RESET:
4908 		r = 0;
4909 		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4910 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4911 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4912 					  UVC_CMD_CPU_RESET, &rc, &rrc);
4913 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4914 				   rc, rrc);
4915 		}
4916 		break;
4917 	case KVM_SET_ONE_REG:
4918 	case KVM_GET_ONE_REG: {
4919 		struct kvm_one_reg reg;
4920 		r = -EINVAL;
4921 		if (kvm_s390_pv_cpu_is_protected(vcpu))
4922 			break;
4923 		r = -EFAULT;
4924 		if (copy_from_user(&reg, argp, sizeof(reg)))
4925 			break;
4926 		if (ioctl == KVM_SET_ONE_REG)
4927 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4928 		else
4929 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4930 		break;
4931 	}
4932 #ifdef CONFIG_KVM_S390_UCONTROL
4933 	case KVM_S390_UCAS_MAP: {
4934 		struct kvm_s390_ucas_mapping ucasmap;
4935 
4936 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4937 			r = -EFAULT;
4938 			break;
4939 		}
4940 
4941 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4942 			r = -EINVAL;
4943 			break;
4944 		}
4945 
4946 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4947 				     ucasmap.vcpu_addr, ucasmap.length);
4948 		break;
4949 	}
4950 	case KVM_S390_UCAS_UNMAP: {
4951 		struct kvm_s390_ucas_mapping ucasmap;
4952 
4953 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4954 			r = -EFAULT;
4955 			break;
4956 		}
4957 
4958 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4959 			r = -EINVAL;
4960 			break;
4961 		}
4962 
4963 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4964 			ucasmap.length);
4965 		break;
4966 	}
4967 #endif
4968 	case KVM_S390_VCPU_FAULT: {
4969 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
4970 		break;
4971 	}
4972 	case KVM_ENABLE_CAP:
4973 	{
4974 		struct kvm_enable_cap cap;
4975 		r = -EFAULT;
4976 		if (copy_from_user(&cap, argp, sizeof(cap)))
4977 			break;
4978 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4979 		break;
4980 	}
4981 	case KVM_S390_MEM_OP: {
4982 		struct kvm_s390_mem_op mem_op;
4983 
4984 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4985 			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4986 		else
4987 			r = -EFAULT;
4988 		break;
4989 	}
4990 	case KVM_S390_SET_IRQ_STATE: {
4991 		struct kvm_s390_irq_state irq_state;
4992 
4993 		r = -EFAULT;
4994 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4995 			break;
4996 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4997 		    irq_state.len == 0 ||
4998 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4999 			r = -EINVAL;
5000 			break;
5001 		}
5002 		/* do not use irq_state.flags, as that would break old QEMUs */
5003 		r = kvm_s390_set_irq_state(vcpu,
5004 					   (void __user *) irq_state.buf,
5005 					   irq_state.len);
5006 		break;
5007 	}
5008 	case KVM_S390_GET_IRQ_STATE: {
5009 		struct kvm_s390_irq_state irq_state;
5010 
5011 		r = -EFAULT;
5012 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5013 			break;
5014 		if (irq_state.len == 0) {
5015 			r = -EINVAL;
5016 			break;
5017 		}
5018 		/* do not use irq_state.flags, as that would break old QEMUs */
5019 		r = kvm_s390_get_irq_state(vcpu,
5020 					   (__u8 __user *)  irq_state.buf,
5021 					   irq_state.len);
5022 		break;
5023 	}
5024 	default:
5025 		r = -ENOTTY;
5026 	}
5027 
5028 	vcpu_put(vcpu);
5029 	return r;
5030 }
5031 
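/*
 * For user-controlled VMs the SIE control block can be mapped into userspace
 * at KVM_S390_SIE_PAGE_OFFSET; all other faults on the vcpu mapping raise
 * SIGBUS.
 */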
5032 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5033 {
5034 #ifdef CONFIG_KVM_S390_UCONTROL
5035 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5036 		 && (kvm_is_ucontrol(vcpu->kvm))) {
5037 		vmf->page = virt_to_page(vcpu->arch.sie_block);
5038 		get_page(vmf->page);
5039 		return 0;
5040 	}
5041 #endif
5042 	return VM_FAULT_SIGBUS;
5043 }
5044 
5045 /* Section: memory related */
5046 int kvm_arch_prepare_memory_region(struct kvm *kvm,
5047 				   struct kvm_memory_slot *memslot,
5048 				   const struct kvm_userspace_memory_region *mem,
5049 				   enum kvm_mr_change change)
5050 {
5051 	/* A few sanity checks. Memory slots must start and end on a segment
5052 	   boundary (1 MB). The userland memory backing a slot may be split
5053 	   across several vmas, and it may be mmap()ed and munmap()ed at any
5054 	   time after this call. */
5055 
5056 	if (mem->userspace_addr & 0xffffful)
5057 		return -EINVAL;
5058 
5059 	if (mem->memory_size & 0xffffful)
5060 		return -EINVAL;
5061 
5062 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
5063 		return -EINVAL;
5064 
5065 	/* When we are protected, we should not change the memory slots */
5066 	if (kvm_s390_pv_get_handle(kvm))
5067 		return -EINVAL;
5068 
5069 	if (!kvm->arch.migration_mode)
5070 		return 0;
5071 
5072 	/*
5073 	 * Turn off migration mode when:
5074 	 * - userspace creates a new memslot with dirty logging off,
5075 	 * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and
5076 	 *   dirty logging is turned off.
5077 	 * Migration mode relies on dirty page logging being enabled to
5078 	 * maintain its dirty bitmap.
5079 	 */
5080 	if (change != KVM_MR_DELETE &&
5081 	    !(mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
5082 		WARN(kvm_s390_vm_stop_migration(kvm),
5083 		     "Failed to stop migration mode");
5084 
5085 	return 0;
5086 }
5087 
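/*
 * Commit a memslot change by updating the guest mapping: deleted or moved
 * slots are unmapped from the gmap, created (or moved) slots are mapped at
 * their new location; FLAGS_ONLY changes need no gmap update.
 */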
5088 void kvm_arch_commit_memory_region(struct kvm *kvm,
5089 				const struct kvm_userspace_memory_region *mem,
5090 				struct kvm_memory_slot *old,
5091 				const struct kvm_memory_slot *new,
5092 				enum kvm_mr_change change)
5093 {
5094 	int rc = 0;
5095 
5096 	switch (change) {
5097 	case KVM_MR_DELETE:
5098 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5099 					old->npages * PAGE_SIZE);
5100 		break;
5101 	case KVM_MR_MOVE:
5102 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5103 					old->npages * PAGE_SIZE);
5104 		if (rc)
5105 			break;
5106 		fallthrough;
5107 	case KVM_MR_CREATE:
5108 		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5109 				      mem->guest_phys_addr, mem->memory_size);
5110 		break;
5111 	case KVM_MR_FLAGS_ONLY:
5112 		break;
5113 	default:
5114 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5115 	}
5116 	if (rc)
5117 		pr_warn("failed to commit memory region\n");
5118 	return;
5119 }
5120 
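/*
 * Mask of the facility bits in facility-list word i that are offered to
 * guests. The two hmfai bits for this word (reported by the SCLP) shift the
 * 48-bit base mask down in steps of 16 bits, removing facility bits that are
 * not passed through to guests.
 */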
5121 static inline unsigned long nonhyp_mask(int i)
5122 {
5123 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5124 
5125 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5126 }
5127 
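/* A vcpu has finished blocking: any pending wakeup is no longer valid. */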
5128 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5129 {
5130 	vcpu->valid_wakeup = false;
5131 }
5132 
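/*
 * Module init: bail out if SIE (sclp.has_sief2) is not available or if both
 * nested virtualization and huge page backing were requested, then seed
 * kvm_s390_fac_base from the host facility list and register with the KVM
 * core.
 */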
5133 static int __init kvm_s390_init(void)
5134 {
5135 	int i;
5136 
5137 	if (!sclp.has_sief2) {
5138 		pr_info("SIE is not available\n");
5139 		return -ENODEV;
5140 	}
5141 
5142 	if (nested && hpage) {
5143 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5144 		return -EINVAL;
5145 	}
5146 
5147 	for (i = 0; i < 16; i++)
5148 		kvm_s390_fac_base[i] |=
5149 			stfle_fac_list[i] & nonhyp_mask(i);
5150 
5151 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5152 }
5153 
5154 static void __exit kvm_s390_exit(void)
5155 {
5156 	kvm_exit();
5157 }
5158 
5159 module_init(kvm_s390_init);
5160 module_exit(kvm_s390_exit);
5161 
5162 /*
5163  * Enable autoloading of the kvm module.
5164  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5165  * since x86 takes a different approach.
5166  */
5167 #include <linux/miscdevice.h>
5168 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5169 MODULE_ALIAS("devname:kvm");
5170