1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2020
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
35 
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include "kvm-s390.h"
49 #include "gaccess.h"
50 
51 #define CREATE_TRACE_POINTS
52 #include "trace.h"
53 #include "trace-s390.h"
54 
55 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
56 #define LOCAL_IRQS 32
57 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
58 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
59 
60 struct kvm_stats_debugfs_item debugfs_entries[] = {
61 	VCPU_STAT("userspace_handled", exit_userspace),
62 	VCPU_STAT("exit_null", exit_null),
63 	VCPU_STAT("exit_validity", exit_validity),
64 	VCPU_STAT("exit_stop_request", exit_stop_request),
65 	VCPU_STAT("exit_external_request", exit_external_request),
66 	VCPU_STAT("exit_io_request", exit_io_request),
67 	VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
68 	VCPU_STAT("exit_instruction", exit_instruction),
69 	VCPU_STAT("exit_pei", exit_pei),
70 	VCPU_STAT("exit_program_interruption", exit_program_interruption),
71 	VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
72 	VCPU_STAT("exit_operation_exception", exit_operation_exception),
73 	VCPU_STAT("halt_successful_poll", halt_successful_poll),
74 	VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
75 	VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
76 	VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
77 	VCPU_STAT("halt_wakeup", halt_wakeup),
78 	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
79 	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
80 	VCPU_STAT("instruction_lctlg", instruction_lctlg),
81 	VCPU_STAT("instruction_lctl", instruction_lctl),
82 	VCPU_STAT("instruction_stctl", instruction_stctl),
83 	VCPU_STAT("instruction_stctg", instruction_stctg),
84 	VCPU_STAT("deliver_ckc", deliver_ckc),
85 	VCPU_STAT("deliver_cputm", deliver_cputm),
86 	VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
87 	VCPU_STAT("deliver_external_call", deliver_external_call),
88 	VCPU_STAT("deliver_service_signal", deliver_service_signal),
89 	VCPU_STAT("deliver_virtio", deliver_virtio),
90 	VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
91 	VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
92 	VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
93 	VCPU_STAT("deliver_program", deliver_program),
94 	VCPU_STAT("deliver_io", deliver_io),
95 	VCPU_STAT("deliver_machine_check", deliver_machine_check),
96 	VCPU_STAT("exit_wait_state", exit_wait_state),
97 	VCPU_STAT("inject_ckc", inject_ckc),
98 	VCPU_STAT("inject_cputm", inject_cputm),
99 	VCPU_STAT("inject_external_call", inject_external_call),
100 	VM_STAT("inject_float_mchk", inject_float_mchk),
101 	VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
102 	VM_STAT("inject_io", inject_io),
103 	VCPU_STAT("inject_mchk", inject_mchk),
104 	VM_STAT("inject_pfault_done", inject_pfault_done),
105 	VCPU_STAT("inject_program", inject_program),
106 	VCPU_STAT("inject_restart", inject_restart),
107 	VM_STAT("inject_service_signal", inject_service_signal),
108 	VCPU_STAT("inject_set_prefix", inject_set_prefix),
109 	VCPU_STAT("inject_stop_signal", inject_stop_signal),
110 	VCPU_STAT("inject_pfault_init", inject_pfault_init),
111 	VM_STAT("inject_virtio", inject_virtio),
112 	VCPU_STAT("instruction_epsw", instruction_epsw),
113 	VCPU_STAT("instruction_gs", instruction_gs),
114 	VCPU_STAT("instruction_io_other", instruction_io_other),
115 	VCPU_STAT("instruction_lpsw", instruction_lpsw),
116 	VCPU_STAT("instruction_lpswe", instruction_lpswe),
117 	VCPU_STAT("instruction_pfmf", instruction_pfmf),
118 	VCPU_STAT("instruction_ptff", instruction_ptff),
119 	VCPU_STAT("instruction_stidp", instruction_stidp),
120 	VCPU_STAT("instruction_sck", instruction_sck),
121 	VCPU_STAT("instruction_sckpf", instruction_sckpf),
122 	VCPU_STAT("instruction_spx", instruction_spx),
123 	VCPU_STAT("instruction_stpx", instruction_stpx),
124 	VCPU_STAT("instruction_stap", instruction_stap),
125 	VCPU_STAT("instruction_iske", instruction_iske),
126 	VCPU_STAT("instruction_ri", instruction_ri),
127 	VCPU_STAT("instruction_rrbe", instruction_rrbe),
128 	VCPU_STAT("instruction_sske", instruction_sske),
129 	VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
130 	VCPU_STAT("instruction_essa", instruction_essa),
131 	VCPU_STAT("instruction_stsi", instruction_stsi),
132 	VCPU_STAT("instruction_stfl", instruction_stfl),
133 	VCPU_STAT("instruction_tb", instruction_tb),
134 	VCPU_STAT("instruction_tpi", instruction_tpi),
135 	VCPU_STAT("instruction_tprot", instruction_tprot),
136 	VCPU_STAT("instruction_tsch", instruction_tsch),
137 	VCPU_STAT("instruction_sthyi", instruction_sthyi),
138 	VCPU_STAT("instruction_sie", instruction_sie),
139 	VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
140 	VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
141 	VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
142 	VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
143 	VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
144 	VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
145 	VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
146 	VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
147 	VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
148 	VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
149 	VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
150 	VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
151 	VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
152 	VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
153 	VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
154 	VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
155 	VCPU_STAT("instruction_diag_10", diagnose_10),
156 	VCPU_STAT("instruction_diag_44", diagnose_44),
157 	VCPU_STAT("instruction_diag_9c", diagnose_9c),
158 	VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
159 	VCPU_STAT("instruction_diag_258", diagnose_258),
160 	VCPU_STAT("instruction_diag_308", diagnose_308),
161 	VCPU_STAT("instruction_diag_500", diagnose_500),
162 	VCPU_STAT("instruction_diag_other", diagnose_other),
163 	{ NULL }
164 };
165 
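/*
 * Format of the clock value stored by get_tod_clock_ext(), used in
 * kvm_s390_get_tod_clock() below: epoch index followed by the TOD clock.
 */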
166 struct kvm_s390_tod_clock_ext {
167 	__u8 epoch_idx;
168 	__u64 tod;
169 	__u8 reserved[7];
170 } __packed;
171 
172 /* allow nested virtualization in KVM (if enabled by user space) */
173 static int nested;
174 module_param(nested, int, S_IRUGO);
175 MODULE_PARM_DESC(nested, "Nested virtualization support");
176 
177 /* allow 1m huge page guest backing, if !nested */
178 static int hpage;
179 module_param(hpage, int, 0444);
180 MODULE_PARM_DESC(hpage, "1m huge page backing support");
181 
182 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
183 static u8 halt_poll_max_steal = 10;
184 module_param(halt_poll_max_steal, byte, 0644);
185 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
186 
187 /* if set to true, the GISA will be initialized and used if available */
188 static bool use_gisa  = true;
189 module_param(use_gisa, bool, 0644);
190 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
191 
192 /*
193  * For now we handle at most 16 double words as this is what the s390 base
194  * kernel handles and stores in the prefix page. If we ever need to go beyond
195  * this, it requires changes to the code, but the external uapi can stay.
196  */
197 #define SIZE_INTERNAL 16
198 
199 /*
200  * Base feature mask that defines the default facility mask. It consists of
201  * the defines in FACILITIES_KVM and the non-hypervisor-managed bits.
202  */
203 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
204 /*
205  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
206  * and lists the facilities that can be enabled via a cpu model.
207  */
208 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
209 
210 static unsigned long kvm_s390_fac_size(void)
211 {
212 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
213 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
214 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
215 		sizeof(S390_lowcore.stfle_fac_list));
216 
217 	return SIZE_INTERNAL;
218 }
219 
220 /* available cpu features supported by kvm */
221 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
222 /* available subfunctions indicated via query / "test bit" */
223 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
224 
225 static struct gmap_notifier gmap_notifier;
226 static struct gmap_notifier vsie_gmap_notifier;
227 debug_info_t *kvm_s390_dbf;
228 debug_info_t *kvm_s390_dbf_uv;
229 
230 /* Section: not file related */
231 int kvm_arch_hardware_enable(void)
232 {
233 	/* every s390 is virtualization enabled ;-) */
234 	return 0;
235 }
236 
237 int kvm_arch_check_processor_compat(void *opaque)
238 {
239 	return 0;
240 }
241 
242 /* forward declarations */
243 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
244 			      unsigned long end);
245 static int sca_switch_to_extended(struct kvm *kvm);
246 
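/*
 * Apply a host TOD clock delta to the guest epoch (and, if the
 * multiple-epoch facility is in use, to the epoch index) of one SIE
 * control block.
 */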
247 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
248 {
249 	u8 delta_idx = 0;
250 
251 	/*
252 	 * The TOD jumps by delta; we have to compensate for this by adding
253 	 * -delta to the epoch.
254 	 */
255 	delta = -delta;
256 
257 	/* sign-extension - we're adding to signed values below */
258 	if ((s64)delta < 0)
259 		delta_idx = -1;
260 
261 	scb->epoch += delta;
262 	if (scb->ecd & ECD_MEF) {
263 		scb->epdx += delta_idx;
264 		if (scb->epoch < delta)
265 			scb->epdx += 1;
266 	}
267 }
268 
269 /*
270  * This callback is executed during stop_machine(). All CPUs are therefore
271  * temporarily stopped. In order not to change guest behavior, we have to
272  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
273  * so a CPU won't be stopped while calculating with the epoch.
274  */
275 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
276 			  void *v)
277 {
278 	struct kvm *kvm;
279 	struct kvm_vcpu *vcpu;
280 	int i;
281 	unsigned long long *delta = v;
282 
283 	list_for_each_entry(kvm, &vm_list, vm_list) {
284 		kvm_for_each_vcpu(i, vcpu, kvm) {
285 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
286 			if (i == 0) {
287 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
288 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
289 			}
290 			if (vcpu->arch.cputm_enabled)
291 				vcpu->arch.cputm_start += *delta;
292 			if (vcpu->arch.vsie_block)
293 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
294 						   *delta);
295 		}
296 	}
297 	return NOTIFY_OK;
298 }
299 
300 static struct notifier_block kvm_clock_notifier = {
301 	.notifier_call = kvm_clock_sync,
302 };
303 
304 int kvm_arch_hardware_setup(void *opaque)
305 {
306 	gmap_notifier.notifier_call = kvm_gmap_notifier;
307 	gmap_register_pte_notifier(&gmap_notifier);
308 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
309 	gmap_register_pte_notifier(&vsie_gmap_notifier);
310 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
311 				       &kvm_clock_notifier);
312 	return 0;
313 }
314 
315 void kvm_arch_hardware_unsetup(void)
316 {
317 	gmap_unregister_pte_notifier(&gmap_notifier);
318 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
319 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
320 					 &kvm_clock_notifier);
321 }
322 
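/* Mark a cpu feature as available to guests (inverted bit numbering). */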
323 static void allow_cpu_feat(unsigned long nr)
324 {
325 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
326 }
327 
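/*
 * Use the "test bit" function of PERFORM LOCKED OPERATION to check
 * whether PLO function number @nr is installed on this machine.
 */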
328 static inline int plo_test_bit(unsigned char nr)
329 {
330 	unsigned long function = (unsigned long)nr | 0x100;
331 	int cc;
332 
333 	asm volatile(
334 		"	lgr	0,%[function]\n"
335 		/* Parameter registers are ignored for "test bit" */
336 		"	plo	0,0,0,0(0)\n"
337 		"	ipm	%0\n"
338 		"	srl	%0,28\n"
339 		: "=d" (cc)
340 		: [function] "d" (function)
341 		: "cc", "0");
342 	return cc == 0;
343 }
344 
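/*
 * Run the query function of the instruction given by @opcode and store
 * the resulting list of installed subfunctions at @query (GR0 = 0
 * selects the query function, GR1 holds the destination address).
 */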
345 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
346 {
347 	asm volatile(
348 		"	lghi	0,0\n"
349 		"	lgr	1,%[query]\n"
350 		/* Parameter registers are ignored */
351 		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
352 		:
353 		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
354 		: "cc", "memory", "0", "1");
355 }
356 
357 #define INSN_SORTL 0xb938
358 #define INSN_DFLTCC 0xb939
359 
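/*
 * Probe which subfunctions (PLO, PTFF, CPACF crypto, SORTL, DFLTCC) and
 * which SIE features the host offers, so they can be made available to
 * guests; the vSIE related features are only exposed when the nested
 * module parameter is set.
 */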
360 static void kvm_s390_cpu_feat_init(void)
361 {
362 	int i;
363 
364 	for (i = 0; i < 256; ++i) {
365 		if (plo_test_bit(i))
366 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
367 	}
368 
369 	if (test_facility(28)) /* TOD-clock steering */
370 		ptff(kvm_s390_available_subfunc.ptff,
371 		     sizeof(kvm_s390_available_subfunc.ptff),
372 		     PTFF_QAF);
373 
374 	if (test_facility(17)) { /* MSA */
375 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
376 			      kvm_s390_available_subfunc.kmac);
377 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
378 			      kvm_s390_available_subfunc.kmc);
379 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
380 			      kvm_s390_available_subfunc.km);
381 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
382 			      kvm_s390_available_subfunc.kimd);
383 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
384 			      kvm_s390_available_subfunc.klmd);
385 	}
386 	if (test_facility(76)) /* MSA3 */
387 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
388 			      kvm_s390_available_subfunc.pckmo);
389 	if (test_facility(77)) { /* MSA4 */
390 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
391 			      kvm_s390_available_subfunc.kmctr);
392 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
393 			      kvm_s390_available_subfunc.kmf);
394 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
395 			      kvm_s390_available_subfunc.kmo);
396 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
397 			      kvm_s390_available_subfunc.pcc);
398 	}
399 	if (test_facility(57)) /* MSA5 */
400 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
401 			      kvm_s390_available_subfunc.ppno);
402 
403 	if (test_facility(146)) /* MSA8 */
404 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
405 			      kvm_s390_available_subfunc.kma);
406 
407 	if (test_facility(155)) /* MSA9 */
408 		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
409 			      kvm_s390_available_subfunc.kdsa);
410 
411 	if (test_facility(150)) /* SORTL */
412 		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
413 
414 	if (test_facility(151)) /* DFLTCC */
415 		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
416 
417 	if (MACHINE_HAS_ESOP)
418 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
419 	/*
420 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
421 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
422 	 */
423 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
424 	    !test_facility(3) || !nested)
425 		return;
426 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
427 	if (sclp.has_64bscao)
428 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
429 	if (sclp.has_siif)
430 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
431 	if (sclp.has_gpere)
432 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
433 	if (sclp.has_gsls)
434 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
435 	if (sclp.has_ib)
436 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
437 	if (sclp.has_cei)
438 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
439 	if (sclp.has_ibs)
440 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
441 	if (sclp.has_kss)
442 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
443 	/*
444 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
445 	 * all skey handling functions read/set the skey from the PGSTE
446 	 * instead of the real storage key.
447 	 *
448 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
449 	 * pages be detected as preserved although they are resident.
450 	 *
451 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
452 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
453 	 *
454 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
455 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
456 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
457 	 *
458 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
459 	 * cannot easily shadow the SCA because of the ipte lock.
460 	 */
461 }
462 
463 int kvm_arch_init(void *opaque)
464 {
465 	int rc = -ENOMEM;
466 
467 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
468 	if (!kvm_s390_dbf)
469 		return -ENOMEM;
470 
471 	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
472 	if (!kvm_s390_dbf_uv)
473 		goto out;
474 
475 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
476 	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
477 		goto out;
478 
479 	kvm_s390_cpu_feat_init();
480 
481 	/* Register floating interrupt controller interface. */
482 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
483 	if (rc) {
484 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
485 		goto out;
486 	}
487 
488 	rc = kvm_s390_gib_init(GAL_ISC);
489 	if (rc)
490 		goto out;
491 
492 	return 0;
493 
494 out:
495 	kvm_arch_exit();
496 	return rc;
497 }
498 
499 void kvm_arch_exit(void)
500 {
501 	kvm_s390_gib_destroy();
502 	debug_unregister(kvm_s390_dbf);
503 	debug_unregister(kvm_s390_dbf_uv);
504 }
505 
506 /* Section: device related */
507 long kvm_arch_dev_ioctl(struct file *filp,
508 			unsigned int ioctl, unsigned long arg)
509 {
510 	if (ioctl == KVM_S390_ENABLE_SIE)
511 		return s390_enable_sie();
512 	return -EINVAL;
513 }
514 
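/* Report whether (or to what extent) a KVM capability is supported. */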
515 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
516 {
517 	int r;
518 
519 	switch (ext) {
520 	case KVM_CAP_S390_PSW:
521 	case KVM_CAP_S390_GMAP:
522 	case KVM_CAP_SYNC_MMU:
523 #ifdef CONFIG_KVM_S390_UCONTROL
524 	case KVM_CAP_S390_UCONTROL:
525 #endif
526 	case KVM_CAP_ASYNC_PF:
527 	case KVM_CAP_SYNC_REGS:
528 	case KVM_CAP_ONE_REG:
529 	case KVM_CAP_ENABLE_CAP:
530 	case KVM_CAP_S390_CSS_SUPPORT:
531 	case KVM_CAP_IOEVENTFD:
532 	case KVM_CAP_DEVICE_CTRL:
533 	case KVM_CAP_S390_IRQCHIP:
534 	case KVM_CAP_VM_ATTRIBUTES:
535 	case KVM_CAP_MP_STATE:
536 	case KVM_CAP_IMMEDIATE_EXIT:
537 	case KVM_CAP_S390_INJECT_IRQ:
538 	case KVM_CAP_S390_USER_SIGP:
539 	case KVM_CAP_S390_USER_STSI:
540 	case KVM_CAP_S390_SKEYS:
541 	case KVM_CAP_S390_IRQ_STATE:
542 	case KVM_CAP_S390_USER_INSTR0:
543 	case KVM_CAP_S390_CMMA_MIGRATION:
544 	case KVM_CAP_S390_AIS:
545 	case KVM_CAP_S390_AIS_MIGRATION:
546 	case KVM_CAP_S390_VCPU_RESETS:
547 	case KVM_CAP_SET_GUEST_DEBUG:
548 	case KVM_CAP_S390_DIAG318:
549 		r = 1;
550 		break;
551 	case KVM_CAP_S390_HPAGE_1M:
552 		r = 0;
553 		if (hpage && !kvm_is_ucontrol(kvm))
554 			r = 1;
555 		break;
556 	case KVM_CAP_S390_MEM_OP:
557 		r = MEM_OP_MAX_SIZE;
558 		break;
559 	case KVM_CAP_NR_VCPUS:
560 	case KVM_CAP_MAX_VCPUS:
561 	case KVM_CAP_MAX_VCPU_ID:
562 		r = KVM_S390_BSCA_CPU_SLOTS;
563 		if (!kvm_s390_use_sca_entries())
564 			r = KVM_MAX_VCPUS;
565 		else if (sclp.has_esca && sclp.has_64bscao)
566 			r = KVM_S390_ESCA_CPU_SLOTS;
567 		break;
568 	case KVM_CAP_S390_COW:
569 		r = MACHINE_HAS_ESOP;
570 		break;
571 	case KVM_CAP_S390_VECTOR_REGISTERS:
572 		r = MACHINE_HAS_VX;
573 		break;
574 	case KVM_CAP_S390_RI:
575 		r = test_facility(64);
576 		break;
577 	case KVM_CAP_S390_GS:
578 		r = test_facility(133);
579 		break;
580 	case KVM_CAP_S390_BPB:
581 		r = test_facility(82);
582 		break;
583 	case KVM_CAP_S390_PROTECTED:
584 		r = is_prot_virt_host();
585 		break;
586 	default:
587 		r = 0;
588 	}
589 	return r;
590 }
591 
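/*
 * Propagate dirty bits from the gmap to the memslot dirty bitmap, one
 * segment (_PAGE_ENTRIES pages) at a time.
 */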
592 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
593 {
594 	int i;
595 	gfn_t cur_gfn, last_gfn;
596 	unsigned long gaddr, vmaddr;
597 	struct gmap *gmap = kvm->arch.gmap;
598 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
599 
600 	/* Loop over all guest segments */
601 	cur_gfn = memslot->base_gfn;
602 	last_gfn = memslot->base_gfn + memslot->npages;
603 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
604 		gaddr = gfn_to_gpa(cur_gfn);
605 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
606 		if (kvm_is_error_hva(vmaddr))
607 			continue;
608 
609 		bitmap_zero(bitmap, _PAGE_ENTRIES);
610 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
611 		for (i = 0; i < _PAGE_ENTRIES; i++) {
612 			if (test_bit(i, bitmap))
613 				mark_page_dirty(kvm, cur_gfn + i);
614 		}
615 
616 		if (fatal_signal_pending(current))
617 			return;
618 		cond_resched();
619 	}
620 }
621 
622 /* Section: vm related */
623 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
624 
625 /*
626  * Get (and clear) the dirty memory log for a memory slot.
627  */
628 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
629 			       struct kvm_dirty_log *log)
630 {
631 	int r;
632 	unsigned long n;
633 	struct kvm_memory_slot *memslot;
634 	int is_dirty;
635 
636 	if (kvm_is_ucontrol(kvm))
637 		return -EINVAL;
638 
639 	mutex_lock(&kvm->slots_lock);
640 
641 	r = -EINVAL;
642 	if (log->slot >= KVM_USER_MEM_SLOTS)
643 		goto out;
644 
645 	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
646 	if (r)
647 		goto out;
648 
649 	/* Clear the dirty log */
650 	if (is_dirty) {
651 		n = kvm_dirty_bitmap_bytes(memslot);
652 		memset(memslot->dirty_bitmap, 0, n);
653 	}
654 	r = 0;
655 out:
656 	mutex_unlock(&kvm->slots_lock);
657 	return r;
658 }
659 
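/* Request interception of operation exceptions on all VCPUs of this VM. */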
660 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
661 {
662 	unsigned int i;
663 	struct kvm_vcpu *vcpu;
664 
665 	kvm_for_each_vcpu(i, vcpu, kvm) {
666 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
667 	}
668 }
669 
670 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
671 {
672 	int r;
673 
674 	if (cap->flags)
675 		return -EINVAL;
676 
677 	switch (cap->cap) {
678 	case KVM_CAP_S390_IRQCHIP:
679 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
680 		kvm->arch.use_irqchip = 1;
681 		r = 0;
682 		break;
683 	case KVM_CAP_S390_USER_SIGP:
684 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
685 		kvm->arch.user_sigp = 1;
686 		r = 0;
687 		break;
688 	case KVM_CAP_S390_VECTOR_REGISTERS:
689 		mutex_lock(&kvm->lock);
690 		if (kvm->created_vcpus) {
691 			r = -EBUSY;
692 		} else if (MACHINE_HAS_VX) {
693 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
694 			set_kvm_facility(kvm->arch.model.fac_list, 129);
695 			if (test_facility(134)) {
696 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
697 				set_kvm_facility(kvm->arch.model.fac_list, 134);
698 			}
699 			if (test_facility(135)) {
700 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
701 				set_kvm_facility(kvm->arch.model.fac_list, 135);
702 			}
703 			if (test_facility(148)) {
704 				set_kvm_facility(kvm->arch.model.fac_mask, 148);
705 				set_kvm_facility(kvm->arch.model.fac_list, 148);
706 			}
707 			if (test_facility(152)) {
708 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
709 				set_kvm_facility(kvm->arch.model.fac_list, 152);
710 			}
711 			r = 0;
712 		} else
713 			r = -EINVAL;
714 		mutex_unlock(&kvm->lock);
715 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
716 			 r ? "(not available)" : "(success)");
717 		break;
718 	case KVM_CAP_S390_RI:
719 		r = -EINVAL;
720 		mutex_lock(&kvm->lock);
721 		if (kvm->created_vcpus) {
722 			r = -EBUSY;
723 		} else if (test_facility(64)) {
724 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
725 			set_kvm_facility(kvm->arch.model.fac_list, 64);
726 			r = 0;
727 		}
728 		mutex_unlock(&kvm->lock);
729 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
730 			 r ? "(not available)" : "(success)");
731 		break;
732 	case KVM_CAP_S390_AIS:
733 		mutex_lock(&kvm->lock);
734 		if (kvm->created_vcpus) {
735 			r = -EBUSY;
736 		} else {
737 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
738 			set_kvm_facility(kvm->arch.model.fac_list, 72);
739 			r = 0;
740 		}
741 		mutex_unlock(&kvm->lock);
742 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
743 			 r ? "(not available)" : "(success)");
744 		break;
745 	case KVM_CAP_S390_GS:
746 		r = -EINVAL;
747 		mutex_lock(&kvm->lock);
748 		if (kvm->created_vcpus) {
749 			r = -EBUSY;
750 		} else if (test_facility(133)) {
751 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
752 			set_kvm_facility(kvm->arch.model.fac_list, 133);
753 			r = 0;
754 		}
755 		mutex_unlock(&kvm->lock);
756 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
757 			 r ? "(not available)" : "(success)");
758 		break;
759 	case KVM_CAP_S390_HPAGE_1M:
760 		mutex_lock(&kvm->lock);
761 		if (kvm->created_vcpus)
762 			r = -EBUSY;
763 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
764 			r = -EINVAL;
765 		else {
766 			r = 0;
767 			mmap_write_lock(kvm->mm);
768 			kvm->mm->context.allow_gmap_hpage_1m = 1;
769 			mmap_write_unlock(kvm->mm);
770 			/*
771 			 * We might have to create fake 4k page
772 			 * tables. To prevent the hardware from working on
773 			 * stale PGSTEs, we emulate these instructions.
774 			 */
775 			kvm->arch.use_skf = 0;
776 			kvm->arch.use_pfmfi = 0;
777 		}
778 		mutex_unlock(&kvm->lock);
779 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
780 			 r ? "(not available)" : "(success)");
781 		break;
782 	case KVM_CAP_S390_USER_STSI:
783 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
784 		kvm->arch.user_stsi = 1;
785 		r = 0;
786 		break;
787 	case KVM_CAP_S390_USER_INSTR0:
788 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
789 		kvm->arch.user_instr0 = 1;
790 		icpt_operexc_on_all_vcpus(kvm);
791 		r = 0;
792 		break;
793 	default:
794 		r = -EINVAL;
795 		break;
796 	}
797 	return r;
798 }
799 
800 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
801 {
802 	int ret;
803 
804 	switch (attr->attr) {
805 	case KVM_S390_VM_MEM_LIMIT_SIZE:
806 		ret = 0;
807 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
808 			 kvm->arch.mem_limit);
809 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
810 			ret = -EFAULT;
811 		break;
812 	default:
813 		ret = -ENXIO;
814 		break;
815 	}
816 	return ret;
817 }
818 
819 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
820 {
821 	int ret;
822 	unsigned int idx;
823 	switch (attr->attr) {
824 	case KVM_S390_VM_MEM_ENABLE_CMMA:
825 		ret = -ENXIO;
826 		if (!sclp.has_cmma)
827 			break;
828 
829 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
830 		mutex_lock(&kvm->lock);
831 		if (kvm->created_vcpus)
832 			ret = -EBUSY;
833 		else if (kvm->mm->context.allow_gmap_hpage_1m)
834 			ret = -EINVAL;
835 		else {
836 			kvm->arch.use_cmma = 1;
837 			/* Not compatible with cmma. */
838 			kvm->arch.use_pfmfi = 0;
839 			ret = 0;
840 		}
841 		mutex_unlock(&kvm->lock);
842 		break;
843 	case KVM_S390_VM_MEM_CLR_CMMA:
844 		ret = -ENXIO;
845 		if (!sclp.has_cmma)
846 			break;
847 		ret = -EINVAL;
848 		if (!kvm->arch.use_cmma)
849 			break;
850 
851 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
852 		mutex_lock(&kvm->lock);
853 		idx = srcu_read_lock(&kvm->srcu);
854 		s390_reset_cmma(kvm->arch.gmap->mm);
855 		srcu_read_unlock(&kvm->srcu, idx);
856 		mutex_unlock(&kvm->lock);
857 		ret = 0;
858 		break;
859 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
860 		unsigned long new_limit;
861 
862 		if (kvm_is_ucontrol(kvm))
863 			return -EINVAL;
864 
865 		if (get_user(new_limit, (u64 __user *)attr->addr))
866 			return -EFAULT;
867 
868 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
869 		    new_limit > kvm->arch.mem_limit)
870 			return -E2BIG;
871 
872 		if (!new_limit)
873 			return -EINVAL;
874 
875 		/* gmap_create takes last usable address */
876 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
877 			new_limit -= 1;
878 
879 		ret = -EBUSY;
880 		mutex_lock(&kvm->lock);
881 		if (!kvm->created_vcpus) {
882 			/* gmap_create will round the limit up */
883 			struct gmap *new = gmap_create(current->mm, new_limit);
884 
885 			if (!new) {
886 				ret = -ENOMEM;
887 			} else {
888 				gmap_remove(kvm->arch.gmap);
889 				new->private = kvm;
890 				kvm->arch.gmap = new;
891 				ret = 0;
892 			}
893 		}
894 		mutex_unlock(&kvm->lock);
895 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
896 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
897 			 (void *) kvm->arch.gmap->asce);
898 		break;
899 	}
900 	default:
901 		ret = -ENXIO;
902 		break;
903 	}
904 	return ret;
905 }
906 
907 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
908 
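/*
 * Re-initialize the crypto setup of all VCPUs after a change to the
 * VM-wide crypto attributes. All VCPUs are blocked while the update runs.
 */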
909 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
910 {
911 	struct kvm_vcpu *vcpu;
912 	int i;
913 
914 	kvm_s390_vcpu_block_all(kvm);
915 
916 	kvm_for_each_vcpu(i, vcpu, kvm) {
917 		kvm_s390_vcpu_crypto_setup(vcpu);
918 		/* recreate the shadow crycb by leaving the VSIE handler */
919 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
920 	}
921 
922 	kvm_s390_vcpu_unblock_all(kvm);
923 }
924 
925 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
926 {
927 	mutex_lock(&kvm->lock);
928 	switch (attr->attr) {
929 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
930 		if (!test_kvm_facility(kvm, 76)) {
931 			mutex_unlock(&kvm->lock);
932 			return -EINVAL;
933 		}
934 		get_random_bytes(
935 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
936 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
937 		kvm->arch.crypto.aes_kw = 1;
938 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
939 		break;
940 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
941 		if (!test_kvm_facility(kvm, 76)) {
942 			mutex_unlock(&kvm->lock);
943 			return -EINVAL;
944 		}
945 		get_random_bytes(
946 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
947 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
948 		kvm->arch.crypto.dea_kw = 1;
949 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
950 		break;
951 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
952 		if (!test_kvm_facility(kvm, 76)) {
953 			mutex_unlock(&kvm->lock);
954 			return -EINVAL;
955 		}
956 		kvm->arch.crypto.aes_kw = 0;
957 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
958 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
959 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
960 		break;
961 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
962 		if (!test_kvm_facility(kvm, 76)) {
963 			mutex_unlock(&kvm->lock);
964 			return -EINVAL;
965 		}
966 		kvm->arch.crypto.dea_kw = 0;
967 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
968 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
969 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
970 		break;
971 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
972 		if (!ap_instructions_available()) {
973 			mutex_unlock(&kvm->lock);
974 			return -EOPNOTSUPP;
975 		}
976 		kvm->arch.crypto.apie = 1;
977 		break;
978 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
979 		if (!ap_instructions_available()) {
980 			mutex_unlock(&kvm->lock);
981 			return -EOPNOTSUPP;
982 		}
983 		kvm->arch.crypto.apie = 0;
984 		break;
985 	default:
986 		mutex_unlock(&kvm->lock);
987 		return -ENXIO;
988 	}
989 
990 	kvm_s390_vcpu_crypto_reset_all(kvm);
991 	mutex_unlock(&kvm->lock);
992 	return 0;
993 }
994 
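/* Make a synchronous request pending on every VCPU of the VM. */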
995 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
996 {
997 	int cx;
998 	struct kvm_vcpu *vcpu;
999 
1000 	kvm_for_each_vcpu(cx, vcpu, kvm)
1001 		kvm_s390_sync_request(req, vcpu);
1002 }
1003 
1004 /*
1005  * Must be called with kvm->srcu held to avoid races on memslots, and with
1006  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1007  */
1008 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1009 {
1010 	struct kvm_memory_slot *ms;
1011 	struct kvm_memslots *slots;
1012 	unsigned long ram_pages = 0;
1013 	int slotnr;
1014 
1015 	/* migration mode already enabled */
1016 	if (kvm->arch.migration_mode)
1017 		return 0;
1018 	slots = kvm_memslots(kvm);
1019 	if (!slots || !slots->used_slots)
1020 		return -EINVAL;
1021 
1022 	if (!kvm->arch.use_cmma) {
1023 		kvm->arch.migration_mode = 1;
1024 		return 0;
1025 	}
1026 	/* mark all the pages in active slots as dirty */
1027 	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1028 		ms = slots->memslots + slotnr;
1029 		if (!ms->dirty_bitmap)
1030 			return -EINVAL;
1031 		/*
1032 		 * The second half of the bitmap is only used on x86,
1033 		 * and would be wasted otherwise, so we put it to good
1034 		 * use here to keep track of the state of the storage
1035 		 * attributes.
1036 		 */
1037 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1038 		ram_pages += ms->npages;
1039 	}
1040 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1041 	kvm->arch.migration_mode = 1;
1042 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1043 	return 0;
1044 }
1045 
1046 /*
1047  * Must be called with kvm->slots_lock to avoid races with ourselves and
1048  * kvm_s390_vm_start_migration.
1049  */
1050 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1051 {
1052 	/* migration mode already disabled */
1053 	if (!kvm->arch.migration_mode)
1054 		return 0;
1055 	kvm->arch.migration_mode = 0;
1056 	if (kvm->arch.use_cmma)
1057 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1058 	return 0;
1059 }
1060 
1061 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1062 				     struct kvm_device_attr *attr)
1063 {
1064 	int res = -ENXIO;
1065 
1066 	mutex_lock(&kvm->slots_lock);
1067 	switch (attr->attr) {
1068 	case KVM_S390_VM_MIGRATION_START:
1069 		res = kvm_s390_vm_start_migration(kvm);
1070 		break;
1071 	case KVM_S390_VM_MIGRATION_STOP:
1072 		res = kvm_s390_vm_stop_migration(kvm);
1073 		break;
1074 	default:
1075 		break;
1076 	}
1077 	mutex_unlock(&kvm->slots_lock);
1078 
1079 	return res;
1080 }
1081 
1082 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1083 				     struct kvm_device_attr *attr)
1084 {
1085 	u64 mig = kvm->arch.migration_mode;
1086 
1087 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1088 		return -ENXIO;
1089 
1090 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1091 		return -EFAULT;
1092 	return 0;
1093 }
1094 
1095 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1096 {
1097 	struct kvm_s390_vm_tod_clock gtod;
1098 
1099 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1100 		return -EFAULT;
1101 
1102 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1103 		return -EINVAL;
1104 	kvm_s390_set_tod_clock(kvm, &gtod);
1105 
1106 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1107 		gtod.epoch_idx, gtod.tod);
1108 
1109 	return 0;
1110 }
1111 
1112 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1113 {
1114 	u8 gtod_high;
1115 
1116 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1117 					   sizeof(gtod_high)))
1118 		return -EFAULT;
1119 
1120 	if (gtod_high != 0)
1121 		return -EINVAL;
1122 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1123 
1124 	return 0;
1125 }
1126 
1127 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1128 {
1129 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1130 
1131 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1132 			   sizeof(gtod.tod)))
1133 		return -EFAULT;
1134 
1135 	kvm_s390_set_tod_clock(kvm, &gtod);
1136 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1137 	return 0;
1138 }
1139 
1140 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1141 {
1142 	int ret;
1143 
1144 	if (attr->flags)
1145 		return -EINVAL;
1146 
1147 	switch (attr->attr) {
1148 	case KVM_S390_VM_TOD_EXT:
1149 		ret = kvm_s390_set_tod_ext(kvm, attr);
1150 		break;
1151 	case KVM_S390_VM_TOD_HIGH:
1152 		ret = kvm_s390_set_tod_high(kvm, attr);
1153 		break;
1154 	case KVM_S390_VM_TOD_LOW:
1155 		ret = kvm_s390_set_tod_low(kvm, attr);
1156 		break;
1157 	default:
1158 		ret = -ENXIO;
1159 		break;
1160 	}
1161 	return ret;
1162 }
1163 
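/*
 * Read the host TOD clock and convert it to the guest's view by adding
 * the per-VM epoch (and epoch index, if facility 139 is enabled for the
 * guest).
 */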
1164 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1165 				   struct kvm_s390_vm_tod_clock *gtod)
1166 {
1167 	struct kvm_s390_tod_clock_ext htod;
1168 
1169 	preempt_disable();
1170 
1171 	get_tod_clock_ext((char *)&htod);
1172 
1173 	gtod->tod = htod.tod + kvm->arch.epoch;
1174 	gtod->epoch_idx = 0;
1175 	if (test_kvm_facility(kvm, 139)) {
1176 		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1177 		if (gtod->tod < htod.tod)
1178 			gtod->epoch_idx += 1;
1179 	}
1180 
1181 	preempt_enable();
1182 }
1183 
1184 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1185 {
1186 	struct kvm_s390_vm_tod_clock gtod;
1187 
1188 	memset(&gtod, 0, sizeof(gtod));
1189 	kvm_s390_get_tod_clock(kvm, &gtod);
1190 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1191 		return -EFAULT;
1192 
1193 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1194 		gtod.epoch_idx, gtod.tod);
1195 	return 0;
1196 }
1197 
1198 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1199 {
1200 	u8 gtod_high = 0;
1201 
1202 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1203 					 sizeof(gtod_high)))
1204 		return -EFAULT;
1205 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1206 
1207 	return 0;
1208 }
1209 
1210 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1211 {
1212 	u64 gtod;
1213 
1214 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1215 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1216 		return -EFAULT;
1217 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1218 
1219 	return 0;
1220 }
1221 
1222 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1223 {
1224 	int ret;
1225 
1226 	if (attr->flags)
1227 		return -EINVAL;
1228 
1229 	switch (attr->attr) {
1230 	case KVM_S390_VM_TOD_EXT:
1231 		ret = kvm_s390_get_tod_ext(kvm, attr);
1232 		break;
1233 	case KVM_S390_VM_TOD_HIGH:
1234 		ret = kvm_s390_get_tod_high(kvm, attr);
1235 		break;
1236 	case KVM_S390_VM_TOD_LOW:
1237 		ret = kvm_s390_get_tod_low(kvm, attr);
1238 		break;
1239 	default:
1240 		ret = -ENXIO;
1241 		break;
1242 	}
1243 	return ret;
1244 }
1245 
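/*
 * Set the guest CPU model (cpuid, IBC and facility list) from user space.
 * Only possible before the first VCPU is created; the IBC value is
 * clamped to the range supported by the host.
 */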
1246 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1247 {
1248 	struct kvm_s390_vm_cpu_processor *proc;
1249 	u16 lowest_ibc, unblocked_ibc;
1250 	int ret = 0;
1251 
1252 	mutex_lock(&kvm->lock);
1253 	if (kvm->created_vcpus) {
1254 		ret = -EBUSY;
1255 		goto out;
1256 	}
1257 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1258 	if (!proc) {
1259 		ret = -ENOMEM;
1260 		goto out;
1261 	}
1262 	if (!copy_from_user(proc, (void __user *)attr->addr,
1263 			    sizeof(*proc))) {
1264 		kvm->arch.model.cpuid = proc->cpuid;
1265 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1266 		unblocked_ibc = sclp.ibc & 0xfff;
1267 		if (lowest_ibc && proc->ibc) {
1268 			if (proc->ibc > unblocked_ibc)
1269 				kvm->arch.model.ibc = unblocked_ibc;
1270 			else if (proc->ibc < lowest_ibc)
1271 				kvm->arch.model.ibc = lowest_ibc;
1272 			else
1273 				kvm->arch.model.ibc = proc->ibc;
1274 		}
1275 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1276 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1277 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1278 			 kvm->arch.model.ibc,
1279 			 kvm->arch.model.cpuid);
1280 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1281 			 kvm->arch.model.fac_list[0],
1282 			 kvm->arch.model.fac_list[1],
1283 			 kvm->arch.model.fac_list[2]);
1284 	} else
1285 		ret = -EFAULT;
1286 	kfree(proc);
1287 out:
1288 	mutex_unlock(&kvm->lock);
1289 	return ret;
1290 }
1291 
1292 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1293 				       struct kvm_device_attr *attr)
1294 {
1295 	struct kvm_s390_vm_cpu_feat data;
1296 
1297 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1298 		return -EFAULT;
1299 	if (!bitmap_subset((unsigned long *) data.feat,
1300 			   kvm_s390_available_cpu_feat,
1301 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1302 		return -EINVAL;
1303 
1304 	mutex_lock(&kvm->lock);
1305 	if (kvm->created_vcpus) {
1306 		mutex_unlock(&kvm->lock);
1307 		return -EBUSY;
1308 	}
1309 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1310 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1311 	mutex_unlock(&kvm->lock);
1312 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1313 			 data.feat[0],
1314 			 data.feat[1],
1315 			 data.feat[2]);
1316 	return 0;
1317 }
1318 
1319 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1320 					  struct kvm_device_attr *attr)
1321 {
1322 	mutex_lock(&kvm->lock);
1323 	if (kvm->created_vcpus) {
1324 		mutex_unlock(&kvm->lock);
1325 		return -EBUSY;
1326 	}
1327 
1328 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1329 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1330 		mutex_unlock(&kvm->lock);
1331 		return -EFAULT;
1332 	}
1333 	mutex_unlock(&kvm->lock);
1334 
1335 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1336 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1337 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1338 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1339 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1340 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1341 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1342 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1343 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1344 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1345 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1346 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1347 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1348 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1349 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1350 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1351 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1352 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1353 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1354 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1355 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1356 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1357 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1358 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1359 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1360 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1361 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1362 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1363 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1364 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1365 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1366 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1367 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1368 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1369 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1370 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1371 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1372 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1373 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1374 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1375 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1376 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1377 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1378 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1379 	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1380 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1381 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1382 	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1383 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1384 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1385 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1386 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1387 	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1388 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1389 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1390 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1391 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1392 
1393 	return 0;
1394 }
1395 
1396 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1397 {
1398 	int ret = -ENXIO;
1399 
1400 	switch (attr->attr) {
1401 	case KVM_S390_VM_CPU_PROCESSOR:
1402 		ret = kvm_s390_set_processor(kvm, attr);
1403 		break;
1404 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1405 		ret = kvm_s390_set_processor_feat(kvm, attr);
1406 		break;
1407 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1408 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1409 		break;
1410 	}
1411 	return ret;
1412 }
1413 
1414 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1415 {
1416 	struct kvm_s390_vm_cpu_processor *proc;
1417 	int ret = 0;
1418 
1419 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1420 	if (!proc) {
1421 		ret = -ENOMEM;
1422 		goto out;
1423 	}
1424 	proc->cpuid = kvm->arch.model.cpuid;
1425 	proc->ibc = kvm->arch.model.ibc;
1426 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1427 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1428 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1429 		 kvm->arch.model.ibc,
1430 		 kvm->arch.model.cpuid);
1431 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1432 		 kvm->arch.model.fac_list[0],
1433 		 kvm->arch.model.fac_list[1],
1434 		 kvm->arch.model.fac_list[2]);
1435 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1436 		ret = -EFAULT;
1437 	kfree(proc);
1438 out:
1439 	return ret;
1440 }
1441 
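/*
 * Report the host CPU model: host cpuid and IBC, the facility mask
 * usable for guests and the raw host facility list (STFLE).
 */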
1442 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1443 {
1444 	struct kvm_s390_vm_cpu_machine *mach;
1445 	int ret = 0;
1446 
1447 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1448 	if (!mach) {
1449 		ret = -ENOMEM;
1450 		goto out;
1451 	}
1452 	get_cpu_id((struct cpuid *) &mach->cpuid);
1453 	mach->ibc = sclp.ibc;
1454 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1455 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1456 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1457 	       sizeof(S390_lowcore.stfle_fac_list));
1458 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1459 		 kvm->arch.model.ibc,
1460 		 kvm->arch.model.cpuid);
1461 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1462 		 mach->fac_mask[0],
1463 		 mach->fac_mask[1],
1464 		 mach->fac_mask[2]);
1465 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1466 		 mach->fac_list[0],
1467 		 mach->fac_list[1],
1468 		 mach->fac_list[2]);
1469 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1470 		ret = -EFAULT;
1471 	kfree(mach);
1472 out:
1473 	return ret;
1474 }
1475 
1476 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1477 				       struct kvm_device_attr *attr)
1478 {
1479 	struct kvm_s390_vm_cpu_feat data;
1480 
1481 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1482 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1483 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1484 		return -EFAULT;
1485 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1486 			 data.feat[0],
1487 			 data.feat[1],
1488 			 data.feat[2]);
1489 	return 0;
1490 }
1491 
1492 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1493 				     struct kvm_device_attr *attr)
1494 {
1495 	struct kvm_s390_vm_cpu_feat data;
1496 
1497 	bitmap_copy((unsigned long *) data.feat,
1498 		    kvm_s390_available_cpu_feat,
1499 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1500 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1501 		return -EFAULT;
1502 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1503 			 data.feat[0],
1504 			 data.feat[1],
1505 			 data.feat[2]);
1506 	return 0;
1507 }
1508 
1509 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1510 					  struct kvm_device_attr *attr)
1511 {
1512 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1513 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1514 		return -EFAULT;
1515 
1516 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1517 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1518 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1519 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1520 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1521 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1522 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1523 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1524 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1525 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1526 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1527 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1528 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1529 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1530 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1531 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1532 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1533 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1534 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1535 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1536 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1537 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1538 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1539 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1540 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1541 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1542 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1543 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1544 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1545 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1546 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1547 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1548 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1549 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1550 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1551 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1552 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1553 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1554 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1555 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1556 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1557 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1558 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1559 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1560 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1561 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1562 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1563 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1564 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1565 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1566 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1567 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1568 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1569 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1570 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1571 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1572 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1573 
1574 	return 0;
1575 }
1576 
1577 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1578 					struct kvm_device_attr *attr)
1579 {
1580 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1581 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1582 		return -EFAULT;
1583 
1584 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1585 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1586 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1587 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1588 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1589 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1590 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1591 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1592 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1593 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1594 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1595 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1596 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1597 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1598 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1599 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1600 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1601 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1602 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1603 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1604 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1605 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1606 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1607 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1608 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1609 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1610 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1611 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1612 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1613 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1614 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1615 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1616 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1617 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1618 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1619 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1620 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1621 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1622 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1623 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1624 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1625 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1626 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1627 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1628 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1629 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1630 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1631 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1632 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1633 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1634 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1635 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1636 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1637 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1638 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1639 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1640 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1641 
1642 	return 0;
1643 }
1644 
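/*
 * Example (illustrative sketch): userspace reaches the handler above via
 * KVM_GET_DEVICE_ATTR on the VM file descriptor.  "vm_fd" and the usual
 * <linux/kvm.h> / <sys/ioctl.h> includes are placeholders; the constants
 * and the struct are the ones used in this file.
 *
 *	struct kvm_s390_vm_cpu_subfunc subfunc;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr  = KVM_S390_VM_CPU_MACHINE_SUBFUNC,
 *		.addr  = (__u64)(unsigned long)&subfunc,
 *	};
 *	int rc = ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 *
 * On success (rc == 0), subfunc holds the host query masks traced above.
 */
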
1645 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1646 {
1647 	int ret = -ENXIO;
1648 
1649 	switch (attr->attr) {
1650 	case KVM_S390_VM_CPU_PROCESSOR:
1651 		ret = kvm_s390_get_processor(kvm, attr);
1652 		break;
1653 	case KVM_S390_VM_CPU_MACHINE:
1654 		ret = kvm_s390_get_machine(kvm, attr);
1655 		break;
1656 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1657 		ret = kvm_s390_get_processor_feat(kvm, attr);
1658 		break;
1659 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1660 		ret = kvm_s390_get_machine_feat(kvm, attr);
1661 		break;
1662 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1663 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1664 		break;
1665 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1666 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1667 		break;
1668 	}
1669 	return ret;
1670 }
1671 
1672 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1673 {
1674 	int ret;
1675 
1676 	switch (attr->group) {
1677 	case KVM_S390_VM_MEM_CTRL:
1678 		ret = kvm_s390_set_mem_control(kvm, attr);
1679 		break;
1680 	case KVM_S390_VM_TOD:
1681 		ret = kvm_s390_set_tod(kvm, attr);
1682 		break;
1683 	case KVM_S390_VM_CPU_MODEL:
1684 		ret = kvm_s390_set_cpu_model(kvm, attr);
1685 		break;
1686 	case KVM_S390_VM_CRYPTO:
1687 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1688 		break;
1689 	case KVM_S390_VM_MIGRATION:
1690 		ret = kvm_s390_vm_set_migration(kvm, attr);
1691 		break;
1692 	default:
1693 		ret = -ENXIO;
1694 		break;
1695 	}
1696 
1697 	return ret;
1698 }
1699 
1700 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1701 {
1702 	int ret;
1703 
1704 	switch (attr->group) {
1705 	case KVM_S390_VM_MEM_CTRL:
1706 		ret = kvm_s390_get_mem_control(kvm, attr);
1707 		break;
1708 	case KVM_S390_VM_TOD:
1709 		ret = kvm_s390_get_tod(kvm, attr);
1710 		break;
1711 	case KVM_S390_VM_CPU_MODEL:
1712 		ret = kvm_s390_get_cpu_model(kvm, attr);
1713 		break;
1714 	case KVM_S390_VM_MIGRATION:
1715 		ret = kvm_s390_vm_get_migration(kvm, attr);
1716 		break;
1717 	default:
1718 		ret = -ENXIO;
1719 		break;
1720 	}
1721 
1722 	return ret;
1723 }
1724 
1725 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1726 {
1727 	int ret;
1728 
1729 	switch (attr->group) {
1730 	case KVM_S390_VM_MEM_CTRL:
1731 		switch (attr->attr) {
1732 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1733 		case KVM_S390_VM_MEM_CLR_CMMA:
1734 			ret = sclp.has_cmma ? 0 : -ENXIO;
1735 			break;
1736 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1737 			ret = 0;
1738 			break;
1739 		default:
1740 			ret = -ENXIO;
1741 			break;
1742 		}
1743 		break;
1744 	case KVM_S390_VM_TOD:
1745 		switch (attr->attr) {
1746 		case KVM_S390_VM_TOD_LOW:
1747 		case KVM_S390_VM_TOD_HIGH:
1748 			ret = 0;
1749 			break;
1750 		default:
1751 			ret = -ENXIO;
1752 			break;
1753 		}
1754 		break;
1755 	case KVM_S390_VM_CPU_MODEL:
1756 		switch (attr->attr) {
1757 		case KVM_S390_VM_CPU_PROCESSOR:
1758 		case KVM_S390_VM_CPU_MACHINE:
1759 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1760 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1761 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1762 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1763 			ret = 0;
1764 			break;
1765 		default:
1766 			ret = -ENXIO;
1767 			break;
1768 		}
1769 		break;
1770 	case KVM_S390_VM_CRYPTO:
1771 		switch (attr->attr) {
1772 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1773 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1774 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1775 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1776 			ret = 0;
1777 			break;
1778 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1779 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1780 			ret = ap_instructions_available() ? 0 : -ENXIO;
1781 			break;
1782 		default:
1783 			ret = -ENXIO;
1784 			break;
1785 		}
1786 		break;
1787 	case KVM_S390_VM_MIGRATION:
1788 		ret = 0;
1789 		break;
1790 	default:
1791 		ret = -ENXIO;
1792 		break;
1793 	}
1794 
1795 	return ret;
1796 }
1797 
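/*
 * Example (illustrative sketch): before relying on a VM attribute,
 * userspace would typically probe it with KVM_HAS_DEVICE_ATTR, which
 * lands in the handler above.  "vm_fd" is a placeholder.
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_ENABLE_CMMA,
 *	};
 *	bool has_cmma = (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr) == 0);
 *
 * A return of 0 means the attribute is supported and may then be set
 * with KVM_SET_DEVICE_ATTR; an unsupported attribute fails with ENXIO.
 */
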
1798 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1799 {
1800 	uint8_t *keys;
1801 	uint64_t hva;
1802 	int srcu_idx, i, r = 0;
1803 
1804 	if (args->flags != 0)
1805 		return -EINVAL;
1806 
1807 	/* Is this guest using storage keys? */
1808 	if (!mm_uses_skeys(current->mm))
1809 		return KVM_S390_GET_SKEYS_NONE;
1810 
1811 	/* Enforce sane limit on memory allocation */
1812 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1813 		return -EINVAL;
1814 
1815 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1816 	if (!keys)
1817 		return -ENOMEM;
1818 
1819 	mmap_read_lock(current->mm);
1820 	srcu_idx = srcu_read_lock(&kvm->srcu);
1821 	for (i = 0; i < args->count; i++) {
1822 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1823 		if (kvm_is_error_hva(hva)) {
1824 			r = -EFAULT;
1825 			break;
1826 		}
1827 
1828 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1829 		if (r)
1830 			break;
1831 	}
1832 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1833 	mmap_read_unlock(current->mm);
1834 
1835 	if (!r) {
1836 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1837 				 sizeof(uint8_t) * args->count);
1838 		if (r)
1839 			r = -EFAULT;
1840 	}
1841 
1842 	kvfree(keys);
1843 	return r;
1844 }
1845 
1846 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1847 {
1848 	uint8_t *keys;
1849 	uint64_t hva;
1850 	int srcu_idx, i, r = 0;
1851 	bool unlocked;
1852 
1853 	if (args->flags != 0)
1854 		return -EINVAL;
1855 
1856 	/* Enforce sane limit on memory allocation */
1857 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1858 		return -EINVAL;
1859 
1860 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1861 	if (!keys)
1862 		return -ENOMEM;
1863 
1864 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1865 			   sizeof(uint8_t) * args->count);
1866 	if (r) {
1867 		r = -EFAULT;
1868 		goto out;
1869 	}
1870 
1871 	/* Enable storage key handling for the guest */
1872 	r = s390_enable_skey();
1873 	if (r)
1874 		goto out;
1875 
1876 	i = 0;
1877 	mmap_read_lock(current->mm);
1878 	srcu_idx = srcu_read_lock(&kvm->srcu);
1879 	while (i < args->count) {
1880 		unlocked = false;
1881 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1882 		if (kvm_is_error_hva(hva)) {
1883 			r = -EFAULT;
1884 			break;
1885 		}
1886 
1887 		/* Lowest order bit is reserved */
1888 		if (keys[i] & 0x01) {
1889 			r = -EINVAL;
1890 			break;
1891 		}
1892 
1893 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1894 		if (r) {
1895 			r = fixup_user_fault(current->mm, hva,
1896 					     FAULT_FLAG_WRITE, &unlocked);
1897 			if (r)
1898 				break;
1899 		}
1900 		if (!r)
1901 			i++;
1902 	}
1903 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1904 	mmap_read_unlock(current->mm);
1905 out:
1906 	kvfree(keys);
1907 	return r;
1908 }
1909 
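/*
 * Example (illustrative sketch): a possible userspace caller of the two
 * storage key ioctls handled above.  "vm_fd", "buf" and "npages" are
 * placeholders; the field names match struct kvm_s390_skeys as used
 * here, and count must stay within 1..KVM_S390_SKEYS_MAX.
 *
 *	struct kvm_s390_skeys skeys = {
 *		.start_gfn     = 0,
 *		.count         = npages,
 *		.skeydata_addr = (__u64)(unsigned long)buf,
 *	};
 *	long r = ioctl(vm_fd, KVM_S390_GET_SKEYS, &skeys);
 *
 * A return value of KVM_S390_GET_SKEYS_NONE indicates that the guest is
 * not using storage keys at all.
 */
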
1910 /*
1911  * Base address and length must be sent at the start of each block, therefore
1912  * it's cheaper to send some clean data, as long as it's less than the size of
1913  * two longs.
1914  */
1915 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1916 /* for consistency */
1917 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1918 
1919 /*
1920  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1921  * address falls in a hole. In that case the index of one of the memslots
1922  * bordering the hole is returned.
1923  */
1924 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1925 {
1926 	int start = 0, end = slots->used_slots;
1927 	int slot = atomic_read(&slots->lru_slot);
1928 	struct kvm_memory_slot *memslots = slots->memslots;
1929 
1930 	if (gfn >= memslots[slot].base_gfn &&
1931 	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
1932 		return slot;
1933 
1934 	while (start < end) {
1935 		slot = start + (end - start) / 2;
1936 
1937 		if (gfn >= memslots[slot].base_gfn)
1938 			end = slot;
1939 		else
1940 			start = slot + 1;
1941 	}
1942 
1943 	if (start >= slots->used_slots)
1944 		return slots->used_slots - 1;
1945 
1946 	if (gfn >= memslots[start].base_gfn &&
1947 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
1948 		atomic_set(&slots->lru_slot, start);
1949 	}
1950 
1951 	return start;
1952 }
1953 
1954 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1955 			      u8 *res, unsigned long bufsize)
1956 {
1957 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1958 
1959 	args->count = 0;
1960 	while (args->count < bufsize) {
1961 		hva = gfn_to_hva(kvm, cur_gfn);
1962 		/*
1963 		 * We return an error if the first value was invalid, but we
1964 		 * return successfully if at least one value was copied.
1965 		 */
1966 		if (kvm_is_error_hva(hva))
1967 			return args->count ? 0 : -EFAULT;
1968 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1969 			pgstev = 0;
1970 		res[args->count++] = (pgstev >> 24) & 0x43;
1971 		cur_gfn++;
1972 	}
1973 
1974 	return 0;
1975 }
1976 
1977 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1978 					      unsigned long cur_gfn)
1979 {
1980 	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1981 	struct kvm_memory_slot *ms = slots->memslots + slotidx;
1982 	unsigned long ofs = cur_gfn - ms->base_gfn;
1983 
1984 	if (ms->base_gfn + ms->npages <= cur_gfn) {
1985 		slotidx--;
1986 		/* If we are above the highest slot, wrap around */
1987 		if (slotidx < 0)
1988 			slotidx = slots->used_slots - 1;
1989 
1990 		ms = slots->memslots + slotidx;
1991 		ofs = 0;
1992 	}
1993 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1994 	while ((slotidx > 0) && (ofs >= ms->npages)) {
1995 		slotidx--;
1996 		ms = slots->memslots + slotidx;
1997 		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1998 	}
1999 	return ms->base_gfn + ofs;
2000 }
2001 
2002 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2003 			     u8 *res, unsigned long bufsize)
2004 {
2005 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2006 	struct kvm_memslots *slots = kvm_memslots(kvm);
2007 	struct kvm_memory_slot *ms;
2008 
2009 	if (unlikely(!slots->used_slots))
2010 		return 0;
2011 
2012 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2013 	ms = gfn_to_memslot(kvm, cur_gfn);
2014 	args->count = 0;
2015 	args->start_gfn = cur_gfn;
2016 	if (!ms)
2017 		return 0;
2018 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2019 	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2020 
2021 	while (args->count < bufsize) {
2022 		hva = gfn_to_hva(kvm, cur_gfn);
2023 		if (kvm_is_error_hva(hva))
2024 			return 0;
2025 		/* Decrement only if we actually flipped the bit to 0 */
2026 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2027 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2028 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2029 			pgstev = 0;
2030 		/* Save the value */
2031 		res[args->count++] = (pgstev >> 24) & 0x43;
2032 		/* If the next bit is too far away, stop. */
2033 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2034 			return 0;
2035 		/* If we reached the previous "next", find the next one */
2036 		if (cur_gfn == next_gfn)
2037 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2038 		/* Reached the end of memory or of the buffer, stop */
2039 		if ((next_gfn >= mem_end) ||
2040 		    (next_gfn - args->start_gfn >= bufsize))
2041 			return 0;
2042 		cur_gfn++;
2043 		/* Reached the end of the current memslot, take the next one. */
2044 		if (cur_gfn - ms->base_gfn >= ms->npages) {
2045 			ms = gfn_to_memslot(kvm, cur_gfn);
2046 			if (!ms)
2047 				return 0;
2048 		}
2049 	}
2050 	return 0;
2051 }
2052 
2053 /*
2054  * This function searches for the next page with dirty CMMA attributes, and
2055  * saves the attributes in the buffer up to either the end of the buffer or
2056  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2057  * no trailing clean bytes are saved.
2058  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2059  * output buffer will indicate 0 as length.
2060  */
2061 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2062 				  struct kvm_s390_cmma_log *args)
2063 {
2064 	unsigned long bufsize;
2065 	int srcu_idx, peek, ret;
2066 	u8 *values;
2067 
2068 	if (!kvm->arch.use_cmma)
2069 		return -ENXIO;
2070 	/* Invalid/unsupported flags were specified */
2071 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2072 		return -EINVAL;
2073 	/* Migration mode query, and we are not doing a migration */
2074 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2075 	if (!peek && !kvm->arch.migration_mode)
2076 		return -EINVAL;
2077 	/* CMMA is disabled or was not used, or the buffer has length zero */
2078 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2079 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2080 		memset(args, 0, sizeof(*args));
2081 		return 0;
2082 	}
2083 	/* We are not peeking, and there are no dirty pages */
2084 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2085 		memset(args, 0, sizeof(*args));
2086 		return 0;
2087 	}
2088 
2089 	values = vmalloc(bufsize);
2090 	if (!values)
2091 		return -ENOMEM;
2092 
2093 	mmap_read_lock(kvm->mm);
2094 	srcu_idx = srcu_read_lock(&kvm->srcu);
2095 	if (peek)
2096 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2097 	else
2098 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2099 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2100 	mmap_read_unlock(kvm->mm);
2101 
2102 	if (kvm->arch.migration_mode)
2103 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2104 	else
2105 		args->remaining = 0;
2106 
2107 	if (copy_to_user((void __user *)args->values, values, args->count))
2108 		ret = -EFAULT;
2109 
2110 	vfree(values);
2111 	return ret;
2112 }
2113 
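/*
 * Example (illustrative sketch): how a VMM might drive the
 * KVM_S390_GET_CMMA_BITS ioctl that ends up in kvm_s390_get_cmma_bits()
 * above.  "vm_fd", "buf" and "bufsize" are placeholders, and CMMA must
 * have been enabled beforehand via the KVM_S390_VM_MEM_ENABLE_CMMA
 * attribute.
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count     = bufsize,
 *		.flags     = KVM_S390_CMMA_PEEK,
 *		.values    = (__u64)(unsigned long)buf,
 *	};
 *	int rc = ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *
 * On success, log.count values have been stored in buf.  Without
 * KVM_S390_CMMA_PEEK the VM must be in migration mode; the handler then
 * updates log.start_gfn to the first dirty gfn and reports the number
 * of still-dirty pages in log.remaining.
 */
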
2114 /*
2115  * This function sets the CMMA attributes for the given pages. If the input
2116  * buffer has zero length, no action is taken, otherwise the attributes are
2117  * set and the mm->context.uses_cmm flag is set.
2118  */
2119 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2120 				  const struct kvm_s390_cmma_log *args)
2121 {
2122 	unsigned long hva, mask, pgstev, i;
2123 	uint8_t *bits;
2124 	int srcu_idx, r = 0;
2125 
2126 	mask = args->mask;
2127 
2128 	if (!kvm->arch.use_cmma)
2129 		return -ENXIO;
2130 	/* invalid/unsupported flags */
2131 	if (args->flags != 0)
2132 		return -EINVAL;
2133 	/* Enforce sane limit on memory allocation */
2134 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2135 		return -EINVAL;
2136 	/* Nothing to do */
2137 	if (args->count == 0)
2138 		return 0;
2139 
2140 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2141 	if (!bits)
2142 		return -ENOMEM;
2143 
2144 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2145 	if (r) {
2146 		r = -EFAULT;
2147 		goto out;
2148 	}
2149 
2150 	mmap_read_lock(kvm->mm);
2151 	srcu_idx = srcu_read_lock(&kvm->srcu);
2152 	for (i = 0; i < args->count; i++) {
2153 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2154 		if (kvm_is_error_hva(hva)) {
2155 			r = -EFAULT;
2156 			break;
2157 		}
2158 
2159 		pgstev = bits[i];
2160 		pgstev = pgstev << 24;
2161 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2162 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2163 	}
2164 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2165 	mmap_read_unlock(kvm->mm);
2166 
2167 	if (!kvm->mm->context.uses_cmm) {
2168 		mmap_write_lock(kvm->mm);
2169 		kvm->mm->context.uses_cmm = 1;
2170 		mmap_write_unlock(kvm->mm);
2171 	}
2172 out:
2173 	vfree(bits);
2174 	return r;
2175 }
2176 
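/*
 * Example (illustrative sketch): the inverse direction, e.g. on a
 * migration target, feeds the saved values back through
 * KVM_S390_SET_CMMA_BITS.  "vm_fd", "buf", "first_gfn" and "n" are
 * placeholders; flags must be zero, and a mask of ~0ULL replaces all of
 * the PGSTE bits this handler touches (the value is ANDed with the
 * supported mask anyway).
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = first_gfn,
 *		.count     = n,
 *		.mask      = ~0ULL,
 *		.values    = (__u64)(unsigned long)buf,
 *	};
 *	int rc = ioctl(vm_fd, KVM_S390_SET_CMMA_BITS, &log);
 */
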
2177 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2178 {
2179 	struct kvm_vcpu *vcpu;
2180 	u16 rc, rrc;
2181 	int ret = 0;
2182 	int i;
2183 
2184 	/*
2185 	 * We ignore failures and try to destroy as many CPUs as possible.
2186 	 * At the same time we must not free the assigned resources when
2187 	 * this fails, as the ultravisor still has access to that memory.
2188 	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2189 	 * behind.
2190 	 * We want to return the first failure rc and rrc, though.
2191 	 */
2192 	kvm_for_each_vcpu(i, vcpu, kvm) {
2193 		mutex_lock(&vcpu->mutex);
2194 		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2195 			*rcp = rc;
2196 			*rrcp = rrc;
2197 			ret = -EIO;
2198 		}
2199 		mutex_unlock(&vcpu->mutex);
2200 	}
2201 	return ret;
2202 }
2203 
2204 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2205 {
2206 	int i, r = 0;
2207 	u16 dummy;
2208 
2209 	struct kvm_vcpu *vcpu;
2210 
2211 	kvm_for_each_vcpu(i, vcpu, kvm) {
2212 		mutex_lock(&vcpu->mutex);
2213 		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2214 		mutex_unlock(&vcpu->mutex);
2215 		if (r)
2216 			break;
2217 	}
2218 	if (r)
2219 		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2220 	return r;
2221 }
2222 
2223 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2224 {
2225 	int r = 0;
2226 	u16 dummy;
2227 	void __user *argp = (void __user *)cmd->data;
2228 
2229 	switch (cmd->cmd) {
2230 	case KVM_PV_ENABLE: {
2231 		r = -EINVAL;
2232 		if (kvm_s390_pv_is_protected(kvm))
2233 			break;
2234 
2235 		/*
2236 		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
2237 		 *  esca, we need no cleanup in the error cases below
2238 		 */
2239 		r = sca_switch_to_extended(kvm);
2240 		if (r)
2241 			break;
2242 
2243 		mmap_write_lock(current->mm);
2244 		r = gmap_mark_unmergeable();
2245 		mmap_write_unlock(current->mm);
2246 		if (r)
2247 			break;
2248 
2249 		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2250 		if (r)
2251 			break;
2252 
2253 		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2254 		if (r)
2255 			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2256 
2257 		/* we need to block service interrupts from now on */
2258 		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2259 		break;
2260 	}
2261 	case KVM_PV_DISABLE: {
2262 		r = -EINVAL;
2263 		if (!kvm_s390_pv_is_protected(kvm))
2264 			break;
2265 
2266 		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2267 		/*
2268 		 * If a CPU could not be destroyed, destroy VM will also fail.
2269 		 * There is no point in trying to destroy it. Instead return
2270 		 * the rc and rrc from the first CPU that failed destroying.
2271 		 */
2272 		if (r)
2273 			break;
2274 		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2275 
2276 		/* no need to block service interrupts any more */
2277 		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2278 		break;
2279 	}
2280 	case KVM_PV_SET_SEC_PARMS: {
2281 		struct kvm_s390_pv_sec_parm parms = {};
2282 		void *hdr;
2283 
2284 		r = -EINVAL;
2285 		if (!kvm_s390_pv_is_protected(kvm))
2286 			break;
2287 
2288 		r = -EFAULT;
2289 		if (copy_from_user(&parms, argp, sizeof(parms)))
2290 			break;
2291 
2292 		/* Currently restricted to 8KB */
2293 		r = -EINVAL;
2294 		if (parms.length > PAGE_SIZE * 2)
2295 			break;
2296 
2297 		r = -ENOMEM;
2298 		hdr = vmalloc(parms.length);
2299 		if (!hdr)
2300 			break;
2301 
2302 		r = -EFAULT;
2303 		if (!copy_from_user(hdr, (void __user *)parms.origin,
2304 				    parms.length))
2305 			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2306 						      &cmd->rc, &cmd->rrc);
2307 
2308 		vfree(hdr);
2309 		break;
2310 	}
2311 	case KVM_PV_UNPACK: {
2312 		struct kvm_s390_pv_unp unp = {};
2313 
2314 		r = -EINVAL;
2315 		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2316 			break;
2317 
2318 		r = -EFAULT;
2319 		if (copy_from_user(&unp, argp, sizeof(unp)))
2320 			break;
2321 
2322 		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2323 				       &cmd->rc, &cmd->rrc);
2324 		break;
2325 	}
2326 	case KVM_PV_VERIFY: {
2327 		r = -EINVAL;
2328 		if (!kvm_s390_pv_is_protected(kvm))
2329 			break;
2330 
2331 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2332 				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2333 		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2334 			     cmd->rrc);
2335 		break;
2336 	}
2337 	case KVM_PV_PREP_RESET: {
2338 		r = -EINVAL;
2339 		if (!kvm_s390_pv_is_protected(kvm))
2340 			break;
2341 
2342 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2343 				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2344 		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2345 			     cmd->rc, cmd->rrc);
2346 		break;
2347 	}
2348 	case KVM_PV_UNSHARE_ALL: {
2349 		r = -EINVAL;
2350 		if (!kvm_s390_pv_is_protected(kvm))
2351 			break;
2352 
2353 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2354 				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2355 		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2356 			     cmd->rc, cmd->rrc);
2357 		break;
2358 	}
2359 	default:
2360 		r = -ENOTTY;
2361 	}
2362 	return r;
2363 }
2364 
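/*
 * Example (illustrative sketch): the protected virtualization
 * transitions above are driven from userspace with the
 * KVM_S390_PV_COMMAND ioctl.  "vm_fd" is a placeholder; flags must
 * currently be zero, and rc/rrc are filled in by the handler for
 * diagnostics.
 *
 *	struct kvm_pv_cmd pv = { .cmd = KVM_PV_ENABLE };
 *
 *	if (ioctl(vm_fd, KVM_S390_PV_COMMAND, &pv))
 *		fprintf(stderr, "PV enable failed: rc 0x%x rrc 0x%x\n",
 *			pv.rc, pv.rrc);
 */
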
2365 long kvm_arch_vm_ioctl(struct file *filp,
2366 		       unsigned int ioctl, unsigned long arg)
2367 {
2368 	struct kvm *kvm = filp->private_data;
2369 	void __user *argp = (void __user *)arg;
2370 	struct kvm_device_attr attr;
2371 	int r;
2372 
2373 	switch (ioctl) {
2374 	case KVM_S390_INTERRUPT: {
2375 		struct kvm_s390_interrupt s390int;
2376 
2377 		r = -EFAULT;
2378 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2379 			break;
2380 		r = kvm_s390_inject_vm(kvm, &s390int);
2381 		break;
2382 	}
2383 	case KVM_CREATE_IRQCHIP: {
2384 		struct kvm_irq_routing_entry routing;
2385 
2386 		r = -EINVAL;
2387 		if (kvm->arch.use_irqchip) {
2388 			/* Set up dummy routing. */
2389 			memset(&routing, 0, sizeof(routing));
2390 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2391 		}
2392 		break;
2393 	}
2394 	case KVM_SET_DEVICE_ATTR: {
2395 		r = -EFAULT;
2396 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2397 			break;
2398 		r = kvm_s390_vm_set_attr(kvm, &attr);
2399 		break;
2400 	}
2401 	case KVM_GET_DEVICE_ATTR: {
2402 		r = -EFAULT;
2403 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2404 			break;
2405 		r = kvm_s390_vm_get_attr(kvm, &attr);
2406 		break;
2407 	}
2408 	case KVM_HAS_DEVICE_ATTR: {
2409 		r = -EFAULT;
2410 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2411 			break;
2412 		r = kvm_s390_vm_has_attr(kvm, &attr);
2413 		break;
2414 	}
2415 	case KVM_S390_GET_SKEYS: {
2416 		struct kvm_s390_skeys args;
2417 
2418 		r = -EFAULT;
2419 		if (copy_from_user(&args, argp,
2420 				   sizeof(struct kvm_s390_skeys)))
2421 			break;
2422 		r = kvm_s390_get_skeys(kvm, &args);
2423 		break;
2424 	}
2425 	case KVM_S390_SET_SKEYS: {
2426 		struct kvm_s390_skeys args;
2427 
2428 		r = -EFAULT;
2429 		if (copy_from_user(&args, argp,
2430 				   sizeof(struct kvm_s390_skeys)))
2431 			break;
2432 		r = kvm_s390_set_skeys(kvm, &args);
2433 		break;
2434 	}
2435 	case KVM_S390_GET_CMMA_BITS: {
2436 		struct kvm_s390_cmma_log args;
2437 
2438 		r = -EFAULT;
2439 		if (copy_from_user(&args, argp, sizeof(args)))
2440 			break;
2441 		mutex_lock(&kvm->slots_lock);
2442 		r = kvm_s390_get_cmma_bits(kvm, &args);
2443 		mutex_unlock(&kvm->slots_lock);
2444 		if (!r) {
2445 			r = copy_to_user(argp, &args, sizeof(args));
2446 			if (r)
2447 				r = -EFAULT;
2448 		}
2449 		break;
2450 	}
2451 	case KVM_S390_SET_CMMA_BITS: {
2452 		struct kvm_s390_cmma_log args;
2453 
2454 		r = -EFAULT;
2455 		if (copy_from_user(&args, argp, sizeof(args)))
2456 			break;
2457 		mutex_lock(&kvm->slots_lock);
2458 		r = kvm_s390_set_cmma_bits(kvm, &args);
2459 		mutex_unlock(&kvm->slots_lock);
2460 		break;
2461 	}
2462 	case KVM_S390_PV_COMMAND: {
2463 		struct kvm_pv_cmd args;
2464 
2465 		/* protvirt means user sigp */
2466 		kvm->arch.user_cpu_state_ctrl = 1;
2467 		r = 0;
2468 		if (!is_prot_virt_host()) {
2469 			r = -EINVAL;
2470 			break;
2471 		}
2472 		if (copy_from_user(&args, argp, sizeof(args))) {
2473 			r = -EFAULT;
2474 			break;
2475 		}
2476 		if (args.flags) {
2477 			r = -EINVAL;
2478 			break;
2479 		}
2480 		mutex_lock(&kvm->lock);
2481 		r = kvm_s390_handle_pv(kvm, &args);
2482 		mutex_unlock(&kvm->lock);
2483 		if (copy_to_user(argp, &args, sizeof(args))) {
2484 			r = -EFAULT;
2485 			break;
2486 		}
2487 		break;
2488 	}
2489 	default:
2490 		r = -ENOTTY;
2491 	}
2492 
2493 	return r;
2494 }
2495 
2496 static int kvm_s390_apxa_installed(void)
2497 {
2498 	struct ap_config_info info;
2499 
2500 	if (ap_instructions_available()) {
2501 		if (ap_qci(&info) == 0)
2502 			return info.apxa;
2503 	}
2504 
2505 	return 0;
2506 }
2507 
2508 /*
2509  * The format of the crypto control block (CRYCB) is specified in the 3 low
2510  * order bits of the CRYCB designation (CRYCBD) field as follows:
2511  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2512  *	     AP extended addressing (APXA) facility is installed.
2513  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2514  * Format 2: Both the APXA and MSAX3 facilities are installed.
2515  */
2516 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2517 {
2518 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2519 
2520 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2521 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2522 
2523 	/* Check whether MSAX3 is installed */
2524 	if (!test_kvm_facility(kvm, 76))
2525 		return;
2526 
2527 	if (kvm_s390_apxa_installed())
2528 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2529 	else
2530 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2531 }
2532 
2533 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2534 			       unsigned long *aqm, unsigned long *adm)
2535 {
2536 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2537 
2538 	mutex_lock(&kvm->lock);
2539 	kvm_s390_vcpu_block_all(kvm);
2540 
2541 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2542 	case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2543 		memcpy(crycb->apcb1.apm, apm, 32);
2544 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2545 			 apm[0], apm[1], apm[2], apm[3]);
2546 		memcpy(crycb->apcb1.aqm, aqm, 32);
2547 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2548 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2549 		memcpy(crycb->apcb1.adm, adm, 32);
2550 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2551 			 adm[0], adm[1], adm[2], adm[3]);
2552 		break;
2553 	case CRYCB_FORMAT1:
2554 	case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
2555 		memcpy(crycb->apcb0.apm, apm, 8);
2556 		memcpy(crycb->apcb0.aqm, aqm, 2);
2557 		memcpy(crycb->apcb0.adm, adm, 2);
2558 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2559 			 apm[0], *((unsigned short *)aqm),
2560 			 *((unsigned short *)adm));
2561 		break;
2562 	default:	/* Cannot happen */
2563 		break;
2564 	}
2565 
2566 	/* recreate the shadow crycb for each vcpu */
2567 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2568 	kvm_s390_vcpu_unblock_all(kvm);
2569 	mutex_unlock(&kvm->lock);
2570 }
2571 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2572 
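/*
 * Example (illustrative sketch): a possible in-kernel caller (in
 * practice the vfio_ap driver plays this role) marking AP adapter 3,
 * usage domain 5 and control domain 5 for the guest and handing the
 * masks to KVM.  The kvm pointer and the concrete numbers are made up;
 * the masks use the CRYCB's MSB-0 bit numbering, hence set_bit_inv().
 *
 *	DECLARE_BITMAP(apm, 256) = { 0 };
 *	DECLARE_BITMAP(aqm, 256) = { 0 };
 *	DECLARE_BITMAP(adm, 256) = { 0 };
 *
 *	set_bit_inv(3, apm);
 *	set_bit_inv(5, aqm);
 *	set_bit_inv(5, adm);
 *	kvm_arch_crypto_set_masks(kvm, apm, aqm, adm);
 */
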
2573 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2574 {
2575 	mutex_lock(&kvm->lock);
2576 	kvm_s390_vcpu_block_all(kvm);
2577 
2578 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2579 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2580 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2581 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2582 
2583 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2584 	/* recreate the shadow crycb for each vcpu */
2585 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2586 	kvm_s390_vcpu_unblock_all(kvm);
2587 	mutex_unlock(&kvm->lock);
2588 }
2589 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2590 
2591 static u64 kvm_s390_get_initial_cpuid(void)
2592 {
2593 	struct cpuid cpuid;
2594 
2595 	get_cpu_id(&cpuid);
2596 	cpuid.version = 0xff;
2597 	return *((u64 *) &cpuid);
2598 }
2599 
2600 static void kvm_s390_crypto_init(struct kvm *kvm)
2601 {
2602 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2603 	kvm_s390_set_crycb_format(kvm);
2604 
2605 	if (!test_kvm_facility(kvm, 76))
2606 		return;
2607 
2608 	/* Enable AES/DEA protected key functions by default */
2609 	kvm->arch.crypto.aes_kw = 1;
2610 	kvm->arch.crypto.dea_kw = 1;
2611 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2612 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2613 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2614 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2615 }
2616 
2617 static void sca_dispose(struct kvm *kvm)
2618 {
2619 	if (kvm->arch.use_esca)
2620 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2621 	else
2622 		free_page((unsigned long)(kvm->arch.sca));
2623 	kvm->arch.sca = NULL;
2624 }
2625 
2626 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2627 {
2628 	gfp_t alloc_flags = GFP_KERNEL;
2629 	int i, rc;
2630 	char debug_name[16];
2631 	static unsigned long sca_offset;
2632 
2633 	rc = -EINVAL;
2634 #ifdef CONFIG_KVM_S390_UCONTROL
2635 	if (type & ~KVM_VM_S390_UCONTROL)
2636 		goto out_err;
2637 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2638 		goto out_err;
2639 #else
2640 	if (type)
2641 		goto out_err;
2642 #endif
2643 
2644 	rc = s390_enable_sie();
2645 	if (rc)
2646 		goto out_err;
2647 
2648 	rc = -ENOMEM;
2649 
2650 	if (!sclp.has_64bscao)
2651 		alloc_flags |= GFP_DMA;
2652 	rwlock_init(&kvm->arch.sca_lock);
2653 	/* start with basic SCA */
2654 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2655 	if (!kvm->arch.sca)
2656 		goto out_err;
2657 	mutex_lock(&kvm_lock);
2658 	sca_offset += 16;
2659 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2660 		sca_offset = 0;
2661 	kvm->arch.sca = (struct bsca_block *)
2662 			((char *) kvm->arch.sca + sca_offset);
2663 	mutex_unlock(&kvm_lock);
2664 
2665 	sprintf(debug_name, "kvm-%u", current->pid);
2666 
2667 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2668 	if (!kvm->arch.dbf)
2669 		goto out_err;
2670 
2671 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2672 	kvm->arch.sie_page2 =
2673 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2674 	if (!kvm->arch.sie_page2)
2675 		goto out_err;
2676 
2677 	kvm->arch.sie_page2->kvm = kvm;
2678 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2679 
2680 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2681 		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2682 					      (kvm_s390_fac_base[i] |
2683 					       kvm_s390_fac_ext[i]);
2684 		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2685 					      kvm_s390_fac_base[i];
2686 	}
2687 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2688 
2689 	/* we are always in czam mode - even on pre z14 machines */
2690 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2691 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2692 	/* we emulate STHYI in kvm */
2693 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2694 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2695 	if (MACHINE_HAS_TLB_GUEST) {
2696 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2697 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2698 	}
2699 
2700 	if (css_general_characteristics.aiv && test_facility(65))
2701 		set_kvm_facility(kvm->arch.model.fac_mask, 65);
2702 
2703 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2704 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2705 
2706 	kvm_s390_crypto_init(kvm);
2707 
2708 	mutex_init(&kvm->arch.float_int.ais_lock);
2709 	spin_lock_init(&kvm->arch.float_int.lock);
2710 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2711 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2712 	init_waitqueue_head(&kvm->arch.ipte_wq);
2713 	mutex_init(&kvm->arch.ipte_mutex);
2714 
2715 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2716 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2717 
2718 	if (type & KVM_VM_S390_UCONTROL) {
2719 		kvm->arch.gmap = NULL;
2720 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2721 	} else {
2722 		if (sclp.hamax == U64_MAX)
2723 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2724 		else
2725 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2726 						    sclp.hamax + 1);
2727 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2728 		if (!kvm->arch.gmap)
2729 			goto out_err;
2730 		kvm->arch.gmap->private = kvm;
2731 		kvm->arch.gmap->pfault_enabled = 0;
2732 	}
2733 
2734 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2735 	kvm->arch.use_skf = sclp.has_skey;
2736 	spin_lock_init(&kvm->arch.start_stop_lock);
2737 	kvm_s390_vsie_init(kvm);
2738 	if (use_gisa)
2739 		kvm_s390_gisa_init(kvm);
2740 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2741 
2742 	return 0;
2743 out_err:
2744 	free_page((unsigned long)kvm->arch.sie_page2);
2745 	debug_unregister(kvm->arch.dbf);
2746 	sca_dispose(kvm);
2747 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2748 	return rc;
2749 }
2750 
2751 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2752 {
2753 	u16 rc, rrc;
2754 
2755 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2756 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2757 	kvm_s390_clear_local_irqs(vcpu);
2758 	kvm_clear_async_pf_completion_queue(vcpu);
2759 	if (!kvm_is_ucontrol(vcpu->kvm))
2760 		sca_del_vcpu(vcpu);
2761 
2762 	if (kvm_is_ucontrol(vcpu->kvm))
2763 		gmap_remove(vcpu->arch.gmap);
2764 
2765 	if (vcpu->kvm->arch.use_cmma)
2766 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2767 	/* We cannot hold the vcpu mutex here, we are already dying */
2768 	if (kvm_s390_pv_cpu_get_handle(vcpu))
2769 		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2770 	free_page((unsigned long)(vcpu->arch.sie_block));
2771 }
2772 
2773 static void kvm_free_vcpus(struct kvm *kvm)
2774 {
2775 	unsigned int i;
2776 	struct kvm_vcpu *vcpu;
2777 
2778 	kvm_for_each_vcpu(i, vcpu, kvm)
2779 		kvm_vcpu_destroy(vcpu);
2780 
2781 	mutex_lock(&kvm->lock);
2782 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2783 		kvm->vcpus[i] = NULL;
2784 
2785 	atomic_set(&kvm->online_vcpus, 0);
2786 	mutex_unlock(&kvm->lock);
2787 }
2788 
2789 void kvm_arch_destroy_vm(struct kvm *kvm)
2790 {
2791 	u16 rc, rrc;
2792 
2793 	kvm_free_vcpus(kvm);
2794 	sca_dispose(kvm);
2795 	kvm_s390_gisa_destroy(kvm);
2796 	/*
2797 	 * We are already at the end of life and kvm->lock is not taken.
2798 	 * This is ok as the file descriptor is closed by now and nobody
2799 	 * can mess with the pv state. To avoid lockdep_assert_held from
2800 	 * complaining we do not use kvm_s390_pv_is_protected.
2801 	 */
2802 	if (kvm_s390_pv_get_handle(kvm))
2803 		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2804 	debug_unregister(kvm->arch.dbf);
2805 	free_page((unsigned long)kvm->arch.sie_page2);
2806 	if (!kvm_is_ucontrol(kvm))
2807 		gmap_remove(kvm->arch.gmap);
2808 	kvm_s390_destroy_adapters(kvm);
2809 	kvm_s390_clear_float_irqs(kvm);
2810 	kvm_s390_vsie_destroy(kvm);
2811 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2812 }
2813 
2814 /* Section: vcpu related */
2815 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2816 {
2817 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2818 	if (!vcpu->arch.gmap)
2819 		return -ENOMEM;
2820 	vcpu->arch.gmap->private = vcpu->kvm;
2821 
2822 	return 0;
2823 }
2824 
2825 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2826 {
2827 	if (!kvm_s390_use_sca_entries())
2828 		return;
2829 	read_lock(&vcpu->kvm->arch.sca_lock);
2830 	if (vcpu->kvm->arch.use_esca) {
2831 		struct esca_block *sca = vcpu->kvm->arch.sca;
2832 
2833 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2834 		sca->cpu[vcpu->vcpu_id].sda = 0;
2835 	} else {
2836 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2837 
2838 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2839 		sca->cpu[vcpu->vcpu_id].sda = 0;
2840 	}
2841 	read_unlock(&vcpu->kvm->arch.sca_lock);
2842 }
2843 
2844 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2845 {
2846 	if (!kvm_s390_use_sca_entries()) {
2847 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2848 
2849 		/* we still need the basic sca for the ipte control */
2850 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2851 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2852 		return;
2853 	}
2854 	read_lock(&vcpu->kvm->arch.sca_lock);
2855 	if (vcpu->kvm->arch.use_esca) {
2856 		struct esca_block *sca = vcpu->kvm->arch.sca;
2857 
2858 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2859 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2860 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2861 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2862 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2863 	} else {
2864 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2865 
2866 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2867 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2868 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2869 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2870 	}
2871 	read_unlock(&vcpu->kvm->arch.sca_lock);
2872 }
2873 
2874 /* Basic SCA to Extended SCA data copy routines */
2875 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2876 {
2877 	d->sda = s->sda;
2878 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2879 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2880 }
2881 
2882 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2883 {
2884 	int i;
2885 
2886 	d->ipte_control = s->ipte_control;
2887 	d->mcn[0] = s->mcn;
2888 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2889 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2890 }
2891 
2892 static int sca_switch_to_extended(struct kvm *kvm)
2893 {
2894 	struct bsca_block *old_sca = kvm->arch.sca;
2895 	struct esca_block *new_sca;
2896 	struct kvm_vcpu *vcpu;
2897 	unsigned int vcpu_idx;
2898 	u32 scaol, scaoh;
2899 
2900 	if (kvm->arch.use_esca)
2901 		return 0;
2902 
2903 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2904 	if (!new_sca)
2905 		return -ENOMEM;
2906 
2907 	scaoh = (u32)((u64)(new_sca) >> 32);
2908 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2909 
2910 	kvm_s390_vcpu_block_all(kvm);
2911 	write_lock(&kvm->arch.sca_lock);
2912 
2913 	sca_copy_b_to_e(new_sca, old_sca);
2914 
2915 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2916 		vcpu->arch.sie_block->scaoh = scaoh;
2917 		vcpu->arch.sie_block->scaol = scaol;
2918 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2919 	}
2920 	kvm->arch.sca = new_sca;
2921 	kvm->arch.use_esca = 1;
2922 
2923 	write_unlock(&kvm->arch.sca_lock);
2924 	kvm_s390_vcpu_unblock_all(kvm);
2925 
2926 	free_page((unsigned long)old_sca);
2927 
2928 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2929 		 old_sca, kvm->arch.sca);
2930 	return 0;
2931 }
2932 
2933 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2934 {
2935 	int rc;
2936 
2937 	if (!kvm_s390_use_sca_entries()) {
2938 		if (id < KVM_MAX_VCPUS)
2939 			return true;
2940 		return false;
2941 	}
2942 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2943 		return true;
2944 	if (!sclp.has_esca || !sclp.has_64bscao)
2945 		return false;
2946 
2947 	mutex_lock(&kvm->lock);
2948 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2949 	mutex_unlock(&kvm->lock);
2950 
2951 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2952 }
2953 
2954 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2955 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2956 {
2957 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2958 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2959 	vcpu->arch.cputm_start = get_tod_clock_fast();
2960 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2961 }
2962 
2963 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2964 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2965 {
2966 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2967 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2968 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2969 	vcpu->arch.cputm_start = 0;
2970 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2971 }
2972 
2973 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2974 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2975 {
2976 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2977 	vcpu->arch.cputm_enabled = true;
2978 	__start_cpu_timer_accounting(vcpu);
2979 }
2980 
2981 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2982 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2983 {
2984 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2985 	__stop_cpu_timer_accounting(vcpu);
2986 	vcpu->arch.cputm_enabled = false;
2987 }
2988 
2989 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2990 {
2991 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2992 	__enable_cpu_timer_accounting(vcpu);
2993 	preempt_enable();
2994 }
2995 
2996 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2997 {
2998 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2999 	__disable_cpu_timer_accounting(vcpu);
3000 	preempt_enable();
3001 }
3002 
3003 /* set the cpu timer - may only be called from the VCPU thread itself */
3004 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3005 {
3006 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3007 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3008 	if (vcpu->arch.cputm_enabled)
3009 		vcpu->arch.cputm_start = get_tod_clock_fast();
3010 	vcpu->arch.sie_block->cputm = cputm;
3011 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3012 	preempt_enable();
3013 }
3014 
3015 /* update and get the cpu timer - can also be called from other VCPU threads */
3016 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3017 {
3018 	unsigned int seq;
3019 	__u64 value;
3020 
3021 	if (unlikely(!vcpu->arch.cputm_enabled))
3022 		return vcpu->arch.sie_block->cputm;
3023 
3024 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3025 	do {
3026 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3027 		/*
3028 		 * If the writer would ever execute a read in the critical
3029 		 * section, e.g. in irq context, we have a deadlock.
3030 		 */
3031 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3032 		value = vcpu->arch.sie_block->cputm;
3033 		/* if cputm_start is 0, accounting is being started/stopped */
3034 		if (likely(vcpu->arch.cputm_start))
3035 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3036 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3037 	preempt_enable();
3038 	return value;
3039 }
3040 
3041 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3042 {
3043 
3044 	gmap_enable(vcpu->arch.enabled_gmap);
3045 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3046 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3047 		__start_cpu_timer_accounting(vcpu);
3048 	vcpu->cpu = cpu;
3049 }
3050 
3051 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3052 {
3053 	vcpu->cpu = -1;
3054 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3055 		__stop_cpu_timer_accounting(vcpu);
3056 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3057 	vcpu->arch.enabled_gmap = gmap_get_enabled();
3058 	gmap_disable(vcpu->arch.enabled_gmap);
3059 
3060 }
3061 
3062 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3063 {
3064 	mutex_lock(&vcpu->kvm->lock);
3065 	preempt_disable();
3066 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3067 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3068 	preempt_enable();
3069 	mutex_unlock(&vcpu->kvm->lock);
3070 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3071 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3072 		sca_add_vcpu(vcpu);
3073 	}
3074 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3075 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3076 	/* make vcpu_load load the right gmap on the first trigger */
3077 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3078 }
3079 
3080 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3081 {
3082 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3083 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3084 		return true;
3085 	return false;
3086 }
3087 
3088 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3089 {
3090 	/* At least one ECC subfunction must be present */
3091 	return kvm_has_pckmo_subfunc(kvm, 32) ||
3092 	       kvm_has_pckmo_subfunc(kvm, 33) ||
3093 	       kvm_has_pckmo_subfunc(kvm, 34) ||
3094 	       kvm_has_pckmo_subfunc(kvm, 40) ||
3095 	       kvm_has_pckmo_subfunc(kvm, 41);
3096 
3097 }
3098 
3099 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3100 {
3101 	/*
3102 	 * If the AP instructions are not being interpreted and the MSAX3
3103 	 * facility is not configured for the guest, there is nothing to set up.
3104 	 */
3105 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3106 		return;
3107 
3108 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3109 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3110 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3111 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3112 
3113 	if (vcpu->kvm->arch.crypto.apie)
3114 		vcpu->arch.sie_block->eca |= ECA_APIE;
3115 
3116 	/* Set up protected key support */
3117 	if (vcpu->kvm->arch.crypto.aes_kw) {
3118 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3119 		/* ecc is also wrapped with AES key */
3120 		if (kvm_has_pckmo_ecc(vcpu->kvm))
3121 			vcpu->arch.sie_block->ecd |= ECD_ECC;
3122 	}
3123 
3124 	if (vcpu->kvm->arch.crypto.dea_kw)
3125 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3126 }
3127 
3128 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3129 {
3130 	free_page(vcpu->arch.sie_block->cbrlo);
3131 	vcpu->arch.sie_block->cbrlo = 0;
3132 }
3133 
3134 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3135 {
3136 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
3137 	if (!vcpu->arch.sie_block->cbrlo)
3138 		return -ENOMEM;
3139 	return 0;
3140 }
3141 
3142 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3143 {
3144 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3145 
3146 	vcpu->arch.sie_block->ibc = model->ibc;
3147 	if (test_kvm_facility(vcpu->kvm, 7))
3148 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3149 }
3150 
3151 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3152 {
3153 	int rc = 0;
3154 	u16 uvrc, uvrrc;
3155 
3156 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3157 						    CPUSTAT_SM |
3158 						    CPUSTAT_STOPPED);
3159 
3160 	if (test_kvm_facility(vcpu->kvm, 78))
3161 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3162 	else if (test_kvm_facility(vcpu->kvm, 8))
3163 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3164 
3165 	kvm_s390_vcpu_setup_model(vcpu);
3166 
3167 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3168 	if (MACHINE_HAS_ESOP)
3169 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3170 	if (test_kvm_facility(vcpu->kvm, 9))
3171 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
3172 	if (test_kvm_facility(vcpu->kvm, 73))
3173 		vcpu->arch.sie_block->ecb |= ECB_TE;
3174 
3175 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3176 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3177 	if (test_kvm_facility(vcpu->kvm, 130))
3178 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3179 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3180 	if (sclp.has_cei)
3181 		vcpu->arch.sie_block->eca |= ECA_CEI;
3182 	if (sclp.has_ib)
3183 		vcpu->arch.sie_block->eca |= ECA_IB;
3184 	if (sclp.has_siif)
3185 		vcpu->arch.sie_block->eca |= ECA_SII;
3186 	if (sclp.has_sigpif)
3187 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3188 	if (test_kvm_facility(vcpu->kvm, 129)) {
3189 		vcpu->arch.sie_block->eca |= ECA_VX;
3190 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3191 	}
3192 	if (test_kvm_facility(vcpu->kvm, 139))
3193 		vcpu->arch.sie_block->ecd |= ECD_MEF;
3194 	if (test_kvm_facility(vcpu->kvm, 156))
3195 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3196 	if (vcpu->arch.sie_block->gd) {
3197 		vcpu->arch.sie_block->eca |= ECA_AIV;
3198 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3199 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3200 	}
3201 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3202 					| SDNXC;
3203 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3204 
3205 	if (sclp.has_kss)
3206 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3207 	else
3208 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3209 
3210 	if (vcpu->kvm->arch.use_cmma) {
3211 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3212 		if (rc)
3213 			return rc;
3214 	}
3215 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3216 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3217 
3218 	vcpu->arch.sie_block->hpid = HPID_KVM;
3219 
3220 	kvm_s390_vcpu_crypto_setup(vcpu);
3221 
3222 	mutex_lock(&vcpu->kvm->lock);
3223 	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3224 		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3225 		if (rc)
3226 			kvm_s390_vcpu_unsetup_cmma(vcpu);
3227 	}
3228 	mutex_unlock(&vcpu->kvm->lock);
3229 
3230 	return rc;
3231 }
3232 
3233 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3234 {
3235 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3236 		return -EINVAL;
3237 	return 0;
3238 }
3239 
3240 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3241 {
3242 	struct sie_page *sie_page;
3243 	int rc;
3244 
3245 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3246 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3247 	if (!sie_page)
3248 		return -ENOMEM;
3249 
3250 	vcpu->arch.sie_block = &sie_page->sie_block;
3251 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3252 
3253 	/* the real guest size will always be smaller than msl */
3254 	vcpu->arch.sie_block->mso = 0;
3255 	vcpu->arch.sie_block->msl = sclp.hamax;
3256 
3257 	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3258 	spin_lock_init(&vcpu->arch.local_int.lock);
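	/* gd holds the guest interruption state area (GISA) designation, if any */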
3259 	vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3260 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3261 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3262 	seqcount_init(&vcpu->arch.cputm_seqcount);
3263 
3264 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3265 	kvm_clear_async_pf_completion_queue(vcpu);
3266 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3267 				    KVM_SYNC_GPRS |
3268 				    KVM_SYNC_ACRS |
3269 				    KVM_SYNC_CRS |
3270 				    KVM_SYNC_ARCH0 |
3271 				    KVM_SYNC_PFAULT |
3272 				    KVM_SYNC_DIAG318;
3273 	kvm_s390_set_prefix(vcpu, 0);
3274 	if (test_kvm_facility(vcpu->kvm, 64))
3275 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3276 	if (test_kvm_facility(vcpu->kvm, 82))
3277 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3278 	if (test_kvm_facility(vcpu->kvm, 133))
3279 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3280 	if (test_kvm_facility(vcpu->kvm, 156))
3281 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3282 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
3283 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3284 	 */
3285 	if (MACHINE_HAS_VX)
3286 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3287 	else
3288 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3289 
3290 	if (kvm_is_ucontrol(vcpu->kvm)) {
3291 		rc = __kvm_ucontrol_vcpu_init(vcpu);
3292 		if (rc)
3293 			goto out_free_sie_block;
3294 	}
3295 
3296 	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3297 		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3298 	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3299 
3300 	rc = kvm_s390_vcpu_setup(vcpu);
3301 	if (rc)
3302 		goto out_ucontrol_uninit;
3303 	return 0;
3304 
3305 out_ucontrol_uninit:
3306 	if (kvm_is_ucontrol(vcpu->kvm))
3307 		gmap_remove(vcpu->arch.gmap);
3308 out_free_sie_block:
3309 	free_page((unsigned long)(vcpu->arch.sie_block));
3310 	return rc;
3311 }
3312 
3313 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3314 {
3315 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3316 	return kvm_s390_vcpu_has_irq(vcpu, 0);
3317 }
3318 
3319 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3320 {
3321 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3322 }
3323 
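/*
 * prog20 is evaluated by the low-level SIE entry code; as long as
 * PROG_BLOCK_SIE is set the vcpu will not (re-)enter SIE.
 */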
3324 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3325 {
3326 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3327 	exit_sie(vcpu);
3328 }
3329 
3330 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3331 {
3332 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3333 }
3334 
3335 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3336 {
3337 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3338 	exit_sie(vcpu);
3339 }
3340 
3341 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3342 {
3343 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3344 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3345 }
3346 
3347 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3348 {
3349 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3350 }
3351 
3352 /*
3353  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3354  * If the CPU is not running (e.g. waiting as idle) the function will
3355  * return immediately. */
3356 void exit_sie(struct kvm_vcpu *vcpu)
3357 {
3358 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3359 	kvm_s390_vsie_kick(vcpu);
3360 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3361 		cpu_relax();
3362 }
3363 
3364 /* Kick a guest cpu out of SIE to process a request synchronously */
3365 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3366 {
3367 	kvm_make_request(req, vcpu);
3368 	kvm_s390_vcpu_request(vcpu);
3369 }
3370 
3371 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3372 			      unsigned long end)
3373 {
3374 	struct kvm *kvm = gmap->private;
3375 	struct kvm_vcpu *vcpu;
3376 	unsigned long prefix;
3377 	int i;
3378 
3379 	if (gmap_is_shadow(gmap))
3380 		return;
3381 	if (start >= 1UL << 31)
3382 		/* We are only interested in prefix pages */
3383 		return;
3384 	kvm_for_each_vcpu(i, vcpu, kvm) {
3385 		/* match against both prefix pages */
3386 		prefix = kvm_s390_get_prefix(vcpu);
3387 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3388 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3389 				   start, end);
3390 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3391 		}
3392 	}
3393 }
3394 
3395 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3396 {
3397 	/* do not poll with more than halt_poll_max_steal percent of steal time */
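	/*
	 * avg_steal_timer is in TOD clock units (4096 units per microsecond),
	 * so TICK_USEC << 12 converts the tick length to the same scale.
	 */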
3398 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3399 	    halt_poll_max_steal) {
3400 		vcpu->stat.halt_no_poll_steal++;
3401 		return true;
3402 	}
3403 	return false;
3404 }
3405 
3406 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3407 {
3408 	/* kvm common code refers to this, but never calls it */
3409 	BUG();
3410 	return 0;
3411 }
3412 
3413 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3414 					   struct kvm_one_reg *reg)
3415 {
3416 	int r = -EINVAL;
3417 
3418 	switch (reg->id) {
3419 	case KVM_REG_S390_TODPR:
3420 		r = put_user(vcpu->arch.sie_block->todpr,
3421 			     (u32 __user *)reg->addr);
3422 		break;
3423 	case KVM_REG_S390_EPOCHDIFF:
3424 		r = put_user(vcpu->arch.sie_block->epoch,
3425 			     (u64 __user *)reg->addr);
3426 		break;
3427 	case KVM_REG_S390_CPU_TIMER:
3428 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3429 			     (u64 __user *)reg->addr);
3430 		break;
3431 	case KVM_REG_S390_CLOCK_COMP:
3432 		r = put_user(vcpu->arch.sie_block->ckc,
3433 			     (u64 __user *)reg->addr);
3434 		break;
3435 	case KVM_REG_S390_PFTOKEN:
3436 		r = put_user(vcpu->arch.pfault_token,
3437 			     (u64 __user *)reg->addr);
3438 		break;
3439 	case KVM_REG_S390_PFCOMPARE:
3440 		r = put_user(vcpu->arch.pfault_compare,
3441 			     (u64 __user *)reg->addr);
3442 		break;
3443 	case KVM_REG_S390_PFSELECT:
3444 		r = put_user(vcpu->arch.pfault_select,
3445 			     (u64 __user *)reg->addr);
3446 		break;
3447 	case KVM_REG_S390_PP:
3448 		r = put_user(vcpu->arch.sie_block->pp,
3449 			     (u64 __user *)reg->addr);
3450 		break;
3451 	case KVM_REG_S390_GBEA:
3452 		r = put_user(vcpu->arch.sie_block->gbea,
3453 			     (u64 __user *)reg->addr);
3454 		break;
3455 	default:
3456 		break;
3457 	}
3458 
3459 	return r;
3460 }
3461 
3462 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3463 					   struct kvm_one_reg *reg)
3464 {
3465 	int r = -EINVAL;
3466 	__u64 val;
3467 
3468 	switch (reg->id) {
3469 	case KVM_REG_S390_TODPR:
3470 		r = get_user(vcpu->arch.sie_block->todpr,
3471 			     (u32 __user *)reg->addr);
3472 		break;
3473 	case KVM_REG_S390_EPOCHDIFF:
3474 		r = get_user(vcpu->arch.sie_block->epoch,
3475 			     (u64 __user *)reg->addr);
3476 		break;
3477 	case KVM_REG_S390_CPU_TIMER:
3478 		r = get_user(val, (u64 __user *)reg->addr);
3479 		if (!r)
3480 			kvm_s390_set_cpu_timer(vcpu, val);
3481 		break;
3482 	case KVM_REG_S390_CLOCK_COMP:
3483 		r = get_user(vcpu->arch.sie_block->ckc,
3484 			     (u64 __user *)reg->addr);
3485 		break;
3486 	case KVM_REG_S390_PFTOKEN:
3487 		r = get_user(vcpu->arch.pfault_token,
3488 			     (u64 __user *)reg->addr);
3489 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3490 			kvm_clear_async_pf_completion_queue(vcpu);
3491 		break;
3492 	case KVM_REG_S390_PFCOMPARE:
3493 		r = get_user(vcpu->arch.pfault_compare,
3494 			     (u64 __user *)reg->addr);
3495 		break;
3496 	case KVM_REG_S390_PFSELECT:
3497 		r = get_user(vcpu->arch.pfault_select,
3498 			     (u64 __user *)reg->addr);
3499 		break;
3500 	case KVM_REG_S390_PP:
3501 		r = get_user(vcpu->arch.sie_block->pp,
3502 			     (u64 __user *)reg->addr);
3503 		break;
3504 	case KVM_REG_S390_GBEA:
3505 		r = get_user(vcpu->arch.sie_block->gbea,
3506 			     (u64 __user *)reg->addr);
3507 		break;
3508 	default:
3509 		break;
3510 	}
3511 
3512 	return r;
3513 }
3514 
3515 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3516 {
3517 	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3518 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3519 	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3520 
3521 	kvm_clear_async_pf_completion_queue(vcpu);
3522 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3523 		kvm_s390_vcpu_stop(vcpu);
3524 	kvm_s390_clear_local_irqs(vcpu);
3525 }
3526 
3527 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3528 {
3529 	/* Initial reset is a superset of the normal reset */
3530 	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3531 
3532 	/*
3533 	 * This equals the initial cpu reset in the PoP, but we don't switch
3534 	 * to ESA mode. We not only reset the internal data, but also ...
3535 	 */
3536 	vcpu->arch.sie_block->gpsw.mask = 0;
3537 	vcpu->arch.sie_block->gpsw.addr = 0;
3538 	kvm_s390_set_prefix(vcpu, 0);
3539 	kvm_s390_set_cpu_timer(vcpu, 0);
3540 	vcpu->arch.sie_block->ckc = 0;
3541 	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3542 	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3543 	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3544 
3545 	/* ... the data in sync regs */
3546 	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3547 	vcpu->run->s.regs.ckc = 0;
3548 	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3549 	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3550 	vcpu->run->psw_addr = 0;
3551 	vcpu->run->psw_mask = 0;
3552 	vcpu->run->s.regs.todpr = 0;
3553 	vcpu->run->s.regs.cputm = 0;
3554 	vcpu->run->s.regs.ckc = 0;
3555 	vcpu->run->s.regs.pp = 0;
3556 	vcpu->run->s.regs.gbea = 1;
3557 	vcpu->run->s.regs.fpc = 0;
3558 	/*
3559 	 * Do not reset these registers in the protected case, as some of
3560 	 * them are overlaid and they are not accessible in this case
3561 	 * anyway.
3562 	 */
3563 	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3564 		vcpu->arch.sie_block->gbea = 1;
3565 		vcpu->arch.sie_block->pp = 0;
3566 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3567 		vcpu->arch.sie_block->todpr = 0;
3568 	}
3569 }
3570 
3571 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3572 {
3573 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3574 
3575 	/* Clear reset is a superset of the initial reset */
3576 	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3577 
3578 	memset(&regs->gprs, 0, sizeof(regs->gprs));
3579 	memset(&regs->vrs, 0, sizeof(regs->vrs));
3580 	memset(&regs->acrs, 0, sizeof(regs->acrs));
3581 	memset(&regs->gscb, 0, sizeof(regs->gscb));
3582 
3583 	regs->etoken = 0;
3584 	regs->etoken_extension = 0;
3585 }
3586 
3587 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3588 {
3589 	vcpu_load(vcpu);
3590 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3591 	vcpu_put(vcpu);
3592 	return 0;
3593 }
3594 
3595 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3596 {
3597 	vcpu_load(vcpu);
3598 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3599 	vcpu_put(vcpu);
3600 	return 0;
3601 }
3602 
3603 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3604 				  struct kvm_sregs *sregs)
3605 {
3606 	vcpu_load(vcpu);
3607 
3608 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3609 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3610 
3611 	vcpu_put(vcpu);
3612 	return 0;
3613 }
3614 
3615 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3616 				  struct kvm_sregs *sregs)
3617 {
3618 	vcpu_load(vcpu);
3619 
3620 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3621 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3622 
3623 	vcpu_put(vcpu);
3624 	return 0;
3625 }
3626 
3627 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3628 {
3629 	int ret = 0;
3630 
3631 	vcpu_load(vcpu);
3632 
3633 	if (test_fp_ctl(fpu->fpc)) {
3634 		ret = -EINVAL;
3635 		goto out;
3636 	}
3637 	vcpu->run->s.regs.fpc = fpu->fpc;
3638 	if (MACHINE_HAS_VX)
3639 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3640 				 (freg_t *) fpu->fprs);
3641 	else
3642 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3643 
3644 out:
3645 	vcpu_put(vcpu);
3646 	return ret;
3647 }
3648 
3649 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3650 {
3651 	vcpu_load(vcpu);
3652 
3653 	/* make sure we have the latest values */
3654 	save_fpu_regs();
3655 	if (MACHINE_HAS_VX)
3656 		convert_vx_to_fp((freg_t *) fpu->fprs,
3657 				 (__vector128 *) vcpu->run->s.regs.vrs);
3658 	else
3659 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3660 	fpu->fpc = vcpu->run->s.regs.fpc;
3661 
3662 	vcpu_put(vcpu);
3663 	return 0;
3664 }
3665 
3666 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3667 {
3668 	int rc = 0;
3669 
3670 	if (!is_vcpu_stopped(vcpu))
3671 		rc = -EBUSY;
3672 	else {
3673 		vcpu->run->psw_mask = psw.mask;
3674 		vcpu->run->psw_addr = psw.addr;
3675 	}
3676 	return rc;
3677 }
3678 
3679 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3680 				  struct kvm_translation *tr)
3681 {
3682 	return -EINVAL; /* not implemented yet */
3683 }
3684 
3685 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3686 			      KVM_GUESTDBG_USE_HW_BP | \
3687 			      KVM_GUESTDBG_ENABLE)
3688 
3689 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3690 					struct kvm_guest_debug *dbg)
3691 {
3692 	int rc = 0;
3693 
3694 	vcpu_load(vcpu);
3695 
3696 	vcpu->guest_debug = 0;
3697 	kvm_s390_clear_bp_data(vcpu);
3698 
3699 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3700 		rc = -EINVAL;
3701 		goto out;
3702 	}
3703 	if (!sclp.has_gpere) {
3704 		rc = -EINVAL;
3705 		goto out;
3706 	}
3707 
3708 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3709 		vcpu->guest_debug = dbg->control;
3710 		/* enforce guest PER */
3711 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3712 
3713 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3714 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3715 	} else {
3716 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3717 		vcpu->arch.guestdbg.last_bp = 0;
3718 	}
3719 
3720 	if (rc) {
3721 		vcpu->guest_debug = 0;
3722 		kvm_s390_clear_bp_data(vcpu);
3723 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3724 	}
3725 
3726 out:
3727 	vcpu_put(vcpu);
3728 	return rc;
3729 }
3730 
3731 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3732 				    struct kvm_mp_state *mp_state)
3733 {
3734 	int ret;
3735 
3736 	vcpu_load(vcpu);
3737 
3738 	/* CHECK_STOP and LOAD are not supported yet */
3739 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3740 				      KVM_MP_STATE_OPERATING;
3741 
3742 	vcpu_put(vcpu);
3743 	return ret;
3744 }
3745 
3746 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3747 				    struct kvm_mp_state *mp_state)
3748 {
3749 	int rc = 0;
3750 
3751 	vcpu_load(vcpu);
3752 
3753 	/* user space knows about this interface - let it control the state */
3754 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3755 
3756 	switch (mp_state->mp_state) {
3757 	case KVM_MP_STATE_STOPPED:
3758 		rc = kvm_s390_vcpu_stop(vcpu);
3759 		break;
3760 	case KVM_MP_STATE_OPERATING:
3761 		rc = kvm_s390_vcpu_start(vcpu);
3762 		break;
3763 	case KVM_MP_STATE_LOAD:
3764 		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3765 			rc = -ENXIO;
3766 			break;
3767 		}
3768 		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3769 		break;
3770 	case KVM_MP_STATE_CHECK_STOP:
3771 		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
3772 	default:
3773 		rc = -ENXIO;
3774 	}
3775 
3776 	vcpu_put(vcpu);
3777 	return rc;
3778 }
3779 
3780 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3781 {
3782 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3783 }
3784 
3785 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3786 {
3787 retry:
3788 	kvm_s390_vcpu_request_handled(vcpu);
3789 	if (!kvm_request_pending(vcpu))
3790 		return 0;
3791 	/*
3792 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3793 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3794 	 * This ensures that the ipte instruction for this request has
3795 	 * already finished. We might race against a second unmapper that
3796 	 * wants to set the blocking bit. Let's just retry the request loop.
3797 	 */
3798 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3799 		int rc;
3800 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3801 					  kvm_s390_get_prefix(vcpu),
3802 					  PAGE_SIZE * 2, PROT_WRITE);
3803 		if (rc) {
3804 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3805 			return rc;
3806 		}
3807 		goto retry;
3808 	}
3809 
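	/* an invalid ihcpu makes SIE purge the guest TLB on the next entry */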
3810 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3811 		vcpu->arch.sie_block->ihcpu = 0xffff;
3812 		goto retry;
3813 	}
3814 
3815 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3816 		if (!ibs_enabled(vcpu)) {
3817 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3818 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3819 		}
3820 		goto retry;
3821 	}
3822 
3823 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3824 		if (ibs_enabled(vcpu)) {
3825 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3826 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3827 		}
3828 		goto retry;
3829 	}
3830 
3831 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3832 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3833 		goto retry;
3834 	}
3835 
3836 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3837 		/*
3838 		 * Disable CMM virtualization; we will emulate the ESSA
3839 		 * instruction manually, in order to provide additional
3840 		 * functionalities needed for live migration.
3841 		 */
3842 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3843 		goto retry;
3844 	}
3845 
3846 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3847 		/*
3848 		 * Re-enable CMM virtualization if CMMA is available and
3849 		 * CMM has been used.
3850 		 */
3851 		if ((vcpu->kvm->arch.use_cmma) &&
3852 		    (vcpu->kvm->mm->context.uses_cmm))
3853 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3854 		goto retry;
3855 	}
3856 
3857 	/* nothing to do, just clear the request */
3858 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3859 	/* we left the vsie handler, nothing to do, just clear the request */
3860 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3861 
3862 	return 0;
3863 }
3864 
3865 void kvm_s390_set_tod_clock(struct kvm *kvm,
3866 			    const struct kvm_s390_vm_tod_clock *gtod)
3867 {
3868 	struct kvm_vcpu *vcpu;
3869 	struct kvm_s390_tod_clock_ext htod;
3870 	int i;
3871 
3872 	mutex_lock(&kvm->lock);
3873 	preempt_disable();
3874 
3875 	get_tod_clock_ext((char *)&htod);
3876 
3877 	kvm->arch.epoch = gtod->tod - htod.tod;
3878 	kvm->arch.epdx = 0;
3879 	if (test_kvm_facility(kvm, 139)) {
3880 		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
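		/* borrow from the epoch index if the TOD subtraction wrapped */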
3881 		if (kvm->arch.epoch > gtod->tod)
3882 			kvm->arch.epdx -= 1;
3883 	}
3884 
3885 	kvm_s390_vcpu_block_all(kvm);
3886 	kvm_for_each_vcpu(i, vcpu, kvm) {
3887 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3888 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3889 	}
3890 
3891 	kvm_s390_vcpu_unblock_all(kvm);
3892 	preempt_enable();
3893 	mutex_unlock(&kvm->lock);
3894 }
3895 
3896 /**
3897  * kvm_arch_fault_in_page - fault-in guest page if necessary
3898  * @vcpu: The corresponding virtual cpu
3899  * @gpa: Guest physical address
3900  * @writable: Whether the page should be writable or not
3901  *
3902  * Make sure that a guest page has been faulted-in on the host.
3903  *
3904  * Return: Zero on success, negative error code otherwise.
3905  */
3906 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3907 {
3908 	return gmap_fault(vcpu->arch.gmap, gpa,
3909 			  writable ? FAULT_FLAG_WRITE : 0);
3910 }
3911 
3912 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3913 				      unsigned long token)
3914 {
3915 	struct kvm_s390_interrupt inti;
3916 	struct kvm_s390_irq irq;
3917 
3918 	if (start_token) {
3919 		irq.u.ext.ext_params2 = token;
3920 		irq.type = KVM_S390_INT_PFAULT_INIT;
3921 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3922 	} else {
3923 		inti.type = KVM_S390_INT_PFAULT_DONE;
3924 		inti.parm64 = token;
3925 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3926 	}
3927 }
3928 
3929 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3930 				     struct kvm_async_pf *work)
3931 {
3932 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3933 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3934 
3935 	return true;
3936 }
3937 
3938 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3939 				 struct kvm_async_pf *work)
3940 {
3941 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3942 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3943 }
3944 
3945 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3946 			       struct kvm_async_pf *work)
3947 {
3948 	/* s390 will always inject the page directly */
3949 }
3950 
3951 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3952 {
3953 	/*
3954 	 * s390 will always inject the page directly,
3955 	 * but we still want check_async_completion to clean up
3956 	 */
3957 	return true;
3958 }
3959 
3960 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3961 {
3962 	hva_t hva;
3963 	struct kvm_arch_async_pf arch;
3964 
3965 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3966 		return false;
3967 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3968 	    vcpu->arch.pfault_compare)
3969 		return false;
3970 	if (psw_extint_disabled(vcpu))
3971 		return false;
3972 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3973 		return false;
3974 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3975 		return false;
3976 	if (!vcpu->arch.gmap->pfault_enabled)
3977 		return false;
3978 
3979 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3980 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3981 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3982 		return false;
3983 
3984 	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3985 }
3986 
3987 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3988 {
3989 	int rc, cpuflags;
3990 
3991 	/*
3992 	 * On s390 notifications for arriving pages will be delivered directly
3993 	 * to the guest but the housekeeping for completed pfaults is
3994 	 * handled outside the worker.
3995 	 */
3996 	kvm_check_async_pf_completion(vcpu);
3997 
3998 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3999 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4000 
4001 	if (need_resched())
4002 		schedule();
4003 
4004 	if (!kvm_is_ucontrol(vcpu->kvm)) {
4005 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
4006 		if (rc)
4007 			return rc;
4008 	}
4009 
4010 	rc = kvm_s390_handle_requests(vcpu);
4011 	if (rc)
4012 		return rc;
4013 
4014 	if (guestdbg_enabled(vcpu)) {
4015 		kvm_s390_backup_guest_per_regs(vcpu);
4016 		kvm_s390_patch_guest_per_regs(vcpu);
4017 	}
4018 
4019 	clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask);
4020 
4021 	vcpu->arch.sie_block->icptcode = 0;
4022 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4023 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4024 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
4025 
4026 	return 0;
4027 }
4028 
4029 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4030 {
4031 	struct kvm_s390_pgm_info pgm_info = {
4032 		.code = PGM_ADDRESSING,
4033 	};
4034 	u8 opcode, ilen;
4035 	int rc;
4036 
4037 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4038 	trace_kvm_s390_sie_fault(vcpu);
4039 
4040 	/*
4041 	 * We want to inject an addressing exception, which is defined as a
4042 	 * suppressing or terminating exception. However, since we came here
4043 	 * by a DAT access exception, the PSW still points to the faulting
4044 	 * instruction since DAT exceptions are nullifying. So we've got
4045 	 * to look up the current opcode to get the length of the instruction
4046 	 * to be able to forward the PSW.
4047 	 */
4048 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4049 	ilen = insn_length(opcode);
4050 	if (rc < 0) {
4051 		return rc;
4052 	} else if (rc) {
4053 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
4054 		 * Forward by arbitrary ilc, injection will take care of
4055 		 * nullification if necessary.
4056 		 */
4057 		pgm_info = vcpu->arch.pgm;
4058 		ilen = 4;
4059 	}
4060 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4061 	kvm_s390_forward_psw(vcpu, ilen);
4062 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4063 }
4064 
4065 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4066 {
4067 	struct mcck_volatile_info *mcck_info;
4068 	struct sie_page *sie_page;
4069 
4070 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4071 		   vcpu->arch.sie_block->icptcode);
4072 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4073 
4074 	if (guestdbg_enabled(vcpu))
4075 		kvm_s390_restore_guest_per_regs(vcpu);
4076 
4077 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4078 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4079 
4080 	if (exit_reason == -EINTR) {
4081 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
4082 		sie_page = container_of(vcpu->arch.sie_block,
4083 					struct sie_page, sie_block);
4084 		mcck_info = &sie_page->mcck_info;
4085 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
4086 		return 0;
4087 	}
4088 
4089 	if (vcpu->arch.sie_block->icptcode > 0) {
4090 		int rc = kvm_handle_sie_intercept(vcpu);
4091 
4092 		if (rc != -EOPNOTSUPP)
4093 			return rc;
4094 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4095 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4096 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4097 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4098 		return -EREMOTE;
4099 	} else if (exit_reason != -EFAULT) {
4100 		vcpu->stat.exit_null++;
4101 		return 0;
4102 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
4103 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4104 		vcpu->run->s390_ucontrol.trans_exc_code =
4105 						current->thread.gmap_addr;
4106 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
4107 		return -EREMOTE;
4108 	} else if (current->thread.gmap_pfault) {
4109 		trace_kvm_s390_major_guest_pfault(vcpu);
4110 		current->thread.gmap_pfault = 0;
4111 		if (kvm_arch_setup_async_pf(vcpu))
4112 			return 0;
4113 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4114 	}
4115 	return vcpu_post_run_fault_in_sie(vcpu);
4116 }
4117 
4118 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4119 static int __vcpu_run(struct kvm_vcpu *vcpu)
4120 {
4121 	int rc, exit_reason;
4122 	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4123 
4124 	/*
4125 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
4126 	 * ning the guest), so that memslots (and other stuff) are protected
4127 	 */
4128 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4129 
4130 	do {
4131 		rc = vcpu_pre_run(vcpu);
4132 		if (rc)
4133 			break;
4134 
4135 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4136 		/*
4137 		 * As PF_VCPU will be used in the fault handler, there must be
4138 		 * no uaccess between guest_enter and guest_exit.
4139 		 */
4140 		local_irq_disable();
4141 		guest_enter_irqoff();
4142 		__disable_cpu_timer_accounting(vcpu);
4143 		local_irq_enable();
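		/*
		 * Protected guests exchange their general purpose registers
		 * through the SIE page; the ultravisor controls which values
		 * become visible to the hypervisor.
		 */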
4144 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4145 			memcpy(sie_page->pv_grregs,
4146 			       vcpu->run->s.regs.gprs,
4147 			       sizeof(sie_page->pv_grregs));
4148 		}
4149 		exit_reason = sie64a(vcpu->arch.sie_block,
4150 				     vcpu->run->s.regs.gprs);
4151 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4152 			memcpy(vcpu->run->s.regs.gprs,
4153 			       sie_page->pv_grregs,
4154 			       sizeof(sie_page->pv_grregs));
4155 			/*
4156 			 * We're not allowed to inject interrupts on intercepts
4157 			 * that leave the guest state in an "in-between" state
4158 			 * where the next SIE entry will do a continuation.
4159 			 * Fence interrupts in our "internal" PSW.
4160 			 */
4161 			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4162 			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4163 				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4164 			}
4165 		}
4166 		local_irq_disable();
4167 		__enable_cpu_timer_accounting(vcpu);
4168 		guest_exit_irqoff();
4169 		local_irq_enable();
4170 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4171 
4172 		rc = vcpu_post_run(vcpu, exit_reason);
4173 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4174 
4175 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4176 	return rc;
4177 }
4178 
4179 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4180 {
4181 	struct kvm_run *kvm_run = vcpu->run;
4182 	struct runtime_instr_cb *riccb;
4183 	struct gs_cb *gscb;
4184 
4185 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4186 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4187 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4188 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4189 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4190 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4191 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4192 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4193 	}
4194 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4195 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4196 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4197 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4198 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4199 			kvm_clear_async_pf_completion_queue(vcpu);
4200 	}
4201 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4202 		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4203 		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4204 	}
4205 	/*
4206 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
4207 	 * we should enable RI here instead of doing the lazy enablement.
4208 	 */
4209 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4210 	    test_kvm_facility(vcpu->kvm, 64) &&
4211 	    riccb->v &&
4212 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4213 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4214 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4215 	}
4216 	/*
4217 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
4218 	 * we should enable GS here instead of doing the lazy enablement.
4219 	 */
4220 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4221 	    test_kvm_facility(vcpu->kvm, 133) &&
4222 	    gscb->gssm &&
4223 	    !vcpu->arch.gs_enabled) {
4224 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4225 		vcpu->arch.sie_block->ecb |= ECB_GS;
4226 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4227 		vcpu->arch.gs_enabled = 1;
4228 	}
4229 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4230 	    test_kvm_facility(vcpu->kvm, 82)) {
4231 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4232 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4233 	}
4234 	if (MACHINE_HAS_GS) {
4235 		preempt_disable();
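		/* enable the guarded-storage control so the GS control block can be saved/restored */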
4236 		__ctl_set_bit(2, 4);
4237 		if (current->thread.gs_cb) {
4238 			vcpu->arch.host_gscb = current->thread.gs_cb;
4239 			save_gs_cb(vcpu->arch.host_gscb);
4240 		}
4241 		if (vcpu->arch.gs_enabled) {
4242 			current->thread.gs_cb = (struct gs_cb *)
4243 						&vcpu->run->s.regs.gscb;
4244 			restore_gs_cb(current->thread.gs_cb);
4245 		}
4246 		preempt_enable();
4247 	}
4248 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
4249 }
4250 
4251 static void sync_regs(struct kvm_vcpu *vcpu)
4252 {
4253 	struct kvm_run *kvm_run = vcpu->run;
4254 
4255 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4256 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4257 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4258 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4259 		/* some control register changes require a tlb flush */
4260 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4261 	}
4262 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4263 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4264 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4265 	}
4266 	save_access_regs(vcpu->arch.host_acrs);
4267 	restore_access_regs(vcpu->run->s.regs.acrs);
4268 	/* save host (userspace) fprs/vrs */
4269 	save_fpu_regs();
4270 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4271 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4272 	if (MACHINE_HAS_VX)
4273 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4274 	else
4275 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4276 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4277 	if (test_fp_ctl(current->thread.fpu.fpc))
4278 		/* User space provided an invalid FPC, let's clear it */
4279 		current->thread.fpu.fpc = 0;
4280 
4281 	/* Sync fmt2 only data */
4282 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4283 		sync_regs_fmt2(vcpu);
4284 	} else {
4285 		/*
4286 		 * In several places we have to modify our internal view to
4287 		 * not do things that are disallowed by the ultravisor. For
4288 		 * example we must not inject interrupts after specific exits
4289 		 * (e.g. 112 prefix page not secure). We do this by turning
4290 		 * off the machine check, external and I/O interrupt bits
4291 		 * of our PSW copy. To avoid getting validity intercepts, we
4292 		 * only accept the condition code from userspace.
4293 		 */
4294 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4295 		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4296 						   PSW_MASK_CC;
4297 	}
4298 
4299 	kvm_run->kvm_dirty_regs = 0;
4300 }
4301 
4302 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4303 {
4304 	struct kvm_run *kvm_run = vcpu->run;
4305 
4306 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4307 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4308 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4309 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4310 	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4311 	if (MACHINE_HAS_GS) {
4312 		preempt_disable();
4313 		__ctl_set_bit(2, 4);
4314 		if (vcpu->arch.gs_enabled)
4315 			save_gs_cb(current->thread.gs_cb);
4316 		current->thread.gs_cb = vcpu->arch.host_gscb;
4317 		restore_gs_cb(vcpu->arch.host_gscb);
4318 		if (!vcpu->arch.host_gscb)
4319 			__ctl_clear_bit(2, 4);
4320 		vcpu->arch.host_gscb = NULL;
4321 		preempt_enable();
4322 	}
4323 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
4324 }
4325 
4326 static void store_regs(struct kvm_vcpu *vcpu)
4327 {
4328 	struct kvm_run *kvm_run = vcpu->run;
4329 
4330 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4331 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4332 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4333 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4334 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4335 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4336 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4337 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4338 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4339 	save_access_regs(vcpu->run->s.regs.acrs);
4340 	restore_access_regs(vcpu->arch.host_acrs);
4341 	/* Save guest register state */
4342 	save_fpu_regs();
4343 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4344 	/* Restore will be done lazily at return */
4345 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4346 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4347 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4348 		store_regs_fmt2(vcpu);
4349 }
4350 
4351 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4352 {
4353 	struct kvm_run *kvm_run = vcpu->run;
4354 	int rc;
4355 
4356 	if (kvm_run->immediate_exit)
4357 		return -EINTR;
4358 
4359 	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4360 	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4361 		return -EINVAL;
4362 
4363 	vcpu_load(vcpu);
4364 
4365 	if (guestdbg_exit_pending(vcpu)) {
4366 		kvm_s390_prepare_debug_exit(vcpu);
4367 		rc = 0;
4368 		goto out;
4369 	}
4370 
4371 	kvm_sigset_activate(vcpu);
4372 
4373 	/*
4374 	 * No need to check the return value of vcpu_start: it can only fail
4375 	 * for protvirt, and protvirt implies user controlled cpu state.
4376 	 */
4377 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4378 		kvm_s390_vcpu_start(vcpu);
4379 	} else if (is_vcpu_stopped(vcpu)) {
4380 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4381 				   vcpu->vcpu_id);
4382 		rc = -EINVAL;
4383 		goto out;
4384 	}
4385 
4386 	sync_regs(vcpu);
4387 	enable_cpu_timer_accounting(vcpu);
4388 
4389 	might_fault();
4390 	rc = __vcpu_run(vcpu);
4391 
4392 	if (signal_pending(current) && !rc) {
4393 		kvm_run->exit_reason = KVM_EXIT_INTR;
4394 		rc = -EINTR;
4395 	}
4396 
4397 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4398 		kvm_s390_prepare_debug_exit(vcpu);
4399 		rc = 0;
4400 	}
4401 
4402 	if (rc == -EREMOTE) {
4403 		/* userspace support is needed, kvm_run has been prepared */
4404 		rc = 0;
4405 	}
4406 
4407 	disable_cpu_timer_accounting(vcpu);
4408 	store_regs(vcpu);
4409 
4410 	kvm_sigset_deactivate(vcpu);
4411 
4412 	vcpu->stat.exit_userspace++;
4413 out:
4414 	vcpu_put(vcpu);
4415 	return rc;
4416 }
4417 
4418 /*
4419  * store status at address
4420  * we have two special cases:
4421  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4422  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4423  */
4424 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4425 {
4426 	unsigned char archmode = 1;
4427 	freg_t fprs[NUM_FPRS];
4428 	unsigned int px;
4429 	u64 clkcomp, cputm;
4430 	int rc;
4431 
4432 	px = kvm_s390_get_prefix(vcpu);
4433 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4434 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4435 			return -EFAULT;
4436 		gpa = 0;
4437 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4438 		if (write_guest_real(vcpu, 163, &archmode, 1))
4439 			return -EFAULT;
4440 		gpa = px;
4441 	} else
4442 		gpa -= __LC_FPREGS_SAVE_AREA;
4443 
4444 	/* manually convert vector registers if necessary */
4445 	if (MACHINE_HAS_VX) {
4446 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4447 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4448 				     fprs, 128);
4449 	} else {
4450 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4451 				     vcpu->run->s.regs.fprs, 128);
4452 	}
4453 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4454 			      vcpu->run->s.regs.gprs, 128);
4455 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4456 			      &vcpu->arch.sie_block->gpsw, 16);
4457 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4458 			      &px, 4);
4459 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4460 			      &vcpu->run->s.regs.fpc, 4);
4461 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4462 			      &vcpu->arch.sie_block->todpr, 4);
4463 	cputm = kvm_s390_get_cpu_timer(vcpu);
4464 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4465 			      &cputm, 8);
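	/* only bits 0-55 of the clock comparator are stored, hence the shift */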
4466 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4467 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4468 			      &clkcomp, 8);
4469 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4470 			      &vcpu->run->s.regs.acrs, 64);
4471 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4472 			      &vcpu->arch.sie_block->gcr, 128);
4473 	return rc ? -EFAULT : 0;
4474 }
4475 
4476 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4477 {
4478 	/*
4479 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4480 	 * switch in the run ioctl. Let's update our copies before we save
4481 	 * them into the save area.
4482 	 */
4483 	save_fpu_regs();
4484 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4485 	save_access_regs(vcpu->run->s.regs.acrs);
4486 
4487 	return kvm_s390_store_status_unloaded(vcpu, addr);
4488 }
4489 
4490 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4491 {
4492 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4493 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4494 }
4495 
4496 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4497 {
4498 	unsigned int i;
4499 	struct kvm_vcpu *vcpu;
4500 
4501 	kvm_for_each_vcpu(i, vcpu, kvm) {
4502 		__disable_ibs_on_vcpu(vcpu);
4503 	}
4504 }
4505 
4506 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4507 {
4508 	if (!sclp.has_ibs)
4509 		return;
4510 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4511 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4512 }
4513 
4514 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4515 {
4516 	int i, online_vcpus, r = 0, started_vcpus = 0;
4517 
4518 	if (!is_vcpu_stopped(vcpu))
4519 		return 0;
4520 
4521 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4522 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4523 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4524 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4525 
4526 	/* Let's tell the UV that we want to change into the operating state */
4527 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4528 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4529 		if (r) {
4530 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4531 			return r;
4532 		}
4533 	}
4534 
4535 	for (i = 0; i < online_vcpus; i++) {
4536 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4537 			started_vcpus++;
4538 	}
4539 
4540 	if (started_vcpus == 0) {
4541 		/* we're the only active VCPU -> speed it up */
4542 		__enable_ibs_on_vcpu(vcpu);
4543 	} else if (started_vcpus == 1) {
4544 		/*
4545 		 * As we are starting a second VCPU, we have to disable
4546 		 * the IBS facility on all VCPUs to remove potentially
4547 		 * outstanding ENABLE requests.
4548 		 */
4549 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4550 	}
4551 
4552 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4553 	/*
4554 	 * The real PSW might have changed due to a RESTART interpreted by the
4555 	 * ultravisor. We block all interrupts and let the next sie exit
4556 	 * refresh our view.
4557 	 */
4558 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4559 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4560 	/*
4561 	 * Another VCPU might have used IBS while we were offline.
4562 	 * Let's play safe and flush the VCPU at startup.
4563 	 */
4564 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4565 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4566 	return 0;
4567 }
4568 
4569 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4570 {
4571 	int i, online_vcpus, r = 0, started_vcpus = 0;
4572 	struct kvm_vcpu *started_vcpu = NULL;
4573 
4574 	if (is_vcpu_stopped(vcpu))
4575 		return 0;
4576 
4577 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4578 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4579 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4580 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4581 
4582 	/* Let's tell the UV that we want to change into the stopped state */
4583 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4584 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4585 		if (r) {
4586 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4587 			return r;
4588 		}
4589 	}
4590 
4591 	/*
4592 	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
4593 	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
4594 	 * have been fully processed. This will ensure that the VCPU
4595 	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
4596 	 */
4597 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4598 	kvm_s390_clear_stop_irq(vcpu);
4599 
4600 	__disable_ibs_on_vcpu(vcpu);
4601 
4602 	for (i = 0; i < online_vcpus; i++) {
4603 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4604 			started_vcpus++;
4605 			started_vcpu = vcpu->kvm->vcpus[i];
4606 		}
4607 	}
4608 
4609 	if (started_vcpus == 1) {
4610 		/*
4611 		 * As we only have one VCPU left, we want to enable the
4612 		 * IBS facility for that VCPU to speed it up.
4613 		 */
4614 		__enable_ibs_on_vcpu(started_vcpu);
4615 	}
4616 
4617 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4618 	return 0;
4619 }
4620 
4621 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4622 				     struct kvm_enable_cap *cap)
4623 {
4624 	int r;
4625 
4626 	if (cap->flags)
4627 		return -EINVAL;
4628 
4629 	switch (cap->cap) {
4630 	case KVM_CAP_S390_CSS_SUPPORT:
4631 		if (!vcpu->kvm->arch.css_support) {
4632 			vcpu->kvm->arch.css_support = 1;
4633 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4634 			trace_kvm_s390_enable_css(vcpu->kvm);
4635 		}
4636 		r = 0;
4637 		break;
4638 	default:
4639 		r = -EINVAL;
4640 		break;
4641 	}
4642 	return r;
4643 }
4644 
4645 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4646 				   struct kvm_s390_mem_op *mop)
4647 {
4648 	void __user *uaddr = (void __user *)mop->buf;
4649 	int r = 0;
4650 
4651 	if (mop->flags || !mop->size)
4652 		return -EINVAL;
4653 	if (mop->size + mop->sida_offset < mop->size)
4654 		return -EINVAL;
4655 	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4656 		return -E2BIG;
4657 	if (!kvm_s390_pv_cpu_is_protected(vcpu))
4658 		return -EINVAL;
4659 
4660 	switch (mop->op) {
4661 	case KVM_S390_MEMOP_SIDA_READ:
4662 		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4663 				 mop->sida_offset), mop->size))
4664 			r = -EFAULT;
4665 
4666 		break;
4667 	case KVM_S390_MEMOP_SIDA_WRITE:
4668 		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4669 				   mop->sida_offset), uaddr, mop->size))
4670 			r = -EFAULT;
4671 		break;
4672 	}
4673 	return r;
4674 }
4675 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4676 				  struct kvm_s390_mem_op *mop)
4677 {
4678 	void __user *uaddr = (void __user *)mop->buf;
4679 	void *tmpbuf = NULL;
4680 	int r = 0;
4681 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4682 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
4683 
4684 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4685 		return -EINVAL;
4686 
4687 	if (mop->size > MEM_OP_MAX_SIZE)
4688 		return -E2BIG;
4689 
4690 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4691 		return -EINVAL;
4692 
4693 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4694 		tmpbuf = vmalloc(mop->size);
4695 		if (!tmpbuf)
4696 			return -ENOMEM;
4697 	}
4698 
4699 	switch (mop->op) {
4700 	case KVM_S390_MEMOP_LOGICAL_READ:
4701 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4702 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4703 					    mop->size, GACC_FETCH);
4704 			break;
4705 		}
4706 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4707 		if (r == 0) {
4708 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4709 				r = -EFAULT;
4710 		}
4711 		break;
4712 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4713 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4714 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4715 					    mop->size, GACC_STORE);
4716 			break;
4717 		}
4718 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4719 			r = -EFAULT;
4720 			break;
4721 		}
4722 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4723 		break;
4724 	}
4725 
4726 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4727 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4728 
4729 	vfree(tmpbuf);
4730 	return r;
4731 }
4732 
4733 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4734 				      struct kvm_s390_mem_op *mop)
4735 {
4736 	int r, srcu_idx;
4737 
4738 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4739 
4740 	switch (mop->op) {
4741 	case KVM_S390_MEMOP_LOGICAL_READ:
4742 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4743 		r = kvm_s390_guest_mem_op(vcpu, mop);
4744 		break;
4745 	case KVM_S390_MEMOP_SIDA_READ:
4746 	case KVM_S390_MEMOP_SIDA_WRITE:
4747 		/* we are locked against sida going away by the vcpu->mutex */
4748 		r = kvm_s390_guest_sida_op(vcpu, mop);
4749 		break;
4750 	default:
4751 		r = -EINVAL;
4752 	}
4753 
4754 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4755 	return r;
4756 }
4757 
4758 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4759 			       unsigned int ioctl, unsigned long arg)
4760 {
4761 	struct kvm_vcpu *vcpu = filp->private_data;
4762 	void __user *argp = (void __user *)arg;
4763 
4764 	switch (ioctl) {
4765 	case KVM_S390_IRQ: {
4766 		struct kvm_s390_irq s390irq;
4767 
4768 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4769 			return -EFAULT;
4770 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4771 	}
4772 	case KVM_S390_INTERRUPT: {
4773 		struct kvm_s390_interrupt s390int;
4774 		struct kvm_s390_irq s390irq = {};
4775 
4776 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4777 			return -EFAULT;
4778 		if (s390int_to_s390irq(&s390int, &s390irq))
4779 			return -EINVAL;
4780 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4781 	}
4782 	}
4783 	return -ENOIOCTLCMD;
4784 }
4785 
4786 long kvm_arch_vcpu_ioctl(struct file *filp,
4787 			 unsigned int ioctl, unsigned long arg)
4788 {
4789 	struct kvm_vcpu *vcpu = filp->private_data;
4790 	void __user *argp = (void __user *)arg;
4791 	int idx;
4792 	long r;
4793 	u16 rc, rrc;
4794 
4795 	vcpu_load(vcpu);
4796 
4797 	switch (ioctl) {
4798 	case KVM_S390_STORE_STATUS:
4799 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4800 		r = kvm_s390_store_status_unloaded(vcpu, arg);
4801 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4802 		break;
4803 	case KVM_S390_SET_INITIAL_PSW: {
4804 		psw_t psw;
4805 
4806 		r = -EFAULT;
4807 		if (copy_from_user(&psw, argp, sizeof(psw)))
4808 			break;
4809 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4810 		break;
4811 	}
4812 	case KVM_S390_CLEAR_RESET:
4813 		r = 0;
4814 		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4815 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4816 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4817 					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4818 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4819 				   rc, rrc);
4820 		}
4821 		break;
4822 	case KVM_S390_INITIAL_RESET:
4823 		r = 0;
4824 		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4825 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4826 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4827 					  UVC_CMD_CPU_RESET_INITIAL,
4828 					  &rc, &rrc);
4829 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4830 				   rc, rrc);
4831 		}
4832 		break;
4833 	case KVM_S390_NORMAL_RESET:
4834 		r = 0;
4835 		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4836 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4837 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4838 					  UVC_CMD_CPU_RESET, &rc, &rrc);
4839 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4840 				   rc, rrc);
4841 		}
4842 		break;
4843 	case KVM_SET_ONE_REG:
4844 	case KVM_GET_ONE_REG: {
4845 		struct kvm_one_reg reg;
4846 		r = -EINVAL;
4847 		if (kvm_s390_pv_cpu_is_protected(vcpu))
4848 			break;
4849 		r = -EFAULT;
4850 		if (copy_from_user(&reg, argp, sizeof(reg)))
4851 			break;
4852 		if (ioctl == KVM_SET_ONE_REG)
4853 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4854 		else
4855 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4856 		break;
4857 	}
4858 #ifdef CONFIG_KVM_S390_UCONTROL
4859 	case KVM_S390_UCAS_MAP: {
4860 		struct kvm_s390_ucas_mapping ucasmap;
4861 
4862 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4863 			r = -EFAULT;
4864 			break;
4865 		}
4866 
4867 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4868 			r = -EINVAL;
4869 			break;
4870 		}
4871 
4872 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4873 				     ucasmap.vcpu_addr, ucasmap.length);
4874 		break;
4875 	}
4876 	case KVM_S390_UCAS_UNMAP: {
4877 		struct kvm_s390_ucas_mapping ucasmap;
4878 
4879 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4880 			r = -EFAULT;
4881 			break;
4882 		}
4883 
4884 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4885 			r = -EINVAL;
4886 			break;
4887 		}
4888 
4889 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4890 			ucasmap.length);
4891 		break;
4892 	}
4893 #endif
4894 	case KVM_S390_VCPU_FAULT: {
4895 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
4896 		break;
4897 	}
4898 	case KVM_ENABLE_CAP:
4899 	{
4900 		struct kvm_enable_cap cap;
4901 		r = -EFAULT;
4902 		if (copy_from_user(&cap, argp, sizeof(cap)))
4903 			break;
4904 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4905 		break;
4906 	}
4907 	case KVM_S390_MEM_OP: {
4908 		struct kvm_s390_mem_op mem_op;
4909 
4910 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4911 			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4912 		else
4913 			r = -EFAULT;
4914 		break;
4915 	}
4916 	case KVM_S390_SET_IRQ_STATE: {
4917 		struct kvm_s390_irq_state irq_state;
4918 
4919 		r = -EFAULT;
4920 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4921 			break;
4922 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4923 		    irq_state.len == 0 ||
4924 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4925 			r = -EINVAL;
4926 			break;
4927 		}
4928 		/* do not use irq_state.flags, it will break old QEMUs */
4929 		r = kvm_s390_set_irq_state(vcpu,
4930 					   (void __user *) irq_state.buf,
4931 					   irq_state.len);
4932 		break;
4933 	}
4934 	case KVM_S390_GET_IRQ_STATE: {
4935 		struct kvm_s390_irq_state irq_state;
4936 
4937 		r = -EFAULT;
4938 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4939 			break;
4940 		if (irq_state.len == 0) {
4941 			r = -EINVAL;
4942 			break;
4943 		}
4944 		/* do not use irq_state.flags, it will break old QEMUs */
4945 		r = kvm_s390_get_irq_state(vcpu,
4946 					   (__u8 __user *)  irq_state.buf,
4947 					   irq_state.len);
4948 		break;
4949 	}
4950 	default:
4951 		r = -ENOTTY;
4952 	}
4953 
4954 	vcpu_put(vcpu);
4955 	return r;
4956 }
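/*
 * Illustrative sketch, not part of the original source: a userspace read of
 * guest logical memory through the KVM_S390_MEM_OP case above. Field names
 * follow struct kvm_s390_mem_op in <linux/kvm.h>; the guest address and
 * buffer size are made-up example values (the size must stay within
 * MEM_OP_MAX_SIZE). vcpu_fd is an assumed, already-open vCPU file descriptor.
 *
 *	__u8 buf[256];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x10000,
 *		.size = sizeof(buf),
 *		.op = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf = (__u64)(unsigned long)buf,
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *		perror("KVM_S390_MEM_OP");
 */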
4957 
4958 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4959 {
4960 #ifdef CONFIG_KVM_S390_UCONTROL
4961 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4962 		 && (kvm_is_ucontrol(vcpu->kvm))) {
4963 		vmf->page = virt_to_page(vcpu->arch.sie_block);
4964 		get_page(vmf->page);
4965 		return 0;
4966 	}
4967 #endif
4968 	return VM_FAULT_SIGBUS;
4969 }
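/*
 * Illustrative sketch, not part of the original source: for a
 * user-controlled (ucontrol) VM the fault handler above backs an mmap()
 * of the SIE control block on the vCPU file descriptor, roughly as below.
 * KVM_S390_SIE_PAGE_OFFSET is the kernel-side page offset (userspace would
 * use the equivalent constant), and 4096 stands in for the page size.
 *
 *	void *sie_block = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *			       MAP_SHARED, vcpu_fd,
 *			       KVM_S390_SIE_PAGE_OFFSET * 4096);
 */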
4970 
4971 /* Section: memory related */
4972 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4973 				   struct kvm_memory_slot *memslot,
4974 				   const struct kvm_userspace_memory_region *mem,
4975 				   enum kvm_mr_change change)
4976 {
4977 	/* A few sanity checks. Memory slots have to start and end on a segment
4978 	   boundary (1 MB). The memory in userland may be fragmented across
4979 	   several different vmas. It is fine to mmap() and munmap() parts of
4980 	   this slot at any time after this call. */
4981 
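	/* 0xfffff masks the low 20 bits: start address and size must be 1 MB (segment) aligned */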
4982 	if (mem->userspace_addr & 0xffffful)
4983 		return -EINVAL;
4984 
4985 	if (mem->memory_size & 0xffffful)
4986 		return -EINVAL;
4987 
4988 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4989 		return -EINVAL;
4990 
4991 	/* When we are protected, we should not change the memory slots */
4992 	if (kvm_s390_pv_get_handle(kvm))
4993 		return -EINVAL;
4994 	return 0;
4995 }
4996 
4997 void kvm_arch_commit_memory_region(struct kvm *kvm,
4998 				const struct kvm_userspace_memory_region *mem,
4999 				struct kvm_memory_slot *old,
5000 				const struct kvm_memory_slot *new,
5001 				enum kvm_mr_change change)
5002 {
5003 	int rc = 0;
5004 
5005 	switch (change) {
5006 	case KVM_MR_DELETE:
5007 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5008 					old->npages * PAGE_SIZE);
5009 		break;
5010 	case KVM_MR_MOVE:
5011 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5012 					old->npages * PAGE_SIZE);
5013 		if (rc)
5014 			break;
5015 		fallthrough;
5016 	case KVM_MR_CREATE:
5017 		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5018 				      mem->guest_phys_addr, mem->memory_size);
5019 		break;
5020 	case KVM_MR_FLAGS_ONLY:
5021 		break;
5022 	default:
5023 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5024 	}
5025 	if (rc)
5026 		pr_warn("failed to commit memory region\n");
5027 	return;
5028 }
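/*
 * Illustrative sketch, not part of the original source: a memory slot that
 * satisfies the 1 MB alignment checks in kvm_arch_prepare_memory_region()
 * and is then mapped into the gmap by the KVM_MR_CREATE case above. The
 * slot number, size and the "backing" pointer are made-up example values;
 * the backing memory must itself be 1 MB aligned. vm_fd is an assumed,
 * already-open VM file descriptor.
 *
 *	struct kvm_userspace_memory_region region = {
 *		.slot = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size = 256UL << 20,
 *		.userspace_addr = (__u64)(unsigned long)backing,
 *	};
 *	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
 */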
5029 
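/*
 * Added note, not in the original source: the 2-bit field
 * (sclp.hmfai << i * 2) >> 30 selects, for facility doubleword i, how many
 * 16-bit chunks to strip from the returned mask; e.g. a field value of 0
 * keeps the low 48 bits (0x0000ffffffffffff) and a value of 2 keeps only
 * the low 16 bits.
 */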
5030 static inline unsigned long nonhyp_mask(int i)
5031 {
5032 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5033 
5034 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5035 }
5036 
5037 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5038 {
5039 	vcpu->valid_wakeup = false;
5040 }
5041 
5042 static int __init kvm_s390_init(void)
5043 {
5044 	int i;
5045 
5046 	if (!sclp.has_sief2) {
5047 		pr_info("SIE is not available\n");
5048 		return -ENODEV;
5049 	}
5050 
5051 	if (nested && hpage) {
5052 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5053 		return -EINVAL;
5054 	}
5055 
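	/*
	 * Added note, not in the original source: host STFLE facility bits,
	 * limited by nonhyp_mask(), are merged (OR'ed) into kvm_s390_fac_base
	 * before kvm_init() is called.
	 */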
5056 	for (i = 0; i < 16; i++)
5057 		kvm_s390_fac_base[i] |=
5058 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
5059 
5060 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5061 }
5062 
5063 static void __exit kvm_s390_exit(void)
5064 {
5065 	kvm_exit();
5066 }
5067 
5068 module_init(kvm_s390_init);
5069 module_exit(kvm_s390_exit);
5070 
5071 /*
5072  * Enable autoloading of the kvm module.
5073  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5074  * since x86 takes a different approach.
5075  */
5076 #include <linux/miscdevice.h>
5077 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5078 MODULE_ALIAS("devname:kvm");
5079