1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2020
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
35 
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include "kvm-s390.h"
49 #include "gaccess.h"
50 
51 #define CREATE_TRACE_POINTS
52 #include "trace.h"
53 #include "trace-s390.h"
54 
55 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
56 #define LOCAL_IRQS 32
57 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
58 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
59 
60 struct kvm_stats_debugfs_item debugfs_entries[] = {
61 	VCPU_STAT("userspace_handled", exit_userspace),
62 	VCPU_STAT("exit_null", exit_null),
63 	VCPU_STAT("exit_validity", exit_validity),
64 	VCPU_STAT("exit_stop_request", exit_stop_request),
65 	VCPU_STAT("exit_external_request", exit_external_request),
66 	VCPU_STAT("exit_io_request", exit_io_request),
67 	VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
68 	VCPU_STAT("exit_instruction", exit_instruction),
69 	VCPU_STAT("exit_pei", exit_pei),
70 	VCPU_STAT("exit_program_interruption", exit_program_interruption),
71 	VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
72 	VCPU_STAT("exit_operation_exception", exit_operation_exception),
73 	VCPU_STAT("halt_successful_poll", halt_successful_poll),
74 	VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
75 	VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
76 	VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
77 	VCPU_STAT("halt_wakeup", halt_wakeup),
78 	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
79 	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
80 	VCPU_STAT("instruction_lctlg", instruction_lctlg),
81 	VCPU_STAT("instruction_lctl", instruction_lctl),
82 	VCPU_STAT("instruction_stctl", instruction_stctl),
83 	VCPU_STAT("instruction_stctg", instruction_stctg),
84 	VCPU_STAT("deliver_ckc", deliver_ckc),
85 	VCPU_STAT("deliver_cputm", deliver_cputm),
86 	VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
87 	VCPU_STAT("deliver_external_call", deliver_external_call),
88 	VCPU_STAT("deliver_service_signal", deliver_service_signal),
89 	VCPU_STAT("deliver_virtio", deliver_virtio),
90 	VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
91 	VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
92 	VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
93 	VCPU_STAT("deliver_program", deliver_program),
94 	VCPU_STAT("deliver_io", deliver_io),
95 	VCPU_STAT("deliver_machine_check", deliver_machine_check),
96 	VCPU_STAT("exit_wait_state", exit_wait_state),
97 	VCPU_STAT("inject_ckc", inject_ckc),
98 	VCPU_STAT("inject_cputm", inject_cputm),
99 	VCPU_STAT("inject_external_call", inject_external_call),
100 	VM_STAT("inject_float_mchk", inject_float_mchk),
101 	VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
102 	VM_STAT("inject_io", inject_io),
103 	VCPU_STAT("inject_mchk", inject_mchk),
104 	VM_STAT("inject_pfault_done", inject_pfault_done),
105 	VCPU_STAT("inject_program", inject_program),
106 	VCPU_STAT("inject_restart", inject_restart),
107 	VM_STAT("inject_service_signal", inject_service_signal),
108 	VCPU_STAT("inject_set_prefix", inject_set_prefix),
109 	VCPU_STAT("inject_stop_signal", inject_stop_signal),
110 	VCPU_STAT("inject_pfault_init", inject_pfault_init),
111 	VM_STAT("inject_virtio", inject_virtio),
112 	VCPU_STAT("instruction_epsw", instruction_epsw),
113 	VCPU_STAT("instruction_gs", instruction_gs),
114 	VCPU_STAT("instruction_io_other", instruction_io_other),
115 	VCPU_STAT("instruction_lpsw", instruction_lpsw),
116 	VCPU_STAT("instruction_lpswe", instruction_lpswe),
117 	VCPU_STAT("instruction_pfmf", instruction_pfmf),
118 	VCPU_STAT("instruction_ptff", instruction_ptff),
119 	VCPU_STAT("instruction_stidp", instruction_stidp),
120 	VCPU_STAT("instruction_sck", instruction_sck),
121 	VCPU_STAT("instruction_sckpf", instruction_sckpf),
122 	VCPU_STAT("instruction_spx", instruction_spx),
123 	VCPU_STAT("instruction_stpx", instruction_stpx),
124 	VCPU_STAT("instruction_stap", instruction_stap),
125 	VCPU_STAT("instruction_iske", instruction_iske),
126 	VCPU_STAT("instruction_ri", instruction_ri),
127 	VCPU_STAT("instruction_rrbe", instruction_rrbe),
128 	VCPU_STAT("instruction_sske", instruction_sske),
129 	VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
130 	VCPU_STAT("instruction_essa", instruction_essa),
131 	VCPU_STAT("instruction_stsi", instruction_stsi),
132 	VCPU_STAT("instruction_stfl", instruction_stfl),
133 	VCPU_STAT("instruction_tb", instruction_tb),
134 	VCPU_STAT("instruction_tpi", instruction_tpi),
135 	VCPU_STAT("instruction_tprot", instruction_tprot),
136 	VCPU_STAT("instruction_tsch", instruction_tsch),
137 	VCPU_STAT("instruction_sthyi", instruction_sthyi),
138 	VCPU_STAT("instruction_sie", instruction_sie),
139 	VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
140 	VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
141 	VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
142 	VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
143 	VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
144 	VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
145 	VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
146 	VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
147 	VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
148 	VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
149 	VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
150 	VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
151 	VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
152 	VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
153 	VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
154 	VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
155 	VCPU_STAT("instruction_diag_10", diagnose_10),
156 	VCPU_STAT("instruction_diag_44", diagnose_44),
157 	VCPU_STAT("instruction_diag_9c", diagnose_9c),
158 	VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
159 	VCPU_STAT("instruction_diag_258", diagnose_258),
160 	VCPU_STAT("instruction_diag_308", diagnose_308),
161 	VCPU_STAT("instruction_diag_500", diagnose_500),
162 	VCPU_STAT("instruction_diag_other", diagnose_other),
163 	{ NULL }
164 };
165 
166 struct kvm_s390_tod_clock_ext {
167 	__u8 epoch_idx;
168 	__u64 tod;
169 	__u8 reserved[7];
170 } __packed;
171 
172 /* allow nested virtualization in KVM (if enabled by user space) */
173 static int nested;
174 module_param(nested, int, S_IRUGO);
175 MODULE_PARM_DESC(nested, "Nested virtualization support");
176 
177 /* allow 1m huge page guest backing, if !nested */
178 static int hpage;
179 module_param(hpage, int, 0444);
180 MODULE_PARM_DESC(hpage, "1m huge page backing support");
181 
182 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
183 static u8 halt_poll_max_steal = 10;
184 module_param(halt_poll_max_steal, byte, 0644);
185 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
186 
187 /* if set to true, the GISA will be initialized and used if available */
188 static bool use_gisa  = true;
189 module_param(use_gisa, bool, 0644);
190 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
191 
192 /*
193  * For now we handle at most 16 double words as this is what the s390 base
194  * kernel handles and stores in the prefix page. If we ever need to go beyond
195  * this, the code needs to be changed, but the external uapi can stay.
196  */
197 #define SIZE_INTERNAL 16
198 
199 /*
200  * Base feature mask that defines default mask for facilities. Consists of the
201  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
202  */
203 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
204 /*
205  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
206  * and defines the facilities that can be enabled via a cpu model.
207  */
208 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
209 
210 static unsigned long kvm_s390_fac_size(void)
211 {
212 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
213 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
214 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
215 		sizeof(S390_lowcore.stfle_fac_list));
216 
217 	return SIZE_INTERNAL;
218 }
219 
220 /* available cpu features supported by kvm */
221 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
222 /* available subfunctions indicated via query / "test bit" */
223 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
224 
225 static struct gmap_notifier gmap_notifier;
226 static struct gmap_notifier vsie_gmap_notifier;
227 debug_info_t *kvm_s390_dbf;
228 debug_info_t *kvm_s390_dbf_uv;
229 
230 /* Section: not file related */
231 int kvm_arch_hardware_enable(void)
232 {
233 	/* every s390 is virtualization enabled ;-) */
234 	return 0;
235 }
236 
237 int kvm_arch_check_processor_compat(void *opaque)
238 {
239 	return 0;
240 }
241 
242 /* forward declarations */
243 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
244 			      unsigned long end);
245 static int sca_switch_to_extended(struct kvm *kvm);
246 
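/*
 * Compensate a SIE control block for a host TOD clock change of @delta so
 * that the guest-observed TOD stays constant: the guest epoch is adjusted by
 * -delta and, with the multiple-epoch facility (ECD_MEF), the epoch index is
 * carried along as well.
 */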
247 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
248 {
249 	u8 delta_idx = 0;
250 
251 	/*
252 	 * The TOD jumps by delta, we have to compensate this by adding
253 	 * -delta to the epoch.
254 	 */
255 	delta = -delta;
256 
257 	/* sign-extension - we're adding to signed values below */
258 	if ((s64)delta < 0)
259 		delta_idx = -1;
260 
261 	scb->epoch += delta;
262 	if (scb->ecd & ECD_MEF) {
263 		scb->epdx += delta_idx;
264 		if (scb->epoch < delta)
265 			scb->epdx += 1;
266 	}
267 }
268 
269 /*
270  * This callback is executed during stop_machine(). All CPUs are therefore
271  * temporarily stopped. In order not to change guest behavior, we have to
272  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
273  * so a CPU won't be stopped while calculating with the epoch.
274  */
275 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
276 			  void *v)
277 {
278 	struct kvm *kvm;
279 	struct kvm_vcpu *vcpu;
280 	int i;
281 	unsigned long long *delta = v;
282 
283 	list_for_each_entry(kvm, &vm_list, vm_list) {
284 		kvm_for_each_vcpu(i, vcpu, kvm) {
285 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
286 			if (i == 0) {
287 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
288 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
289 			}
290 			if (vcpu->arch.cputm_enabled)
291 				vcpu->arch.cputm_start += *delta;
292 			if (vcpu->arch.vsie_block)
293 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
294 						   *delta);
295 		}
296 	}
297 	return NOTIFY_OK;
298 }
299 
300 static struct notifier_block kvm_clock_notifier = {
301 	.notifier_call = kvm_clock_sync,
302 };
303 
304 int kvm_arch_hardware_setup(void *opaque)
305 {
306 	gmap_notifier.notifier_call = kvm_gmap_notifier;
307 	gmap_register_pte_notifier(&gmap_notifier);
308 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
309 	gmap_register_pte_notifier(&vsie_gmap_notifier);
310 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
311 				       &kvm_clock_notifier);
312 	return 0;
313 }
314 
315 void kvm_arch_hardware_unsetup(void)
316 {
317 	gmap_unregister_pte_notifier(&gmap_notifier);
318 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
319 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
320 					 &kvm_clock_notifier);
321 }
322 
323 static void allow_cpu_feat(unsigned long nr)
324 {
325 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
326 }
327 
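/*
 * Probe a single PERFORM LOCKED OPERATION subfunction using the test-bit
 * form of the function code (bit 0x100 set); condition code 0 means the
 * subfunction is installed.
 */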
328 static inline int plo_test_bit(unsigned char nr)
329 {
330 	unsigned long function = (unsigned long)nr | 0x100;
331 	int cc;
332 
333 	asm volatile(
334 		"	lgr	0,%[function]\n"
335 		/* Parameter registers are ignored for "test bit" */
336 		"	plo	0,0,0,0(0)\n"
337 		"	ipm	%0\n"
338 		"	srl	%0,28\n"
339 		: "=d" (cc)
340 		: [function] "d" (function)
341 		: "cc", "0");
342 	return cc == 0;
343 }
344 
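/*
 * Run the query function (GR0 == 0) of a 32-bit opcode with GR1 pointing to
 * the parameter block, so the instruction stores its list of installed
 * subfunctions into @query. Used below for SORTL and DFLTCC.
 */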
345 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
346 {
347 	asm volatile(
348 		"	lghi	0,0\n"
349 		"	lgr	1,%[query]\n"
350 		/* Parameter registers are ignored */
351 		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
352 		:
353 		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
354 		: "cc", "memory", "0", "1");
355 }
356 
357 #define INSN_SORTL 0xb938
358 #define INSN_DFLTCC 0xb939
359 
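/*
 * Determine the subfunctions (PLO, PTFF, CPACF, SORTL, DFLTCC) and CPU
 * features available on the host. The SIE related features are only made
 * available if nested virtualization is enabled and the minimum requirements
 * (SIEF2, ESOP, 64-bit SCAO, IDTE) are fulfilled.
 */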
360 static void kvm_s390_cpu_feat_init(void)
361 {
362 	int i;
363 
364 	for (i = 0; i < 256; ++i) {
365 		if (plo_test_bit(i))
366 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
367 	}
368 
369 	if (test_facility(28)) /* TOD-clock steering */
370 		ptff(kvm_s390_available_subfunc.ptff,
371 		     sizeof(kvm_s390_available_subfunc.ptff),
372 		     PTFF_QAF);
373 
374 	if (test_facility(17)) { /* MSA */
375 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
376 			      kvm_s390_available_subfunc.kmac);
377 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
378 			      kvm_s390_available_subfunc.kmc);
379 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
380 			      kvm_s390_available_subfunc.km);
381 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
382 			      kvm_s390_available_subfunc.kimd);
383 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
384 			      kvm_s390_available_subfunc.klmd);
385 	}
386 	if (test_facility(76)) /* MSA3 */
387 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
388 			      kvm_s390_available_subfunc.pckmo);
389 	if (test_facility(77)) { /* MSA4 */
390 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
391 			      kvm_s390_available_subfunc.kmctr);
392 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
393 			      kvm_s390_available_subfunc.kmf);
394 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
395 			      kvm_s390_available_subfunc.kmo);
396 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
397 			      kvm_s390_available_subfunc.pcc);
398 	}
399 	if (test_facility(57)) /* MSA5 */
400 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
401 			      kvm_s390_available_subfunc.ppno);
402 
403 	if (test_facility(146)) /* MSA8 */
404 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
405 			      kvm_s390_available_subfunc.kma);
406 
407 	if (test_facility(155)) /* MSA9 */
408 		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
409 			      kvm_s390_available_subfunc.kdsa);
410 
411 	if (test_facility(150)) /* SORTL */
412 		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
413 
414 	if (test_facility(151)) /* DFLTCC */
415 		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
416 
417 	if (MACHINE_HAS_ESOP)
418 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
419 	/*
420 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
421 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
422 	 */
423 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
424 	    !test_facility(3) || !nested)
425 		return;
426 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
427 	if (sclp.has_64bscao)
428 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
429 	if (sclp.has_siif)
430 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
431 	if (sclp.has_gpere)
432 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
433 	if (sclp.has_gsls)
434 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
435 	if (sclp.has_ib)
436 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
437 	if (sclp.has_cei)
438 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
439 	if (sclp.has_ibs)
440 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
441 	if (sclp.has_kss)
442 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
443 	/*
444 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
445 	 * all skey handling functions read/set the skey from the PGSTE
446 	 * instead of the real storage key.
447 	 *
448  * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
449  * pages to be detected as preserved although they are resident.
450 	 *
451 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
452 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
453 	 *
454 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
455 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
456 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
457 	 *
458 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
459 	 * cannot easily shadow the SCA because of the ipte lock.
460 	 */
461 }
462 
463 int kvm_arch_init(void *opaque)
464 {
465 	int rc = -ENOMEM;
466 
467 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
468 	if (!kvm_s390_dbf)
469 		return -ENOMEM;
470 
471 	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
472 	if (!kvm_s390_dbf_uv)
473 		goto out;
474 
475 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
476 	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
477 		goto out;
478 
479 	kvm_s390_cpu_feat_init();
480 
481 	/* Register floating interrupt controller interface. */
482 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
483 	if (rc) {
484 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
485 		goto out;
486 	}
487 
488 	rc = kvm_s390_gib_init(GAL_ISC);
489 	if (rc)
490 		goto out;
491 
492 	return 0;
493 
494 out:
495 	kvm_arch_exit();
496 	return rc;
497 }
498 
499 void kvm_arch_exit(void)
500 {
501 	kvm_s390_gib_destroy();
502 	debug_unregister(kvm_s390_dbf);
503 	debug_unregister(kvm_s390_dbf_uv);
504 }
505 
506 /* Section: device related */
507 long kvm_arch_dev_ioctl(struct file *filp,
508 			unsigned int ioctl, unsigned long arg)
509 {
510 	if (ioctl == KVM_S390_ENABLE_SIE)
511 		return s390_enable_sie();
512 	return -EINVAL;
513 }
514 
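/*
 * Report which KVM capabilities are supported by the s390 implementation.
 * Most extensions simply return 1; some return a value, e.g. the maximum
 * KVM_S390_MEM_OP size or the number of VCPUs supported by the SCA format
 * in use.
 */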
515 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
516 {
517 	int r;
518 
519 	switch (ext) {
520 	case KVM_CAP_S390_PSW:
521 	case KVM_CAP_S390_GMAP:
522 	case KVM_CAP_SYNC_MMU:
523 #ifdef CONFIG_KVM_S390_UCONTROL
524 	case KVM_CAP_S390_UCONTROL:
525 #endif
526 	case KVM_CAP_ASYNC_PF:
527 	case KVM_CAP_SYNC_REGS:
528 	case KVM_CAP_ONE_REG:
529 	case KVM_CAP_ENABLE_CAP:
530 	case KVM_CAP_S390_CSS_SUPPORT:
531 	case KVM_CAP_IOEVENTFD:
532 	case KVM_CAP_DEVICE_CTRL:
533 	case KVM_CAP_S390_IRQCHIP:
534 	case KVM_CAP_VM_ATTRIBUTES:
535 	case KVM_CAP_MP_STATE:
536 	case KVM_CAP_IMMEDIATE_EXIT:
537 	case KVM_CAP_S390_INJECT_IRQ:
538 	case KVM_CAP_S390_USER_SIGP:
539 	case KVM_CAP_S390_USER_STSI:
540 	case KVM_CAP_S390_SKEYS:
541 	case KVM_CAP_S390_IRQ_STATE:
542 	case KVM_CAP_S390_USER_INSTR0:
543 	case KVM_CAP_S390_CMMA_MIGRATION:
544 	case KVM_CAP_S390_AIS:
545 	case KVM_CAP_S390_AIS_MIGRATION:
546 	case KVM_CAP_S390_VCPU_RESETS:
547 	case KVM_CAP_SET_GUEST_DEBUG:
548 	case KVM_CAP_S390_DIAG318:
549 		r = 1;
550 		break;
551 	case KVM_CAP_S390_HPAGE_1M:
552 		r = 0;
553 		if (hpage && !kvm_is_ucontrol(kvm))
554 			r = 1;
555 		break;
556 	case KVM_CAP_S390_MEM_OP:
557 		r = MEM_OP_MAX_SIZE;
558 		break;
559 	case KVM_CAP_NR_VCPUS:
560 	case KVM_CAP_MAX_VCPUS:
561 	case KVM_CAP_MAX_VCPU_ID:
562 		r = KVM_S390_BSCA_CPU_SLOTS;
563 		if (!kvm_s390_use_sca_entries())
564 			r = KVM_MAX_VCPUS;
565 		else if (sclp.has_esca && sclp.has_64bscao)
566 			r = KVM_S390_ESCA_CPU_SLOTS;
567 		break;
568 	case KVM_CAP_S390_COW:
569 		r = MACHINE_HAS_ESOP;
570 		break;
571 	case KVM_CAP_S390_VECTOR_REGISTERS:
572 		r = MACHINE_HAS_VX;
573 		break;
574 	case KVM_CAP_S390_RI:
575 		r = test_facility(64);
576 		break;
577 	case KVM_CAP_S390_GS:
578 		r = test_facility(133);
579 		break;
580 	case KVM_CAP_S390_BPB:
581 		r = test_facility(82);
582 		break;
583 	case KVM_CAP_S390_PROTECTED:
584 		r = is_prot_virt_host();
585 		break;
586 	default:
587 		r = 0;
588 	}
589 	return r;
590 }
591 
592 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
593 {
594 	int i;
595 	gfn_t cur_gfn, last_gfn;
596 	unsigned long gaddr, vmaddr;
597 	struct gmap *gmap = kvm->arch.gmap;
598 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
599 
600 	/* Loop over all guest segments */
601 	cur_gfn = memslot->base_gfn;
602 	last_gfn = memslot->base_gfn + memslot->npages;
603 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
604 		gaddr = gfn_to_gpa(cur_gfn);
605 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
606 		if (kvm_is_error_hva(vmaddr))
607 			continue;
608 
609 		bitmap_zero(bitmap, _PAGE_ENTRIES);
610 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
611 		for (i = 0; i < _PAGE_ENTRIES; i++) {
612 			if (test_bit(i, bitmap))
613 				mark_page_dirty(kvm, cur_gfn + i);
614 		}
615 
616 		if (fatal_signal_pending(current))
617 			return;
618 		cond_resched();
619 	}
620 }
621 
622 /* Section: vm related */
623 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
624 
625 /*
626  * Get (and clear) the dirty memory log for a memory slot.
627  */
628 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
629 			       struct kvm_dirty_log *log)
630 {
631 	int r;
632 	unsigned long n;
633 	struct kvm_memory_slot *memslot;
634 	int is_dirty;
635 
636 	if (kvm_is_ucontrol(kvm))
637 		return -EINVAL;
638 
639 	mutex_lock(&kvm->slots_lock);
640 
641 	r = -EINVAL;
642 	if (log->slot >= KVM_USER_MEM_SLOTS)
643 		goto out;
644 
645 	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
646 	if (r)
647 		goto out;
648 
649 	/* Clear the dirty log */
650 	if (is_dirty) {
651 		n = kvm_dirty_bitmap_bytes(memslot);
652 		memset(memslot->dirty_bitmap, 0, n);
653 	}
654 	r = 0;
655 out:
656 	mutex_unlock(&kvm->slots_lock);
657 	return r;
658 }
659 
660 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
661 {
662 	unsigned int i;
663 	struct kvm_vcpu *vcpu;
664 
665 	kvm_for_each_vcpu(i, vcpu, kvm) {
666 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
667 	}
668 }
669 
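/*
 * Enable a VM-wide capability. Capabilities that change the guest CPU model
 * (vector registers, RI, AIS, GS) or the memory backing (1M huge pages) can
 * only be enabled as long as no VCPU has been created (-EBUSY otherwise).
 */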
670 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
671 {
672 	int r;
673 
674 	if (cap->flags)
675 		return -EINVAL;
676 
677 	switch (cap->cap) {
678 	case KVM_CAP_S390_IRQCHIP:
679 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
680 		kvm->arch.use_irqchip = 1;
681 		r = 0;
682 		break;
683 	case KVM_CAP_S390_USER_SIGP:
684 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
685 		kvm->arch.user_sigp = 1;
686 		r = 0;
687 		break;
688 	case KVM_CAP_S390_VECTOR_REGISTERS:
689 		mutex_lock(&kvm->lock);
690 		if (kvm->created_vcpus) {
691 			r = -EBUSY;
692 		} else if (MACHINE_HAS_VX) {
693 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
694 			set_kvm_facility(kvm->arch.model.fac_list, 129);
695 			if (test_facility(134)) {
696 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
697 				set_kvm_facility(kvm->arch.model.fac_list, 134);
698 			}
699 			if (test_facility(135)) {
700 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
701 				set_kvm_facility(kvm->arch.model.fac_list, 135);
702 			}
703 			if (test_facility(148)) {
704 				set_kvm_facility(kvm->arch.model.fac_mask, 148);
705 				set_kvm_facility(kvm->arch.model.fac_list, 148);
706 			}
707 			if (test_facility(152)) {
708 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
709 				set_kvm_facility(kvm->arch.model.fac_list, 152);
710 			}
711 			r = 0;
712 		} else
713 			r = -EINVAL;
714 		mutex_unlock(&kvm->lock);
715 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
716 			 r ? "(not available)" : "(success)");
717 		break;
718 	case KVM_CAP_S390_RI:
719 		r = -EINVAL;
720 		mutex_lock(&kvm->lock);
721 		if (kvm->created_vcpus) {
722 			r = -EBUSY;
723 		} else if (test_facility(64)) {
724 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
725 			set_kvm_facility(kvm->arch.model.fac_list, 64);
726 			r = 0;
727 		}
728 		mutex_unlock(&kvm->lock);
729 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
730 			 r ? "(not available)" : "(success)");
731 		break;
732 	case KVM_CAP_S390_AIS:
733 		mutex_lock(&kvm->lock);
734 		if (kvm->created_vcpus) {
735 			r = -EBUSY;
736 		} else {
737 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
738 			set_kvm_facility(kvm->arch.model.fac_list, 72);
739 			r = 0;
740 		}
741 		mutex_unlock(&kvm->lock);
742 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
743 			 r ? "(not available)" : "(success)");
744 		break;
745 	case KVM_CAP_S390_GS:
746 		r = -EINVAL;
747 		mutex_lock(&kvm->lock);
748 		if (kvm->created_vcpus) {
749 			r = -EBUSY;
750 		} else if (test_facility(133)) {
751 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
752 			set_kvm_facility(kvm->arch.model.fac_list, 133);
753 			r = 0;
754 		}
755 		mutex_unlock(&kvm->lock);
756 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
757 			 r ? "(not available)" : "(success)");
758 		break;
759 	case KVM_CAP_S390_HPAGE_1M:
760 		mutex_lock(&kvm->lock);
761 		if (kvm->created_vcpus)
762 			r = -EBUSY;
763 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
764 			r = -EINVAL;
765 		else {
766 			r = 0;
767 			mmap_write_lock(kvm->mm);
768 			kvm->mm->context.allow_gmap_hpage_1m = 1;
769 			mmap_write_unlock(kvm->mm);
770 			/*
771 			 * We might have to create fake 4k page
772 			 * tables. To avoid that the hardware works on
773 			 * stale PGSTEs, we emulate these instructions.
774 			 */
775 			kvm->arch.use_skf = 0;
776 			kvm->arch.use_pfmfi = 0;
777 		}
778 		mutex_unlock(&kvm->lock);
779 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
780 			 r ? "(not available)" : "(success)");
781 		break;
782 	case KVM_CAP_S390_USER_STSI:
783 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
784 		kvm->arch.user_stsi = 1;
785 		r = 0;
786 		break;
787 	case KVM_CAP_S390_USER_INSTR0:
788 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
789 		kvm->arch.user_instr0 = 1;
790 		icpt_operexc_on_all_vcpus(kvm);
791 		r = 0;
792 		break;
793 	default:
794 		r = -EINVAL;
795 		break;
796 	}
797 	return r;
798 }
799 
800 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
801 {
802 	int ret;
803 
804 	switch (attr->attr) {
805 	case KVM_S390_VM_MEM_LIMIT_SIZE:
806 		ret = 0;
807 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
808 			 kvm->arch.mem_limit);
809 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
810 			ret = -EFAULT;
811 		break;
812 	default:
813 		ret = -ENXIO;
814 		break;
815 	}
816 	return ret;
817 }
818 
819 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
820 {
821 	int ret;
822 	unsigned int idx;
823 	switch (attr->attr) {
824 	case KVM_S390_VM_MEM_ENABLE_CMMA:
825 		ret = -ENXIO;
826 		if (!sclp.has_cmma)
827 			break;
828 
829 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
830 		mutex_lock(&kvm->lock);
831 		if (kvm->created_vcpus)
832 			ret = -EBUSY;
833 		else if (kvm->mm->context.allow_gmap_hpage_1m)
834 			ret = -EINVAL;
835 		else {
836 			kvm->arch.use_cmma = 1;
837 			/* Not compatible with cmma. */
838 			kvm->arch.use_pfmfi = 0;
839 			ret = 0;
840 		}
841 		mutex_unlock(&kvm->lock);
842 		break;
843 	case KVM_S390_VM_MEM_CLR_CMMA:
844 		ret = -ENXIO;
845 		if (!sclp.has_cmma)
846 			break;
847 		ret = -EINVAL;
848 		if (!kvm->arch.use_cmma)
849 			break;
850 
851 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
852 		mutex_lock(&kvm->lock);
853 		idx = srcu_read_lock(&kvm->srcu);
854 		s390_reset_cmma(kvm->arch.gmap->mm);
855 		srcu_read_unlock(&kvm->srcu, idx);
856 		mutex_unlock(&kvm->lock);
857 		ret = 0;
858 		break;
859 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
860 		unsigned long new_limit;
861 
862 		if (kvm_is_ucontrol(kvm))
863 			return -EINVAL;
864 
865 		if (get_user(new_limit, (u64 __user *)attr->addr))
866 			return -EFAULT;
867 
868 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
869 		    new_limit > kvm->arch.mem_limit)
870 			return -E2BIG;
871 
872 		if (!new_limit)
873 			return -EINVAL;
874 
875 		/* gmap_create takes last usable address */
876 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
877 			new_limit -= 1;
878 
879 		ret = -EBUSY;
880 		mutex_lock(&kvm->lock);
881 		if (!kvm->created_vcpus) {
882 			/* gmap_create will round the limit up */
883 			struct gmap *new = gmap_create(current->mm, new_limit);
884 
885 			if (!new) {
886 				ret = -ENOMEM;
887 			} else {
888 				gmap_remove(kvm->arch.gmap);
889 				new->private = kvm;
890 				kvm->arch.gmap = new;
891 				ret = 0;
892 			}
893 		}
894 		mutex_unlock(&kvm->lock);
895 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
896 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
897 			 (void *) kvm->arch.gmap->asce);
898 		break;
899 	}
900 	default:
901 		ret = -ENXIO;
902 		break;
903 	}
904 	return ret;
905 }
906 
907 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
908 
909 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
910 {
911 	struct kvm_vcpu *vcpu;
912 	int i;
913 
914 	kvm_s390_vcpu_block_all(kvm);
915 
916 	kvm_for_each_vcpu(i, vcpu, kvm) {
917 		kvm_s390_vcpu_crypto_setup(vcpu);
918 		/* recreate the shadow crycb by leaving the VSIE handler */
919 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
920 	}
921 
922 	kvm_s390_vcpu_unblock_all(kvm);
923 }
924 
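/*
 * Set a crypto attribute: generate or clear the AES/DEA wrapping key masks
 * (requires the MSA3 facility 76) or toggle AP instruction interpretation.
 * Afterwards all VCPUs are blocked and their crypto setup is refreshed so
 * the new settings take effect.
 */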
925 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
926 {
927 	mutex_lock(&kvm->lock);
928 	switch (attr->attr) {
929 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
930 		if (!test_kvm_facility(kvm, 76)) {
931 			mutex_unlock(&kvm->lock);
932 			return -EINVAL;
933 		}
934 		get_random_bytes(
935 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
936 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
937 		kvm->arch.crypto.aes_kw = 1;
938 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
939 		break;
940 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
941 		if (!test_kvm_facility(kvm, 76)) {
942 			mutex_unlock(&kvm->lock);
943 			return -EINVAL;
944 		}
945 		get_random_bytes(
946 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
947 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
948 		kvm->arch.crypto.dea_kw = 1;
949 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
950 		break;
951 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
952 		if (!test_kvm_facility(kvm, 76)) {
953 			mutex_unlock(&kvm->lock);
954 			return -EINVAL;
955 		}
956 		kvm->arch.crypto.aes_kw = 0;
957 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
958 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
959 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
960 		break;
961 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
962 		if (!test_kvm_facility(kvm, 76)) {
963 			mutex_unlock(&kvm->lock);
964 			return -EINVAL;
965 		}
966 		kvm->arch.crypto.dea_kw = 0;
967 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
968 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
969 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
970 		break;
971 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
972 		if (!ap_instructions_available()) {
973 			mutex_unlock(&kvm->lock);
974 			return -EOPNOTSUPP;
975 		}
976 		kvm->arch.crypto.apie = 1;
977 		break;
978 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
979 		if (!ap_instructions_available()) {
980 			mutex_unlock(&kvm->lock);
981 			return -EOPNOTSUPP;
982 		}
983 		kvm->arch.crypto.apie = 0;
984 		break;
985 	default:
986 		mutex_unlock(&kvm->lock);
987 		return -ENXIO;
988 	}
989 
990 	kvm_s390_vcpu_crypto_reset_all(kvm);
991 	mutex_unlock(&kvm->lock);
992 	return 0;
993 }
994 
995 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
996 {
997 	int cx;
998 	struct kvm_vcpu *vcpu;
999 
1000 	kvm_for_each_vcpu(cx, vcpu, kvm)
1001 		kvm_s390_sync_request(req, vcpu);
1002 }
1003 
1004 /*
1005  * Must be called with kvm->srcu held to avoid races on memslots, and with
1006  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1007  */
1008 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1009 {
1010 	struct kvm_memory_slot *ms;
1011 	struct kvm_memslots *slots;
1012 	unsigned long ram_pages = 0;
1013 	int slotnr;
1014 
1015 	/* migration mode already enabled */
1016 	if (kvm->arch.migration_mode)
1017 		return 0;
1018 	slots = kvm_memslots(kvm);
1019 	if (!slots || !slots->used_slots)
1020 		return -EINVAL;
1021 
1022 	if (!kvm->arch.use_cmma) {
1023 		kvm->arch.migration_mode = 1;
1024 		return 0;
1025 	}
1026 	/* mark all the pages in active slots as dirty */
1027 	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1028 		ms = slots->memslots + slotnr;
1029 		if (!ms->dirty_bitmap)
1030 			return -EINVAL;
1031 		/*
1032 		 * The second half of the bitmap is only used on x86,
1033 		 * and would be wasted otherwise, so we put it to good
1034 		 * use here to keep track of the state of the storage
1035 		 * attributes.
1036 		 */
1037 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1038 		ram_pages += ms->npages;
1039 	}
1040 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1041 	kvm->arch.migration_mode = 1;
1042 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1043 	return 0;
1044 }
1045 
1046 /*
1047  * Must be called with kvm->slots_lock to avoid races with ourselves and
1048  * kvm_s390_vm_start_migration.
1049  */
1050 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1051 {
1052 	/* migration mode already disabled */
1053 	if (!kvm->arch.migration_mode)
1054 		return 0;
1055 	kvm->arch.migration_mode = 0;
1056 	if (kvm->arch.use_cmma)
1057 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1058 	return 0;
1059 }
1060 
1061 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1062 				     struct kvm_device_attr *attr)
1063 {
1064 	int res = -ENXIO;
1065 
1066 	mutex_lock(&kvm->slots_lock);
1067 	switch (attr->attr) {
1068 	case KVM_S390_VM_MIGRATION_START:
1069 		res = kvm_s390_vm_start_migration(kvm);
1070 		break;
1071 	case KVM_S390_VM_MIGRATION_STOP:
1072 		res = kvm_s390_vm_stop_migration(kvm);
1073 		break;
1074 	default:
1075 		break;
1076 	}
1077 	mutex_unlock(&kvm->slots_lock);
1078 
1079 	return res;
1080 }
1081 
1082 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1083 				     struct kvm_device_attr *attr)
1084 {
1085 	u64 mig = kvm->arch.migration_mode;
1086 
1087 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1088 		return -ENXIO;
1089 
1090 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1091 		return -EFAULT;
1092 	return 0;
1093 }
1094 
1095 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
1096 
1097 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1098 {
1099 	struct kvm_s390_vm_tod_clock gtod;
1100 
1101 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1102 		return -EFAULT;
1103 
1104 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1105 		return -EINVAL;
1106 	__kvm_s390_set_tod_clock(kvm, &gtod);
1107 
1108 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1109 		gtod.epoch_idx, gtod.tod);
1110 
1111 	return 0;
1112 }
1113 
1114 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1115 {
1116 	u8 gtod_high;
1117 
1118 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1119 					   sizeof(gtod_high)))
1120 		return -EFAULT;
1121 
1122 	if (gtod_high != 0)
1123 		return -EINVAL;
1124 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1125 
1126 	return 0;
1127 }
1128 
1129 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1130 {
1131 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1132 
1133 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1134 			   sizeof(gtod.tod)))
1135 		return -EFAULT;
1136 
1137 	__kvm_s390_set_tod_clock(kvm, &gtod);
1138 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1139 	return 0;
1140 }
1141 
1142 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1143 {
1144 	int ret;
1145 
1146 	if (attr->flags)
1147 		return -EINVAL;
1148 
1149 	mutex_lock(&kvm->lock);
1150 	/*
1151 	 * For protected guests, the TOD is managed by the ultravisor, so trying
1152 	 * to change it will never bring the expected results.
1153 	 */
1154 	if (kvm_s390_pv_is_protected(kvm)) {
1155 		ret = -EOPNOTSUPP;
1156 		goto out_unlock;
1157 	}
1158 
1159 	switch (attr->attr) {
1160 	case KVM_S390_VM_TOD_EXT:
1161 		ret = kvm_s390_set_tod_ext(kvm, attr);
1162 		break;
1163 	case KVM_S390_VM_TOD_HIGH:
1164 		ret = kvm_s390_set_tod_high(kvm, attr);
1165 		break;
1166 	case KVM_S390_VM_TOD_LOW:
1167 		ret = kvm_s390_set_tod_low(kvm, attr);
1168 		break;
1169 	default:
1170 		ret = -ENXIO;
1171 		break;
1172 	}
1173 
1174 out_unlock:
1175 	mutex_unlock(&kvm->lock);
1176 	return ret;
1177 }
1178 
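/*
 * Compute the guest TOD as host TOD + guest epoch. With the multiple-epoch
 * facility (139), the epoch index is taken into account as well, including
 * a carry when adding the epoch wraps the TOD value.
 */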
1179 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1180 				   struct kvm_s390_vm_tod_clock *gtod)
1181 {
1182 	struct kvm_s390_tod_clock_ext htod;
1183 
1184 	preempt_disable();
1185 
1186 	get_tod_clock_ext((char *)&htod);
1187 
1188 	gtod->tod = htod.tod + kvm->arch.epoch;
1189 	gtod->epoch_idx = 0;
1190 	if (test_kvm_facility(kvm, 139)) {
1191 		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1192 		if (gtod->tod < htod.tod)
1193 			gtod->epoch_idx += 1;
1194 	}
1195 
1196 	preempt_enable();
1197 }
1198 
1199 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1200 {
1201 	struct kvm_s390_vm_tod_clock gtod;
1202 
1203 	memset(&gtod, 0, sizeof(gtod));
1204 	kvm_s390_get_tod_clock(kvm, &gtod);
1205 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1206 		return -EFAULT;
1207 
1208 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1209 		gtod.epoch_idx, gtod.tod);
1210 	return 0;
1211 }
1212 
1213 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1214 {
1215 	u8 gtod_high = 0;
1216 
1217 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1218 					 sizeof(gtod_high)))
1219 		return -EFAULT;
1220 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1221 
1222 	return 0;
1223 }
1224 
1225 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1226 {
1227 	u64 gtod;
1228 
1229 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1230 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1231 		return -EFAULT;
1232 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1233 
1234 	return 0;
1235 }
1236 
1237 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1238 {
1239 	int ret;
1240 
1241 	if (attr->flags)
1242 		return -EINVAL;
1243 
1244 	switch (attr->attr) {
1245 	case KVM_S390_VM_TOD_EXT:
1246 		ret = kvm_s390_get_tod_ext(kvm, attr);
1247 		break;
1248 	case KVM_S390_VM_TOD_HIGH:
1249 		ret = kvm_s390_get_tod_high(kvm, attr);
1250 		break;
1251 	case KVM_S390_VM_TOD_LOW:
1252 		ret = kvm_s390_get_tod_low(kvm, attr);
1253 		break;
1254 	default:
1255 		ret = -ENXIO;
1256 		break;
1257 	}
1258 	return ret;
1259 }
1260 
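/*
 * Set the guest CPU model (cpuid, IBC, facility list) from user space. The
 * requested IBC is clamped to the range reported by the SCLP (lowest to
 * unblocked IBC). Only possible as long as no VCPU has been created.
 */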
1261 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1262 {
1263 	struct kvm_s390_vm_cpu_processor *proc;
1264 	u16 lowest_ibc, unblocked_ibc;
1265 	int ret = 0;
1266 
1267 	mutex_lock(&kvm->lock);
1268 	if (kvm->created_vcpus) {
1269 		ret = -EBUSY;
1270 		goto out;
1271 	}
1272 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1273 	if (!proc) {
1274 		ret = -ENOMEM;
1275 		goto out;
1276 	}
1277 	if (!copy_from_user(proc, (void __user *)attr->addr,
1278 			    sizeof(*proc))) {
1279 		kvm->arch.model.cpuid = proc->cpuid;
1280 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1281 		unblocked_ibc = sclp.ibc & 0xfff;
1282 		if (lowest_ibc && proc->ibc) {
1283 			if (proc->ibc > unblocked_ibc)
1284 				kvm->arch.model.ibc = unblocked_ibc;
1285 			else if (proc->ibc < lowest_ibc)
1286 				kvm->arch.model.ibc = lowest_ibc;
1287 			else
1288 				kvm->arch.model.ibc = proc->ibc;
1289 		}
1290 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1291 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1292 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1293 			 kvm->arch.model.ibc,
1294 			 kvm->arch.model.cpuid);
1295 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1296 			 kvm->arch.model.fac_list[0],
1297 			 kvm->arch.model.fac_list[1],
1298 			 kvm->arch.model.fac_list[2]);
1299 	} else
1300 		ret = -EFAULT;
1301 	kfree(proc);
1302 out:
1303 	mutex_unlock(&kvm->lock);
1304 	return ret;
1305 }
1306 
1307 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1308 				       struct kvm_device_attr *attr)
1309 {
1310 	struct kvm_s390_vm_cpu_feat data;
1311 
1312 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1313 		return -EFAULT;
1314 	if (!bitmap_subset((unsigned long *) data.feat,
1315 			   kvm_s390_available_cpu_feat,
1316 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1317 		return -EINVAL;
1318 
1319 	mutex_lock(&kvm->lock);
1320 	if (kvm->created_vcpus) {
1321 		mutex_unlock(&kvm->lock);
1322 		return -EBUSY;
1323 	}
1324 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1325 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1326 	mutex_unlock(&kvm->lock);
1327 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1328 			 data.feat[0],
1329 			 data.feat[1],
1330 			 data.feat[2]);
1331 	return 0;
1332 }
1333 
1334 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1335 					  struct kvm_device_attr *attr)
1336 {
1337 	mutex_lock(&kvm->lock);
1338 	if (kvm->created_vcpus) {
1339 		mutex_unlock(&kvm->lock);
1340 		return -EBUSY;
1341 	}
1342 
1343 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1344 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1345 		mutex_unlock(&kvm->lock);
1346 		return -EFAULT;
1347 	}
1348 	mutex_unlock(&kvm->lock);
1349 
1350 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1351 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1352 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1353 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1354 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1355 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1356 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1357 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1358 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1359 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1360 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1361 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1362 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1363 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1364 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1365 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1366 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1367 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1368 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1369 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1370 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1371 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1372 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1373 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1374 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1375 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1376 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1377 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1378 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1379 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1380 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1381 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1382 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1383 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1384 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1385 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1386 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1387 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1388 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1389 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1390 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1391 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1392 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1393 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1394 	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1395 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1396 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1397 	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1398 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1399 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1400 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1401 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1402 	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1403 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1404 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1405 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1406 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1407 
1408 	return 0;
1409 }
1410 
1411 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1412 {
1413 	int ret = -ENXIO;
1414 
1415 	switch (attr->attr) {
1416 	case KVM_S390_VM_CPU_PROCESSOR:
1417 		ret = kvm_s390_set_processor(kvm, attr);
1418 		break;
1419 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1420 		ret = kvm_s390_set_processor_feat(kvm, attr);
1421 		break;
1422 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1423 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1424 		break;
1425 	}
1426 	return ret;
1427 }
1428 
1429 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1430 {
1431 	struct kvm_s390_vm_cpu_processor *proc;
1432 	int ret = 0;
1433 
1434 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1435 	if (!proc) {
1436 		ret = -ENOMEM;
1437 		goto out;
1438 	}
1439 	proc->cpuid = kvm->arch.model.cpuid;
1440 	proc->ibc = kvm->arch.model.ibc;
1441 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1442 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1443 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1444 		 kvm->arch.model.ibc,
1445 		 kvm->arch.model.cpuid);
1446 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1447 		 kvm->arch.model.fac_list[0],
1448 		 kvm->arch.model.fac_list[1],
1449 		 kvm->arch.model.fac_list[2]);
1450 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1451 		ret = -EFAULT;
1452 	kfree(proc);
1453 out:
1454 	return ret;
1455 }
1456 
1457 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1458 {
1459 	struct kvm_s390_vm_cpu_machine *mach;
1460 	int ret = 0;
1461 
1462 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1463 	if (!mach) {
1464 		ret = -ENOMEM;
1465 		goto out;
1466 	}
1467 	get_cpu_id((struct cpuid *) &mach->cpuid);
1468 	mach->ibc = sclp.ibc;
1469 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1470 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1471 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1472 	       sizeof(S390_lowcore.stfle_fac_list));
1473 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1474 		 kvm->arch.model.ibc,
1475 		 kvm->arch.model.cpuid);
1476 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1477 		 mach->fac_mask[0],
1478 		 mach->fac_mask[1],
1479 		 mach->fac_mask[2]);
1480 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1481 		 mach->fac_list[0],
1482 		 mach->fac_list[1],
1483 		 mach->fac_list[2]);
1484 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1485 		ret = -EFAULT;
1486 	kfree(mach);
1487 out:
1488 	return ret;
1489 }
1490 
1491 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1492 				       struct kvm_device_attr *attr)
1493 {
1494 	struct kvm_s390_vm_cpu_feat data;
1495 
1496 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1497 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1498 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1499 		return -EFAULT;
1500 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1501 			 data.feat[0],
1502 			 data.feat[1],
1503 			 data.feat[2]);
1504 	return 0;
1505 }
1506 
1507 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1508 				     struct kvm_device_attr *attr)
1509 {
1510 	struct kvm_s390_vm_cpu_feat data;
1511 
1512 	bitmap_copy((unsigned long *) data.feat,
1513 		    kvm_s390_available_cpu_feat,
1514 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1515 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1516 		return -EFAULT;
1517 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1518 			 data.feat[0],
1519 			 data.feat[1],
1520 			 data.feat[2]);
1521 	return 0;
1522 }
1523 
1524 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1525 					  struct kvm_device_attr *attr)
1526 {
1527 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1528 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1529 		return -EFAULT;
1530 
1531 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1532 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1533 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1534 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1535 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1536 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1537 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1538 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1539 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1540 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1541 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1542 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1543 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1544 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1545 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1546 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1547 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1548 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1549 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1550 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1551 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1552 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1553 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1554 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1555 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1556 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1557 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1558 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1559 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1560 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1561 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1562 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1563 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1564 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1565 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1566 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1567 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1568 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1569 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1570 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1571 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1572 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1573 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1574 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1575 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1576 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1577 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1578 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1579 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1580 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1581 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1582 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1583 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1584 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1585 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1586 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1587 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1588 
1589 	return 0;
1590 }
1591 
1592 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1593 					struct kvm_device_attr *attr)
1594 {
1595 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1596 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1597 		return -EFAULT;
1598 
1599 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1600 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1601 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1602 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1603 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1604 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1605 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1606 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1607 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1608 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1609 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1610 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1611 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1612 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1613 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1614 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1615 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1616 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1617 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1618 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1619 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1620 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1621 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1622 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1623 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1624 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1625 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1626 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1627 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1628 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1629 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1630 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1631 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1632 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1633 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1634 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1635 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1636 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1637 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1638 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1639 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1640 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1641 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1642 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1643 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1644 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1645 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1646 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1647 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1648 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1649 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1650 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1651 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1652 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1653 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1654 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1655 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1656 
1657 	return 0;
1658 }
1659 
1660 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1661 {
1662 	int ret = -ENXIO;
1663 
1664 	switch (attr->attr) {
1665 	case KVM_S390_VM_CPU_PROCESSOR:
1666 		ret = kvm_s390_get_processor(kvm, attr);
1667 		break;
1668 	case KVM_S390_VM_CPU_MACHINE:
1669 		ret = kvm_s390_get_machine(kvm, attr);
1670 		break;
1671 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1672 		ret = kvm_s390_get_processor_feat(kvm, attr);
1673 		break;
1674 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1675 		ret = kvm_s390_get_machine_feat(kvm, attr);
1676 		break;
1677 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1678 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1679 		break;
1680 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1681 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1682 		break;
1683 	}
1684 	return ret;
1685 }
1686 
1687 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1688 {
1689 	int ret;
1690 
1691 	switch (attr->group) {
1692 	case KVM_S390_VM_MEM_CTRL:
1693 		ret = kvm_s390_set_mem_control(kvm, attr);
1694 		break;
1695 	case KVM_S390_VM_TOD:
1696 		ret = kvm_s390_set_tod(kvm, attr);
1697 		break;
1698 	case KVM_S390_VM_CPU_MODEL:
1699 		ret = kvm_s390_set_cpu_model(kvm, attr);
1700 		break;
1701 	case KVM_S390_VM_CRYPTO:
1702 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1703 		break;
1704 	case KVM_S390_VM_MIGRATION:
1705 		ret = kvm_s390_vm_set_migration(kvm, attr);
1706 		break;
1707 	default:
1708 		ret = -ENXIO;
1709 		break;
1710 	}
1711 
1712 	return ret;
1713 }
1714 
1715 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1716 {
1717 	int ret;
1718 
1719 	switch (attr->group) {
1720 	case KVM_S390_VM_MEM_CTRL:
1721 		ret = kvm_s390_get_mem_control(kvm, attr);
1722 		break;
1723 	case KVM_S390_VM_TOD:
1724 		ret = kvm_s390_get_tod(kvm, attr);
1725 		break;
1726 	case KVM_S390_VM_CPU_MODEL:
1727 		ret = kvm_s390_get_cpu_model(kvm, attr);
1728 		break;
1729 	case KVM_S390_VM_MIGRATION:
1730 		ret = kvm_s390_vm_get_migration(kvm, attr);
1731 		break;
1732 	default:
1733 		ret = -ENXIO;
1734 		break;
1735 	}
1736 
1737 	return ret;
1738 }
1739 
1740 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1741 {
1742 	int ret;
1743 
1744 	switch (attr->group) {
1745 	case KVM_S390_VM_MEM_CTRL:
1746 		switch (attr->attr) {
1747 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1748 		case KVM_S390_VM_MEM_CLR_CMMA:
1749 			ret = sclp.has_cmma ? 0 : -ENXIO;
1750 			break;
1751 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1752 			ret = 0;
1753 			break;
1754 		default:
1755 			ret = -ENXIO;
1756 			break;
1757 		}
1758 		break;
1759 	case KVM_S390_VM_TOD:
1760 		switch (attr->attr) {
1761 		case KVM_S390_VM_TOD_LOW:
1762 		case KVM_S390_VM_TOD_HIGH:
1763 			ret = 0;
1764 			break;
1765 		default:
1766 			ret = -ENXIO;
1767 			break;
1768 		}
1769 		break;
1770 	case KVM_S390_VM_CPU_MODEL:
1771 		switch (attr->attr) {
1772 		case KVM_S390_VM_CPU_PROCESSOR:
1773 		case KVM_S390_VM_CPU_MACHINE:
1774 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1775 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1776 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1777 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1778 			ret = 0;
1779 			break;
1780 		default:
1781 			ret = -ENXIO;
1782 			break;
1783 		}
1784 		break;
1785 	case KVM_S390_VM_CRYPTO:
1786 		switch (attr->attr) {
1787 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1788 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1789 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1790 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1791 			ret = 0;
1792 			break;
1793 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1794 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1795 			ret = ap_instructions_available() ? 0 : -ENXIO;
1796 			break;
1797 		default:
1798 			ret = -ENXIO;
1799 			break;
1800 		}
1801 		break;
1802 	case KVM_S390_VM_MIGRATION:
1803 		ret = 0;
1804 		break;
1805 	default:
1806 		ret = -ENXIO;
1807 		break;
1808 	}
1809 
1810 	return ret;
1811 }
1812 
1813 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1814 {
1815 	uint8_t *keys;
1816 	uint64_t hva;
1817 	int srcu_idx, i, r = 0;
1818 
1819 	if (args->flags != 0)
1820 		return -EINVAL;
1821 
1822 	/* Is this guest using storage keys? */
1823 	if (!mm_uses_skeys(current->mm))
1824 		return KVM_S390_GET_SKEYS_NONE;
1825 
1826 	/* Enforce sane limit on memory allocation */
1827 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1828 		return -EINVAL;
1829 
1830 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1831 	if (!keys)
1832 		return -ENOMEM;
1833 
1834 	mmap_read_lock(current->mm);
1835 	srcu_idx = srcu_read_lock(&kvm->srcu);
1836 	for (i = 0; i < args->count; i++) {
1837 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1838 		if (kvm_is_error_hva(hva)) {
1839 			r = -EFAULT;
1840 			break;
1841 		}
1842 
1843 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1844 		if (r)
1845 			break;
1846 	}
1847 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1848 	mmap_read_unlock(current->mm);
1849 
1850 	if (!r) {
1851 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1852 				 sizeof(uint8_t) * args->count);
1853 		if (r)
1854 			r = -EFAULT;
1855 	}
1856 
1857 	kvfree(keys);
1858 	return r;
1859 }
1860 
1861 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1862 {
1863 	uint8_t *keys;
1864 	uint64_t hva;
1865 	int srcu_idx, i, r = 0;
1866 	bool unlocked;
1867 
1868 	if (args->flags != 0)
1869 		return -EINVAL;
1870 
1871 	/* Enforce sane limit on memory allocation */
1872 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1873 		return -EINVAL;
1874 
1875 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1876 	if (!keys)
1877 		return -ENOMEM;
1878 
1879 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1880 			   sizeof(uint8_t) * args->count);
1881 	if (r) {
1882 		r = -EFAULT;
1883 		goto out;
1884 	}
1885 
1886 	/* Enable storage key handling for the guest */
1887 	r = s390_enable_skey();
1888 	if (r)
1889 		goto out;
1890 
1891 	i = 0;
1892 	mmap_read_lock(current->mm);
1893 	srcu_idx = srcu_read_lock(&kvm->srcu);
1894 	while (i < args->count) {
1895 		unlocked = false;
1896 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1897 		if (kvm_is_error_hva(hva)) {
1898 			r = -EFAULT;
1899 			break;
1900 		}
1901 
1902 		/* Lowest order bit is reserved */
1903 		if (keys[i] & 0x01) {
1904 			r = -EINVAL;
1905 			break;
1906 		}
1907 
1908 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1909 		if (r) {
1910 			r = fixup_user_fault(current->mm, hva,
1911 					     FAULT_FLAG_WRITE, &unlocked);
1912 			if (r)
1913 				break;
1914 		}
1915 		if (!r)
1916 			i++;
1917 	}
1918 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1919 	mmap_read_unlock(current->mm);
1920 out:
1921 	kvfree(keys);
1922 	return r;
1923 }
1924 
1925 /*
1926  * Base address and length must be sent at the start of each block, therefore
1927  * it's cheaper to send some clean data, as long as it's less than the size of
1928  * two longs.
1929  */
1930 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1931 /* for consistency */
1932 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1933 
1934 /*
1935  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1936  * address falls in a hole. In that case the index of one of the memslots
1937  * bordering the hole is returned.
1938  */
1939 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1940 {
1941 	int start = 0, end = slots->used_slots;
1942 	int slot = atomic_read(&slots->lru_slot);
1943 	struct kvm_memory_slot *memslots = slots->memslots;
1944 
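	/* Fast path: try the most recently used slot first. */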
1945 	if (gfn >= memslots[slot].base_gfn &&
1946 	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
1947 		return slot;
1948 
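	/* Binary search; the memslot array is sorted by descending base_gfn. */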
1949 	while (start < end) {
1950 		slot = start + (end - start) / 2;
1951 
1952 		if (gfn >= memslots[slot].base_gfn)
1953 			end = slot;
1954 		else
1955 			start = slot + 1;
1956 	}
1957 
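	/* gfn lies below every slot: return the slot with the lowest base_gfn. */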
1958 	if (start >= slots->used_slots)
1959 		return slots->used_slots - 1;
1960 
1961 	if (gfn >= memslots[start].base_gfn &&
1962 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
1963 		atomic_set(&slots->lru_slot, start);
1964 	}
1965 
1966 	return start;
1967 }
1968 
1969 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1970 			      u8 *res, unsigned long bufsize)
1971 {
1972 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1973 
1974 	args->count = 0;
1975 	while (args->count < bufsize) {
1976 		hva = gfn_to_hva(kvm, cur_gfn);
1977 		/*
1978 		 * We return an error if the first value was invalid, but we
1979 		 * return successfully if at least one value was copied.
1980 		 */
1981 		if (kvm_is_error_hva(hva))
1982 			return args->count ? 0 : -EFAULT;
1983 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1984 			pgstev = 0;
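		/* Keep only the usage state and the NODAT bit of the PGSTE. */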
1985 		res[args->count++] = (pgstev >> 24) & 0x43;
1986 		cur_gfn++;
1987 	}
1988 
1989 	return 0;
1990 }
1991 
1992 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1993 					      unsigned long cur_gfn)
1994 {
1995 	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1996 	struct kvm_memory_slot *ms = slots->memslots + slotidx;
1997 	unsigned long ofs = cur_gfn - ms->base_gfn;
1998 
1999 	if (ms->base_gfn + ms->npages <= cur_gfn) {
2000 		slotidx--;
2001 		/* If we are above the highest slot, wrap around */
2002 		if (slotidx < 0)
2003 			slotidx = slots->used_slots - 1;
2004 
2005 		ms = slots->memslots + slotidx;
2006 		ofs = 0;
2007 	}
2008 
2009 	if (cur_gfn < ms->base_gfn)
2010 		ofs = 0;
2011 
2012 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2013 	while ((slotidx > 0) && (ofs >= ms->npages)) {
2014 		slotidx--;
2015 		ms = slots->memslots + slotidx;
2016 		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2017 	}
2018 	return ms->base_gfn + ofs;
2019 }
2020 
2021 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2022 			     u8 *res, unsigned long bufsize)
2023 {
2024 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2025 	struct kvm_memslots *slots = kvm_memslots(kvm);
2026 	struct kvm_memory_slot *ms;
2027 
2028 	if (unlikely(!slots->used_slots))
2029 		return 0;
2030 
2031 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2032 	ms = gfn_to_memslot(kvm, cur_gfn);
2033 	args->count = 0;
2034 	args->start_gfn = cur_gfn;
2035 	if (!ms)
2036 		return 0;
2037 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
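	/* Memslot 0 has the highest base_gfn, so it bounds guest memory. */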
2038 	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2039 
2040 	while (args->count < bufsize) {
2041 		hva = gfn_to_hva(kvm, cur_gfn);
2042 		if (kvm_is_error_hva(hva))
2043 			return 0;
2044 		/* Decrement only if we actually flipped the bit to 0 */
2045 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2046 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2047 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2048 			pgstev = 0;
2049 		/* Save the value */
2050 		res[args->count++] = (pgstev >> 24) & 0x43;
2051 		/* If the next bit is too far away, stop. */
2052 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2053 			return 0;
2054 		/* If we reached the previous "next", find the next one */
2055 		if (cur_gfn == next_gfn)
2056 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2057 		/* Reached the end of memory or of the buffer, stop */
2058 		if ((next_gfn >= mem_end) ||
2059 		    (next_gfn - args->start_gfn >= bufsize))
2060 			return 0;
2061 		cur_gfn++;
2062 		/* Reached the end of the current memslot, take the next one. */
2063 		if (cur_gfn - ms->base_gfn >= ms->npages) {
2064 			ms = gfn_to_memslot(kvm, cur_gfn);
2065 			if (!ms)
2066 				return 0;
2067 		}
2068 	}
2069 	return 0;
2070 }
2071 
2072 /*
2073  * This function searches for the next page with dirty CMMA attributes, and
2074  * saves the attributes in the buffer up to either the end of the buffer or
2075  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2076  * no trailing clean bytes are saved.
2077  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2078  * In case no dirty bits were found, or if CMMA was not enabled or used,
2079  * the output buffer will indicate a length of 0.
2080 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2081 				  struct kvm_s390_cmma_log *args)
2082 {
2083 	unsigned long bufsize;
2084 	int srcu_idx, peek, ret;
2085 	u8 *values;
2086 
2087 	if (!kvm->arch.use_cmma)
2088 		return -ENXIO;
2089 	/* Invalid/unsupported flags were specified */
2090 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2091 		return -EINVAL;
2092 	/* Migration mode query, and we are not doing a migration */
2093 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2094 	if (!peek && !kvm->arch.migration_mode)
2095 		return -EINVAL;
2096 	/* CMMA is disabled or was not used, or the buffer has length zero */
2097 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2098 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2099 		memset(args, 0, sizeof(*args));
2100 		return 0;
2101 	}
2102 	/* We are not peeking, and there are no dirty pages */
2103 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2104 		memset(args, 0, sizeof(*args));
2105 		return 0;
2106 	}
2107 
2108 	values = vmalloc(bufsize);
2109 	if (!values)
2110 		return -ENOMEM;
2111 
2112 	mmap_read_lock(kvm->mm);
2113 	srcu_idx = srcu_read_lock(&kvm->srcu);
2114 	if (peek)
2115 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2116 	else
2117 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2118 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2119 	mmap_read_unlock(kvm->mm);
2120 
2121 	if (kvm->arch.migration_mode)
2122 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2123 	else
2124 		args->remaining = 0;
2125 
2126 	if (copy_to_user((void __user *)args->values, values, args->count))
2127 		ret = -EFAULT;
2128 
2129 	vfree(values);
2130 	return ret;
2131 }
2132 
2133 /*
2134  * This function sets the CMMA attributes for the given pages. If the input
2135  * buffer has zero length, no action is taken, otherwise the attributes are
2136  * set and the mm->context.uses_cmm flag is set.
2137  */
2138 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2139 				  const struct kvm_s390_cmma_log *args)
2140 {
2141 	unsigned long hva, mask, pgstev, i;
2142 	uint8_t *bits;
2143 	int srcu_idx, r = 0;
2144 
2145 	mask = args->mask;
2146 
2147 	if (!kvm->arch.use_cmma)
2148 		return -ENXIO;
2149 	/* invalid/unsupported flags */
2150 	if (args->flags != 0)
2151 		return -EINVAL;
2152 	/* Enforce sane limit on memory allocation */
2153 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2154 		return -EINVAL;
2155 	/* Nothing to do */
2156 	if (args->count == 0)
2157 		return 0;
2158 
2159 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2160 	if (!bits)
2161 		return -ENOMEM;
2162 
2163 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2164 	if (r) {
2165 		r = -EFAULT;
2166 		goto out;
2167 	}
2168 
2169 	mmap_read_lock(kvm->mm);
2170 	srcu_idx = srcu_read_lock(&kvm->srcu);
2171 	for (i = 0; i < args->count; i++) {
2172 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2173 		if (kvm_is_error_hva(hva)) {
2174 			r = -EFAULT;
2175 			break;
2176 		}
2177 
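		/*
		 * Shift the value into its PGSTE position and limit the mask
		 * to the usage state and NODAT bits.
		 */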
2178 		pgstev = bits[i];
2179 		pgstev = pgstev << 24;
2180 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2181 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2182 	}
2183 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2184 	mmap_read_unlock(kvm->mm);
2185 
2186 	if (!kvm->mm->context.uses_cmm) {
2187 		mmap_write_lock(kvm->mm);
2188 		kvm->mm->context.uses_cmm = 1;
2189 		mmap_write_unlock(kvm->mm);
2190 	}
2191 out:
2192 	vfree(bits);
2193 	return r;
2194 }
2195 
2196 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2197 {
2198 	struct kvm_vcpu *vcpu;
2199 	u16 rc, rrc;
2200 	int ret = 0;
2201 	int i;
2202 
2203 	/*
2204 	 * We ignore failures and try to destroy as many CPUs as possible.
2205 	 * At the same time we must not free the assigned resources when
2206 	 * this fails, as the ultravisor still has access to that memory.
2207 	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2208 	 * behind.
2209 	 * We want to return the first failure rc and rrc, though.
2210 	 */
2211 	kvm_for_each_vcpu(i, vcpu, kvm) {
2212 		mutex_lock(&vcpu->mutex);
2213 		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2214 			*rcp = rc;
2215 			*rrcp = rrc;
2216 			ret = -EIO;
2217 		}
2218 		mutex_unlock(&vcpu->mutex);
2219 	}
2220 	return ret;
2221 }
2222 
2223 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2224 {
2225 	int i, r = 0;
2226 	u16 dummy;
2227 
2228 	struct kvm_vcpu *vcpu;
2229 
2230 	kvm_for_each_vcpu(i, vcpu, kvm) {
2231 		mutex_lock(&vcpu->mutex);
2232 		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2233 		mutex_unlock(&vcpu->mutex);
2234 		if (r)
2235 			break;
2236 	}
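	/* On failure, convert the already-created protected CPUs back. */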
2237 	if (r)
2238 		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2239 	return r;
2240 }
2241 
2242 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2243 {
2244 	int r = 0;
2245 	u16 dummy;
2246 	void __user *argp = (void __user *)cmd->data;
2247 
2248 	switch (cmd->cmd) {
2249 	case KVM_PV_ENABLE: {
2250 		r = -EINVAL;
2251 		if (kvm_s390_pv_is_protected(kvm))
2252 			break;
2253 
2254 		/*
2255 		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
2256 		 *  esca, we need no cleanup in the error cases below
2257 		 */
2258 		r = sca_switch_to_extended(kvm);
2259 		if (r)
2260 			break;
2261 
2262 		mmap_write_lock(current->mm);
2263 		r = gmap_mark_unmergeable();
2264 		mmap_write_unlock(current->mm);
2265 		if (r)
2266 			break;
2267 
2268 		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2269 		if (r)
2270 			break;
2271 
2272 		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2273 		if (r)
2274 			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2275 
2276 		/* we need to block service interrupts from now on */
2277 		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2278 		break;
2279 	}
2280 	case KVM_PV_DISABLE: {
2281 		r = -EINVAL;
2282 		if (!kvm_s390_pv_is_protected(kvm))
2283 			break;
2284 
2285 		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2286 		/*
2287 		 * If a CPU could not be destroyed, destroy VM will also fail.
2288 		 * There is no point in trying to destroy it. Instead return
2289 		 * the rc and rrc from the first CPU that failed destroying.
2290 		 */
2291 		if (r)
2292 			break;
2293 		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2294 
2295 		/* no need to block service interrupts any more */
2296 		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2297 		break;
2298 	}
2299 	case KVM_PV_SET_SEC_PARMS: {
2300 		struct kvm_s390_pv_sec_parm parms = {};
2301 		void *hdr;
2302 
2303 		r = -EINVAL;
2304 		if (!kvm_s390_pv_is_protected(kvm))
2305 			break;
2306 
2307 		r = -EFAULT;
2308 		if (copy_from_user(&parms, argp, sizeof(parms)))
2309 			break;
2310 
2311 		/* Currently restricted to 8KB */
2312 		r = -EINVAL;
2313 		if (parms.length > PAGE_SIZE * 2)
2314 			break;
2315 
2316 		r = -ENOMEM;
2317 		hdr = vmalloc(parms.length);
2318 		if (!hdr)
2319 			break;
2320 
2321 		r = -EFAULT;
2322 		if (!copy_from_user(hdr, (void __user *)parms.origin,
2323 				    parms.length))
2324 			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2325 						      &cmd->rc, &cmd->rrc);
2326 
2327 		vfree(hdr);
2328 		break;
2329 	}
2330 	case KVM_PV_UNPACK: {
2331 		struct kvm_s390_pv_unp unp = {};
2332 
2333 		r = -EINVAL;
2334 		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2335 			break;
2336 
2337 		r = -EFAULT;
2338 		if (copy_from_user(&unp, argp, sizeof(unp)))
2339 			break;
2340 
2341 		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2342 				       &cmd->rc, &cmd->rrc);
2343 		break;
2344 	}
2345 	case KVM_PV_VERIFY: {
2346 		r = -EINVAL;
2347 		if (!kvm_s390_pv_is_protected(kvm))
2348 			break;
2349 
2350 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2351 				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2352 		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2353 			     cmd->rrc);
2354 		break;
2355 	}
2356 	case KVM_PV_PREP_RESET: {
2357 		r = -EINVAL;
2358 		if (!kvm_s390_pv_is_protected(kvm))
2359 			break;
2360 
2361 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2362 				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2363 		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2364 			     cmd->rc, cmd->rrc);
2365 		break;
2366 	}
2367 	case KVM_PV_UNSHARE_ALL: {
2368 		r = -EINVAL;
2369 		if (!kvm_s390_pv_is_protected(kvm))
2370 			break;
2371 
2372 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2373 				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2374 		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2375 			     cmd->rc, cmd->rrc);
2376 		break;
2377 	}
2378 	default:
2379 		r = -ENOTTY;
2380 	}
2381 	return r;
2382 }
2383 
2384 long kvm_arch_vm_ioctl(struct file *filp,
2385 		       unsigned int ioctl, unsigned long arg)
2386 {
2387 	struct kvm *kvm = filp->private_data;
2388 	void __user *argp = (void __user *)arg;
2389 	struct kvm_device_attr attr;
2390 	int r;
2391 
2392 	switch (ioctl) {
2393 	case KVM_S390_INTERRUPT: {
2394 		struct kvm_s390_interrupt s390int;
2395 
2396 		r = -EFAULT;
2397 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2398 			break;
2399 		r = kvm_s390_inject_vm(kvm, &s390int);
2400 		break;
2401 	}
2402 	case KVM_CREATE_IRQCHIP: {
2403 		struct kvm_irq_routing_entry routing;
2404 
2405 		r = -EINVAL;
2406 		if (kvm->arch.use_irqchip) {
2407 			/* Set up dummy routing. */
2408 			memset(&routing, 0, sizeof(routing));
2409 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2410 		}
2411 		break;
2412 	}
2413 	case KVM_SET_DEVICE_ATTR: {
2414 		r = -EFAULT;
2415 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2416 			break;
2417 		r = kvm_s390_vm_set_attr(kvm, &attr);
2418 		break;
2419 	}
2420 	case KVM_GET_DEVICE_ATTR: {
2421 		r = -EFAULT;
2422 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2423 			break;
2424 		r = kvm_s390_vm_get_attr(kvm, &attr);
2425 		break;
2426 	}
2427 	case KVM_HAS_DEVICE_ATTR: {
2428 		r = -EFAULT;
2429 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2430 			break;
2431 		r = kvm_s390_vm_has_attr(kvm, &attr);
2432 		break;
2433 	}
2434 	case KVM_S390_GET_SKEYS: {
2435 		struct kvm_s390_skeys args;
2436 
2437 		r = -EFAULT;
2438 		if (copy_from_user(&args, argp,
2439 				   sizeof(struct kvm_s390_skeys)))
2440 			break;
2441 		r = kvm_s390_get_skeys(kvm, &args);
2442 		break;
2443 	}
2444 	case KVM_S390_SET_SKEYS: {
2445 		struct kvm_s390_skeys args;
2446 
2447 		r = -EFAULT;
2448 		if (copy_from_user(&args, argp,
2449 				   sizeof(struct kvm_s390_skeys)))
2450 			break;
2451 		r = kvm_s390_set_skeys(kvm, &args);
2452 		break;
2453 	}
2454 	case KVM_S390_GET_CMMA_BITS: {
2455 		struct kvm_s390_cmma_log args;
2456 
2457 		r = -EFAULT;
2458 		if (copy_from_user(&args, argp, sizeof(args)))
2459 			break;
2460 		mutex_lock(&kvm->slots_lock);
2461 		r = kvm_s390_get_cmma_bits(kvm, &args);
2462 		mutex_unlock(&kvm->slots_lock);
2463 		if (!r) {
2464 			r = copy_to_user(argp, &args, sizeof(args));
2465 			if (r)
2466 				r = -EFAULT;
2467 		}
2468 		break;
2469 	}
2470 	case KVM_S390_SET_CMMA_BITS: {
2471 		struct kvm_s390_cmma_log args;
2472 
2473 		r = -EFAULT;
2474 		if (copy_from_user(&args, argp, sizeof(args)))
2475 			break;
2476 		mutex_lock(&kvm->slots_lock);
2477 		r = kvm_s390_set_cmma_bits(kvm, &args);
2478 		mutex_unlock(&kvm->slots_lock);
2479 		break;
2480 	}
2481 	case KVM_S390_PV_COMMAND: {
2482 		struct kvm_pv_cmd args;
2483 
2484 		/* protvirt means user sigp */
2485 		kvm->arch.user_cpu_state_ctrl = 1;
2486 		r = 0;
2487 		if (!is_prot_virt_host()) {
2488 			r = -EINVAL;
2489 			break;
2490 		}
2491 		if (copy_from_user(&args, argp, sizeof(args))) {
2492 			r = -EFAULT;
2493 			break;
2494 		}
2495 		if (args.flags) {
2496 			r = -EINVAL;
2497 			break;
2498 		}
2499 		mutex_lock(&kvm->lock);
2500 		r = kvm_s390_handle_pv(kvm, &args);
2501 		mutex_unlock(&kvm->lock);
2502 		if (copy_to_user(argp, &args, sizeof(args))) {
2503 			r = -EFAULT;
2504 			break;
2505 		}
2506 		break;
2507 	}
2508 	default:
2509 		r = -ENOTTY;
2510 	}
2511 
2512 	return r;
2513 }
2514 
2515 static int kvm_s390_apxa_installed(void)
2516 {
2517 	struct ap_config_info info;
2518 
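	/* Query the AP configuration information (QCI) for the APXA facility. */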
2519 	if (ap_instructions_available()) {
2520 		if (ap_qci(&info) == 0)
2521 			return info.apxa;
2522 	}
2523 
2524 	return 0;
2525 }
2526 
2527 /*
2528  * The format of the crypto control block (CRYCB) is specified in the 3 low
2529  * order bits of the CRYCB designation (CRYCBD) field as follows:
2530  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2531  *	     AP extended addressing (APXA) facility are installed.
2532  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2533  * Format 2: Both the APXA and MSAX3 facilities are installed.
2534  */
2535 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2536 {
2537 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2538 
2539 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2540 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2541 
2542 	/* Check whether MSAX3 is installed */
2543 	if (!test_kvm_facility(kvm, 76))
2544 		return;
2545 
2546 	if (kvm_s390_apxa_installed())
2547 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2548 	else
2549 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2550 }
2551 
2552 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2553 			       unsigned long *aqm, unsigned long *adm)
2554 {
2555 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2556 
2557 	mutex_lock(&kvm->lock);
2558 	kvm_s390_vcpu_block_all(kvm);
2559 
2560 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2561 	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2562 		memcpy(crycb->apcb1.apm, apm, 32);
2563 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2564 			 apm[0], apm[1], apm[2], apm[3]);
2565 		memcpy(crycb->apcb1.aqm, aqm, 32);
2566 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2567 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2568 		memcpy(crycb->apcb1.adm, adm, 32);
2569 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2570 			 adm[0], adm[1], adm[2], adm[3]);
2571 		break;
2572 	case CRYCB_FORMAT1:
2573 	case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
2574 		memcpy(crycb->apcb0.apm, apm, 8);
2575 		memcpy(crycb->apcb0.aqm, aqm, 2);
2576 		memcpy(crycb->apcb0.adm, adm, 2);
2577 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2578 			 apm[0], *((unsigned short *)aqm),
2579 			 *((unsigned short *)adm));
2580 		break;
2581 	default:	/* Cannot happen */
2582 		break;
2583 	}
2584 
2585 	/* recreate the shadow crycb for each vcpu */
2586 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2587 	kvm_s390_vcpu_unblock_all(kvm);
2588 	mutex_unlock(&kvm->lock);
2589 }
2590 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2591 
2592 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2593 {
2594 	mutex_lock(&kvm->lock);
2595 	kvm_s390_vcpu_block_all(kvm);
2596 
2597 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2598 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2599 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2600 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2601 
2602 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2603 	/* recreate the shadow crycb for each vcpu */
2604 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2605 	kvm_s390_vcpu_unblock_all(kvm);
2606 	mutex_unlock(&kvm->lock);
2607 }
2608 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2609 
2610 static u64 kvm_s390_get_initial_cpuid(void)
2611 {
2612 	struct cpuid cpuid;
2613 
2614 	get_cpu_id(&cpuid);
2615 	cpuid.version = 0xff;
2616 	return *((u64 *) &cpuid);
2617 }
2618 
2619 static void kvm_s390_crypto_init(struct kvm *kvm)
2620 {
2621 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2622 	kvm_s390_set_crycb_format(kvm);
2623 
2624 	if (!test_kvm_facility(kvm, 76))
2625 		return;
2626 
2627 	/* Enable AES/DEA protected key functions by default */
2628 	kvm->arch.crypto.aes_kw = 1;
2629 	kvm->arch.crypto.dea_kw = 1;
2630 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2631 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2632 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2633 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2634 }
2635 
2636 static void sca_dispose(struct kvm *kvm)
2637 {
2638 	if (kvm->arch.use_esca)
2639 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2640 	else
2641 		free_page((unsigned long)(kvm->arch.sca));
2642 	kvm->arch.sca = NULL;
2643 }
2644 
2645 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2646 {
2647 	gfp_t alloc_flags = GFP_KERNEL;
2648 	int i, rc;
2649 	char debug_name[16];
2650 	static unsigned long sca_offset;
2651 
2652 	rc = -EINVAL;
2653 #ifdef CONFIG_KVM_S390_UCONTROL
2654 	if (type & ~KVM_VM_S390_UCONTROL)
2655 		goto out_err;
2656 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2657 		goto out_err;
2658 #else
2659 	if (type)
2660 		goto out_err;
2661 #endif
2662 
2663 	rc = s390_enable_sie();
2664 	if (rc)
2665 		goto out_err;
2666 
2667 	rc = -ENOMEM;
2668 
2669 	if (!sclp.has_64bscao)
2670 		alloc_flags |= GFP_DMA;
2671 	rwlock_init(&kvm->arch.sca_lock);
2672 	/* start with basic SCA */
2673 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2674 	if (!kvm->arch.sca)
2675 		goto out_err;
2676 	mutex_lock(&kvm_lock);
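	/* Stagger each VM's basic SCA within its page in 16 byte steps. */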
2677 	sca_offset += 16;
2678 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2679 		sca_offset = 0;
2680 	kvm->arch.sca = (struct bsca_block *)
2681 			((char *) kvm->arch.sca + sca_offset);
2682 	mutex_unlock(&kvm_lock);
2683 
2684 	sprintf(debug_name, "kvm-%u", current->pid);
2685 
2686 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2687 	if (!kvm->arch.dbf)
2688 		goto out_err;
2689 
2690 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2691 	kvm->arch.sie_page2 =
2692 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2693 	if (!kvm->arch.sie_page2)
2694 		goto out_err;
2695 
2696 	kvm->arch.sie_page2->kvm = kvm;
2697 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2698 
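	/*
	 * fac_mask bounds the facilities userspace may enable for the guest;
	 * fac_list holds the facilities that are exposed by default.
	 */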
2699 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2700 		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2701 					      (kvm_s390_fac_base[i] |
2702 					       kvm_s390_fac_ext[i]);
2703 		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2704 					      kvm_s390_fac_base[i];
2705 	}
2706 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2707 
2708 	/* we are always in czam mode - even on pre-z14 machines */
2709 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2710 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2711 	/* we emulate STHYI in kvm */
2712 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2713 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2714 	if (MACHINE_HAS_TLB_GUEST) {
2715 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2716 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2717 	}
2718 
2719 	if (css_general_characteristics.aiv && test_facility(65))
2720 		set_kvm_facility(kvm->arch.model.fac_mask, 65);
2721 
2722 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2723 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2724 
2725 	kvm_s390_crypto_init(kvm);
2726 
2727 	mutex_init(&kvm->arch.float_int.ais_lock);
2728 	spin_lock_init(&kvm->arch.float_int.lock);
2729 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2730 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2731 	init_waitqueue_head(&kvm->arch.ipte_wq);
2732 	mutex_init(&kvm->arch.ipte_mutex);
2733 
2734 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2735 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2736 
2737 	if (type & KVM_VM_S390_UCONTROL) {
2738 		kvm->arch.gmap = NULL;
2739 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2740 	} else {
2741 		if (sclp.hamax == U64_MAX)
2742 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2743 		else
2744 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2745 						    sclp.hamax + 1);
2746 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2747 		if (!kvm->arch.gmap)
2748 			goto out_err;
2749 		kvm->arch.gmap->private = kvm;
2750 		kvm->arch.gmap->pfault_enabled = 0;
2751 	}
2752 
2753 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2754 	kvm->arch.use_skf = sclp.has_skey;
2755 	spin_lock_init(&kvm->arch.start_stop_lock);
2756 	kvm_s390_vsie_init(kvm);
2757 	if (use_gisa)
2758 		kvm_s390_gisa_init(kvm);
2759 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2760 
2761 	return 0;
2762 out_err:
2763 	free_page((unsigned long)kvm->arch.sie_page2);
2764 	debug_unregister(kvm->arch.dbf);
2765 	sca_dispose(kvm);
2766 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2767 	return rc;
2768 }
2769 
2770 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2771 {
2772 	u16 rc, rrc;
2773 
2774 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2775 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2776 	kvm_s390_clear_local_irqs(vcpu);
2777 	kvm_clear_async_pf_completion_queue(vcpu);
2778 	if (!kvm_is_ucontrol(vcpu->kvm))
2779 		sca_del_vcpu(vcpu);
2780 
2781 	if (kvm_is_ucontrol(vcpu->kvm))
2782 		gmap_remove(vcpu->arch.gmap);
2783 
2784 	if (vcpu->kvm->arch.use_cmma)
2785 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2786 	/* We cannot hold the vcpu mutex here, we are already dying */
2787 	if (kvm_s390_pv_cpu_get_handle(vcpu))
2788 		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2789 	free_page((unsigned long)(vcpu->arch.sie_block));
2790 }
2791 
2792 static void kvm_free_vcpus(struct kvm *kvm)
2793 {
2794 	unsigned int i;
2795 	struct kvm_vcpu *vcpu;
2796 
2797 	kvm_for_each_vcpu(i, vcpu, kvm)
2798 		kvm_vcpu_destroy(vcpu);
2799 
2800 	mutex_lock(&kvm->lock);
2801 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2802 		kvm->vcpus[i] = NULL;
2803 
2804 	atomic_set(&kvm->online_vcpus, 0);
2805 	mutex_unlock(&kvm->lock);
2806 }
2807 
2808 void kvm_arch_destroy_vm(struct kvm *kvm)
2809 {
2810 	u16 rc, rrc;
2811 
2812 	kvm_free_vcpus(kvm);
2813 	sca_dispose(kvm);
2814 	kvm_s390_gisa_destroy(kvm);
2815 	/*
2816 	 * We are already at the end of life and kvm->lock is not taken.
2817 	 * This is ok as the file descriptor is closed by now and nobody
2818 	 * can mess with the pv state. To avoid lockdep_assert_held from
2819 	 * complaining we do not use kvm_s390_pv_is_protected.
2820 	 */
2821 	if (kvm_s390_pv_get_handle(kvm))
2822 		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2823 	debug_unregister(kvm->arch.dbf);
2824 	free_page((unsigned long)kvm->arch.sie_page2);
2825 	if (!kvm_is_ucontrol(kvm))
2826 		gmap_remove(kvm->arch.gmap);
2827 	kvm_s390_destroy_adapters(kvm);
2828 	kvm_s390_clear_float_irqs(kvm);
2829 	kvm_s390_vsie_destroy(kvm);
2830 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2831 }
2832 
2833 /* Section: vcpu related */
2834 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2835 {
2836 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2837 	if (!vcpu->arch.gmap)
2838 		return -ENOMEM;
2839 	vcpu->arch.gmap->private = vcpu->kvm;
2840 
2841 	return 0;
2842 }
2843 
2844 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2845 {
2846 	if (!kvm_s390_use_sca_entries())
2847 		return;
2848 	read_lock(&vcpu->kvm->arch.sca_lock);
2849 	if (vcpu->kvm->arch.use_esca) {
2850 		struct esca_block *sca = vcpu->kvm->arch.sca;
2851 
2852 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2853 		sca->cpu[vcpu->vcpu_id].sda = 0;
2854 	} else {
2855 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2856 
2857 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2858 		sca->cpu[vcpu->vcpu_id].sda = 0;
2859 	}
2860 	read_unlock(&vcpu->kvm->arch.sca_lock);
2861 }
2862 
2863 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2864 {
2865 	if (!kvm_s390_use_sca_entries()) {
2866 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2867 
2868 		/* we still need the basic sca for the ipte control */
2869 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2870 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2871 		return;
2872 	}
2873 	read_lock(&vcpu->kvm->arch.sca_lock);
2874 	if (vcpu->kvm->arch.use_esca) {
2875 		struct esca_block *sca = vcpu->kvm->arch.sca;
2876 
2877 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2878 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2879 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2880 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2881 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2882 	} else {
2883 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2884 
2885 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2886 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2887 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2888 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2889 	}
2890 	read_unlock(&vcpu->kvm->arch.sca_lock);
2891 }
2892 
2893 /* Basic SCA to Extended SCA data copy routines */
2894 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2895 {
2896 	d->sda = s->sda;
2897 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2898 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2899 }
2900 
2901 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2902 {
2903 	int i;
2904 
2905 	d->ipte_control = s->ipte_control;
2906 	d->mcn[0] = s->mcn;
2907 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2908 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2909 }
2910 
2911 static int sca_switch_to_extended(struct kvm *kvm)
2912 {
2913 	struct bsca_block *old_sca = kvm->arch.sca;
2914 	struct esca_block *new_sca;
2915 	struct kvm_vcpu *vcpu;
2916 	unsigned int vcpu_idx;
2917 	u32 scaol, scaoh;
2918 
2919 	if (kvm->arch.use_esca)
2920 		return 0;
2921 
2922 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2923 	if (!new_sca)
2924 		return -ENOMEM;
2925 
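	/* The SIE block takes the SCA origin split into high and low words. */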
2926 	scaoh = (u32)((u64)(new_sca) >> 32);
2927 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2928 
2929 	kvm_s390_vcpu_block_all(kvm);
2930 	write_lock(&kvm->arch.sca_lock);
2931 
2932 	sca_copy_b_to_e(new_sca, old_sca);
2933 
2934 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2935 		vcpu->arch.sie_block->scaoh = scaoh;
2936 		vcpu->arch.sie_block->scaol = scaol;
2937 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2938 	}
2939 	kvm->arch.sca = new_sca;
2940 	kvm->arch.use_esca = 1;
2941 
2942 	write_unlock(&kvm->arch.sca_lock);
2943 	kvm_s390_vcpu_unblock_all(kvm);
2944 
2945 	free_page((unsigned long)old_sca);
2946 
2947 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2948 		 old_sca, kvm->arch.sca);
2949 	return 0;
2950 }
2951 
2952 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2953 {
2954 	int rc;
2955 
2956 	if (!kvm_s390_use_sca_entries()) {
2957 		if (id < KVM_MAX_VCPUS)
2958 			return true;
2959 		return false;
2960 	}
2961 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2962 		return true;
2963 	if (!sclp.has_esca || !sclp.has_64bscao)
2964 		return false;
2965 
2966 	mutex_lock(&kvm->lock);
2967 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2968 	mutex_unlock(&kvm->lock);
2969 
2970 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2971 }
2972 
2973 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2974 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2975 {
2976 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2977 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2978 	vcpu->arch.cputm_start = get_tod_clock_fast();
2979 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2980 }
2981 
2982 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2983 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2984 {
2985 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2986 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2987 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2988 	vcpu->arch.cputm_start = 0;
2989 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2990 }
2991 
2992 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2993 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2994 {
2995 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2996 	vcpu->arch.cputm_enabled = true;
2997 	__start_cpu_timer_accounting(vcpu);
2998 }
2999 
3000 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3001 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3002 {
3003 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3004 	__stop_cpu_timer_accounting(vcpu);
3005 	vcpu->arch.cputm_enabled = false;
3006 }
3007 
3008 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3009 {
3010 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3011 	__enable_cpu_timer_accounting(vcpu);
3012 	preempt_enable();
3013 }
3014 
3015 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3016 {
3017 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3018 	__disable_cpu_timer_accounting(vcpu);
3019 	preempt_enable();
3020 }
3021 
3022 /* set the cpu timer - may only be called from the VCPU thread itself */
3023 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3024 {
3025 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3026 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3027 	if (vcpu->arch.cputm_enabled)
3028 		vcpu->arch.cputm_start = get_tod_clock_fast();
3029 	vcpu->arch.sie_block->cputm = cputm;
3030 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3031 	preempt_enable();
3032 }
3033 
3034 /* update and get the cpu timer - can also be called from other VCPU threads */
3035 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3036 {
3037 	unsigned int seq;
3038 	__u64 value;
3039 
3040 	if (unlikely(!vcpu->arch.cputm_enabled))
3041 		return vcpu->arch.sie_block->cputm;
3042 
3043 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3044 	do {
3045 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3046 		/*
3047 		 * If the writer would ever execute a read in the critical
3048 		 * section, e.g. in irq context, we have a deadlock.
3049 		 */
3050 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3051 		value = vcpu->arch.sie_block->cputm;
3052 		/* if cputm_start is 0, accounting is being started/stopped */
3053 		if (likely(vcpu->arch.cputm_start))
3054 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3055 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3056 	preempt_enable();
3057 	return value;
3058 }
3059 
3060 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3061 {
3062 
3063 	gmap_enable(vcpu->arch.enabled_gmap);
3064 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3065 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3066 		__start_cpu_timer_accounting(vcpu);
3067 	vcpu->cpu = cpu;
3068 }
3069 
3070 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3071 {
3072 	vcpu->cpu = -1;
3073 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3074 		__stop_cpu_timer_accounting(vcpu);
3075 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3076 	vcpu->arch.enabled_gmap = gmap_get_enabled();
3077 	gmap_disable(vcpu->arch.enabled_gmap);
3078 
3079 }
3080 
3081 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3082 {
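	/* Inherit the VM-wide TOD epoch so all VCPUs share the same guest TOD. */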
3083 	mutex_lock(&vcpu->kvm->lock);
3084 	preempt_disable();
3085 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3086 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3087 	preempt_enable();
3088 	mutex_unlock(&vcpu->kvm->lock);
3089 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3090 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3091 		sca_add_vcpu(vcpu);
3092 	}
3093 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3094 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3095 	/* make vcpu_load load the right gmap on the first trigger */
3096 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3097 }
3098 
3099 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3100 {
3101 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3102 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3103 		return true;
3104 	return false;
3105 }
3106 
3107 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3108 {
3109 	/* At least one ECC subfunction must be present */
3110 	return kvm_has_pckmo_subfunc(kvm, 32) ||
3111 	       kvm_has_pckmo_subfunc(kvm, 33) ||
3112 	       kvm_has_pckmo_subfunc(kvm, 34) ||
3113 	       kvm_has_pckmo_subfunc(kvm, 40) ||
3114 	       kvm_has_pckmo_subfunc(kvm, 41);
3115 
3116 }
3117 
3118 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3119 {
3120 	/*
3121 	 * If the AP instructions are not being interpreted and the MSAX3
3122 	 * facility is not configured for the guest, there is nothing to set up.
3123 	 */
3124 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3125 		return;
3126 
3127 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3128 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3129 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3130 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3131 
3132 	if (vcpu->kvm->arch.crypto.apie)
3133 		vcpu->arch.sie_block->eca |= ECA_APIE;
3134 
3135 	/* Set up protected key support */
3136 	if (vcpu->kvm->arch.crypto.aes_kw) {
3137 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3138 		/* ECC keys are also wrapped with the AES key */
3139 		if (kvm_has_pckmo_ecc(vcpu->kvm))
3140 			vcpu->arch.sie_block->ecd |= ECD_ECC;
3141 	}
3142 
3143 	if (vcpu->kvm->arch.crypto.dea_kw)
3144 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3145 }
3146 
3147 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3148 {
3149 	free_page(vcpu->arch.sie_block->cbrlo);
3150 	vcpu->arch.sie_block->cbrlo = 0;
3151 }
3152 
3153 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3154 {
3155 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
3156 	if (!vcpu->arch.sie_block->cbrlo)
3157 		return -ENOMEM;
3158 	return 0;
3159 }
3160 
3161 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3162 {
3163 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3164 
3165 	vcpu->arch.sie_block->ibc = model->ibc;
3166 	if (test_kvm_facility(vcpu->kvm, 7))
3167 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3168 }
3169 
3170 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3171 {
3172 	int rc = 0;
3173 	u16 uvrc, uvrrc;
3174 
3175 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3176 						    CPUSTAT_SM |
3177 						    CPUSTAT_STOPPED);
3178 
3179 	if (test_kvm_facility(vcpu->kvm, 78))
3180 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3181 	else if (test_kvm_facility(vcpu->kvm, 8))
3182 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3183 
3184 	kvm_s390_vcpu_setup_model(vcpu);
3185 
3186 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3187 	if (MACHINE_HAS_ESOP)
3188 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3189 	if (test_kvm_facility(vcpu->kvm, 9))
3190 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
3191 	if (test_kvm_facility(vcpu->kvm, 73))
3192 		vcpu->arch.sie_block->ecb |= ECB_TE;
3193 
3194 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3195 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3196 	if (test_kvm_facility(vcpu->kvm, 130))
3197 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3198 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3199 	if (sclp.has_cei)
3200 		vcpu->arch.sie_block->eca |= ECA_CEI;
3201 	if (sclp.has_ib)
3202 		vcpu->arch.sie_block->eca |= ECA_IB;
3203 	if (sclp.has_siif)
3204 		vcpu->arch.sie_block->eca |= ECA_SII;
3205 	if (sclp.has_sigpif)
3206 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3207 	if (test_kvm_facility(vcpu->kvm, 129)) {
3208 		vcpu->arch.sie_block->eca |= ECA_VX;
3209 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3210 	}
3211 	if (test_kvm_facility(vcpu->kvm, 139))
3212 		vcpu->arch.sie_block->ecd |= ECD_MEF;
3213 	if (test_kvm_facility(vcpu->kvm, 156))
3214 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3215 	if (vcpu->arch.sie_block->gd) {
3216 		vcpu->arch.sie_block->eca |= ECA_AIV;
3217 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3218 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3219 	}
3220 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3221 					| SDNXC;
3222 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3223 
3224 	if (sclp.has_kss)
3225 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3226 	else
3227 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3228 
3229 	if (vcpu->kvm->arch.use_cmma) {
3230 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3231 		if (rc)
3232 			return rc;
3233 	}
3234 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3235 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3236 
3237 	vcpu->arch.sie_block->hpid = HPID_KVM;
3238 
3239 	kvm_s390_vcpu_crypto_setup(vcpu);
3240 
3241 	mutex_lock(&vcpu->kvm->lock);
3242 	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3243 		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3244 		if (rc)
3245 			kvm_s390_vcpu_unsetup_cmma(vcpu);
3246 	}
3247 	mutex_unlock(&vcpu->kvm->lock);
3248 
3249 	return rc;
3250 }
3251 
3252 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3253 {
3254 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3255 		return -EINVAL;
3256 	return 0;
3257 }
3258 
3259 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3260 {
3261 	struct sie_page *sie_page;
3262 	int rc;
3263 
3264 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3265 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3266 	if (!sie_page)
3267 		return -ENOMEM;
3268 
3269 	vcpu->arch.sie_block = &sie_page->sie_block;
3270 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3271 
3272 	/* the real guest size will always be smaller than msl */
3273 	vcpu->arch.sie_block->mso = 0;
3274 	vcpu->arch.sie_block->msl = sclp.hamax;
3275 
3276 	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3277 	spin_lock_init(&vcpu->arch.local_int.lock);
3278 	vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3279 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3280 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3281 	seqcount_init(&vcpu->arch.cputm_seqcount);
3282 
3283 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3284 	kvm_clear_async_pf_completion_queue(vcpu);
3285 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3286 				    KVM_SYNC_GPRS |
3287 				    KVM_SYNC_ACRS |
3288 				    KVM_SYNC_CRS |
3289 				    KVM_SYNC_ARCH0 |
3290 				    KVM_SYNC_PFAULT |
3291 				    KVM_SYNC_DIAG318;
3292 	kvm_s390_set_prefix(vcpu, 0);
3293 	if (test_kvm_facility(vcpu->kvm, 64))
3294 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3295 	if (test_kvm_facility(vcpu->kvm, 82))
3296 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3297 	if (test_kvm_facility(vcpu->kvm, 133))
3298 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3299 	if (test_kvm_facility(vcpu->kvm, 156))
3300 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3301 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
3302 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3303 	 */
3304 	if (MACHINE_HAS_VX)
3305 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3306 	else
3307 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3308 
3309 	if (kvm_is_ucontrol(vcpu->kvm)) {
3310 		rc = __kvm_ucontrol_vcpu_init(vcpu);
3311 		if (rc)
3312 			goto out_free_sie_block;
3313 	}
3314 
3315 	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3316 		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3317 	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3318 
3319 	rc = kvm_s390_vcpu_setup(vcpu);
3320 	if (rc)
3321 		goto out_ucontrol_uninit;
3322 	return 0;
3323 
3324 out_ucontrol_uninit:
3325 	if (kvm_is_ucontrol(vcpu->kvm))
3326 		gmap_remove(vcpu->arch.gmap);
3327 out_free_sie_block:
3328 	free_page((unsigned long)(vcpu->arch.sie_block));
3329 	return rc;
3330 }
3331 
3332 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3333 {
3334 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3335 	return kvm_s390_vcpu_has_irq(vcpu, 0);
3336 }
3337 
3338 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3339 {
3340 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3341 }
3342 
3343 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3344 {
3345 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3346 	exit_sie(vcpu);
3347 }
3348 
3349 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3350 {
3351 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3352 }
3353 
3354 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3355 {
3356 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3357 	exit_sie(vcpu);
3358 }
3359 
3360 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3361 {
3362 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3363 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3364 }
3365 
3366 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3367 {
3368 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3369 }
3370 
3371 /*
3372  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3373  * If the CPU is not running (e.g. waiting as idle) the function will
3374  * return immediately. */
3375 void exit_sie(struct kvm_vcpu *vcpu)
3376 {
3377 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3378 	kvm_s390_vsie_kick(vcpu);
3379 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3380 		cpu_relax();
3381 }
3382 
3383 /* Kick a guest cpu out of SIE to process a request synchronously */
3384 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3385 {
3386 	kvm_make_request(req, vcpu);
3387 	kvm_s390_vcpu_request(vcpu);
3388 }
3389 
3390 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3391 			      unsigned long end)
3392 {
3393 	struct kvm *kvm = gmap->private;
3394 	struct kvm_vcpu *vcpu;
3395 	unsigned long prefix;
3396 	int i;
3397 
3398 	if (gmap_is_shadow(gmap))
3399 		return;
3400 	if (start >= 1UL << 31)
3401 		/* We are only interested in prefix pages */
3402 		return;
3403 	kvm_for_each_vcpu(i, vcpu, kvm) {
3404 		/* match against both prefix pages */
3405 		prefix = kvm_s390_get_prefix(vcpu);
3406 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3407 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3408 				   start, end);
3409 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3410 		}
3411 	}
3412 }
3413 
3414 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3415 {
3416 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3417 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3418 	    halt_poll_max_steal) {
3419 		vcpu->stat.halt_no_poll_steal++;
3420 		return true;
3421 	}
3422 	return false;
3423 }
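
/*
 * Minimal sketch of the check above, outside kernel context: polling is
 * skipped once the average steal time exceeds halt_poll_max_steal percent of
 * one timer tick.  The tick length and the CPU-timer unit conversion used
 * here are assumptions for illustration only.
 */
#include <stdbool.h>
#include <stdint.h>

#define SKETCH_TICK_USEC	10000ULL	/* assumed: 100HZ tick, in microseconds */

static bool steal_above_threshold(uint64_t avg_steal_timer, uint8_t max_steal_pct)
{
	/* CPU-timer units: 1 microsecond corresponds to 1 << 12 units */
	return avg_steal_timer * 100 / (SKETCH_TICK_USEC << 12) >= max_steal_pct;
}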
3424 
3425 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3426 {
3427 	/* kvm common code refers to this, but never calls it */
3428 	BUG();
3429 	return 0;
3430 }
3431 
3432 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3433 					   struct kvm_one_reg *reg)
3434 {
3435 	int r = -EINVAL;
3436 
3437 	switch (reg->id) {
3438 	case KVM_REG_S390_TODPR:
3439 		r = put_user(vcpu->arch.sie_block->todpr,
3440 			     (u32 __user *)reg->addr);
3441 		break;
3442 	case KVM_REG_S390_EPOCHDIFF:
3443 		r = put_user(vcpu->arch.sie_block->epoch,
3444 			     (u64 __user *)reg->addr);
3445 		break;
3446 	case KVM_REG_S390_CPU_TIMER:
3447 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3448 			     (u64 __user *)reg->addr);
3449 		break;
3450 	case KVM_REG_S390_CLOCK_COMP:
3451 		r = put_user(vcpu->arch.sie_block->ckc,
3452 			     (u64 __user *)reg->addr);
3453 		break;
3454 	case KVM_REG_S390_PFTOKEN:
3455 		r = put_user(vcpu->arch.pfault_token,
3456 			     (u64 __user *)reg->addr);
3457 		break;
3458 	case KVM_REG_S390_PFCOMPARE:
3459 		r = put_user(vcpu->arch.pfault_compare,
3460 			     (u64 __user *)reg->addr);
3461 		break;
3462 	case KVM_REG_S390_PFSELECT:
3463 		r = put_user(vcpu->arch.pfault_select,
3464 			     (u64 __user *)reg->addr);
3465 		break;
3466 	case KVM_REG_S390_PP:
3467 		r = put_user(vcpu->arch.sie_block->pp,
3468 			     (u64 __user *)reg->addr);
3469 		break;
3470 	case KVM_REG_S390_GBEA:
3471 		r = put_user(vcpu->arch.sie_block->gbea,
3472 			     (u64 __user *)reg->addr);
3473 		break;
3474 	default:
3475 		break;
3476 	}
3477 
3478 	return r;
3479 }
3480 
3481 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3482 					   struct kvm_one_reg *reg)
3483 {
3484 	int r = -EINVAL;
3485 	__u64 val;
3486 
3487 	switch (reg->id) {
3488 	case KVM_REG_S390_TODPR:
3489 		r = get_user(vcpu->arch.sie_block->todpr,
3490 			     (u32 __user *)reg->addr);
3491 		break;
3492 	case KVM_REG_S390_EPOCHDIFF:
3493 		r = get_user(vcpu->arch.sie_block->epoch,
3494 			     (u64 __user *)reg->addr);
3495 		break;
3496 	case KVM_REG_S390_CPU_TIMER:
3497 		r = get_user(val, (u64 __user *)reg->addr);
3498 		if (!r)
3499 			kvm_s390_set_cpu_timer(vcpu, val);
3500 		break;
3501 	case KVM_REG_S390_CLOCK_COMP:
3502 		r = get_user(vcpu->arch.sie_block->ckc,
3503 			     (u64 __user *)reg->addr);
3504 		break;
3505 	case KVM_REG_S390_PFTOKEN:
3506 		r = get_user(vcpu->arch.pfault_token,
3507 			     (u64 __user *)reg->addr);
3508 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3509 			kvm_clear_async_pf_completion_queue(vcpu);
3510 		break;
3511 	case KVM_REG_S390_PFCOMPARE:
3512 		r = get_user(vcpu->arch.pfault_compare,
3513 			     (u64 __user *)reg->addr);
3514 		break;
3515 	case KVM_REG_S390_PFSELECT:
3516 		r = get_user(vcpu->arch.pfault_select,
3517 			     (u64 __user *)reg->addr);
3518 		break;
3519 	case KVM_REG_S390_PP:
3520 		r = get_user(vcpu->arch.sie_block->pp,
3521 			     (u64 __user *)reg->addr);
3522 		break;
3523 	case KVM_REG_S390_GBEA:
3524 		r = get_user(vcpu->arch.sie_block->gbea,
3525 			     (u64 __user *)reg->addr);
3526 		break;
3527 	default:
3528 		break;
3529 	}
3530 
3531 	return r;
3532 }
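
/*
 * Illustrative userspace sketch (not from this file): the two handlers above
 * back KVM_GET_ONE_REG/KVM_SET_ONE_REG.  A VMM could read the guest CPU timer
 * like this; "vcpu_fd" is an assumed, already-open vCPU file descriptor.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int get_guest_cpu_timer(int vcpu_fd, uint64_t *val)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_S390_CPU_TIMER,
		.addr = (uint64_t)(unsigned long)val,	/* kernel writes here via put_user() */
	};

	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);	/* 0 on success, -1/errno on error */
}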
3533 
3534 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3535 {
3536 	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3537 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3538 	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3539 
3540 	kvm_clear_async_pf_completion_queue(vcpu);
3541 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3542 		kvm_s390_vcpu_stop(vcpu);
3543 	kvm_s390_clear_local_irqs(vcpu);
3544 }
3545 
3546 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3547 {
3548 	/* Initial reset is a superset of the normal reset */
3549 	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3550 
3551 	/*
3552 	 * This equals initial cpu reset in pop, but we don't switch to ESA.
3553 	 * We do not only reset the internal data, but also ...
3554 	 */
3555 	vcpu->arch.sie_block->gpsw.mask = 0;
3556 	vcpu->arch.sie_block->gpsw.addr = 0;
3557 	kvm_s390_set_prefix(vcpu, 0);
3558 	kvm_s390_set_cpu_timer(vcpu, 0);
3559 	vcpu->arch.sie_block->ckc = 0;
3560 	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3561 	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3562 	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3563 
3564 	/* ... the data in sync regs */
3565 	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3566 	vcpu->run->s.regs.ckc = 0;
3567 	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3568 	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3569 	vcpu->run->psw_addr = 0;
3570 	vcpu->run->psw_mask = 0;
3571 	vcpu->run->s.regs.todpr = 0;
3572 	vcpu->run->s.regs.cputm = 0;
3573 	vcpu->run->s.regs.ckc = 0;
3574 	vcpu->run->s.regs.pp = 0;
3575 	vcpu->run->s.regs.gbea = 1;
3576 	vcpu->run->s.regs.fpc = 0;
3577 	/*
3578 	 * Do not reset these registers in the protected case, as some of
3579 	 * them are overlaid and they are not accessible in this case
3580 	 * anyway.
3581 	 */
3582 	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3583 		vcpu->arch.sie_block->gbea = 1;
3584 		vcpu->arch.sie_block->pp = 0;
3585 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3586 		vcpu->arch.sie_block->todpr = 0;
3587 	}
3588 }
3589 
3590 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3591 {
3592 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3593 
3594 	/* Clear reset is a superset of the initial reset */
3595 	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3596 
3597 	memset(&regs->gprs, 0, sizeof(regs->gprs));
3598 	memset(&regs->vrs, 0, sizeof(regs->vrs));
3599 	memset(&regs->acrs, 0, sizeof(regs->acrs));
3600 	memset(&regs->gscb, 0, sizeof(regs->gscb));
3601 
3602 	regs->etoken = 0;
3603 	regs->etoken_extension = 0;
3604 }
3605 
3606 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3607 {
3608 	vcpu_load(vcpu);
3609 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3610 	vcpu_put(vcpu);
3611 	return 0;
3612 }
3613 
3614 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3615 {
3616 	vcpu_load(vcpu);
3617 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3618 	vcpu_put(vcpu);
3619 	return 0;
3620 }
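
/*
 * Illustrative userspace sketch (not from this file): fetching the 16 general
 * purpose registers exposed by the handler above.  "vcpu_fd" is assumed.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int get_guest_gprs(int vcpu_fd, uint64_t gprs[16])
{
	struct kvm_regs regs;
	int i;

	if (ioctl(vcpu_fd, KVM_GET_REGS, &regs) < 0)
		return -1;
	for (i = 0; i < 16; i++)
		gprs[i] = regs.gprs[i];
	return 0;
}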
3621 
3622 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3623 				  struct kvm_sregs *sregs)
3624 {
3625 	vcpu_load(vcpu);
3626 
3627 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3628 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3629 
3630 	vcpu_put(vcpu);
3631 	return 0;
3632 }
3633 
3634 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3635 				  struct kvm_sregs *sregs)
3636 {
3637 	vcpu_load(vcpu);
3638 
3639 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3640 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3641 
3642 	vcpu_put(vcpu);
3643 	return 0;
3644 }
3645 
3646 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3647 {
3648 	int ret = 0;
3649 
3650 	vcpu_load(vcpu);
3651 
3652 	if (test_fp_ctl(fpu->fpc)) {
3653 		ret = -EINVAL;
3654 		goto out;
3655 	}
3656 	vcpu->run->s.regs.fpc = fpu->fpc;
3657 	if (MACHINE_HAS_VX)
3658 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3659 				 (freg_t *) fpu->fprs);
3660 	else
3661 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3662 
3663 out:
3664 	vcpu_put(vcpu);
3665 	return ret;
3666 }
3667 
3668 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3669 {
3670 	vcpu_load(vcpu);
3671 
3672 	/* make sure we have the latest values */
3673 	save_fpu_regs();
3674 	if (MACHINE_HAS_VX)
3675 		convert_vx_to_fp((freg_t *) fpu->fprs,
3676 				 (__vector128 *) vcpu->run->s.regs.vrs);
3677 	else
3678 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3679 	fpu->fpc = vcpu->run->s.regs.fpc;
3680 
3681 	vcpu_put(vcpu);
3682 	return 0;
3683 }
3684 
3685 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3686 {
3687 	int rc = 0;
3688 
3689 	if (!is_vcpu_stopped(vcpu))
3690 		rc = -EBUSY;
3691 	else {
3692 		vcpu->run->psw_mask = psw.mask;
3693 		vcpu->run->psw_addr = psw.addr;
3694 	}
3695 	return rc;
3696 }
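
/*
 * Illustrative userspace sketch (not from this file): setting the initial PSW
 * of a stopped vCPU, as handled by the function above.  The PSW mask value is
 * only an assumption for a guest that starts in 64-bit addressing mode.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int set_initial_psw(int vcpu_fd, uint64_t start_address)
{
	struct kvm_s390_psw psw = {
		.mask = 0x0000000180000000ULL,	/* assumed: EA and BA bits set */
		.addr = start_address,
	};

	/* fails with EBUSY if the vCPU is not in the stopped state */
	return ioctl(vcpu_fd, KVM_S390_SET_INITIAL_PSW, &psw);
}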
3697 
3698 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3699 				  struct kvm_translation *tr)
3700 {
3701 	return -EINVAL; /* not implemented yet */
3702 }
3703 
3704 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3705 			      KVM_GUESTDBG_USE_HW_BP | \
3706 			      KVM_GUESTDBG_ENABLE)
3707 
3708 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3709 					struct kvm_guest_debug *dbg)
3710 {
3711 	int rc = 0;
3712 
3713 	vcpu_load(vcpu);
3714 
3715 	vcpu->guest_debug = 0;
3716 	kvm_s390_clear_bp_data(vcpu);
3717 
3718 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3719 		rc = -EINVAL;
3720 		goto out;
3721 	}
3722 	if (!sclp.has_gpere) {
3723 		rc = -EINVAL;
3724 		goto out;
3725 	}
3726 
3727 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3728 		vcpu->guest_debug = dbg->control;
3729 		/* enforce guest PER */
3730 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3731 
3732 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3733 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3734 	} else {
3735 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3736 		vcpu->arch.guestdbg.last_bp = 0;
3737 	}
3738 
3739 	if (rc) {
3740 		vcpu->guest_debug = 0;
3741 		kvm_s390_clear_bp_data(vcpu);
3742 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3743 	}
3744 
3745 out:
3746 	vcpu_put(vcpu);
3747 	return rc;
3748 }
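
/*
 * Illustrative userspace sketch (not from this file): enabling single-step
 * debugging via the handler above.  Only flag combinations covered by
 * VALID_GUESTDBG_FLAGS are accepted, and the host needs sclp.has_gpere.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_singlestep(int vcpu_fd)
{
	struct kvm_guest_debug dbg;

	memset(&dbg, 0, sizeof(dbg));
	dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;

	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}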
3749 
3750 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3751 				    struct kvm_mp_state *mp_state)
3752 {
3753 	int ret;
3754 
3755 	vcpu_load(vcpu);
3756 
3757 	/* CHECK_STOP and LOAD are not supported yet */
3758 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3759 				      KVM_MP_STATE_OPERATING;
3760 
3761 	vcpu_put(vcpu);
3762 	return ret;
3763 }
3764 
3765 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3766 				    struct kvm_mp_state *mp_state)
3767 {
3768 	int rc = 0;
3769 
3770 	vcpu_load(vcpu);
3771 
3772 	/* user space knows about this interface - let it control the state */
3773 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3774 
3775 	switch (mp_state->mp_state) {
3776 	case KVM_MP_STATE_STOPPED:
3777 		rc = kvm_s390_vcpu_stop(vcpu);
3778 		break;
3779 	case KVM_MP_STATE_OPERATING:
3780 		rc = kvm_s390_vcpu_start(vcpu);
3781 		break;
3782 	case KVM_MP_STATE_LOAD:
3783 		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3784 			rc = -ENXIO;
3785 			break;
3786 		}
3787 		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3788 		break;
3789 	case KVM_MP_STATE_CHECK_STOP:
3790 		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
3791 	default:
3792 		rc = -ENXIO;
3793 	}
3794 
3795 	vcpu_put(vcpu);
3796 	return rc;
3797 }
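
/*
 * Illustrative userspace sketch (not from this file): stopping and restarting
 * a vCPU through KVM_SET_MP_STATE.  Note that the first such call switches the
 * VM to user-controlled CPU state, as done in the handler above.
 */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int set_vcpu_state(int vcpu_fd, int running)
{
	struct kvm_mp_state mp_state = {
		.mp_state = running ? KVM_MP_STATE_OPERATING : KVM_MP_STATE_STOPPED,
	};

	return ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp_state);
}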
3798 
3799 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3800 {
3801 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3802 }
3803 
3804 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3805 {
3806 retry:
3807 	kvm_s390_vcpu_request_handled(vcpu);
3808 	if (!kvm_request_pending(vcpu))
3809 		return 0;
3810 	/*
3811 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3812 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3813 	 * This ensures that the ipte instruction for this request has
3814 	 * already finished. We might race against a second unmapper that
3815 	 * wants to set the blocking bit. Let's just retry the request loop.
3816 	 */
3817 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3818 		int rc;
3819 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3820 					  kvm_s390_get_prefix(vcpu),
3821 					  PAGE_SIZE * 2, PROT_WRITE);
3822 		if (rc) {
3823 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3824 			return rc;
3825 		}
3826 		goto retry;
3827 	}
3828 
3829 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3830 		vcpu->arch.sie_block->ihcpu = 0xffff;
3831 		goto retry;
3832 	}
3833 
3834 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3835 		if (!ibs_enabled(vcpu)) {
3836 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3837 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3838 		}
3839 		goto retry;
3840 	}
3841 
3842 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3843 		if (ibs_enabled(vcpu)) {
3844 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3845 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3846 		}
3847 		goto retry;
3848 	}
3849 
3850 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3851 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3852 		goto retry;
3853 	}
3854 
3855 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3856 		/*
3857 		 * Disable CMM virtualization; we will emulate the ESSA
3858 		 * instruction manually, in order to provide additional
3859 		 * functionalities needed for live migration.
3860 		 */
3861 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3862 		goto retry;
3863 	}
3864 
3865 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3866 		/*
3867 		 * Re-enable CMM virtualization if CMMA is available and
3868 		 * CMM has been used.
3869 		 */
3870 		if ((vcpu->kvm->arch.use_cmma) &&
3871 		    (vcpu->kvm->mm->context.uses_cmm))
3872 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3873 		goto retry;
3874 	}
3875 
3876 	/* nothing to do, just clear the request */
3877 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3878 	/* we left the vsie handler, nothing to do, just clear the request */
3879 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3880 
3881 	return 0;
3882 }
3883 
3884 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3885 {
3886 	struct kvm_vcpu *vcpu;
3887 	struct kvm_s390_tod_clock_ext htod;
3888 	int i;
3889 
3890 	preempt_disable();
3891 
3892 	get_tod_clock_ext((char *)&htod);
3893 
3894 	kvm->arch.epoch = gtod->tod - htod.tod;
3895 	kvm->arch.epdx = 0;
3896 	if (test_kvm_facility(kvm, 139)) {
3897 		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3898 		if (kvm->arch.epoch > gtod->tod)
3899 			kvm->arch.epdx -= 1;
3900 	}
3901 
3902 	kvm_s390_vcpu_block_all(kvm);
3903 	kvm_for_each_vcpu(i, vcpu, kvm) {
3904 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3905 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3906 	}
3907 
3908 	kvm_s390_vcpu_unblock_all(kvm);
3909 	preempt_enable();
3910 }
3911 
3912 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3913 {
3914 	if (!mutex_trylock(&kvm->lock))
3915 		return 0;
3916 	__kvm_s390_set_tod_clock(kvm, gtod);
3917 	mutex_unlock(&kvm->lock);
3918 	return 1;
3919 }
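
/*
 * Minimal arithmetic sketch of the epoch handling above, outside kernel
 * context: the guest TOD is realized as guest_tod = host_tod + epoch, so
 * __kvm_s390_set_tod_clock() stores epoch = gtod->tod - htod.tod and, with the
 * multiple-epoch facility (139), borrows from the epoch index when the
 * subtraction wraps.  The struct and field names below are assumptions.
 */
#include <stdint.h>

struct tod_sketch {
	uint8_t  epoch_idx;	/* high part, multiple-epoch facility */
	uint64_t tod;		/* low 64 bits of the TOD clock */
};

static void compute_epoch(const struct tod_sketch *guest,
			  const struct tod_sketch *host,
			  int has_mef, uint64_t *epoch, uint8_t *epdx)
{
	*epoch = guest->tod - host->tod;	/* wraps modulo 2^64 */
	*epdx = 0;
	if (has_mef) {
		*epdx = guest->epoch_idx - host->epoch_idx;
		if (*epoch > guest->tod)	/* subtraction wrapped -> borrow */
			*epdx -= 1;
	}
}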
3920 
3921 /**
3922  * kvm_arch_fault_in_page - fault-in guest page if necessary
3923  * @vcpu: The corresponding virtual cpu
3924  * @gpa: Guest physical address
3925  * @writable: Whether the page should be writable or not
3926  *
3927  * Make sure that a guest page has been faulted-in on the host.
3928  *
3929  * Return: Zero on success, negative error code otherwise.
3930  */
3931 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3932 {
3933 	return gmap_fault(vcpu->arch.gmap, gpa,
3934 			  writable ? FAULT_FLAG_WRITE : 0);
3935 }
3936 
3937 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3938 				      unsigned long token)
3939 {
3940 	struct kvm_s390_interrupt inti;
3941 	struct kvm_s390_irq irq;
3942 
3943 	if (start_token) {
3944 		irq.u.ext.ext_params2 = token;
3945 		irq.type = KVM_S390_INT_PFAULT_INIT;
3946 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3947 	} else {
3948 		inti.type = KVM_S390_INT_PFAULT_DONE;
3949 		inti.parm64 = token;
3950 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3951 	}
3952 }
3953 
3954 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3955 				     struct kvm_async_pf *work)
3956 {
3957 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3958 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3959 
3960 	return true;
3961 }
3962 
3963 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3964 				 struct kvm_async_pf *work)
3965 {
3966 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3967 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3968 }
3969 
3970 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3971 			       struct kvm_async_pf *work)
3972 {
3973 	/* s390 will always inject the page directly */
3974 }
3975 
3976 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3977 {
3978 	/*
3979 	 * s390 will always inject the page directly,
3980 	 * but we still want check_async_completion to cleanup
3981 	 */
3982 	return true;
3983 }
3984 
3985 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3986 {
3987 	hva_t hva;
3988 	struct kvm_arch_async_pf arch;
3989 
3990 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3991 		return false;
3992 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3993 	    vcpu->arch.pfault_compare)
3994 		return false;
3995 	if (psw_extint_disabled(vcpu))
3996 		return false;
3997 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3998 		return false;
3999 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4000 		return false;
4001 	if (!vcpu->arch.gmap->pfault_enabled)
4002 		return false;
4003 
4004 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4005 	hva += current->thread.gmap_addr & ~PAGE_MASK;
4006 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4007 		return false;
4008 
4009 	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4010 }
4011 
4012 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4013 {
4014 	int rc, cpuflags;
4015 
4016 	/*
4017 	 * On s390 notifications for arriving pages will be delivered directly
4018 	 * to the guest but the housekeeping for completed pfaults is
4019 	 * handled outside the worker.
4020 	 */
4021 	kvm_check_async_pf_completion(vcpu);
4022 
4023 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4024 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4025 
4026 	if (need_resched())
4027 		schedule();
4028 
4029 	if (!kvm_is_ucontrol(vcpu->kvm)) {
4030 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
4031 		if (rc)
4032 			return rc;
4033 	}
4034 
4035 	rc = kvm_s390_handle_requests(vcpu);
4036 	if (rc)
4037 		return rc;
4038 
4039 	if (guestdbg_enabled(vcpu)) {
4040 		kvm_s390_backup_guest_per_regs(vcpu);
4041 		kvm_s390_patch_guest_per_regs(vcpu);
4042 	}
4043 
4044 	clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask);
4045 
4046 	vcpu->arch.sie_block->icptcode = 0;
4047 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4048 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4049 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
4050 
4051 	return 0;
4052 }
4053 
4054 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4055 {
4056 	struct kvm_s390_pgm_info pgm_info = {
4057 		.code = PGM_ADDRESSING,
4058 	};
4059 	u8 opcode, ilen;
4060 	int rc;
4061 
4062 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4063 	trace_kvm_s390_sie_fault(vcpu);
4064 
4065 	/*
4066 	 * We want to inject an addressing exception, which is defined as a
4067 	 * suppressing or terminating exception. However, since we came here
4068 	 * by a DAT access exception, the PSW still points to the faulting
4069 	 * instruction since DAT exceptions are nullifying. So we've got
4070 	 * to look up the current opcode to get the length of the instruction
4071 	 * to be able to forward the PSW.
4072 	 */
4073 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4074 	ilen = insn_length(opcode);
4075 	if (rc < 0) {
4076 		return rc;
4077 	} else if (rc) {
4078 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
4079 		 * Forward by arbitrary ilc, injection will take care of
4080 		 * nullification if necessary.
4081 		 */
4082 		pgm_info = vcpu->arch.pgm;
4083 		ilen = 4;
4084 	}
4085 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4086 	kvm_s390_forward_psw(vcpu, ilen);
4087 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4088 }
4089 
4090 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4091 {
4092 	struct mcck_volatile_info *mcck_info;
4093 	struct sie_page *sie_page;
4094 
4095 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4096 		   vcpu->arch.sie_block->icptcode);
4097 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4098 
4099 	if (guestdbg_enabled(vcpu))
4100 		kvm_s390_restore_guest_per_regs(vcpu);
4101 
4102 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4103 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4104 
4105 	if (exit_reason == -EINTR) {
4106 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
4107 		sie_page = container_of(vcpu->arch.sie_block,
4108 					struct sie_page, sie_block);
4109 		mcck_info = &sie_page->mcck_info;
4110 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
4111 		return 0;
4112 	}
4113 
4114 	if (vcpu->arch.sie_block->icptcode > 0) {
4115 		int rc = kvm_handle_sie_intercept(vcpu);
4116 
4117 		if (rc != -EOPNOTSUPP)
4118 			return rc;
4119 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4120 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4121 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4122 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4123 		return -EREMOTE;
4124 	} else if (exit_reason != -EFAULT) {
4125 		vcpu->stat.exit_null++;
4126 		return 0;
4127 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
4128 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4129 		vcpu->run->s390_ucontrol.trans_exc_code =
4130 						current->thread.gmap_addr;
4131 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
4132 		return -EREMOTE;
4133 	} else if (current->thread.gmap_pfault) {
4134 		trace_kvm_s390_major_guest_pfault(vcpu);
4135 		current->thread.gmap_pfault = 0;
4136 		if (kvm_arch_setup_async_pf(vcpu))
4137 			return 0;
4138 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4139 	}
4140 	return vcpu_post_run_fault_in_sie(vcpu);
4141 }
4142 
4143 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4144 static int __vcpu_run(struct kvm_vcpu *vcpu)
4145 {
4146 	int rc, exit_reason;
4147 	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4148 
4149 	/*
4150 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
4151 	 * ning the guest), so that memslots (and other stuff) are protected
4152 	 */
4153 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4154 
4155 	do {
4156 		rc = vcpu_pre_run(vcpu);
4157 		if (rc)
4158 			break;
4159 
4160 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4161 		/*
4162 		 * As PF_VCPU will be used in the fault handler, there must be
4163 		 * no uaccess between guest_enter and guest_exit.
4164 		 */
4165 		local_irq_disable();
4166 		guest_enter_irqoff();
4167 		__disable_cpu_timer_accounting(vcpu);
4168 		local_irq_enable();
4169 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4170 			memcpy(sie_page->pv_grregs,
4171 			       vcpu->run->s.regs.gprs,
4172 			       sizeof(sie_page->pv_grregs));
4173 		}
4174 		exit_reason = sie64a(vcpu->arch.sie_block,
4175 				     vcpu->run->s.regs.gprs);
4176 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4177 			memcpy(vcpu->run->s.regs.gprs,
4178 			       sie_page->pv_grregs,
4179 			       sizeof(sie_page->pv_grregs));
4180 			/*
4181 			 * We're not allowed to inject interrupts on intercepts
4182 			 * that leave the guest state in an "in-between" state
4183 			 * where the next SIE entry will do a continuation.
4184 			 * Fence interrupts in our "internal" PSW.
4185 			 */
4186 			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4187 			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4188 				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4189 			}
4190 		}
4191 		local_irq_disable();
4192 		__enable_cpu_timer_accounting(vcpu);
4193 		guest_exit_irqoff();
4194 		local_irq_enable();
4195 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4196 
4197 		rc = vcpu_post_run(vcpu, exit_reason);
4198 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4199 
4200 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4201 	return rc;
4202 }
4203 
4204 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4205 {
4206 	struct kvm_run *kvm_run = vcpu->run;
4207 	struct runtime_instr_cb *riccb;
4208 	struct gs_cb *gscb;
4209 
4210 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4211 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4212 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4213 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4214 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4215 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4216 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4217 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4218 	}
4219 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4220 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4221 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4222 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4223 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4224 			kvm_clear_async_pf_completion_queue(vcpu);
4225 	}
4226 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4227 		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4228 		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4229 	}
4230 	/*
4231 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
4232 	 * we should enable RI here instead of doing the lazy enablement.
4233 	 */
4234 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4235 	    test_kvm_facility(vcpu->kvm, 64) &&
4236 	    riccb->v &&
4237 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4238 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4239 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4240 	}
4241 	/*
4242 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
4243 	 * we should enable GS here instead of doing the lazy enablement.
4244 	 */
4245 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4246 	    test_kvm_facility(vcpu->kvm, 133) &&
4247 	    gscb->gssm &&
4248 	    !vcpu->arch.gs_enabled) {
4249 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4250 		vcpu->arch.sie_block->ecb |= ECB_GS;
4251 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4252 		vcpu->arch.gs_enabled = 1;
4253 	}
4254 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4255 	    test_kvm_facility(vcpu->kvm, 82)) {
4256 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4257 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4258 	}
4259 	if (MACHINE_HAS_GS) {
4260 		preempt_disable();
4261 		__ctl_set_bit(2, 4);
4262 		if (current->thread.gs_cb) {
4263 			vcpu->arch.host_gscb = current->thread.gs_cb;
4264 			save_gs_cb(vcpu->arch.host_gscb);
4265 		}
4266 		if (vcpu->arch.gs_enabled) {
4267 			current->thread.gs_cb = (struct gs_cb *)
4268 						&vcpu->run->s.regs.gscb;
4269 			restore_gs_cb(current->thread.gs_cb);
4270 		}
4271 		preempt_enable();
4272 	}
4273 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
4274 }
4275 
4276 static void sync_regs(struct kvm_vcpu *vcpu)
4277 {
4278 	struct kvm_run *kvm_run = vcpu->run;
4279 
4280 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4281 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4282 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4283 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4284 		/* some control register changes require a tlb flush */
4285 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4286 	}
4287 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4288 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4289 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4290 	}
4291 	save_access_regs(vcpu->arch.host_acrs);
4292 	restore_access_regs(vcpu->run->s.regs.acrs);
4293 	/* save host (userspace) fprs/vrs */
4294 	save_fpu_regs();
4295 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4296 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4297 	if (MACHINE_HAS_VX)
4298 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4299 	else
4300 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4301 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4302 	if (test_fp_ctl(current->thread.fpu.fpc))
4303 		/* User space provided an invalid FPC, let's clear it */
4304 		current->thread.fpu.fpc = 0;
4305 
4306 	/* Sync fmt2 only data */
4307 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4308 		sync_regs_fmt2(vcpu);
4309 	} else {
4310 		/*
4311 		 * In several places we have to modify our internal view to
4312 		 * not do things that are disallowed by the ultravisor. For
4313 		 * example we must not inject interrupts after specific exits
4314 		 * (e.g. 112 prefix page not secure). We do this by turning
4315 		 * off the machine check, external and I/O interrupt bits
4316 		 * of our PSW copy. To avoid getting validity intercepts, we
4317 		 * do only accept the condition code from userspace.
4318 		 */
4319 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4320 		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4321 						   PSW_MASK_CC;
4322 	}
4323 
4324 	kvm_run->kvm_dirty_regs = 0;
4325 }
4326 
4327 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4328 {
4329 	struct kvm_run *kvm_run = vcpu->run;
4330 
4331 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4332 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4333 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4334 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4335 	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4336 	if (MACHINE_HAS_GS) {
4337 		preempt_disable();
4338 		__ctl_set_bit(2, 4);
4339 		if (vcpu->arch.gs_enabled)
4340 			save_gs_cb(current->thread.gs_cb);
4341 		current->thread.gs_cb = vcpu->arch.host_gscb;
4342 		restore_gs_cb(vcpu->arch.host_gscb);
4343 		if (!vcpu->arch.host_gscb)
4344 			__ctl_clear_bit(2, 4);
4345 		vcpu->arch.host_gscb = NULL;
4346 		preempt_enable();
4347 	}
4348 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
4349 }
4350 
4351 static void store_regs(struct kvm_vcpu *vcpu)
4352 {
4353 	struct kvm_run *kvm_run = vcpu->run;
4354 
4355 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4356 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4357 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4358 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4359 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4360 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4361 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4362 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4363 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4364 	save_access_regs(vcpu->run->s.regs.acrs);
4365 	restore_access_regs(vcpu->arch.host_acrs);
4366 	/* Save guest register state */
4367 	save_fpu_regs();
4368 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4369 	/* Restore will be done lazily at return */
4370 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4371 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4372 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4373 		store_regs_fmt2(vcpu);
4374 }
4375 
4376 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4377 {
4378 	struct kvm_run *kvm_run = vcpu->run;
4379 	int rc;
4380 
4381 	if (kvm_run->immediate_exit)
4382 		return -EINTR;
4383 
4384 	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4385 	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4386 		return -EINVAL;
4387 
4388 	vcpu_load(vcpu);
4389 
4390 	if (guestdbg_exit_pending(vcpu)) {
4391 		kvm_s390_prepare_debug_exit(vcpu);
4392 		rc = 0;
4393 		goto out;
4394 	}
4395 
4396 	kvm_sigset_activate(vcpu);
4397 
4398 	/*
4399 	 * no need to check the return value of vcpu_start as it can only have
4400 	 * an error for protvirt, but protvirt means user cpu state
4401 	 */
4402 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4403 		kvm_s390_vcpu_start(vcpu);
4404 	} else if (is_vcpu_stopped(vcpu)) {
4405 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4406 				   vcpu->vcpu_id);
4407 		rc = -EINVAL;
4408 		goto out;
4409 	}
4410 
4411 	sync_regs(vcpu);
4412 	enable_cpu_timer_accounting(vcpu);
4413 
4414 	might_fault();
4415 	rc = __vcpu_run(vcpu);
4416 
4417 	if (signal_pending(current) && !rc) {
4418 		kvm_run->exit_reason = KVM_EXIT_INTR;
4419 		rc = -EINTR;
4420 	}
4421 
4422 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4423 		kvm_s390_prepare_debug_exit(vcpu);
4424 		rc = 0;
4425 	}
4426 
4427 	if (rc == -EREMOTE) {
4428 		/* userspace support is needed, kvm_run has been prepared */
4429 		rc = 0;
4430 	}
4431 
4432 	disable_cpu_timer_accounting(vcpu);
4433 	store_regs(vcpu);
4434 
4435 	kvm_sigset_deactivate(vcpu);
4436 
4437 	vcpu->stat.exit_userspace++;
4438 out:
4439 	vcpu_put(vcpu);
4440 	return rc;
4441 }
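
/*
 * Illustrative userspace sketch (not from this file): a minimal loop around
 * the ioctl above.  -EREMOTE from __vcpu_run is translated to rc == 0 with
 * kvm_run already filled in, so userspace only inspects exit_reason.  The
 * kvm_run pointer is assumed to come from mmap() of the vCPU fd.
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int run_vcpu_once(int vcpu_fd, struct kvm_run *run)
{
	if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
		return -1;		/* e.g. EINTR on a pending signal */

	switch (run->exit_reason) {
	case KVM_EXIT_S390_SIEIC:	/* intercept needs userspace handling */
		printf("icptcode %u ipa %#x ipb %#x\n",
		       (unsigned)run->s390_sieic.icptcode,
		       (unsigned)run->s390_sieic.ipa,
		       (unsigned)run->s390_sieic.ipb);
		break;
	case KVM_EXIT_INTR:		/* interrupted by a signal */
		break;
	default:
		break;
	}
	return 0;
}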
4442 
4443 /*
4444  * store status at address
4445  * we have two special cases:
4446  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4447  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4448  */
4449 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4450 {
4451 	unsigned char archmode = 1;
4452 	freg_t fprs[NUM_FPRS];
4453 	unsigned int px;
4454 	u64 clkcomp, cputm;
4455 	int rc;
4456 
4457 	px = kvm_s390_get_prefix(vcpu);
4458 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4459 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4460 			return -EFAULT;
4461 		gpa = 0;
4462 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4463 		if (write_guest_real(vcpu, 163, &archmode, 1))
4464 			return -EFAULT;
4465 		gpa = px;
4466 	} else
4467 		gpa -= __LC_FPREGS_SAVE_AREA;
4468 
4469 	/* manually convert vector registers if necessary */
4470 	if (MACHINE_HAS_VX) {
4471 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4472 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4473 				     fprs, 128);
4474 	} else {
4475 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4476 				     vcpu->run->s.regs.fprs, 128);
4477 	}
4478 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4479 			      vcpu->run->s.regs.gprs, 128);
4480 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4481 			      &vcpu->arch.sie_block->gpsw, 16);
4482 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4483 			      &px, 4);
4484 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4485 			      &vcpu->run->s.regs.fpc, 4);
4486 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4487 			      &vcpu->arch.sie_block->todpr, 4);
4488 	cputm = kvm_s390_get_cpu_timer(vcpu);
4489 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4490 			      &cputm, 8);
4491 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4492 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4493 			      &clkcomp, 8);
4494 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4495 			      &vcpu->run->s.regs.acrs, 64);
4496 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4497 			      &vcpu->arch.sie_block->gcr, 128);
4498 	return rc ? -EFAULT : 0;
4499 }
4500 
4501 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4502 {
4503 	/*
4504 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4505 	 * switch in the run ioctl. Let's update our copies before we save
4506 	 * it into the save area
4507 	 */
4508 	save_fpu_regs();
4509 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4510 	save_access_regs(vcpu->run->s.regs.acrs);
4511 
4512 	return kvm_s390_store_status_unloaded(vcpu, addr);
4513 }
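
/*
 * Illustrative userspace sketch (not from this file): asking KVM to store the
 * architected status of a stopped vCPU at its prefix area, using the special
 * address handled by kvm_s390_store_status_unloaded() above.
 */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int store_status_at_prefix(int vcpu_fd)
{
	return ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_PREFIXED);
}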
4514 
4515 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4516 {
4517 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4518 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4519 }
4520 
4521 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4522 {
4523 	unsigned int i;
4524 	struct kvm_vcpu *vcpu;
4525 
4526 	kvm_for_each_vcpu(i, vcpu, kvm) {
4527 		__disable_ibs_on_vcpu(vcpu);
4528 	}
4529 }
4530 
4531 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4532 {
4533 	if (!sclp.has_ibs)
4534 		return;
4535 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4536 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4537 }
4538 
4539 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4540 {
4541 	int i, online_vcpus, r = 0, started_vcpus = 0;
4542 
4543 	if (!is_vcpu_stopped(vcpu))
4544 		return 0;
4545 
4546 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4547 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4548 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4549 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4550 
4551 	/* Let's tell the UV that we want to change into the operating state */
4552 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4553 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4554 		if (r) {
4555 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4556 			return r;
4557 		}
4558 	}
4559 
4560 	for (i = 0; i < online_vcpus; i++) {
4561 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4562 			started_vcpus++;
4563 	}
4564 
4565 	if (started_vcpus == 0) {
4566 		/* we're the only active VCPU -> speed it up */
4567 		__enable_ibs_on_vcpu(vcpu);
4568 	} else if (started_vcpus == 1) {
4569 		/*
4570 		 * As we are starting a second VCPU, we have to disable
4571 		 * the IBS facility on all VCPUs to remove potentially
4572 		 * outstanding ENABLE requests.
4573 		 */
4574 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4575 	}
4576 
4577 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4578 	/*
4579 	 * The real PSW might have changed due to a RESTART interpreted by the
4580 	 * ultravisor. We block all interrupts and let the next sie exit
4581 	 * refresh our view.
4582 	 */
4583 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4584 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4585 	/*
4586 	 * Another VCPU might have used IBS while we were offline.
4587 	 * Let's play safe and flush the VCPU at startup.
4588 	 */
4589 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4590 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4591 	return 0;
4592 }
4593 
4594 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4595 {
4596 	int i, online_vcpus, r = 0, started_vcpus = 0;
4597 	struct kvm_vcpu *started_vcpu = NULL;
4598 
4599 	if (is_vcpu_stopped(vcpu))
4600 		return 0;
4601 
4602 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4603 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4604 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4605 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4606 
4607 	/* Let's tell the UV that we want to change into the stopped state */
4608 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4609 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4610 		if (r) {
4611 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4612 			return r;
4613 		}
4614 	}
4615 
4616 	/*
4617 	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
4618 	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
4619 	 * have been fully processed. This will ensure that the VCPU
4620 	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
4621 	 */
4622 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4623 	kvm_s390_clear_stop_irq(vcpu);
4624 
4625 	__disable_ibs_on_vcpu(vcpu);
4626 
4627 	for (i = 0; i < online_vcpus; i++) {
4628 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4629 			started_vcpus++;
4630 			started_vcpu = vcpu->kvm->vcpus[i];
4631 		}
4632 	}
4633 
4634 	if (started_vcpus == 1) {
4635 		/*
4636 		 * As we only have one VCPU left, we want to enable the
4637 		 * IBS facility for that VCPU to speed it up.
4638 		 */
4639 		__enable_ibs_on_vcpu(started_vcpu);
4640 	}
4641 
4642 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4643 	return 0;
4644 }
4645 
4646 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4647 				     struct kvm_enable_cap *cap)
4648 {
4649 	int r;
4650 
4651 	if (cap->flags)
4652 		return -EINVAL;
4653 
4654 	switch (cap->cap) {
4655 	case KVM_CAP_S390_CSS_SUPPORT:
4656 		if (!vcpu->kvm->arch.css_support) {
4657 			vcpu->kvm->arch.css_support = 1;
4658 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4659 			trace_kvm_s390_enable_css(vcpu->kvm);
4660 		}
4661 		r = 0;
4662 		break;
4663 	default:
4664 		r = -EINVAL;
4665 		break;
4666 	}
4667 	return r;
4668 }
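
/*
 * Illustrative userspace sketch (not from this file): enabling the only
 * per-vCPU capability handled above, KVM_CAP_S390_CSS_SUPPORT.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_css_support(int vcpu_fd)
{
	struct kvm_enable_cap cap;

	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_S390_CSS_SUPPORT;	/* flags and args must stay zero */

	return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}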
4669 
4670 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4671 				   struct kvm_s390_mem_op *mop)
4672 {
4673 	void __user *uaddr = (void __user *)mop->buf;
4674 	int r = 0;
4675 
4676 	if (mop->flags || !mop->size)
4677 		return -EINVAL;
4678 	if (mop->size + mop->sida_offset < mop->size)
4679 		return -EINVAL;
4680 	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4681 		return -E2BIG;
4682 	if (!kvm_s390_pv_cpu_is_protected(vcpu))
4683 		return -EINVAL;
4684 
4685 	switch (mop->op) {
4686 	case KVM_S390_MEMOP_SIDA_READ:
4687 		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4688 				 mop->sida_offset), mop->size))
4689 			r = -EFAULT;
4690 
4691 		break;
4692 	case KVM_S390_MEMOP_SIDA_WRITE:
4693 		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4694 				   mop->sida_offset), uaddr, mop->size))
4695 			r = -EFAULT;
4696 		break;
4697 	}
4698 	return r;
4699 }
4700 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4701 				  struct kvm_s390_mem_op *mop)
4702 {
4703 	void __user *uaddr = (void __user *)mop->buf;
4704 	void *tmpbuf = NULL;
4705 	int r = 0;
4706 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4707 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
4708 
4709 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4710 		return -EINVAL;
4711 
4712 	if (mop->size > MEM_OP_MAX_SIZE)
4713 		return -E2BIG;
4714 
4715 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4716 		return -EINVAL;
4717 
4718 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4719 		tmpbuf = vmalloc(mop->size);
4720 		if (!tmpbuf)
4721 			return -ENOMEM;
4722 	}
4723 
4724 	switch (mop->op) {
4725 	case KVM_S390_MEMOP_LOGICAL_READ:
4726 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4727 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4728 					    mop->size, GACC_FETCH);
4729 			break;
4730 		}
4731 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4732 		if (r == 0) {
4733 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4734 				r = -EFAULT;
4735 		}
4736 		break;
4737 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4738 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4739 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4740 					    mop->size, GACC_STORE);
4741 			break;
4742 		}
4743 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4744 			r = -EFAULT;
4745 			break;
4746 		}
4747 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4748 		break;
4749 	}
4750 
4751 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4752 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4753 
4754 	vfree(tmpbuf);
4755 	return r;
4756 }
4757 
4758 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4759 				      struct kvm_s390_mem_op *mop)
4760 {
4761 	int r, srcu_idx;
4762 
4763 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4764 
4765 	switch (mop->op) {
4766 	case KVM_S390_MEMOP_LOGICAL_READ:
4767 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4768 		r = kvm_s390_guest_mem_op(vcpu, mop);
4769 		break;
4770 	case KVM_S390_MEMOP_SIDA_READ:
4771 	case KVM_S390_MEMOP_SIDA_WRITE:
4772 		/* we are locked against sida going away by the vcpu->mutex */
4773 		r = kvm_s390_guest_sida_op(vcpu, mop);
4774 		break;
4775 	default:
4776 		r = -EINVAL;
4777 	}
4778 
4779 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4780 	return r;
4781 }
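
/*
 * Illustrative userspace sketch (not from this file): a logical read of guest
 * memory through KVM_S390_MEM_OP, which is dispatched to
 * kvm_s390_guest_mem_op() above for a non-protected guest.  The transfer size
 * is capped by the handler (64k per call).
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int read_guest_logical(int vcpu_fd, uint64_t gaddr, void *buf, uint32_t len)
{
	struct kvm_s390_mem_op op;

	memset(&op, 0, sizeof(op));
	op.op    = KVM_S390_MEMOP_LOGICAL_READ;
	op.gaddr = gaddr;			/* logical address, translated via access register 0 */
	op.buf   = (uint64_t)(unsigned long)buf;
	op.size  = len;
	op.ar    = 0;

	return ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
}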
4782 
4783 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4784 			       unsigned int ioctl, unsigned long arg)
4785 {
4786 	struct kvm_vcpu *vcpu = filp->private_data;
4787 	void __user *argp = (void __user *)arg;
4788 
4789 	switch (ioctl) {
4790 	case KVM_S390_IRQ: {
4791 		struct kvm_s390_irq s390irq;
4792 
4793 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4794 			return -EFAULT;
4795 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4796 	}
4797 	case KVM_S390_INTERRUPT: {
4798 		struct kvm_s390_interrupt s390int;
4799 		struct kvm_s390_irq s390irq = {};
4800 
4801 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4802 			return -EFAULT;
4803 		if (s390int_to_s390irq(&s390int, &s390irq))
4804 			return -EINVAL;
4805 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4806 	}
4807 	}
4808 	return -ENOIOCTLCMD;
4809 }
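
/*
 * Illustrative userspace sketch (not from this file): injecting an emergency
 * signal external interrupt into a vCPU via the asynchronous KVM_S390_IRQ
 * ioctl handled above.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int inject_emergency_signal(int vcpu_fd, uint16_t src_cpu_addr)
{
	struct kvm_s390_irq irq;

	memset(&irq, 0, sizeof(irq));
	irq.type = KVM_S390_INT_EMERGENCY;
	irq.u.emerg.code = src_cpu_addr;	/* CPU address of the signalling CPU */

	return ioctl(vcpu_fd, KVM_S390_IRQ, &irq);
}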
4810 
4811 long kvm_arch_vcpu_ioctl(struct file *filp,
4812 			 unsigned int ioctl, unsigned long arg)
4813 {
4814 	struct kvm_vcpu *vcpu = filp->private_data;
4815 	void __user *argp = (void __user *)arg;
4816 	int idx;
4817 	long r;
4818 	u16 rc, rrc;
4819 
4820 	vcpu_load(vcpu);
4821 
4822 	switch (ioctl) {
4823 	case KVM_S390_STORE_STATUS:
4824 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4825 		r = kvm_s390_store_status_unloaded(vcpu, arg);
4826 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4827 		break;
4828 	case KVM_S390_SET_INITIAL_PSW: {
4829 		psw_t psw;
4830 
4831 		r = -EFAULT;
4832 		if (copy_from_user(&psw, argp, sizeof(psw)))
4833 			break;
4834 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4835 		break;
4836 	}
4837 	case KVM_S390_CLEAR_RESET:
4838 		r = 0;
4839 		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4840 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4841 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4842 					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4843 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4844 				   rc, rrc);
4845 		}
4846 		break;
4847 	case KVM_S390_INITIAL_RESET:
4848 		r = 0;
4849 		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4850 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4851 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4852 					  UVC_CMD_CPU_RESET_INITIAL,
4853 					  &rc, &rrc);
4854 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4855 				   rc, rrc);
4856 		}
4857 		break;
4858 	case KVM_S390_NORMAL_RESET:
4859 		r = 0;
4860 		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4861 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4862 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4863 					  UVC_CMD_CPU_RESET, &rc, &rrc);
4864 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4865 				   rc, rrc);
4866 		}
4867 		break;
4868 	case KVM_SET_ONE_REG:
4869 	case KVM_GET_ONE_REG: {
4870 		struct kvm_one_reg reg;
4871 		r = -EINVAL;
4872 		if (kvm_s390_pv_cpu_is_protected(vcpu))
4873 			break;
4874 		r = -EFAULT;
4875 		if (copy_from_user(&reg, argp, sizeof(reg)))
4876 			break;
4877 		if (ioctl == KVM_SET_ONE_REG)
4878 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4879 		else
4880 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4881 		break;
4882 	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}

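/*
 * mmap handler for the vcpu fd: only user-controlled (ucontrol) VMs may map
 * the SIE control block at KVM_S390_SIE_PAGE_OFFSET; any other access is
 * answered with SIGBUS.
 */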
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

/* Section: memory related */
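/*
 * Validate a memory slot change before it is committed: the userspace address
 * and the slot size must be 1 MB (segment) aligned, the slot must fit below
 * the configured guest memory limit, and slots may no longer be changed once
 * the VM has been switched to protected virtualization mode.
 */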
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/*
	 * A few sanity checks. Memory slots have to start and end on a segment
	 * boundary (1 MB). The memory in userland may be fragmented into
	 * several different vmas. It is okay to mmap() and munmap() memory in
	 * this slot at any time after this call.
	 */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	/* When we are protected, we should not change the memory slots */
	if (kvm_s390_pv_get_handle(kvm))
		return -EINVAL;

	if (!kvm->arch.migration_mode)
		return 0;

	/*
	 * Turn off migration mode when:
	 * - userspace creates a new memslot with dirty logging off,
	 * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and
	 *   dirty logging is turned off.
	 * Migration mode expects dirty page logging to be enabled in order to
	 * store its dirty bitmap.
	 */
	if (change != KVM_MR_DELETE &&
	    !(mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
		WARN(kvm_s390_vm_stop_migration(kvm),
		     "Failed to stop migration mode");

	return 0;
}

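/*
 * Apply a committed memory slot change to the guest address space: old
 * mappings are removed from the gmap on DELETE and MOVE, new mappings are
 * established on CREATE and MOVE; a FLAGS_ONLY change needs no gmap update.
 */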
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc = 0;

	switch (change) {
	case KVM_MR_DELETE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		break;
	case KVM_MR_MOVE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		if (rc)
			break;
		fallthrough;
	case KVM_MR_CREATE:
		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
				      mem->guest_phys_addr, mem->memory_size);
		break;
	case KVM_MR_FLAGS_ONLY:
		break;
	default:
		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
	}
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}

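/*
 * Derive, from the 2-bit per-word indication in the SCLP hmfai field, a mask
 * of the facility bits in facility list word @i that are not reserved for the
 * hypervisor and may therefore be made available to KVM guests.
 */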
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

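/* Called once a vcpu is done blocking: clear the pending wakeup indication. */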
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

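/*
 * Module init: bail out if SIE is unavailable or if nested virtualization is
 * combined with huge page backing, merge the usable host facilities into the
 * base facility mask, and register with the common KVM module code.
 */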
static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE is not available\n");
		return -ENODEV;
	}

	if (nested && hpage) {
		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
		return -EINVAL;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_base[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

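/* Module exit: unregister from the common KVM module code. */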
static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");
