1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2020
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13 
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
35 
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include "kvm-s390.h"
49 #include "gaccess.h"
50 
51 #define CREATE_TRACE_POINTS
52 #include "trace.h"
53 #include "trace-s390.h"
54 
55 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
56 #define LOCAL_IRQS 32
57 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
58 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
59 
60 struct kvm_stats_debugfs_item debugfs_entries[] = {
61 	VCPU_STAT("userspace_handled", exit_userspace),
62 	VCPU_STAT("exit_null", exit_null),
63 	VCPU_STAT("exit_validity", exit_validity),
64 	VCPU_STAT("exit_stop_request", exit_stop_request),
65 	VCPU_STAT("exit_external_request", exit_external_request),
66 	VCPU_STAT("exit_io_request", exit_io_request),
67 	VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
68 	VCPU_STAT("exit_instruction", exit_instruction),
69 	VCPU_STAT("exit_pei", exit_pei),
70 	VCPU_STAT("exit_program_interruption", exit_program_interruption),
71 	VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
72 	VCPU_STAT("exit_operation_exception", exit_operation_exception),
73 	VCPU_STAT("halt_successful_poll", halt_successful_poll),
74 	VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
75 	VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
76 	VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
77 	VCPU_STAT("halt_wakeup", halt_wakeup),
78 	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
79 	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
80 	VCPU_STAT("instruction_lctlg", instruction_lctlg),
81 	VCPU_STAT("instruction_lctl", instruction_lctl),
82 	VCPU_STAT("instruction_stctl", instruction_stctl),
83 	VCPU_STAT("instruction_stctg", instruction_stctg),
84 	VCPU_STAT("deliver_ckc", deliver_ckc),
85 	VCPU_STAT("deliver_cputm", deliver_cputm),
86 	VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
87 	VCPU_STAT("deliver_external_call", deliver_external_call),
88 	VCPU_STAT("deliver_service_signal", deliver_service_signal),
89 	VCPU_STAT("deliver_virtio", deliver_virtio),
90 	VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
91 	VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
92 	VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
93 	VCPU_STAT("deliver_program", deliver_program),
94 	VCPU_STAT("deliver_io", deliver_io),
95 	VCPU_STAT("deliver_machine_check", deliver_machine_check),
96 	VCPU_STAT("exit_wait_state", exit_wait_state),
97 	VCPU_STAT("inject_ckc", inject_ckc),
98 	VCPU_STAT("inject_cputm", inject_cputm),
99 	VCPU_STAT("inject_external_call", inject_external_call),
100 	VM_STAT("inject_float_mchk", inject_float_mchk),
101 	VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
102 	VM_STAT("inject_io", inject_io),
103 	VCPU_STAT("inject_mchk", inject_mchk),
104 	VM_STAT("inject_pfault_done", inject_pfault_done),
105 	VCPU_STAT("inject_program", inject_program),
106 	VCPU_STAT("inject_restart", inject_restart),
107 	VM_STAT("inject_service_signal", inject_service_signal),
108 	VCPU_STAT("inject_set_prefix", inject_set_prefix),
109 	VCPU_STAT("inject_stop_signal", inject_stop_signal),
110 	VCPU_STAT("inject_pfault_init", inject_pfault_init),
111 	VM_STAT("inject_virtio", inject_virtio),
112 	VCPU_STAT("instruction_epsw", instruction_epsw),
113 	VCPU_STAT("instruction_gs", instruction_gs),
114 	VCPU_STAT("instruction_io_other", instruction_io_other),
115 	VCPU_STAT("instruction_lpsw", instruction_lpsw),
116 	VCPU_STAT("instruction_lpswe", instruction_lpswe),
117 	VCPU_STAT("instruction_pfmf", instruction_pfmf),
118 	VCPU_STAT("instruction_ptff", instruction_ptff),
119 	VCPU_STAT("instruction_stidp", instruction_stidp),
120 	VCPU_STAT("instruction_sck", instruction_sck),
121 	VCPU_STAT("instruction_sckpf", instruction_sckpf),
122 	VCPU_STAT("instruction_spx", instruction_spx),
123 	VCPU_STAT("instruction_stpx", instruction_stpx),
124 	VCPU_STAT("instruction_stap", instruction_stap),
125 	VCPU_STAT("instruction_iske", instruction_iske),
126 	VCPU_STAT("instruction_ri", instruction_ri),
127 	VCPU_STAT("instruction_rrbe", instruction_rrbe),
128 	VCPU_STAT("instruction_sske", instruction_sske),
129 	VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
130 	VCPU_STAT("instruction_essa", instruction_essa),
131 	VCPU_STAT("instruction_stsi", instruction_stsi),
132 	VCPU_STAT("instruction_stfl", instruction_stfl),
133 	VCPU_STAT("instruction_tb", instruction_tb),
134 	VCPU_STAT("instruction_tpi", instruction_tpi),
135 	VCPU_STAT("instruction_tprot", instruction_tprot),
136 	VCPU_STAT("instruction_tsch", instruction_tsch),
137 	VCPU_STAT("instruction_sthyi", instruction_sthyi),
138 	VCPU_STAT("instruction_sie", instruction_sie),
139 	VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
140 	VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
141 	VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
142 	VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
143 	VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
144 	VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
145 	VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
146 	VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
147 	VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
148 	VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
149 	VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
150 	VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
151 	VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
152 	VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
153 	VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
154 	VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
155 	VCPU_STAT("instruction_diag_10", diagnose_10),
156 	VCPU_STAT("instruction_diag_44", diagnose_44),
157 	VCPU_STAT("instruction_diag_9c", diagnose_9c),
158 	VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
159 	VCPU_STAT("instruction_diag_258", diagnose_258),
160 	VCPU_STAT("instruction_diag_308", diagnose_308),
161 	VCPU_STAT("instruction_diag_500", diagnose_500),
162 	VCPU_STAT("instruction_diag_other", diagnose_other),
163 	{ NULL }
164 };
165 
166 struct kvm_s390_tod_clock_ext {
167 	__u8 epoch_idx;
168 	__u64 tod;
169 	__u8 reserved[7];
170 } __packed;
171 
172 /* allow nested virtualization in KVM (if enabled by user space) */
173 static int nested;
174 module_param(nested, int, S_IRUGO);
175 MODULE_PARM_DESC(nested, "Nested virtualization support");
176 
177 /* allow 1m huge page guest backing, if !nested */
178 static int hpage;
179 module_param(hpage, int, 0444);
180 MODULE_PARM_DESC(hpage, "1m huge page backing support");
181 
182 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
183 static u8 halt_poll_max_steal = 10;
184 module_param(halt_poll_max_steal, byte, 0644);
185 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
186 
187 /* if set to true, the GISA will be initialized and used if available */
188 static bool use_gisa  = true;
189 module_param(use_gisa, bool, 0644);
190 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
191 
192 /*
193  * For now we handle at most 16 double words as this is what the s390 base
194  * kernel handles and stores in the prefix page. If we ever need to go beyond
195  * this, the code needs to be changed, but the external uapi can stay.
196  */
197 #define SIZE_INTERNAL 16
198 
199 /*
200  * Base feature mask that defines the default mask for facilities. Consists of the
201  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
202  */
203 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
204 /*
205  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
206  * and defines the facilities that can be enabled via a cpu model.
207  */
208 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
209 
210 static unsigned long kvm_s390_fac_size(void)
211 {
212 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
213 	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
214 	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
215 		sizeof(S390_lowcore.stfle_fac_list));
216 
217 	return SIZE_INTERNAL;
218 }
219 
220 /* available cpu features supported by kvm */
221 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
222 /* available subfunctions indicated via query / "test bit" */
223 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
224 
225 static struct gmap_notifier gmap_notifier;
226 static struct gmap_notifier vsie_gmap_notifier;
227 debug_info_t *kvm_s390_dbf;
228 debug_info_t *kvm_s390_dbf_uv;
229 
230 /* Section: not file related */
231 int kvm_arch_hardware_enable(void)
232 {
233 	/* every s390 is virtualization enabled ;-) */
234 	return 0;
235 }
236 
237 int kvm_arch_check_processor_compat(void *opaque)
238 {
239 	return 0;
240 }
241 
242 /* forward declarations */
243 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
244 			      unsigned long end);
245 static int sca_switch_to_extended(struct kvm *kvm);
246 
247 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
248 {
249 	u8 delta_idx = 0;
250 
251 	/*
252 	 * The TOD jumps by delta; we have to compensate for this by adding
253 	 * -delta to the epoch.
254 	 */
255 	delta = -delta;
256 
257 	/* sign-extension - we're adding to signed values below */
258 	if ((s64)delta < 0)
259 		delta_idx = -1;
260 
261 	scb->epoch += delta;
262 	if (scb->ecd & ECD_MEF) {
263 		scb->epdx += delta_idx;
264 		if (scb->epoch < delta)
265 			scb->epdx += 1;
266 	}
267 }
268 
269 /*
270  * This callback is executed during stop_machine(). All CPUs are therefore
271  * temporarily stopped. In order not to change guest behavior, we have to
272  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
273  * so a CPU won't be stopped while calculating with the epoch.
274  */
275 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
276 			  void *v)
277 {
278 	struct kvm *kvm;
279 	struct kvm_vcpu *vcpu;
280 	int i;
281 	unsigned long long *delta = v;
282 
283 	list_for_each_entry(kvm, &vm_list, vm_list) {
284 		kvm_for_each_vcpu(i, vcpu, kvm) {
285 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
286 			if (i == 0) {
287 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
288 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
289 			}
290 			if (vcpu->arch.cputm_enabled)
291 				vcpu->arch.cputm_start += *delta;
292 			if (vcpu->arch.vsie_block)
293 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
294 						   *delta);
295 		}
296 	}
297 	return NOTIFY_OK;
298 }
299 
300 static struct notifier_block kvm_clock_notifier = {
301 	.notifier_call = kvm_clock_sync,
302 };
303 
304 int kvm_arch_hardware_setup(void *opaque)
305 {
306 	gmap_notifier.notifier_call = kvm_gmap_notifier;
307 	gmap_register_pte_notifier(&gmap_notifier);
308 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
309 	gmap_register_pte_notifier(&vsie_gmap_notifier);
310 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
311 				       &kvm_clock_notifier);
312 	return 0;
313 }
314 
315 void kvm_arch_hardware_unsetup(void)
316 {
317 	gmap_unregister_pte_notifier(&gmap_notifier);
318 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
319 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
320 					 &kvm_clock_notifier);
321 }
322 
323 static void allow_cpu_feat(unsigned long nr)
324 {
325 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
326 }
327 
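/*
 * Probe a single PERFORM LOCKED OPERATION subfunction: function code
 * bit 0x100 selects the "test bit" form, so cc == 0 means the queried
 * subfunction is available on this machine.
 */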
328 static inline int plo_test_bit(unsigned char nr)
329 {
330 	unsigned long function = (unsigned long)nr | 0x100;
331 	int cc;
332 
333 	asm volatile(
334 		"	lgr	0,%[function]\n"
335 		/* Parameter registers are ignored for "test bit" */
336 		"	plo	0,0,0,0(0)\n"
337 		"	ipm	%0\n"
338 		"	srl	%0,28\n"
339 		: "=d" (cc)
340 		: [function] "d" (function)
341 		: "cc", "0");
342 	return cc == 0;
343 }
344 
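/*
 * Execute the query function (GR0 == 0) of a 32-bit instruction such as
 * SORTL or DFLTCC and store the returned parameter block at @query.
 */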
345 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
346 {
347 	asm volatile(
348 		"	lghi	0,0\n"
349 		"	lgr	1,%[query]\n"
350 		/* Parameter registers are ignored */
351 		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
352 		:
353 		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
354 		: "cc", "memory", "0", "1");
355 }
356 
357 #define INSN_SORTL 0xb938
358 #define INSN_DFLTCC 0xb939
359 
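/*
 * Probe the host for the PLO, PTFF, CPACF and other subfunctions as well as
 * the SIE interpretation features that KVM can offer to guests, and record
 * them in kvm_s390_available_subfunc / kvm_s390_available_cpu_feat.
 */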
360 static void kvm_s390_cpu_feat_init(void)
361 {
362 	int i;
363 
364 	for (i = 0; i < 256; ++i) {
365 		if (plo_test_bit(i))
366 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
367 	}
368 
369 	if (test_facility(28)) /* TOD-clock steering */
370 		ptff(kvm_s390_available_subfunc.ptff,
371 		     sizeof(kvm_s390_available_subfunc.ptff),
372 		     PTFF_QAF);
373 
374 	if (test_facility(17)) { /* MSA */
375 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
376 			      kvm_s390_available_subfunc.kmac);
377 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
378 			      kvm_s390_available_subfunc.kmc);
379 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
380 			      kvm_s390_available_subfunc.km);
381 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
382 			      kvm_s390_available_subfunc.kimd);
383 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
384 			      kvm_s390_available_subfunc.klmd);
385 	}
386 	if (test_facility(76)) /* MSA3 */
387 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
388 			      kvm_s390_available_subfunc.pckmo);
389 	if (test_facility(77)) { /* MSA4 */
390 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
391 			      kvm_s390_available_subfunc.kmctr);
392 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
393 			      kvm_s390_available_subfunc.kmf);
394 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
395 			      kvm_s390_available_subfunc.kmo);
396 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
397 			      kvm_s390_available_subfunc.pcc);
398 	}
399 	if (test_facility(57)) /* MSA5 */
400 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
401 			      kvm_s390_available_subfunc.ppno);
402 
403 	if (test_facility(146)) /* MSA8 */
404 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
405 			      kvm_s390_available_subfunc.kma);
406 
407 	if (test_facility(155)) /* MSA9 */
408 		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
409 			      kvm_s390_available_subfunc.kdsa);
410 
411 	if (test_facility(150)) /* SORTL */
412 		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
413 
414 	if (test_facility(151)) /* DFLTCC */
415 		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
416 
417 	if (MACHINE_HAS_ESOP)
418 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
419 	/*
420 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
421 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
422 	 */
423 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
424 	    !test_facility(3) || !nested)
425 		return;
426 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
427 	if (sclp.has_64bscao)
428 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
429 	if (sclp.has_siif)
430 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
431 	if (sclp.has_gpere)
432 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
433 	if (sclp.has_gsls)
434 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
435 	if (sclp.has_ib)
436 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
437 	if (sclp.has_cei)
438 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
439 	if (sclp.has_ibs)
440 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
441 	if (sclp.has_kss)
442 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
443 	/*
444 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
445 	 * all skey handling functions read/set the skey from the PGSTE
446 	 * instead of the real storage key.
447 	 *
448 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
449 	 * pages being detected as preserved although they are resident.
450 	 *
451 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
452 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
453 	 *
454 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
455 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
456 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
457 	 *
458 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
459 	 * cannot easily shadow the SCA because of the ipte lock.
460 	 */
461 }
462 
463 int kvm_arch_init(void *opaque)
464 {
465 	int rc = -ENOMEM;
466 
467 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
468 	if (!kvm_s390_dbf)
469 		return -ENOMEM;
470 
471 	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
472 	if (!kvm_s390_dbf_uv)
473 		goto out;
474 
475 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
476 	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
477 		goto out;
478 
479 	kvm_s390_cpu_feat_init();
480 
481 	/* Register floating interrupt controller interface. */
482 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
483 	if (rc) {
484 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
485 		goto out;
486 	}
487 
488 	rc = kvm_s390_gib_init(GAL_ISC);
489 	if (rc)
490 		goto out;
491 
492 	return 0;
493 
494 out:
495 	kvm_arch_exit();
496 	return rc;
497 }
498 
499 void kvm_arch_exit(void)
500 {
501 	kvm_s390_gib_destroy();
502 	debug_unregister(kvm_s390_dbf);
503 	debug_unregister(kvm_s390_dbf_uv);
504 }
505 
506 /* Section: device related */
507 long kvm_arch_dev_ioctl(struct file *filp,
508 			unsigned int ioctl, unsigned long arg)
509 {
510 	if (ioctl == KVM_S390_ENABLE_SIE)
511 		return s390_enable_sie();
512 	return -EINVAL;
513 }
514 
515 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
516 {
517 	int r;
518 
519 	switch (ext) {
520 	case KVM_CAP_S390_PSW:
521 	case KVM_CAP_S390_GMAP:
522 	case KVM_CAP_SYNC_MMU:
523 #ifdef CONFIG_KVM_S390_UCONTROL
524 	case KVM_CAP_S390_UCONTROL:
525 #endif
526 	case KVM_CAP_ASYNC_PF:
527 	case KVM_CAP_SYNC_REGS:
528 	case KVM_CAP_ONE_REG:
529 	case KVM_CAP_ENABLE_CAP:
530 	case KVM_CAP_S390_CSS_SUPPORT:
531 	case KVM_CAP_IOEVENTFD:
532 	case KVM_CAP_DEVICE_CTRL:
533 	case KVM_CAP_S390_IRQCHIP:
534 	case KVM_CAP_VM_ATTRIBUTES:
535 	case KVM_CAP_MP_STATE:
536 	case KVM_CAP_IMMEDIATE_EXIT:
537 	case KVM_CAP_S390_INJECT_IRQ:
538 	case KVM_CAP_S390_USER_SIGP:
539 	case KVM_CAP_S390_USER_STSI:
540 	case KVM_CAP_S390_SKEYS:
541 	case KVM_CAP_S390_IRQ_STATE:
542 	case KVM_CAP_S390_USER_INSTR0:
543 	case KVM_CAP_S390_CMMA_MIGRATION:
544 	case KVM_CAP_S390_AIS:
545 	case KVM_CAP_S390_AIS_MIGRATION:
546 	case KVM_CAP_S390_VCPU_RESETS:
547 	case KVM_CAP_SET_GUEST_DEBUG:
548 	case KVM_CAP_S390_DIAG318:
549 		r = 1;
550 		break;
551 	case KVM_CAP_S390_HPAGE_1M:
552 		r = 0;
553 		if (hpage && !kvm_is_ucontrol(kvm))
554 			r = 1;
555 		break;
556 	case KVM_CAP_S390_MEM_OP:
557 		r = MEM_OP_MAX_SIZE;
558 		break;
559 	case KVM_CAP_NR_VCPUS:
560 	case KVM_CAP_MAX_VCPUS:
561 	case KVM_CAP_MAX_VCPU_ID:
562 		r = KVM_S390_BSCA_CPU_SLOTS;
563 		if (!kvm_s390_use_sca_entries())
564 			r = KVM_MAX_VCPUS;
565 		else if (sclp.has_esca && sclp.has_64bscao)
566 			r = KVM_S390_ESCA_CPU_SLOTS;
567 		break;
568 	case KVM_CAP_S390_COW:
569 		r = MACHINE_HAS_ESOP;
570 		break;
571 	case KVM_CAP_S390_VECTOR_REGISTERS:
572 		r = MACHINE_HAS_VX;
573 		break;
574 	case KVM_CAP_S390_RI:
575 		r = test_facility(64);
576 		break;
577 	case KVM_CAP_S390_GS:
578 		r = test_facility(133);
579 		break;
580 	case KVM_CAP_S390_BPB:
581 		r = test_facility(82);
582 		break;
583 	case KVM_CAP_S390_PROTECTED:
584 		r = is_prot_virt_host();
585 		break;
586 	default:
587 		r = 0;
588 	}
589 	return r;
590 }
591 
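/*
 * Transfer the dirty bits tracked in the gmap into the memslot's dirty
 * bitmap, one guest segment (_PAGE_ENTRIES pages) at a time.
 */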
592 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
593 {
594 	int i;
595 	gfn_t cur_gfn, last_gfn;
596 	unsigned long gaddr, vmaddr;
597 	struct gmap *gmap = kvm->arch.gmap;
598 	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
599 
600 	/* Loop over all guest segments */
601 	cur_gfn = memslot->base_gfn;
602 	last_gfn = memslot->base_gfn + memslot->npages;
603 	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
604 		gaddr = gfn_to_gpa(cur_gfn);
605 		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
606 		if (kvm_is_error_hva(vmaddr))
607 			continue;
608 
609 		bitmap_zero(bitmap, _PAGE_ENTRIES);
610 		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
611 		for (i = 0; i < _PAGE_ENTRIES; i++) {
612 			if (test_bit(i, bitmap))
613 				mark_page_dirty(kvm, cur_gfn + i);
614 		}
615 
616 		if (fatal_signal_pending(current))
617 			return;
618 		cond_resched();
619 	}
620 }
621 
622 /* Section: vm related */
623 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
624 
625 /*
626  * Get (and clear) the dirty memory log for a memory slot.
627  */
628 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
629 			       struct kvm_dirty_log *log)
630 {
631 	int r;
632 	unsigned long n;
633 	struct kvm_memory_slot *memslot;
634 	int is_dirty;
635 
636 	if (kvm_is_ucontrol(kvm))
637 		return -EINVAL;
638 
639 	mutex_lock(&kvm->slots_lock);
640 
641 	r = -EINVAL;
642 	if (log->slot >= KVM_USER_MEM_SLOTS)
643 		goto out;
644 
645 	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
646 	if (r)
647 		goto out;
648 
649 	/* Clear the dirty log */
650 	if (is_dirty) {
651 		n = kvm_dirty_bitmap_bytes(memslot);
652 		memset(memslot->dirty_bitmap, 0, n);
653 	}
654 	r = 0;
655 out:
656 	mutex_unlock(&kvm->slots_lock);
657 	return r;
658 }
659 
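/*
 * Request that all VCPUs enable interception of the operation exception,
 * e.g. after user space asked to handle instruction 0x0000 itself
 * (KVM_CAP_S390_USER_INSTR0).
 */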
660 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
661 {
662 	unsigned int i;
663 	struct kvm_vcpu *vcpu;
664 
665 	kvm_for_each_vcpu(i, vcpu, kvm) {
666 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
667 	}
668 }
669 
670 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
671 {
672 	int r;
673 
674 	if (cap->flags)
675 		return -EINVAL;
676 
677 	switch (cap->cap) {
678 	case KVM_CAP_S390_IRQCHIP:
679 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
680 		kvm->arch.use_irqchip = 1;
681 		r = 0;
682 		break;
683 	case KVM_CAP_S390_USER_SIGP:
684 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
685 		kvm->arch.user_sigp = 1;
686 		r = 0;
687 		break;
688 	case KVM_CAP_S390_VECTOR_REGISTERS:
689 		mutex_lock(&kvm->lock);
690 		if (kvm->created_vcpus) {
691 			r = -EBUSY;
692 		} else if (MACHINE_HAS_VX) {
693 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
694 			set_kvm_facility(kvm->arch.model.fac_list, 129);
695 			if (test_facility(134)) {
696 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
697 				set_kvm_facility(kvm->arch.model.fac_list, 134);
698 			}
699 			if (test_facility(135)) {
700 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
701 				set_kvm_facility(kvm->arch.model.fac_list, 135);
702 			}
703 			if (test_facility(148)) {
704 				set_kvm_facility(kvm->arch.model.fac_mask, 148);
705 				set_kvm_facility(kvm->arch.model.fac_list, 148);
706 			}
707 			if (test_facility(152)) {
708 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
709 				set_kvm_facility(kvm->arch.model.fac_list, 152);
710 			}
711 			r = 0;
712 		} else
713 			r = -EINVAL;
714 		mutex_unlock(&kvm->lock);
715 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
716 			 r ? "(not available)" : "(success)");
717 		break;
718 	case KVM_CAP_S390_RI:
719 		r = -EINVAL;
720 		mutex_lock(&kvm->lock);
721 		if (kvm->created_vcpus) {
722 			r = -EBUSY;
723 		} else if (test_facility(64)) {
724 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
725 			set_kvm_facility(kvm->arch.model.fac_list, 64);
726 			r = 0;
727 		}
728 		mutex_unlock(&kvm->lock);
729 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
730 			 r ? "(not available)" : "(success)");
731 		break;
732 	case KVM_CAP_S390_AIS:
733 		mutex_lock(&kvm->lock);
734 		if (kvm->created_vcpus) {
735 			r = -EBUSY;
736 		} else {
737 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
738 			set_kvm_facility(kvm->arch.model.fac_list, 72);
739 			r = 0;
740 		}
741 		mutex_unlock(&kvm->lock);
742 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
743 			 r ? "(not available)" : "(success)");
744 		break;
745 	case KVM_CAP_S390_GS:
746 		r = -EINVAL;
747 		mutex_lock(&kvm->lock);
748 		if (kvm->created_vcpus) {
749 			r = -EBUSY;
750 		} else if (test_facility(133)) {
751 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
752 			set_kvm_facility(kvm->arch.model.fac_list, 133);
753 			r = 0;
754 		}
755 		mutex_unlock(&kvm->lock);
756 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
757 			 r ? "(not available)" : "(success)");
758 		break;
759 	case KVM_CAP_S390_HPAGE_1M:
760 		mutex_lock(&kvm->lock);
761 		if (kvm->created_vcpus)
762 			r = -EBUSY;
763 		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
764 			r = -EINVAL;
765 		else {
766 			r = 0;
767 			mmap_write_lock(kvm->mm);
768 			kvm->mm->context.allow_gmap_hpage_1m = 1;
769 			mmap_write_unlock(kvm->mm);
770 			/*
771 			 * We might have to create fake 4k page
772 			 * tables. To prevent the hardware from working on
773 			 * stale PGSTEs, we emulate these instructions.
774 			 */
775 			kvm->arch.use_skf = 0;
776 			kvm->arch.use_pfmfi = 0;
777 		}
778 		mutex_unlock(&kvm->lock);
779 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
780 			 r ? "(not available)" : "(success)");
781 		break;
782 	case KVM_CAP_S390_USER_STSI:
783 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
784 		kvm->arch.user_stsi = 1;
785 		r = 0;
786 		break;
787 	case KVM_CAP_S390_USER_INSTR0:
788 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
789 		kvm->arch.user_instr0 = 1;
790 		icpt_operexc_on_all_vcpus(kvm);
791 		r = 0;
792 		break;
793 	default:
794 		r = -EINVAL;
795 		break;
796 	}
797 	return r;
798 }
799 
800 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
801 {
802 	int ret;
803 
804 	switch (attr->attr) {
805 	case KVM_S390_VM_MEM_LIMIT_SIZE:
806 		ret = 0;
807 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
808 			 kvm->arch.mem_limit);
809 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
810 			ret = -EFAULT;
811 		break;
812 	default:
813 		ret = -ENXIO;
814 		break;
815 	}
816 	return ret;
817 }
818 
819 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
820 {
821 	int ret;
822 	unsigned int idx;
823 	switch (attr->attr) {
824 	case KVM_S390_VM_MEM_ENABLE_CMMA:
825 		ret = -ENXIO;
826 		if (!sclp.has_cmma)
827 			break;
828 
829 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
830 		mutex_lock(&kvm->lock);
831 		if (kvm->created_vcpus)
832 			ret = -EBUSY;
833 		else if (kvm->mm->context.allow_gmap_hpage_1m)
834 			ret = -EINVAL;
835 		else {
836 			kvm->arch.use_cmma = 1;
837 			/* Not compatible with cmma. */
838 			kvm->arch.use_pfmfi = 0;
839 			ret = 0;
840 		}
841 		mutex_unlock(&kvm->lock);
842 		break;
843 	case KVM_S390_VM_MEM_CLR_CMMA:
844 		ret = -ENXIO;
845 		if (!sclp.has_cmma)
846 			break;
847 		ret = -EINVAL;
848 		if (!kvm->arch.use_cmma)
849 			break;
850 
851 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
852 		mutex_lock(&kvm->lock);
853 		idx = srcu_read_lock(&kvm->srcu);
854 		s390_reset_cmma(kvm->arch.gmap->mm);
855 		srcu_read_unlock(&kvm->srcu, idx);
856 		mutex_unlock(&kvm->lock);
857 		ret = 0;
858 		break;
859 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
860 		unsigned long new_limit;
861 
862 		if (kvm_is_ucontrol(kvm))
863 			return -EINVAL;
864 
865 		if (get_user(new_limit, (u64 __user *)attr->addr))
866 			return -EFAULT;
867 
868 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
869 		    new_limit > kvm->arch.mem_limit)
870 			return -E2BIG;
871 
872 		if (!new_limit)
873 			return -EINVAL;
874 
875 		/* gmap_create takes last usable address */
876 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
877 			new_limit -= 1;
878 
879 		ret = -EBUSY;
880 		mutex_lock(&kvm->lock);
881 		if (!kvm->created_vcpus) {
882 			/* gmap_create will round the limit up */
883 			struct gmap *new = gmap_create(current->mm, new_limit);
884 
885 			if (!new) {
886 				ret = -ENOMEM;
887 			} else {
888 				gmap_remove(kvm->arch.gmap);
889 				new->private = kvm;
890 				kvm->arch.gmap = new;
891 				ret = 0;
892 			}
893 		}
894 		mutex_unlock(&kvm->lock);
895 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
896 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
897 			 (void *) kvm->arch.gmap->asce);
898 		break;
899 	}
900 	default:
901 		ret = -ENXIO;
902 		break;
903 	}
904 	return ret;
905 }
906 
907 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
908 
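/*
 * Reapply the VM crypto settings to all VCPUs. The VCPUs are temporarily
 * blocked while their crypto control blocks are rebuilt and the VSIE
 * shadow crycb is recreated.
 */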
909 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
910 {
911 	struct kvm_vcpu *vcpu;
912 	int i;
913 
914 	kvm_s390_vcpu_block_all(kvm);
915 
916 	kvm_for_each_vcpu(i, vcpu, kvm) {
917 		kvm_s390_vcpu_crypto_setup(vcpu);
918 		/* recreate the shadow crycb by leaving the VSIE handler */
919 		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
920 	}
921 
922 	kvm_s390_vcpu_unblock_all(kvm);
923 }
924 
925 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
926 {
927 	mutex_lock(&kvm->lock);
928 	switch (attr->attr) {
929 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
930 		if (!test_kvm_facility(kvm, 76)) {
931 			mutex_unlock(&kvm->lock);
932 			return -EINVAL;
933 		}
934 		get_random_bytes(
935 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
936 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
937 		kvm->arch.crypto.aes_kw = 1;
938 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
939 		break;
940 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
941 		if (!test_kvm_facility(kvm, 76)) {
942 			mutex_unlock(&kvm->lock);
943 			return -EINVAL;
944 		}
945 		get_random_bytes(
946 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
947 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
948 		kvm->arch.crypto.dea_kw = 1;
949 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
950 		break;
951 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
952 		if (!test_kvm_facility(kvm, 76)) {
953 			mutex_unlock(&kvm->lock);
954 			return -EINVAL;
955 		}
956 		kvm->arch.crypto.aes_kw = 0;
957 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
958 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
959 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
960 		break;
961 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
962 		if (!test_kvm_facility(kvm, 76)) {
963 			mutex_unlock(&kvm->lock);
964 			return -EINVAL;
965 		}
966 		kvm->arch.crypto.dea_kw = 0;
967 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
968 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
969 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
970 		break;
971 	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
972 		if (!ap_instructions_available()) {
973 			mutex_unlock(&kvm->lock);
974 			return -EOPNOTSUPP;
975 		}
976 		kvm->arch.crypto.apie = 1;
977 		break;
978 	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
979 		if (!ap_instructions_available()) {
980 			mutex_unlock(&kvm->lock);
981 			return -EOPNOTSUPP;
982 		}
983 		kvm->arch.crypto.apie = 0;
984 		break;
985 	default:
986 		mutex_unlock(&kvm->lock);
987 		return -ENXIO;
988 	}
989 
990 	kvm_s390_vcpu_crypto_reset_all(kvm);
991 	mutex_unlock(&kvm->lock);
992 	return 0;
993 }
994 
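/* Post a synchronous request to every VCPU of this VM. */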
995 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
996 {
997 	int cx;
998 	struct kvm_vcpu *vcpu;
999 
1000 	kvm_for_each_vcpu(cx, vcpu, kvm)
1001 		kvm_s390_sync_request(req, vcpu);
1002 }
1003 
1004 /*
1005  * Must be called with kvm->srcu held to avoid races on memslots, and with
1006  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1007  */
1008 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1009 {
1010 	struct kvm_memory_slot *ms;
1011 	struct kvm_memslots *slots;
1012 	unsigned long ram_pages = 0;
1013 	int slotnr;
1014 
1015 	/* migration mode already enabled */
1016 	if (kvm->arch.migration_mode)
1017 		return 0;
1018 	slots = kvm_memslots(kvm);
1019 	if (!slots || !slots->used_slots)
1020 		return -EINVAL;
1021 
1022 	if (!kvm->arch.use_cmma) {
1023 		kvm->arch.migration_mode = 1;
1024 		return 0;
1025 	}
1026 	/* mark all the pages in active slots as dirty */
1027 	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1028 		ms = slots->memslots + slotnr;
1029 		if (!ms->dirty_bitmap)
1030 			return -EINVAL;
1031 		/*
1032 		 * The second half of the bitmap is only used on x86,
1033 		 * and would be wasted otherwise, so we put it to good
1034 		 * use here to keep track of the state of the storage
1035 		 * attributes.
1036 		 */
1037 		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1038 		ram_pages += ms->npages;
1039 	}
1040 	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1041 	kvm->arch.migration_mode = 1;
1042 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1043 	return 0;
1044 }
1045 
1046 /*
1047  * Must be called with kvm->slots_lock to avoid races with ourselves and
1048  * kvm_s390_vm_start_migration.
1049  */
1050 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1051 {
1052 	/* migration mode already disabled */
1053 	if (!kvm->arch.migration_mode)
1054 		return 0;
1055 	kvm->arch.migration_mode = 0;
1056 	if (kvm->arch.use_cmma)
1057 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1058 	return 0;
1059 }
1060 
1061 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1062 				     struct kvm_device_attr *attr)
1063 {
1064 	int res = -ENXIO;
1065 
1066 	mutex_lock(&kvm->slots_lock);
1067 	switch (attr->attr) {
1068 	case KVM_S390_VM_MIGRATION_START:
1069 		res = kvm_s390_vm_start_migration(kvm);
1070 		break;
1071 	case KVM_S390_VM_MIGRATION_STOP:
1072 		res = kvm_s390_vm_stop_migration(kvm);
1073 		break;
1074 	default:
1075 		break;
1076 	}
1077 	mutex_unlock(&kvm->slots_lock);
1078 
1079 	return res;
1080 }
1081 
1082 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1083 				     struct kvm_device_attr *attr)
1084 {
1085 	u64 mig = kvm->arch.migration_mode;
1086 
1087 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1088 		return -ENXIO;
1089 
1090 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1091 		return -EFAULT;
1092 	return 0;
1093 }
1094 
1095 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
1096 
1097 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1098 {
1099 	struct kvm_s390_vm_tod_clock gtod;
1100 
1101 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1102 		return -EFAULT;
1103 
1104 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1105 		return -EINVAL;
1106 	__kvm_s390_set_tod_clock(kvm, &gtod);
1107 
1108 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1109 		gtod.epoch_idx, gtod.tod);
1110 
1111 	return 0;
1112 }
1113 
1114 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1115 {
1116 	u8 gtod_high;
1117 
1118 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1119 					   sizeof(gtod_high)))
1120 		return -EFAULT;
1121 
1122 	if (gtod_high != 0)
1123 		return -EINVAL;
1124 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1125 
1126 	return 0;
1127 }
1128 
1129 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1130 {
1131 	struct kvm_s390_vm_tod_clock gtod = { 0 };
1132 
1133 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1134 			   sizeof(gtod.tod)))
1135 		return -EFAULT;
1136 
1137 	__kvm_s390_set_tod_clock(kvm, &gtod);
1138 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1139 	return 0;
1140 }
1141 
1142 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1143 {
1144 	int ret;
1145 
1146 	if (attr->flags)
1147 		return -EINVAL;
1148 
1149 	mutex_lock(&kvm->lock);
1150 	/*
1151 	 * For protected guests, the TOD is managed by the ultravisor, so trying
1152 	 * to change it will never bring the expected results.
1153 	 */
1154 	if (kvm_s390_pv_is_protected(kvm)) {
1155 		ret = -EOPNOTSUPP;
1156 		goto out_unlock;
1157 	}
1158 
1159 	switch (attr->attr) {
1160 	case KVM_S390_VM_TOD_EXT:
1161 		ret = kvm_s390_set_tod_ext(kvm, attr);
1162 		break;
1163 	case KVM_S390_VM_TOD_HIGH:
1164 		ret = kvm_s390_set_tod_high(kvm, attr);
1165 		break;
1166 	case KVM_S390_VM_TOD_LOW:
1167 		ret = kvm_s390_set_tod_low(kvm, attr);
1168 		break;
1169 	default:
1170 		ret = -ENXIO;
1171 		break;
1172 	}
1173 
1174 out_unlock:
1175 	mutex_unlock(&kvm->lock);
1176 	return ret;
1177 }
1178 
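/*
 * Compute the guest view of the TOD clock: host TOD plus the VM's epoch
 * (and epoch index if the multiple-epoch facility is available).
 * Preemption is disabled so that TOD and epoch are read consistently.
 */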
1179 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1180 				   struct kvm_s390_vm_tod_clock *gtod)
1181 {
1182 	struct kvm_s390_tod_clock_ext htod;
1183 
1184 	preempt_disable();
1185 
1186 	get_tod_clock_ext((char *)&htod);
1187 
1188 	gtod->tod = htod.tod + kvm->arch.epoch;
1189 	gtod->epoch_idx = 0;
1190 	if (test_kvm_facility(kvm, 139)) {
1191 		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1192 		if (gtod->tod < htod.tod)
1193 			gtod->epoch_idx += 1;
1194 	}
1195 
1196 	preempt_enable();
1197 }
1198 
1199 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1200 {
1201 	struct kvm_s390_vm_tod_clock gtod;
1202 
1203 	memset(&gtod, 0, sizeof(gtod));
1204 	kvm_s390_get_tod_clock(kvm, &gtod);
1205 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1206 		return -EFAULT;
1207 
1208 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1209 		gtod.epoch_idx, gtod.tod);
1210 	return 0;
1211 }
1212 
1213 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1214 {
1215 	u8 gtod_high = 0;
1216 
1217 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1218 					 sizeof(gtod_high)))
1219 		return -EFAULT;
1220 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1221 
1222 	return 0;
1223 }
1224 
1225 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1226 {
1227 	u64 gtod;
1228 
1229 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1230 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1231 		return -EFAULT;
1232 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1233 
1234 	return 0;
1235 }
1236 
1237 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1238 {
1239 	int ret;
1240 
1241 	if (attr->flags)
1242 		return -EINVAL;
1243 
1244 	switch (attr->attr) {
1245 	case KVM_S390_VM_TOD_EXT:
1246 		ret = kvm_s390_get_tod_ext(kvm, attr);
1247 		break;
1248 	case KVM_S390_VM_TOD_HIGH:
1249 		ret = kvm_s390_get_tod_high(kvm, attr);
1250 		break;
1251 	case KVM_S390_VM_TOD_LOW:
1252 		ret = kvm_s390_get_tod_low(kvm, attr);
1253 		break;
1254 	default:
1255 		ret = -ENXIO;
1256 		break;
1257 	}
1258 	return ret;
1259 }
1260 
1261 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1262 {
1263 	struct kvm_s390_vm_cpu_processor *proc;
1264 	u16 lowest_ibc, unblocked_ibc;
1265 	int ret = 0;
1266 
1267 	mutex_lock(&kvm->lock);
1268 	if (kvm->created_vcpus) {
1269 		ret = -EBUSY;
1270 		goto out;
1271 	}
1272 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1273 	if (!proc) {
1274 		ret = -ENOMEM;
1275 		goto out;
1276 	}
1277 	if (!copy_from_user(proc, (void __user *)attr->addr,
1278 			    sizeof(*proc))) {
1279 		kvm->arch.model.cpuid = proc->cpuid;
1280 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1281 		unblocked_ibc = sclp.ibc & 0xfff;
1282 		if (lowest_ibc && proc->ibc) {
1283 			if (proc->ibc > unblocked_ibc)
1284 				kvm->arch.model.ibc = unblocked_ibc;
1285 			else if (proc->ibc < lowest_ibc)
1286 				kvm->arch.model.ibc = lowest_ibc;
1287 			else
1288 				kvm->arch.model.ibc = proc->ibc;
1289 		}
1290 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1291 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1292 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1293 			 kvm->arch.model.ibc,
1294 			 kvm->arch.model.cpuid);
1295 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1296 			 kvm->arch.model.fac_list[0],
1297 			 kvm->arch.model.fac_list[1],
1298 			 kvm->arch.model.fac_list[2]);
1299 	} else
1300 		ret = -EFAULT;
1301 	kfree(proc);
1302 out:
1303 	mutex_unlock(&kvm->lock);
1304 	return ret;
1305 }
1306 
1307 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1308 				       struct kvm_device_attr *attr)
1309 {
1310 	struct kvm_s390_vm_cpu_feat data;
1311 
1312 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1313 		return -EFAULT;
1314 	if (!bitmap_subset((unsigned long *) data.feat,
1315 			   kvm_s390_available_cpu_feat,
1316 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1317 		return -EINVAL;
1318 
1319 	mutex_lock(&kvm->lock);
1320 	if (kvm->created_vcpus) {
1321 		mutex_unlock(&kvm->lock);
1322 		return -EBUSY;
1323 	}
1324 	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1325 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1326 	mutex_unlock(&kvm->lock);
1327 	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1328 			 data.feat[0],
1329 			 data.feat[1],
1330 			 data.feat[2]);
1331 	return 0;
1332 }
1333 
1334 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1335 					  struct kvm_device_attr *attr)
1336 {
1337 	mutex_lock(&kvm->lock);
1338 	if (kvm->created_vcpus) {
1339 		mutex_unlock(&kvm->lock);
1340 		return -EBUSY;
1341 	}
1342 
1343 	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1344 			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1345 		mutex_unlock(&kvm->lock);
1346 		return -EFAULT;
1347 	}
1348 	mutex_unlock(&kvm->lock);
1349 
1350 	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1351 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1352 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1353 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1354 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1355 	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1356 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1357 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1358 	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1359 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1360 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1361 	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1362 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1363 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1364 	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1365 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1366 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1367 	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1368 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1369 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1370 	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1371 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1372 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1373 	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1374 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1375 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1376 	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1377 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1378 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1379 	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1380 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1381 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1382 	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1383 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1384 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1385 	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1386 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1387 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1388 	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1389 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1390 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1391 	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1392 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1393 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1394 	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1395 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1396 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1397 	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1398 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1399 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1400 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1401 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1402 	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1403 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1404 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1405 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1406 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1407 
1408 	return 0;
1409 }
1410 
1411 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1412 {
1413 	int ret = -ENXIO;
1414 
1415 	switch (attr->attr) {
1416 	case KVM_S390_VM_CPU_PROCESSOR:
1417 		ret = kvm_s390_set_processor(kvm, attr);
1418 		break;
1419 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1420 		ret = kvm_s390_set_processor_feat(kvm, attr);
1421 		break;
1422 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1423 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1424 		break;
1425 	}
1426 	return ret;
1427 }
1428 
1429 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1430 {
1431 	struct kvm_s390_vm_cpu_processor *proc;
1432 	int ret = 0;
1433 
1434 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1435 	if (!proc) {
1436 		ret = -ENOMEM;
1437 		goto out;
1438 	}
1439 	proc->cpuid = kvm->arch.model.cpuid;
1440 	proc->ibc = kvm->arch.model.ibc;
1441 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1442 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1443 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1444 		 kvm->arch.model.ibc,
1445 		 kvm->arch.model.cpuid);
1446 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1447 		 kvm->arch.model.fac_list[0],
1448 		 kvm->arch.model.fac_list[1],
1449 		 kvm->arch.model.fac_list[2]);
1450 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1451 		ret = -EFAULT;
1452 	kfree(proc);
1453 out:
1454 	return ret;
1455 }
1456 
1457 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1458 {
1459 	struct kvm_s390_vm_cpu_machine *mach;
1460 	int ret = 0;
1461 
1462 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1463 	if (!mach) {
1464 		ret = -ENOMEM;
1465 		goto out;
1466 	}
1467 	get_cpu_id((struct cpuid *) &mach->cpuid);
1468 	mach->ibc = sclp.ibc;
1469 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1470 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1471 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1472 	       sizeof(S390_lowcore.stfle_fac_list));
1473 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1474 		 kvm->arch.model.ibc,
1475 		 kvm->arch.model.cpuid);
1476 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1477 		 mach->fac_mask[0],
1478 		 mach->fac_mask[1],
1479 		 mach->fac_mask[2]);
1480 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1481 		 mach->fac_list[0],
1482 		 mach->fac_list[1],
1483 		 mach->fac_list[2]);
1484 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1485 		ret = -EFAULT;
1486 	kfree(mach);
1487 out:
1488 	return ret;
1489 }
1490 
1491 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1492 				       struct kvm_device_attr *attr)
1493 {
1494 	struct kvm_s390_vm_cpu_feat data;
1495 
1496 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1497 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1498 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1499 		return -EFAULT;
1500 	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1501 			 data.feat[0],
1502 			 data.feat[1],
1503 			 data.feat[2]);
1504 	return 0;
1505 }
1506 
1507 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1508 				     struct kvm_device_attr *attr)
1509 {
1510 	struct kvm_s390_vm_cpu_feat data;
1511 
1512 	bitmap_copy((unsigned long *) data.feat,
1513 		    kvm_s390_available_cpu_feat,
1514 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1515 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1516 		return -EFAULT;
1517 	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1518 			 data.feat[0],
1519 			 data.feat[1],
1520 			 data.feat[2]);
1521 	return 0;
1522 }
1523 
1524 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1525 					  struct kvm_device_attr *attr)
1526 {
1527 	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1528 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1529 		return -EFAULT;
1530 
1531 	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1532 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1533 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1534 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1535 		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1536 	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1537 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1538 		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1539 	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1540 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1541 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1542 	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1543 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1544 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1545 	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1546 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1547 		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1548 	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1549 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1550 		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1551 	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1552 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1553 		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1554 	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1555 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1556 		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1557 	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1558 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1559 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1560 	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1561 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1562 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1563 	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1564 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1565 		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1566 	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1567 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1568 		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1569 	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1570 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1571 		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1572 	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1573 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1574 		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1575 	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1576 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1577 		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1578 	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1579 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1580 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1581 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1582 		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1583 	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1584 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1585 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1586 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1587 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1588 
1589 	return 0;
1590 }
1591 
1592 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1593 					struct kvm_device_attr *attr)
1594 {
1595 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1596 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1597 		return -EFAULT;
1598 
1599 	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1600 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1601 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1602 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1603 		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1604 	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1605 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1606 		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1607 	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1608 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1609 		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1610 	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1611 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1612 		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1613 	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1614 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1615 		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1616 	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1617 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1618 		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1619 	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1620 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1621 		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1622 	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1623 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1624 		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1625 	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1626 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1627 		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1628 	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1629 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1630 		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1631 	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1632 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1633 		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1634 	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1635 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1636 		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1637 	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1638 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1639 		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1640 	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1641 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1642 		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1643 	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1644 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1645 		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1646 	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1647 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1648 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1649 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1650 		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1651 	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1652 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1653 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1654 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1655 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1656 
1657 	return 0;
1658 }
1659 
1660 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1661 {
1662 	int ret = -ENXIO;
1663 
1664 	switch (attr->attr) {
1665 	case KVM_S390_VM_CPU_PROCESSOR:
1666 		ret = kvm_s390_get_processor(kvm, attr);
1667 		break;
1668 	case KVM_S390_VM_CPU_MACHINE:
1669 		ret = kvm_s390_get_machine(kvm, attr);
1670 		break;
1671 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1672 		ret = kvm_s390_get_processor_feat(kvm, attr);
1673 		break;
1674 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1675 		ret = kvm_s390_get_machine_feat(kvm, attr);
1676 		break;
1677 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1678 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1679 		break;
1680 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1681 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1682 		break;
1683 	}
1684 	return ret;
1685 }
1686 
1687 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1688 {
1689 	int ret;
1690 
1691 	switch (attr->group) {
1692 	case KVM_S390_VM_MEM_CTRL:
1693 		ret = kvm_s390_set_mem_control(kvm, attr);
1694 		break;
1695 	case KVM_S390_VM_TOD:
1696 		ret = kvm_s390_set_tod(kvm, attr);
1697 		break;
1698 	case KVM_S390_VM_CPU_MODEL:
1699 		ret = kvm_s390_set_cpu_model(kvm, attr);
1700 		break;
1701 	case KVM_S390_VM_CRYPTO:
1702 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1703 		break;
1704 	case KVM_S390_VM_MIGRATION:
1705 		ret = kvm_s390_vm_set_migration(kvm, attr);
1706 		break;
1707 	default:
1708 		ret = -ENXIO;
1709 		break;
1710 	}
1711 
1712 	return ret;
1713 }
1714 
1715 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1716 {
1717 	int ret;
1718 
1719 	switch (attr->group) {
1720 	case KVM_S390_VM_MEM_CTRL:
1721 		ret = kvm_s390_get_mem_control(kvm, attr);
1722 		break;
1723 	case KVM_S390_VM_TOD:
1724 		ret = kvm_s390_get_tod(kvm, attr);
1725 		break;
1726 	case KVM_S390_VM_CPU_MODEL:
1727 		ret = kvm_s390_get_cpu_model(kvm, attr);
1728 		break;
1729 	case KVM_S390_VM_MIGRATION:
1730 		ret = kvm_s390_vm_get_migration(kvm, attr);
1731 		break;
1732 	default:
1733 		ret = -ENXIO;
1734 		break;
1735 	}
1736 
1737 	return ret;
1738 }
1739 
1740 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1741 {
1742 	int ret;
1743 
1744 	switch (attr->group) {
1745 	case KVM_S390_VM_MEM_CTRL:
1746 		switch (attr->attr) {
1747 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1748 		case KVM_S390_VM_MEM_CLR_CMMA:
1749 			ret = sclp.has_cmma ? 0 : -ENXIO;
1750 			break;
1751 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1752 			ret = 0;
1753 			break;
1754 		default:
1755 			ret = -ENXIO;
1756 			break;
1757 		}
1758 		break;
1759 	case KVM_S390_VM_TOD:
1760 		switch (attr->attr) {
1761 		case KVM_S390_VM_TOD_LOW:
1762 		case KVM_S390_VM_TOD_HIGH:
1763 			ret = 0;
1764 			break;
1765 		default:
1766 			ret = -ENXIO;
1767 			break;
1768 		}
1769 		break;
1770 	case KVM_S390_VM_CPU_MODEL:
1771 		switch (attr->attr) {
1772 		case KVM_S390_VM_CPU_PROCESSOR:
1773 		case KVM_S390_VM_CPU_MACHINE:
1774 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1775 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1776 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1777 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1778 			ret = 0;
1779 			break;
1780 		default:
1781 			ret = -ENXIO;
1782 			break;
1783 		}
1784 		break;
1785 	case KVM_S390_VM_CRYPTO:
1786 		switch (attr->attr) {
1787 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1788 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1789 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1790 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1791 			ret = 0;
1792 			break;
1793 		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1794 		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1795 			ret = ap_instructions_available() ? 0 : -ENXIO;
1796 			break;
1797 		default:
1798 			ret = -ENXIO;
1799 			break;
1800 		}
1801 		break;
1802 	case KVM_S390_VM_MIGRATION:
1803 		ret = 0;
1804 		break;
1805 	default:
1806 		ret = -ENXIO;
1807 		break;
1808 	}
1809 
1810 	return ret;
1811 }
1812 
1813 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1814 {
1815 	uint8_t *keys;
1816 	uint64_t hva;
1817 	int srcu_idx, i, r = 0;
1818 
1819 	if (args->flags != 0)
1820 		return -EINVAL;
1821 
1822 	/* Is this guest using storage keys? */
1823 	if (!mm_uses_skeys(current->mm))
1824 		return KVM_S390_GET_SKEYS_NONE;
1825 
1826 	/* Enforce sane limit on memory allocation */
1827 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1828 		return -EINVAL;
1829 
1830 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1831 	if (!keys)
1832 		return -ENOMEM;
1833 
1834 	mmap_read_lock(current->mm);
1835 	srcu_idx = srcu_read_lock(&kvm->srcu);
1836 	for (i = 0; i < args->count; i++) {
1837 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1838 		if (kvm_is_error_hva(hva)) {
1839 			r = -EFAULT;
1840 			break;
1841 		}
1842 
1843 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1844 		if (r)
1845 			break;
1846 	}
1847 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1848 	mmap_read_unlock(current->mm);
1849 
1850 	if (!r) {
1851 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1852 				 sizeof(uint8_t) * args->count);
1853 		if (r)
1854 			r = -EFAULT;
1855 	}
1856 
1857 	kvfree(keys);
1858 	return r;
1859 }
1860 
1861 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1862 {
1863 	uint8_t *keys;
1864 	uint64_t hva;
1865 	int srcu_idx, i, r = 0;
1866 	bool unlocked;
1867 
1868 	if (args->flags != 0)
1869 		return -EINVAL;
1870 
1871 	/* Enforce sane limit on memory allocation */
1872 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1873 		return -EINVAL;
1874 
1875 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1876 	if (!keys)
1877 		return -ENOMEM;
1878 
1879 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1880 			   sizeof(uint8_t) * args->count);
1881 	if (r) {
1882 		r = -EFAULT;
1883 		goto out;
1884 	}
1885 
1886 	/* Enable storage key handling for the guest */
1887 	r = s390_enable_skey();
1888 	if (r)
1889 		goto out;
1890 
1891 	i = 0;
1892 	mmap_read_lock(current->mm);
1893 	srcu_idx = srcu_read_lock(&kvm->srcu);
1894 	while (i < args->count) {
1895 		unlocked = false;
1896 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1897 		if (kvm_is_error_hva(hva)) {
1898 			r = -EFAULT;
1899 			break;
1900 		}
1901 
1902 		/* Lowest order bit is reserved */
1903 		if (keys[i] & 0x01) {
1904 			r = -EINVAL;
1905 			break;
1906 		}
1907 
1908 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1909 		if (r) {
1910 			r = fixup_user_fault(current->mm, hva,
1911 					     FAULT_FLAG_WRITE, &unlocked);
1912 			if (r)
1913 				break;
1914 		}
1915 		if (!r)
1916 			i++;
1917 	}
1918 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1919 	mmap_read_unlock(current->mm);
1920 out:
1921 	kvfree(keys);
1922 	return r;
1923 }
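/*
 * Editor's illustrative sketch (not part of this file): how a userspace
 * VMM might drive the storage-key ioctls handled above.  Struct and ioctl
 * names follow the uapi header <linux/kvm.h>; the helper name, vm_fd and
 * the gfn range are placeholder assumptions.
 */
#if 0	/* example only */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Read "count" guest storage keys starting at guest frame "start_gfn". */
static int dump_skeys(int vm_fd, uint64_t start_gfn, uint64_t count)
{
	struct kvm_s390_skeys args = {};
	uint8_t *keys = calloc(count, 1);
	int rc;

	if (!keys)
		return -1;
	args.start_gfn = start_gfn;
	args.count = count;
	args.skeydata_addr = (uint64_t)(uintptr_t)keys;

	rc = ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
	if (rc == KVM_S390_GET_SKEYS_NONE)
		printf("guest does not use storage keys\n");
	else if (rc == 0)
		printf("key of first frame: 0x%02x\n", keys[0]);

	free(keys);
	return rc;
}
#endif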
1924 
1925 /*
1926  * Base address and length must be sent at the start of each block, therefore
1927  * it's cheaper to send some clean data, as long as it's less than the size of
1928  * two longs.
1929  */
1930 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1931 /* for consistency */
1932 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
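/*
 * Worked example (editor's note): with 8-byte longs the per-block header
 * costs 2 * 8 = 16 bytes (base address + length), so re-sending up to 16
 * clean one-byte CMMA values inline never costs more than opening a new
 * block; hence the KVM_S390_MAX_BIT_DISTANCE cut-off above.
 */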
1933 
1934 /*
1935  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1936  * address falls in a hole. In that case the index of one of the memslots
1937  * bordering the hole is returned.
1938  */
1939 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1940 {
1941 	int start = 0, end = slots->used_slots;
1942 	int slot = atomic_read(&slots->lru_slot);
1943 	struct kvm_memory_slot *memslots = slots->memslots;
1944 
1945 	if (gfn >= memslots[slot].base_gfn &&
1946 	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
1947 		return slot;
1948 
1949 	while (start < end) {
1950 		slot = start + (end - start) / 2;
1951 
1952 		if (gfn >= memslots[slot].base_gfn)
1953 			end = slot;
1954 		else
1955 			start = slot + 1;
1956 	}
1957 
1958 	if (start >= slots->used_slots)
1959 		return slots->used_slots - 1;
1960 
1961 	if (gfn >= memslots[start].base_gfn &&
1962 	    gfn < memslots[start].base_gfn + memslots[start].npages) {
1963 		atomic_set(&slots->lru_slot, start);
1964 	}
1965 
1966 	return start;
1967 }
1968 
1969 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1970 			      u8 *res, unsigned long bufsize)
1971 {
1972 	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1973 
1974 	args->count = 0;
1975 	while (args->count < bufsize) {
1976 		hva = gfn_to_hva(kvm, cur_gfn);
1977 		/*
1978 		 * We return an error if the first value was invalid, but we
1979 		 * return successfully if at least one value was copied.
1980 		 */
1981 		if (kvm_is_error_hva(hva))
1982 			return args->count ? 0 : -EFAULT;
1983 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1984 			pgstev = 0;
1985 		res[args->count++] = (pgstev >> 24) & 0x43;
1986 		cur_gfn++;
1987 	}
1988 
1989 	return 0;
1990 }
1991 
1992 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1993 					      unsigned long cur_gfn)
1994 {
1995 	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1996 	struct kvm_memory_slot *ms = slots->memslots + slotidx;
1997 	unsigned long ofs = cur_gfn - ms->base_gfn;
1998 
1999 	if (ms->base_gfn + ms->npages <= cur_gfn) {
2000 		slotidx--;
2001 		/* If we are above the highest slot, wrap around */
2002 		if (slotidx < 0)
2003 			slotidx = slots->used_slots - 1;
2004 
2005 		ms = slots->memslots + slotidx;
2006 		ofs = 0;
2007 	}
2008 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2009 	while ((slotidx > 0) && (ofs >= ms->npages)) {
2010 		slotidx--;
2011 		ms = slots->memslots + slotidx;
2012 		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2013 	}
2014 	return ms->base_gfn + ofs;
2015 }
2016 
2017 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2018 			     u8 *res, unsigned long bufsize)
2019 {
2020 	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2021 	struct kvm_memslots *slots = kvm_memslots(kvm);
2022 	struct kvm_memory_slot *ms;
2023 
2024 	if (unlikely(!slots->used_slots))
2025 		return 0;
2026 
2027 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2028 	ms = gfn_to_memslot(kvm, cur_gfn);
2029 	args->count = 0;
2030 	args->start_gfn = cur_gfn;
2031 	if (!ms)
2032 		return 0;
2033 	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2034 	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2035 
2036 	while (args->count < bufsize) {
2037 		hva = gfn_to_hva(kvm, cur_gfn);
2038 		if (kvm_is_error_hva(hva))
2039 			return 0;
2040 		/* Decrement only if we actually flipped the bit to 0 */
2041 		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2042 			atomic64_dec(&kvm->arch.cmma_dirty_pages);
2043 		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2044 			pgstev = 0;
2045 		/* Save the value */
2046 		res[args->count++] = (pgstev >> 24) & 0x43;
2047 		/* If the next bit is too far away, stop. */
2048 		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2049 			return 0;
2050 		/* If we reached the previous "next", find the next one */
2051 		if (cur_gfn == next_gfn)
2052 			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2053 		/* Reached the end of memory or of the buffer, stop */
2054 		if ((next_gfn >= mem_end) ||
2055 		    (next_gfn - args->start_gfn >= bufsize))
2056 			return 0;
2057 		cur_gfn++;
2058 		/* Reached the end of the current memslot, take the next one. */
2059 		if (cur_gfn - ms->base_gfn >= ms->npages) {
2060 			ms = gfn_to_memslot(kvm, cur_gfn);
2061 			if (!ms)
2062 				return 0;
2063 		}
2064 	}
2065 	return 0;
2066 }
2067 
2068 /*
2069  * This function searches for the next page with dirty CMMA attributes, and
2070  * saves the attributes in the buffer up to either the end of the buffer or
2071  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2072  * no trailing clean bytes are saved.
2073  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2074  * output buffer will indicate 0 as length.
2075  */
2076 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2077 				  struct kvm_s390_cmma_log *args)
2078 {
2079 	unsigned long bufsize;
2080 	int srcu_idx, peek, ret;
2081 	u8 *values;
2082 
2083 	if (!kvm->arch.use_cmma)
2084 		return -ENXIO;
2085 	/* Invalid/unsupported flags were specified */
2086 	if (args->flags & ~KVM_S390_CMMA_PEEK)
2087 		return -EINVAL;
2088 	/* Migration mode query, and we are not doing a migration */
2089 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2090 	if (!peek && !kvm->arch.migration_mode)
2091 		return -EINVAL;
2092 	/* CMMA is disabled or was not used, or the buffer has length zero */
2093 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2094 	if (!bufsize || !kvm->mm->context.uses_cmm) {
2095 		memset(args, 0, sizeof(*args));
2096 		return 0;
2097 	}
2098 	/* We are not peeking, and there are no dirty pages */
2099 	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2100 		memset(args, 0, sizeof(*args));
2101 		return 0;
2102 	}
2103 
2104 	values = vmalloc(bufsize);
2105 	if (!values)
2106 		return -ENOMEM;
2107 
2108 	mmap_read_lock(kvm->mm);
2109 	srcu_idx = srcu_read_lock(&kvm->srcu);
2110 	if (peek)
2111 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2112 	else
2113 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2114 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2115 	mmap_read_unlock(kvm->mm);
2116 
2117 	if (kvm->arch.migration_mode)
2118 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2119 	else
2120 		args->remaining = 0;
2121 
2122 	if (copy_to_user((void __user *)args->values, values, args->count))
2123 		ret = -EFAULT;
2124 
2125 	vfree(values);
2126 	return ret;
2127 }
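/*
 * Editor's illustrative sketch (not part of this file): peeking at CMMA
 * values from userspace through the VM ioctl dispatched below.  Struct,
 * flag and ioctl names follow the uapi header; the helper name, vm_fd and
 * the gfn range are placeholder assumptions.
 */
#if 0	/* example only */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int peek_cmma(int vm_fd, uint64_t start_gfn, uint32_t count)
{
	struct kvm_s390_cmma_log log = {};
	uint8_t *values = calloc(count, 1);
	int rc;

	if (!values)
		return -1;
	log.start_gfn = start_gfn;
	log.count = count;
	log.flags = KVM_S390_CMMA_PEEK;	/* no migration mode required */
	log.values = (uint64_t)(uintptr_t)values;

	rc = ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
	if (!rc)
		printf("copied %u values starting at gfn %llu\n",
		       log.count, (unsigned long long)log.start_gfn);

	free(values);
	return rc;
}
#endif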
2128 
2129 /*
2130  * This function sets the CMMA attributes for the given pages. If the input
2131  * buffer has zero length, no action is taken, otherwise the attributes are
2132  * set and the mm->context.uses_cmm flag is set.
2133  */
2134 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2135 				  const struct kvm_s390_cmma_log *args)
2136 {
2137 	unsigned long hva, mask, pgstev, i;
2138 	uint8_t *bits;
2139 	int srcu_idx, r = 0;
2140 
2141 	mask = args->mask;
2142 
2143 	if (!kvm->arch.use_cmma)
2144 		return -ENXIO;
2145 	/* invalid/unsupported flags */
2146 	if (args->flags != 0)
2147 		return -EINVAL;
2148 	/* Enforce sane limit on memory allocation */
2149 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
2150 		return -EINVAL;
2151 	/* Nothing to do */
2152 	if (args->count == 0)
2153 		return 0;
2154 
2155 	bits = vmalloc(array_size(sizeof(*bits), args->count));
2156 	if (!bits)
2157 		return -ENOMEM;
2158 
2159 	r = copy_from_user(bits, (void __user *)args->values, args->count);
2160 	if (r) {
2161 		r = -EFAULT;
2162 		goto out;
2163 	}
2164 
2165 	mmap_read_lock(kvm->mm);
2166 	srcu_idx = srcu_read_lock(&kvm->srcu);
2167 	for (i = 0; i < args->count; i++) {
2168 		hva = gfn_to_hva(kvm, args->start_gfn + i);
2169 		if (kvm_is_error_hva(hva)) {
2170 			r = -EFAULT;
2171 			break;
2172 		}
2173 
2174 		pgstev = bits[i];
2175 		pgstev = pgstev << 24;
2176 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2177 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
2178 	}
2179 	srcu_read_unlock(&kvm->srcu, srcu_idx);
2180 	mmap_read_unlock(kvm->mm);
2181 
2182 	if (!kvm->mm->context.uses_cmm) {
2183 		mmap_write_lock(kvm->mm);
2184 		kvm->mm->context.uses_cmm = 1;
2185 		mmap_write_unlock(kvm->mm);
2186 	}
2187 out:
2188 	vfree(bits);
2189 	return r;
2190 }
2191 
2192 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2193 {
2194 	struct kvm_vcpu *vcpu;
2195 	u16 rc, rrc;
2196 	int ret = 0;
2197 	int i;
2198 
2199 	/*
2200 	 * We ignore failures and try to destroy as many CPUs as possible.
2201 	 * At the same time we must not free the assigned resources when
2202 	 * this fails, as the ultravisor still has access to that memory.
2203 	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2204 	 * behind.
2205 	 * We want to return the first failure rc and rrc, though.
2206 	 */
2207 	kvm_for_each_vcpu(i, vcpu, kvm) {
2208 		mutex_lock(&vcpu->mutex);
2209 		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2210 			*rcp = rc;
2211 			*rrcp = rrc;
2212 			ret = -EIO;
2213 		}
2214 		mutex_unlock(&vcpu->mutex);
2215 	}
2216 	return ret;
2217 }
2218 
2219 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2220 {
2221 	int i, r = 0;
2222 	u16 dummy;
2223 
2224 	struct kvm_vcpu *vcpu;
2225 
2226 	kvm_for_each_vcpu(i, vcpu, kvm) {
2227 		mutex_lock(&vcpu->mutex);
2228 		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2229 		mutex_unlock(&vcpu->mutex);
2230 		if (r)
2231 			break;
2232 	}
2233 	if (r)
2234 		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2235 	return r;
2236 }
2237 
2238 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2239 {
2240 	int r = 0;
2241 	u16 dummy;
2242 	void __user *argp = (void __user *)cmd->data;
2243 
2244 	switch (cmd->cmd) {
2245 	case KVM_PV_ENABLE: {
2246 		r = -EINVAL;
2247 		if (kvm_s390_pv_is_protected(kvm))
2248 			break;
2249 
2250 		/*
2251 		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
2252 		 *  esca, we need no cleanup in the error cases below
2253 		 */
2254 		r = sca_switch_to_extended(kvm);
2255 		if (r)
2256 			break;
2257 
2258 		mmap_write_lock(current->mm);
2259 		r = gmap_mark_unmergeable();
2260 		mmap_write_unlock(current->mm);
2261 		if (r)
2262 			break;
2263 
2264 		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2265 		if (r)
2266 			break;
2267 
2268 		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2269 		if (r)
2270 			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2271 
2272 		/* we need to block service interrupts from now on */
2273 		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2274 		break;
2275 	}
2276 	case KVM_PV_DISABLE: {
2277 		r = -EINVAL;
2278 		if (!kvm_s390_pv_is_protected(kvm))
2279 			break;
2280 
2281 		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2282 		/*
2283 		 * If a CPU could not be destroyed, destroying the VM will also fail.
2284 		 * There is no point in trying to destroy it. Instead return
2285 		 * the rc and rrc from the first CPU that failed destroying.
2286 		 */
2287 		if (r)
2288 			break;
2289 		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2290 
2291 		/* no need to block service interrupts any more */
2292 		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2293 		break;
2294 	}
2295 	case KVM_PV_SET_SEC_PARMS: {
2296 		struct kvm_s390_pv_sec_parm parms = {};
2297 		void *hdr;
2298 
2299 		r = -EINVAL;
2300 		if (!kvm_s390_pv_is_protected(kvm))
2301 			break;
2302 
2303 		r = -EFAULT;
2304 		if (copy_from_user(&parms, argp, sizeof(parms)))
2305 			break;
2306 
2307 		/* Currently restricted to 8KB */
2308 		r = -EINVAL;
2309 		if (parms.length > PAGE_SIZE * 2)
2310 			break;
2311 
2312 		r = -ENOMEM;
2313 		hdr = vmalloc(parms.length);
2314 		if (!hdr)
2315 			break;
2316 
2317 		r = -EFAULT;
2318 		if (!copy_from_user(hdr, (void __user *)parms.origin,
2319 				    parms.length))
2320 			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2321 						      &cmd->rc, &cmd->rrc);
2322 
2323 		vfree(hdr);
2324 		break;
2325 	}
2326 	case KVM_PV_UNPACK: {
2327 		struct kvm_s390_pv_unp unp = {};
2328 
2329 		r = -EINVAL;
2330 		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2331 			break;
2332 
2333 		r = -EFAULT;
2334 		if (copy_from_user(&unp, argp, sizeof(unp)))
2335 			break;
2336 
2337 		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2338 				       &cmd->rc, &cmd->rrc);
2339 		break;
2340 	}
2341 	case KVM_PV_VERIFY: {
2342 		r = -EINVAL;
2343 		if (!kvm_s390_pv_is_protected(kvm))
2344 			break;
2345 
2346 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2347 				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2348 		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2349 			     cmd->rrc);
2350 		break;
2351 	}
2352 	case KVM_PV_PREP_RESET: {
2353 		r = -EINVAL;
2354 		if (!kvm_s390_pv_is_protected(kvm))
2355 			break;
2356 
2357 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2358 				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2359 		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2360 			     cmd->rc, cmd->rrc);
2361 		break;
2362 	}
2363 	case KVM_PV_UNSHARE_ALL: {
2364 		r = -EINVAL;
2365 		if (!kvm_s390_pv_is_protected(kvm))
2366 			break;
2367 
2368 		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2369 				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2370 		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2371 			     cmd->rc, cmd->rrc);
2372 		break;
2373 	}
2374 	default:
2375 		r = -ENOTTY;
2376 	}
2377 	return r;
2378 }
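/*
 * Editor's illustrative sketch (not part of this file): converting a VM
 * to protected mode from userspace with the KVM_S390_PV_COMMAND ioctl
 * handled by kvm_arch_vm_ioctl() below.  Command and field names follow
 * the uapi header; the helper name and vm_fd are placeholder assumptions.
 */
#if 0	/* example only */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int pv_enable(int vm_fd)
{
	struct kvm_pv_cmd cmd = {
		.cmd = KVM_PV_ENABLE,
		/* .data and .flags stay 0; flags must be 0 for this command */
	};
	int rc = ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);

	if (rc)
		fprintf(stderr, "PV enable failed: rc 0x%x rrc 0x%x\n",
			cmd.rc, cmd.rrc);
	return rc;
}
#endif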
2379 
2380 long kvm_arch_vm_ioctl(struct file *filp,
2381 		       unsigned int ioctl, unsigned long arg)
2382 {
2383 	struct kvm *kvm = filp->private_data;
2384 	void __user *argp = (void __user *)arg;
2385 	struct kvm_device_attr attr;
2386 	int r;
2387 
2388 	switch (ioctl) {
2389 	case KVM_S390_INTERRUPT: {
2390 		struct kvm_s390_interrupt s390int;
2391 
2392 		r = -EFAULT;
2393 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
2394 			break;
2395 		r = kvm_s390_inject_vm(kvm, &s390int);
2396 		break;
2397 	}
2398 	case KVM_CREATE_IRQCHIP: {
2399 		struct kvm_irq_routing_entry routing;
2400 
2401 		r = -EINVAL;
2402 		if (kvm->arch.use_irqchip) {
2403 			/* Set up dummy routing. */
2404 			memset(&routing, 0, sizeof(routing));
2405 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2406 		}
2407 		break;
2408 	}
2409 	case KVM_SET_DEVICE_ATTR: {
2410 		r = -EFAULT;
2411 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2412 			break;
2413 		r = kvm_s390_vm_set_attr(kvm, &attr);
2414 		break;
2415 	}
2416 	case KVM_GET_DEVICE_ATTR: {
2417 		r = -EFAULT;
2418 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2419 			break;
2420 		r = kvm_s390_vm_get_attr(kvm, &attr);
2421 		break;
2422 	}
2423 	case KVM_HAS_DEVICE_ATTR: {
2424 		r = -EFAULT;
2425 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2426 			break;
2427 		r = kvm_s390_vm_has_attr(kvm, &attr);
2428 		break;
2429 	}
2430 	case KVM_S390_GET_SKEYS: {
2431 		struct kvm_s390_skeys args;
2432 
2433 		r = -EFAULT;
2434 		if (copy_from_user(&args, argp,
2435 				   sizeof(struct kvm_s390_skeys)))
2436 			break;
2437 		r = kvm_s390_get_skeys(kvm, &args);
2438 		break;
2439 	}
2440 	case KVM_S390_SET_SKEYS: {
2441 		struct kvm_s390_skeys args;
2442 
2443 		r = -EFAULT;
2444 		if (copy_from_user(&args, argp,
2445 				   sizeof(struct kvm_s390_skeys)))
2446 			break;
2447 		r = kvm_s390_set_skeys(kvm, &args);
2448 		break;
2449 	}
2450 	case KVM_S390_GET_CMMA_BITS: {
2451 		struct kvm_s390_cmma_log args;
2452 
2453 		r = -EFAULT;
2454 		if (copy_from_user(&args, argp, sizeof(args)))
2455 			break;
2456 		mutex_lock(&kvm->slots_lock);
2457 		r = kvm_s390_get_cmma_bits(kvm, &args);
2458 		mutex_unlock(&kvm->slots_lock);
2459 		if (!r) {
2460 			r = copy_to_user(argp, &args, sizeof(args));
2461 			if (r)
2462 				r = -EFAULT;
2463 		}
2464 		break;
2465 	}
2466 	case KVM_S390_SET_CMMA_BITS: {
2467 		struct kvm_s390_cmma_log args;
2468 
2469 		r = -EFAULT;
2470 		if (copy_from_user(&args, argp, sizeof(args)))
2471 			break;
2472 		mutex_lock(&kvm->slots_lock);
2473 		r = kvm_s390_set_cmma_bits(kvm, &args);
2474 		mutex_unlock(&kvm->slots_lock);
2475 		break;
2476 	}
2477 	case KVM_S390_PV_COMMAND: {
2478 		struct kvm_pv_cmd args;
2479 
2480 		/* protvirt means user sigp */
2481 		kvm->arch.user_cpu_state_ctrl = 1;
2482 		r = 0;
2483 		if (!is_prot_virt_host()) {
2484 			r = -EINVAL;
2485 			break;
2486 		}
2487 		if (copy_from_user(&args, argp, sizeof(args))) {
2488 			r = -EFAULT;
2489 			break;
2490 		}
2491 		if (args.flags) {
2492 			r = -EINVAL;
2493 			break;
2494 		}
2495 		mutex_lock(&kvm->lock);
2496 		r = kvm_s390_handle_pv(kvm, &args);
2497 		mutex_unlock(&kvm->lock);
2498 		if (copy_to_user(argp, &args, sizeof(args))) {
2499 			r = -EFAULT;
2500 			break;
2501 		}
2502 		break;
2503 	}
2504 	default:
2505 		r = -ENOTTY;
2506 	}
2507 
2508 	return r;
2509 }
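/*
 * Editor's illustrative sketch (not part of this file): probing and
 * reading the machine CPU model through the VM device-attribute interface
 * dispatched above.  It assumes the s390 uapi definitions pulled in by
 * <linux/kvm.h>; the helper name and vm_fd are placeholder assumptions.
 */
#if 0	/* example only */
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int show_machine_model(int vm_fd)
{
	struct kvm_s390_vm_cpu_machine machine;
	struct kvm_device_attr attr = {
		.group = KVM_S390_VM_CPU_MODEL,
		.attr  = KVM_S390_VM_CPU_MACHINE,
		.addr  = (uint64_t)(uintptr_t)&machine,
	};

	/* -ENXIO from the handler above means the attribute is unknown */
	if (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr))
		return -1;
	if (ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr))
		return -1;

	printf("cpuid 0x%016llx ibc 0x%x\n",
	       (unsigned long long)machine.cpuid, machine.ibc);
	return 0;
}
#endif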
2510 
2511 static int kvm_s390_apxa_installed(void)
2512 {
2513 	struct ap_config_info info;
2514 
2515 	if (ap_instructions_available()) {
2516 		if (ap_qci(&info) == 0)
2517 			return info.apxa;
2518 	}
2519 
2520 	return 0;
2521 }
2522 
2523 /*
2524  * The format of the crypto control block (CRYCB) is specified in the 3 low
2525  * order bits of the CRYCB designation (CRYCBD) field as follows:
2526  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2527  *	     AP extended addressing (APXA) facility is installed.
2528  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2529  * Format 2: Both the APXA and MSAX3 facilities are installed.
2530  */
2531 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2532 {
2533 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2534 
2535 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
2536 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2537 
2538 	/* Check whether MSAX3 is installed */
2539 	if (!test_kvm_facility(kvm, 76))
2540 		return;
2541 
2542 	if (kvm_s390_apxa_installed())
2543 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2544 	else
2545 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2546 }
2547 
2548 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2549 			       unsigned long *aqm, unsigned long *adm)
2550 {
2551 	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2552 
2553 	mutex_lock(&kvm->lock);
2554 	kvm_s390_vcpu_block_all(kvm);
2555 
2556 	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2557 	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2558 		memcpy(crycb->apcb1.apm, apm, 32);
2559 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2560 			 apm[0], apm[1], apm[2], apm[3]);
2561 		memcpy(crycb->apcb1.aqm, aqm, 32);
2562 		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2563 			 aqm[0], aqm[1], aqm[2], aqm[3]);
2564 		memcpy(crycb->apcb1.adm, adm, 32);
2565 		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2566 			 adm[0], adm[1], adm[2], adm[3]);
2567 		break;
2568 	case CRYCB_FORMAT1:
2569 	case CRYCB_FORMAT0: /* Fall through, both use APCB0 */
2570 		memcpy(crycb->apcb0.apm, apm, 8);
2571 		memcpy(crycb->apcb0.aqm, aqm, 2);
2572 		memcpy(crycb->apcb0.adm, adm, 2);
2573 		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2574 			 apm[0], *((unsigned short *)aqm),
2575 			 *((unsigned short *)adm));
2576 		break;
2577 	default:	/* Cannot happen */
2578 		break;
2579 	}
2580 
2581 	/* recreate the shadow crycb for each vcpu */
2582 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2583 	kvm_s390_vcpu_unblock_all(kvm);
2584 	mutex_unlock(&kvm->lock);
2585 }
2586 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2587 
2588 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2589 {
2590 	mutex_lock(&kvm->lock);
2591 	kvm_s390_vcpu_block_all(kvm);
2592 
2593 	memset(&kvm->arch.crypto.crycb->apcb0, 0,
2594 	       sizeof(kvm->arch.crypto.crycb->apcb0));
2595 	memset(&kvm->arch.crypto.crycb->apcb1, 0,
2596 	       sizeof(kvm->arch.crypto.crycb->apcb1));
2597 
2598 	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2599 	/* recreate the shadow crycb for each vcpu */
2600 	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2601 	kvm_s390_vcpu_unblock_all(kvm);
2602 	mutex_unlock(&kvm->lock);
2603 }
2604 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2605 
2606 static u64 kvm_s390_get_initial_cpuid(void)
2607 {
2608 	struct cpuid cpuid;
2609 
2610 	get_cpu_id(&cpuid);
2611 	cpuid.version = 0xff;
2612 	return *((u64 *) &cpuid);
2613 }
2614 
2615 static void kvm_s390_crypto_init(struct kvm *kvm)
2616 {
2617 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2618 	kvm_s390_set_crycb_format(kvm);
2619 
2620 	if (!test_kvm_facility(kvm, 76))
2621 		return;
2622 
2623 	/* Enable AES/DEA protected key functions by default */
2624 	kvm->arch.crypto.aes_kw = 1;
2625 	kvm->arch.crypto.dea_kw = 1;
2626 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2627 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2628 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2629 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2630 }
2631 
2632 static void sca_dispose(struct kvm *kvm)
2633 {
2634 	if (kvm->arch.use_esca)
2635 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2636 	else
2637 		free_page((unsigned long)(kvm->arch.sca));
2638 	kvm->arch.sca = NULL;
2639 }
2640 
2641 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2642 {
2643 	gfp_t alloc_flags = GFP_KERNEL;
2644 	int i, rc;
2645 	char debug_name[16];
2646 	static unsigned long sca_offset;
2647 
2648 	rc = -EINVAL;
2649 #ifdef CONFIG_KVM_S390_UCONTROL
2650 	if (type & ~KVM_VM_S390_UCONTROL)
2651 		goto out_err;
2652 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2653 		goto out_err;
2654 #else
2655 	if (type)
2656 		goto out_err;
2657 #endif
2658 
2659 	rc = s390_enable_sie();
2660 	if (rc)
2661 		goto out_err;
2662 
2663 	rc = -ENOMEM;
2664 
2665 	if (!sclp.has_64bscao)
2666 		alloc_flags |= GFP_DMA;
2667 	rwlock_init(&kvm->arch.sca_lock);
2668 	/* start with basic SCA */
2669 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2670 	if (!kvm->arch.sca)
2671 		goto out_err;
2672 	mutex_lock(&kvm_lock);
2673 	sca_offset += 16;
2674 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2675 		sca_offset = 0;
2676 	kvm->arch.sca = (struct bsca_block *)
2677 			((char *) kvm->arch.sca + sca_offset);
2678 	mutex_unlock(&kvm_lock);
2679 
2680 	sprintf(debug_name, "kvm-%u", current->pid);
2681 
2682 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2683 	if (!kvm->arch.dbf)
2684 		goto out_err;
2685 
2686 	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2687 	kvm->arch.sie_page2 =
2688 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2689 	if (!kvm->arch.sie_page2)
2690 		goto out_err;
2691 
2692 	kvm->arch.sie_page2->kvm = kvm;
2693 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2694 
2695 	for (i = 0; i < kvm_s390_fac_size(); i++) {
2696 		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2697 					      (kvm_s390_fac_base[i] |
2698 					       kvm_s390_fac_ext[i]);
2699 		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2700 					      kvm_s390_fac_base[i];
2701 	}
2702 	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2703 
2704 	/* we are always in czam mode - even on pre z14 machines */
2705 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
2706 	set_kvm_facility(kvm->arch.model.fac_list, 138);
2707 	/* we emulate STHYI in kvm */
2708 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
2709 	set_kvm_facility(kvm->arch.model.fac_list, 74);
2710 	if (MACHINE_HAS_TLB_GUEST) {
2711 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
2712 		set_kvm_facility(kvm->arch.model.fac_list, 147);
2713 	}
2714 
2715 	if (css_general_characteristics.aiv && test_facility(65))
2716 		set_kvm_facility(kvm->arch.model.fac_mask, 65);
2717 
2718 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2719 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2720 
2721 	kvm_s390_crypto_init(kvm);
2722 
2723 	mutex_init(&kvm->arch.float_int.ais_lock);
2724 	spin_lock_init(&kvm->arch.float_int.lock);
2725 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
2726 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2727 	init_waitqueue_head(&kvm->arch.ipte_wq);
2728 	mutex_init(&kvm->arch.ipte_mutex);
2729 
2730 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2731 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2732 
2733 	if (type & KVM_VM_S390_UCONTROL) {
2734 		kvm->arch.gmap = NULL;
2735 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2736 	} else {
2737 		if (sclp.hamax == U64_MAX)
2738 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2739 		else
2740 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2741 						    sclp.hamax + 1);
2742 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2743 		if (!kvm->arch.gmap)
2744 			goto out_err;
2745 		kvm->arch.gmap->private = kvm;
2746 		kvm->arch.gmap->pfault_enabled = 0;
2747 	}
2748 
2749 	kvm->arch.use_pfmfi = sclp.has_pfmfi;
2750 	kvm->arch.use_skf = sclp.has_skey;
2751 	spin_lock_init(&kvm->arch.start_stop_lock);
2752 	kvm_s390_vsie_init(kvm);
2753 	if (use_gisa)
2754 		kvm_s390_gisa_init(kvm);
2755 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2756 
2757 	return 0;
2758 out_err:
2759 	free_page((unsigned long)kvm->arch.sie_page2);
2760 	debug_unregister(kvm->arch.dbf);
2761 	sca_dispose(kvm);
2762 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2763 	return rc;
2764 }
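/*
 * Editor's illustrative sketch (not part of this file): the "type"
 * argument above comes straight from userspace's KVM_CREATE_VM call.
 * A minimal caller, assuming CONFIG_KVM_S390_UCONTROL and CAP_SYS_ADMIN
 * for the user-controlled case; the helper name is a placeholder.
 */
#if 0	/* example only */
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int create_vm(int ucontrol)
{
	int kvm_fd = open("/dev/kvm", O_RDWR);

	if (kvm_fd < 0)
		return -1;
	/* type 0 creates a regular VM, KVM_VM_S390_UCONTROL a ucontrol VM */
	return ioctl(kvm_fd, KVM_CREATE_VM,
		     ucontrol ? KVM_VM_S390_UCONTROL : 0);
}
#endif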
2765 
2766 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2767 {
2768 	u16 rc, rrc;
2769 
2770 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2771 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2772 	kvm_s390_clear_local_irqs(vcpu);
2773 	kvm_clear_async_pf_completion_queue(vcpu);
2774 	if (!kvm_is_ucontrol(vcpu->kvm))
2775 		sca_del_vcpu(vcpu);
2776 
2777 	if (kvm_is_ucontrol(vcpu->kvm))
2778 		gmap_remove(vcpu->arch.gmap);
2779 
2780 	if (vcpu->kvm->arch.use_cmma)
2781 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2782 	/* We cannot hold the vcpu mutex here, we are already dying */
2783 	if (kvm_s390_pv_cpu_get_handle(vcpu))
2784 		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2785 	free_page((unsigned long)(vcpu->arch.sie_block));
2786 }
2787 
2788 static void kvm_free_vcpus(struct kvm *kvm)
2789 {
2790 	unsigned int i;
2791 	struct kvm_vcpu *vcpu;
2792 
2793 	kvm_for_each_vcpu(i, vcpu, kvm)
2794 		kvm_vcpu_destroy(vcpu);
2795 
2796 	mutex_lock(&kvm->lock);
2797 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2798 		kvm->vcpus[i] = NULL;
2799 
2800 	atomic_set(&kvm->online_vcpus, 0);
2801 	mutex_unlock(&kvm->lock);
2802 }
2803 
2804 void kvm_arch_destroy_vm(struct kvm *kvm)
2805 {
2806 	u16 rc, rrc;
2807 
2808 	kvm_free_vcpus(kvm);
2809 	sca_dispose(kvm);
2810 	kvm_s390_gisa_destroy(kvm);
2811 	/*
2812 	 * We are already at the end of life and kvm->lock is not taken.
2813 	 * This is ok as the file descriptor is closed by now and nobody
2814 	 * can mess with the pv state. To avoid lockdep_assert_held from
2815 	 * complaining we do not use kvm_s390_pv_is_protected.
2816 	 */
2817 	if (kvm_s390_pv_get_handle(kvm))
2818 		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2819 	debug_unregister(kvm->arch.dbf);
2820 	free_page((unsigned long)kvm->arch.sie_page2);
2821 	if (!kvm_is_ucontrol(kvm))
2822 		gmap_remove(kvm->arch.gmap);
2823 	kvm_s390_destroy_adapters(kvm);
2824 	kvm_s390_clear_float_irqs(kvm);
2825 	kvm_s390_vsie_destroy(kvm);
2826 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2827 }
2828 
2829 /* Section: vcpu related */
2830 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2831 {
2832 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2833 	if (!vcpu->arch.gmap)
2834 		return -ENOMEM;
2835 	vcpu->arch.gmap->private = vcpu->kvm;
2836 
2837 	return 0;
2838 }
2839 
2840 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2841 {
2842 	if (!kvm_s390_use_sca_entries())
2843 		return;
2844 	read_lock(&vcpu->kvm->arch.sca_lock);
2845 	if (vcpu->kvm->arch.use_esca) {
2846 		struct esca_block *sca = vcpu->kvm->arch.sca;
2847 
2848 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2849 		sca->cpu[vcpu->vcpu_id].sda = 0;
2850 	} else {
2851 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2852 
2853 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2854 		sca->cpu[vcpu->vcpu_id].sda = 0;
2855 	}
2856 	read_unlock(&vcpu->kvm->arch.sca_lock);
2857 }
2858 
2859 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2860 {
2861 	if (!kvm_s390_use_sca_entries()) {
2862 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2863 
2864 		/* we still need the basic sca for the ipte control */
2865 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2866 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2867 		return;
2868 	}
2869 	read_lock(&vcpu->kvm->arch.sca_lock);
2870 	if (vcpu->kvm->arch.use_esca) {
2871 		struct esca_block *sca = vcpu->kvm->arch.sca;
2872 
2873 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2874 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2875 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2876 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2877 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2878 	} else {
2879 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2880 
2881 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2882 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2883 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2884 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2885 	}
2886 	read_unlock(&vcpu->kvm->arch.sca_lock);
2887 }
2888 
2889 /* Basic SCA to Extended SCA data copy routines */
2890 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2891 {
2892 	d->sda = s->sda;
2893 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2894 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2895 }
2896 
2897 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2898 {
2899 	int i;
2900 
2901 	d->ipte_control = s->ipte_control;
2902 	d->mcn[0] = s->mcn;
2903 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2904 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2905 }
2906 
2907 static int sca_switch_to_extended(struct kvm *kvm)
2908 {
2909 	struct bsca_block *old_sca = kvm->arch.sca;
2910 	struct esca_block *new_sca;
2911 	struct kvm_vcpu *vcpu;
2912 	unsigned int vcpu_idx;
2913 	u32 scaol, scaoh;
2914 
2915 	if (kvm->arch.use_esca)
2916 		return 0;
2917 
2918 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2919 	if (!new_sca)
2920 		return -ENOMEM;
2921 
2922 	scaoh = (u32)((u64)(new_sca) >> 32);
2923 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2924 
2925 	kvm_s390_vcpu_block_all(kvm);
2926 	write_lock(&kvm->arch.sca_lock);
2927 
2928 	sca_copy_b_to_e(new_sca, old_sca);
2929 
2930 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2931 		vcpu->arch.sie_block->scaoh = scaoh;
2932 		vcpu->arch.sie_block->scaol = scaol;
2933 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2934 	}
2935 	kvm->arch.sca = new_sca;
2936 	kvm->arch.use_esca = 1;
2937 
2938 	write_unlock(&kvm->arch.sca_lock);
2939 	kvm_s390_vcpu_unblock_all(kvm);
2940 
2941 	free_page((unsigned long)old_sca);
2942 
2943 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2944 		 old_sca, kvm->arch.sca);
2945 	return 0;
2946 }
2947 
2948 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2949 {
2950 	int rc;
2951 
2952 	if (!kvm_s390_use_sca_entries()) {
2953 		if (id < KVM_MAX_VCPUS)
2954 			return true;
2955 		return false;
2956 	}
2957 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2958 		return true;
2959 	if (!sclp.has_esca || !sclp.has_64bscao)
2960 		return false;
2961 
2962 	mutex_lock(&kvm->lock);
2963 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2964 	mutex_unlock(&kvm->lock);
2965 
2966 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2967 }
2968 
2969 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2970 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2971 {
2972 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2973 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2974 	vcpu->arch.cputm_start = get_tod_clock_fast();
2975 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2976 }
2977 
2978 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2979 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2980 {
2981 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2982 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2983 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2984 	vcpu->arch.cputm_start = 0;
2985 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2986 }
2987 
2988 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2989 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2990 {
2991 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2992 	vcpu->arch.cputm_enabled = true;
2993 	__start_cpu_timer_accounting(vcpu);
2994 }
2995 
2996 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2997 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2998 {
2999 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3000 	__stop_cpu_timer_accounting(vcpu);
3001 	vcpu->arch.cputm_enabled = false;
3002 }
3003 
3004 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3005 {
3006 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3007 	__enable_cpu_timer_accounting(vcpu);
3008 	preempt_enable();
3009 }
3010 
3011 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3012 {
3013 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3014 	__disable_cpu_timer_accounting(vcpu);
3015 	preempt_enable();
3016 }
3017 
3018 /* set the cpu timer - may only be called from the VCPU thread itself */
3019 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3020 {
3021 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3022 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3023 	if (vcpu->arch.cputm_enabled)
3024 		vcpu->arch.cputm_start = get_tod_clock_fast();
3025 	vcpu->arch.sie_block->cputm = cputm;
3026 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3027 	preempt_enable();
3028 }
3029 
3030 /* update and get the cpu timer - can also be called from other VCPU threads */
3031 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3032 {
3033 	unsigned int seq;
3034 	__u64 value;
3035 
3036 	if (unlikely(!vcpu->arch.cputm_enabled))
3037 		return vcpu->arch.sie_block->cputm;
3038 
3039 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3040 	do {
3041 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3042 		/*
3043 		 * If the writer would ever execute a read in the critical
3044 		 * section, e.g. in irq context, we have a deadlock.
3045 		 */
3046 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3047 		value = vcpu->arch.sie_block->cputm;
3048 		/* if cputm_start is 0, accounting is being started/stopped */
3049 		if (likely(vcpu->arch.cputm_start))
3050 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3051 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3052 	preempt_enable();
3053 	return value;
3054 }
3055 
3056 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3057 {
3058 
3059 	gmap_enable(vcpu->arch.enabled_gmap);
3060 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3061 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3062 		__start_cpu_timer_accounting(vcpu);
3063 	vcpu->cpu = cpu;
3064 }
3065 
3066 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3067 {
3068 	vcpu->cpu = -1;
3069 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3070 		__stop_cpu_timer_accounting(vcpu);
3071 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3072 	vcpu->arch.enabled_gmap = gmap_get_enabled();
3073 	gmap_disable(vcpu->arch.enabled_gmap);
3074 
3075 }
3076 
3077 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3078 {
3079 	mutex_lock(&vcpu->kvm->lock);
3080 	preempt_disable();
3081 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3082 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3083 	preempt_enable();
3084 	mutex_unlock(&vcpu->kvm->lock);
3085 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3086 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3087 		sca_add_vcpu(vcpu);
3088 	}
3089 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3090 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3091 	/* make vcpu_load load the right gmap on the first trigger */
3092 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3093 }
3094 
3095 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3096 {
3097 	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3098 	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3099 		return true;
3100 	return false;
3101 }
3102 
3103 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3104 {
3105 	/* At least one ECC subfunction must be present */
3106 	return kvm_has_pckmo_subfunc(kvm, 32) ||
3107 	       kvm_has_pckmo_subfunc(kvm, 33) ||
3108 	       kvm_has_pckmo_subfunc(kvm, 34) ||
3109 	       kvm_has_pckmo_subfunc(kvm, 40) ||
3110 	       kvm_has_pckmo_subfunc(kvm, 41);
3111 
3112 }
3113 
3114 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3115 {
3116 	/*
3117 	 * If the AP instructions are not being interpreted and the MSAX3
3118 	 * facility is not configured for the guest, there is nothing to set up.
3119 	 */
3120 	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3121 		return;
3122 
3123 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3124 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3125 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
3126 	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3127 
3128 	if (vcpu->kvm->arch.crypto.apie)
3129 		vcpu->arch.sie_block->eca |= ECA_APIE;
3130 
3131 	/* Set up protected key support */
3132 	if (vcpu->kvm->arch.crypto.aes_kw) {
3133 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3134 		/* ecc is also wrapped with AES key */
3135 		if (kvm_has_pckmo_ecc(vcpu->kvm))
3136 			vcpu->arch.sie_block->ecd |= ECD_ECC;
3137 	}
3138 
3139 	if (vcpu->kvm->arch.crypto.dea_kw)
3140 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3141 }
3142 
3143 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3144 {
3145 	free_page(vcpu->arch.sie_block->cbrlo);
3146 	vcpu->arch.sie_block->cbrlo = 0;
3147 }
3148 
3149 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3150 {
3151 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
3152 	if (!vcpu->arch.sie_block->cbrlo)
3153 		return -ENOMEM;
3154 	return 0;
3155 }
3156 
3157 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3158 {
3159 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3160 
3161 	vcpu->arch.sie_block->ibc = model->ibc;
3162 	if (test_kvm_facility(vcpu->kvm, 7))
3163 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3164 }
3165 
3166 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3167 {
3168 	int rc = 0;
3169 	u16 uvrc, uvrrc;
3170 
3171 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3172 						    CPUSTAT_SM |
3173 						    CPUSTAT_STOPPED);
3174 
3175 	if (test_kvm_facility(vcpu->kvm, 78))
3176 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3177 	else if (test_kvm_facility(vcpu->kvm, 8))
3178 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3179 
3180 	kvm_s390_vcpu_setup_model(vcpu);
3181 
3182 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3183 	if (MACHINE_HAS_ESOP)
3184 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3185 	if (test_kvm_facility(vcpu->kvm, 9))
3186 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
3187 	if (test_kvm_facility(vcpu->kvm, 73))
3188 		vcpu->arch.sie_block->ecb |= ECB_TE;
3189 
3190 	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3191 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3192 	if (test_kvm_facility(vcpu->kvm, 130))
3193 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3194 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3195 	if (sclp.has_cei)
3196 		vcpu->arch.sie_block->eca |= ECA_CEI;
3197 	if (sclp.has_ib)
3198 		vcpu->arch.sie_block->eca |= ECA_IB;
3199 	if (sclp.has_siif)
3200 		vcpu->arch.sie_block->eca |= ECA_SII;
3201 	if (sclp.has_sigpif)
3202 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
3203 	if (test_kvm_facility(vcpu->kvm, 129)) {
3204 		vcpu->arch.sie_block->eca |= ECA_VX;
3205 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3206 	}
3207 	if (test_kvm_facility(vcpu->kvm, 139))
3208 		vcpu->arch.sie_block->ecd |= ECD_MEF;
3209 	if (test_kvm_facility(vcpu->kvm, 156))
3210 		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3211 	if (vcpu->arch.sie_block->gd) {
3212 		vcpu->arch.sie_block->eca |= ECA_AIV;
3213 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3214 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3215 	}
3216 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3217 					| SDNXC;
3218 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3219 
3220 	if (sclp.has_kss)
3221 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3222 	else
3223 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3224 
3225 	if (vcpu->kvm->arch.use_cmma) {
3226 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
3227 		if (rc)
3228 			return rc;
3229 	}
3230 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3231 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3232 
3233 	vcpu->arch.sie_block->hpid = HPID_KVM;
3234 
3235 	kvm_s390_vcpu_crypto_setup(vcpu);
3236 
3237 	mutex_lock(&vcpu->kvm->lock);
3238 	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3239 		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3240 		if (rc)
3241 			kvm_s390_vcpu_unsetup_cmma(vcpu);
3242 	}
3243 	mutex_unlock(&vcpu->kvm->lock);
3244 
3245 	return rc;
3246 }
3247 
3248 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3249 {
3250 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3251 		return -EINVAL;
3252 	return 0;
3253 }
3254 
3255 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3256 {
3257 	struct sie_page *sie_page;
3258 	int rc;
3259 
3260 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3261 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3262 	if (!sie_page)
3263 		return -ENOMEM;
3264 
3265 	vcpu->arch.sie_block = &sie_page->sie_block;
3266 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3267 
3268 	/* the real guest size will always be smaller than msl */
3269 	vcpu->arch.sie_block->mso = 0;
3270 	vcpu->arch.sie_block->msl = sclp.hamax;
3271 
3272 	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3273 	spin_lock_init(&vcpu->arch.local_int.lock);
3274 	vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3275 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3276 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3277 	seqcount_init(&vcpu->arch.cputm_seqcount);
3278 
3279 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3280 	kvm_clear_async_pf_completion_queue(vcpu);
3281 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3282 				    KVM_SYNC_GPRS |
3283 				    KVM_SYNC_ACRS |
3284 				    KVM_SYNC_CRS |
3285 				    KVM_SYNC_ARCH0 |
3286 				    KVM_SYNC_PFAULT |
3287 				    KVM_SYNC_DIAG318;
3288 	kvm_s390_set_prefix(vcpu, 0);
3289 	if (test_kvm_facility(vcpu->kvm, 64))
3290 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3291 	if (test_kvm_facility(vcpu->kvm, 82))
3292 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3293 	if (test_kvm_facility(vcpu->kvm, 133))
3294 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3295 	if (test_kvm_facility(vcpu->kvm, 156))
3296 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3297 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
3298 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3299 	 */
3300 	if (MACHINE_HAS_VX)
3301 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3302 	else
3303 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3304 
3305 	if (kvm_is_ucontrol(vcpu->kvm)) {
3306 		rc = __kvm_ucontrol_vcpu_init(vcpu);
3307 		if (rc)
3308 			goto out_free_sie_block;
3309 	}
3310 
3311 	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3312 		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3313 	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3314 
3315 	rc = kvm_s390_vcpu_setup(vcpu);
3316 	if (rc)
3317 		goto out_ucontrol_uninit;
3318 	return 0;
3319 
3320 out_ucontrol_uninit:
3321 	if (kvm_is_ucontrol(vcpu->kvm))
3322 		gmap_remove(vcpu->arch.gmap);
3323 out_free_sie_block:
3324 	free_page((unsigned long)(vcpu->arch.sie_block));
3325 	return rc;
3326 }
3327 
3328 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3329 {
3330 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3331 	return kvm_s390_vcpu_has_irq(vcpu, 0);
3332 }
3333 
3334 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3335 {
3336 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3337 }
3338 
3339 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3340 {
3341 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3342 	exit_sie(vcpu);
3343 }
3344 
3345 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3346 {
3347 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3348 }
3349 
3350 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3351 {
3352 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3353 	exit_sie(vcpu);
3354 }
3355 
3356 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3357 {
3358 	return atomic_read(&vcpu->arch.sie_block->prog20) &
3359 	       (PROG_BLOCK_SIE | PROG_REQUEST);
3360 }
3361 
3362 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3363 {
3364 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3365 }
3366 
3367 /*
3368  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3369  * If the CPU is not running (e.g. waiting as idle) the function will
3370  * return immediately. */
3371 void exit_sie(struct kvm_vcpu *vcpu)
3372 {
3373 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3374 	kvm_s390_vsie_kick(vcpu);
3375 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3376 		cpu_relax();
3377 }
3378 
3379 /* Kick a guest cpu out of SIE to process a request synchronously */
3380 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3381 {
3382 	kvm_make_request(req, vcpu);
3383 	kvm_s390_vcpu_request(vcpu);
3384 }
3385 
3386 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3387 			      unsigned long end)
3388 {
3389 	struct kvm *kvm = gmap->private;
3390 	struct kvm_vcpu *vcpu;
3391 	unsigned long prefix;
3392 	int i;
3393 
3394 	if (gmap_is_shadow(gmap))
3395 		return;
3396 	if (start >= 1UL << 31)
3397 		/* We are only interested in prefix pages */
3398 		return;
3399 	kvm_for_each_vcpu(i, vcpu, kvm) {
3400 		/* match against both prefix pages */
3401 		prefix = kvm_s390_get_prefix(vcpu);
3402 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3403 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3404 				   start, end);
3405 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3406 		}
3407 	}
3408 }
3409 
3410 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3411 {
3412 	/* do not poll with more than halt_poll_max_steal percent of steal time */
3413 	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3414 	    halt_poll_max_steal) {
3415 		vcpu->stat.halt_no_poll_steal++;
3416 		return true;
3417 	}
3418 	return false;
3419 }
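
/*
 * Editorial note, hedged: avg_steal_timer is accounted in CPU-timer units
 * (roughly 4096 units per microsecond), and TICK_USEC << 12 is the length
 * of one timer tick converted to the same units. The comparison above
 * therefore treats avg_steal_timer * 100 / (TICK_USEC << 12) as the steal
 * time per tick in percent and stops polling once it reaches the
 * halt_poll_max_steal module parameter.
 */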
3420 
3421 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3422 {
3423 	/* kvm common code refers to this, but never calls it */
3424 	BUG();
3425 	return 0;
3426 }
3427 
3428 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3429 					   struct kvm_one_reg *reg)
3430 {
3431 	int r = -EINVAL;
3432 
3433 	switch (reg->id) {
3434 	case KVM_REG_S390_TODPR:
3435 		r = put_user(vcpu->arch.sie_block->todpr,
3436 			     (u32 __user *)reg->addr);
3437 		break;
3438 	case KVM_REG_S390_EPOCHDIFF:
3439 		r = put_user(vcpu->arch.sie_block->epoch,
3440 			     (u64 __user *)reg->addr);
3441 		break;
3442 	case KVM_REG_S390_CPU_TIMER:
3443 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
3444 			     (u64 __user *)reg->addr);
3445 		break;
3446 	case KVM_REG_S390_CLOCK_COMP:
3447 		r = put_user(vcpu->arch.sie_block->ckc,
3448 			     (u64 __user *)reg->addr);
3449 		break;
3450 	case KVM_REG_S390_PFTOKEN:
3451 		r = put_user(vcpu->arch.pfault_token,
3452 			     (u64 __user *)reg->addr);
3453 		break;
3454 	case KVM_REG_S390_PFCOMPARE:
3455 		r = put_user(vcpu->arch.pfault_compare,
3456 			     (u64 __user *)reg->addr);
3457 		break;
3458 	case KVM_REG_S390_PFSELECT:
3459 		r = put_user(vcpu->arch.pfault_select,
3460 			     (u64 __user *)reg->addr);
3461 		break;
3462 	case KVM_REG_S390_PP:
3463 		r = put_user(vcpu->arch.sie_block->pp,
3464 			     (u64 __user *)reg->addr);
3465 		break;
3466 	case KVM_REG_S390_GBEA:
3467 		r = put_user(vcpu->arch.sie_block->gbea,
3468 			     (u64 __user *)reg->addr);
3469 		break;
3470 	default:
3471 		break;
3472 	}
3473 
3474 	return r;
3475 }
3476 
3477 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3478 					   struct kvm_one_reg *reg)
3479 {
3480 	int r = -EINVAL;
3481 	__u64 val;
3482 
3483 	switch (reg->id) {
3484 	case KVM_REG_S390_TODPR:
3485 		r = get_user(vcpu->arch.sie_block->todpr,
3486 			     (u32 __user *)reg->addr);
3487 		break;
3488 	case KVM_REG_S390_EPOCHDIFF:
3489 		r = get_user(vcpu->arch.sie_block->epoch,
3490 			     (u64 __user *)reg->addr);
3491 		break;
3492 	case KVM_REG_S390_CPU_TIMER:
3493 		r = get_user(val, (u64 __user *)reg->addr);
3494 		if (!r)
3495 			kvm_s390_set_cpu_timer(vcpu, val);
3496 		break;
3497 	case KVM_REG_S390_CLOCK_COMP:
3498 		r = get_user(vcpu->arch.sie_block->ckc,
3499 			     (u64 __user *)reg->addr);
3500 		break;
3501 	case KVM_REG_S390_PFTOKEN:
3502 		r = get_user(vcpu->arch.pfault_token,
3503 			     (u64 __user *)reg->addr);
3504 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3505 			kvm_clear_async_pf_completion_queue(vcpu);
3506 		break;
3507 	case KVM_REG_S390_PFCOMPARE:
3508 		r = get_user(vcpu->arch.pfault_compare,
3509 			     (u64 __user *)reg->addr);
3510 		break;
3511 	case KVM_REG_S390_PFSELECT:
3512 		r = get_user(vcpu->arch.pfault_select,
3513 			     (u64 __user *)reg->addr);
3514 		break;
3515 	case KVM_REG_S390_PP:
3516 		r = get_user(vcpu->arch.sie_block->pp,
3517 			     (u64 __user *)reg->addr);
3518 		break;
3519 	case KVM_REG_S390_GBEA:
3520 		r = get_user(vcpu->arch.sie_block->gbea,
3521 			     (u64 __user *)reg->addr);
3522 		break;
3523 	default:
3524 		break;
3525 	}
3526 
3527 	return r;
3528 }
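
/*
 * Illustrative userspace sketch (not part of the original file): reading
 * the guest CPU timer through the ONE_REG interface handled above. It
 * assumes <sys/ioctl.h> and <linux/kvm.h> are included and that "vcpu_fd"
 * is an already-open vcpu file descriptor.
 *
 *	__u64 cpu_timer;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)&cpu_timer,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg))
 *		perror("KVM_GET_ONE_REG");
 */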
3529 
3530 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3531 {
3532 	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3533 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3534 	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3535 
3536 	kvm_clear_async_pf_completion_queue(vcpu);
3537 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3538 		kvm_s390_vcpu_stop(vcpu);
3539 	kvm_s390_clear_local_irqs(vcpu);
3540 }
3541 
3542 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3543 {
3544 	/* Initial reset is a superset of the normal reset */
3545 	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3546 
3547 	/*
3548 	 * This equals the initial cpu reset in the POP, but we don't switch to ESA.
3549 	 * We do not only reset the internal data, but also ...
3550 	 */
3551 	vcpu->arch.sie_block->gpsw.mask = 0;
3552 	vcpu->arch.sie_block->gpsw.addr = 0;
3553 	kvm_s390_set_prefix(vcpu, 0);
3554 	kvm_s390_set_cpu_timer(vcpu, 0);
3555 	vcpu->arch.sie_block->ckc = 0;
3556 	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3557 	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3558 	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3559 
3560 	/* ... the data in sync regs */
3561 	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3562 	vcpu->run->s.regs.ckc = 0;
3563 	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3564 	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3565 	vcpu->run->psw_addr = 0;
3566 	vcpu->run->psw_mask = 0;
3567 	vcpu->run->s.regs.todpr = 0;
3568 	vcpu->run->s.regs.cputm = 0;
3569 	vcpu->run->s.regs.ckc = 0;
3570 	vcpu->run->s.regs.pp = 0;
3571 	vcpu->run->s.regs.gbea = 1;
3572 	vcpu->run->s.regs.fpc = 0;
3573 	/*
3574 	 * Do not reset these registers in the protected case, as some of
3575 	 * them are overlayed and they are not accessible in this case
3576 	 * anyway.
3577 	 */
3578 	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3579 		vcpu->arch.sie_block->gbea = 1;
3580 		vcpu->arch.sie_block->pp = 0;
3581 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3582 		vcpu->arch.sie_block->todpr = 0;
3583 	}
3584 }
3585 
3586 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3587 {
3588 	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3589 
3590 	/* Clear reset is a superset of the initial reset */
3591 	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3592 
3593 	memset(&regs->gprs, 0, sizeof(regs->gprs));
3594 	memset(&regs->vrs, 0, sizeof(regs->vrs));
3595 	memset(&regs->acrs, 0, sizeof(regs->acrs));
3596 	memset(&regs->gscb, 0, sizeof(regs->gscb));
3597 
3598 	regs->etoken = 0;
3599 	regs->etoken_extension = 0;
3600 }
3601 
3602 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3603 {
3604 	vcpu_load(vcpu);
3605 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3606 	vcpu_put(vcpu);
3607 	return 0;
3608 }
3609 
3610 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3611 {
3612 	vcpu_load(vcpu);
3613 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3614 	vcpu_put(vcpu);
3615 	return 0;
3616 }
3617 
3618 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3619 				  struct kvm_sregs *sregs)
3620 {
3621 	vcpu_load(vcpu);
3622 
3623 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3624 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3625 
3626 	vcpu_put(vcpu);
3627 	return 0;
3628 }
3629 
3630 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3631 				  struct kvm_sregs *sregs)
3632 {
3633 	vcpu_load(vcpu);
3634 
3635 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3636 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3637 
3638 	vcpu_put(vcpu);
3639 	return 0;
3640 }
3641 
3642 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3643 {
3644 	int ret = 0;
3645 
3646 	vcpu_load(vcpu);
3647 
3648 	if (test_fp_ctl(fpu->fpc)) {
3649 		ret = -EINVAL;
3650 		goto out;
3651 	}
3652 	vcpu->run->s.regs.fpc = fpu->fpc;
3653 	if (MACHINE_HAS_VX)
3654 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3655 				 (freg_t *) fpu->fprs);
3656 	else
3657 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3658 
3659 out:
3660 	vcpu_put(vcpu);
3661 	return ret;
3662 }
3663 
3664 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3665 {
3666 	vcpu_load(vcpu);
3667 
3668 	/* make sure we have the latest values */
3669 	save_fpu_regs();
3670 	if (MACHINE_HAS_VX)
3671 		convert_vx_to_fp((freg_t *) fpu->fprs,
3672 				 (__vector128 *) vcpu->run->s.regs.vrs);
3673 	else
3674 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3675 	fpu->fpc = vcpu->run->s.regs.fpc;
3676 
3677 	vcpu_put(vcpu);
3678 	return 0;
3679 }
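
/*
 * Illustrative userspace sketch (not part of the original file): setting
 * the guest floating point state via KVM_SET_FPU, which lands in
 * kvm_arch_vcpu_ioctl_set_fpu() above. Assumes <sys/ioctl.h> and
 * <linux/kvm.h> and an already-open vcpu file descriptor "vcpu_fd";
 * fpc must pass test_fp_ctl() or the ioctl returns -EINVAL.
 *
 *	struct kvm_fpu fpu = { .fpc = 0 };
 *
 *	if (ioctl(vcpu_fd, KVM_SET_FPU, &fpu))
 *		perror("KVM_SET_FPU");
 */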
3680 
3681 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3682 {
3683 	int rc = 0;
3684 
3685 	if (!is_vcpu_stopped(vcpu))
3686 		rc = -EBUSY;
3687 	else {
3688 		vcpu->run->psw_mask = psw.mask;
3689 		vcpu->run->psw_addr = psw.addr;
3690 	}
3691 	return rc;
3692 }
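
/*
 * Illustrative userspace sketch (not part of the original file): setting
 * the initial PSW on a stopped vcpu, handled by
 * kvm_arch_vcpu_ioctl_set_initial_psw() above. The mask and address are
 * example values only; assumes <sys/ioctl.h>, <linux/kvm.h> and an open
 * vcpu file descriptor "vcpu_fd".
 *
 *	struct kvm_s390_psw psw = {
 *		.mask = 0x0000000180000000ULL,
 *		.addr = 0x10000,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_S390_SET_INITIAL_PSW, &psw))
 *		perror("KVM_S390_SET_INITIAL_PSW");
 */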
3693 
3694 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3695 				  struct kvm_translation *tr)
3696 {
3697 	return -EINVAL; /* not implemented yet */
3698 }
3699 
3700 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3701 			      KVM_GUESTDBG_USE_HW_BP | \
3702 			      KVM_GUESTDBG_ENABLE)
3703 
3704 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3705 					struct kvm_guest_debug *dbg)
3706 {
3707 	int rc = 0;
3708 
3709 	vcpu_load(vcpu);
3710 
3711 	vcpu->guest_debug = 0;
3712 	kvm_s390_clear_bp_data(vcpu);
3713 
3714 	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3715 		rc = -EINVAL;
3716 		goto out;
3717 	}
3718 	if (!sclp.has_gpere) {
3719 		rc = -EINVAL;
3720 		goto out;
3721 	}
3722 
3723 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
3724 		vcpu->guest_debug = dbg->control;
3725 		/* enforce guest PER */
3726 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3727 
3728 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3729 			rc = kvm_s390_import_bp_data(vcpu, dbg);
3730 	} else {
3731 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3732 		vcpu->arch.guestdbg.last_bp = 0;
3733 	}
3734 
3735 	if (rc) {
3736 		vcpu->guest_debug = 0;
3737 		kvm_s390_clear_bp_data(vcpu);
3738 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3739 	}
3740 
3741 out:
3742 	vcpu_put(vcpu);
3743 	return rc;
3744 }
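
/*
 * Illustrative userspace sketch (not part of the original file): enabling
 * guest single-stepping through the interface validated above (the host
 * needs sclp.has_gpere). Assumes <sys/ioctl.h>, <linux/kvm.h> and an open
 * vcpu file descriptor "vcpu_fd".
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg))
 *		perror("KVM_SET_GUEST_DEBUG");
 */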
3745 
3746 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3747 				    struct kvm_mp_state *mp_state)
3748 {
3749 	int ret;
3750 
3751 	vcpu_load(vcpu);
3752 
3753 	/* CHECK_STOP and LOAD are not supported yet */
3754 	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3755 				      KVM_MP_STATE_OPERATING;
3756 
3757 	vcpu_put(vcpu);
3758 	return ret;
3759 }
3760 
3761 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3762 				    struct kvm_mp_state *mp_state)
3763 {
3764 	int rc = 0;
3765 
3766 	vcpu_load(vcpu);
3767 
3768 	/* user space knows about this interface - let it control the state */
3769 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3770 
3771 	switch (mp_state->mp_state) {
3772 	case KVM_MP_STATE_STOPPED:
3773 		rc = kvm_s390_vcpu_stop(vcpu);
3774 		break;
3775 	case KVM_MP_STATE_OPERATING:
3776 		rc = kvm_s390_vcpu_start(vcpu);
3777 		break;
3778 	case KVM_MP_STATE_LOAD:
3779 		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3780 			rc = -ENXIO;
3781 			break;
3782 		}
3783 		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3784 		break;
3785 	case KVM_MP_STATE_CHECK_STOP:
3786 		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
3787 	default:
3788 		rc = -ENXIO;
3789 	}
3790 
3791 	vcpu_put(vcpu);
3792 	return rc;
3793 }
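
/*
 * Illustrative userspace sketch (not part of the original file): stopping
 * a vcpu via the MP_STATE interface above. Note that the first use of
 * KVM_SET_MP_STATE switches the VM to user-controlled cpu state. Assumes
 * <sys/ioctl.h>, <linux/kvm.h> and an open vcpu file descriptor "vcpu_fd".
 *
 *	struct kvm_mp_state mp = { .mp_state = KVM_MP_STATE_STOPPED };
 *
 *	if (ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp))
 *		perror("KVM_SET_MP_STATE");
 */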
3794 
3795 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3796 {
3797 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3798 }
3799 
3800 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3801 {
3802 retry:
3803 	kvm_s390_vcpu_request_handled(vcpu);
3804 	if (!kvm_request_pending(vcpu))
3805 		return 0;
3806 	/*
3807 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3808 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3809 	 * This ensures that the ipte instruction for this request has
3810 	 * already finished. We might race against a second unmapper that
3811 	 * wants to set the blocking bit. Let's just retry the request loop.
3812 	 */
3813 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3814 		int rc;
3815 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
3816 					  kvm_s390_get_prefix(vcpu),
3817 					  PAGE_SIZE * 2, PROT_WRITE);
3818 		if (rc) {
3819 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3820 			return rc;
3821 		}
3822 		goto retry;
3823 	}
3824 
3825 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3826 		vcpu->arch.sie_block->ihcpu = 0xffff;
3827 		goto retry;
3828 	}
3829 
3830 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3831 		if (!ibs_enabled(vcpu)) {
3832 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3833 			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3834 		}
3835 		goto retry;
3836 	}
3837 
3838 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3839 		if (ibs_enabled(vcpu)) {
3840 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3841 			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3842 		}
3843 		goto retry;
3844 	}
3845 
3846 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3847 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3848 		goto retry;
3849 	}
3850 
3851 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3852 		/*
3853 		 * Disable CMM virtualization; we will emulate the ESSA
3854 		 * instruction manually, in order to provide additional
3855 		 * functionalities needed for live migration.
3856 		 */
3857 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3858 		goto retry;
3859 	}
3860 
3861 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3862 		/*
3863 		 * Re-enable CMM virtualization if CMMA is available and
3864 		 * CMM has been used.
3865 		 */
3866 		if ((vcpu->kvm->arch.use_cmma) &&
3867 		    (vcpu->kvm->mm->context.uses_cmm))
3868 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3869 		goto retry;
3870 	}
3871 
3872 	/* nothing to do, just clear the request */
3873 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3874 	/* we left the vsie handler, nothing to do, just clear the request */
3875 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3876 
3877 	return 0;
3878 }
3879 
3880 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3881 {
3882 	struct kvm_vcpu *vcpu;
3883 	struct kvm_s390_tod_clock_ext htod;
3884 	int i;
3885 
3886 	preempt_disable();
3887 
3888 	get_tod_clock_ext((char *)&htod);
3889 
3890 	kvm->arch.epoch = gtod->tod - htod.tod;
3891 	kvm->arch.epdx = 0;
3892 	if (test_kvm_facility(kvm, 139)) {
3893 		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3894 		if (kvm->arch.epoch > gtod->tod)
3895 			kvm->arch.epdx -= 1;
3896 	}
3897 
3898 	kvm_s390_vcpu_block_all(kvm);
3899 	kvm_for_each_vcpu(i, vcpu, kvm) {
3900 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3901 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3902 	}
3903 
3904 	kvm_s390_vcpu_unblock_all(kvm);
3905 	preempt_enable();
3906 }
3907 
3908 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3909 {
3910 	if (!mutex_trylock(&kvm->lock))
3911 		return 0;
3912 	__kvm_s390_set_tod_clock(kvm, gtod);
3913 	mutex_unlock(&kvm->lock);
3914 	return 1;
3915 }
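
/*
 * Editorial note, hedged: SIE presents the guest TOD as host TOD + epoch,
 * so __kvm_s390_set_tod_clock() stores epoch = gtod->tod - htod.tod. With
 * the multiple-epoch facility (139) the epoch index is adjusted the same
 * way, and one is borrowed from it when the 64-bit subtraction wrapped,
 * which is what the kvm->arch.epoch > gtod->tod check detects.
 */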
3916 
3917 /**
3918  * kvm_arch_fault_in_page - fault-in guest page if necessary
3919  * @vcpu: The corresponding virtual cpu
3920  * @gpa: Guest physical address
3921  * @writable: Whether the page should be writable or not
3922  *
3923  * Make sure that a guest page has been faulted-in on the host.
3924  *
3925  * Return: Zero on success, negative error code otherwise.
3926  */
3927 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3928 {
3929 	return gmap_fault(vcpu->arch.gmap, gpa,
3930 			  writable ? FAULT_FLAG_WRITE : 0);
3931 }
3932 
3933 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3934 				      unsigned long token)
3935 {
3936 	struct kvm_s390_interrupt inti;
3937 	struct kvm_s390_irq irq;
3938 
3939 	if (start_token) {
3940 		irq.u.ext.ext_params2 = token;
3941 		irq.type = KVM_S390_INT_PFAULT_INIT;
3942 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3943 	} else {
3944 		inti.type = KVM_S390_INT_PFAULT_DONE;
3945 		inti.parm64 = token;
3946 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3947 	}
3948 }
3949 
3950 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3951 				     struct kvm_async_pf *work)
3952 {
3953 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3954 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3955 
3956 	return true;
3957 }
3958 
3959 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3960 				 struct kvm_async_pf *work)
3961 {
3962 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3963 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3964 }
3965 
3966 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3967 			       struct kvm_async_pf *work)
3968 {
3969 	/* s390 will always inject the page directly */
3970 }
3971 
3972 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3973 {
3974 	/*
3975 	 * s390 will always inject the page directly,
3976 	 * but we still want check_async_completion to clean up
3977 	 */
3978 	return true;
3979 }
3980 
3981 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3982 {
3983 	hva_t hva;
3984 	struct kvm_arch_async_pf arch;
3985 
3986 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3987 		return false;
3988 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3989 	    vcpu->arch.pfault_compare)
3990 		return false;
3991 	if (psw_extint_disabled(vcpu))
3992 		return false;
3993 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3994 		return false;
3995 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3996 		return false;
3997 	if (!vcpu->arch.gmap->pfault_enabled)
3998 		return false;
3999 
4000 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4001 	hva += current->thread.gmap_addr & ~PAGE_MASK;
4002 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4003 		return false;
4004 
4005 	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4006 }
4007 
4008 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4009 {
4010 	int rc, cpuflags;
4011 
4012 	/*
4013 	 * On s390 notifications for arriving pages will be delivered directly
4014 	 * to the guest but the housekeeping for completed pfaults is
4015 	 * handled outside the worker.
4016 	 */
4017 	kvm_check_async_pf_completion(vcpu);
4018 
4019 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4020 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4021 
4022 	if (need_resched())
4023 		schedule();
4024 
4025 	if (!kvm_is_ucontrol(vcpu->kvm)) {
4026 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
4027 		if (rc)
4028 			return rc;
4029 	}
4030 
4031 	rc = kvm_s390_handle_requests(vcpu);
4032 	if (rc)
4033 		return rc;
4034 
4035 	if (guestdbg_enabled(vcpu)) {
4036 		kvm_s390_backup_guest_per_regs(vcpu);
4037 		kvm_s390_patch_guest_per_regs(vcpu);
4038 	}
4039 
4040 	clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask);
4041 
4042 	vcpu->arch.sie_block->icptcode = 0;
4043 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4044 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4045 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
4046 
4047 	return 0;
4048 }
4049 
4050 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4051 {
4052 	struct kvm_s390_pgm_info pgm_info = {
4053 		.code = PGM_ADDRESSING,
4054 	};
4055 	u8 opcode, ilen;
4056 	int rc;
4057 
4058 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4059 	trace_kvm_s390_sie_fault(vcpu);
4060 
4061 	/*
4062 	 * We want to inject an addressing exception, which is defined as a
4063 	 * suppressing or terminating exception. However, since we came here
4064 	 * by a DAT access exception, the PSW still points to the faulting
4065 	 * instruction since DAT exceptions are nullifying. So we've got
4066 	 * to look up the current opcode to get the length of the instruction
4067 	 * to be able to forward the PSW.
4068 	 */
4069 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4070 	ilen = insn_length(opcode);
4071 	if (rc < 0) {
4072 		return rc;
4073 	} else if (rc) {
4074 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
4075 		 * Forward by arbitrary ilc, injection will take care of
4076 		 * nullification if necessary.
4077 		 */
4078 		pgm_info = vcpu->arch.pgm;
4079 		ilen = 4;
4080 	}
4081 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4082 	kvm_s390_forward_psw(vcpu, ilen);
4083 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4084 }
4085 
4086 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4087 {
4088 	struct mcck_volatile_info *mcck_info;
4089 	struct sie_page *sie_page;
4090 
4091 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4092 		   vcpu->arch.sie_block->icptcode);
4093 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4094 
4095 	if (guestdbg_enabled(vcpu))
4096 		kvm_s390_restore_guest_per_regs(vcpu);
4097 
4098 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4099 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4100 
4101 	if (exit_reason == -EINTR) {
4102 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
4103 		sie_page = container_of(vcpu->arch.sie_block,
4104 					struct sie_page, sie_block);
4105 		mcck_info = &sie_page->mcck_info;
4106 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
4107 		return 0;
4108 	}
4109 
4110 	if (vcpu->arch.sie_block->icptcode > 0) {
4111 		int rc = kvm_handle_sie_intercept(vcpu);
4112 
4113 		if (rc != -EOPNOTSUPP)
4114 			return rc;
4115 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4116 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4117 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4118 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4119 		return -EREMOTE;
4120 	} else if (exit_reason != -EFAULT) {
4121 		vcpu->stat.exit_null++;
4122 		return 0;
4123 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
4124 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4125 		vcpu->run->s390_ucontrol.trans_exc_code =
4126 						current->thread.gmap_addr;
4127 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
4128 		return -EREMOTE;
4129 	} else if (current->thread.gmap_pfault) {
4130 		trace_kvm_s390_major_guest_pfault(vcpu);
4131 		current->thread.gmap_pfault = 0;
4132 		if (kvm_arch_setup_async_pf(vcpu))
4133 			return 0;
4134 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4135 	}
4136 	return vcpu_post_run_fault_in_sie(vcpu);
4137 }
4138 
4139 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4140 static int __vcpu_run(struct kvm_vcpu *vcpu)
4141 {
4142 	int rc, exit_reason;
4143 	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4144 
4145 	/*
4146 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
4147 	 * ning the guest), so that memslots (and other stuff) are protected
4148 	 */
4149 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4150 
4151 	do {
4152 		rc = vcpu_pre_run(vcpu);
4153 		if (rc)
4154 			break;
4155 
4156 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4157 		/*
4158 		 * As PF_VCPU will be used in the fault handler, there must be
4159 		 * no uaccess between guest_enter and guest_exit.
4160 		 */
4161 		local_irq_disable();
4162 		guest_enter_irqoff();
4163 		__disable_cpu_timer_accounting(vcpu);
4164 		local_irq_enable();
4165 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4166 			memcpy(sie_page->pv_grregs,
4167 			       vcpu->run->s.regs.gprs,
4168 			       sizeof(sie_page->pv_grregs));
4169 		}
4170 		exit_reason = sie64a(vcpu->arch.sie_block,
4171 				     vcpu->run->s.regs.gprs);
4172 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4173 			memcpy(vcpu->run->s.regs.gprs,
4174 			       sie_page->pv_grregs,
4175 			       sizeof(sie_page->pv_grregs));
4176 			/*
4177 			 * We're not allowed to inject interrupts on intercepts
4178 			 * that leave the guest state in an "in-between" state
4179 			 * where the next SIE entry will do a continuation.
4180 			 * Fence interrupts in our "internal" PSW.
4181 			 */
4182 			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4183 			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4184 				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4185 			}
4186 		}
4187 		local_irq_disable();
4188 		__enable_cpu_timer_accounting(vcpu);
4189 		guest_exit_irqoff();
4190 		local_irq_enable();
4191 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4192 
4193 		rc = vcpu_post_run(vcpu, exit_reason);
4194 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4195 
4196 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4197 	return rc;
4198 }
4199 
4200 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4201 {
4202 	struct kvm_run *kvm_run = vcpu->run;
4203 	struct runtime_instr_cb *riccb;
4204 	struct gs_cb *gscb;
4205 
4206 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4207 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4208 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4209 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4210 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4211 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4212 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4213 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4214 	}
4215 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4216 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4217 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4218 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4219 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4220 			kvm_clear_async_pf_completion_queue(vcpu);
4221 	}
4222 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4223 		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4224 		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4225 	}
4226 	/*
4227 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
4228 	 * we should enable RI here instead of doing the lazy enablement.
4229 	 */
4230 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4231 	    test_kvm_facility(vcpu->kvm, 64) &&
4232 	    riccb->v &&
4233 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4234 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4235 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4236 	}
4237 	/*
4238 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
4239 	 * we should enable GS here instead of doing the lazy enablement.
4240 	 */
4241 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4242 	    test_kvm_facility(vcpu->kvm, 133) &&
4243 	    gscb->gssm &&
4244 	    !vcpu->arch.gs_enabled) {
4245 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4246 		vcpu->arch.sie_block->ecb |= ECB_GS;
4247 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4248 		vcpu->arch.gs_enabled = 1;
4249 	}
4250 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4251 	    test_kvm_facility(vcpu->kvm, 82)) {
4252 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4253 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4254 	}
4255 	if (MACHINE_HAS_GS) {
4256 		preempt_disable();
4257 		__ctl_set_bit(2, 4);
4258 		if (current->thread.gs_cb) {
4259 			vcpu->arch.host_gscb = current->thread.gs_cb;
4260 			save_gs_cb(vcpu->arch.host_gscb);
4261 		}
4262 		if (vcpu->arch.gs_enabled) {
4263 			current->thread.gs_cb = (struct gs_cb *)
4264 						&vcpu->run->s.regs.gscb;
4265 			restore_gs_cb(current->thread.gs_cb);
4266 		}
4267 		preempt_enable();
4268 	}
4269 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
4270 }
4271 
4272 static void sync_regs(struct kvm_vcpu *vcpu)
4273 {
4274 	struct kvm_run *kvm_run = vcpu->run;
4275 
4276 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4277 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4278 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4279 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4280 		/* some control register changes require a tlb flush */
4281 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4282 	}
4283 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4284 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4285 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4286 	}
4287 	save_access_regs(vcpu->arch.host_acrs);
4288 	restore_access_regs(vcpu->run->s.regs.acrs);
4289 	/* save host (userspace) fprs/vrs */
4290 	save_fpu_regs();
4291 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4292 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4293 	if (MACHINE_HAS_VX)
4294 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4295 	else
4296 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4297 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4298 	if (test_fp_ctl(current->thread.fpu.fpc))
4299 		/* User space provided an invalid FPC, let's clear it */
4300 		current->thread.fpu.fpc = 0;
4301 
4302 	/* Sync fmt2 only data */
4303 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4304 		sync_regs_fmt2(vcpu);
4305 	} else {
4306 		/*
4307 		 * In several places we have to modify our internal view to
4308 		 * not do things that are disallowed by the ultravisor. For
4309 		 * example we must not inject interrupts after specific exits
4310 		 * (e.g. 112 prefix page not secure). We do this by turning
4311 		 * off the machine check, external and I/O interrupt bits
4312 		 * of our PSW copy. To avoid getting validity intercepts, we
4313 		 * do only accept the condition code from userspace.
4314 		 */
4315 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4316 		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4317 						   PSW_MASK_CC;
4318 	}
4319 
4320 	kvm_run->kvm_dirty_regs = 0;
4321 }
4322 
4323 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4324 {
4325 	struct kvm_run *kvm_run = vcpu->run;
4326 
4327 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4328 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4329 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4330 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4331 	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4332 	if (MACHINE_HAS_GS) {
4333 		preempt_disable();
4334 		__ctl_set_bit(2, 4);
4335 		if (vcpu->arch.gs_enabled)
4336 			save_gs_cb(current->thread.gs_cb);
4337 		current->thread.gs_cb = vcpu->arch.host_gscb;
4338 		restore_gs_cb(vcpu->arch.host_gscb);
4339 		if (!vcpu->arch.host_gscb)
4340 			__ctl_clear_bit(2, 4);
4341 		vcpu->arch.host_gscb = NULL;
4342 		preempt_enable();
4343 	}
4344 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
4345 }
4346 
4347 static void store_regs(struct kvm_vcpu *vcpu)
4348 {
4349 	struct kvm_run *kvm_run = vcpu->run;
4350 
4351 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4352 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4353 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4354 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4355 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4356 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4357 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4358 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4359 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4360 	save_access_regs(vcpu->run->s.regs.acrs);
4361 	restore_access_regs(vcpu->arch.host_acrs);
4362 	/* Save guest register state */
4363 	save_fpu_regs();
4364 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4365 	/* Restore will be done lazily at return */
4366 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4367 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4368 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4369 		store_regs_fmt2(vcpu);
4370 }
4371 
4372 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4373 {
4374 	struct kvm_run *kvm_run = vcpu->run;
4375 	int rc;
4376 
4377 	if (kvm_run->immediate_exit)
4378 		return -EINTR;
4379 
4380 	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4381 	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4382 		return -EINVAL;
4383 
4384 	vcpu_load(vcpu);
4385 
4386 	if (guestdbg_exit_pending(vcpu)) {
4387 		kvm_s390_prepare_debug_exit(vcpu);
4388 		rc = 0;
4389 		goto out;
4390 	}
4391 
4392 	kvm_sigset_activate(vcpu);
4393 
4394 	/*
4395 	 * no need to check the return value of vcpu_start as it can only have
4396 	 * an error for protvirt, but protvirt means user cpu state
4397 	 */
4398 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4399 		kvm_s390_vcpu_start(vcpu);
4400 	} else if (is_vcpu_stopped(vcpu)) {
4401 		pr_err_ratelimited("can't run stopped vcpu %d\n",
4402 				   vcpu->vcpu_id);
4403 		rc = -EINVAL;
4404 		goto out;
4405 	}
4406 
4407 	sync_regs(vcpu);
4408 	enable_cpu_timer_accounting(vcpu);
4409 
4410 	might_fault();
4411 	rc = __vcpu_run(vcpu);
4412 
4413 	if (signal_pending(current) && !rc) {
4414 		kvm_run->exit_reason = KVM_EXIT_INTR;
4415 		rc = -EINTR;
4416 	}
4417 
4418 	if (guestdbg_exit_pending(vcpu) && !rc)  {
4419 		kvm_s390_prepare_debug_exit(vcpu);
4420 		rc = 0;
4421 	}
4422 
4423 	if (rc == -EREMOTE) {
4424 		/* userspace support is needed, kvm_run has been prepared */
4425 		rc = 0;
4426 	}
4427 
4428 	disable_cpu_timer_accounting(vcpu);
4429 	store_regs(vcpu);
4430 
4431 	kvm_sigset_deactivate(vcpu);
4432 
4433 	vcpu->stat.exit_userspace++;
4434 out:
4435 	vcpu_put(vcpu);
4436 	return rc;
4437 }
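
/*
 * Illustrative userspace sketch (not part of the original file): a minimal
 * run loop around the ioctl handled above. Assumes <sys/ioctl.h>,
 * <sys/mman.h> and <linux/kvm.h>, already-open "kvm_fd" and "vcpu_fd"
 * descriptors, and a hypothetical handle_sieic() helper.
 *
 *	int size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
 *	struct kvm_run *run = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 *
 *	for (;;) {
 *		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
 *			break;
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			handle_sieic(run->s390_sieic.icptcode,
 *				     run->s390_sieic.ipa,
 *				     run->s390_sieic.ipb);
 *	}
 */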
4438 
4439 /*
4440  * store status at address
4441  * we have two special cases:
4442  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4443  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4444  */
4445 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4446 {
4447 	unsigned char archmode = 1;
4448 	freg_t fprs[NUM_FPRS];
4449 	unsigned int px;
4450 	u64 clkcomp, cputm;
4451 	int rc;
4452 
4453 	px = kvm_s390_get_prefix(vcpu);
4454 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4455 		if (write_guest_abs(vcpu, 163, &archmode, 1))
4456 			return -EFAULT;
4457 		gpa = 0;
4458 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4459 		if (write_guest_real(vcpu, 163, &archmode, 1))
4460 			return -EFAULT;
4461 		gpa = px;
4462 	} else
4463 		gpa -= __LC_FPREGS_SAVE_AREA;
4464 
4465 	/* manually convert vector registers if necessary */
4466 	if (MACHINE_HAS_VX) {
4467 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4468 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4469 				     fprs, 128);
4470 	} else {
4471 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4472 				     vcpu->run->s.regs.fprs, 128);
4473 	}
4474 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4475 			      vcpu->run->s.regs.gprs, 128);
4476 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4477 			      &vcpu->arch.sie_block->gpsw, 16);
4478 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4479 			      &px, 4);
4480 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4481 			      &vcpu->run->s.regs.fpc, 4);
4482 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4483 			      &vcpu->arch.sie_block->todpr, 4);
4484 	cputm = kvm_s390_get_cpu_timer(vcpu);
4485 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4486 			      &cputm, 8);
4487 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
4488 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4489 			      &clkcomp, 8);
4490 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4491 			      &vcpu->run->s.regs.acrs, 64);
4492 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4493 			      &vcpu->arch.sie_block->gcr, 128);
4494 	return rc ? -EFAULT : 0;
4495 }
4496 
4497 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4498 {
4499 	/*
4500 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4501 	 * switch in the run ioctl. Let's update our copies before we save
4502 	 * them into the save area.
4503 	 */
4504 	save_fpu_regs();
4505 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4506 	save_access_regs(vcpu->run->s.regs.acrs);
4507 
4508 	return kvm_s390_store_status_unloaded(vcpu, addr);
4509 }
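
/*
 * Illustrative userspace sketch (not part of the original file): storing
 * the architected cpu status at the absolute lowcore save area, i.e. the
 * KVM_S390_STORE_STATUS_NOADDR case handled above. Assumes <sys/ioctl.h>,
 * <linux/kvm.h> and an open vcpu file descriptor "vcpu_fd".
 *
 *	if (ioctl(vcpu_fd, KVM_S390_STORE_STATUS,
 *		  KVM_S390_STORE_STATUS_NOADDR))
 *		perror("KVM_S390_STORE_STATUS");
 */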
4510 
4511 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4512 {
4513 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4514 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4515 }
4516 
4517 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4518 {
4519 	unsigned int i;
4520 	struct kvm_vcpu *vcpu;
4521 
4522 	kvm_for_each_vcpu(i, vcpu, kvm) {
4523 		__disable_ibs_on_vcpu(vcpu);
4524 	}
4525 }
4526 
4527 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4528 {
4529 	if (!sclp.has_ibs)
4530 		return;
4531 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4532 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4533 }
4534 
4535 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4536 {
4537 	int i, online_vcpus, r = 0, started_vcpus = 0;
4538 
4539 	if (!is_vcpu_stopped(vcpu))
4540 		return 0;
4541 
4542 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4543 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4544 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4545 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4546 
4547 	/* Let's tell the UV that we want to change into the operating state */
4548 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4549 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4550 		if (r) {
4551 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4552 			return r;
4553 		}
4554 	}
4555 
4556 	for (i = 0; i < online_vcpus; i++) {
4557 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4558 			started_vcpus++;
4559 	}
4560 
4561 	if (started_vcpus == 0) {
4562 		/* we're the only active VCPU -> speed it up */
4563 		__enable_ibs_on_vcpu(vcpu);
4564 	} else if (started_vcpus == 1) {
4565 		/*
4566 		 * As we are starting a second VCPU, we have to disable
4567 		 * the IBS facility on all VCPUs to remove potentially
4568 		 * outstanding ENABLE requests.
4569 		 */
4570 		__disable_ibs_on_all_vcpus(vcpu->kvm);
4571 	}
4572 
4573 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4574 	/*
4575 	 * The real PSW might have changed due to a RESTART interpreted by the
4576 	 * ultravisor. We block all interrupts and let the next sie exit
4577 	 * refresh our view.
4578 	 */
4579 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4580 		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4581 	/*
4582 	 * Another VCPU might have used IBS while we were offline.
4583 	 * Let's play safe and flush the VCPU at startup.
4584 	 */
4585 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4586 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4587 	return 0;
4588 }
4589 
4590 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4591 {
4592 	int i, online_vcpus, r = 0, started_vcpus = 0;
4593 	struct kvm_vcpu *started_vcpu = NULL;
4594 
4595 	if (is_vcpu_stopped(vcpu))
4596 		return 0;
4597 
4598 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4599 	/* Only one cpu at a time may enter/leave the STOPPED state. */
4600 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
4601 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4602 
4603 	/* Let's tell the UV that we want to change into the stopped state */
4604 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4605 		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4606 		if (r) {
4607 			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4608 			return r;
4609 		}
4610 	}
4611 
4612 	/*
4613 	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
4614 	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
4615 	 * have been fully processed. This will ensure that the VCPU
4616 	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
4617 	 */
4618 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4619 	kvm_s390_clear_stop_irq(vcpu);
4620 
4621 	__disable_ibs_on_vcpu(vcpu);
4622 
4623 	for (i = 0; i < online_vcpus; i++) {
4624 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4625 			started_vcpus++;
4626 			started_vcpu = vcpu->kvm->vcpus[i];
4627 		}
4628 	}
4629 
4630 	if (started_vcpus == 1) {
4631 		/*
4632 		 * As we only have one VCPU left, we want to enable the
4633 		 * IBS facility for that VCPU to speed it up.
4634 		 */
4635 		__enable_ibs_on_vcpu(started_vcpu);
4636 	}
4637 
4638 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4639 	return 0;
4640 }
4641 
4642 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4643 				     struct kvm_enable_cap *cap)
4644 {
4645 	int r;
4646 
4647 	if (cap->flags)
4648 		return -EINVAL;
4649 
4650 	switch (cap->cap) {
4651 	case KVM_CAP_S390_CSS_SUPPORT:
4652 		if (!vcpu->kvm->arch.css_support) {
4653 			vcpu->kvm->arch.css_support = 1;
4654 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4655 			trace_kvm_s390_enable_css(vcpu->kvm);
4656 		}
4657 		r = 0;
4658 		break;
4659 	default:
4660 		r = -EINVAL;
4661 		break;
4662 	}
4663 	return r;
4664 }
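
/*
 * Illustrative userspace sketch (not part of the original file): enabling
 * userspace channel subsystem (CSS) support as handled above. Assumes
 * <sys/ioctl.h>, <linux/kvm.h> and an open vcpu file descriptor "vcpu_fd".
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_CSS_SUPPORT };
 *
 *	if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap))
 *		perror("KVM_ENABLE_CAP");
 */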
4665 
4666 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4667 				   struct kvm_s390_mem_op *mop)
4668 {
4669 	void __user *uaddr = (void __user *)mop->buf;
4670 	int r = 0;
4671 
4672 	if (mop->flags || !mop->size)
4673 		return -EINVAL;
4674 	if (mop->size + mop->sida_offset < mop->size)
4675 		return -EINVAL;
4676 	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4677 		return -E2BIG;
4678 	if (!kvm_s390_pv_cpu_is_protected(vcpu))
4679 		return -EINVAL;
4680 
4681 	switch (mop->op) {
4682 	case KVM_S390_MEMOP_SIDA_READ:
4683 		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4684 				 mop->sida_offset), mop->size))
4685 			r = -EFAULT;
4686 
4687 		break;
4688 	case KVM_S390_MEMOP_SIDA_WRITE:
4689 		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4690 				   mop->sida_offset), uaddr, mop->size))
4691 			r = -EFAULT;
4692 		break;
4693 	}
4694 	return r;
4695 }
4696 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4697 				  struct kvm_s390_mem_op *mop)
4698 {
4699 	void __user *uaddr = (void __user *)mop->buf;
4700 	void *tmpbuf = NULL;
4701 	int r = 0;
4702 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4703 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
4704 
4705 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4706 		return -EINVAL;
4707 
4708 	if (mop->size > MEM_OP_MAX_SIZE)
4709 		return -E2BIG;
4710 
4711 	if (kvm_s390_pv_cpu_is_protected(vcpu))
4712 		return -EINVAL;
4713 
4714 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4715 		tmpbuf = vmalloc(mop->size);
4716 		if (!tmpbuf)
4717 			return -ENOMEM;
4718 	}
4719 
4720 	switch (mop->op) {
4721 	case KVM_S390_MEMOP_LOGICAL_READ:
4722 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4723 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4724 					    mop->size, GACC_FETCH);
4725 			break;
4726 		}
4727 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4728 		if (r == 0) {
4729 			if (copy_to_user(uaddr, tmpbuf, mop->size))
4730 				r = -EFAULT;
4731 		}
4732 		break;
4733 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4734 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4735 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4736 					    mop->size, GACC_STORE);
4737 			break;
4738 		}
4739 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4740 			r = -EFAULT;
4741 			break;
4742 		}
4743 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4744 		break;
4745 	}
4746 
4747 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4748 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4749 
4750 	vfree(tmpbuf);
4751 	return r;
4752 }
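
/*
 * Illustrative userspace sketch (not part of the original file): reading
 * guest memory through the MEM_OP path above, using a logical address and
 * access register 0. The guest address is an example value. Assumes
 * <sys/ioctl.h>, <linux/kvm.h> and an open vcpu file descriptor "vcpu_fd".
 *
 *	unsigned char buf[256];
 *	struct kvm_s390_mem_op mop = {
 *		.gaddr = 0x1000,
 *		.size  = sizeof(buf),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)buf,
 *		.ar    = 0,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &mop))
 *		perror("KVM_S390_MEM_OP");
 */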
4753 
4754 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4755 				      struct kvm_s390_mem_op *mop)
4756 {
4757 	int r, srcu_idx;
4758 
4759 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4760 
4761 	switch (mop->op) {
4762 	case KVM_S390_MEMOP_LOGICAL_READ:
4763 	case KVM_S390_MEMOP_LOGICAL_WRITE:
4764 		r = kvm_s390_guest_mem_op(vcpu, mop);
4765 		break;
4766 	case KVM_S390_MEMOP_SIDA_READ:
4767 	case KVM_S390_MEMOP_SIDA_WRITE:
4768 		/* we are locked against sida going away by the vcpu->mutex */
4769 		r = kvm_s390_guest_sida_op(vcpu, mop);
4770 		break;
4771 	default:
4772 		r = -EINVAL;
4773 	}
4774 
4775 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4776 	return r;
4777 }
4778 
4779 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4780 			       unsigned int ioctl, unsigned long arg)
4781 {
4782 	struct kvm_vcpu *vcpu = filp->private_data;
4783 	void __user *argp = (void __user *)arg;
4784 
4785 	switch (ioctl) {
4786 	case KVM_S390_IRQ: {
4787 		struct kvm_s390_irq s390irq;
4788 
4789 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4790 			return -EFAULT;
4791 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4792 	}
4793 	case KVM_S390_INTERRUPT: {
4794 		struct kvm_s390_interrupt s390int;
4795 		struct kvm_s390_irq s390irq = {};
4796 
4797 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
4798 			return -EFAULT;
4799 		if (s390int_to_s390irq(&s390int, &s390irq))
4800 			return -EINVAL;
4801 		return kvm_s390_inject_vcpu(vcpu, &s390irq);
4802 	}
4803 	}
4804 	return -ENOIOCTLCMD;
4805 }
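
/*
 * Illustrative userspace sketch (not part of the original file): injecting
 * a restart interrupt through the asynchronous KVM_S390_IRQ path above.
 * Assumes <sys/ioctl.h>, <linux/kvm.h> and an open vcpu file descriptor
 * "vcpu_fd".
 *
 *	struct kvm_s390_irq irq = { .type = KVM_S390_RESTART };
 *
 *	if (ioctl(vcpu_fd, KVM_S390_IRQ, &irq))
 *		perror("KVM_S390_IRQ");
 */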
4806 
4807 long kvm_arch_vcpu_ioctl(struct file *filp,
4808 			 unsigned int ioctl, unsigned long arg)
4809 {
4810 	struct kvm_vcpu *vcpu = filp->private_data;
4811 	void __user *argp = (void __user *)arg;
4812 	int idx;
4813 	long r;
4814 	u16 rc, rrc;
4815 
4816 	vcpu_load(vcpu);
4817 
4818 	switch (ioctl) {
4819 	case KVM_S390_STORE_STATUS:
4820 		idx = srcu_read_lock(&vcpu->kvm->srcu);
4821 		r = kvm_s390_store_status_unloaded(vcpu, arg);
4822 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
4823 		break;
4824 	case KVM_S390_SET_INITIAL_PSW: {
4825 		psw_t psw;
4826 
4827 		r = -EFAULT;
4828 		if (copy_from_user(&psw, argp, sizeof(psw)))
4829 			break;
4830 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4831 		break;
4832 	}
4833 	case KVM_S390_CLEAR_RESET:
4834 		r = 0;
4835 		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4836 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4837 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4838 					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4839 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4840 				   rc, rrc);
4841 		}
4842 		break;
4843 	case KVM_S390_INITIAL_RESET:
4844 		r = 0;
4845 		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4846 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4847 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4848 					  UVC_CMD_CPU_RESET_INITIAL,
4849 					  &rc, &rrc);
4850 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4851 				   rc, rrc);
4852 		}
4853 		break;
4854 	case KVM_S390_NORMAL_RESET:
4855 		r = 0;
4856 		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4857 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4858 			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4859 					  UVC_CMD_CPU_RESET, &rc, &rrc);
4860 			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4861 				   rc, rrc);
4862 		}
4863 		break;
4864 	case KVM_SET_ONE_REG:
4865 	case KVM_GET_ONE_REG: {
4866 		struct kvm_one_reg reg;
4867 		r = -EINVAL;
4868 		if (kvm_s390_pv_cpu_is_protected(vcpu))
4869 			break;
4870 		r = -EFAULT;
4871 		if (copy_from_user(&reg, argp, sizeof(reg)))
4872 			break;
4873 		if (ioctl == KVM_SET_ONE_REG)
4874 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4875 		else
4876 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4877 		break;
4878 	}
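	/*
	 * The UCAS ioctls below manage the guest address space of
	 * user-controlled (ucontrol) VMs directly: they map and unmap user
	 * memory segments in the vcpu's gmap instead of going through the
	 * regular memslot path.
	 */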
4879 #ifdef CONFIG_KVM_S390_UCONTROL
4880 	case KVM_S390_UCAS_MAP: {
4881 		struct kvm_s390_ucas_mapping ucasmap;
4882 
4883 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4884 			r = -EFAULT;
4885 			break;
4886 		}
4887 
4888 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4889 			r = -EINVAL;
4890 			break;
4891 		}
4892 
4893 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4894 				     ucasmap.vcpu_addr, ucasmap.length);
4895 		break;
4896 	}
4897 	case KVM_S390_UCAS_UNMAP: {
4898 		struct kvm_s390_ucas_mapping ucasmap;
4899 
4900 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4901 			r = -EFAULT;
4902 			break;
4903 		}
4904 
4905 		if (!kvm_is_ucontrol(vcpu->kvm)) {
4906 			r = -EINVAL;
4907 			break;
4908 		}
4909 
4910 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4911 			ucasmap.length);
4912 		break;
4913 	}
4914 #endif
4915 	case KVM_S390_VCPU_FAULT: {
4916 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
4917 		break;
4918 	}
4919 	case KVM_ENABLE_CAP:
4920 	{
4921 		struct kvm_enable_cap cap;
4922 		r = -EFAULT;
4923 		if (copy_from_user(&cap, argp, sizeof(cap)))
4924 			break;
4925 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4926 		break;
4927 	}
4928 	case KVM_S390_MEM_OP: {
4929 		struct kvm_s390_mem_op mem_op;
4930 
4931 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4932 			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4933 		else
4934 			r = -EFAULT;
4935 		break;
4936 	}
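	/*
	 * The IRQ state ioctls below save and restore the vcpu's pending
	 * local interrupts as a flat array of struct kvm_s390_irq, which is
	 * what userspace uses for migration; irq_state.flags is deliberately
	 * ignored (see the comments in the individual cases).
	 */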
4937 	case KVM_S390_SET_IRQ_STATE: {
4938 		struct kvm_s390_irq_state irq_state;
4939 
4940 		r = -EFAULT;
4941 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4942 			break;
4943 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4944 		    irq_state.len == 0 ||
4945 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4946 			r = -EINVAL;
4947 			break;
4948 		}
4949 		/* do not use irq_state.flags; it would break old QEMUs */
4950 		r = kvm_s390_set_irq_state(vcpu,
4951 					   (void __user *) irq_state.buf,
4952 					   irq_state.len);
4953 		break;
4954 	}
4955 	case KVM_S390_GET_IRQ_STATE: {
4956 		struct kvm_s390_irq_state irq_state;
4957 
4958 		r = -EFAULT;
4959 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4960 			break;
4961 		if (irq_state.len == 0) {
4962 			r = -EINVAL;
4963 			break;
4964 		}
4965 		/* do not use irq_state.flags; it would break old QEMUs */
4966 		r = kvm_s390_get_irq_state(vcpu,
4967 					   (__u8 __user *)  irq_state.buf,
4968 					   irq_state.len);
4969 		break;
4970 	}
4971 	default:
4972 		r = -ENOTTY;
4973 	}
4974 
4975 	vcpu_put(vcpu);
4976 	return r;
4977 }
4978 
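/*
 * mmap() handler for the vcpu fd: user-controlled (ucontrol) VMs may map
 * the SIE control block at KVM_S390_SIE_PAGE_OFFSET; everything else
 * faults with SIGBUS.
 */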
4979 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4980 {
4981 #ifdef CONFIG_KVM_S390_UCONTROL
4982 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4983 		 && (kvm_is_ucontrol(vcpu->kvm))) {
4984 		vmf->page = virt_to_page(vcpu->arch.sie_block);
4985 		get_page(vmf->page);
4986 		return 0;
4987 	}
4988 #endif
4989 	return VM_FAULT_SIGBUS;
4990 }
4991 
4992 /* Section: memory related */
4993 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4994 				   struct kvm_memory_slot *memslot,
4995 				   const struct kvm_userspace_memory_region *mem,
4996 				   enum kvm_mr_change change)
4997 {
4998 	/* A few sanity checks. Memory slots have to start and end on a
4999 	   segment boundary (1 MB). The memory in userland may be fragmented
5000 	   across several different vmas, and it is fine to mmap() and
5001 	   munmap() within this slot at any time after this call. */
5002 
5003 	if (mem->userspace_addr & 0xffffful)
5004 		return -EINVAL;
5005 
5006 	if (mem->memory_size & 0xffffful)
5007 		return -EINVAL;
5008 
5009 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
5010 		return -EINVAL;
5011 
5012 	/* When we are protected, we should not change the memory slots */
5013 	if (kvm_s390_pv_get_handle(kvm))
5014 		return -EINVAL;
5015 	return 0;
5016 }
5017 
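/*
 * Apply a committed memslot change to the guest address space: tear down
 * the old gmap segment mapping on delete or move and (re)establish the
 * mapping of the userspace range on create or move; flag-only changes need
 * no gmap update.
 */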
5018 void kvm_arch_commit_memory_region(struct kvm *kvm,
5019 				const struct kvm_userspace_memory_region *mem,
5020 				struct kvm_memory_slot *old,
5021 				const struct kvm_memory_slot *new,
5022 				enum kvm_mr_change change)
5023 {
5024 	int rc = 0;
5025 
5026 	switch (change) {
5027 	case KVM_MR_DELETE:
5028 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5029 					old->npages * PAGE_SIZE);
5030 		break;
5031 	case KVM_MR_MOVE:
5032 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5033 					old->npages * PAGE_SIZE);
5034 		if (rc)
5035 			break;
5036 		fallthrough;
5037 	case KVM_MR_CREATE:
5038 		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5039 				      mem->guest_phys_addr, mem->memory_size);
5040 		break;
5041 	case KVM_MR_FLAGS_ONLY:
5042 		break;
5043 	default:
5044 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5045 	}
5046 	if (rc)
5047 		pr_warn("failed to commit memory region\n");
5048 	return;
5049 }
5050 
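/*
 * Build a per-doubleword facility mask from the SCLP hmfai field: a 2-bit
 * value is extracted for facility-list doubleword i and strips the top 16,
 * 32, 48 or 64 bits. Only host facility bits that survive this mask are
 * merged into kvm_s390_fac_base below, presumably to hide facilities that
 * are handled entirely by the hypervisor.
 */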
5051 static inline unsigned long nonhyp_mask(int i)
5052 {
5053 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5054 
5055 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5056 }
5057 
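/* drop any stale wakeup indication once the vcpu leaves the blocked state */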
5058 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5059 {
5060 	vcpu->valid_wakeup = false;
5061 }
5062 
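/*
 * Module init: bail out if the SIE interpretation facility (sief2) is not
 * available, reject the unsupported combination of nested virtualization
 * and huge-page backing, seed kvm_s390_fac_base from the host STFLE
 * facility list, then register with the generic KVM code.
 */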
5063 static int __init kvm_s390_init(void)
5064 {
5065 	int i;
5066 
5067 	if (!sclp.has_sief2) {
5068 		pr_info("SIE is not available\n");
5069 		return -ENODEV;
5070 	}
5071 
5072 	if (nested && hpage) {
5073 		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5074 		return -EINVAL;
5075 	}
5076 
5077 	for (i = 0; i < 16; i++)
5078 		kvm_s390_fac_base[i] |=
5079 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
5080 
5081 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5082 }
5083 
5084 static void __exit kvm_s390_exit(void)
5085 {
5086 	kvm_exit();
5087 }
5088 
5089 module_init(kvm_s390_init);
5090 module_exit(kvm_s390_exit);
5091 
5092 /*
5093  * Enable autoloading of the kvm module.
5094  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5095  * since x86 takes a different approach.
5096  */
5097 #include <linux/miscdevice.h>
5098 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5099 MODULE_ALIAS("devname:kvm");
5100