1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include "kvm-s390.h"
47 #include "gaccess.h"
48 
49 #define KMSG_COMPONENT "kvm-s390"
50 #undef pr_fmt
51 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
52 
53 #define CREATE_TRACE_POINTS
54 #include "trace.h"
55 #include "trace-s390.h"
56 
57 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
58 #define LOCAL_IRQS 32
59 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
60 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
61 
62 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
63 
64 struct kvm_stats_debugfs_item debugfs_entries[] = {
65 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
66 	{ "exit_null", VCPU_STAT(exit_null) },
67 	{ "exit_validity", VCPU_STAT(exit_validity) },
68 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
69 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
70 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
71 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
72 	{ "exit_pei", VCPU_STAT(exit_pei) },
73 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
74 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
75 	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
76 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
77 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
78 	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
79 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
80 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
82 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
83 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
84 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
85 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
86 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
87 	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
88 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
89 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
90 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
91 	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
92 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
93 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
94 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
95 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
96 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
97 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
98 	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
99 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
100 	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
101 	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
102 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
103 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
104 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
105 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
106 	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
107 	{ "instruction_sie", VCPU_STAT(instruction_sie) },
108 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
109 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
110 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
111 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
112 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
113 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
114 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
115 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
116 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
117 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
118 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
119 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
120 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
121 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
122 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
123 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
124 	{ "diagnose_10", VCPU_STAT(diagnose_10) },
125 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
126 	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
127 	{ "diagnose_258", VCPU_STAT(diagnose_258) },
128 	{ "diagnose_308", VCPU_STAT(diagnose_308) },
129 	{ "diagnose_500", VCPU_STAT(diagnose_500) },
130 	{ NULL }
131 };
132 
133 struct kvm_s390_tod_clock_ext {
134 	__u8 epoch_idx;
135 	__u64 tod;
136 	__u8 reserved[7];
137 } __packed;
138 
139 /* allow nested virtualization in KVM (if enabled by user space) */
140 static int nested;
141 module_param(nested, int, S_IRUGO);
142 MODULE_PARM_DESC(nested, "Nested virtualization support");
143 
144 /* upper facilities limit for kvm */
145 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
146 
147 unsigned long kvm_s390_fac_list_mask_size(void)
148 {
149 	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
150 	return ARRAY_SIZE(kvm_s390_fac_list_mask);
151 }
152 
153 /* available cpu features supported by kvm */
154 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
155 /* available subfunctions indicated via query / "test bit" */
156 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
157 
158 static struct gmap_notifier gmap_notifier;
159 static struct gmap_notifier vsie_gmap_notifier;
160 debug_info_t *kvm_s390_dbf;
161 
162 /* Section: not file related */
163 int kvm_arch_hardware_enable(void)
164 {
165 	/* every s390 is virtualization enabled ;-) */
166 	return 0;
167 }
168 
169 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
170 			      unsigned long end);
171 
172 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
173 {
174 	u8 delta_idx = 0;
175 
176 	/*
177 	 * The TOD jumps by delta, we have to compensate this by adding
178 	 * -delta to the epoch.
179 	 */
180 	delta = -delta;
181 
182 	/* sign-extension - we're adding to signed values below */
183 	if ((s64)delta < 0)
184 		delta_idx = -1;
185 
186 	scb->epoch += delta;
187 	if (scb->ecd & ECD_MEF) {
188 		scb->epdx += delta_idx;
189 		if (scb->epoch < delta)
190 			scb->epdx += 1;
191 	}
192 }
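/*
 * Editorial note (not part of the original source): a worked example of the
 * extended epoch arithmetic above, assuming the multiple-epoch facility
 * (ECD_MEF) is active.  The pair (epdx:epoch) is a signed offset added to
 * the host TOD to form the guest TOD, so when the host TOD jumps forward by
 * delta the offset has to shrink by delta:
 *
 *	delta     = 0x10;		host TOD moved ahead by 0x10
 *	delta     = -delta;		0xfffffffffffffff0
 *	delta_idx = -1;			sign extension into epdx
 *
 *	old epoch 0x20:	epoch += delta -> 0x10, carry out of bit 63,
 *			epdx  += -1 + 1 -> unchanged	(net offset -0x10)
 *	old epoch 0x05:	epoch += delta -> 0xfffffffffffffff5, no carry,
 *			epdx  += -1	-> borrows one	(net offset -0x10)
 */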
193 
194 /*
195  * This callback is executed during stop_machine(). All CPUs are therefore
196  * temporarily stopped. In order not to change guest behavior, we have to
197  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
198  * so a CPU won't be stopped while calculating with the epoch.
199  */
200 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
201 			  void *v)
202 {
203 	struct kvm *kvm;
204 	struct kvm_vcpu *vcpu;
205 	int i;
206 	unsigned long long *delta = v;
207 
208 	list_for_each_entry(kvm, &vm_list, vm_list) {
209 		kvm_for_each_vcpu(i, vcpu, kvm) {
210 			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
211 			if (i == 0) {
212 				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
213 				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
214 			}
215 			if (vcpu->arch.cputm_enabled)
216 				vcpu->arch.cputm_start += *delta;
217 			if (vcpu->arch.vsie_block)
218 				kvm_clock_sync_scb(vcpu->arch.vsie_block,
219 						   *delta);
220 		}
221 	}
222 	return NOTIFY_OK;
223 }
224 
225 static struct notifier_block kvm_clock_notifier = {
226 	.notifier_call = kvm_clock_sync,
227 };
228 
229 int kvm_arch_hardware_setup(void)
230 {
231 	gmap_notifier.notifier_call = kvm_gmap_notifier;
232 	gmap_register_pte_notifier(&gmap_notifier);
233 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
234 	gmap_register_pte_notifier(&vsie_gmap_notifier);
235 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
236 				       &kvm_clock_notifier);
237 	return 0;
238 }
239 
240 void kvm_arch_hardware_unsetup(void)
241 {
242 	gmap_unregister_pte_notifier(&gmap_notifier);
243 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
244 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
245 					 &kvm_clock_notifier);
246 }
247 
248 static void allow_cpu_feat(unsigned long nr)
249 {
250 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
251 }
252 
253 static inline int plo_test_bit(unsigned char nr)
254 {
255 	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
256 	int cc;
257 
258 	asm volatile(
259 		/* Parameter registers are ignored for "test bit" */
260 		"	plo	0,0,0,0(0)\n"
261 		"	ipm	%0\n"
262 		"	srl	%0,28\n"
263 		: "=d" (cc)
264 		: "d" (r0)
265 		: "cc");
266 	return cc == 0;
267 }
268 
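/*
 * Editorial note (not part of the original source): the query loop in
 * kvm_s390_cpu_feat_init() below stores the PLO test-bit results in the
 * same MSB-first layout the architecture uses for facility bits: function
 * code i lands in byte i >> 3, at bit mask 0x80 >> (i & 7).  For example,
 * function code 21 sets bit 0x04 of plo[2].
 */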
269 static void kvm_s390_cpu_feat_init(void)
270 {
271 	int i;
272 
273 	for (i = 0; i < 256; ++i) {
274 		if (plo_test_bit(i))
275 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
276 	}
277 
278 	if (test_facility(28)) /* TOD-clock steering */
279 		ptff(kvm_s390_available_subfunc.ptff,
280 		     sizeof(kvm_s390_available_subfunc.ptff),
281 		     PTFF_QAF);
282 
283 	if (test_facility(17)) { /* MSA */
284 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
285 			      kvm_s390_available_subfunc.kmac);
286 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
287 			      kvm_s390_available_subfunc.kmc);
288 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
289 			      kvm_s390_available_subfunc.km);
290 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
291 			      kvm_s390_available_subfunc.kimd);
292 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
293 			      kvm_s390_available_subfunc.klmd);
294 	}
295 	if (test_facility(76)) /* MSA3 */
296 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
297 			      kvm_s390_available_subfunc.pckmo);
298 	if (test_facility(77)) { /* MSA4 */
299 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
300 			      kvm_s390_available_subfunc.kmctr);
301 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
302 			      kvm_s390_available_subfunc.kmf);
303 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
304 			      kvm_s390_available_subfunc.kmo);
305 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
306 			      kvm_s390_available_subfunc.pcc);
307 	}
308 	if (test_facility(57)) /* MSA5 */
309 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
310 			      kvm_s390_available_subfunc.ppno);
311 
312 	if (test_facility(146)) /* MSA8 */
313 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
314 			      kvm_s390_available_subfunc.kma);
315 
316 	if (MACHINE_HAS_ESOP)
317 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
318 	/*
319 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
320 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
321 	 */
322 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
323 	    !test_facility(3) || !nested)
324 		return;
325 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
326 	if (sclp.has_64bscao)
327 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
328 	if (sclp.has_siif)
329 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
330 	if (sclp.has_gpere)
331 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
332 	if (sclp.has_gsls)
333 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
334 	if (sclp.has_ib)
335 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
336 	if (sclp.has_cei)
337 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
338 	if (sclp.has_ibs)
339 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
340 	if (sclp.has_kss)
341 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
342 	/*
343 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
344 	 * all skey handling functions read/set the skey from the PGSTE
345 	 * instead of the real storage key.
346 	 *
347 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
348 	 * pages being detected as preserved although they are resident.
349 	 *
350 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
351 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
352 	 *
353 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
354 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
355 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
356 	 *
357 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
358 	 * cannot easily shadow the SCA because of the ipte lock.
359 	 */
360 }
361 
362 int kvm_arch_init(void *opaque)
363 {
364 	int rc;
365 
366 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
367 	if (!kvm_s390_dbf)
368 		return -ENOMEM;
369 
370 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
371 		rc = -ENOMEM;
372 		goto out_debug_unreg;
373 	}
374 
375 	kvm_s390_cpu_feat_init();
376 
377 	/* Register floating interrupt controller interface. */
378 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
379 	if (rc) {
380 		pr_err("Failed to register FLIC rc=%d\n", rc);
381 		goto out_debug_unreg;
382 	}
383 	return 0;
384 
385 out_debug_unreg:
386 	debug_unregister(kvm_s390_dbf);
387 	return rc;
388 }
389 
390 void kvm_arch_exit(void)
391 {
392 	debug_unregister(kvm_s390_dbf);
393 }
394 
395 /* Section: device related */
396 long kvm_arch_dev_ioctl(struct file *filp,
397 			unsigned int ioctl, unsigned long arg)
398 {
399 	if (ioctl == KVM_S390_ENABLE_SIE)
400 		return s390_enable_sie();
401 	return -EINVAL;
402 }
403 
404 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
405 {
406 	int r;
407 
408 	switch (ext) {
409 	case KVM_CAP_S390_PSW:
410 	case KVM_CAP_S390_GMAP:
411 	case KVM_CAP_SYNC_MMU:
412 #ifdef CONFIG_KVM_S390_UCONTROL
413 	case KVM_CAP_S390_UCONTROL:
414 #endif
415 	case KVM_CAP_ASYNC_PF:
416 	case KVM_CAP_SYNC_REGS:
417 	case KVM_CAP_ONE_REG:
418 	case KVM_CAP_ENABLE_CAP:
419 	case KVM_CAP_S390_CSS_SUPPORT:
420 	case KVM_CAP_IOEVENTFD:
421 	case KVM_CAP_DEVICE_CTRL:
422 	case KVM_CAP_ENABLE_CAP_VM:
423 	case KVM_CAP_S390_IRQCHIP:
424 	case KVM_CAP_VM_ATTRIBUTES:
425 	case KVM_CAP_MP_STATE:
426 	case KVM_CAP_IMMEDIATE_EXIT:
427 	case KVM_CAP_S390_INJECT_IRQ:
428 	case KVM_CAP_S390_USER_SIGP:
429 	case KVM_CAP_S390_USER_STSI:
430 	case KVM_CAP_S390_SKEYS:
431 	case KVM_CAP_S390_IRQ_STATE:
432 	case KVM_CAP_S390_USER_INSTR0:
433 	case KVM_CAP_S390_CMMA_MIGRATION:
434 	case KVM_CAP_S390_AIS:
435 		r = 1;
436 		break;
437 	case KVM_CAP_S390_MEM_OP:
438 		r = MEM_OP_MAX_SIZE;
439 		break;
440 	case KVM_CAP_NR_VCPUS:
441 	case KVM_CAP_MAX_VCPUS:
442 	case KVM_CAP_MAX_VCPU_ID:
443 		r = KVM_S390_BSCA_CPU_SLOTS;
444 		if (!kvm_s390_use_sca_entries())
445 			r = KVM_MAX_VCPUS;
446 		else if (sclp.has_esca && sclp.has_64bscao)
447 			r = KVM_S390_ESCA_CPU_SLOTS;
448 		break;
449 	case KVM_CAP_NR_MEMSLOTS:
450 		r = KVM_USER_MEM_SLOTS;
451 		break;
452 	case KVM_CAP_S390_COW:
453 		r = MACHINE_HAS_ESOP;
454 		break;
455 	case KVM_CAP_S390_VECTOR_REGISTERS:
456 		r = MACHINE_HAS_VX;
457 		break;
458 	case KVM_CAP_S390_RI:
459 		r = test_facility(64);
460 		break;
461 	case KVM_CAP_S390_GS:
462 		r = test_facility(133);
463 		break;
464 	case KVM_CAP_S390_BPB:
465 		r = test_facility(82);
466 		break;
467 	default:
468 		r = 0;
469 	}
470 	return r;
471 }
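/*
 * Editorial note (not part of the original source): a minimal userspace
 * sketch of how the extension values above are consumed, assuming vm_fd was
 * obtained via KVM_CREATE_VM and the usual <linux/kvm.h> / <sys/ioctl.h>
 * includes:
 *
 *	int max_memop = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *	// > 0: supported, value is the maximum transfer size (MEM_OP_MAX_SIZE)
 *	int max_vcpus = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
 */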
472 
473 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
474 					struct kvm_memory_slot *memslot)
475 {
476 	gfn_t cur_gfn, last_gfn;
477 	unsigned long address;
478 	struct gmap *gmap = kvm->arch.gmap;
479 
480 	/* Loop over all guest pages */
481 	last_gfn = memslot->base_gfn + memslot->npages;
482 	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
483 		address = gfn_to_hva_memslot(memslot, cur_gfn);
484 
485 		if (test_and_clear_guest_dirty(gmap->mm, address))
486 			mark_page_dirty(kvm, cur_gfn);
487 		if (fatal_signal_pending(current))
488 			return;
489 		cond_resched();
490 	}
491 }
492 
493 /* Section: vm related */
494 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
495 
496 /*
497  * Get (and clear) the dirty memory log for a memory slot.
498  */
499 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
500 			       struct kvm_dirty_log *log)
501 {
502 	int r;
503 	unsigned long n;
504 	struct kvm_memslots *slots;
505 	struct kvm_memory_slot *memslot;
506 	int is_dirty = 0;
507 
508 	if (kvm_is_ucontrol(kvm))
509 		return -EINVAL;
510 
511 	mutex_lock(&kvm->slots_lock);
512 
513 	r = -EINVAL;
514 	if (log->slot >= KVM_USER_MEM_SLOTS)
515 		goto out;
516 
517 	slots = kvm_memslots(kvm);
518 	memslot = id_to_memslot(slots, log->slot);
519 	r = -ENOENT;
520 	if (!memslot->dirty_bitmap)
521 		goto out;
522 
523 	kvm_s390_sync_dirty_log(kvm, memslot);
524 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
525 	if (r)
526 		goto out;
527 
528 	/* Clear the dirty log */
529 	if (is_dirty) {
530 		n = kvm_dirty_bitmap_bytes(memslot);
531 		memset(memslot->dirty_bitmap, 0, n);
532 	}
533 	r = 0;
534 out:
535 	mutex_unlock(&kvm->slots_lock);
536 	return r;
537 }
538 
539 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
540 {
541 	unsigned int i;
542 	struct kvm_vcpu *vcpu;
543 
544 	kvm_for_each_vcpu(i, vcpu, kvm) {
545 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
546 	}
547 }
548 
549 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
550 {
551 	int r;
552 
553 	if (cap->flags)
554 		return -EINVAL;
555 
556 	switch (cap->cap) {
557 	case KVM_CAP_S390_IRQCHIP:
558 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
559 		kvm->arch.use_irqchip = 1;
560 		r = 0;
561 		break;
562 	case KVM_CAP_S390_USER_SIGP:
563 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
564 		kvm->arch.user_sigp = 1;
565 		r = 0;
566 		break;
567 	case KVM_CAP_S390_VECTOR_REGISTERS:
568 		mutex_lock(&kvm->lock);
569 		if (kvm->created_vcpus) {
570 			r = -EBUSY;
571 		} else if (MACHINE_HAS_VX) {
572 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
573 			set_kvm_facility(kvm->arch.model.fac_list, 129);
574 			if (test_facility(134)) {
575 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
576 				set_kvm_facility(kvm->arch.model.fac_list, 134);
577 			}
578 			if (test_facility(135)) {
579 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
580 				set_kvm_facility(kvm->arch.model.fac_list, 135);
581 			}
582 			r = 0;
583 		} else
584 			r = -EINVAL;
585 		mutex_unlock(&kvm->lock);
586 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
587 			 r ? "(not available)" : "(success)");
588 		break;
589 	case KVM_CAP_S390_RI:
590 		r = -EINVAL;
591 		mutex_lock(&kvm->lock);
592 		if (kvm->created_vcpus) {
593 			r = -EBUSY;
594 		} else if (test_facility(64)) {
595 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
596 			set_kvm_facility(kvm->arch.model.fac_list, 64);
597 			r = 0;
598 		}
599 		mutex_unlock(&kvm->lock);
600 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
601 			 r ? "(not available)" : "(success)");
602 		break;
603 	case KVM_CAP_S390_AIS:
604 		mutex_lock(&kvm->lock);
605 		if (kvm->created_vcpus) {
606 			r = -EBUSY;
607 		} else {
608 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
609 			set_kvm_facility(kvm->arch.model.fac_list, 72);
610 			r = 0;
611 		}
612 		mutex_unlock(&kvm->lock);
613 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
614 			 r ? "(not available)" : "(success)");
615 		break;
616 	case KVM_CAP_S390_GS:
617 		r = -EINVAL;
618 		mutex_lock(&kvm->lock);
619 		if (kvm->created_vcpus) {
620 			r = -EBUSY;
621 		} else if (test_facility(133)) {
622 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
623 			set_kvm_facility(kvm->arch.model.fac_list, 133);
624 			r = 0;
625 		}
626 		mutex_unlock(&kvm->lock);
627 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
628 			 r ? "(not available)" : "(success)");
629 		break;
630 	case KVM_CAP_S390_USER_STSI:
631 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
632 		kvm->arch.user_stsi = 1;
633 		r = 0;
634 		break;
635 	case KVM_CAP_S390_USER_INSTR0:
636 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
637 		kvm->arch.user_instr0 = 1;
638 		icpt_operexc_on_all_vcpus(kvm);
639 		r = 0;
640 		break;
641 	default:
642 		r = -EINVAL;
643 		break;
644 	}
645 	return r;
646 }
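/*
 * Editorial note (not part of the original source): the per-VM capabilities
 * handled above are switched on from userspace with KVM_ENABLE_CAP on the
 * VM file descriptor; a minimal sketch, assuming vm_fd is valid:
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *
 *	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap))
 *		perror("KVM_ENABLE_CAP");
 *	// caps such as KVM_CAP_S390_VECTOR_REGISTERS, KVM_CAP_S390_RI and
 *	// KVM_CAP_S390_GS must be enabled before the first VCPU is created,
 *	// otherwise the handlers above return -EBUSY
 */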
647 
648 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
649 {
650 	int ret;
651 
652 	switch (attr->attr) {
653 	case KVM_S390_VM_MEM_LIMIT_SIZE:
654 		ret = 0;
655 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
656 			 kvm->arch.mem_limit);
657 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
658 			ret = -EFAULT;
659 		break;
660 	default:
661 		ret = -ENXIO;
662 		break;
663 	}
664 	return ret;
665 }
666 
667 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
668 {
669 	int ret;
670 	unsigned int idx;
671 	switch (attr->attr) {
672 	case KVM_S390_VM_MEM_ENABLE_CMMA:
673 		ret = -ENXIO;
674 		if (!sclp.has_cmma)
675 			break;
676 
677 		ret = -EBUSY;
678 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
679 		mutex_lock(&kvm->lock);
680 		if (!kvm->created_vcpus) {
681 			kvm->arch.use_cmma = 1;
682 			ret = 0;
683 		}
684 		mutex_unlock(&kvm->lock);
685 		break;
686 	case KVM_S390_VM_MEM_CLR_CMMA:
687 		ret = -ENXIO;
688 		if (!sclp.has_cmma)
689 			break;
690 		ret = -EINVAL;
691 		if (!kvm->arch.use_cmma)
692 			break;
693 
694 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
695 		mutex_lock(&kvm->lock);
696 		idx = srcu_read_lock(&kvm->srcu);
697 		s390_reset_cmma(kvm->arch.gmap->mm);
698 		srcu_read_unlock(&kvm->srcu, idx);
699 		mutex_unlock(&kvm->lock);
700 		ret = 0;
701 		break;
702 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
703 		unsigned long new_limit;
704 
705 		if (kvm_is_ucontrol(kvm))
706 			return -EINVAL;
707 
708 		if (get_user(new_limit, (u64 __user *)attr->addr))
709 			return -EFAULT;
710 
711 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
712 		    new_limit > kvm->arch.mem_limit)
713 			return -E2BIG;
714 
715 		if (!new_limit)
716 			return -EINVAL;
717 
718 		/* gmap_create takes last usable address */
719 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
720 			new_limit -= 1;
721 
722 		ret = -EBUSY;
723 		mutex_lock(&kvm->lock);
724 		if (!kvm->created_vcpus) {
725 			/* gmap_create will round the limit up */
726 			struct gmap *new = gmap_create(current->mm, new_limit);
727 
728 			if (!new) {
729 				ret = -ENOMEM;
730 			} else {
731 				gmap_remove(kvm->arch.gmap);
732 				new->private = kvm;
733 				kvm->arch.gmap = new;
734 				ret = 0;
735 			}
736 		}
737 		mutex_unlock(&kvm->lock);
738 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
739 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
740 			 (void *) kvm->arch.gmap->asce);
741 		break;
742 	}
743 	default:
744 		ret = -ENXIO;
745 		break;
746 	}
747 	return ret;
748 }
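/*
 * Editorial note (not part of the original source): the attribute groups
 * above are driven through the KVM_SET_DEVICE_ATTR / KVM_GET_DEVICE_ATTR /
 * KVM_HAS_DEVICE_ATTR vm ioctls; a minimal sketch that caps guest memory at
 * 4 GiB, assuming vm_fd is valid and no VCPUs have been created yet:
 *
 *	__u64 limit = 4ULL << 30;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64)(unsigned long)&limit,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr))
 *		perror("KVM_SET_DEVICE_ATTR");
 */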
749 
750 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
751 
752 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
753 {
754 	struct kvm_vcpu *vcpu;
755 	int i;
756 
757 	if (!test_kvm_facility(kvm, 76))
758 		return -EINVAL;
759 
760 	mutex_lock(&kvm->lock);
761 	switch (attr->attr) {
762 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
763 		get_random_bytes(
764 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
765 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
766 		kvm->arch.crypto.aes_kw = 1;
767 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
768 		break;
769 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
770 		get_random_bytes(
771 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
772 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
773 		kvm->arch.crypto.dea_kw = 1;
774 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
775 		break;
776 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
777 		kvm->arch.crypto.aes_kw = 0;
778 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
779 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
780 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
781 		break;
782 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
783 		kvm->arch.crypto.dea_kw = 0;
784 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
785 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
786 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
787 		break;
788 	default:
789 		mutex_unlock(&kvm->lock);
790 		return -ENXIO;
791 	}
792 
793 	kvm_for_each_vcpu(i, vcpu, kvm) {
794 		kvm_s390_vcpu_crypto_setup(vcpu);
795 		exit_sie(vcpu);
796 	}
797 	mutex_unlock(&kvm->lock);
798 	return 0;
799 }
800 
801 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
802 {
803 	int cx;
804 	struct kvm_vcpu *vcpu;
805 
806 	kvm_for_each_vcpu(cx, vcpu, kvm)
807 		kvm_s390_sync_request(req, vcpu);
808 }
809 
810 /*
811  * Must be called with kvm->srcu held to avoid races on memslots, and with
812  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
813  */
814 static int kvm_s390_vm_start_migration(struct kvm *kvm)
815 {
816 	struct kvm_s390_migration_state *mgs;
817 	struct kvm_memory_slot *ms;
818 	/* should be the only one */
819 	struct kvm_memslots *slots;
820 	unsigned long ram_pages;
821 	int slotnr;
822 
823 	/* migration mode already enabled */
824 	if (kvm->arch.migration_state)
825 		return 0;
826 
827 	slots = kvm_memslots(kvm);
828 	if (!slots || !slots->used_slots)
829 		return -EINVAL;
830 
831 	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
832 	if (!mgs)
833 		return -ENOMEM;
834 	kvm->arch.migration_state = mgs;
835 
836 	if (kvm->arch.use_cmma) {
837 		/*
838 		 * Get the first slot. They are reverse sorted by base_gfn, so
839 		 * the first slot is also the one at the end of the address
840 		 * space. We have verified above that at least one slot is
841 		 * present.
842 		 */
843 		ms = slots->memslots;
844 		/* round up so we only use full longs */
845 		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
846 		/* allocate enough bytes to store all the bits */
847 		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
848 		if (!mgs->pgste_bitmap) {
849 			kfree(mgs);
850 			kvm->arch.migration_state = NULL;
851 			return -ENOMEM;
852 		}
853 
854 		mgs->bitmap_size = ram_pages;
855 		atomic64_set(&mgs->dirty_pages, ram_pages);
856 		/* mark all the pages in active slots as dirty */
857 		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
858 			ms = slots->memslots + slotnr;
859 			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
860 		}
861 
862 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
863 	}
864 	return 0;
865 }
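/*
 * Editorial note (not part of the original source): a worked example of the
 * bitmap sizing above.  The PGSTE dirty bitmap carries one bit per 4 KiB
 * guest page up to the end of the highest memslot, rounded up to a multiple
 * of BITS_PER_LONG.  For a guest with a single 1 GiB slot that is 262144
 * bits (already a multiple of 64), i.e. a 32 KiB vmalloc'ed allocation; all
 * bits covering the slots start out set and dirty_pages starts at 262144.
 */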
866 
867 /*
868  * Must be called with kvm->slots_lock to avoid races with ourselves and
869  * kvm_s390_vm_start_migration.
870  */
871 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
872 {
873 	struct kvm_s390_migration_state *mgs;
874 
875 	/* migration mode already disabled */
876 	if (!kvm->arch.migration_state)
877 		return 0;
878 	mgs = kvm->arch.migration_state;
879 	kvm->arch.migration_state = NULL;
880 
881 	if (kvm->arch.use_cmma) {
882 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
883 		/* We have to wait for the essa emulation to finish */
884 		synchronize_srcu(&kvm->srcu);
885 		vfree(mgs->pgste_bitmap);
886 	}
887 	kfree(mgs);
888 	return 0;
889 }
890 
891 static int kvm_s390_vm_set_migration(struct kvm *kvm,
892 				     struct kvm_device_attr *attr)
893 {
894 	int res = -ENXIO;
895 
896 	mutex_lock(&kvm->slots_lock);
897 	switch (attr->attr) {
898 	case KVM_S390_VM_MIGRATION_START:
899 		res = kvm_s390_vm_start_migration(kvm);
900 		break;
901 	case KVM_S390_VM_MIGRATION_STOP:
902 		res = kvm_s390_vm_stop_migration(kvm);
903 		break;
904 	default:
905 		break;
906 	}
907 	mutex_unlock(&kvm->slots_lock);
908 
909 	return res;
910 }
911 
912 static int kvm_s390_vm_get_migration(struct kvm *kvm,
913 				     struct kvm_device_attr *attr)
914 {
915 	u64 mig = (kvm->arch.migration_state != NULL);
916 
917 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
918 		return -ENXIO;
919 
920 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
921 		return -EFAULT;
922 	return 0;
923 }
924 
925 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
926 {
927 	struct kvm_s390_vm_tod_clock gtod;
928 
929 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
930 		return -EFAULT;
931 
932 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
933 		return -EINVAL;
934 	kvm_s390_set_tod_clock(kvm, &gtod);
935 
936 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
937 		gtod.epoch_idx, gtod.tod);
938 
939 	return 0;
940 }
941 
942 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
943 {
944 	u8 gtod_high;
945 
946 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
947 					   sizeof(gtod_high)))
948 		return -EFAULT;
949 
950 	if (gtod_high != 0)
951 		return -EINVAL;
952 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
953 
954 	return 0;
955 }
956 
957 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
958 {
959 	struct kvm_s390_vm_tod_clock gtod = { 0 };
960 
961 	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
962 			   sizeof(gtod.tod)))
963 		return -EFAULT;
964 
965 	kvm_s390_set_tod_clock(kvm, &gtod);
966 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
967 	return 0;
968 }
969 
970 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
971 {
972 	int ret;
973 
974 	if (attr->flags)
975 		return -EINVAL;
976 
977 	switch (attr->attr) {
978 	case KVM_S390_VM_TOD_EXT:
979 		ret = kvm_s390_set_tod_ext(kvm, attr);
980 		break;
981 	case KVM_S390_VM_TOD_HIGH:
982 		ret = kvm_s390_set_tod_high(kvm, attr);
983 		break;
984 	case KVM_S390_VM_TOD_LOW:
985 		ret = kvm_s390_set_tod_low(kvm, attr);
986 		break;
987 	default:
988 		ret = -ENXIO;
989 		break;
990 	}
991 	return ret;
992 }
993 
994 static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
995 					struct kvm_s390_vm_tod_clock *gtod)
996 {
997 	struct kvm_s390_tod_clock_ext htod;
998 
999 	preempt_disable();
1000 
1001 	get_tod_clock_ext((char *)&htod);
1002 
1003 	gtod->tod = htod.tod + kvm->arch.epoch;
1004 	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1005 
1006 	if (gtod->tod < htod.tod)
1007 		gtod->epoch_idx += 1;
1008 
1009 	preempt_enable();
1010 }
1011 
1012 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1013 {
1014 	struct kvm_s390_vm_tod_clock gtod;
1015 
1016 	memset(&gtod, 0, sizeof(gtod));
1017 
1018 	if (test_kvm_facility(kvm, 139))
1019 		kvm_s390_get_tod_clock_ext(kvm, &gtod);
1020 	else
1021 		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
1022 
1023 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1024 		return -EFAULT;
1025 
1026 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1027 		gtod.epoch_idx, gtod.tod);
1028 	return 0;
1029 }
1030 
1031 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1032 {
1033 	u8 gtod_high = 0;
1034 
1035 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
1036 					 sizeof(gtod_high)))
1037 		return -EFAULT;
1038 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1039 
1040 	return 0;
1041 }
1042 
1043 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1044 {
1045 	u64 gtod;
1046 
1047 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1048 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1049 		return -EFAULT;
1050 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1051 
1052 	return 0;
1053 }
1054 
1055 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1056 {
1057 	int ret;
1058 
1059 	if (attr->flags)
1060 		return -EINVAL;
1061 
1062 	switch (attr->attr) {
1063 	case KVM_S390_VM_TOD_EXT:
1064 		ret = kvm_s390_get_tod_ext(kvm, attr);
1065 		break;
1066 	case KVM_S390_VM_TOD_HIGH:
1067 		ret = kvm_s390_get_tod_high(kvm, attr);
1068 		break;
1069 	case KVM_S390_VM_TOD_LOW:
1070 		ret = kvm_s390_get_tod_low(kvm, attr);
1071 		break;
1072 	default:
1073 		ret = -ENXIO;
1074 		break;
1075 	}
1076 	return ret;
1077 }
1078 
1079 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1080 {
1081 	struct kvm_s390_vm_cpu_processor *proc;
1082 	u16 lowest_ibc, unblocked_ibc;
1083 	int ret = 0;
1084 
1085 	mutex_lock(&kvm->lock);
1086 	if (kvm->created_vcpus) {
1087 		ret = -EBUSY;
1088 		goto out;
1089 	}
1090 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1091 	if (!proc) {
1092 		ret = -ENOMEM;
1093 		goto out;
1094 	}
1095 	if (!copy_from_user(proc, (void __user *)attr->addr,
1096 			    sizeof(*proc))) {
1097 		kvm->arch.model.cpuid = proc->cpuid;
1098 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1099 		unblocked_ibc = sclp.ibc & 0xfff;
1100 		if (lowest_ibc && proc->ibc) {
1101 			if (proc->ibc > unblocked_ibc)
1102 				kvm->arch.model.ibc = unblocked_ibc;
1103 			else if (proc->ibc < lowest_ibc)
1104 				kvm->arch.model.ibc = lowest_ibc;
1105 			else
1106 				kvm->arch.model.ibc = proc->ibc;
1107 		}
1108 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1109 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1110 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1111 			 kvm->arch.model.ibc,
1112 			 kvm->arch.model.cpuid);
1113 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1114 			 kvm->arch.model.fac_list[0],
1115 			 kvm->arch.model.fac_list[1],
1116 			 kvm->arch.model.fac_list[2]);
1117 	} else
1118 		ret = -EFAULT;
1119 	kfree(proc);
1120 out:
1121 	mutex_unlock(&kvm->lock);
1122 	return ret;
1123 }
1124 
1125 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1126 				       struct kvm_device_attr *attr)
1127 {
1128 	struct kvm_s390_vm_cpu_feat data;
1129 	int ret = -EBUSY;
1130 
1131 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1132 		return -EFAULT;
1133 	if (!bitmap_subset((unsigned long *) data.feat,
1134 			   kvm_s390_available_cpu_feat,
1135 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1136 		return -EINVAL;
1137 
1138 	mutex_lock(&kvm->lock);
1139 	if (!kvm->created_vcpus) {
1140 		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1141 			    KVM_S390_VM_CPU_FEAT_NR_BITS);
1142 		ret = 0;
1143 	}
1144 	mutex_unlock(&kvm->lock);
1145 	return ret;
1146 }
1147 
1148 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1149 					  struct kvm_device_attr *attr)
1150 {
1151 	/*
1152 	 * Once supported by kernel + hw, we have to store the subfunctions
1153 	 * in kvm->arch and remember that user space configured them.
1154 	 */
1155 	return -ENXIO;
1156 }
1157 
1158 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1159 {
1160 	int ret = -ENXIO;
1161 
1162 	switch (attr->attr) {
1163 	case KVM_S390_VM_CPU_PROCESSOR:
1164 		ret = kvm_s390_set_processor(kvm, attr);
1165 		break;
1166 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1167 		ret = kvm_s390_set_processor_feat(kvm, attr);
1168 		break;
1169 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1170 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1171 		break;
1172 	}
1173 	return ret;
1174 }
1175 
1176 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1177 {
1178 	struct kvm_s390_vm_cpu_processor *proc;
1179 	int ret = 0;
1180 
1181 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1182 	if (!proc) {
1183 		ret = -ENOMEM;
1184 		goto out;
1185 	}
1186 	proc->cpuid = kvm->arch.model.cpuid;
1187 	proc->ibc = kvm->arch.model.ibc;
1188 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1189 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1190 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1191 		 kvm->arch.model.ibc,
1192 		 kvm->arch.model.cpuid);
1193 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1194 		 kvm->arch.model.fac_list[0],
1195 		 kvm->arch.model.fac_list[1],
1196 		 kvm->arch.model.fac_list[2]);
1197 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1198 		ret = -EFAULT;
1199 	kfree(proc);
1200 out:
1201 	return ret;
1202 }
1203 
1204 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1205 {
1206 	struct kvm_s390_vm_cpu_machine *mach;
1207 	int ret = 0;
1208 
1209 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1210 	if (!mach) {
1211 		ret = -ENOMEM;
1212 		goto out;
1213 	}
1214 	get_cpu_id((struct cpuid *) &mach->cpuid);
1215 	mach->ibc = sclp.ibc;
1216 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1217 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1218 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1219 	       sizeof(S390_lowcore.stfle_fac_list));
1220 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1221 		 kvm->arch.model.ibc,
1222 		 kvm->arch.model.cpuid);
1223 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1224 		 mach->fac_mask[0],
1225 		 mach->fac_mask[1],
1226 		 mach->fac_mask[2]);
1227 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1228 		 mach->fac_list[0],
1229 		 mach->fac_list[1],
1230 		 mach->fac_list[2]);
1231 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1232 		ret = -EFAULT;
1233 	kfree(mach);
1234 out:
1235 	return ret;
1236 }
1237 
1238 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1239 				       struct kvm_device_attr *attr)
1240 {
1241 	struct kvm_s390_vm_cpu_feat data;
1242 
1243 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1244 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1245 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1246 		return -EFAULT;
1247 	return 0;
1248 }
1249 
1250 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1251 				     struct kvm_device_attr *attr)
1252 {
1253 	struct kvm_s390_vm_cpu_feat data;
1254 
1255 	bitmap_copy((unsigned long *) data.feat,
1256 		    kvm_s390_available_cpu_feat,
1257 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1258 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1259 		return -EFAULT;
1260 	return 0;
1261 }
1262 
1263 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1264 					  struct kvm_device_attr *attr)
1265 {
1266 	/*
1267 	 * Once we can actually configure subfunctions (kernel + hw support),
1268 	 * we have to check if they were already set by user space, if so copy
1269 	 * them from kvm->arch.
1270 	 */
1271 	return -ENXIO;
1272 }
1273 
1274 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1275 					struct kvm_device_attr *attr)
1276 {
1277 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1278 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1279 		return -EFAULT;
1280 	return 0;
1281 }
1282 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1283 {
1284 	int ret = -ENXIO;
1285 
1286 	switch (attr->attr) {
1287 	case KVM_S390_VM_CPU_PROCESSOR:
1288 		ret = kvm_s390_get_processor(kvm, attr);
1289 		break;
1290 	case KVM_S390_VM_CPU_MACHINE:
1291 		ret = kvm_s390_get_machine(kvm, attr);
1292 		break;
1293 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1294 		ret = kvm_s390_get_processor_feat(kvm, attr);
1295 		break;
1296 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1297 		ret = kvm_s390_get_machine_feat(kvm, attr);
1298 		break;
1299 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1300 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1301 		break;
1302 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1303 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1304 		break;
1305 	}
1306 	return ret;
1307 }
1308 
1309 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1310 {
1311 	int ret;
1312 
1313 	switch (attr->group) {
1314 	case KVM_S390_VM_MEM_CTRL:
1315 		ret = kvm_s390_set_mem_control(kvm, attr);
1316 		break;
1317 	case KVM_S390_VM_TOD:
1318 		ret = kvm_s390_set_tod(kvm, attr);
1319 		break;
1320 	case KVM_S390_VM_CPU_MODEL:
1321 		ret = kvm_s390_set_cpu_model(kvm, attr);
1322 		break;
1323 	case KVM_S390_VM_CRYPTO:
1324 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1325 		break;
1326 	case KVM_S390_VM_MIGRATION:
1327 		ret = kvm_s390_vm_set_migration(kvm, attr);
1328 		break;
1329 	default:
1330 		ret = -ENXIO;
1331 		break;
1332 	}
1333 
1334 	return ret;
1335 }
1336 
1337 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1338 {
1339 	int ret;
1340 
1341 	switch (attr->group) {
1342 	case KVM_S390_VM_MEM_CTRL:
1343 		ret = kvm_s390_get_mem_control(kvm, attr);
1344 		break;
1345 	case KVM_S390_VM_TOD:
1346 		ret = kvm_s390_get_tod(kvm, attr);
1347 		break;
1348 	case KVM_S390_VM_CPU_MODEL:
1349 		ret = kvm_s390_get_cpu_model(kvm, attr);
1350 		break;
1351 	case KVM_S390_VM_MIGRATION:
1352 		ret = kvm_s390_vm_get_migration(kvm, attr);
1353 		break;
1354 	default:
1355 		ret = -ENXIO;
1356 		break;
1357 	}
1358 
1359 	return ret;
1360 }
1361 
1362 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1363 {
1364 	int ret;
1365 
1366 	switch (attr->group) {
1367 	case KVM_S390_VM_MEM_CTRL:
1368 		switch (attr->attr) {
1369 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1370 		case KVM_S390_VM_MEM_CLR_CMMA:
1371 			ret = sclp.has_cmma ? 0 : -ENXIO;
1372 			break;
1373 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1374 			ret = 0;
1375 			break;
1376 		default:
1377 			ret = -ENXIO;
1378 			break;
1379 		}
1380 		break;
1381 	case KVM_S390_VM_TOD:
1382 		switch (attr->attr) {
1383 		case KVM_S390_VM_TOD_LOW:
1384 		case KVM_S390_VM_TOD_HIGH:
1385 			ret = 0;
1386 			break;
1387 		default:
1388 			ret = -ENXIO;
1389 			break;
1390 		}
1391 		break;
1392 	case KVM_S390_VM_CPU_MODEL:
1393 		switch (attr->attr) {
1394 		case KVM_S390_VM_CPU_PROCESSOR:
1395 		case KVM_S390_VM_CPU_MACHINE:
1396 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1397 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1398 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1399 			ret = 0;
1400 			break;
1401 		/* configuring subfunctions is not supported yet */
1402 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1403 		default:
1404 			ret = -ENXIO;
1405 			break;
1406 		}
1407 		break;
1408 	case KVM_S390_VM_CRYPTO:
1409 		switch (attr->attr) {
1410 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1411 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1412 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1413 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1414 			ret = 0;
1415 			break;
1416 		default:
1417 			ret = -ENXIO;
1418 			break;
1419 		}
1420 		break;
1421 	case KVM_S390_VM_MIGRATION:
1422 		ret = 0;
1423 		break;
1424 	default:
1425 		ret = -ENXIO;
1426 		break;
1427 	}
1428 
1429 	return ret;
1430 }
1431 
1432 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1433 {
1434 	uint8_t *keys;
1435 	uint64_t hva;
1436 	int srcu_idx, i, r = 0;
1437 
1438 	if (args->flags != 0)
1439 		return -EINVAL;
1440 
1441 	/* Is this guest using storage keys? */
1442 	if (!mm_use_skey(current->mm))
1443 		return KVM_S390_GET_SKEYS_NONE;
1444 
1445 	/* Enforce sane limit on memory allocation */
1446 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1447 		return -EINVAL;
1448 
1449 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1450 	if (!keys)
1451 		return -ENOMEM;
1452 
1453 	down_read(&current->mm->mmap_sem);
1454 	srcu_idx = srcu_read_lock(&kvm->srcu);
1455 	for (i = 0; i < args->count; i++) {
1456 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1457 		if (kvm_is_error_hva(hva)) {
1458 			r = -EFAULT;
1459 			break;
1460 		}
1461 
1462 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1463 		if (r)
1464 			break;
1465 	}
1466 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1467 	up_read(&current->mm->mmap_sem);
1468 
1469 	if (!r) {
1470 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1471 				 sizeof(uint8_t) * args->count);
1472 		if (r)
1473 			r = -EFAULT;
1474 	}
1475 
1476 	kvfree(keys);
1477 	return r;
1478 }
1479 
1480 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1481 {
1482 	uint8_t *keys;
1483 	uint64_t hva;
1484 	int srcu_idx, i, r = 0;
1485 
1486 	if (args->flags != 0)
1487 		return -EINVAL;
1488 
1489 	/* Enforce sane limit on memory allocation */
1490 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1491 		return -EINVAL;
1492 
1493 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1494 	if (!keys)
1495 		return -ENOMEM;
1496 
1497 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1498 			   sizeof(uint8_t) * args->count);
1499 	if (r) {
1500 		r = -EFAULT;
1501 		goto out;
1502 	}
1503 
1504 	/* Enable storage key handling for the guest */
1505 	r = s390_enable_skey();
1506 	if (r)
1507 		goto out;
1508 
1509 	down_read(&current->mm->mmap_sem);
1510 	srcu_idx = srcu_read_lock(&kvm->srcu);
1511 	for (i = 0; i < args->count; i++) {
1512 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1513 		if (kvm_is_error_hva(hva)) {
1514 			r = -EFAULT;
1515 			break;
1516 		}
1517 
1518 		/* Lowest order bit is reserved */
1519 		if (keys[i] & 0x01) {
1520 			r = -EINVAL;
1521 			break;
1522 		}
1523 
1524 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1525 		if (r)
1526 			break;
1527 	}
1528 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1529 	up_read(&current->mm->mmap_sem);
1530 out:
1531 	kvfree(keys);
1532 	return r;
1533 }
1534 
1535 /*
1536  * Base address and length must be sent at the start of each block, therefore
1537  * it's cheaper to send some clean data, as long as it's less than the size of
1538  * two longs.
1539  */
1540 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1541 /* for consistency */
1542 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
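/*
 * Editorial note (not part of the original source): on a 64-bit kernel
 * KVM_S390_MAX_BIT_DISTANCE is 2 * 8 = 16 pages.  Reporting a new block
 * means another ioctl round trip carrying a fresh start_gfn and count
 * (two 8-byte values), so gaps of up to 16 clean pages are cheaper to
 * transmit inline as zero bytes than to split the run.
 */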
1543 
1544 /*
1545  * This function searches for the next page with dirty CMMA attributes, and
1546  * saves the attributes in the buffer up to either the end of the buffer or
1547  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1548  * no trailing clean bytes are saved.
1549  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1550  * output buffer will indicate 0 as length.
1551  */
1552 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1553 				  struct kvm_s390_cmma_log *args)
1554 {
1555 	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1556 	unsigned long bufsize, hva, pgstev, i, next, cur;
1557 	int srcu_idx, peek, r = 0, rr;
1558 	u8 *res;
1559 
1560 	cur = args->start_gfn;
1561 	i = next = pgstev = 0;
1562 
1563 	if (unlikely(!kvm->arch.use_cmma))
1564 		return -ENXIO;
1565 	/* Invalid/unsupported flags were specified */
1566 	if (args->flags & ~KVM_S390_CMMA_PEEK)
1567 		return -EINVAL;
1568 	/* Migration mode query, and we are not doing a migration */
1569 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1570 	if (!peek && !s)
1571 		return -EINVAL;
1572 	/* CMMA is disabled or was not used, or the buffer has length zero */
1573 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1574 	if (!bufsize || !kvm->mm->context.use_cmma) {
1575 		memset(args, 0, sizeof(*args));
1576 		return 0;
1577 	}
1578 
1579 	if (!peek) {
1580 		/* We are not peeking, and there are no dirty pages */
1581 		if (!atomic64_read(&s->dirty_pages)) {
1582 			memset(args, 0, sizeof(*args));
1583 			return 0;
1584 		}
1585 		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1586 				    args->start_gfn);
1587 		if (cur >= s->bitmap_size)	/* nothing found, loop back */
1588 			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1589 		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
1590 			memset(args, 0, sizeof(*args));
1591 			return 0;
1592 		}
1593 		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1594 	}
1595 
1596 	res = vmalloc(bufsize);
1597 	if (!res)
1598 		return -ENOMEM;
1599 
1600 	args->start_gfn = cur;
1601 
1602 	down_read(&kvm->mm->mmap_sem);
1603 	srcu_idx = srcu_read_lock(&kvm->srcu);
1604 	while (i < bufsize) {
1605 		hva = gfn_to_hva(kvm, cur);
1606 		if (kvm_is_error_hva(hva)) {
1607 			r = -EFAULT;
1608 			break;
1609 		}
1610 		/* decrement only if we actually flipped the bit to 0 */
1611 		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1612 			atomic64_dec(&s->dirty_pages);
1613 		r = get_pgste(kvm->mm, hva, &pgstev);
1614 		if (r < 0)
1615 			pgstev = 0;
1616 		/* save the value */
1617 		res[i++] = (pgstev >> 24) & 0x43;
1618 		/*
1619 		 * if the next bit is too far away, stop.
1620 		 * if we reached the previous "next", find the next one
1621 		 */
1622 		if (!peek) {
1623 			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1624 				break;
1625 			if (cur == next)
1626 				next = find_next_bit(s->pgste_bitmap,
1627 						     s->bitmap_size, cur + 1);
1628 		/* reached the end of the bitmap or of the buffer, stop */
1629 			if ((next >= s->bitmap_size) ||
1630 			    (next >= args->start_gfn + bufsize))
1631 				break;
1632 		}
1633 		cur++;
1634 	}
1635 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1636 	up_read(&kvm->mm->mmap_sem);
1637 	args->count = i;
1638 	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1639 
1640 	rr = copy_to_user((void __user *)args->values, res, args->count);
1641 	if (rr)
1642 		r = -EFAULT;
1643 
1644 	vfree(res);
1645 	return r;
1646 }
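/*
 * Editorial note (not part of the original source): a minimal userspace
 * sketch of draining the CMMA log during migration, assuming vm_fd is valid
 * and migration mode was started via KVM_S390_VM_MIGRATION_START:
 *
 *	__u8 values[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count     = sizeof(values),
 *		.flags     = 0,			// or KVM_S390_CMMA_PEEK
 *		.values    = (__u64)(unsigned long)values,
 *	};
 *
 *	do {
 *		if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) < 0)
 *			break;
 *		// log.start_gfn/log.count describe the returned block,
 *		// log.remaining is the number of still-dirty pages
 *		log.start_gfn += log.count;
 *		log.count = sizeof(values);
 *	} while (log.remaining);
 */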
1647 
1648 /*
1649  * This function sets the CMMA attributes for the given pages. If the input
1650  * buffer has zero length, no action is taken, otherwise the attributes are
1651  * set and the mm->context.use_cmma flag is set.
1652  */
1653 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1654 				  const struct kvm_s390_cmma_log *args)
1655 {
1656 	unsigned long hva, mask, pgstev, i;
1657 	uint8_t *bits;
1658 	int srcu_idx, r = 0;
1659 
1660 	mask = args->mask;
1661 
1662 	if (!kvm->arch.use_cmma)
1663 		return -ENXIO;
1664 	/* invalid/unsupported flags */
1665 	if (args->flags != 0)
1666 		return -EINVAL;
1667 	/* Enforce sane limit on memory allocation */
1668 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
1669 		return -EINVAL;
1670 	/* Nothing to do */
1671 	if (args->count == 0)
1672 		return 0;
1673 
1674 	bits = vmalloc(sizeof(*bits) * args->count);
1675 	if (!bits)
1676 		return -ENOMEM;
1677 
1678 	r = copy_from_user(bits, (void __user *)args->values, args->count);
1679 	if (r) {
1680 		r = -EFAULT;
1681 		goto out;
1682 	}
1683 
1684 	down_read(&kvm->mm->mmap_sem);
1685 	srcu_idx = srcu_read_lock(&kvm->srcu);
1686 	for (i = 0; i < args->count; i++) {
1687 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1688 		if (kvm_is_error_hva(hva)) {
1689 			r = -EFAULT;
1690 			break;
1691 		}
1692 
1693 		pgstev = bits[i];
1694 		pgstev = pgstev << 24;
1695 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1696 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
1697 	}
1698 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1699 	up_read(&kvm->mm->mmap_sem);
1700 
1701 	if (!kvm->mm->context.use_cmma) {
1702 		down_write(&kvm->mm->mmap_sem);
1703 		kvm->mm->context.use_cmma = 1;
1704 		up_write(&kvm->mm->mmap_sem);
1705 	}
1706 out:
1707 	vfree(bits);
1708 	return r;
1709 }
1710 
1711 long kvm_arch_vm_ioctl(struct file *filp,
1712 		       unsigned int ioctl, unsigned long arg)
1713 {
1714 	struct kvm *kvm = filp->private_data;
1715 	void __user *argp = (void __user *)arg;
1716 	struct kvm_device_attr attr;
1717 	int r;
1718 
1719 	switch (ioctl) {
1720 	case KVM_S390_INTERRUPT: {
1721 		struct kvm_s390_interrupt s390int;
1722 
1723 		r = -EFAULT;
1724 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
1725 			break;
1726 		r = kvm_s390_inject_vm(kvm, &s390int);
1727 		break;
1728 	}
1729 	case KVM_ENABLE_CAP: {
1730 		struct kvm_enable_cap cap;
1731 		r = -EFAULT;
1732 		if (copy_from_user(&cap, argp, sizeof(cap)))
1733 			break;
1734 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1735 		break;
1736 	}
1737 	case KVM_CREATE_IRQCHIP: {
1738 		struct kvm_irq_routing_entry routing;
1739 
1740 		r = -EINVAL;
1741 		if (kvm->arch.use_irqchip) {
1742 			/* Set up dummy routing. */
1743 			memset(&routing, 0, sizeof(routing));
1744 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1745 		}
1746 		break;
1747 	}
1748 	case KVM_SET_DEVICE_ATTR: {
1749 		r = -EFAULT;
1750 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1751 			break;
1752 		r = kvm_s390_vm_set_attr(kvm, &attr);
1753 		break;
1754 	}
1755 	case KVM_GET_DEVICE_ATTR: {
1756 		r = -EFAULT;
1757 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1758 			break;
1759 		r = kvm_s390_vm_get_attr(kvm, &attr);
1760 		break;
1761 	}
1762 	case KVM_HAS_DEVICE_ATTR: {
1763 		r = -EFAULT;
1764 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1765 			break;
1766 		r = kvm_s390_vm_has_attr(kvm, &attr);
1767 		break;
1768 	}
1769 	case KVM_S390_GET_SKEYS: {
1770 		struct kvm_s390_skeys args;
1771 
1772 		r = -EFAULT;
1773 		if (copy_from_user(&args, argp,
1774 				   sizeof(struct kvm_s390_skeys)))
1775 			break;
1776 		r = kvm_s390_get_skeys(kvm, &args);
1777 		break;
1778 	}
1779 	case KVM_S390_SET_SKEYS: {
1780 		struct kvm_s390_skeys args;
1781 
1782 		r = -EFAULT;
1783 		if (copy_from_user(&args, argp,
1784 				   sizeof(struct kvm_s390_skeys)))
1785 			break;
1786 		r = kvm_s390_set_skeys(kvm, &args);
1787 		break;
1788 	}
1789 	case KVM_S390_GET_CMMA_BITS: {
1790 		struct kvm_s390_cmma_log args;
1791 
1792 		r = -EFAULT;
1793 		if (copy_from_user(&args, argp, sizeof(args)))
1794 			break;
1795 		mutex_lock(&kvm->slots_lock);
1796 		r = kvm_s390_get_cmma_bits(kvm, &args);
1797 		mutex_unlock(&kvm->slots_lock);
1798 		if (!r) {
1799 			r = copy_to_user(argp, &args, sizeof(args));
1800 			if (r)
1801 				r = -EFAULT;
1802 		}
1803 		break;
1804 	}
1805 	case KVM_S390_SET_CMMA_BITS: {
1806 		struct kvm_s390_cmma_log args;
1807 
1808 		r = -EFAULT;
1809 		if (copy_from_user(&args, argp, sizeof(args)))
1810 			break;
1811 		mutex_lock(&kvm->slots_lock);
1812 		r = kvm_s390_set_cmma_bits(kvm, &args);
1813 		mutex_unlock(&kvm->slots_lock);
1814 		break;
1815 	}
1816 	default:
1817 		r = -ENOTTY;
1818 	}
1819 
1820 	return r;
1821 }
1822 
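/*
 * Query the adjunct processor (crypto) configuration with PQAP(QCI). The
 * 128 byte config block is filled on success; the condition code of the
 * instruction is returned, 0 meaning success.
 */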
1823 static int kvm_s390_query_ap_config(u8 *config)
1824 {
1825 	u32 fcn_code = 0x04000000UL;
1826 	u32 cc = 0;
1827 
1828 	memset(config, 0, 128);
1829 	asm volatile(
1830 		"lgr 0,%1\n"
1831 		"lgr 2,%2\n"
1832 		".long 0xb2af0000\n"		/* PQAP(QCI) */
1833 		"0: ipm %0\n"
1834 		"srl %0,28\n"
1835 		"1:\n"
1836 		EX_TABLE(0b, 1b)
1837 		: "+r" (cc)
1838 		: "r" (fcn_code), "r" (config)
1839 		: "cc", "0", "2", "memory"
1840 	);
1841 
1842 	return cc;
1843 }
1844 
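/*
 * Returns non-zero if AP extended addressing (APXA) is available: the AP
 * query configuration facility (facility 12) must be installed and the
 * APXA bit must be set in the returned config block.
 */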
1845 static int kvm_s390_apxa_installed(void)
1846 {
1847 	u8 config[128];
1848 	int cc;
1849 
1850 	if (test_facility(12)) {
1851 		cc = kvm_s390_query_ap_config(config);
1852 
1853 		if (cc)
1854 			pr_err("PQAP(QCI) failed with cc=%d", cc);
1855 		else
1856 			return config[0] & 0x40;
1857 	}
1858 
1859 	return 0;
1860 }
1861 
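/*
 * Select the crypto control block format for this VM: format 2 when APXA
 * is available, format 1 otherwise, and store the CRYCB origin together
 * with the format in the crycb descriptor.
 */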
1862 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1863 {
1864 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1865 
1866 	if (kvm_s390_apxa_installed())
1867 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1868 	else
1869 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1870 }
1871 
1872 static u64 kvm_s390_get_initial_cpuid(void)
1873 {
1874 	struct cpuid cpuid;
1875 
1876 	get_cpu_id(&cpuid);
1877 	cpuid.version = 0xff;
1878 	return *((u64 *) &cpuid);
1879 }
1880 
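/*
 * Initialize the per-VM crypto state when facility 76 is available: set up
 * the CRYCB, enable AES and DEA protected key functions and generate
 * random wrapping key masks.
 */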
1881 static void kvm_s390_crypto_init(struct kvm *kvm)
1882 {
1883 	if (!test_kvm_facility(kvm, 76))
1884 		return;
1885 
1886 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1887 	kvm_s390_set_crycb_format(kvm);
1888 
1889 	/* Enable AES/DEA protected key functions by default */
1890 	kvm->arch.crypto.aes_kw = 1;
1891 	kvm->arch.crypto.dea_kw = 1;
1892 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1893 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1894 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1895 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1896 }
1897 
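/* Free a VM's system control area, basic or extended. */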
1898 static void sca_dispose(struct kvm *kvm)
1899 {
1900 	if (kvm->arch.use_esca)
1901 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1902 	else
1903 		free_page((unsigned long)(kvm->arch.sca));
1904 	kvm->arch.sca = NULL;
1905 }
1906 
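/*
 * Create the architecture specific part of a VM: allocate the SCA and the
 * second SIE page, register the debug feature, set up the facility mask
 * and list, crypto and floating interrupt state, and create the guest
 * address space (gmap) unless this is a ucontrol VM.
 */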
1907 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1908 {
1909 	gfp_t alloc_flags = GFP_KERNEL;
1910 	int i, rc;
1911 	char debug_name[16];
1912 	static unsigned long sca_offset;
1913 
1914 	rc = -EINVAL;
1915 #ifdef CONFIG_KVM_S390_UCONTROL
1916 	if (type & ~KVM_VM_S390_UCONTROL)
1917 		goto out_err;
1918 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1919 		goto out_err;
1920 #else
1921 	if (type)
1922 		goto out_err;
1923 #endif
1924 
1925 	rc = s390_enable_sie();
1926 	if (rc)
1927 		goto out_err;
1928 
1929 	rc = -ENOMEM;
1930 
1931 	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1932 
1933 	kvm->arch.use_esca = 0; /* start with basic SCA */
1934 	if (!sclp.has_64bscao)
1935 		alloc_flags |= GFP_DMA;
1936 	rwlock_init(&kvm->arch.sca_lock);
1937 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1938 	if (!kvm->arch.sca)
1939 		goto out_err;
1940 	mutex_lock(&kvm_lock);
1941 	sca_offset += 16;
1942 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1943 		sca_offset = 0;
1944 	kvm->arch.sca = (struct bsca_block *)
1945 			((char *) kvm->arch.sca + sca_offset);
1946 	mutex_unlock(&kvm_lock);
1947 
1948 	sprintf(debug_name, "kvm-%u", current->pid);
1949 
1950 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1951 	if (!kvm->arch.dbf)
1952 		goto out_err;
1953 
1954 	kvm->arch.sie_page2 =
1955 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1956 	if (!kvm->arch.sie_page2)
1957 		goto out_err;
1958 
1959 	/* Populate the facility mask initially. */
1960 	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1961 	       sizeof(S390_lowcore.stfle_fac_list));
1962 	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1963 		if (i < kvm_s390_fac_list_mask_size())
1964 			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1965 		else
1966 			kvm->arch.model.fac_mask[i] = 0UL;
1967 	}
1968 
1969 	/* Populate the facility list initially. */
1970 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1971 	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1972 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1973 
1974 	/* we are always in czam mode - even on pre z14 machines */
1975 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
1976 	set_kvm_facility(kvm->arch.model.fac_list, 138);
1977 	/* we emulate STHYI in kvm */
1978 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
1979 	set_kvm_facility(kvm->arch.model.fac_list, 74);
1980 	if (MACHINE_HAS_TLB_GUEST) {
1981 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
1982 		set_kvm_facility(kvm->arch.model.fac_list, 147);
1983 	}
1984 
1985 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1986 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1987 
1988 	kvm_s390_crypto_init(kvm);
1989 
1990 	mutex_init(&kvm->arch.float_int.ais_lock);
1991 	kvm->arch.float_int.simm = 0;
1992 	kvm->arch.float_int.nimm = 0;
1993 	spin_lock_init(&kvm->arch.float_int.lock);
1994 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
1995 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1996 	init_waitqueue_head(&kvm->arch.ipte_wq);
1997 	mutex_init(&kvm->arch.ipte_mutex);
1998 
1999 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2000 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
2001 
2002 	if (type & KVM_VM_S390_UCONTROL) {
2003 		kvm->arch.gmap = NULL;
2004 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2005 	} else {
2006 		if (sclp.hamax == U64_MAX)
2007 			kvm->arch.mem_limit = TASK_SIZE_MAX;
2008 		else
2009 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2010 						    sclp.hamax + 1);
2011 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2012 		if (!kvm->arch.gmap)
2013 			goto out_err;
2014 		kvm->arch.gmap->private = kvm;
2015 		kvm->arch.gmap->pfault_enabled = 0;
2016 	}
2017 
2018 	kvm->arch.css_support = 0;
2019 	kvm->arch.use_irqchip = 0;
2020 	kvm->arch.epoch = 0;
2021 
2022 	spin_lock_init(&kvm->arch.start_stop_lock);
2023 	kvm_s390_vsie_init(kvm);
2024 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2025 
2026 	return 0;
2027 out_err:
2028 	free_page((unsigned long)kvm->arch.sie_page2);
2029 	debug_unregister(kvm->arch.dbf);
2030 	sca_dispose(kvm);
2031 	KVM_EVENT(3, "creation of vm failed: %d", rc);
2032 	return rc;
2033 }
2034 
2035 bool kvm_arch_has_vcpu_debugfs(void)
2036 {
2037 	return false;
2038 }
2039 
2040 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2041 {
2042 	return 0;
2043 }
2044 
2045 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2046 {
2047 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2048 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2049 	kvm_s390_clear_local_irqs(vcpu);
2050 	kvm_clear_async_pf_completion_queue(vcpu);
2051 	if (!kvm_is_ucontrol(vcpu->kvm))
2052 		sca_del_vcpu(vcpu);
2053 
2054 	if (kvm_is_ucontrol(vcpu->kvm))
2055 		gmap_remove(vcpu->arch.gmap);
2056 
2057 	if (vcpu->kvm->arch.use_cmma)
2058 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2059 	free_page((unsigned long)(vcpu->arch.sie_block));
2060 
2061 	kvm_vcpu_uninit(vcpu);
2062 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2063 }
2064 
2065 static void kvm_free_vcpus(struct kvm *kvm)
2066 {
2067 	unsigned int i;
2068 	struct kvm_vcpu *vcpu;
2069 
2070 	kvm_for_each_vcpu(i, vcpu, kvm)
2071 		kvm_arch_vcpu_destroy(vcpu);
2072 
2073 	mutex_lock(&kvm->lock);
2074 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2075 		kvm->vcpus[i] = NULL;
2076 
2077 	atomic_set(&kvm->online_vcpus, 0);
2078 	mutex_unlock(&kvm->lock);
2079 }
2080 
2081 void kvm_arch_destroy_vm(struct kvm *kvm)
2082 {
2083 	kvm_free_vcpus(kvm);
2084 	sca_dispose(kvm);
2085 	debug_unregister(kvm->arch.dbf);
2086 	free_page((unsigned long)kvm->arch.sie_page2);
2087 	if (!kvm_is_ucontrol(kvm))
2088 		gmap_remove(kvm->arch.gmap);
2089 	kvm_s390_destroy_adapters(kvm);
2090 	kvm_s390_clear_float_irqs(kvm);
2091 	kvm_s390_vsie_destroy(kvm);
2092 	if (kvm->arch.migration_state) {
2093 		vfree(kvm->arch.migration_state->pgste_bitmap);
2094 		kfree(kvm->arch.migration_state);
2095 	}
2096 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2097 }
2098 
2099 /* Section: vcpu related */
2100 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2101 {
2102 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2103 	if (!vcpu->arch.gmap)
2104 		return -ENOMEM;
2105 	vcpu->arch.gmap->private = vcpu->kvm;
2106 
2107 	return 0;
2108 }
2109 
2110 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2111 {
2112 	if (!kvm_s390_use_sca_entries())
2113 		return;
2114 	read_lock(&vcpu->kvm->arch.sca_lock);
2115 	if (vcpu->kvm->arch.use_esca) {
2116 		struct esca_block *sca = vcpu->kvm->arch.sca;
2117 
2118 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2119 		sca->cpu[vcpu->vcpu_id].sda = 0;
2120 	} else {
2121 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2122 
2123 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2124 		sca->cpu[vcpu->vcpu_id].sda = 0;
2125 	}
2126 	read_unlock(&vcpu->kvm->arch.sca_lock);
2127 }
2128 
2129 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2130 {
2131 	if (!kvm_s390_use_sca_entries()) {
2132 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2133 
2134 		/* we still need the basic sca for the ipte control */
2135 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2136 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2137 		return;
2138 	}
2139 	read_lock(&vcpu->kvm->arch.sca_lock);
2140 	if (vcpu->kvm->arch.use_esca) {
2141 		struct esca_block *sca = vcpu->kvm->arch.sca;
2142 
2143 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2144 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2145 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2146 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2147 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2148 	} else {
2149 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2150 
2151 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2152 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2153 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2154 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2155 	}
2156 	read_unlock(&vcpu->kvm->arch.sca_lock);
2157 }
2158 
2159 /* Basic SCA to Extended SCA data copy routines */
2160 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2161 {
2162 	d->sda = s->sda;
2163 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2164 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2165 }
2166 
2167 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2168 {
2169 	int i;
2170 
2171 	d->ipte_control = s->ipte_control;
2172 	d->mcn[0] = s->mcn;
2173 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2174 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2175 }
2176 
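/*
 * Replace the basic SCA with an extended SCA: allocate the new block,
 * block all vcpus while the entries are copied and every SIE block is
 * updated to point at the new origin, then free the old basic SCA.
 */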
2177 static int sca_switch_to_extended(struct kvm *kvm)
2178 {
2179 	struct bsca_block *old_sca = kvm->arch.sca;
2180 	struct esca_block *new_sca;
2181 	struct kvm_vcpu *vcpu;
2182 	unsigned int vcpu_idx;
2183 	u32 scaol, scaoh;
2184 
2185 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2186 	if (!new_sca)
2187 		return -ENOMEM;
2188 
2189 	scaoh = (u32)((u64)(new_sca) >> 32);
2190 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2191 
2192 	kvm_s390_vcpu_block_all(kvm);
2193 	write_lock(&kvm->arch.sca_lock);
2194 
2195 	sca_copy_b_to_e(new_sca, old_sca);
2196 
2197 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2198 		vcpu->arch.sie_block->scaoh = scaoh;
2199 		vcpu->arch.sie_block->scaol = scaol;
2200 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2201 	}
2202 	kvm->arch.sca = new_sca;
2203 	kvm->arch.use_esca = 1;
2204 
2205 	write_unlock(&kvm->arch.sca_lock);
2206 	kvm_s390_vcpu_unblock_all(kvm);
2207 
2208 	free_page((unsigned long)old_sca);
2209 
2210 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2211 		 old_sca, kvm->arch.sca);
2212 	return 0;
2213 }
2214 
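/*
 * Check whether a vcpu with the given id still fits into the SCA,
 * switching from the basic to the extended SCA on demand when the
 * hardware supports it.
 */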
2215 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2216 {
2217 	int rc;
2218 
2219 	if (!kvm_s390_use_sca_entries()) {
2220 		if (id < KVM_MAX_VCPUS)
2221 			return true;
2222 		return false;
2223 	}
2224 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2225 		return true;
2226 	if (!sclp.has_esca || !sclp.has_64bscao)
2227 		return false;
2228 
2229 	mutex_lock(&kvm->lock);
2230 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2231 	mutex_unlock(&kvm->lock);
2232 
2233 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2234 }
2235 
2236 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2237 {
2238 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2239 	kvm_clear_async_pf_completion_queue(vcpu);
2240 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2241 				    KVM_SYNC_GPRS |
2242 				    KVM_SYNC_ACRS |
2243 				    KVM_SYNC_CRS |
2244 				    KVM_SYNC_ARCH0 |
2245 				    KVM_SYNC_PFAULT;
2246 	kvm_s390_set_prefix(vcpu, 0);
2247 	if (test_kvm_facility(vcpu->kvm, 64))
2248 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2249 	if (test_kvm_facility(vcpu->kvm, 82))
2250 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2251 	if (test_kvm_facility(vcpu->kvm, 133))
2252 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2253 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
2254 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2255 	 */
2256 	if (MACHINE_HAS_VX)
2257 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2258 	else
2259 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2260 
2261 	if (kvm_is_ucontrol(vcpu->kvm))
2262 		return __kvm_ucontrol_vcpu_init(vcpu);
2263 
2264 	return 0;
2265 }
2266 
2267 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2268 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2269 {
2270 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2271 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2272 	vcpu->arch.cputm_start = get_tod_clock_fast();
2273 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2274 }
2275 
2276 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2277 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2278 {
2279 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2280 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2281 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2282 	vcpu->arch.cputm_start = 0;
2283 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2284 }
2285 
2286 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2287 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2288 {
2289 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2290 	vcpu->arch.cputm_enabled = true;
2291 	__start_cpu_timer_accounting(vcpu);
2292 }
2293 
2294 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2295 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2296 {
2297 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2298 	__stop_cpu_timer_accounting(vcpu);
2299 	vcpu->arch.cputm_enabled = false;
2300 }
2301 
2302 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2303 {
2304 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2305 	__enable_cpu_timer_accounting(vcpu);
2306 	preempt_enable();
2307 }
2308 
2309 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2310 {
2311 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2312 	__disable_cpu_timer_accounting(vcpu);
2313 	preempt_enable();
2314 }
2315 
2316 /* set the cpu timer - may only be called from the VCPU thread itself */
2317 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2318 {
2319 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2320 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2321 	if (vcpu->arch.cputm_enabled)
2322 		vcpu->arch.cputm_start = get_tod_clock_fast();
2323 	vcpu->arch.sie_block->cputm = cputm;
2324 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2325 	preempt_enable();
2326 }
2327 
2328 /* update and get the cpu timer - can also be called from other VCPU threads */
2329 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2330 {
2331 	unsigned int seq;
2332 	__u64 value;
2333 
2334 	if (unlikely(!vcpu->arch.cputm_enabled))
2335 		return vcpu->arch.sie_block->cputm;
2336 
2337 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2338 	do {
2339 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2340 		/*
2341 		 * If the writer would ever execute a read in the critical
2342 		 * section, e.g. in irq context, we have a deadlock.
2343 		 */
2344 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2345 		value = vcpu->arch.sie_block->cputm;
2346 		/* if cputm_start is 0, accounting is being started/stopped */
2347 		if (likely(vcpu->arch.cputm_start))
2348 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2349 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2350 	preempt_enable();
2351 	return value;
2352 }
2353 
2354 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2355 {
2356 
2357 	gmap_enable(vcpu->arch.enabled_gmap);
2358 	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2359 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2360 		__start_cpu_timer_accounting(vcpu);
2361 	vcpu->cpu = cpu;
2362 }
2363 
2364 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2365 {
2366 	vcpu->cpu = -1;
2367 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2368 		__stop_cpu_timer_accounting(vcpu);
2369 	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2370 	vcpu->arch.enabled_gmap = gmap_get_enabled();
2371 	gmap_disable(vcpu->arch.enabled_gmap);
2372 
2373 }
2374 
2375 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2376 {
2377 	/* this equals initial cpu reset in pop, but we don't switch to ESA */
2378 	vcpu->arch.sie_block->gpsw.mask = 0UL;
2379 	vcpu->arch.sie_block->gpsw.addr = 0UL;
2380 	kvm_s390_set_prefix(vcpu, 0);
2381 	kvm_s390_set_cpu_timer(vcpu, 0);
2382 	vcpu->arch.sie_block->ckc       = 0UL;
2383 	vcpu->arch.sie_block->todpr     = 0;
2384 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2385 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2386 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2387 	vcpu->run->s.regs.fpc = 0;
2388 	vcpu->arch.sie_block->gbea = 1;
2389 	vcpu->arch.sie_block->pp = 0;
2390 	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2391 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2392 	kvm_clear_async_pf_completion_queue(vcpu);
2393 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2394 		kvm_s390_vcpu_stop(vcpu);
2395 	kvm_s390_clear_local_irqs(vcpu);
2396 }
2397 
2398 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2399 {
2400 	mutex_lock(&vcpu->kvm->lock);
2401 	preempt_disable();
2402 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2403 	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2404 	preempt_enable();
2405 	mutex_unlock(&vcpu->kvm->lock);
2406 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2407 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2408 		sca_add_vcpu(vcpu);
2409 	}
2410 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2411 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2412 	/* make vcpu_load load the right gmap on the first trigger */
2413 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2414 }
2415 
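/*
 * Propagate the VM wide crypto settings (CRYCB origin, AES/DEA protected
 * key wrapping) into the vcpu's SIE block.
 */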
2416 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2417 {
2418 	if (!test_kvm_facility(vcpu->kvm, 76))
2419 		return;
2420 
2421 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2422 
2423 	if (vcpu->kvm->arch.crypto.aes_kw)
2424 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2425 	if (vcpu->kvm->arch.crypto.dea_kw)
2426 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2427 
2428 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2429 }
2430 
2431 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2432 {
2433 	free_page(vcpu->arch.sie_block->cbrlo);
2434 	vcpu->arch.sie_block->cbrlo = 0;
2435 }
2436 
2437 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2438 {
2439 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2440 	if (!vcpu->arch.sie_block->cbrlo)
2441 		return -ENOMEM;
2442 
2443 	vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2444 	return 0;
2445 }
2446 
2447 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2448 {
2449 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2450 
2451 	vcpu->arch.sie_block->ibc = model->ibc;
2452 	if (test_kvm_facility(vcpu->kvm, 7))
2453 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2454 }
2455 
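/*
 * Apply the per-vcpu SIE configuration: cpu flags, CPU model, the
 * interpretation facilities reflected in the ecb/eca/ecd bits, CMMA,
 * storage key handling and the clock comparator wakeup timer.
 */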
2456 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2457 {
2458 	int rc = 0;
2459 
2460 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2461 						    CPUSTAT_SM |
2462 						    CPUSTAT_STOPPED);
2463 
2464 	if (test_kvm_facility(vcpu->kvm, 78))
2465 		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2466 	else if (test_kvm_facility(vcpu->kvm, 8))
2467 		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2468 
2469 	kvm_s390_vcpu_setup_model(vcpu);
2470 
2471 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2472 	if (MACHINE_HAS_ESOP)
2473 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2474 	if (test_kvm_facility(vcpu->kvm, 9))
2475 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2476 	if (test_kvm_facility(vcpu->kvm, 73))
2477 		vcpu->arch.sie_block->ecb |= ECB_TE;
2478 
2479 	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2480 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2481 	if (test_kvm_facility(vcpu->kvm, 130))
2482 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2483 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2484 	if (sclp.has_cei)
2485 		vcpu->arch.sie_block->eca |= ECA_CEI;
2486 	if (sclp.has_ib)
2487 		vcpu->arch.sie_block->eca |= ECA_IB;
2488 	if (sclp.has_siif)
2489 		vcpu->arch.sie_block->eca |= ECA_SII;
2490 	if (sclp.has_sigpif)
2491 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
2492 	if (test_kvm_facility(vcpu->kvm, 129)) {
2493 		vcpu->arch.sie_block->eca |= ECA_VX;
2494 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2495 	}
2496 	if (test_kvm_facility(vcpu->kvm, 139))
2497 		vcpu->arch.sie_block->ecd |= ECD_MEF;
2498 
2499 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2500 					| SDNXC;
2501 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2502 
2503 	if (sclp.has_kss)
2504 		atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2505 	else
2506 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2507 
2508 	if (vcpu->kvm->arch.use_cmma) {
2509 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
2510 		if (rc)
2511 			return rc;
2512 	}
2513 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2514 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2515 
2516 	kvm_s390_vcpu_crypto_setup(vcpu);
2517 
2518 	return rc;
2519 }
2520 
2521 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2522 				      unsigned int id)
2523 {
2524 	struct kvm_vcpu *vcpu;
2525 	struct sie_page *sie_page;
2526 	int rc = -EINVAL;
2527 
2528 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2529 		goto out;
2530 
2531 	rc = -ENOMEM;
2532 
2533 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2534 	if (!vcpu)
2535 		goto out;
2536 
2537 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2538 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2539 	if (!sie_page)
2540 		goto out_free_cpu;
2541 
2542 	vcpu->arch.sie_block = &sie_page->sie_block;
2543 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2544 
2545 	/* the real guest size will always be smaller than msl */
2546 	vcpu->arch.sie_block->mso = 0;
2547 	vcpu->arch.sie_block->msl = sclp.hamax;
2548 
2549 	vcpu->arch.sie_block->icpua = id;
2550 	spin_lock_init(&vcpu->arch.local_int.lock);
2551 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2552 	vcpu->arch.local_int.wq = &vcpu->wq;
2553 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2554 	seqcount_init(&vcpu->arch.cputm_seqcount);
2555 
2556 	rc = kvm_vcpu_init(vcpu, kvm, id);
2557 	if (rc)
2558 		goto out_free_sie_block;
2559 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2560 		 vcpu->arch.sie_block);
2561 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2562 
2563 	return vcpu;
2564 out_free_sie_block:
2565 	free_page((unsigned long)(vcpu->arch.sie_block));
2566 out_free_cpu:
2567 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2568 out:
2569 	return ERR_PTR(rc);
2570 }
2571 
2572 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2573 {
2574 	return kvm_s390_vcpu_has_irq(vcpu, 0);
2575 }
2576 
2577 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2578 {
2579 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2580 }
2581 
2582 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2583 {
2584 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2585 	exit_sie(vcpu);
2586 }
2587 
2588 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2589 {
2590 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2591 }
2592 
2593 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2594 {
2595 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2596 	exit_sie(vcpu);
2597 }
2598 
2599 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2600 {
2601 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2602 }
2603 
2604 /*
2605  * Kick a guest cpu out of SIE and wait until SIE is not running.
2606  * If the CPU is not running (e.g. waiting as idle) the function will
2607  * return immediately. */
2608 void exit_sie(struct kvm_vcpu *vcpu)
2609 {
2610 	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2611 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2612 		cpu_relax();
2613 }
2614 
2615 /* Kick a guest cpu out of SIE to process a request synchronously */
2616 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2617 {
2618 	kvm_make_request(req, vcpu);
2619 	kvm_s390_vcpu_request(vcpu);
2620 }
2621 
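/*
 * gmap invalidation notifier: if an invalidated range overlaps a vcpu's
 * prefix pages, request an MMU reload for that vcpu so that the ipte
 * notifier gets re-armed for the prefix.
 */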
2622 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2623 			      unsigned long end)
2624 {
2625 	struct kvm *kvm = gmap->private;
2626 	struct kvm_vcpu *vcpu;
2627 	unsigned long prefix;
2628 	int i;
2629 
2630 	if (gmap_is_shadow(gmap))
2631 		return;
2632 	if (start >= 1UL << 31)
2633 		/* We are only interested in prefix pages */
2634 		return;
2635 	kvm_for_each_vcpu(i, vcpu, kvm) {
2636 		/* match against both prefix pages */
2637 		prefix = kvm_s390_get_prefix(vcpu);
2638 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2639 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2640 				   start, end);
2641 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2642 		}
2643 	}
2644 }
2645 
2646 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2647 {
2648 	/* kvm common code refers to this, but never calls it */
2649 	BUG();
2650 	return 0;
2651 }
2652 
2653 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2654 					   struct kvm_one_reg *reg)
2655 {
2656 	int r = -EINVAL;
2657 
2658 	switch (reg->id) {
2659 	case KVM_REG_S390_TODPR:
2660 		r = put_user(vcpu->arch.sie_block->todpr,
2661 			     (u32 __user *)reg->addr);
2662 		break;
2663 	case KVM_REG_S390_EPOCHDIFF:
2664 		r = put_user(vcpu->arch.sie_block->epoch,
2665 			     (u64 __user *)reg->addr);
2666 		break;
2667 	case KVM_REG_S390_CPU_TIMER:
2668 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
2669 			     (u64 __user *)reg->addr);
2670 		break;
2671 	case KVM_REG_S390_CLOCK_COMP:
2672 		r = put_user(vcpu->arch.sie_block->ckc,
2673 			     (u64 __user *)reg->addr);
2674 		break;
2675 	case KVM_REG_S390_PFTOKEN:
2676 		r = put_user(vcpu->arch.pfault_token,
2677 			     (u64 __user *)reg->addr);
2678 		break;
2679 	case KVM_REG_S390_PFCOMPARE:
2680 		r = put_user(vcpu->arch.pfault_compare,
2681 			     (u64 __user *)reg->addr);
2682 		break;
2683 	case KVM_REG_S390_PFSELECT:
2684 		r = put_user(vcpu->arch.pfault_select,
2685 			     (u64 __user *)reg->addr);
2686 		break;
2687 	case KVM_REG_S390_PP:
2688 		r = put_user(vcpu->arch.sie_block->pp,
2689 			     (u64 __user *)reg->addr);
2690 		break;
2691 	case KVM_REG_S390_GBEA:
2692 		r = put_user(vcpu->arch.sie_block->gbea,
2693 			     (u64 __user *)reg->addr);
2694 		break;
2695 	default:
2696 		break;
2697 	}
2698 
2699 	return r;
2700 }
2701 
2702 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2703 					   struct kvm_one_reg *reg)
2704 {
2705 	int r = -EINVAL;
2706 	__u64 val;
2707 
2708 	switch (reg->id) {
2709 	case KVM_REG_S390_TODPR:
2710 		r = get_user(vcpu->arch.sie_block->todpr,
2711 			     (u32 __user *)reg->addr);
2712 		break;
2713 	case KVM_REG_S390_EPOCHDIFF:
2714 		r = get_user(vcpu->arch.sie_block->epoch,
2715 			     (u64 __user *)reg->addr);
2716 		break;
2717 	case KVM_REG_S390_CPU_TIMER:
2718 		r = get_user(val, (u64 __user *)reg->addr);
2719 		if (!r)
2720 			kvm_s390_set_cpu_timer(vcpu, val);
2721 		break;
2722 	case KVM_REG_S390_CLOCK_COMP:
2723 		r = get_user(vcpu->arch.sie_block->ckc,
2724 			     (u64 __user *)reg->addr);
2725 		break;
2726 	case KVM_REG_S390_PFTOKEN:
2727 		r = get_user(vcpu->arch.pfault_token,
2728 			     (u64 __user *)reg->addr);
2729 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2730 			kvm_clear_async_pf_completion_queue(vcpu);
2731 		break;
2732 	case KVM_REG_S390_PFCOMPARE:
2733 		r = get_user(vcpu->arch.pfault_compare,
2734 			     (u64 __user *)reg->addr);
2735 		break;
2736 	case KVM_REG_S390_PFSELECT:
2737 		r = get_user(vcpu->arch.pfault_select,
2738 			     (u64 __user *)reg->addr);
2739 		break;
2740 	case KVM_REG_S390_PP:
2741 		r = get_user(vcpu->arch.sie_block->pp,
2742 			     (u64 __user *)reg->addr);
2743 		break;
2744 	case KVM_REG_S390_GBEA:
2745 		r = get_user(vcpu->arch.sie_block->gbea,
2746 			     (u64 __user *)reg->addr);
2747 		break;
2748 	default:
2749 		break;
2750 	}
2751 
2752 	return r;
2753 }
2754 
2755 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2756 {
2757 	kvm_s390_vcpu_initial_reset(vcpu);
2758 	return 0;
2759 }
2760 
2761 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2762 {
2763 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2764 	return 0;
2765 }
2766 
2767 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2768 {
2769 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2770 	return 0;
2771 }
2772 
2773 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2774 				  struct kvm_sregs *sregs)
2775 {
2776 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2777 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2778 	return 0;
2779 }
2780 
2781 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2782 				  struct kvm_sregs *sregs)
2783 {
2784 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2785 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2786 	return 0;
2787 }
2788 
2789 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2790 {
2791 	if (test_fp_ctl(fpu->fpc))
2792 		return -EINVAL;
2793 	vcpu->run->s.regs.fpc = fpu->fpc;
2794 	if (MACHINE_HAS_VX)
2795 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2796 				 (freg_t *) fpu->fprs);
2797 	else
2798 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2799 	return 0;
2800 }
2801 
2802 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2803 {
2804 	/* make sure we have the latest values */
2805 	save_fpu_regs();
2806 	if (MACHINE_HAS_VX)
2807 		convert_vx_to_fp((freg_t *) fpu->fprs,
2808 				 (__vector128 *) vcpu->run->s.regs.vrs);
2809 	else
2810 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2811 	fpu->fpc = vcpu->run->s.regs.fpc;
2812 	return 0;
2813 }
2814 
2815 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2816 {
2817 	int rc = 0;
2818 
2819 	if (!is_vcpu_stopped(vcpu))
2820 		rc = -EBUSY;
2821 	else {
2822 		vcpu->run->psw_mask = psw.mask;
2823 		vcpu->run->psw_addr = psw.addr;
2824 	}
2825 	return rc;
2826 }
2827 
2828 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2829 				  struct kvm_translation *tr)
2830 {
2831 	return -EINVAL; /* not implemented yet */
2832 }
2833 
2834 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2835 			      KVM_GUESTDBG_USE_HW_BP | \
2836 			      KVM_GUESTDBG_ENABLE)
2837 
2838 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2839 					struct kvm_guest_debug *dbg)
2840 {
2841 	int rc = 0;
2842 
2843 	vcpu->guest_debug = 0;
2844 	kvm_s390_clear_bp_data(vcpu);
2845 
2846 	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2847 		return -EINVAL;
2848 	if (!sclp.has_gpere)
2849 		return -EINVAL;
2850 
2851 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
2852 		vcpu->guest_debug = dbg->control;
2853 		/* enforce guest PER */
2854 		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2855 
2856 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2857 			rc = kvm_s390_import_bp_data(vcpu, dbg);
2858 	} else {
2859 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2860 		vcpu->arch.guestdbg.last_bp = 0;
2861 	}
2862 
2863 	if (rc) {
2864 		vcpu->guest_debug = 0;
2865 		kvm_s390_clear_bp_data(vcpu);
2866 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2867 	}
2868 
2869 	return rc;
2870 }
2871 
2872 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2873 				    struct kvm_mp_state *mp_state)
2874 {
2875 	/* CHECK_STOP and LOAD are not supported yet */
2876 	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2877 				       KVM_MP_STATE_OPERATING;
2878 }
2879 
2880 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2881 				    struct kvm_mp_state *mp_state)
2882 {
2883 	int rc = 0;
2884 
2885 	/* user space knows about this interface - let it control the state */
2886 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2887 
2888 	switch (mp_state->mp_state) {
2889 	case KVM_MP_STATE_STOPPED:
2890 		kvm_s390_vcpu_stop(vcpu);
2891 		break;
2892 	case KVM_MP_STATE_OPERATING:
2893 		kvm_s390_vcpu_start(vcpu);
2894 		break;
2895 	case KVM_MP_STATE_LOAD:
2896 	case KVM_MP_STATE_CHECK_STOP:
2897 		/* fall through - CHECK_STOP and LOAD are not supported yet */
2898 	default:
2899 		rc = -ENXIO;
2900 	}
2901 
2902 	return rc;
2903 }
2904 
2905 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2906 {
2907 	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2908 }
2909 
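/*
 * Process all pending vcpu requests before (re-)entering SIE. Returns 0 if
 * all requests could be handled, or a negative error code if re-protecting
 * the prefix pages failed.
 */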
2910 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2911 {
2912 retry:
2913 	kvm_s390_vcpu_request_handled(vcpu);
2914 	if (!kvm_request_pending(vcpu))
2915 		return 0;
2916 	/*
2917 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2918 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2919 	 * This ensures that the ipte instruction for this request has
2920 	 * already finished. We might race against a second unmapper that
2921 	 * wants to set the blocking bit. Let's just retry the request loop.
2922 	 */
2923 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2924 		int rc;
2925 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
2926 					  kvm_s390_get_prefix(vcpu),
2927 					  PAGE_SIZE * 2, PROT_WRITE);
2928 		if (rc) {
2929 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2930 			return rc;
2931 		}
2932 		goto retry;
2933 	}
2934 
2935 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2936 		vcpu->arch.sie_block->ihcpu = 0xffff;
2937 		goto retry;
2938 	}
2939 
2940 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2941 		if (!ibs_enabled(vcpu)) {
2942 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2943 			atomic_or(CPUSTAT_IBS,
2944 					&vcpu->arch.sie_block->cpuflags);
2945 		}
2946 		goto retry;
2947 	}
2948 
2949 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2950 		if (ibs_enabled(vcpu)) {
2951 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2952 			atomic_andnot(CPUSTAT_IBS,
2953 					  &vcpu->arch.sie_block->cpuflags);
2954 		}
2955 		goto retry;
2956 	}
2957 
2958 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2959 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2960 		goto retry;
2961 	}
2962 
2963 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2964 		/*
2965 		 * Disable CMMA virtualization; we will emulate the ESSA
2966 		 * instruction manually, in order to provide additional
2967 		 * functionalities needed for live migration.
2968 		 */
2969 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2970 		goto retry;
2971 	}
2972 
2973 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2974 		/*
2975 		 * Re-enable CMMA virtualization if CMMA is available and
2976 		 * was used.
2977 		 */
2978 		if ((vcpu->kvm->arch.use_cmma) &&
2979 		    (vcpu->kvm->mm->context.use_cmma))
2980 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2981 		goto retry;
2982 	}
2983 
2984 	/* nothing to do, just clear the request */
2985 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2986 
2987 	return 0;
2988 }
2989 
2990 void kvm_s390_set_tod_clock(struct kvm *kvm,
2991 			    const struct kvm_s390_vm_tod_clock *gtod)
2992 {
2993 	struct kvm_vcpu *vcpu;
2994 	struct kvm_s390_tod_clock_ext htod;
2995 	int i;
2996 
2997 	mutex_lock(&kvm->lock);
2998 	preempt_disable();
2999 
3000 	get_tod_clock_ext((char *)&htod);
3001 
3002 	kvm->arch.epoch = gtod->tod - htod.tod;
3003 	kvm->arch.epdx = 0;
3004 	if (test_kvm_facility(kvm, 139)) {
3005 		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3006 		if (kvm->arch.epoch > gtod->tod)
3007 			kvm->arch.epdx -= 1;
3008 	}
3009 
3010 	kvm_s390_vcpu_block_all(kvm);
3011 	kvm_for_each_vcpu(i, vcpu, kvm) {
3012 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3013 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3014 	}
3015 
3016 	kvm_s390_vcpu_unblock_all(kvm);
3017 	preempt_enable();
3018 	mutex_unlock(&kvm->lock);
3019 }
3020 
3021 /**
3022  * kvm_arch_fault_in_page - fault-in guest page if necessary
3023  * @vcpu: The corresponding virtual cpu
3024  * @gpa: Guest physical address
3025  * @writable: Whether the page should be writable or not
3026  *
3027  * Make sure that a guest page has been faulted-in on the host.
3028  *
3029  * Return: Zero on success, negative error code otherwise.
3030  */
3031 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3032 {
3033 	return gmap_fault(vcpu->arch.gmap, gpa,
3034 			  writable ? FAULT_FLAG_WRITE : 0);
3035 }
3036 
3037 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3038 				      unsigned long token)
3039 {
3040 	struct kvm_s390_interrupt inti;
3041 	struct kvm_s390_irq irq;
3042 
3043 	if (start_token) {
3044 		irq.u.ext.ext_params2 = token;
3045 		irq.type = KVM_S390_INT_PFAULT_INIT;
3046 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3047 	} else {
3048 		inti.type = KVM_S390_INT_PFAULT_DONE;
3049 		inti.parm64 = token;
3050 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3051 	}
3052 }
3053 
3054 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3055 				     struct kvm_async_pf *work)
3056 {
3057 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3058 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3059 }
3060 
3061 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3062 				 struct kvm_async_pf *work)
3063 {
3064 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3065 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3066 }
3067 
3068 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3069 			       struct kvm_async_pf *work)
3070 {
3071 	/* s390 will always inject the page directly */
3072 }
3073 
3074 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3075 {
3076 	/*
3077 	 * s390 will always inject the page directly,
3078 	 * but we still want check_async_completion to clean up
3079 	 */
3080 	return true;
3081 }
3082 
3083 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3084 {
3085 	hva_t hva;
3086 	struct kvm_arch_async_pf arch;
3087 	int rc;
3088 
3089 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3090 		return 0;
3091 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3092 	    vcpu->arch.pfault_compare)
3093 		return 0;
3094 	if (psw_extint_disabled(vcpu))
3095 		return 0;
3096 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3097 		return 0;
3098 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3099 		return 0;
3100 	if (!vcpu->arch.gmap->pfault_enabled)
3101 		return 0;
3102 
3103 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3104 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3105 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3106 		return 0;
3107 
3108 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3109 	return rc;
3110 }
3111 
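/*
 * Prepare a vcpu for entering SIE: handle completed pfaults, host machine
 * check conditions and pending requests, deliver pending interrupts and
 * set up guest PER debugging if enabled.
 */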
3112 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3113 {
3114 	int rc, cpuflags;
3115 
3116 	/*
3117 	 * On s390 notifications for arriving pages will be delivered directly
3118 	 * to the guest but the housekeeping for completed pfaults is
3119 	 * handled outside the worker.
3120 	 */
3121 	kvm_check_async_pf_completion(vcpu);
3122 
3123 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3124 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3125 
3126 	if (need_resched())
3127 		schedule();
3128 
3129 	if (test_cpu_flag(CIF_MCCK_PENDING))
3130 		s390_handle_mcck();
3131 
3132 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3133 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
3134 		if (rc)
3135 			return rc;
3136 	}
3137 
3138 	rc = kvm_s390_handle_requests(vcpu);
3139 	if (rc)
3140 		return rc;
3141 
3142 	if (guestdbg_enabled(vcpu)) {
3143 		kvm_s390_backup_guest_per_regs(vcpu);
3144 		kvm_s390_patch_guest_per_regs(vcpu);
3145 	}
3146 
3147 	vcpu->arch.sie_block->icptcode = 0;
3148 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3149 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3150 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
3151 
3152 	return 0;
3153 }
3154 
3155 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3156 {
3157 	struct kvm_s390_pgm_info pgm_info = {
3158 		.code = PGM_ADDRESSING,
3159 	};
3160 	u8 opcode, ilen;
3161 	int rc;
3162 
3163 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3164 	trace_kvm_s390_sie_fault(vcpu);
3165 
3166 	/*
3167 	 * We want to inject an addressing exception, which is defined as a
3168 	 * suppressing or terminating exception. However, since we came here
3169 	 * by a DAT access exception, the PSW still points to the faulting
3170 	 * instruction since DAT exceptions are nullifying. So we've got
3171 	 * to look up the current opcode to get the length of the instruction
3172 	 * to be able to forward the PSW.
3173 	 */
3174 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3175 	ilen = insn_length(opcode);
3176 	if (rc < 0) {
3177 		return rc;
3178 	} else if (rc) {
3179 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
3180 		 * Forward by arbitrary ilc, injection will take care of
3181 		 * nullification if necessary.
3182 		 */
3183 		pgm_info = vcpu->arch.pgm;
3184 		ilen = 4;
3185 	}
3186 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3187 	kvm_s390_forward_psw(vcpu, ilen);
3188 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3189 }
3190 
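/*
 * Handle a SIE exit: reinject host machine checks forwarded by the
 * interrupt handler, run the intercept handlers, and convert unresolved
 * intercepts, ucontrol faults and guest page faults into userspace exits
 * or async page faults.
 */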
3191 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3192 {
3193 	struct mcck_volatile_info *mcck_info;
3194 	struct sie_page *sie_page;
3195 
3196 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3197 		   vcpu->arch.sie_block->icptcode);
3198 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3199 
3200 	if (guestdbg_enabled(vcpu))
3201 		kvm_s390_restore_guest_per_regs(vcpu);
3202 
3203 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3204 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3205 
3206 	if (exit_reason == -EINTR) {
3207 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
3208 		sie_page = container_of(vcpu->arch.sie_block,
3209 					struct sie_page, sie_block);
3210 		mcck_info = &sie_page->mcck_info;
3211 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
3212 		return 0;
3213 	}
3214 
3215 	if (vcpu->arch.sie_block->icptcode > 0) {
3216 		int rc = kvm_handle_sie_intercept(vcpu);
3217 
3218 		if (rc != -EOPNOTSUPP)
3219 			return rc;
3220 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3221 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3222 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3223 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3224 		return -EREMOTE;
3225 	} else if (exit_reason != -EFAULT) {
3226 		vcpu->stat.exit_null++;
3227 		return 0;
3228 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
3229 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3230 		vcpu->run->s390_ucontrol.trans_exc_code =
3231 						current->thread.gmap_addr;
3232 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
3233 		return -EREMOTE;
3234 	} else if (current->thread.gmap_pfault) {
3235 		trace_kvm_s390_major_guest_pfault(vcpu);
3236 		current->thread.gmap_pfault = 0;
3237 		if (kvm_arch_setup_async_pf(vcpu))
3238 			return 0;
3239 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3240 	}
3241 	return vcpu_post_run_fault_in_sie(vcpu);
3242 }
3243 
3244 static int __vcpu_run(struct kvm_vcpu *vcpu)
3245 {
3246 	int rc, exit_reason;
3247 
3248 	/*
3249 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3250 	 * ning the guest), so that memslots (and other stuff) are protected
3251 	 */
3252 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3253 
3254 	do {
3255 		rc = vcpu_pre_run(vcpu);
3256 		if (rc)
3257 			break;
3258 
3259 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3260 		/*
3261 		 * As PF_VCPU will be used in the fault handler, there must be
3262 		 * no uaccess between guest_enter and guest_exit.
3263 		 */
3264 		local_irq_disable();
3265 		guest_enter_irqoff();
3266 		__disable_cpu_timer_accounting(vcpu);
3267 		local_irq_enable();
3268 		exit_reason = sie64a(vcpu->arch.sie_block,
3269 				     vcpu->run->s.regs.gprs);
3270 		local_irq_disable();
3271 		__enable_cpu_timer_accounting(vcpu);
3272 		guest_exit_irqoff();
3273 		local_irq_enable();
3274 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3275 
3276 		rc = vcpu_post_run(vcpu, exit_reason);
3277 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3278 
3279 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3280 	return rc;
3281 }
3282 
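/*
 * Copy the register state that userspace marked dirty in kvm_run into the
 * vcpu and load the guest FPU/vector, access register and guarded storage
 * context for the upcoming SIE entry.
 */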
3283 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3284 {
3285 	struct runtime_instr_cb *riccb;
3286 	struct gs_cb *gscb;
3287 
3288 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3289 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3290 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3291 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3292 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3293 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3294 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3295 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3296 		/* some control register changes require a tlb flush */
3297 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3298 	}
3299 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3300 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3301 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3302 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3303 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3304 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3305 	}
3306 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3307 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3308 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3309 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3310 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3311 			kvm_clear_async_pf_completion_queue(vcpu);
3312 	}
3313 	/*
3314 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
3315 	 * we should enable RI here instead of doing the lazy enablement.
3316 	 */
3317 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3318 	    test_kvm_facility(vcpu->kvm, 64) &&
3319 	    riccb->valid &&
3320 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3321 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3322 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3323 	}
3324 	/*
3325 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
3326 	 * we should enable GS here instead of doing the lazy enablement.
3327 	 */
3328 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3329 	    test_kvm_facility(vcpu->kvm, 133) &&
3330 	    gscb->gssm &&
3331 	    !vcpu->arch.gs_enabled) {
3332 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3333 		vcpu->arch.sie_block->ecb |= ECB_GS;
3334 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3335 		vcpu->arch.gs_enabled = 1;
3336 	}
3337 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3338 	    test_kvm_facility(vcpu->kvm, 82)) {
3339 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3340 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3341 	}
3342 	save_access_regs(vcpu->arch.host_acrs);
3343 	restore_access_regs(vcpu->run->s.regs.acrs);
3344 	/* save host (userspace) fprs/vrs */
3345 	save_fpu_regs();
3346 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3347 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3348 	if (MACHINE_HAS_VX)
3349 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3350 	else
3351 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3352 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3353 	if (test_fp_ctl(current->thread.fpu.fpc))
3354 		/* User space provided an invalid FPC, let's clear it */
3355 		current->thread.fpu.fpc = 0;
3356 	if (MACHINE_HAS_GS) {
3357 		preempt_disable();
3358 		__ctl_set_bit(2, 4);
3359 		if (current->thread.gs_cb) {
3360 			vcpu->arch.host_gscb = current->thread.gs_cb;
3361 			save_gs_cb(vcpu->arch.host_gscb);
3362 		}
3363 		if (vcpu->arch.gs_enabled) {
3364 			current->thread.gs_cb = (struct gs_cb *)
3365 						&vcpu->run->s.regs.gscb;
3366 			restore_gs_cb(current->thread.gs_cb);
3367 		}
3368 		preempt_enable();
3369 	}
3370 
3371 	kvm_run->kvm_dirty_regs = 0;
3372 }
3373 
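/*
 * Write the current vcpu register state back into kvm_run and restore the
 * host FPU, access register and guarded storage context.
 */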
3374 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3375 {
3376 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3377 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3378 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3379 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3380 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3381 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3382 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3383 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3384 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3385 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3386 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3387 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3388 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3389 	save_access_regs(vcpu->run->s.regs.acrs);
3390 	restore_access_regs(vcpu->arch.host_acrs);
3391 	/* Save guest register state */
3392 	save_fpu_regs();
3393 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3394 	/* Restore will be done lazily at return */
3395 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3396 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3397 	if (MACHINE_HAS_GS) {
3398 		__ctl_set_bit(2, 4);
3399 		if (vcpu->arch.gs_enabled)
3400 			save_gs_cb(current->thread.gs_cb);
3401 		preempt_disable();
3402 		current->thread.gs_cb = vcpu->arch.host_gscb;
3403 		restore_gs_cb(vcpu->arch.host_gscb);
3404 		preempt_enable();
3405 		if (!vcpu->arch.host_gscb)
3406 			__ctl_clear_bit(2, 4);
3407 		vcpu->arch.host_gscb = NULL;
3408 	}
3409 
3410 }
3411 
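/*
 * Entry point for the KVM_RUN ioctl on a VCPU fd.  Purely as an
 * illustrative sketch (not part of this file), a userspace loop driving
 * it could look like this, assuming vcpu_fd and an mmap()ed kvm_run
 * structure at "run":
 *
 *	while (ioctl(vcpu_fd, KVM_RUN, 0) == 0 || errno == EINTR) {
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			handle_intercept(run);	/* hypothetical helper */
 *	}
 */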
3412 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3413 {
3414 	int rc;
3415 
3416 	if (kvm_run->immediate_exit)
3417 		return -EINTR;
3418 
3419 	if (guestdbg_exit_pending(vcpu)) {
3420 		kvm_s390_prepare_debug_exit(vcpu);
3421 		return 0;
3422 	}
3423 
3424 	kvm_sigset_activate(vcpu);
3425 
3426 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3427 		kvm_s390_vcpu_start(vcpu);
3428 	} else if (is_vcpu_stopped(vcpu)) {
3429 		pr_err_ratelimited("can't run stopped vcpu %d\n",
3430 				   vcpu->vcpu_id);
3431 		return -EINVAL;
3432 	}
3433 
3434 	sync_regs(vcpu, kvm_run);
3435 	enable_cpu_timer_accounting(vcpu);
3436 
3437 	might_fault();
3438 	rc = __vcpu_run(vcpu);
3439 
3440 	if (signal_pending(current) && !rc) {
3441 		kvm_run->exit_reason = KVM_EXIT_INTR;
3442 		rc = -EINTR;
3443 	}
3444 
3445 	if (guestdbg_exit_pending(vcpu) && !rc)  {
3446 		kvm_s390_prepare_debug_exit(vcpu);
3447 		rc = 0;
3448 	}
3449 
3450 	if (rc == -EREMOTE) {
3451 		/* userspace support is needed, kvm_run has been prepared */
3452 		rc = 0;
3453 	}
3454 
3455 	disable_cpu_timer_accounting(vcpu);
3456 	store_regs(vcpu, kvm_run);
3457 
3458 	kvm_sigset_deactivate(vcpu);
3459 
3460 	vcpu->stat.exit_userspace++;
3461 	return rc;
3462 }
3463 
3464 /*
3465  * store status at address
3466  * we have two special cases:
3467  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3468  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3469  */
3470 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3471 {
3472 	unsigned char archmode = 1;
3473 	freg_t fprs[NUM_FPRS];
3474 	unsigned int px;
3475 	u64 clkcomp, cputm;
3476 	int rc;
3477 
3478 	px = kvm_s390_get_prefix(vcpu);
3479 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3480 		if (write_guest_abs(vcpu, 163, &archmode, 1))
3481 			return -EFAULT;
3482 		gpa = 0;
3483 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3484 		if (write_guest_real(vcpu, 163, &archmode, 1))
3485 			return -EFAULT;
3486 		gpa = px;
3487 	} else
3488 		gpa -= __LC_FPREGS_SAVE_AREA;
3489 
3490 	/* manually convert vector registers if necessary */
3491 	if (MACHINE_HAS_VX) {
3492 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3493 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3494 				     fprs, 128);
3495 	} else {
3496 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3497 				     vcpu->run->s.regs.fprs, 128);
3498 	}
3499 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3500 			      vcpu->run->s.regs.gprs, 128);
3501 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3502 			      &vcpu->arch.sie_block->gpsw, 16);
3503 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3504 			      &px, 4);
3505 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3506 			      &vcpu->run->s.regs.fpc, 4);
3507 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3508 			      &vcpu->arch.sie_block->todpr, 4);
3509 	cputm = kvm_s390_get_cpu_timer(vcpu);
3510 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3511 			      &cputm, 8);
3512 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
3513 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3514 			      &clkcomp, 8);
3515 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3516 			      &vcpu->run->s.regs.acrs, 64);
3517 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3518 			      &vcpu->arch.sie_block->gcr, 128);
3519 	return rc ? -EFAULT : 0;
3520 }
3521 
3522 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3523 {
3524 	/*
3525 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3526 	 * switch in the run ioctl. Let's update our copies before we save
3527 	 * them into the save area.
3528 	 */
3529 	save_fpu_regs();
3530 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3531 	save_access_regs(vcpu->run->s.regs.acrs);
3532 
3533 	return kvm_s390_store_status_unloaded(vcpu, addr);
3534 }
3535 
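/*
 * The IBS facility only pays off while exactly one VCPU is running:
 * the start/stop handlers below enable it for a lone runner and drop
 * it again on all VCPUs as soon as a second one comes online.
 */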
3536 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3537 {
3538 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3539 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3540 }
3541 
3542 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3543 {
3544 	unsigned int i;
3545 	struct kvm_vcpu *vcpu;
3546 
3547 	kvm_for_each_vcpu(i, vcpu, kvm) {
3548 		__disable_ibs_on_vcpu(vcpu);
3549 	}
3550 }
3551 
3552 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3553 {
3554 	if (!sclp.has_ibs)
3555 		return;
3556 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3557 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3558 }
3559 
3560 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3561 {
3562 	int i, online_vcpus, started_vcpus = 0;
3563 
3564 	if (!is_vcpu_stopped(vcpu))
3565 		return;
3566 
3567 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3568 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3569 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3570 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3571 
3572 	for (i = 0; i < online_vcpus; i++) {
3573 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3574 			started_vcpus++;
3575 	}
3576 
3577 	if (started_vcpus == 0) {
3578 		/* we're the only active VCPU -> speed it up */
3579 		__enable_ibs_on_vcpu(vcpu);
3580 	} else if (started_vcpus == 1) {
3581 		/*
3582 		 * As we are starting a second VCPU, we have to disable
3583 		 * the IBS facility on all VCPUs to remove potentially
3584 		 * outstanding ENABLE requests.
3585 		 */
3586 		__disable_ibs_on_all_vcpus(vcpu->kvm);
3587 	}
3588 
3589 	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3590 	/*
3591 	 * Another VCPU might have used IBS while we were offline.
3592 	 * Let's play safe and flush the VCPU at startup.
3593 	 */
3594 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3595 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3596 	return;
3597 }
3598 
3599 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3600 {
3601 	int i, online_vcpus, started_vcpus = 0;
3602 	struct kvm_vcpu *started_vcpu = NULL;
3603 
3604 	if (is_vcpu_stopped(vcpu))
3605 		return;
3606 
3607 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3608 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3609 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3610 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3611 
3612 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3613 	kvm_s390_clear_stop_irq(vcpu);
3614 
3615 	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3616 	__disable_ibs_on_vcpu(vcpu);
3617 
3618 	for (i = 0; i < online_vcpus; i++) {
3619 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3620 			started_vcpus++;
3621 			started_vcpu = vcpu->kvm->vcpus[i];
3622 		}
3623 	}
3624 
3625 	if (started_vcpus == 1) {
3626 		/*
3627 		 * As we only have one VCPU left, we want to enable the
3628 		 * IBS facility for that VCPU to speed it up.
3629 		 */
3630 		__enable_ibs_on_vcpu(started_vcpu);
3631 	}
3632 
3633 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3634 	return;
3635 }
3636 
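/*
 * Per-VCPU capability enablement.  Illustrative userspace usage only
 * (assuming an open VCPU file descriptor vcpu_fd):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_CSS_SUPPORT };
 *	ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
 */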
3637 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3638 				     struct kvm_enable_cap *cap)
3639 {
3640 	int r;
3641 
3642 	if (cap->flags)
3643 		return -EINVAL;
3644 
3645 	switch (cap->cap) {
3646 	case KVM_CAP_S390_CSS_SUPPORT:
3647 		if (!vcpu->kvm->arch.css_support) {
3648 			vcpu->kvm->arch.css_support = 1;
3649 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3650 			trace_kvm_s390_enable_css(vcpu->kvm);
3651 		}
3652 		r = 0;
3653 		break;
3654 	default:
3655 		r = -EINVAL;
3656 		break;
3657 	}
3658 	return r;
3659 }
3660 
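/*
 * Guest memory access on behalf of userspace.  A minimal sketch of the
 * corresponding ioctl call from userspace (illustrative only; buffer
 * and guest_addr are assumed to exist there), reading 256 bytes of
 * guest logical memory through access register 0:
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = 256,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buffer,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */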
3661 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3662 				  struct kvm_s390_mem_op *mop)
3663 {
3664 	void __user *uaddr = (void __user *)mop->buf;
3665 	void *tmpbuf = NULL;
3666 	int r, srcu_idx;
3667 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3668 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
3669 
3670 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
3671 		return -EINVAL;
3672 
3673 	if (mop->size > MEM_OP_MAX_SIZE)
3674 		return -E2BIG;
3675 
3676 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3677 		tmpbuf = vmalloc(mop->size);
3678 		if (!tmpbuf)
3679 			return -ENOMEM;
3680 	}
3681 
3682 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3683 
3684 	switch (mop->op) {
3685 	case KVM_S390_MEMOP_LOGICAL_READ:
3686 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3687 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3688 					    mop->size, GACC_FETCH);
3689 			break;
3690 		}
3691 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3692 		if (r == 0) {
3693 			if (copy_to_user(uaddr, tmpbuf, mop->size))
3694 				r = -EFAULT;
3695 		}
3696 		break;
3697 	case KVM_S390_MEMOP_LOGICAL_WRITE:
3698 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3699 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3700 					    mop->size, GACC_STORE);
3701 			break;
3702 		}
3703 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3704 			r = -EFAULT;
3705 			break;
3706 		}
3707 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3708 		break;
3709 	default:
3710 		r = -EINVAL;
3711 	}
3712 
3713 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3714 
3715 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3716 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3717 
3718 	vfree(tmpbuf);
3719 	return r;
3720 }
3721 
3722 long kvm_arch_vcpu_ioctl(struct file *filp,
3723 			 unsigned int ioctl, unsigned long arg)
3724 {
3725 	struct kvm_vcpu *vcpu = filp->private_data;
3726 	void __user *argp = (void __user *)arg;
3727 	int idx;
3728 	long r;
3729 
3730 	switch (ioctl) {
3731 	case KVM_S390_IRQ: {
3732 		struct kvm_s390_irq s390irq;
3733 
3734 		r = -EFAULT;
3735 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3736 			break;
3737 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3738 		break;
3739 	}
3740 	case KVM_S390_INTERRUPT: {
3741 		struct kvm_s390_interrupt s390int;
3742 		struct kvm_s390_irq s390irq = {};
3743 
3744 		r = -EFAULT;
3745 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
3746 			break;
3747 		if (s390int_to_s390irq(&s390int, &s390irq))
3748 			return -EINVAL;
3749 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3750 		break;
3751 	}
3752 	case KVM_S390_STORE_STATUS:
3753 		idx = srcu_read_lock(&vcpu->kvm->srcu);
3754 		r = kvm_s390_store_status_unloaded(vcpu, arg);
3755 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
3756 		break;
3757 	case KVM_S390_SET_INITIAL_PSW: {
3758 		psw_t psw;
3759 
3760 		r = -EFAULT;
3761 		if (copy_from_user(&psw, argp, sizeof(psw)))
3762 			break;
3763 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3764 		break;
3765 	}
3766 	case KVM_S390_INITIAL_RESET:
3767 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3768 		break;
3769 	case KVM_SET_ONE_REG:
3770 	case KVM_GET_ONE_REG: {
3771 		struct kvm_one_reg reg;
3772 		r = -EFAULT;
3773 		if (copy_from_user(&reg, argp, sizeof(reg)))
3774 			break;
3775 		if (ioctl == KVM_SET_ONE_REG)
3776 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3777 		else
3778 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3779 		break;
3780 	}
3781 #ifdef CONFIG_KVM_S390_UCONTROL
3782 	case KVM_S390_UCAS_MAP: {
3783 		struct kvm_s390_ucas_mapping ucasmap;
3784 
3785 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3786 			r = -EFAULT;
3787 			break;
3788 		}
3789 
3790 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3791 			r = -EINVAL;
3792 			break;
3793 		}
3794 
3795 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3796 				     ucasmap.vcpu_addr, ucasmap.length);
3797 		break;
3798 	}
3799 	case KVM_S390_UCAS_UNMAP: {
3800 		struct kvm_s390_ucas_mapping ucasmap;
3801 
3802 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3803 			r = -EFAULT;
3804 			break;
3805 		}
3806 
3807 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3808 			r = -EINVAL;
3809 			break;
3810 		}
3811 
3812 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3813 			ucasmap.length);
3814 		break;
3815 	}
3816 #endif
3817 	case KVM_S390_VCPU_FAULT: {
3818 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
3819 		break;
3820 	}
3821 	case KVM_ENABLE_CAP:
3822 	{
3823 		struct kvm_enable_cap cap;
3824 		r = -EFAULT;
3825 		if (copy_from_user(&cap, argp, sizeof(cap)))
3826 			break;
3827 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3828 		break;
3829 	}
3830 	case KVM_S390_MEM_OP: {
3831 		struct kvm_s390_mem_op mem_op;
3832 
3833 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3834 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3835 		else
3836 			r = -EFAULT;
3837 		break;
3838 	}
3839 	case KVM_S390_SET_IRQ_STATE: {
3840 		struct kvm_s390_irq_state irq_state;
3841 
3842 		r = -EFAULT;
3843 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3844 			break;
3845 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3846 		    irq_state.len == 0 ||
3847 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3848 			r = -EINVAL;
3849 			break;
3850 		}
3851 		r = kvm_s390_set_irq_state(vcpu,
3852 					   (void __user *) irq_state.buf,
3853 					   irq_state.len);
3854 		break;
3855 	}
3856 	case KVM_S390_GET_IRQ_STATE: {
3857 		struct kvm_s390_irq_state irq_state;
3858 
3859 		r = -EFAULT;
3860 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3861 			break;
3862 		if (irq_state.len == 0) {
3863 			r = -EINVAL;
3864 			break;
3865 		}
3866 		r = kvm_s390_get_irq_state(vcpu,
3867 					   (__u8 __user *)  irq_state.buf,
3868 					   irq_state.len);
3869 		break;
3870 	}
3871 	default:
3872 		r = -ENOTTY;
3873 	}
3874 	return r;
3875 }
3876 
3877 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3878 {
3879 #ifdef CONFIG_KVM_S390_UCONTROL
3880 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3881 		 && (kvm_is_ucontrol(vcpu->kvm))) {
3882 		vmf->page = virt_to_page(vcpu->arch.sie_block);
3883 		get_page(vmf->page);
3884 		return 0;
3885 	}
3886 #endif
3887 	return VM_FAULT_SIGBUS;
3888 }
3889 
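/*
 * No per-memslot arch data needs to be allocated on s390; the guest
 * mapping is established via gmap in the commit hook below.
 */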
3890 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3891 			    unsigned long npages)
3892 {
3893 	return 0;
3894 }
3895 
3896 /* Section: memory related */
3897 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3898 				   struct kvm_memory_slot *memslot,
3899 				   const struct kvm_userspace_memory_region *mem,
3900 				   enum kvm_mr_change change)
3901 {
3902 	/* A few sanity checks. Memory slots have to start and end on a
3903 	   segment boundary (1MB). The memory in userland may be fragmented
3904 	   into various different vmas. It is okay to mmap() and munmap()
3905 	   stuff in this slot at any time after this call. */
3906 
3907 	if (mem->userspace_addr & 0xffffful)
3908 		return -EINVAL;
3909 
3910 	if (mem->memory_size & 0xffffful)
3911 		return -EINVAL;
3912 
3913 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3914 		return -EINVAL;
3915 
3916 	return 0;
3917 }
3918 
3919 void kvm_arch_commit_memory_region(struct kvm *kvm,
3920 				const struct kvm_userspace_memory_region *mem,
3921 				const struct kvm_memory_slot *old,
3922 				const struct kvm_memory_slot *new,
3923 				enum kvm_mr_change change)
3924 {
3925 	int rc = 0;
3926 
3927 	switch (change) {
3928 	case KVM_MR_DELETE:
3929 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
3930 					old->npages * PAGE_SIZE);
3931 		break;
3932 	case KVM_MR_MOVE:
3933 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
3934 					old->npages * PAGE_SIZE);
3935 		if (rc)
3936 			break;
3937 		/* FALLTHROUGH */
3938 	case KVM_MR_CREATE:
3939 		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3940 				      mem->guest_phys_addr, mem->memory_size);
3941 		break;
3942 	case KVM_MR_FLAGS_ONLY:
3943 		break;
3944 	default:
3945 		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
3946 	}
3947 	if (rc)
3948 		pr_warn("failed to commit memory region\n");
3949 	return;
3950 }
3951 
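/*
 * Editorial note: the code below reads sclp.hmfai as two bits per
 * facility word and uses them to mask off facility bits of word i
 * that should not be offered to the guest.
 */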
3952 static inline unsigned long nonhyp_mask(int i)
3953 {
3954 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3955 
3956 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3957 }
3958 
3959 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3960 {
3961 	vcpu->valid_wakeup = false;
3962 }
3963 
3964 static int __init kvm_s390_init(void)
3965 {
3966 	int i;
3967 
3968 	if (!sclp.has_sief2) {
3969 		pr_info("SIE not available\n");
3970 		return -ENODEV;
3971 	}
3972 
3973 	for (i = 0; i < 16; i++)
3974 		kvm_s390_fac_list_mask[i] |=
3975 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3976 
3977 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3978 }
3979 
3980 static void __exit kvm_s390_exit(void)
3981 {
3982 	kvm_exit();
3983 }
3984 
3985 module_init(kvm_s390_init);
3986 module_exit(kvm_s390_exit);
3987 
3988 /*
3989  * Enable autoloading of the kvm module.
3990  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3991  * since x86 takes a different approach.
3992  */
3993 #include <linux/miscdevice.h>
3994 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3995 MODULE_ALIAS("devname:kvm");
3996