1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * hosting IBM Z kernel virtual machines (s390x)
4 *
5 * Copyright IBM Corp. 2008, 2020
6 *
7 * Author(s): Carsten Otte <cotte@de.ibm.com>
8 * Christian Borntraeger <borntraeger@de.ibm.com>
9 * Heiko Carstens <heiko.carstens@de.ibm.com>
10 * Christian Ehrhardt <ehrhardt@de.ibm.com>
11 * Jason J. Herne <jjherne@us.ibm.com>
12 */
13
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
35
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include "kvm-s390.h"
49 #include "gaccess.h"
50
51 #define CREATE_TRACE_POINTS
52 #include "trace.h"
53 #include "trace-s390.h"
54
55 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
56 #define LOCAL_IRQS 32
57 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
58 (KVM_MAX_VCPUS + LOCAL_IRQS))
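/*
 * Rough upper bound for the irq state buffer handled by the
 * KVM_S390_*_IRQ_STATE ioctls: about one kvm_s390_irq entry per potential
 * signalling VCPU plus the local interrupt types.
 */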
59
60 struct kvm_stats_debugfs_item debugfs_entries[] = {
61 VCPU_STAT("userspace_handled", exit_userspace),
62 VCPU_STAT("exit_null", exit_null),
63 VCPU_STAT("exit_validity", exit_validity),
64 VCPU_STAT("exit_stop_request", exit_stop_request),
65 VCPU_STAT("exit_external_request", exit_external_request),
66 VCPU_STAT("exit_io_request", exit_io_request),
67 VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
68 VCPU_STAT("exit_instruction", exit_instruction),
69 VCPU_STAT("exit_pei", exit_pei),
70 VCPU_STAT("exit_program_interruption", exit_program_interruption),
71 VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
72 VCPU_STAT("exit_operation_exception", exit_operation_exception),
73 VCPU_STAT("halt_successful_poll", halt_successful_poll),
74 VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
75 VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
76 VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
77 VCPU_STAT("halt_wakeup", halt_wakeup),
78 VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
79 VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
80 VCPU_STAT("instruction_lctlg", instruction_lctlg),
81 VCPU_STAT("instruction_lctl", instruction_lctl),
82 VCPU_STAT("instruction_stctl", instruction_stctl),
83 VCPU_STAT("instruction_stctg", instruction_stctg),
84 VCPU_STAT("deliver_ckc", deliver_ckc),
85 VCPU_STAT("deliver_cputm", deliver_cputm),
86 VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
87 VCPU_STAT("deliver_external_call", deliver_external_call),
88 VCPU_STAT("deliver_service_signal", deliver_service_signal),
89 VCPU_STAT("deliver_virtio", deliver_virtio),
90 VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
91 VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
92 VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
93 VCPU_STAT("deliver_program", deliver_program),
94 VCPU_STAT("deliver_io", deliver_io),
95 VCPU_STAT("deliver_machine_check", deliver_machine_check),
96 VCPU_STAT("exit_wait_state", exit_wait_state),
97 VCPU_STAT("inject_ckc", inject_ckc),
98 VCPU_STAT("inject_cputm", inject_cputm),
99 VCPU_STAT("inject_external_call", inject_external_call),
100 VM_STAT("inject_float_mchk", inject_float_mchk),
101 VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
102 VM_STAT("inject_io", inject_io),
103 VCPU_STAT("inject_mchk", inject_mchk),
104 VM_STAT("inject_pfault_done", inject_pfault_done),
105 VCPU_STAT("inject_program", inject_program),
106 VCPU_STAT("inject_restart", inject_restart),
107 VM_STAT("inject_service_signal", inject_service_signal),
108 VCPU_STAT("inject_set_prefix", inject_set_prefix),
109 VCPU_STAT("inject_stop_signal", inject_stop_signal),
110 VCPU_STAT("inject_pfault_init", inject_pfault_init),
111 VM_STAT("inject_virtio", inject_virtio),
112 VCPU_STAT("instruction_epsw", instruction_epsw),
113 VCPU_STAT("instruction_gs", instruction_gs),
114 VCPU_STAT("instruction_io_other", instruction_io_other),
115 VCPU_STAT("instruction_lpsw", instruction_lpsw),
116 VCPU_STAT("instruction_lpswe", instruction_lpswe),
117 VCPU_STAT("instruction_pfmf", instruction_pfmf),
118 VCPU_STAT("instruction_ptff", instruction_ptff),
119 VCPU_STAT("instruction_stidp", instruction_stidp),
120 VCPU_STAT("instruction_sck", instruction_sck),
121 VCPU_STAT("instruction_sckpf", instruction_sckpf),
122 VCPU_STAT("instruction_spx", instruction_spx),
123 VCPU_STAT("instruction_stpx", instruction_stpx),
124 VCPU_STAT("instruction_stap", instruction_stap),
125 VCPU_STAT("instruction_iske", instruction_iske),
126 VCPU_STAT("instruction_ri", instruction_ri),
127 VCPU_STAT("instruction_rrbe", instruction_rrbe),
128 VCPU_STAT("instruction_sske", instruction_sske),
129 VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
130 VCPU_STAT("instruction_essa", instruction_essa),
131 VCPU_STAT("instruction_stsi", instruction_stsi),
132 VCPU_STAT("instruction_stfl", instruction_stfl),
133 VCPU_STAT("instruction_tb", instruction_tb),
134 VCPU_STAT("instruction_tpi", instruction_tpi),
135 VCPU_STAT("instruction_tprot", instruction_tprot),
136 VCPU_STAT("instruction_tsch", instruction_tsch),
137 VCPU_STAT("instruction_sthyi", instruction_sthyi),
138 VCPU_STAT("instruction_sie", instruction_sie),
139 VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
140 VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
141 VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
142 VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
143 VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
144 VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
145 VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
146 VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
147 VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
148 VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
149 VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
150 VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
151 VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
152 VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
153 VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
154 VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
155 VCPU_STAT("instruction_diag_10", diagnose_10),
156 VCPU_STAT("instruction_diag_44", diagnose_44),
157 VCPU_STAT("instruction_diag_9c", diagnose_9c),
158 VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
159 VCPU_STAT("instruction_diag_258", diagnose_258),
160 VCPU_STAT("instruction_diag_308", diagnose_308),
161 VCPU_STAT("instruction_diag_500", diagnose_500),
162 VCPU_STAT("instruction_diag_other", diagnose_other),
163 { NULL }
164 };
165
166 struct kvm_s390_tod_clock_ext {
167 __u8 epoch_idx;
168 __u64 tod;
169 __u8 reserved[7];
170 } __packed;
171
172 /* allow nested virtualization in KVM (if enabled by user space) */
173 static int nested;
174 module_param(nested, int, S_IRUGO);
175 MODULE_PARM_DESC(nested, "Nested virtualization support");
176
177 /* allow 1m huge page guest backing, if !nested */
178 static int hpage;
179 module_param(hpage, int, 0444);
180 MODULE_PARM_DESC(hpage, "1m huge page backing support");
181
182 /* maximum percentage of steal time for polling. >100 is treated like 100 */
183 static u8 halt_poll_max_steal = 10;
184 module_param(halt_poll_max_steal, byte, 0644);
185 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
186
187 /* if set to true, the GISA will be initialized and used if available */
188 static bool use_gisa = true;
189 module_param(use_gisa, bool, 0644);
190 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
191
192 /*
193 * For now we handle at most 16 double words as this is what the s390 base
194 * kernel handles and stores in the prefix page. If we ever need to go beyond
195 * this, it will require changes to the code, but the external uapi can stay.
196 */
197 #define SIZE_INTERNAL 16
198
199 /*
200 * Base feature mask that defines the default facility mask. Consists of the
201 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
202 */
203 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
204 /*
205 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
206 * and defines the facilities that can be enabled via a cpu model.
207 */
208 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
209
210 static unsigned long kvm_s390_fac_size(void)
211 {
212 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
213 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
214 BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
215 sizeof(S390_lowcore.stfle_fac_list));
216
217 return SIZE_INTERNAL;
218 }
219
220 /* available cpu features supported by kvm */
221 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
222 /* available subfunctions indicated via query / "test bit" */
223 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
224
225 static struct gmap_notifier gmap_notifier;
226 static struct gmap_notifier vsie_gmap_notifier;
227 debug_info_t *kvm_s390_dbf;
228 debug_info_t *kvm_s390_dbf_uv;
229
230 /* Section: not file related */
231 int kvm_arch_hardware_enable(void)
232 {
233 /* every s390 is virtualization enabled ;-) */
234 return 0;
235 }
236
237 int kvm_arch_check_processor_compat(void *opaque)
238 {
239 return 0;
240 }
241
242 /* forward declarations */
243 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
244 unsigned long end);
245 static int sca_switch_to_extended(struct kvm *kvm);
246
247 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
248 {
249 u8 delta_idx = 0;
250
251 /*
252 * The TOD jumps by delta; we have to compensate for this by adding
253 * -delta to the epoch.
254 */
255 delta = -delta;
256
257 /* sign-extension - we're adding to signed values below */
258 if ((s64)delta < 0)
259 delta_idx = -1;
260
261 scb->epoch += delta;
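/*
 * With the multiple-epoch facility (ECD_MEF) the guest epoch is a 128-bit
 * value (epdx:epoch). If the 64-bit addition above wrapped, the unsigned
 * compare below adds the missing carry into the epoch index on top of the
 * sign extension applied via delta_idx.
 */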
262 if (scb->ecd & ECD_MEF) {
263 scb->epdx += delta_idx;
264 if (scb->epoch < delta)
265 scb->epdx += 1;
266 }
267 }
268
269 /*
270 * This callback is executed during stop_machine(). All CPUs are therefore
271 * temporarily stopped. In order not to change guest behavior, we have to
272 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
273 * so a CPU won't be stopped while calculating with the epoch.
274 */
275 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
276 void *v)
277 {
278 struct kvm *kvm;
279 struct kvm_vcpu *vcpu;
280 int i;
281 unsigned long long *delta = v;
282
283 list_for_each_entry(kvm, &vm_list, vm_list) {
284 kvm_for_each_vcpu(i, vcpu, kvm) {
285 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
286 if (i == 0) {
287 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
288 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
289 }
290 if (vcpu->arch.cputm_enabled)
291 vcpu->arch.cputm_start += *delta;
292 if (vcpu->arch.vsie_block)
293 kvm_clock_sync_scb(vcpu->arch.vsie_block,
294 *delta);
295 }
296 }
297 return NOTIFY_OK;
298 }
299
300 static struct notifier_block kvm_clock_notifier = {
301 .notifier_call = kvm_clock_sync,
302 };
303
304 int kvm_arch_hardware_setup(void *opaque)
305 {
306 gmap_notifier.notifier_call = kvm_gmap_notifier;
307 gmap_register_pte_notifier(&gmap_notifier);
308 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
309 gmap_register_pte_notifier(&vsie_gmap_notifier);
310 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
311 &kvm_clock_notifier);
312 return 0;
313 }
314
315 void kvm_arch_hardware_unsetup(void)
316 {
317 gmap_unregister_pte_notifier(&gmap_notifier);
318 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
319 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
320 &kvm_clock_notifier);
321 }
322
323 static void allow_cpu_feat(unsigned long nr)
324 {
325 set_bit_inv(nr, kvm_s390_available_cpu_feat);
326 }
327
328 static inline int plo_test_bit(unsigned char nr)
329 {
330 unsigned long function = (unsigned long)nr | 0x100;
331 int cc;
332
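/*
 * Function code bit 0x100 selects the "test bit" form of PERFORM LOCKED
 * OPERATION: condition code 0 below means function code nr is installed.
 */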
333 asm volatile(
334 " lgr 0,%[function]\n"
335 /* Parameter registers are ignored for "test bit" */
336 " plo 0,0,0,0(0)\n"
337 " ipm %0\n"
338 " srl %0,28\n"
339 : "=d" (cc)
340 : [function] "d" (function)
341 : "cc", "0");
342 return cc == 0;
343 }
344
345 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
346 {
347 asm volatile(
348 " lghi 0,0\n"
349 " lgr 1,%[query]\n"
350 /* Parameter registers are ignored */
351 " .insn rrf,%[opc] << 16,2,4,6,0\n"
352 :
353 : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
354 : "cc", "memory", "0", "1");
355 }
356
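/*
 * Opcodes of the instructions probed via __insn32_query(). With GR0 == 0
 * (query function) both store their query parameter block at the address
 * in GR1 instead of performing an actual operation.
 */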
357 #define INSN_SORTL 0xb938
358 #define INSN_DFLTCC 0xb939
359
360 static void kvm_s390_cpu_feat_init(void)
361 {
362 int i;
363
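/* Probe all 256 PLO function codes; record the result MSB-first, query-style. */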
364 for (i = 0; i < 256; ++i) {
365 if (plo_test_bit(i))
366 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
367 }
368
369 if (test_facility(28)) /* TOD-clock steering */
370 ptff(kvm_s390_available_subfunc.ptff,
371 sizeof(kvm_s390_available_subfunc.ptff),
372 PTFF_QAF);
373
374 if (test_facility(17)) { /* MSA */
375 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
376 kvm_s390_available_subfunc.kmac);
377 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
378 kvm_s390_available_subfunc.kmc);
379 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
380 kvm_s390_available_subfunc.km);
381 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
382 kvm_s390_available_subfunc.kimd);
383 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
384 kvm_s390_available_subfunc.klmd);
385 }
386 if (test_facility(76)) /* MSA3 */
387 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
388 kvm_s390_available_subfunc.pckmo);
389 if (test_facility(77)) { /* MSA4 */
390 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
391 kvm_s390_available_subfunc.kmctr);
392 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
393 kvm_s390_available_subfunc.kmf);
394 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
395 kvm_s390_available_subfunc.kmo);
396 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
397 kvm_s390_available_subfunc.pcc);
398 }
399 if (test_facility(57)) /* MSA5 */
400 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
401 kvm_s390_available_subfunc.ppno);
402
403 if (test_facility(146)) /* MSA8 */
404 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
405 kvm_s390_available_subfunc.kma);
406
407 if (test_facility(155)) /* MSA9 */
408 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
409 kvm_s390_available_subfunc.kdsa);
410
411 if (test_facility(150)) /* SORTL */
412 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
413
414 if (test_facility(151)) /* DFLTCC */
415 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
416
417 if (MACHINE_HAS_ESOP)
418 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
419 /*
420 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
421 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
422 */
423 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
424 !test_facility(3) || !nested)
425 return;
426 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
427 if (sclp.has_64bscao)
428 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
429 if (sclp.has_siif)
430 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
431 if (sclp.has_gpere)
432 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
433 if (sclp.has_gsls)
434 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
435 if (sclp.has_ib)
436 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
437 if (sclp.has_cei)
438 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
439 if (sclp.has_ibs)
440 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
441 if (sclp.has_kss)
442 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
443 /*
444 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
445 * all skey handling functions read/set the skey from the PGSTE
446 * instead of the real storage key.
447 *
448 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
449 * pages being detected as preserved although they are resident.
450 *
451 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
452 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
453 *
454 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
455 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
456 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
457 *
458 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
459 * cannot easily shadow the SCA because of the ipte lock.
460 */
461 }
462
463 int kvm_arch_init(void *opaque)
464 {
465 int rc = -ENOMEM;
466
467 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
468 if (!kvm_s390_dbf)
469 return -ENOMEM;
470
471 kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
472 if (!kvm_s390_dbf_uv)
473 goto out;
474
475 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
476 debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
477 goto out;
478
479 kvm_s390_cpu_feat_init();
480
481 /* Register floating interrupt controller interface. */
482 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
483 if (rc) {
484 pr_err("A FLIC registration call failed with rc=%d\n", rc);
485 goto out;
486 }
487
488 rc = kvm_s390_gib_init(GAL_ISC);
489 if (rc)
490 goto out;
491
492 return 0;
493
494 out:
495 kvm_arch_exit();
496 return rc;
497 }
498
499 void kvm_arch_exit(void)
500 {
501 kvm_s390_gib_destroy();
502 debug_unregister(kvm_s390_dbf);
503 debug_unregister(kvm_s390_dbf_uv);
504 }
505
506 /* Section: device related */
507 long kvm_arch_dev_ioctl(struct file *filp,
508 unsigned int ioctl, unsigned long arg)
509 {
510 if (ioctl == KVM_S390_ENABLE_SIE)
511 return s390_enable_sie();
512 return -EINVAL;
513 }
514
515 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
516 {
517 int r;
518
519 switch (ext) {
520 case KVM_CAP_S390_PSW:
521 case KVM_CAP_S390_GMAP:
522 case KVM_CAP_SYNC_MMU:
523 #ifdef CONFIG_KVM_S390_UCONTROL
524 case KVM_CAP_S390_UCONTROL:
525 #endif
526 case KVM_CAP_ASYNC_PF:
527 case KVM_CAP_SYNC_REGS:
528 case KVM_CAP_ONE_REG:
529 case KVM_CAP_ENABLE_CAP:
530 case KVM_CAP_S390_CSS_SUPPORT:
531 case KVM_CAP_IOEVENTFD:
532 case KVM_CAP_DEVICE_CTRL:
533 case KVM_CAP_S390_IRQCHIP:
534 case KVM_CAP_VM_ATTRIBUTES:
535 case KVM_CAP_MP_STATE:
536 case KVM_CAP_IMMEDIATE_EXIT:
537 case KVM_CAP_S390_INJECT_IRQ:
538 case KVM_CAP_S390_USER_SIGP:
539 case KVM_CAP_S390_USER_STSI:
540 case KVM_CAP_S390_SKEYS:
541 case KVM_CAP_S390_IRQ_STATE:
542 case KVM_CAP_S390_USER_INSTR0:
543 case KVM_CAP_S390_CMMA_MIGRATION:
544 case KVM_CAP_S390_AIS:
545 case KVM_CAP_S390_AIS_MIGRATION:
546 case KVM_CAP_S390_VCPU_RESETS:
547 case KVM_CAP_SET_GUEST_DEBUG:
548 case KVM_CAP_S390_DIAG318:
549 r = 1;
550 break;
551 case KVM_CAP_S390_HPAGE_1M:
552 r = 0;
553 if (hpage && !kvm_is_ucontrol(kvm))
554 r = 1;
555 break;
556 case KVM_CAP_S390_MEM_OP:
557 r = MEM_OP_MAX_SIZE;
558 break;
559 case KVM_CAP_NR_VCPUS:
560 case KVM_CAP_MAX_VCPUS:
561 case KVM_CAP_MAX_VCPU_ID:
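/*
 * Without SCA entries the VCPU count is only limited by KVM itself;
 * otherwise it is capped by the basic SCA (64 slots) or, with ESCA and
 * 64-bit SCAO, by the extended SCA (248 slots).
 */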
562 r = KVM_S390_BSCA_CPU_SLOTS;
563 if (!kvm_s390_use_sca_entries())
564 r = KVM_MAX_VCPUS;
565 else if (sclp.has_esca && sclp.has_64bscao)
566 r = KVM_S390_ESCA_CPU_SLOTS;
567 break;
568 case KVM_CAP_S390_COW:
569 r = MACHINE_HAS_ESOP;
570 break;
571 case KVM_CAP_S390_VECTOR_REGISTERS:
572 r = MACHINE_HAS_VX;
573 break;
574 case KVM_CAP_S390_RI:
575 r = test_facility(64);
576 break;
577 case KVM_CAP_S390_GS:
578 r = test_facility(133);
579 break;
580 case KVM_CAP_S390_BPB:
581 r = test_facility(82);
582 break;
583 case KVM_CAP_S390_PROTECTED:
584 r = is_prot_virt_host();
585 break;
586 default:
587 r = 0;
588 }
589 return r;
590 }
591
592 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
593 {
594 int i;
595 gfn_t cur_gfn, last_gfn;
596 unsigned long gaddr, vmaddr;
597 struct gmap *gmap = kvm->arch.gmap;
598 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
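/* One bit per 4k page of a 1M segment (_PAGE_ENTRIES == 256). */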
599
600 /* Loop over all guest segments */
601 cur_gfn = memslot->base_gfn;
602 last_gfn = memslot->base_gfn + memslot->npages;
603 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
604 gaddr = gfn_to_gpa(cur_gfn);
605 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
606 if (kvm_is_error_hva(vmaddr))
607 continue;
608
609 bitmap_zero(bitmap, _PAGE_ENTRIES);
610 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
611 for (i = 0; i < _PAGE_ENTRIES; i++) {
612 if (test_bit(i, bitmap))
613 mark_page_dirty(kvm, cur_gfn + i);
614 }
615
616 if (fatal_signal_pending(current))
617 return;
618 cond_resched();
619 }
620 }
621
622 /* Section: vm related */
623 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
624
625 /*
626 * Get (and clear) the dirty memory log for a memory slot.
627 */
628 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
629 struct kvm_dirty_log *log)
630 {
631 int r;
632 unsigned long n;
633 struct kvm_memory_slot *memslot;
634 int is_dirty;
635
636 if (kvm_is_ucontrol(kvm))
637 return -EINVAL;
638
639 mutex_lock(&kvm->slots_lock);
640
641 r = -EINVAL;
642 if (log->slot >= KVM_USER_MEM_SLOTS)
643 goto out;
644
645 r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
646 if (r)
647 goto out;
648
649 /* Clear the dirty log */
650 if (is_dirty) {
651 n = kvm_dirty_bitmap_bytes(memslot);
652 memset(memslot->dirty_bitmap, 0, n);
653 }
654 r = 0;
655 out:
656 mutex_unlock(&kvm->slots_lock);
657 return r;
658 }
659
660 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
661 {
662 unsigned int i;
663 struct kvm_vcpu *vcpu;
664
665 kvm_for_each_vcpu(i, vcpu, kvm) {
666 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
667 }
668 }
669
670 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
671 {
672 int r;
673
674 if (cap->flags)
675 return -EINVAL;
676
677 switch (cap->cap) {
678 case KVM_CAP_S390_IRQCHIP:
679 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
680 kvm->arch.use_irqchip = 1;
681 r = 0;
682 break;
683 case KVM_CAP_S390_USER_SIGP:
684 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
685 kvm->arch.user_sigp = 1;
686 r = 0;
687 break;
688 case KVM_CAP_S390_VECTOR_REGISTERS:
689 mutex_lock(&kvm->lock);
690 if (kvm->created_vcpus) {
691 r = -EBUSY;
692 } else if (MACHINE_HAS_VX) {
693 set_kvm_facility(kvm->arch.model.fac_mask, 129);
694 set_kvm_facility(kvm->arch.model.fac_list, 129);
695 if (test_facility(134)) {
696 set_kvm_facility(kvm->arch.model.fac_mask, 134);
697 set_kvm_facility(kvm->arch.model.fac_list, 134);
698 }
699 if (test_facility(135)) {
700 set_kvm_facility(kvm->arch.model.fac_mask, 135);
701 set_kvm_facility(kvm->arch.model.fac_list, 135);
702 }
703 if (test_facility(148)) {
704 set_kvm_facility(kvm->arch.model.fac_mask, 148);
705 set_kvm_facility(kvm->arch.model.fac_list, 148);
706 }
707 if (test_facility(152)) {
708 set_kvm_facility(kvm->arch.model.fac_mask, 152);
709 set_kvm_facility(kvm->arch.model.fac_list, 152);
710 }
711 r = 0;
712 } else
713 r = -EINVAL;
714 mutex_unlock(&kvm->lock);
715 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
716 r ? "(not available)" : "(success)");
717 break;
718 case KVM_CAP_S390_RI:
719 r = -EINVAL;
720 mutex_lock(&kvm->lock);
721 if (kvm->created_vcpus) {
722 r = -EBUSY;
723 } else if (test_facility(64)) {
724 set_kvm_facility(kvm->arch.model.fac_mask, 64);
725 set_kvm_facility(kvm->arch.model.fac_list, 64);
726 r = 0;
727 }
728 mutex_unlock(&kvm->lock);
729 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
730 r ? "(not available)" : "(success)");
731 break;
732 case KVM_CAP_S390_AIS:
733 mutex_lock(&kvm->lock);
734 if (kvm->created_vcpus) {
735 r = -EBUSY;
736 } else {
737 set_kvm_facility(kvm->arch.model.fac_mask, 72);
738 set_kvm_facility(kvm->arch.model.fac_list, 72);
739 r = 0;
740 }
741 mutex_unlock(&kvm->lock);
742 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
743 r ? "(not available)" : "(success)");
744 break;
745 case KVM_CAP_S390_GS:
746 r = -EINVAL;
747 mutex_lock(&kvm->lock);
748 if (kvm->created_vcpus) {
749 r = -EBUSY;
750 } else if (test_facility(133)) {
751 set_kvm_facility(kvm->arch.model.fac_mask, 133);
752 set_kvm_facility(kvm->arch.model.fac_list, 133);
753 r = 0;
754 }
755 mutex_unlock(&kvm->lock);
756 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
757 r ? "(not available)" : "(success)");
758 break;
759 case KVM_CAP_S390_HPAGE_1M:
760 mutex_lock(&kvm->lock);
761 if (kvm->created_vcpus)
762 r = -EBUSY;
763 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
764 r = -EINVAL;
765 else {
766 r = 0;
767 mmap_write_lock(kvm->mm);
768 kvm->mm->context.allow_gmap_hpage_1m = 1;
769 mmap_write_unlock(kvm->mm);
770 /*
771 * We might have to create fake 4k page
772 * tables. To prevent the hardware from working on
773 * stale PGSTEs, we emulate these instructions.
774 */
775 kvm->arch.use_skf = 0;
776 kvm->arch.use_pfmfi = 0;
777 }
778 mutex_unlock(&kvm->lock);
779 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
780 r ? "(not available)" : "(success)");
781 break;
782 case KVM_CAP_S390_USER_STSI:
783 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
784 kvm->arch.user_stsi = 1;
785 r = 0;
786 break;
787 case KVM_CAP_S390_USER_INSTR0:
788 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
789 kvm->arch.user_instr0 = 1;
790 icpt_operexc_on_all_vcpus(kvm);
791 r = 0;
792 break;
793 default:
794 r = -EINVAL;
795 break;
796 }
797 return r;
798 }
799
800 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
801 {
802 int ret;
803
804 switch (attr->attr) {
805 case KVM_S390_VM_MEM_LIMIT_SIZE:
806 ret = 0;
807 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
808 kvm->arch.mem_limit);
809 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
810 ret = -EFAULT;
811 break;
812 default:
813 ret = -ENXIO;
814 break;
815 }
816 return ret;
817 }
818
819 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
820 {
821 int ret;
822 unsigned int idx;
823 switch (attr->attr) {
824 case KVM_S390_VM_MEM_ENABLE_CMMA:
825 ret = -ENXIO;
826 if (!sclp.has_cmma)
827 break;
828
829 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
830 mutex_lock(&kvm->lock);
831 if (kvm->created_vcpus)
832 ret = -EBUSY;
833 else if (kvm->mm->context.allow_gmap_hpage_1m)
834 ret = -EINVAL;
835 else {
836 kvm->arch.use_cmma = 1;
837 /* Not compatible with cmma. */
838 kvm->arch.use_pfmfi = 0;
839 ret = 0;
840 }
841 mutex_unlock(&kvm->lock);
842 break;
843 case KVM_S390_VM_MEM_CLR_CMMA:
844 ret = -ENXIO;
845 if (!sclp.has_cmma)
846 break;
847 ret = -EINVAL;
848 if (!kvm->arch.use_cmma)
849 break;
850
851 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
852 mutex_lock(&kvm->lock);
853 idx = srcu_read_lock(&kvm->srcu);
854 s390_reset_cmma(kvm->arch.gmap->mm);
855 srcu_read_unlock(&kvm->srcu, idx);
856 mutex_unlock(&kvm->lock);
857 ret = 0;
858 break;
859 case KVM_S390_VM_MEM_LIMIT_SIZE: {
860 unsigned long new_limit;
861
862 if (kvm_is_ucontrol(kvm))
863 return -EINVAL;
864
865 if (get_user(new_limit, (u64 __user *)attr->addr))
866 return -EFAULT;
867
868 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
869 new_limit > kvm->arch.mem_limit)
870 return -E2BIG;
871
872 if (!new_limit)
873 return -EINVAL;
874
875 /* gmap_create takes last usable address */
876 if (new_limit != KVM_S390_NO_MEM_LIMIT)
877 new_limit -= 1;
878
879 ret = -EBUSY;
880 mutex_lock(&kvm->lock);
881 if (!kvm->created_vcpus) {
882 /* gmap_create will round the limit up */
883 struct gmap *new = gmap_create(current->mm, new_limit);
884
885 if (!new) {
886 ret = -ENOMEM;
887 } else {
888 gmap_remove(kvm->arch.gmap);
889 new->private = kvm;
890 kvm->arch.gmap = new;
891 ret = 0;
892 }
893 }
894 mutex_unlock(&kvm->lock);
895 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
896 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
897 (void *) kvm->arch.gmap->asce);
898 break;
899 }
900 default:
901 ret = -ENXIO;
902 break;
903 }
904 return ret;
905 }
906
907 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
908
909 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
910 {
911 struct kvm_vcpu *vcpu;
912 int i;
913
914 kvm_s390_vcpu_block_all(kvm);
915
916 kvm_for_each_vcpu(i, vcpu, kvm) {
917 kvm_s390_vcpu_crypto_setup(vcpu);
918 /* recreate the shadow crycb by leaving the VSIE handler */
919 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
920 }
921
922 kvm_s390_vcpu_unblock_all(kvm);
923 }
924
925 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
926 {
927 mutex_lock(&kvm->lock);
928 switch (attr->attr) {
929 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
930 if (!test_kvm_facility(kvm, 76)) {
931 mutex_unlock(&kvm->lock);
932 return -EINVAL;
933 }
934 get_random_bytes(
935 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
936 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
937 kvm->arch.crypto.aes_kw = 1;
938 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
939 break;
940 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
941 if (!test_kvm_facility(kvm, 76)) {
942 mutex_unlock(&kvm->lock);
943 return -EINVAL;
944 }
945 get_random_bytes(
946 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
947 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
948 kvm->arch.crypto.dea_kw = 1;
949 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
950 break;
951 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
952 if (!test_kvm_facility(kvm, 76)) {
953 mutex_unlock(&kvm->lock);
954 return -EINVAL;
955 }
956 kvm->arch.crypto.aes_kw = 0;
957 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
958 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
959 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
960 break;
961 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
962 if (!test_kvm_facility(kvm, 76)) {
963 mutex_unlock(&kvm->lock);
964 return -EINVAL;
965 }
966 kvm->arch.crypto.dea_kw = 0;
967 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
968 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
969 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
970 break;
971 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
972 if (!ap_instructions_available()) {
973 mutex_unlock(&kvm->lock);
974 return -EOPNOTSUPP;
975 }
976 kvm->arch.crypto.apie = 1;
977 break;
978 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
979 if (!ap_instructions_available()) {
980 mutex_unlock(&kvm->lock);
981 return -EOPNOTSUPP;
982 }
983 kvm->arch.crypto.apie = 0;
984 break;
985 default:
986 mutex_unlock(&kvm->lock);
987 return -ENXIO;
988 }
989
990 kvm_s390_vcpu_crypto_reset_all(kvm);
991 mutex_unlock(&kvm->lock);
992 return 0;
993 }
994
995 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
996 {
997 int cx;
998 struct kvm_vcpu *vcpu;
999
1000 kvm_for_each_vcpu(cx, vcpu, kvm)
1001 kvm_s390_sync_request(req, vcpu);
1002 }
1003
1004 /*
1005 * Must be called with kvm->srcu held to avoid races on memslots, and with
1006 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1007 */
1008 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1009 {
1010 struct kvm_memory_slot *ms;
1011 struct kvm_memslots *slots;
1012 unsigned long ram_pages = 0;
1013 int slotnr;
1014
1015 /* migration mode already enabled */
1016 if (kvm->arch.migration_mode)
1017 return 0;
1018 slots = kvm_memslots(kvm);
1019 if (!slots || !slots->used_slots)
1020 return -EINVAL;
1021
1022 if (!kvm->arch.use_cmma) {
1023 kvm->arch.migration_mode = 1;
1024 return 0;
1025 }
1026 /* mark all the pages in active slots as dirty */
1027 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1028 ms = slots->memslots + slotnr;
1029 if (!ms->dirty_bitmap)
1030 return -EINVAL;
1031 /*
1032 * The second half of the bitmap is only used on x86,
1033 * and would be wasted otherwise, so we put it to good
1034 * use here to keep track of the state of the storage
1035 * attributes.
1036 */
1037 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1038 ram_pages += ms->npages;
1039 }
1040 atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1041 kvm->arch.migration_mode = 1;
1042 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1043 return 0;
1044 }
1045
1046 /*
1047 * Must be called with kvm->slots_lock to avoid races with ourselves and
1048 * kvm_s390_vm_start_migration.
1049 */
1050 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1051 {
1052 /* migration mode already disabled */
1053 if (!kvm->arch.migration_mode)
1054 return 0;
1055 kvm->arch.migration_mode = 0;
1056 if (kvm->arch.use_cmma)
1057 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1058 return 0;
1059 }
1060
1061 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1062 struct kvm_device_attr *attr)
1063 {
1064 int res = -ENXIO;
1065
1066 mutex_lock(&kvm->slots_lock);
1067 switch (attr->attr) {
1068 case KVM_S390_VM_MIGRATION_START:
1069 res = kvm_s390_vm_start_migration(kvm);
1070 break;
1071 case KVM_S390_VM_MIGRATION_STOP:
1072 res = kvm_s390_vm_stop_migration(kvm);
1073 break;
1074 default:
1075 break;
1076 }
1077 mutex_unlock(&kvm->slots_lock);
1078
1079 return res;
1080 }
1081
1082 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1083 struct kvm_device_attr *attr)
1084 {
1085 u64 mig = kvm->arch.migration_mode;
1086
1087 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1088 return -ENXIO;
1089
1090 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1091 return -EFAULT;
1092 return 0;
1093 }
1094
1095 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
1096
1097 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1098 {
1099 struct kvm_s390_vm_tod_clock gtod;
1100
1101 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1102 return -EFAULT;
1103
1104 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1105 return -EINVAL;
1106 __kvm_s390_set_tod_clock(kvm, &gtod);
1107
1108 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1109 gtod.epoch_idx, gtod.tod);
1110
1111 return 0;
1112 }
1113
1114 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1115 {
1116 u8 gtod_high;
1117
1118 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1119 sizeof(gtod_high)))
1120 return -EFAULT;
1121
1122 if (gtod_high != 0)
1123 return -EINVAL;
1124 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1125
1126 return 0;
1127 }
1128
1129 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1130 {
1131 struct kvm_s390_vm_tod_clock gtod = { 0 };
1132
1133 if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1134 sizeof(gtod.tod)))
1135 return -EFAULT;
1136
1137 __kvm_s390_set_tod_clock(kvm, &gtod);
1138 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1139 return 0;
1140 }
1141
1142 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1143 {
1144 int ret;
1145
1146 if (attr->flags)
1147 return -EINVAL;
1148
1149 mutex_lock(&kvm->lock);
1150 /*
1151 * For protected guests, the TOD is managed by the ultravisor, so trying
1152 * to change it will never bring the expected results.
1153 */
1154 if (kvm_s390_pv_is_protected(kvm)) {
1155 ret = -EOPNOTSUPP;
1156 goto out_unlock;
1157 }
1158
1159 switch (attr->attr) {
1160 case KVM_S390_VM_TOD_EXT:
1161 ret = kvm_s390_set_tod_ext(kvm, attr);
1162 break;
1163 case KVM_S390_VM_TOD_HIGH:
1164 ret = kvm_s390_set_tod_high(kvm, attr);
1165 break;
1166 case KVM_S390_VM_TOD_LOW:
1167 ret = kvm_s390_set_tod_low(kvm, attr);
1168 break;
1169 default:
1170 ret = -ENXIO;
1171 break;
1172 }
1173
1174 out_unlock:
1175 mutex_unlock(&kvm->lock);
1176 return ret;
1177 }
1178
1179 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1180 struct kvm_s390_vm_tod_clock *gtod)
1181 {
1182 struct kvm_s390_tod_clock_ext htod;
1183
1184 preempt_disable();
1185
1186 get_tod_clock_ext((char *)&htod);
1187
1188 gtod->tod = htod.tod + kvm->arch.epoch;
1189 gtod->epoch_idx = 0;
1190 if (test_kvm_facility(kvm, 139)) {
1191 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
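/* The epoch addition wrapped the 64-bit TOD value, so carry into the epoch index. */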
1192 if (gtod->tod < htod.tod)
1193 gtod->epoch_idx += 1;
1194 }
1195
1196 preempt_enable();
1197 }
1198
1199 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1200 {
1201 struct kvm_s390_vm_tod_clock gtod;
1202
1203 memset(&gtod, 0, sizeof(gtod));
1204 kvm_s390_get_tod_clock(kvm, &gtod);
1205 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1206 return -EFAULT;
1207
1208 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1209 gtod.epoch_idx, gtod.tod);
1210 return 0;
1211 }
1212
1213 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1214 {
1215 u8 gtod_high = 0;
1216
1217 if (copy_to_user((void __user *)attr->addr, &gtod_high,
1218 sizeof(gtod_high)))
1219 return -EFAULT;
1220 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1221
1222 return 0;
1223 }
1224
1225 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1226 {
1227 u64 gtod;
1228
1229 gtod = kvm_s390_get_tod_clock_fast(kvm);
1230 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1231 return -EFAULT;
1232 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1233
1234 return 0;
1235 }
1236
1237 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1238 {
1239 int ret;
1240
1241 if (attr->flags)
1242 return -EINVAL;
1243
1244 switch (attr->attr) {
1245 case KVM_S390_VM_TOD_EXT:
1246 ret = kvm_s390_get_tod_ext(kvm, attr);
1247 break;
1248 case KVM_S390_VM_TOD_HIGH:
1249 ret = kvm_s390_get_tod_high(kvm, attr);
1250 break;
1251 case KVM_S390_VM_TOD_LOW:
1252 ret = kvm_s390_get_tod_low(kvm, attr);
1253 break;
1254 default:
1255 ret = -ENXIO;
1256 break;
1257 }
1258 return ret;
1259 }
1260
1261 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1262 {
1263 struct kvm_s390_vm_cpu_processor *proc;
1264 u16 lowest_ibc, unblocked_ibc;
1265 int ret = 0;
1266
1267 mutex_lock(&kvm->lock);
1268 if (kvm->created_vcpus) {
1269 ret = -EBUSY;
1270 goto out;
1271 }
1272 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1273 if (!proc) {
1274 ret = -ENOMEM;
1275 goto out;
1276 }
1277 if (!copy_from_user(proc, (void __user *)attr->addr,
1278 sizeof(*proc))) {
1279 kvm->arch.model.cpuid = proc->cpuid;
1280 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1281 unblocked_ibc = sclp.ibc & 0xfff;
1282 if (lowest_ibc && proc->ibc) {
1283 if (proc->ibc > unblocked_ibc)
1284 kvm->arch.model.ibc = unblocked_ibc;
1285 else if (proc->ibc < lowest_ibc)
1286 kvm->arch.model.ibc = lowest_ibc;
1287 else
1288 kvm->arch.model.ibc = proc->ibc;
1289 }
1290 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1291 S390_ARCH_FAC_LIST_SIZE_BYTE);
1292 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1293 kvm->arch.model.ibc,
1294 kvm->arch.model.cpuid);
1295 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1296 kvm->arch.model.fac_list[0],
1297 kvm->arch.model.fac_list[1],
1298 kvm->arch.model.fac_list[2]);
1299 } else
1300 ret = -EFAULT;
1301 kfree(proc);
1302 out:
1303 mutex_unlock(&kvm->lock);
1304 return ret;
1305 }
1306
1307 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1308 struct kvm_device_attr *attr)
1309 {
1310 struct kvm_s390_vm_cpu_feat data;
1311
1312 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1313 return -EFAULT;
1314 if (!bitmap_subset((unsigned long *) data.feat,
1315 kvm_s390_available_cpu_feat,
1316 KVM_S390_VM_CPU_FEAT_NR_BITS))
1317 return -EINVAL;
1318
1319 mutex_lock(&kvm->lock);
1320 if (kvm->created_vcpus) {
1321 mutex_unlock(&kvm->lock);
1322 return -EBUSY;
1323 }
1324 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1325 KVM_S390_VM_CPU_FEAT_NR_BITS);
1326 mutex_unlock(&kvm->lock);
1327 VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1328 data.feat[0],
1329 data.feat[1],
1330 data.feat[2]);
1331 return 0;
1332 }
1333
1334 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1335 struct kvm_device_attr *attr)
1336 {
1337 mutex_lock(&kvm->lock);
1338 if (kvm->created_vcpus) {
1339 mutex_unlock(&kvm->lock);
1340 return -EBUSY;
1341 }
1342
1343 if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1344 sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1345 mutex_unlock(&kvm->lock);
1346 return -EFAULT;
1347 }
1348 mutex_unlock(&kvm->lock);
1349
1350 VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1351 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1352 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1353 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1354 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1355 VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1356 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1357 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1358 VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1359 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1360 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1361 VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1362 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1363 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1364 VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
1365 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1366 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1367 VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1368 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1369 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1370 VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1371 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1372 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1373 VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1374 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1375 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1376 VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1377 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1378 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1379 VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1380 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1381 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1382 VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1383 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1384 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1385 VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1386 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1387 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1388 VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1389 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1390 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1391 VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1392 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1393 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1394 VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1395 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1396 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1397 VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1398 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1399 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1400 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1401 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1402 VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1403 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1404 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1405 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1406 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1407
1408 return 0;
1409 }
1410
1411 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1412 {
1413 int ret = -ENXIO;
1414
1415 switch (attr->attr) {
1416 case KVM_S390_VM_CPU_PROCESSOR:
1417 ret = kvm_s390_set_processor(kvm, attr);
1418 break;
1419 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1420 ret = kvm_s390_set_processor_feat(kvm, attr);
1421 break;
1422 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1423 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1424 break;
1425 }
1426 return ret;
1427 }
1428
1429 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1430 {
1431 struct kvm_s390_vm_cpu_processor *proc;
1432 int ret = 0;
1433
1434 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1435 if (!proc) {
1436 ret = -ENOMEM;
1437 goto out;
1438 }
1439 proc->cpuid = kvm->arch.model.cpuid;
1440 proc->ibc = kvm->arch.model.ibc;
1441 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1442 S390_ARCH_FAC_LIST_SIZE_BYTE);
1443 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1444 kvm->arch.model.ibc,
1445 kvm->arch.model.cpuid);
1446 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1447 kvm->arch.model.fac_list[0],
1448 kvm->arch.model.fac_list[1],
1449 kvm->arch.model.fac_list[2]);
1450 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1451 ret = -EFAULT;
1452 kfree(proc);
1453 out:
1454 return ret;
1455 }
1456
1457 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1458 {
1459 struct kvm_s390_vm_cpu_machine *mach;
1460 int ret = 0;
1461
1462 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1463 if (!mach) {
1464 ret = -ENOMEM;
1465 goto out;
1466 }
1467 get_cpu_id((struct cpuid *) &mach->cpuid);
1468 mach->ibc = sclp.ibc;
1469 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1470 S390_ARCH_FAC_LIST_SIZE_BYTE);
1471 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1472 sizeof(S390_lowcore.stfle_fac_list));
1473 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1474 kvm->arch.model.ibc,
1475 kvm->arch.model.cpuid);
1476 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1477 mach->fac_mask[0],
1478 mach->fac_mask[1],
1479 mach->fac_mask[2]);
1480 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1481 mach->fac_list[0],
1482 mach->fac_list[1],
1483 mach->fac_list[2]);
1484 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1485 ret = -EFAULT;
1486 kfree(mach);
1487 out:
1488 return ret;
1489 }
1490
1491 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1492 struct kvm_device_attr *attr)
1493 {
1494 struct kvm_s390_vm_cpu_feat data;
1495
1496 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1497 KVM_S390_VM_CPU_FEAT_NR_BITS);
1498 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1499 return -EFAULT;
1500 VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1501 data.feat[0],
1502 data.feat[1],
1503 data.feat[2]);
1504 return 0;
1505 }
1506
1507 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1508 struct kvm_device_attr *attr)
1509 {
1510 struct kvm_s390_vm_cpu_feat data;
1511
1512 bitmap_copy((unsigned long *) data.feat,
1513 kvm_s390_available_cpu_feat,
1514 KVM_S390_VM_CPU_FEAT_NR_BITS);
1515 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1516 return -EFAULT;
1517 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1518 data.feat[0],
1519 data.feat[1],
1520 data.feat[2]);
1521 return 0;
1522 }
1523
1524 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1525 struct kvm_device_attr *attr)
1526 {
1527 if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1528 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1529 return -EFAULT;
1530
1531 VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1532 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1533 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1534 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1535 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1536 VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1537 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1538 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1539 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1540 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1541 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1542 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1543 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1544 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1545 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
1546 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1547 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1548 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1549 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1550 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1551 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1552 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1553 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1554 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1555 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1556 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1557 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1558 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1559 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1560 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1561 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1562 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1563 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1564 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1565 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1566 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1567 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1568 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1569 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1570 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1571 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1572 VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1573 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1574 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1575 VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1576 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1577 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1578 VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1579 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1580 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1581 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1582 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1583 VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1584 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1585 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1586 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1587 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1588
1589 return 0;
1590 }
1591
1592 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1593 struct kvm_device_attr *attr)
1594 {
1595 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1596 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1597 return -EFAULT;
1598
1599 VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1600 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1601 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1602 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1603 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1604 VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
1605 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1606 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1607 VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
1608 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1609 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1610 VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
1611 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1612 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1613 VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
1614 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1615 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1616 VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
1617 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1618 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1619 VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
1620 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1621 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1622 VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
1623 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1624 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1625 VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
1626 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1627 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1628 VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
1629 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1630 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1631 VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
1632 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1633 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1634 VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
1635 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1636 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1637 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
1638 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1639 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1640 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
1641 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1642 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1643 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
1644 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1645 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1646 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1647 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1648 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1649 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1650 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1651 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1652 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1653 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1654 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1655 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1656
1657 return 0;
1658 }
1659
1660 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1661 {
1662 int ret = -ENXIO;
1663
1664 switch (attr->attr) {
1665 case KVM_S390_VM_CPU_PROCESSOR:
1666 ret = kvm_s390_get_processor(kvm, attr);
1667 break;
1668 case KVM_S390_VM_CPU_MACHINE:
1669 ret = kvm_s390_get_machine(kvm, attr);
1670 break;
1671 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1672 ret = kvm_s390_get_processor_feat(kvm, attr);
1673 break;
1674 case KVM_S390_VM_CPU_MACHINE_FEAT:
1675 ret = kvm_s390_get_machine_feat(kvm, attr);
1676 break;
1677 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1678 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1679 break;
1680 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1681 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1682 break;
1683 }
1684 return ret;
1685 }
1686
1687 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1688 {
1689 int ret;
1690
1691 switch (attr->group) {
1692 case KVM_S390_VM_MEM_CTRL:
1693 ret = kvm_s390_set_mem_control(kvm, attr);
1694 break;
1695 case KVM_S390_VM_TOD:
1696 ret = kvm_s390_set_tod(kvm, attr);
1697 break;
1698 case KVM_S390_VM_CPU_MODEL:
1699 ret = kvm_s390_set_cpu_model(kvm, attr);
1700 break;
1701 case KVM_S390_VM_CRYPTO:
1702 ret = kvm_s390_vm_set_crypto(kvm, attr);
1703 break;
1704 case KVM_S390_VM_MIGRATION:
1705 ret = kvm_s390_vm_set_migration(kvm, attr);
1706 break;
1707 default:
1708 ret = -ENXIO;
1709 break;
1710 }
1711
1712 return ret;
1713 }
1714
1715 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1716 {
1717 int ret;
1718
1719 switch (attr->group) {
1720 case KVM_S390_VM_MEM_CTRL:
1721 ret = kvm_s390_get_mem_control(kvm, attr);
1722 break;
1723 case KVM_S390_VM_TOD:
1724 ret = kvm_s390_get_tod(kvm, attr);
1725 break;
1726 case KVM_S390_VM_CPU_MODEL:
1727 ret = kvm_s390_get_cpu_model(kvm, attr);
1728 break;
1729 case KVM_S390_VM_MIGRATION:
1730 ret = kvm_s390_vm_get_migration(kvm, attr);
1731 break;
1732 default:
1733 ret = -ENXIO;
1734 break;
1735 }
1736
1737 return ret;
1738 }
1739
1740 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1741 {
1742 int ret;
1743
1744 switch (attr->group) {
1745 case KVM_S390_VM_MEM_CTRL:
1746 switch (attr->attr) {
1747 case KVM_S390_VM_MEM_ENABLE_CMMA:
1748 case KVM_S390_VM_MEM_CLR_CMMA:
1749 ret = sclp.has_cmma ? 0 : -ENXIO;
1750 break;
1751 case KVM_S390_VM_MEM_LIMIT_SIZE:
1752 ret = 0;
1753 break;
1754 default:
1755 ret = -ENXIO;
1756 break;
1757 }
1758 break;
1759 case KVM_S390_VM_TOD:
1760 switch (attr->attr) {
1761 case KVM_S390_VM_TOD_LOW:
1762 case KVM_S390_VM_TOD_HIGH:
1763 ret = 0;
1764 break;
1765 default:
1766 ret = -ENXIO;
1767 break;
1768 }
1769 break;
1770 case KVM_S390_VM_CPU_MODEL:
1771 switch (attr->attr) {
1772 case KVM_S390_VM_CPU_PROCESSOR:
1773 case KVM_S390_VM_CPU_MACHINE:
1774 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1775 case KVM_S390_VM_CPU_MACHINE_FEAT:
1776 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1777 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1778 ret = 0;
1779 break;
1780 default:
1781 ret = -ENXIO;
1782 break;
1783 }
1784 break;
1785 case KVM_S390_VM_CRYPTO:
1786 switch (attr->attr) {
1787 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1788 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1789 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1790 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1791 ret = 0;
1792 break;
1793 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1794 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1795 ret = ap_instructions_available() ? 0 : -ENXIO;
1796 break;
1797 default:
1798 ret = -ENXIO;
1799 break;
1800 }
1801 break;
1802 case KVM_S390_VM_MIGRATION:
1803 ret = 0;
1804 break;
1805 default:
1806 ret = -ENXIO;
1807 break;
1808 }
1809
1810 return ret;
1811 }
1812
1813 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1814 {
1815 uint8_t *keys;
1816 uint64_t hva;
1817 int srcu_idx, i, r = 0;
1818
1819 if (args->flags != 0)
1820 return -EINVAL;
1821
1822 /* Is this guest using storage keys? */
1823 if (!mm_uses_skeys(current->mm))
1824 return KVM_S390_GET_SKEYS_NONE;
1825
1826 /* Enforce sane limit on memory allocation */
1827 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1828 return -EINVAL;
1829
1830 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1831 if (!keys)
1832 return -ENOMEM;
1833
1834 mmap_read_lock(current->mm);
1835 srcu_idx = srcu_read_lock(&kvm->srcu);
1836 for (i = 0; i < args->count; i++) {
1837 hva = gfn_to_hva(kvm, args->start_gfn + i);
1838 if (kvm_is_error_hva(hva)) {
1839 r = -EFAULT;
1840 break;
1841 }
1842
1843 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1844 if (r)
1845 break;
1846 }
1847 srcu_read_unlock(&kvm->srcu, srcu_idx);
1848 mmap_read_unlock(current->mm);
1849
1850 if (!r) {
1851 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1852 sizeof(uint8_t) * args->count);
1853 if (r)
1854 r = -EFAULT;
1855 }
1856
1857 kvfree(keys);
1858 return r;
1859 }
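/*
 * Illustrative sketch only (not built with this file): how userspace can
 * drive KVM_S390_GET_SKEYS on a VM file descriptor.  The names vm_fd and
 * keybuf are placeholders for this example.
 *
 *	uint8_t keybuf[128];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = sizeof(keybuf),
 *		.skeydata_addr = (__u64)(unsigned long)keybuf,
 *	};
 *	int ret = ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *	// ret == KVM_S390_GET_SKEYS_NONE: the guest does not use storage
 *	// keys and keybuf is left untouched; 0 means keybuf was filled.
 */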
1860
1861 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1862 {
1863 uint8_t *keys;
1864 uint64_t hva;
1865 int srcu_idx, i, r = 0;
1866 bool unlocked;
1867
1868 if (args->flags != 0)
1869 return -EINVAL;
1870
1871 /* Enforce sane limit on memory allocation */
1872 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1873 return -EINVAL;
1874
1875 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1876 if (!keys)
1877 return -ENOMEM;
1878
1879 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1880 sizeof(uint8_t) * args->count);
1881 if (r) {
1882 r = -EFAULT;
1883 goto out;
1884 }
1885
1886 /* Enable storage key handling for the guest */
1887 r = s390_enable_skey();
1888 if (r)
1889 goto out;
1890
1891 i = 0;
1892 mmap_read_lock(current->mm);
1893 srcu_idx = srcu_read_lock(&kvm->srcu);
1894 while (i < args->count) {
1895 unlocked = false;
1896 hva = gfn_to_hva(kvm, args->start_gfn + i);
1897 if (kvm_is_error_hva(hva)) {
1898 r = -EFAULT;
1899 break;
1900 }
1901
1902 /* Lowest order bit is reserved */
1903 if (keys[i] & 0x01) {
1904 r = -EINVAL;
1905 break;
1906 }
1907
1908 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1909 if (r) {
1910 r = fixup_user_fault(current->mm, hva,
1911 FAULT_FLAG_WRITE, &unlocked);
1912 if (r)
1913 break;
1914 }
1915 if (!r)
1916 i++;
1917 }
1918 srcu_read_unlock(&kvm->srcu, srcu_idx);
1919 mmap_read_unlock(current->mm);
1920 out:
1921 kvfree(keys);
1922 return r;
1923 }
1924
1925 /*
1926 * Base address and length must be sent at the start of each block; therefore
1927 * it's cheaper to send some clean data, as long as it's less than the size of
1928 * two longs.
1929 */
1930 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1931 /* for consistency */
1932 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
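/*
 * Worked example (informational): with 8-byte pointers this makes
 * KVM_S390_MAX_BIT_DISTANCE 16 values, so a gap of up to 16 clean pages
 * is still copied into the current block; starting a new block instead
 * would cost a fresh start_gfn/count header of the same size.
 */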
1933
1934 /*
1935 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1936 * address falls in a hole. In that case the index of one of the memslots
1937 * bordering the hole is returned.
1938 */
1939 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1940 {
1941 int start = 0, end = slots->used_slots;
1942 int slot = atomic_read(&slots->lru_slot);
1943 struct kvm_memory_slot *memslots = slots->memslots;
1944
1945 if (gfn >= memslots[slot].base_gfn &&
1946 gfn < memslots[slot].base_gfn + memslots[slot].npages)
1947 return slot;
1948
1949 while (start < end) {
1950 slot = start + (end - start) / 2;
1951
1952 if (gfn >= memslots[slot].base_gfn)
1953 end = slot;
1954 else
1955 start = slot + 1;
1956 }
1957
1958 if (start >= slots->used_slots)
1959 return slots->used_slots - 1;
1960
1961 if (gfn >= memslots[start].base_gfn &&
1962 gfn < memslots[start].base_gfn + memslots[start].npages) {
1963 atomic_set(&slots->lru_slot, start);
1964 }
1965
1966 return start;
1967 }
1968
1969 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1970 u8 *res, unsigned long bufsize)
1971 {
1972 unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1973
1974 args->count = 0;
1975 while (args->count < bufsize) {
1976 hva = gfn_to_hva(kvm, cur_gfn);
1977 /*
1978 * We return an error if the first value was invalid, but we
1979 * return successfully if at least one value was copied.
1980 */
1981 if (kvm_is_error_hva(hva))
1982 return args->count ? 0 : -EFAULT;
1983 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1984 pgstev = 0;
1985 res[args->count++] = (pgstev >> 24) & 0x43;
1986 cur_gfn++;
1987 }
1988
1989 return 0;
1990 }
1991
1992 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1993 unsigned long cur_gfn)
1994 {
1995 int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1996 struct kvm_memory_slot *ms = slots->memslots + slotidx;
1997 unsigned long ofs = cur_gfn - ms->base_gfn;
1998
1999 if (ms->base_gfn + ms->npages <= cur_gfn) {
2000 slotidx--;
2001 /* If we are above the highest slot, wrap around */
2002 if (slotidx < 0)
2003 slotidx = slots->used_slots - 1;
2004
2005 ms = slots->memslots + slotidx;
2006 ofs = 0;
2007 }
2008 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2009 while ((slotidx > 0) && (ofs >= ms->npages)) {
2010 slotidx--;
2011 ms = slots->memslots + slotidx;
2012 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2013 }
2014 return ms->base_gfn + ofs;
2015 }
2016
2017 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2018 u8 *res, unsigned long bufsize)
2019 {
2020 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2021 struct kvm_memslots *slots = kvm_memslots(kvm);
2022 struct kvm_memory_slot *ms;
2023
2024 if (unlikely(!slots->used_slots))
2025 return 0;
2026
2027 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2028 ms = gfn_to_memslot(kvm, cur_gfn);
2029 args->count = 0;
2030 args->start_gfn = cur_gfn;
2031 if (!ms)
2032 return 0;
2033 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2034 mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2035
2036 while (args->count < bufsize) {
2037 hva = gfn_to_hva(kvm, cur_gfn);
2038 if (kvm_is_error_hva(hva))
2039 return 0;
2040 /* Decrement only if we actually flipped the bit to 0 */
2041 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2042 atomic64_dec(&kvm->arch.cmma_dirty_pages);
2043 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2044 pgstev = 0;
2045 /* Save the value */
2046 res[args->count++] = (pgstev >> 24) & 0x43;
2047 /* If the next bit is too far away, stop. */
2048 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2049 return 0;
2050 /* If we reached the previous "next", find the next one */
2051 if (cur_gfn == next_gfn)
2052 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2053 /* Reached the end of memory or of the buffer, stop */
2054 if ((next_gfn >= mem_end) ||
2055 (next_gfn - args->start_gfn >= bufsize))
2056 return 0;
2057 cur_gfn++;
2058 /* Reached the end of the current memslot, take the next one. */
2059 if (cur_gfn - ms->base_gfn >= ms->npages) {
2060 ms = gfn_to_memslot(kvm, cur_gfn);
2061 if (!ms)
2062 return 0;
2063 }
2064 }
2065 return 0;
2066 }
2067
2068 /*
2069 * This function searches for the next page with dirty CMMA attributes, and
2070 * saves the attributes in the buffer up to either the end of the buffer or
2071 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2072 * no trailing clean bytes are saved.
2073 * In case no dirty bits were found, or if CMMA was not enabled or used, the
2074 * output buffer will indicate 0 as length.
2075 */
2076 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2077 struct kvm_s390_cmma_log *args)
2078 {
2079 unsigned long bufsize;
2080 int srcu_idx, peek, ret;
2081 u8 *values;
2082
2083 if (!kvm->arch.use_cmma)
2084 return -ENXIO;
2085 /* Invalid/unsupported flags were specified */
2086 if (args->flags & ~KVM_S390_CMMA_PEEK)
2087 return -EINVAL;
2088 /* Migration mode query, and we are not doing a migration */
2089 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2090 if (!peek && !kvm->arch.migration_mode)
2091 return -EINVAL;
2092 /* CMMA is disabled or was not used, or the buffer has length zero */
2093 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2094 if (!bufsize || !kvm->mm->context.uses_cmm) {
2095 memset(args, 0, sizeof(*args));
2096 return 0;
2097 }
2098 /* We are not peeking, and there are no dirty pages */
2099 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2100 memset(args, 0, sizeof(*args));
2101 return 0;
2102 }
2103
2104 values = vmalloc(bufsize);
2105 if (!values)
2106 return -ENOMEM;
2107
2108 mmap_read_lock(kvm->mm);
2109 srcu_idx = srcu_read_lock(&kvm->srcu);
2110 if (peek)
2111 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2112 else
2113 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2114 srcu_read_unlock(&kvm->srcu, srcu_idx);
2115 mmap_read_unlock(kvm->mm);
2116
2117 if (kvm->arch.migration_mode)
2118 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2119 else
2120 args->remaining = 0;
2121
2122 if (copy_to_user((void __user *)args->values, values, args->count))
2123 ret = -EFAULT;
2124
2125 vfree(values);
2126 return ret;
2127 }
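/*
 * Illustrative sketch only (not built with this file): a peek query via
 * KVM_S390_GET_CMMA_BITS from userspace.  vm_fd and buf are placeholder
 * names for this example.
 *
 *	uint8_t buf[512];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = sizeof(buf),
 *		.flags = KVM_S390_CMMA_PEEK,
 *		.values = (__u64)(unsigned long)buf,
 *	};
 *	int ret = ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *	// On success log.count holds the number of values written to buf
 *	// and log.remaining the number of dirty pages still to be fetched
 *	// (0 unless the VM is in migration mode).
 */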
2128
2129 /*
2130 * This function sets the CMMA attributes for the given pages. If the input
2131 * buffer has zero length, no action is taken, otherwise the attributes are
2132 * set and the mm->context.uses_cmm flag is set.
2133 */
2134 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2135 const struct kvm_s390_cmma_log *args)
2136 {
2137 unsigned long hva, mask, pgstev, i;
2138 uint8_t *bits;
2139 int srcu_idx, r = 0;
2140
2141 mask = args->mask;
2142
2143 if (!kvm->arch.use_cmma)
2144 return -ENXIO;
2145 /* invalid/unsupported flags */
2146 if (args->flags != 0)
2147 return -EINVAL;
2148 /* Enforce sane limit on memory allocation */
2149 if (args->count > KVM_S390_CMMA_SIZE_MAX)
2150 return -EINVAL;
2151 /* Nothing to do */
2152 if (args->count == 0)
2153 return 0;
2154
2155 bits = vmalloc(array_size(sizeof(*bits), args->count));
2156 if (!bits)
2157 return -ENOMEM;
2158
2159 r = copy_from_user(bits, (void __user *)args->values, args->count);
2160 if (r) {
2161 r = -EFAULT;
2162 goto out;
2163 }
2164
2165 mmap_read_lock(kvm->mm);
2166 srcu_idx = srcu_read_lock(&kvm->srcu);
2167 for (i = 0; i < args->count; i++) {
2168 hva = gfn_to_hva(kvm, args->start_gfn + i);
2169 if (kvm_is_error_hva(hva)) {
2170 r = -EFAULT;
2171 break;
2172 }
2173
2174 pgstev = bits[i];
2175 pgstev = pgstev << 24;
2176 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2177 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2178 }
2179 srcu_read_unlock(&kvm->srcu, srcu_idx);
2180 mmap_read_unlock(kvm->mm);
2181
2182 if (!kvm->mm->context.uses_cmm) {
2183 mmap_write_lock(kvm->mm);
2184 kvm->mm->context.uses_cmm = 1;
2185 mmap_write_unlock(kvm->mm);
2186 }
2187 out:
2188 vfree(bits);
2189 return r;
2190 }
2191
2192 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2193 {
2194 struct kvm_vcpu *vcpu;
2195 u16 rc, rrc;
2196 int ret = 0;
2197 int i;
2198
2199 /*
2200 * We ignore failures and try to destroy as many CPUs as possible.
2201 * At the same time we must not free the assigned resources when
2202 * this fails, as the ultravisor still has access to that memory.
2203 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2204 * behind.
2205 * We want to return the first failure rc and rrc, though.
2206 */
2207 kvm_for_each_vcpu(i, vcpu, kvm) {
2208 mutex_lock(&vcpu->mutex);
2209 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2210 *rcp = rc;
2211 *rrcp = rrc;
2212 ret = -EIO;
2213 }
2214 mutex_unlock(&vcpu->mutex);
2215 }
2216 return ret;
2217 }
2218
2219 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2220 {
2221 int i, r = 0;
2222 u16 dummy;
2223
2224 struct kvm_vcpu *vcpu;
2225
2226 kvm_for_each_vcpu(i, vcpu, kvm) {
2227 mutex_lock(&vcpu->mutex);
2228 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2229 mutex_unlock(&vcpu->mutex);
2230 if (r)
2231 break;
2232 }
2233 if (r)
2234 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2235 return r;
2236 }
2237
2238 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2239 {
2240 int r = 0;
2241 u16 dummy;
2242 void __user *argp = (void __user *)cmd->data;
2243
2244 switch (cmd->cmd) {
2245 case KVM_PV_ENABLE: {
2246 r = -EINVAL;
2247 if (kvm_s390_pv_is_protected(kvm))
2248 break;
2249
2250 /*
2251 * FMT 4 SIE needs esca. As we never switch back to bsca from
2252 * esca, we need no cleanup in the error cases below
2253 */
2254 r = sca_switch_to_extended(kvm);
2255 if (r)
2256 break;
2257
2258 mmap_write_lock(current->mm);
2259 r = gmap_mark_unmergeable();
2260 mmap_write_unlock(current->mm);
2261 if (r)
2262 break;
2263
2264 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2265 if (r)
2266 break;
2267
2268 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2269 if (r)
2270 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2271
2272 /* we need to block service interrupts from now on */
2273 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2274 break;
2275 }
2276 case KVM_PV_DISABLE: {
2277 r = -EINVAL;
2278 if (!kvm_s390_pv_is_protected(kvm))
2279 break;
2280
2281 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2282 /*
2283 * If a CPU could not be destroyed, destroy VM will also fail.
2284 * There is no point in trying to destroy it. Instead return
2285 * the rc and rrc from the first CPU that failed destroying.
2286 */
2287 if (r)
2288 break;
2289 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2290
2291 /* no need to block service interrupts any more */
2292 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2293 break;
2294 }
2295 case KVM_PV_SET_SEC_PARMS: {
2296 struct kvm_s390_pv_sec_parm parms = {};
2297 void *hdr;
2298
2299 r = -EINVAL;
2300 if (!kvm_s390_pv_is_protected(kvm))
2301 break;
2302
2303 r = -EFAULT;
2304 if (copy_from_user(&parms, argp, sizeof(parms)))
2305 break;
2306
2307 /* Currently restricted to 8KB */
2308 r = -EINVAL;
2309 if (parms.length > PAGE_SIZE * 2)
2310 break;
2311
2312 r = -ENOMEM;
2313 hdr = vmalloc(parms.length);
2314 if (!hdr)
2315 break;
2316
2317 r = -EFAULT;
2318 if (!copy_from_user(hdr, (void __user *)parms.origin,
2319 parms.length))
2320 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2321 &cmd->rc, &cmd->rrc);
2322
2323 vfree(hdr);
2324 break;
2325 }
2326 case KVM_PV_UNPACK: {
2327 struct kvm_s390_pv_unp unp = {};
2328
2329 r = -EINVAL;
2330 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2331 break;
2332
2333 r = -EFAULT;
2334 if (copy_from_user(&unp, argp, sizeof(unp)))
2335 break;
2336
2337 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2338 &cmd->rc, &cmd->rrc);
2339 break;
2340 }
2341 case KVM_PV_VERIFY: {
2342 r = -EINVAL;
2343 if (!kvm_s390_pv_is_protected(kvm))
2344 break;
2345
2346 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2347 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2348 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2349 cmd->rrc);
2350 break;
2351 }
2352 case KVM_PV_PREP_RESET: {
2353 r = -EINVAL;
2354 if (!kvm_s390_pv_is_protected(kvm))
2355 break;
2356
2357 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2358 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2359 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2360 cmd->rc, cmd->rrc);
2361 break;
2362 }
2363 case KVM_PV_UNSHARE_ALL: {
2364 r = -EINVAL;
2365 if (!kvm_s390_pv_is_protected(kvm))
2366 break;
2367
2368 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2369 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2370 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2371 cmd->rc, cmd->rrc);
2372 break;
2373 }
2374 default:
2375 r = -ENOTTY;
2376 }
2377 return r;
2378 }
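/*
 * Illustrative sketch only (not built with this file): userspace issues
 * these subcommands through KVM_S390_PV_COMMAND, typically KVM_PV_ENABLE
 * first, then KVM_PV_SET_SEC_PARMS and KVM_PV_UNPACK for the image, and
 * finally KVM_PV_VERIFY.  vm_fd is a placeholder name for this example.
 *
 *	struct kvm_pv_cmd cmd = { .cmd = KVM_PV_ENABLE };
 *	int ret = ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
 *	// On failure, cmd.rc and cmd.rrc carry the ultravisor return and
 *	// reason codes filled in by the handler above.
 */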
2379
2380 long kvm_arch_vm_ioctl(struct file *filp,
2381 unsigned int ioctl, unsigned long arg)
2382 {
2383 struct kvm *kvm = filp->private_data;
2384 void __user *argp = (void __user *)arg;
2385 struct kvm_device_attr attr;
2386 int r;
2387
2388 switch (ioctl) {
2389 case KVM_S390_INTERRUPT: {
2390 struct kvm_s390_interrupt s390int;
2391
2392 r = -EFAULT;
2393 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2394 break;
2395 r = kvm_s390_inject_vm(kvm, &s390int);
2396 break;
2397 }
2398 case KVM_CREATE_IRQCHIP: {
2399 struct kvm_irq_routing_entry routing;
2400
2401 r = -EINVAL;
2402 if (kvm->arch.use_irqchip) {
2403 /* Set up dummy routing. */
2404 memset(&routing, 0, sizeof(routing));
2405 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2406 }
2407 break;
2408 }
2409 case KVM_SET_DEVICE_ATTR: {
2410 r = -EFAULT;
2411 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2412 break;
2413 r = kvm_s390_vm_set_attr(kvm, &attr);
2414 break;
2415 }
2416 case KVM_GET_DEVICE_ATTR: {
2417 r = -EFAULT;
2418 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2419 break;
2420 r = kvm_s390_vm_get_attr(kvm, &attr);
2421 break;
2422 }
2423 case KVM_HAS_DEVICE_ATTR: {
2424 r = -EFAULT;
2425 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2426 break;
2427 r = kvm_s390_vm_has_attr(kvm, &attr);
2428 break;
2429 }
2430 case KVM_S390_GET_SKEYS: {
2431 struct kvm_s390_skeys args;
2432
2433 r = -EFAULT;
2434 if (copy_from_user(&args, argp,
2435 sizeof(struct kvm_s390_skeys)))
2436 break;
2437 r = kvm_s390_get_skeys(kvm, &args);
2438 break;
2439 }
2440 case KVM_S390_SET_SKEYS: {
2441 struct kvm_s390_skeys args;
2442
2443 r = -EFAULT;
2444 if (copy_from_user(&args, argp,
2445 sizeof(struct kvm_s390_skeys)))
2446 break;
2447 r = kvm_s390_set_skeys(kvm, &args);
2448 break;
2449 }
2450 case KVM_S390_GET_CMMA_BITS: {
2451 struct kvm_s390_cmma_log args;
2452
2453 r = -EFAULT;
2454 if (copy_from_user(&args, argp, sizeof(args)))
2455 break;
2456 mutex_lock(&kvm->slots_lock);
2457 r = kvm_s390_get_cmma_bits(kvm, &args);
2458 mutex_unlock(&kvm->slots_lock);
2459 if (!r) {
2460 r = copy_to_user(argp, &args, sizeof(args));
2461 if (r)
2462 r = -EFAULT;
2463 }
2464 break;
2465 }
2466 case KVM_S390_SET_CMMA_BITS: {
2467 struct kvm_s390_cmma_log args;
2468
2469 r = -EFAULT;
2470 if (copy_from_user(&args, argp, sizeof(args)))
2471 break;
2472 mutex_lock(&kvm->slots_lock);
2473 r = kvm_s390_set_cmma_bits(kvm, &args);
2474 mutex_unlock(&kvm->slots_lock);
2475 break;
2476 }
2477 case KVM_S390_PV_COMMAND: {
2478 struct kvm_pv_cmd args;
2479
2480 /* protvirt means user sigp */
2481 kvm->arch.user_cpu_state_ctrl = 1;
2482 r = 0;
2483 if (!is_prot_virt_host()) {
2484 r = -EINVAL;
2485 break;
2486 }
2487 if (copy_from_user(&args, argp, sizeof(args))) {
2488 r = -EFAULT;
2489 break;
2490 }
2491 if (args.flags) {
2492 r = -EINVAL;
2493 break;
2494 }
2495 mutex_lock(&kvm->lock);
2496 r = kvm_s390_handle_pv(kvm, &args);
2497 mutex_unlock(&kvm->lock);
2498 if (copy_to_user(argp, &args, sizeof(args))) {
2499 r = -EFAULT;
2500 break;
2501 }
2502 break;
2503 }
2504 default:
2505 r = -ENOTTY;
2506 }
2507
2508 return r;
2509 }
2510
2511 static int kvm_s390_apxa_installed(void)
2512 {
2513 struct ap_config_info info;
2514
2515 if (ap_instructions_available()) {
2516 if (ap_qci(&info) == 0)
2517 return info.apxa;
2518 }
2519
2520 return 0;
2521 }
2522
2523 /*
2524 * The format of the crypto control block (CRYCB) is specified in the 3 low
2525 * order bits of the CRYCB designation (CRYCBD) field as follows:
2526 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2527 * AP extended addressing (APXA) facility are installed.
2528 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2529 * Format 2: Both the APXA and MSAX3 facilities are installed.
2530 */
2531 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2532 {
2533 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2534
2535 /* Clear the CRYCB format bits - i.e., set format 0 by default */
2536 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2537
2538 /* Check whether MSAX3 is installed */
2539 if (!test_kvm_facility(kvm, 76))
2540 return;
2541
2542 if (kvm_s390_apxa_installed())
2543 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2544 else
2545 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2546 }
2547
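/*
 * Example (informational): with facility 76 (MSAX3) and APXA available,
 * the function above keeps the CRYCB origin in crycbd and sets
 * CRYCB_FORMAT2 in the three low-order bits, so kvm_arch_crypto_set_masks()
 * below uses the 256-bit APCB1 masks; without facility 76 the format bits
 * stay 0 and only the APCB0 masks are used.
 */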
2548 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2549 unsigned long *aqm, unsigned long *adm)
2550 {
2551 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2552
2553 mutex_lock(&kvm->lock);
2554 kvm_s390_vcpu_block_all(kvm);
2555
2556 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2557 case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2558 memcpy(crycb->apcb1.apm, apm, 32);
2559 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2560 apm[0], apm[1], apm[2], apm[3]);
2561 memcpy(crycb->apcb1.aqm, aqm, 32);
2562 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2563 aqm[0], aqm[1], aqm[2], aqm[3]);
2564 memcpy(crycb->apcb1.adm, adm, 32);
2565 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2566 adm[0], adm[1], adm[2], adm[3]);
2567 break;
2568 case CRYCB_FORMAT1:
2569 case CRYCB_FORMAT0: /* Fall through, both use APCB0 */
2570 memcpy(crycb->apcb0.apm, apm, 8);
2571 memcpy(crycb->apcb0.aqm, aqm, 2);
2572 memcpy(crycb->apcb0.adm, adm, 2);
2573 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2574 apm[0], *((unsigned short *)aqm),
2575 *((unsigned short *)adm));
2576 break;
2577 default: /* Cannot happen */
2578 break;
2579 }
2580
2581 /* recreate the shadow crycb for each vcpu */
2582 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2583 kvm_s390_vcpu_unblock_all(kvm);
2584 mutex_unlock(&kvm->lock);
2585 }
2586 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2587
2588 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2589 {
2590 mutex_lock(&kvm->lock);
2591 kvm_s390_vcpu_block_all(kvm);
2592
2593 memset(&kvm->arch.crypto.crycb->apcb0, 0,
2594 sizeof(kvm->arch.crypto.crycb->apcb0));
2595 memset(&kvm->arch.crypto.crycb->apcb1, 0,
2596 sizeof(kvm->arch.crypto.crycb->apcb1));
2597
2598 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2599 /* recreate the shadow crycb for each vcpu */
2600 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2601 kvm_s390_vcpu_unblock_all(kvm);
2602 mutex_unlock(&kvm->lock);
2603 }
2604 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2605
2606 static u64 kvm_s390_get_initial_cpuid(void)
2607 {
2608 struct cpuid cpuid;
2609
2610 get_cpu_id(&cpuid);
2611 cpuid.version = 0xff;
2612 return *((u64 *) &cpuid);
2613 }
2614
2615 static void kvm_s390_crypto_init(struct kvm *kvm)
2616 {
2617 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2618 kvm_s390_set_crycb_format(kvm);
2619
2620 if (!test_kvm_facility(kvm, 76))
2621 return;
2622
2623 /* Enable AES/DEA protected key functions by default */
2624 kvm->arch.crypto.aes_kw = 1;
2625 kvm->arch.crypto.dea_kw = 1;
2626 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2627 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2628 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2629 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2630 }
2631
2632 static void sca_dispose(struct kvm *kvm)
2633 {
2634 if (kvm->arch.use_esca)
2635 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2636 else
2637 free_page((unsigned long)(kvm->arch.sca));
2638 kvm->arch.sca = NULL;
2639 }
2640
2641 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2642 {
2643 gfp_t alloc_flags = GFP_KERNEL;
2644 int i, rc;
2645 char debug_name[16];
2646 static unsigned long sca_offset;
2647
2648 rc = -EINVAL;
2649 #ifdef CONFIG_KVM_S390_UCONTROL
2650 if (type & ~KVM_VM_S390_UCONTROL)
2651 goto out_err;
2652 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2653 goto out_err;
2654 #else
2655 if (type)
2656 goto out_err;
2657 #endif
2658
2659 rc = s390_enable_sie();
2660 if (rc)
2661 goto out_err;
2662
2663 rc = -ENOMEM;
2664
2665 if (!sclp.has_64bscao)
2666 alloc_flags |= GFP_DMA;
2667 rwlock_init(&kvm->arch.sca_lock);
2668 /* start with basic SCA */
2669 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2670 if (!kvm->arch.sca)
2671 goto out_err;
2672 mutex_lock(&kvm_lock);
2673 sca_offset += 16;
2674 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2675 sca_offset = 0;
2676 kvm->arch.sca = (struct bsca_block *)
2677 ((char *) kvm->arch.sca + sca_offset);
2678 mutex_unlock(&kvm_lock);
2679
2680 sprintf(debug_name, "kvm-%u", current->pid);
2681
2682 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2683 if (!kvm->arch.dbf)
2684 goto out_err;
2685
2686 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2687 kvm->arch.sie_page2 =
2688 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2689 if (!kvm->arch.sie_page2)
2690 goto out_err;
2691
2692 kvm->arch.sie_page2->kvm = kvm;
2693 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2694
2695 for (i = 0; i < kvm_s390_fac_size(); i++) {
2696 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2697 (kvm_s390_fac_base[i] |
2698 kvm_s390_fac_ext[i]);
2699 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2700 kvm_s390_fac_base[i];
2701 }
2702 kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2703
2704 /* we are always in czam mode - even on pre z14 machines */
2705 set_kvm_facility(kvm->arch.model.fac_mask, 138);
2706 set_kvm_facility(kvm->arch.model.fac_list, 138);
2707 /* we emulate STHYI in kvm */
2708 set_kvm_facility(kvm->arch.model.fac_mask, 74);
2709 set_kvm_facility(kvm->arch.model.fac_list, 74);
2710 if (MACHINE_HAS_TLB_GUEST) {
2711 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2712 set_kvm_facility(kvm->arch.model.fac_list, 147);
2713 }
2714
2715 if (css_general_characteristics.aiv && test_facility(65))
2716 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2717
2718 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2719 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2720
2721 kvm_s390_crypto_init(kvm);
2722
2723 mutex_init(&kvm->arch.float_int.ais_lock);
2724 spin_lock_init(&kvm->arch.float_int.lock);
2725 for (i = 0; i < FIRQ_LIST_COUNT; i++)
2726 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2727 init_waitqueue_head(&kvm->arch.ipte_wq);
2728 mutex_init(&kvm->arch.ipte_mutex);
2729
2730 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2731 VM_EVENT(kvm, 3, "vm created with type %lu", type);
2732
2733 if (type & KVM_VM_S390_UCONTROL) {
2734 kvm->arch.gmap = NULL;
2735 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2736 } else {
2737 if (sclp.hamax == U64_MAX)
2738 kvm->arch.mem_limit = TASK_SIZE_MAX;
2739 else
2740 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2741 sclp.hamax + 1);
2742 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2743 if (!kvm->arch.gmap)
2744 goto out_err;
2745 kvm->arch.gmap->private = kvm;
2746 kvm->arch.gmap->pfault_enabled = 0;
2747 }
2748
2749 kvm->arch.use_pfmfi = sclp.has_pfmfi;
2750 kvm->arch.use_skf = sclp.has_skey;
2751 spin_lock_init(&kvm->arch.start_stop_lock);
2752 kvm_s390_vsie_init(kvm);
2753 if (use_gisa)
2754 kvm_s390_gisa_init(kvm);
2755 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2756
2757 return 0;
2758 out_err:
2759 free_page((unsigned long)kvm->arch.sie_page2);
2760 debug_unregister(kvm->arch.dbf);
2761 sca_dispose(kvm);
2762 KVM_EVENT(3, "creation of vm failed: %d", rc);
2763 return rc;
2764 }
2765
2766 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2767 {
2768 u16 rc, rrc;
2769
2770 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2771 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2772 kvm_s390_clear_local_irqs(vcpu);
2773 kvm_clear_async_pf_completion_queue(vcpu);
2774 if (!kvm_is_ucontrol(vcpu->kvm))
2775 sca_del_vcpu(vcpu);
2776
2777 if (kvm_is_ucontrol(vcpu->kvm))
2778 gmap_remove(vcpu->arch.gmap);
2779
2780 if (vcpu->kvm->arch.use_cmma)
2781 kvm_s390_vcpu_unsetup_cmma(vcpu);
2782 /* We cannot hold the vcpu mutex here; we are already dying */
2783 if (kvm_s390_pv_cpu_get_handle(vcpu))
2784 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2785 free_page((unsigned long)(vcpu->arch.sie_block));
2786 }
2787
2788 static void kvm_free_vcpus(struct kvm *kvm)
2789 {
2790 unsigned int i;
2791 struct kvm_vcpu *vcpu;
2792
2793 kvm_for_each_vcpu(i, vcpu, kvm)
2794 kvm_vcpu_destroy(vcpu);
2795
2796 mutex_lock(&kvm->lock);
2797 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2798 kvm->vcpus[i] = NULL;
2799
2800 atomic_set(&kvm->online_vcpus, 0);
2801 mutex_unlock(&kvm->lock);
2802 }
2803
2804 void kvm_arch_destroy_vm(struct kvm *kvm)
2805 {
2806 u16 rc, rrc;
2807
2808 kvm_free_vcpus(kvm);
2809 sca_dispose(kvm);
2810 kvm_s390_gisa_destroy(kvm);
2811 /*
2812 * We are already at the end of life and kvm->lock is not taken.
2813 * This is ok as the file descriptor is closed by now and nobody
2814 * can mess with the pv state. To avoid lockdep_assert_held from
2815 * complaining we do not use kvm_s390_pv_is_protected.
2816 */
2817 if (kvm_s390_pv_get_handle(kvm))
2818 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2819 debug_unregister(kvm->arch.dbf);
2820 free_page((unsigned long)kvm->arch.sie_page2);
2821 if (!kvm_is_ucontrol(kvm))
2822 gmap_remove(kvm->arch.gmap);
2823 kvm_s390_destroy_adapters(kvm);
2824 kvm_s390_clear_float_irqs(kvm);
2825 kvm_s390_vsie_destroy(kvm);
2826 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2827 }
2828
2829 /* Section: vcpu related */
2830 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2831 {
2832 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2833 if (!vcpu->arch.gmap)
2834 return -ENOMEM;
2835 vcpu->arch.gmap->private = vcpu->kvm;
2836
2837 return 0;
2838 }
2839
2840 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2841 {
2842 if (!kvm_s390_use_sca_entries())
2843 return;
2844 read_lock(&vcpu->kvm->arch.sca_lock);
2845 if (vcpu->kvm->arch.use_esca) {
2846 struct esca_block *sca = vcpu->kvm->arch.sca;
2847
2848 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2849 sca->cpu[vcpu->vcpu_id].sda = 0;
2850 } else {
2851 struct bsca_block *sca = vcpu->kvm->arch.sca;
2852
2853 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2854 sca->cpu[vcpu->vcpu_id].sda = 0;
2855 }
2856 read_unlock(&vcpu->kvm->arch.sca_lock);
2857 }
2858
2859 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2860 {
2861 if (!kvm_s390_use_sca_entries()) {
2862 struct bsca_block *sca = vcpu->kvm->arch.sca;
2863
2864 /* we still need the basic sca for the ipte control */
2865 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2866 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2867 return;
2868 }
2869 read_lock(&vcpu->kvm->arch.sca_lock);
2870 if (vcpu->kvm->arch.use_esca) {
2871 struct esca_block *sca = vcpu->kvm->arch.sca;
2872
2873 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2874 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2875 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2876 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2877 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2878 } else {
2879 struct bsca_block *sca = vcpu->kvm->arch.sca;
2880
2881 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2882 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2883 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2884 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2885 }
2886 read_unlock(&vcpu->kvm->arch.sca_lock);
2887 }
2888
2889 /* Basic SCA to Extended SCA data copy routines */
2890 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2891 {
2892 d->sda = s->sda;
2893 d->sigp_ctrl.c = s->sigp_ctrl.c;
2894 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2895 }
2896
2897 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2898 {
2899 int i;
2900
2901 d->ipte_control = s->ipte_control;
2902 d->mcn[0] = s->mcn;
2903 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2904 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2905 }
2906
2907 static int sca_switch_to_extended(struct kvm *kvm)
2908 {
2909 struct bsca_block *old_sca = kvm->arch.sca;
2910 struct esca_block *new_sca;
2911 struct kvm_vcpu *vcpu;
2912 unsigned int vcpu_idx;
2913 u32 scaol, scaoh;
2914
2915 if (kvm->arch.use_esca)
2916 return 0;
2917
2918 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2919 if (!new_sca)
2920 return -ENOMEM;
2921
2922 scaoh = (u32)((u64)(new_sca) >> 32);
2923 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2924
2925 kvm_s390_vcpu_block_all(kvm);
2926 write_lock(&kvm->arch.sca_lock);
2927
2928 sca_copy_b_to_e(new_sca, old_sca);
2929
2930 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2931 vcpu->arch.sie_block->scaoh = scaoh;
2932 vcpu->arch.sie_block->scaol = scaol;
2933 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2934 }
2935 kvm->arch.sca = new_sca;
2936 kvm->arch.use_esca = 1;
2937
2938 write_unlock(&kvm->arch.sca_lock);
2939 kvm_s390_vcpu_unblock_all(kvm);
2940
2941 free_page((unsigned long)old_sca);
2942
2943 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2944 old_sca, kvm->arch.sca);
2945 return 0;
2946 }
2947
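/*
 * Informational note: the basic SCA covers KVM_S390_BSCA_CPU_SLOTS (64)
 * VCPUs.  Once a higher VCPU id is requested and the machine provides
 * ESCA and the 64-bit SCA origin, sca_switch_to_extended() above moves
 * the VM to the extended SCA with KVM_S390_ESCA_CPU_SLOTS (248) entries.
 */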
2948 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2949 {
2950 int rc;
2951
2952 if (!kvm_s390_use_sca_entries()) {
2953 if (id < KVM_MAX_VCPUS)
2954 return true;
2955 return false;
2956 }
2957 if (id < KVM_S390_BSCA_CPU_SLOTS)
2958 return true;
2959 if (!sclp.has_esca || !sclp.has_64bscao)
2960 return false;
2961
2962 mutex_lock(&kvm->lock);
2963 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2964 mutex_unlock(&kvm->lock);
2965
2966 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2967 }
2968
2969 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2970 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2971 {
2972 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2973 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2974 vcpu->arch.cputm_start = get_tod_clock_fast();
2975 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2976 }
2977
2978 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2979 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2980 {
2981 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2982 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2983 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2984 vcpu->arch.cputm_start = 0;
2985 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2986 }
2987
2988 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2989 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2990 {
2991 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2992 vcpu->arch.cputm_enabled = true;
2993 __start_cpu_timer_accounting(vcpu);
2994 }
2995
2996 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2997 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2998 {
2999 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3000 __stop_cpu_timer_accounting(vcpu);
3001 vcpu->arch.cputm_enabled = false;
3002 }
3003
3004 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3005 {
3006 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3007 __enable_cpu_timer_accounting(vcpu);
3008 preempt_enable();
3009 }
3010
3011 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3012 {
3013 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3014 __disable_cpu_timer_accounting(vcpu);
3015 preempt_enable();
3016 }
3017
3018 /* set the cpu timer - may only be called from the VCPU thread itself */
3019 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3020 {
3021 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3022 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3023 if (vcpu->arch.cputm_enabled)
3024 vcpu->arch.cputm_start = get_tod_clock_fast();
3025 vcpu->arch.sie_block->cputm = cputm;
3026 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3027 preempt_enable();
3028 }
3029
3030 /* update and get the cpu timer - can also be called from other VCPU threads */
3031 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3032 {
3033 unsigned int seq;
3034 __u64 value;
3035
3036 if (unlikely(!vcpu->arch.cputm_enabled))
3037 return vcpu->arch.sie_block->cputm;
3038
3039 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3040 do {
3041 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3042 /*
3043 * If the writer would ever execute a read in the critical
3044 * section, e.g. in irq context, we have a deadlock.
3045 */
3046 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3047 value = vcpu->arch.sie_block->cputm;
3048 /* if cputm_start is 0, accounting is being started/stopped */
3049 if (likely(vcpu->arch.cputm_start))
3050 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3051 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3052 preempt_enable();
3053 return value;
3054 }
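/*
 * Informal invariant: while accounting is running, the guest-visible CPU
 * timer equals sie_block->cputm minus the TOD time elapsed since
 * cputm_start; the seqcount retry loop above keeps that pair consistent
 * for readers on other CPUs.
 */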
3055
3056 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3057 {
3058
3059 gmap_enable(vcpu->arch.enabled_gmap);
3060 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3061 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3062 __start_cpu_timer_accounting(vcpu);
3063 vcpu->cpu = cpu;
3064 }
3065
3066 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3067 {
3068 vcpu->cpu = -1;
3069 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3070 __stop_cpu_timer_accounting(vcpu);
3071 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3072 vcpu->arch.enabled_gmap = gmap_get_enabled();
3073 gmap_disable(vcpu->arch.enabled_gmap);
3074
3075 }
3076
3077 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3078 {
3079 mutex_lock(&vcpu->kvm->lock);
3080 preempt_disable();
3081 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3082 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3083 preempt_enable();
3084 mutex_unlock(&vcpu->kvm->lock);
3085 if (!kvm_is_ucontrol(vcpu->kvm)) {
3086 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3087 sca_add_vcpu(vcpu);
3088 }
3089 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3090 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3091 /* make vcpu_load load the right gmap on the first trigger */
3092 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3093 }
3094
3095 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3096 {
3097 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3098 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3099 return true;
3100 return false;
3101 }
3102
3103 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3104 {
3105 /* At least one ECC subfunction must be present */
3106 return kvm_has_pckmo_subfunc(kvm, 32) ||
3107 kvm_has_pckmo_subfunc(kvm, 33) ||
3108 kvm_has_pckmo_subfunc(kvm, 34) ||
3109 kvm_has_pckmo_subfunc(kvm, 40) ||
3110 kvm_has_pckmo_subfunc(kvm, 41);
3111
3112 }
3113
3114 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3115 {
3116 /*
3117 * If the AP instructions are not being interpreted and the MSAX3
3118 * facility is not configured for the guest, there is nothing to set up.
3119 */
3120 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3121 return;
3122
3123 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3124 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3125 vcpu->arch.sie_block->eca &= ~ECA_APIE;
3126 vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3127
3128 if (vcpu->kvm->arch.crypto.apie)
3129 vcpu->arch.sie_block->eca |= ECA_APIE;
3130
3131 /* Set up protected key support */
3132 if (vcpu->kvm->arch.crypto.aes_kw) {
3133 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3134 /* ecc is also wrapped with AES key */
3135 if (kvm_has_pckmo_ecc(vcpu->kvm))
3136 vcpu->arch.sie_block->ecd |= ECD_ECC;
3137 }
3138
3139 if (vcpu->kvm->arch.crypto.dea_kw)
3140 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3141 }
3142
3143 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3144 {
3145 free_page(vcpu->arch.sie_block->cbrlo);
3146 vcpu->arch.sie_block->cbrlo = 0;
3147 }
3148
3149 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3150 {
3151 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
3152 if (!vcpu->arch.sie_block->cbrlo)
3153 return -ENOMEM;
3154 return 0;
3155 }
3156
3157 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3158 {
3159 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3160
3161 vcpu->arch.sie_block->ibc = model->ibc;
3162 if (test_kvm_facility(vcpu->kvm, 7))
3163 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3164 }
3165
3166 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3167 {
3168 int rc = 0;
3169 u16 uvrc, uvrrc;
3170
3171 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3172 CPUSTAT_SM |
3173 CPUSTAT_STOPPED);
3174
3175 if (test_kvm_facility(vcpu->kvm, 78))
3176 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3177 else if (test_kvm_facility(vcpu->kvm, 8))
3178 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3179
3180 kvm_s390_vcpu_setup_model(vcpu);
3181
3182 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3183 if (MACHINE_HAS_ESOP)
3184 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3185 if (test_kvm_facility(vcpu->kvm, 9))
3186 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3187 if (test_kvm_facility(vcpu->kvm, 73))
3188 vcpu->arch.sie_block->ecb |= ECB_TE;
3189
3190 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3191 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3192 if (test_kvm_facility(vcpu->kvm, 130))
3193 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3194 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3195 if (sclp.has_cei)
3196 vcpu->arch.sie_block->eca |= ECA_CEI;
3197 if (sclp.has_ib)
3198 vcpu->arch.sie_block->eca |= ECA_IB;
3199 if (sclp.has_siif)
3200 vcpu->arch.sie_block->eca |= ECA_SII;
3201 if (sclp.has_sigpif)
3202 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3203 if (test_kvm_facility(vcpu->kvm, 129)) {
3204 vcpu->arch.sie_block->eca |= ECA_VX;
3205 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3206 }
3207 if (test_kvm_facility(vcpu->kvm, 139))
3208 vcpu->arch.sie_block->ecd |= ECD_MEF;
3209 if (test_kvm_facility(vcpu->kvm, 156))
3210 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3211 if (vcpu->arch.sie_block->gd) {
3212 vcpu->arch.sie_block->eca |= ECA_AIV;
3213 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3214 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3215 }
3216 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3217 | SDNXC;
3218 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3219
3220 if (sclp.has_kss)
3221 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3222 else
3223 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3224
3225 if (vcpu->kvm->arch.use_cmma) {
3226 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3227 if (rc)
3228 return rc;
3229 }
3230 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3231 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3232
3233 vcpu->arch.sie_block->hpid = HPID_KVM;
3234
3235 kvm_s390_vcpu_crypto_setup(vcpu);
3236
3237 mutex_lock(&vcpu->kvm->lock);
3238 if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3239 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3240 if (rc)
3241 kvm_s390_vcpu_unsetup_cmma(vcpu);
3242 }
3243 mutex_unlock(&vcpu->kvm->lock);
3244
3245 return rc;
3246 }
3247
3248 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3249 {
3250 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3251 return -EINVAL;
3252 return 0;
3253 }
3254
3255 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3256 {
3257 struct sie_page *sie_page;
3258 int rc;
3259
3260 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3261 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3262 if (!sie_page)
3263 return -ENOMEM;
3264
3265 vcpu->arch.sie_block = &sie_page->sie_block;
3266 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3267
3268 /* the real guest size will always be smaller than msl */
3269 vcpu->arch.sie_block->mso = 0;
3270 vcpu->arch.sie_block->msl = sclp.hamax;
3271
3272 vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3273 spin_lock_init(&vcpu->arch.local_int.lock);
3274 vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3275 if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3276 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3277 seqcount_init(&vcpu->arch.cputm_seqcount);
3278
3279 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3280 kvm_clear_async_pf_completion_queue(vcpu);
3281 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3282 KVM_SYNC_GPRS |
3283 KVM_SYNC_ACRS |
3284 KVM_SYNC_CRS |
3285 KVM_SYNC_ARCH0 |
3286 KVM_SYNC_PFAULT |
3287 KVM_SYNC_DIAG318;
3288 kvm_s390_set_prefix(vcpu, 0);
3289 if (test_kvm_facility(vcpu->kvm, 64))
3290 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3291 if (test_kvm_facility(vcpu->kvm, 82))
3292 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3293 if (test_kvm_facility(vcpu->kvm, 133))
3294 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3295 if (test_kvm_facility(vcpu->kvm, 156))
3296 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3297 /* fprs can be synchronized via vrs, even if the guest has no vx. With
3298 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3299 */
3300 if (MACHINE_HAS_VX)
3301 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3302 else
3303 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3304
3305 if (kvm_is_ucontrol(vcpu->kvm)) {
3306 rc = __kvm_ucontrol_vcpu_init(vcpu);
3307 if (rc)
3308 goto out_free_sie_block;
3309 }
3310
3311 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3312 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3313 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3314
3315 rc = kvm_s390_vcpu_setup(vcpu);
3316 if (rc)
3317 goto out_ucontrol_uninit;
3318 return 0;
3319
3320 out_ucontrol_uninit:
3321 if (kvm_is_ucontrol(vcpu->kvm))
3322 gmap_remove(vcpu->arch.gmap);
3323 out_free_sie_block:
3324 free_page((unsigned long)(vcpu->arch.sie_block));
3325 return rc;
3326 }
3327
3328 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3329 {
3330 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3331 return kvm_s390_vcpu_has_irq(vcpu, 0);
3332 }
3333
3334 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3335 {
3336 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3337 }
3338
3339 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3340 {
3341 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3342 exit_sie(vcpu);
3343 }
3344
3345 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3346 {
3347 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3348 }
3349
3350 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3351 {
3352 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3353 exit_sie(vcpu);
3354 }
3355
3356 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3357 {
3358 return atomic_read(&vcpu->arch.sie_block->prog20) &
3359 (PROG_BLOCK_SIE | PROG_REQUEST);
3360 }
3361
3362 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3363 {
3364 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3365 }
3366
3367 /*
3368 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3369 * If the CPU is not running (e.g. waiting as idle), the function will
3370 * return immediately. */
3371 void exit_sie(struct kvm_vcpu *vcpu)
3372 {
3373 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3374 kvm_s390_vsie_kick(vcpu);
3375 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3376 cpu_relax();
3377 }
3378
3379 /* Kick a guest cpu out of SIE to process a request synchronously */
3380 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3381 {
3382 kvm_make_request(req, vcpu);
3383 kvm_s390_vcpu_request(vcpu);
3384 }
3385
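/*
 * gmap invalidation notifier: if the invalidated range overlaps a vCPU's
 * prefix area (two pages starting at the prefix), request an MMU reload
 * for that vCPU so the prefix mapping gets re-protected.
 */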
3386 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3387 unsigned long end)
3388 {
3389 struct kvm *kvm = gmap->private;
3390 struct kvm_vcpu *vcpu;
3391 unsigned long prefix;
3392 int i;
3393
3394 if (gmap_is_shadow(gmap))
3395 return;
3396 if (start >= 1UL << 31)
3397 /* We are only interested in prefix pages */
3398 return;
3399 kvm_for_each_vcpu(i, vcpu, kvm) {
3400 /* match against both prefix pages */
3401 prefix = kvm_s390_get_prefix(vcpu);
3402 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3403 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3404 start, end);
3405 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3406 }
3407 }
3408 }
3409
3410 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3411 {
3412 /* do not poll with more than halt_poll_max_steal percent of steal time */
3413 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3414 halt_poll_max_steal) {
3415 vcpu->stat.halt_no_poll_steal++;
3416 return true;
3417 }
3418 return false;
3419 }
3420
3421 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3422 {
3423 /* kvm common code refers to this, but never calls it */
3424 BUG();
3425 return 0;
3426 }
3427
3428 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3429 struct kvm_one_reg *reg)
3430 {
3431 int r = -EINVAL;
3432
3433 switch (reg->id) {
3434 case KVM_REG_S390_TODPR:
3435 r = put_user(vcpu->arch.sie_block->todpr,
3436 (u32 __user *)reg->addr);
3437 break;
3438 case KVM_REG_S390_EPOCHDIFF:
3439 r = put_user(vcpu->arch.sie_block->epoch,
3440 (u64 __user *)reg->addr);
3441 break;
3442 case KVM_REG_S390_CPU_TIMER:
3443 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3444 (u64 __user *)reg->addr);
3445 break;
3446 case KVM_REG_S390_CLOCK_COMP:
3447 r = put_user(vcpu->arch.sie_block->ckc,
3448 (u64 __user *)reg->addr);
3449 break;
3450 case KVM_REG_S390_PFTOKEN:
3451 r = put_user(vcpu->arch.pfault_token,
3452 (u64 __user *)reg->addr);
3453 break;
3454 case KVM_REG_S390_PFCOMPARE:
3455 r = put_user(vcpu->arch.pfault_compare,
3456 (u64 __user *)reg->addr);
3457 break;
3458 case KVM_REG_S390_PFSELECT:
3459 r = put_user(vcpu->arch.pfault_select,
3460 (u64 __user *)reg->addr);
3461 break;
3462 case KVM_REG_S390_PP:
3463 r = put_user(vcpu->arch.sie_block->pp,
3464 (u64 __user *)reg->addr);
3465 break;
3466 case KVM_REG_S390_GBEA:
3467 r = put_user(vcpu->arch.sie_block->gbea,
3468 (u64 __user *)reg->addr);
3469 break;
3470 default:
3471 break;
3472 }
3473
3474 return r;
3475 }
3476
3477 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3478 struct kvm_one_reg *reg)
3479 {
3480 int r = -EINVAL;
3481 __u64 val;
3482
3483 switch (reg->id) {
3484 case KVM_REG_S390_TODPR:
3485 r = get_user(vcpu->arch.sie_block->todpr,
3486 (u32 __user *)reg->addr);
3487 break;
3488 case KVM_REG_S390_EPOCHDIFF:
3489 r = get_user(vcpu->arch.sie_block->epoch,
3490 (u64 __user *)reg->addr);
3491 break;
3492 case KVM_REG_S390_CPU_TIMER:
3493 r = get_user(val, (u64 __user *)reg->addr);
3494 if (!r)
3495 kvm_s390_set_cpu_timer(vcpu, val);
3496 break;
3497 case KVM_REG_S390_CLOCK_COMP:
3498 r = get_user(vcpu->arch.sie_block->ckc,
3499 (u64 __user *)reg->addr);
3500 break;
3501 case KVM_REG_S390_PFTOKEN:
3502 r = get_user(vcpu->arch.pfault_token,
3503 (u64 __user *)reg->addr);
3504 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3505 kvm_clear_async_pf_completion_queue(vcpu);
3506 break;
3507 case KVM_REG_S390_PFCOMPARE:
3508 r = get_user(vcpu->arch.pfault_compare,
3509 (u64 __user *)reg->addr);
3510 break;
3511 case KVM_REG_S390_PFSELECT:
3512 r = get_user(vcpu->arch.pfault_select,
3513 (u64 __user *)reg->addr);
3514 break;
3515 case KVM_REG_S390_PP:
3516 r = get_user(vcpu->arch.sie_block->pp,
3517 (u64 __user *)reg->addr);
3518 break;
3519 case KVM_REG_S390_GBEA:
3520 r = get_user(vcpu->arch.sie_block->gbea,
3521 (u64 __user *)reg->addr);
3522 break;
3523 default:
3524 break;
3525 }
3526
3527 return r;
3528 }
3529
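/*
 * Normal CPU reset: clear the RI bit in the PSW, invalidate the pfault
 * token, clear the RICCB and all local interrupts, and stop the vCPU
 * unless userspace controls the CPU state itself.
 */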
3530 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3531 {
3532 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3533 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3534 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3535
3536 kvm_clear_async_pf_completion_queue(vcpu);
3537 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3538 kvm_s390_vcpu_stop(vcpu);
3539 kvm_s390_clear_local_irqs(vcpu);
3540 }
3541
3542 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3543 {
3544 /* Initial reset is a superset of the normal reset */
3545 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3546
3547 /*
3548 * This equals initial cpu reset in pop, but we don't switch to ESA.
3549 * We not only reset the internal data, but also ...
3550 */
3551 vcpu->arch.sie_block->gpsw.mask = 0;
3552 vcpu->arch.sie_block->gpsw.addr = 0;
3553 kvm_s390_set_prefix(vcpu, 0);
3554 kvm_s390_set_cpu_timer(vcpu, 0);
3555 vcpu->arch.sie_block->ckc = 0;
3556 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3557 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3558 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3559
3560 /* ... the data in sync regs */
3561 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3562 vcpu->run->s.regs.ckc = 0;
3563 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3564 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3565 vcpu->run->psw_addr = 0;
3566 vcpu->run->psw_mask = 0;
3567 vcpu->run->s.regs.todpr = 0;
3568 vcpu->run->s.regs.cputm = 0;
3569 vcpu->run->s.regs.ckc = 0;
3570 vcpu->run->s.regs.pp = 0;
3571 vcpu->run->s.regs.gbea = 1;
3572 vcpu->run->s.regs.fpc = 0;
3573 /*
3574 * Do not reset these registers in the protected case, as some of
3575 * them are overlayed and they are not accessible in this case
3576 * anyway.
3577 */
3578 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3579 vcpu->arch.sie_block->gbea = 1;
3580 vcpu->arch.sie_block->pp = 0;
3581 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3582 vcpu->arch.sie_block->todpr = 0;
3583 }
3584 }
3585
3586 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3587 {
3588 struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3589
3590 /* Clear reset is a superset of the initial reset */
3591 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3592
3593 memset(&regs->gprs, 0, sizeof(regs->gprs));
3594 memset(&regs->vrs, 0, sizeof(regs->vrs));
3595 memset(&regs->acrs, 0, sizeof(regs->acrs));
3596 memset(&regs->gscb, 0, sizeof(regs->gscb));
3597
3598 regs->etoken = 0;
3599 regs->etoken_extension = 0;
3600 }
3601
3602 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3603 {
3604 vcpu_load(vcpu);
3605 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3606 vcpu_put(vcpu);
3607 return 0;
3608 }
3609
3610 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3611 {
3612 vcpu_load(vcpu);
3613 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3614 vcpu_put(vcpu);
3615 return 0;
3616 }
3617
3618 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3619 struct kvm_sregs *sregs)
3620 {
3621 vcpu_load(vcpu);
3622
3623 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3624 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3625
3626 vcpu_put(vcpu);
3627 return 0;
3628 }
3629
3630 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3631 struct kvm_sregs *sregs)
3632 {
3633 vcpu_load(vcpu);
3634
3635 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3636 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3637
3638 vcpu_put(vcpu);
3639 return 0;
3640 }
3641
3642 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3643 {
3644 int ret = 0;
3645
3646 vcpu_load(vcpu);
3647
3648 if (test_fp_ctl(fpu->fpc)) {
3649 ret = -EINVAL;
3650 goto out;
3651 }
3652 vcpu->run->s.regs.fpc = fpu->fpc;
3653 if (MACHINE_HAS_VX)
3654 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3655 (freg_t *) fpu->fprs);
3656 else
3657 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3658
3659 out:
3660 vcpu_put(vcpu);
3661 return ret;
3662 }
3663
3664 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3665 {
3666 vcpu_load(vcpu);
3667
3668 /* make sure we have the latest values */
3669 save_fpu_regs();
3670 if (MACHINE_HAS_VX)
3671 convert_vx_to_fp((freg_t *) fpu->fprs,
3672 (__vector128 *) vcpu->run->s.regs.vrs);
3673 else
3674 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3675 fpu->fpc = vcpu->run->s.regs.fpc;
3676
3677 vcpu_put(vcpu);
3678 return 0;
3679 }
3680
3681 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3682 {
3683 int rc = 0;
3684
3685 if (!is_vcpu_stopped(vcpu))
3686 rc = -EBUSY;
3687 else {
3688 vcpu->run->psw_mask = psw.mask;
3689 vcpu->run->psw_addr = psw.addr;
3690 }
3691 return rc;
3692 }
3693
3694 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3695 struct kvm_translation *tr)
3696 {
3697 return -EINVAL; /* not implemented yet */
3698 }
3699
3700 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3701 KVM_GUESTDBG_USE_HW_BP | \
3702 KVM_GUESTDBG_ENABLE)
3703
3704 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3705 struct kvm_guest_debug *dbg)
3706 {
3707 int rc = 0;
3708
3709 vcpu_load(vcpu);
3710
3711 vcpu->guest_debug = 0;
3712 kvm_s390_clear_bp_data(vcpu);
3713
3714 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3715 rc = -EINVAL;
3716 goto out;
3717 }
3718 if (!sclp.has_gpere) {
3719 rc = -EINVAL;
3720 goto out;
3721 }
3722
3723 if (dbg->control & KVM_GUESTDBG_ENABLE) {
3724 vcpu->guest_debug = dbg->control;
3725 /* enforce guest PER */
3726 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3727
3728 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3729 rc = kvm_s390_import_bp_data(vcpu, dbg);
3730 } else {
3731 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3732 vcpu->arch.guestdbg.last_bp = 0;
3733 }
3734
3735 if (rc) {
3736 vcpu->guest_debug = 0;
3737 kvm_s390_clear_bp_data(vcpu);
3738 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3739 }
3740
3741 out:
3742 vcpu_put(vcpu);
3743 return rc;
3744 }
3745
3746 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3747 struct kvm_mp_state *mp_state)
3748 {
3749 int ret;
3750
3751 vcpu_load(vcpu);
3752
3753 /* CHECK_STOP and LOAD are not supported yet */
3754 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3755 KVM_MP_STATE_OPERATING;
3756
3757 vcpu_put(vcpu);
3758 return ret;
3759 }
3760
3761 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3762 struct kvm_mp_state *mp_state)
3763 {
3764 int rc = 0;
3765
3766 vcpu_load(vcpu);
3767
3768 /* user space knows about this interface - let it control the state */
3769 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3770
3771 switch (mp_state->mp_state) {
3772 case KVM_MP_STATE_STOPPED:
3773 rc = kvm_s390_vcpu_stop(vcpu);
3774 break;
3775 case KVM_MP_STATE_OPERATING:
3776 rc = kvm_s390_vcpu_start(vcpu);
3777 break;
3778 case KVM_MP_STATE_LOAD:
3779 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3780 rc = -ENXIO;
3781 break;
3782 }
3783 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3784 break;
3785 case KVM_MP_STATE_CHECK_STOP:
3786 fallthrough; /* CHECK_STOP and LOAD are not supported yet */
3787 default:
3788 rc = -ENXIO;
3789 }
3790
3791 vcpu_put(vcpu);
3792 return rc;
3793 }
3794
3795 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3796 {
3797 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3798 }
3799
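/*
 * Handle all pending vCPU requests before (re-)entering SIE. Most request
 * handlers jump back to "retry" so that requests raised while processing
 * an earlier one are not lost.
 */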
3800 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3801 {
3802 retry:
3803 kvm_s390_vcpu_request_handled(vcpu);
3804 if (!kvm_request_pending(vcpu))
3805 return 0;
3806 /*
3807 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3808 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3809 * This ensures that the ipte instruction for this request has
3810 * already finished. We might race against a second unmapper that
3811 * wants to set the blocking bit. Let's just retry the request loop.
3812 */
3813 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3814 int rc;
3815 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3816 kvm_s390_get_prefix(vcpu),
3817 PAGE_SIZE * 2, PROT_WRITE);
3818 if (rc) {
3819 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3820 return rc;
3821 }
3822 goto retry;
3823 }
3824
3825 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3826 vcpu->arch.sie_block->ihcpu = 0xffff;
3827 goto retry;
3828 }
3829
3830 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3831 if (!ibs_enabled(vcpu)) {
3832 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3833 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3834 }
3835 goto retry;
3836 }
3837
3838 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3839 if (ibs_enabled(vcpu)) {
3840 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3841 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3842 }
3843 goto retry;
3844 }
3845
3846 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3847 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3848 goto retry;
3849 }
3850
3851 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3852 /*
3853 * Disable CMM virtualization; we will emulate the ESSA
3854 * instruction manually, in order to provide additional
3855 * functionalities needed for live migration.
3856 */
3857 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3858 goto retry;
3859 }
3860
3861 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3862 /*
3863 * Re-enable CMM virtualization if CMMA is available and
3864 * CMM has been used.
3865 */
3866 if ((vcpu->kvm->arch.use_cmma) &&
3867 (vcpu->kvm->mm->context.uses_cmm))
3868 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3869 goto retry;
3870 }
3871
3872 /* nothing to do, just clear the request */
3873 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3874 /* we left the vsie handler, nothing to do, just clear the request */
3875 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3876
3877 return 0;
3878 }
3879
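/*
 * Set the guest TOD clock: store the difference between the requested
 * guest TOD and the host TOD as the epoch (and epoch index, if facility
 * 139 is available) and propagate it to all vCPUs while they are blocked
 * out of SIE.
 */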
3880 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3881 {
3882 struct kvm_vcpu *vcpu;
3883 struct kvm_s390_tod_clock_ext htod;
3884 int i;
3885
3886 preempt_disable();
3887
3888 get_tod_clock_ext((char *)&htod);
3889
3890 kvm->arch.epoch = gtod->tod - htod.tod;
3891 kvm->arch.epdx = 0;
3892 if (test_kvm_facility(kvm, 139)) {
3893 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3894 if (kvm->arch.epoch > gtod->tod)
3895 kvm->arch.epdx -= 1;
3896 }
3897
3898 kvm_s390_vcpu_block_all(kvm);
3899 kvm_for_each_vcpu(i, vcpu, kvm) {
3900 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3901 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3902 }
3903
3904 kvm_s390_vcpu_unblock_all(kvm);
3905 preempt_enable();
3906 }
3907
3908 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3909 {
3910 if (!mutex_trylock(&kvm->lock))
3911 return 0;
3912 __kvm_s390_set_tod_clock(kvm, gtod);
3913 mutex_unlock(&kvm->lock);
3914 return 1;
3915 }
3916
3917 /**
3918 * kvm_arch_fault_in_page - fault-in guest page if necessary
3919 * @vcpu: The corresponding virtual cpu
3920 * @gpa: Guest physical address
3921 * @writable: Whether the page should be writable or not
3922 *
3923 * Make sure that a guest page has been faulted-in on the host.
3924 *
3925 * Return: Zero on success, negative error code otherwise.
3926 */
3927 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3928 {
3929 return gmap_fault(vcpu->arch.gmap, gpa,
3930 writable ? FAULT_FLAG_WRITE : 0);
3931 }
3932
3933 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3934 unsigned long token)
3935 {
3936 struct kvm_s390_interrupt inti;
3937 struct kvm_s390_irq irq;
3938
3939 if (start_token) {
3940 irq.u.ext.ext_params2 = token;
3941 irq.type = KVM_S390_INT_PFAULT_INIT;
3942 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3943 } else {
3944 inti.type = KVM_S390_INT_PFAULT_DONE;
3945 inti.parm64 = token;
3946 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3947 }
3948 }
3949
3950 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3951 struct kvm_async_pf *work)
3952 {
3953 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3954 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3955
3956 return true;
3957 }
3958
3959 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3960 struct kvm_async_pf *work)
3961 {
3962 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3963 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3964 }
3965
3966 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3967 struct kvm_async_pf *work)
3968 {
3969 /* s390 will always inject the page directly */
3970 }
3971
3972 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3973 {
3974 /*
3975 * s390 will always inject the page directly,
3976 * but we still want check_async_completion to clean up
3977 */
3978 return true;
3979 }
3980
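/*
 * Check whether a host fault may be handled as an async page fault: the
 * guest needs a valid pfault token, a matching PSW mask, external
 * interrupts and the service-signal subclass enabled, no pending
 * interrupt, and pfault must be enabled on the gmap.
 */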
3981 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3982 {
3983 hva_t hva;
3984 struct kvm_arch_async_pf arch;
3985
3986 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3987 return false;
3988 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3989 vcpu->arch.pfault_compare)
3990 return false;
3991 if (psw_extint_disabled(vcpu))
3992 return false;
3993 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3994 return false;
3995 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3996 return false;
3997 if (!vcpu->arch.gmap->pfault_enabled)
3998 return false;
3999
4000 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4001 hva += current->thread.gmap_addr & ~PAGE_MASK;
4002 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4003 return false;
4004
4005 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4006 }
4007
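/*
 * Prepare for SIE entry: complete async page faults, deliver pending
 * interrupts, process vCPU requests and, if guest debugging is enabled,
 * patch the guest PER state.
 */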
4008 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4009 {
4010 int rc, cpuflags;
4011
4012 /*
4013 * On s390 notifications for arriving pages will be delivered directly
4014 * to the guest, but the housekeeping for completed pfaults is
4015 * handled outside the worker.
4016 */
4017 kvm_check_async_pf_completion(vcpu);
4018
4019 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4020 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4021
4022 if (need_resched())
4023 schedule();
4024
4025 if (!kvm_is_ucontrol(vcpu->kvm)) {
4026 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4027 if (rc)
4028 return rc;
4029 }
4030
4031 rc = kvm_s390_handle_requests(vcpu);
4032 if (rc)
4033 return rc;
4034
4035 if (guestdbg_enabled(vcpu)) {
4036 kvm_s390_backup_guest_per_regs(vcpu);
4037 kvm_s390_patch_guest_per_regs(vcpu);
4038 }
4039
4040 clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask);
4041
4042 vcpu->arch.sie_block->icptcode = 0;
4043 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4044 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4045 trace_kvm_s390_sie_enter(vcpu, cpuflags);
4046
4047 return 0;
4048 }
4049
4050 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4051 {
4052 struct kvm_s390_pgm_info pgm_info = {
4053 .code = PGM_ADDRESSING,
4054 };
4055 u8 opcode, ilen;
4056 int rc;
4057
4058 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4059 trace_kvm_s390_sie_fault(vcpu);
4060
4061 /*
4062 * We want to inject an addressing exception, which is defined as a
4063 * suppressing or terminating exception. However, since we came here
4064 * by a DAT access exception, the PSW still points to the faulting
4065 * instruction since DAT exceptions are nullifying. So we've got
4066 * to look up the current opcode to get the length of the instruction
4067 * to be able to forward the PSW.
4068 */
4069 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4070 ilen = insn_length(opcode);
4071 if (rc < 0) {
4072 return rc;
4073 } else if (rc) {
4074 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4075 * Forward by arbitrary ilc, injection will take care of
4076 * nullification if necessary.
4077 */
4078 pgm_info = vcpu->arch.pgm;
4079 ilen = 4;
4080 }
4081 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4082 kvm_s390_forward_psw(vcpu, ilen);
4083 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4084 }
4085
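/*
 * Post-process a SIE exit: reinject machine checks reported as -EINTR,
 * hand intercepts to the intercept handlers, and resolve host page
 * faults (asynchronously if possible) before resuming the guest.
 */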
4086 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4087 {
4088 struct mcck_volatile_info *mcck_info;
4089 struct sie_page *sie_page;
4090
4091 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4092 vcpu->arch.sie_block->icptcode);
4093 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4094
4095 if (guestdbg_enabled(vcpu))
4096 kvm_s390_restore_guest_per_regs(vcpu);
4097
4098 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4099 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4100
4101 if (exit_reason == -EINTR) {
4102 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4103 sie_page = container_of(vcpu->arch.sie_block,
4104 struct sie_page, sie_block);
4105 mcck_info = &sie_page->mcck_info;
4106 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4107 return 0;
4108 }
4109
4110 if (vcpu->arch.sie_block->icptcode > 0) {
4111 int rc = kvm_handle_sie_intercept(vcpu);
4112
4113 if (rc != -EOPNOTSUPP)
4114 return rc;
4115 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4116 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4117 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4118 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4119 return -EREMOTE;
4120 } else if (exit_reason != -EFAULT) {
4121 vcpu->stat.exit_null++;
4122 return 0;
4123 } else if (kvm_is_ucontrol(vcpu->kvm)) {
4124 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4125 vcpu->run->s390_ucontrol.trans_exc_code =
4126 current->thread.gmap_addr;
4127 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4128 return -EREMOTE;
4129 } else if (current->thread.gmap_pfault) {
4130 trace_kvm_s390_major_guest_pfault(vcpu);
4131 current->thread.gmap_pfault = 0;
4132 if (kvm_arch_setup_async_pf(vcpu))
4133 return 0;
4134 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4135 }
4136 return vcpu_post_run_fault_in_sie(vcpu);
4137 }
4138
4139 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
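/*
 * The main run loop: deliver interrupts, handle requests, enter SIE with
 * interrupts disabled and post-process the exit. For protected guests the
 * general purpose registers are copied through the SIE page.
 */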
4140 static int __vcpu_run(struct kvm_vcpu *vcpu)
4141 {
4142 int rc, exit_reason;
4143 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4144
4145 /*
4146 * We try to hold kvm->srcu during most of vcpu_run (except when run-
4147 * ning the guest), so that memslots (and other stuff) are protected
4148 */
4149 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4150
4151 do {
4152 rc = vcpu_pre_run(vcpu);
4153 if (rc)
4154 break;
4155
4156 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4157 /*
4158 * As PF_VCPU will be used in the fault handler, there must be
4159 * no uaccess between guest_enter and guest_exit.
4160 */
4161 local_irq_disable();
4162 guest_enter_irqoff();
4163 __disable_cpu_timer_accounting(vcpu);
4164 local_irq_enable();
4165 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4166 memcpy(sie_page->pv_grregs,
4167 vcpu->run->s.regs.gprs,
4168 sizeof(sie_page->pv_grregs));
4169 }
4170 exit_reason = sie64a(vcpu->arch.sie_block,
4171 vcpu->run->s.regs.gprs);
4172 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4173 memcpy(vcpu->run->s.regs.gprs,
4174 sie_page->pv_grregs,
4175 sizeof(sie_page->pv_grregs));
4176 /*
4177 * We're not allowed to inject interrupts on intercepts
4178 * that leave the guest state in an "in-between" state
4179 * where the next SIE entry will do a continuation.
4180 * Fence interrupts in our "internal" PSW.
4181 */
4182 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4183 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4184 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4185 }
4186 }
4187 local_irq_disable();
4188 __enable_cpu_timer_accounting(vcpu);
4189 guest_exit_irqoff();
4190 local_irq_enable();
4191 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4192
4193 rc = vcpu_post_run(vcpu, exit_reason);
4194 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4195
4196 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4197 return rc;
4198 }
4199
4200 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4201 {
4202 struct kvm_run *kvm_run = vcpu->run;
4203 struct runtime_instr_cb *riccb;
4204 struct gs_cb *gscb;
4205
4206 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4207 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4208 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4209 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4210 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4211 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4212 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4213 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4214 }
4215 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4216 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4217 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4218 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4219 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4220 kvm_clear_async_pf_completion_queue(vcpu);
4221 }
4222 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4223 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4224 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4225 }
4226 /*
4227 * If userspace sets the riccb (e.g. after migration) to a valid state,
4228 * we should enable RI here instead of doing the lazy enablement.
4229 */
4230 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4231 test_kvm_facility(vcpu->kvm, 64) &&
4232 riccb->v &&
4233 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4234 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4235 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4236 }
4237 /*
4238 * If userspace sets the gscb (e.g. after migration) to non-zero,
4239 * we should enable GS here instead of doing the lazy enablement.
4240 */
4241 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4242 test_kvm_facility(vcpu->kvm, 133) &&
4243 gscb->gssm &&
4244 !vcpu->arch.gs_enabled) {
4245 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4246 vcpu->arch.sie_block->ecb |= ECB_GS;
4247 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4248 vcpu->arch.gs_enabled = 1;
4249 }
4250 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4251 test_kvm_facility(vcpu->kvm, 82)) {
4252 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4253 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4254 }
4255 if (MACHINE_HAS_GS) {
4256 preempt_disable();
4257 __ctl_set_bit(2, 4);
4258 if (current->thread.gs_cb) {
4259 vcpu->arch.host_gscb = current->thread.gs_cb;
4260 save_gs_cb(vcpu->arch.host_gscb);
4261 }
4262 if (vcpu->arch.gs_enabled) {
4263 current->thread.gs_cb = (struct gs_cb *)
4264 &vcpu->run->s.regs.gscb;
4265 restore_gs_cb(current->thread.gs_cb);
4266 }
4267 preempt_enable();
4268 }
4269 /* SIE will load etoken directly from SDNX and therefore kvm_run */
4270 }
4271
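/*
 * Copy register state that userspace marked dirty from kvm_run into the
 * SIE control block and load the guest access and fp/vector registers
 * into the host CPU before entering SIE.
 */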
4272 static void sync_regs(struct kvm_vcpu *vcpu)
4273 {
4274 struct kvm_run *kvm_run = vcpu->run;
4275
4276 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4277 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4278 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4279 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4280 /* some control register changes require a tlb flush */
4281 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4282 }
4283 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4284 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4285 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4286 }
4287 save_access_regs(vcpu->arch.host_acrs);
4288 restore_access_regs(vcpu->run->s.regs.acrs);
4289 /* save host (userspace) fprs/vrs */
4290 save_fpu_regs();
4291 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4292 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4293 if (MACHINE_HAS_VX)
4294 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4295 else
4296 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4297 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4298 if (test_fp_ctl(current->thread.fpu.fpc))
4299 /* User space provided an invalid FPC, let's clear it */
4300 current->thread.fpu.fpc = 0;
4301
4302 /* Sync fmt2 only data */
4303 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4304 sync_regs_fmt2(vcpu);
4305 } else {
4306 /*
4307 * In several places we have to modify our internal view to
4308 * not do things that are disallowed by the ultravisor. For
4309 * example we must not inject interrupts after specific exits
4310 * (e.g. 112 prefix page not secure). We do this by turning
4311 * off the machine check, external and I/O interrupt bits
4312 * of our PSW copy. To avoid getting validity intercepts, we
4313 * do only accept the condition code from userspace.
4314 */
4315 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4316 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4317 PSW_MASK_CC;
4318 }
4319
4320 kvm_run->kvm_dirty_regs = 0;
4321 }
4322
4323 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4324 {
4325 struct kvm_run *kvm_run = vcpu->run;
4326
4327 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4328 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4329 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4330 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4331 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4332 if (MACHINE_HAS_GS) {
4333 preempt_disable();
4334 __ctl_set_bit(2, 4);
4335 if (vcpu->arch.gs_enabled)
4336 save_gs_cb(current->thread.gs_cb);
4337 current->thread.gs_cb = vcpu->arch.host_gscb;
4338 restore_gs_cb(vcpu->arch.host_gscb);
4339 if (!vcpu->arch.host_gscb)
4340 __ctl_clear_bit(2, 4);
4341 vcpu->arch.host_gscb = NULL;
4342 preempt_enable();
4343 }
4344 /* SIE will save etoken directly into SDNX and therefore kvm_run */
4345 }
4346
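/*
 * Counterpart of sync_regs(): copy guest register state back into kvm_run
 * and restore the host access and fp/vector register state.
 */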
4347 static void store_regs(struct kvm_vcpu *vcpu)
4348 {
4349 struct kvm_run *kvm_run = vcpu->run;
4350
4351 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4352 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4353 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4354 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4355 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4356 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4357 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4358 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4359 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4360 save_access_regs(vcpu->run->s.regs.acrs);
4361 restore_access_regs(vcpu->arch.host_acrs);
4362 /* Save guest register state */
4363 save_fpu_regs();
4364 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4365 /* Restore will be done lazily at return */
4366 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4367 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4368 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4369 store_regs_fmt2(vcpu);
4370 }
4371
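/*
 * KVM_RUN: validate the sync-reg masks, start the vCPU if user space does
 * not control the CPU state, sync registers, run the SIE loop and store
 * the results back for userspace.
 */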
4372 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4373 {
4374 struct kvm_run *kvm_run = vcpu->run;
4375 int rc;
4376
4377 if (kvm_run->immediate_exit)
4378 return -EINTR;
4379
4380 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4381 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4382 return -EINVAL;
4383
4384 vcpu_load(vcpu);
4385
4386 if (guestdbg_exit_pending(vcpu)) {
4387 kvm_s390_prepare_debug_exit(vcpu);
4388 rc = 0;
4389 goto out;
4390 }
4391
4392 kvm_sigset_activate(vcpu);
4393
4394 /*
4395 * no need to check the return value of vcpu_start as it can only have
4396 * an error for protvirt, but protvirt means user cpu state
4397 */
4398 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4399 kvm_s390_vcpu_start(vcpu);
4400 } else if (is_vcpu_stopped(vcpu)) {
4401 pr_err_ratelimited("can't run stopped vcpu %d\n",
4402 vcpu->vcpu_id);
4403 rc = -EINVAL;
4404 goto out;
4405 }
4406
4407 sync_regs(vcpu);
4408 enable_cpu_timer_accounting(vcpu);
4409
4410 might_fault();
4411 rc = __vcpu_run(vcpu);
4412
4413 if (signal_pending(current) && !rc) {
4414 kvm_run->exit_reason = KVM_EXIT_INTR;
4415 rc = -EINTR;
4416 }
4417
4418 if (guestdbg_exit_pending(vcpu) && !rc) {
4419 kvm_s390_prepare_debug_exit(vcpu);
4420 rc = 0;
4421 }
4422
4423 if (rc == -EREMOTE) {
4424 /* userspace support is needed, kvm_run has been prepared */
4425 rc = 0;
4426 }
4427
4428 disable_cpu_timer_accounting(vcpu);
4429 store_regs(vcpu);
4430
4431 kvm_sigset_deactivate(vcpu);
4432
4433 vcpu->stat.exit_userspace++;
4434 out:
4435 vcpu_put(vcpu);
4436 return rc;
4437 }
4438
4439 /*
4440 * store status at address
4441 * we have two special cases:
4442 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4443 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4444 */
4445 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4446 {
4447 unsigned char archmode = 1;
4448 freg_t fprs[NUM_FPRS];
4449 unsigned int px;
4450 u64 clkcomp, cputm;
4451 int rc;
4452
4453 px = kvm_s390_get_prefix(vcpu);
4454 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4455 if (write_guest_abs(vcpu, 163, &archmode, 1))
4456 return -EFAULT;
4457 gpa = 0;
4458 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4459 if (write_guest_real(vcpu, 163, &archmode, 1))
4460 return -EFAULT;
4461 gpa = px;
4462 } else
4463 gpa -= __LC_FPREGS_SAVE_AREA;
4464
4465 /* manually convert vector registers if necessary */
4466 if (MACHINE_HAS_VX) {
4467 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4468 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4469 fprs, 128);
4470 } else {
4471 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4472 vcpu->run->s.regs.fprs, 128);
4473 }
4474 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4475 vcpu->run->s.regs.gprs, 128);
4476 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4477 &vcpu->arch.sie_block->gpsw, 16);
4478 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4479 &px, 4);
4480 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4481 &vcpu->run->s.regs.fpc, 4);
4482 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4483 &vcpu->arch.sie_block->todpr, 4);
4484 cputm = kvm_s390_get_cpu_timer(vcpu);
4485 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4486 &cputm, 8);
4487 clkcomp = vcpu->arch.sie_block->ckc >> 8;
4488 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4489 &clkcomp, 8);
4490 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4491 &vcpu->run->s.regs.acrs, 64);
4492 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4493 &vcpu->arch.sie_block->gcr, 128);
4494 return rc ? -EFAULT : 0;
4495 }
4496
4497 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4498 {
4499 /*
4500 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4501 * switch in the run ioctl. Let's update our copies before we save
4502 * it into the save area
4503 */
4504 save_fpu_regs();
4505 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4506 save_access_regs(vcpu->run->s.regs.acrs);
4507
4508 return kvm_s390_store_status_unloaded(vcpu, addr);
4509 }
4510
4511 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4512 {
4513 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4514 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4515 }
4516
4517 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4518 {
4519 unsigned int i;
4520 struct kvm_vcpu *vcpu;
4521
4522 kvm_for_each_vcpu(i, vcpu, kvm) {
4523 __disable_ibs_on_vcpu(vcpu);
4524 }
4525 }
4526
4527 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4528 {
4529 if (!sclp.has_ibs)
4530 return;
4531 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4532 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4533 }
4534
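/*
 * Move a stopped vCPU to the operating state. IBS is only used while a
 * single vCPU is started, so enable it when this is the only started vCPU
 * and disable it on all vCPUs as soon as a second one starts.
 */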
4535 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4536 {
4537 int i, online_vcpus, r = 0, started_vcpus = 0;
4538
4539 if (!is_vcpu_stopped(vcpu))
4540 return 0;
4541
4542 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4543 /* Only one cpu at a time may enter/leave the STOPPED state. */
4544 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4545 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4546
4547 /* Let's tell the UV that we want to change into the operating state */
4548 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4549 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4550 if (r) {
4551 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4552 return r;
4553 }
4554 }
4555
4556 for (i = 0; i < online_vcpus; i++) {
4557 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4558 started_vcpus++;
4559 }
4560
4561 if (started_vcpus == 0) {
4562 /* we're the only active VCPU -> speed it up */
4563 __enable_ibs_on_vcpu(vcpu);
4564 } else if (started_vcpus == 1) {
4565 /*
4566 * As we are starting a second VCPU, we have to disable
4567 * the IBS facility on all VCPUs to remove potentially
4568 * outstanding ENABLE requests.
4569 */
4570 __disable_ibs_on_all_vcpus(vcpu->kvm);
4571 }
4572
4573 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4574 /*
4575 * The real PSW might have changed due to a RESTART interpreted by the
4576 * ultravisor. We block all interrupts and let the next sie exit
4577 * refresh our view.
4578 */
4579 if (kvm_s390_pv_cpu_is_protected(vcpu))
4580 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4581 /*
4582 * Another VCPU might have used IBS while we were offline.
4583 * Let's play safe and flush the VCPU at startup.
4584 */
4585 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4586 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4587 return 0;
4588 }
4589
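/*
 * Move a started vCPU to the stopped state. If exactly one started vCPU
 * remains afterwards, re-enable IBS for it.
 */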
4590 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4591 {
4592 int i, online_vcpus, r = 0, started_vcpus = 0;
4593 struct kvm_vcpu *started_vcpu = NULL;
4594
4595 if (is_vcpu_stopped(vcpu))
4596 return 0;
4597
4598 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4599 /* Only one cpu at a time may enter/leave the STOPPED state. */
4600 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4601 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4602
4603 /* Let's tell the UV that we want to change into the stopped state */
4604 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4605 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4606 if (r) {
4607 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4608 return r;
4609 }
4610 }
4611
4612 /*
4613 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
4614 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
4615 * have been fully processed. This will ensure that the VCPU
4616 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
4617 */
4618 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4619 kvm_s390_clear_stop_irq(vcpu);
4620
4621 __disable_ibs_on_vcpu(vcpu);
4622
4623 for (i = 0; i < online_vcpus; i++) {
4624 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4625 started_vcpus++;
4626 started_vcpu = vcpu->kvm->vcpus[i];
4627 }
4628 }
4629
4630 if (started_vcpus == 1) {
4631 /*
4632 * As we only have one VCPU left, we want to enable the
4633 * IBS facility for that VCPU to speed it up.
4634 */
4635 __enable_ibs_on_vcpu(started_vcpu);
4636 }
4637
4638 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4639 return 0;
4640 }
4641
4642 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4643 struct kvm_enable_cap *cap)
4644 {
4645 int r;
4646
4647 if (cap->flags)
4648 return -EINVAL;
4649
4650 switch (cap->cap) {
4651 case KVM_CAP_S390_CSS_SUPPORT:
4652 if (!vcpu->kvm->arch.css_support) {
4653 vcpu->kvm->arch.css_support = 1;
4654 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4655 trace_kvm_s390_enable_css(vcpu->kvm);
4656 }
4657 r = 0;
4658 break;
4659 default:
4660 r = -EINVAL;
4661 break;
4662 }
4663 return r;
4664 }
4665
4666 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4667 struct kvm_s390_mem_op *mop)
4668 {
4669 void __user *uaddr = (void __user *)mop->buf;
4670 int r = 0;
4671
4672 if (mop->flags || !mop->size)
4673 return -EINVAL;
4674 if (mop->size + mop->sida_offset < mop->size)
4675 return -EINVAL;
4676 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4677 return -E2BIG;
4678 if (!kvm_s390_pv_cpu_is_protected(vcpu))
4679 return -EINVAL;
4680
4681 switch (mop->op) {
4682 case KVM_S390_MEMOP_SIDA_READ:
4683 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4684 mop->sida_offset), mop->size))
4685 r = -EFAULT;
4686
4687 break;
4688 case KVM_S390_MEMOP_SIDA_WRITE:
4689 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4690 mop->sida_offset), uaddr, mop->size))
4691 r = -EFAULT;
4692 break;
4693 }
4694 return r;
4695 }
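
/*
 * KVM_S390_MEM_OP on guest logical addresses: either only check access
 * rights (KVM_S390_MEMOP_F_CHECK_ONLY) or copy data between userspace and
 * guest memory via a temporary buffer, optionally injecting the resulting
 * program exception into the guest.
 */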
4696 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4697 struct kvm_s390_mem_op *mop)
4698 {
4699 void __user *uaddr = (void __user *)mop->buf;
4700 void *tmpbuf = NULL;
4701 int r = 0;
4702 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4703 | KVM_S390_MEMOP_F_CHECK_ONLY;
4704
4705 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4706 return -EINVAL;
4707
4708 if (mop->size > MEM_OP_MAX_SIZE)
4709 return -E2BIG;
4710
4711 if (kvm_s390_pv_cpu_is_protected(vcpu))
4712 return -EINVAL;
4713
4714 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4715 tmpbuf = vmalloc(mop->size);
4716 if (!tmpbuf)
4717 return -ENOMEM;
4718 }
4719
4720 switch (mop->op) {
4721 case KVM_S390_MEMOP_LOGICAL_READ:
4722 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4723 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4724 mop->size, GACC_FETCH);
4725 break;
4726 }
4727 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4728 if (r == 0) {
4729 if (copy_to_user(uaddr, tmpbuf, mop->size))
4730 r = -EFAULT;
4731 }
4732 break;
4733 case KVM_S390_MEMOP_LOGICAL_WRITE:
4734 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4735 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4736 mop->size, GACC_STORE);
4737 break;
4738 }
4739 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4740 r = -EFAULT;
4741 break;
4742 }
4743 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4744 break;
4745 }
4746
4747 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4748 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4749
4750 vfree(tmpbuf);
4751 return r;
4752 }
4753
4754 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4755 struct kvm_s390_mem_op *mop)
4756 {
4757 int r, srcu_idx;
4758
4759 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4760
4761 switch (mop->op) {
4762 case KVM_S390_MEMOP_LOGICAL_READ:
4763 case KVM_S390_MEMOP_LOGICAL_WRITE:
4764 r = kvm_s390_guest_mem_op(vcpu, mop);
4765 break;
4766 case KVM_S390_MEMOP_SIDA_READ:
4767 case KVM_S390_MEMOP_SIDA_WRITE:
4768 /* we are locked against sida going away by the vcpu->mutex */
4769 r = kvm_s390_guest_sida_op(vcpu, mop);
4770 break;
4771 default:
4772 r = -EINVAL;
4773 }
4774
4775 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4776 return r;
4777 }
4778
4779 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4780 unsigned int ioctl, unsigned long arg)
4781 {
4782 struct kvm_vcpu *vcpu = filp->private_data;
4783 void __user *argp = (void __user *)arg;
4784
4785 switch (ioctl) {
4786 case KVM_S390_IRQ: {
4787 struct kvm_s390_irq s390irq;
4788
4789 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4790 return -EFAULT;
4791 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4792 }
4793 case KVM_S390_INTERRUPT: {
4794 struct kvm_s390_interrupt s390int;
4795 struct kvm_s390_irq s390irq = {};
4796
4797 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4798 return -EFAULT;
4799 if (s390int_to_s390irq(&s390int, &s390irq))
4800 return -EINVAL;
4801 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4802 }
4803 }
4804 return -ENOIOCTLCMD;
4805 }
4806
4807 long kvm_arch_vcpu_ioctl(struct file *filp,
4808 unsigned int ioctl, unsigned long arg)
4809 {
4810 struct kvm_vcpu *vcpu = filp->private_data;
4811 void __user *argp = (void __user *)arg;
4812 int idx;
4813 long r;
4814 u16 rc, rrc;
4815
4816 vcpu_load(vcpu);
4817
4818 switch (ioctl) {
4819 case KVM_S390_STORE_STATUS:
4820 idx = srcu_read_lock(&vcpu->kvm->srcu);
4821 r = kvm_s390_store_status_unloaded(vcpu, arg);
4822 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4823 break;
4824 case KVM_S390_SET_INITIAL_PSW: {
4825 psw_t psw;
4826
4827 r = -EFAULT;
4828 if (copy_from_user(&psw, argp, sizeof(psw)))
4829 break;
4830 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4831 break;
4832 }
4833 case KVM_S390_CLEAR_RESET:
4834 r = 0;
4835 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4836 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4837 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4838 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4839 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4840 rc, rrc);
4841 }
4842 break;
4843 case KVM_S390_INITIAL_RESET:
4844 r = 0;
4845 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4846 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4847 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4848 UVC_CMD_CPU_RESET_INITIAL,
4849 &rc, &rrc);
4850 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4851 rc, rrc);
4852 }
4853 break;
4854 case KVM_S390_NORMAL_RESET:
4855 r = 0;
4856 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4857 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4858 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4859 UVC_CMD_CPU_RESET, &rc, &rrc);
4860 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4861 rc, rrc);
4862 }
4863 break;
4864 case KVM_SET_ONE_REG:
4865 case KVM_GET_ONE_REG: {
4866 struct kvm_one_reg reg;
4867 r = -EINVAL;
4868 if (kvm_s390_pv_cpu_is_protected(vcpu))
4869 break;
4870 r = -EFAULT;
4871 if (copy_from_user(&reg, argp, sizeof(reg)))
4872 break;
4873 if (ioctl == KVM_SET_ONE_REG)
4874 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4875 else
4876 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4877 break;
4878 }
4879 #ifdef CONFIG_KVM_S390_UCONTROL
4880 case KVM_S390_UCAS_MAP: {
4881 struct kvm_s390_ucas_mapping ucasmap;
4882
4883 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4884 r = -EFAULT;
4885 break;
4886 }
4887
4888 if (!kvm_is_ucontrol(vcpu->kvm)) {
4889 r = -EINVAL;
4890 break;
4891 }
4892
4893 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4894 ucasmap.vcpu_addr, ucasmap.length);
4895 break;
4896 }
4897 case KVM_S390_UCAS_UNMAP: {
4898 struct kvm_s390_ucas_mapping ucasmap;
4899
4900 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4901 r = -EFAULT;
4902 break;
4903 }
4904
4905 if (!kvm_is_ucontrol(vcpu->kvm)) {
4906 r = -EINVAL;
4907 break;
4908 }
4909
4910 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4911 ucasmap.length);
4912 break;
4913 }
4914 #endif
4915 case KVM_S390_VCPU_FAULT: {
4916 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4917 break;
4918 }
4919 case KVM_ENABLE_CAP:
4920 {
4921 struct kvm_enable_cap cap;
4922 r = -EFAULT;
4923 if (copy_from_user(&cap, argp, sizeof(cap)))
4924 break;
4925 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4926 break;
4927 }
4928 case KVM_S390_MEM_OP: {
4929 struct kvm_s390_mem_op mem_op;
4930
4931 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4932 r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4933 else
4934 r = -EFAULT;
4935 break;
4936 }
4937 case KVM_S390_SET_IRQ_STATE: {
4938 struct kvm_s390_irq_state irq_state;
4939
4940 r = -EFAULT;
4941 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4942 break;
4943 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4944 irq_state.len == 0 ||
4945 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4946 r = -EINVAL;
4947 break;
4948 }
4949 /* do not use irq_state.flags, it will break old QEMUs */
4950 r = kvm_s390_set_irq_state(vcpu,
4951 (void __user *) irq_state.buf,
4952 irq_state.len);
4953 break;
4954 }
4955 case KVM_S390_GET_IRQ_STATE: {
4956 struct kvm_s390_irq_state irq_state;
4957
4958 r = -EFAULT;
4959 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4960 break;
4961 if (irq_state.len == 0) {
4962 r = -EINVAL;
4963 break;
4964 }
4965 /* do not use irq_state.flags, it will break old QEMUs */
4966 r = kvm_s390_get_irq_state(vcpu,
4967 (__u8 __user *) irq_state.buf,
4968 irq_state.len);
4969 break;
4970 }
4971 default:
4972 r = -ENOTTY;
4973 }
4974
4975 vcpu_put(vcpu);
4976 return r;
4977 }
4978
4979 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4980 {
4981 #ifdef CONFIG_KVM_S390_UCONTROL
4982 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4983 && (kvm_is_ucontrol(vcpu->kvm))) {
4984 vmf->page = virt_to_page(vcpu->arch.sie_block);
4985 get_page(vmf->page);
4986 return 0;
4987 }
4988 #endif
4989 return VM_FAULT_SIGBUS;
4990 }
4991
4992 /* Section: memory related */
4993 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4994 struct kvm_memory_slot *memslot,
4995 const struct kvm_userspace_memory_region *mem,
4996 enum kvm_mr_change change)
4997 {
4998 /* A few sanity checks. Memory slots have to start and end at a segment
4999 boundary (1 MB). The memory in userland may be fragmented into various
5000 different vmas. It is okay to mmap() and munmap() stuff in this slot
5001 after doing this call at any time. */
5002
5003 if (mem->userspace_addr & 0xffffful)
5004 return -EINVAL;
5005
5006 if (mem->memory_size & 0xffffful)
5007 return -EINVAL;
5008
5009 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
5010 return -EINVAL;
5011
5012 /* When we are protected, we should not change the memory slots */
5013 if (kvm_s390_pv_get_handle(kvm))
5014 return -EINVAL;
5015 return 0;
5016 }
5017
5018 void kvm_arch_commit_memory_region(struct kvm *kvm,
5019 const struct kvm_userspace_memory_region *mem,
5020 struct kvm_memory_slot *old,
5021 const struct kvm_memory_slot *new,
5022 enum kvm_mr_change change)
5023 {
5024 int rc = 0;
5025
5026 switch (change) {
5027 case KVM_MR_DELETE:
5028 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5029 old->npages * PAGE_SIZE);
5030 break;
5031 case KVM_MR_MOVE:
5032 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5033 old->npages * PAGE_SIZE);
5034 if (rc)
5035 break;
5036 fallthrough;
5037 case KVM_MR_CREATE:
5038 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5039 mem->guest_phys_addr, mem->memory_size);
5040 break;
5041 case KVM_MR_FLAGS_ONLY:
5042 break;
5043 default:
5044 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5045 }
5046 if (rc)
5047 pr_warn("failed to commit memory region\n");
5048 return;
5049 }
5050
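/*
 * Mask of facility bits in 64-bit facility word i that may be forwarded
 * to guests, derived from the SCLP-reported hmfai value.
 */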
5051 static inline unsigned long nonhyp_mask(int i)
5052 {
5053 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5054
5055 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5056 }
5057
5058 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5059 {
5060 vcpu->valid_wakeup = false;
5061 }
5062
5063 static int __init kvm_s390_init(void)
5064 {
5065 int i;
5066
5067 if (!sclp.has_sief2) {
5068 pr_info("SIE is not available\n");
5069 return -ENODEV;
5070 }
5071
5072 if (nested && hpage) {
5073 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5074 return -EINVAL;
5075 }
5076
5077 for (i = 0; i < 16; i++)
5078 kvm_s390_fac_base[i] |=
5079 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
5080
5081 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5082 }
5083
5084 static void __exit kvm_s390_exit(void)
5085 {
5086 kvm_exit();
5087 }
5088
5089 module_init(kvm_s390_init);
5090 module_exit(kvm_s390_exit);
5091
5092 /*
5093 * Enable autoloading of the kvm module.
5094 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5095 * since x86 takes a different approach.
5096 */
5097 #include <linux/miscdevice.h>
5098 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5099 MODULE_ALIAS("devname:kvm");
5100