1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * hosting IBM Z kernel virtual machines (s390x)
4 *
5 * Copyright IBM Corp. 2008, 2020
6 *
7 * Author(s): Carsten Otte <cotte@de.ibm.com>
8 * Christian Borntraeger <borntraeger@de.ibm.com>
9 * Heiko Carstens <heiko.carstens@de.ibm.com>
10 * Christian Ehrhardt <ehrhardt@de.ibm.com>
11 * Jason J. Herne <jjherne@us.ibm.com>
12 */
13
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
35
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include "kvm-s390.h"
49 #include "gaccess.h"
50
51 #define CREATE_TRACE_POINTS
52 #include "trace.h"
53 #include "trace-s390.h"
54
55 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
56 #define LOCAL_IRQS 32
57 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
58 (KVM_MAX_VCPUS + LOCAL_IRQS))
59
60 struct kvm_stats_debugfs_item debugfs_entries[] = {
61 VCPU_STAT("userspace_handled", exit_userspace),
62 VCPU_STAT("exit_null", exit_null),
63 VCPU_STAT("exit_validity", exit_validity),
64 VCPU_STAT("exit_stop_request", exit_stop_request),
65 VCPU_STAT("exit_external_request", exit_external_request),
66 VCPU_STAT("exit_io_request", exit_io_request),
67 VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
68 VCPU_STAT("exit_instruction", exit_instruction),
69 VCPU_STAT("exit_pei", exit_pei),
70 VCPU_STAT("exit_program_interruption", exit_program_interruption),
71 VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
72 VCPU_STAT("exit_operation_exception", exit_operation_exception),
73 VCPU_STAT("halt_successful_poll", halt_successful_poll),
74 VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
75 VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
76 VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
77 VCPU_STAT("halt_wakeup", halt_wakeup),
78 VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
79 VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
80 VCPU_STAT("instruction_lctlg", instruction_lctlg),
81 VCPU_STAT("instruction_lctl", instruction_lctl),
82 VCPU_STAT("instruction_stctl", instruction_stctl),
83 VCPU_STAT("instruction_stctg", instruction_stctg),
84 VCPU_STAT("deliver_ckc", deliver_ckc),
85 VCPU_STAT("deliver_cputm", deliver_cputm),
86 VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
87 VCPU_STAT("deliver_external_call", deliver_external_call),
88 VCPU_STAT("deliver_service_signal", deliver_service_signal),
89 VCPU_STAT("deliver_virtio", deliver_virtio),
90 VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
91 VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
92 VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
93 VCPU_STAT("deliver_program", deliver_program),
94 VCPU_STAT("deliver_io", deliver_io),
95 VCPU_STAT("deliver_machine_check", deliver_machine_check),
96 VCPU_STAT("exit_wait_state", exit_wait_state),
97 VCPU_STAT("inject_ckc", inject_ckc),
98 VCPU_STAT("inject_cputm", inject_cputm),
99 VCPU_STAT("inject_external_call", inject_external_call),
100 VM_STAT("inject_float_mchk", inject_float_mchk),
101 VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
102 VM_STAT("inject_io", inject_io),
103 VCPU_STAT("inject_mchk", inject_mchk),
104 VM_STAT("inject_pfault_done", inject_pfault_done),
105 VCPU_STAT("inject_program", inject_program),
106 VCPU_STAT("inject_restart", inject_restart),
107 VM_STAT("inject_service_signal", inject_service_signal),
108 VCPU_STAT("inject_set_prefix", inject_set_prefix),
109 VCPU_STAT("inject_stop_signal", inject_stop_signal),
110 VCPU_STAT("inject_pfault_init", inject_pfault_init),
111 VM_STAT("inject_virtio", inject_virtio),
112 VCPU_STAT("instruction_epsw", instruction_epsw),
113 VCPU_STAT("instruction_gs", instruction_gs),
114 VCPU_STAT("instruction_io_other", instruction_io_other),
115 VCPU_STAT("instruction_lpsw", instruction_lpsw),
116 VCPU_STAT("instruction_lpswe", instruction_lpswe),
117 VCPU_STAT("instruction_pfmf", instruction_pfmf),
118 VCPU_STAT("instruction_ptff", instruction_ptff),
119 VCPU_STAT("instruction_stidp", instruction_stidp),
120 VCPU_STAT("instruction_sck", instruction_sck),
121 VCPU_STAT("instruction_sckpf", instruction_sckpf),
122 VCPU_STAT("instruction_spx", instruction_spx),
123 VCPU_STAT("instruction_stpx", instruction_stpx),
124 VCPU_STAT("instruction_stap", instruction_stap),
125 VCPU_STAT("instruction_iske", instruction_iske),
126 VCPU_STAT("instruction_ri", instruction_ri),
127 VCPU_STAT("instruction_rrbe", instruction_rrbe),
128 VCPU_STAT("instruction_sske", instruction_sske),
129 VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
130 VCPU_STAT("instruction_essa", instruction_essa),
131 VCPU_STAT("instruction_stsi", instruction_stsi),
132 VCPU_STAT("instruction_stfl", instruction_stfl),
133 VCPU_STAT("instruction_tb", instruction_tb),
134 VCPU_STAT("instruction_tpi", instruction_tpi),
135 VCPU_STAT("instruction_tprot", instruction_tprot),
136 VCPU_STAT("instruction_tsch", instruction_tsch),
137 VCPU_STAT("instruction_sthyi", instruction_sthyi),
138 VCPU_STAT("instruction_sie", instruction_sie),
139 VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
140 VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
141 VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
142 VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
143 VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
144 VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
145 VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
146 VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
147 VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
148 VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
149 VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
150 VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
151 VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
152 VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
153 VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
154 VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
155 VCPU_STAT("instruction_diag_10", diagnose_10),
156 VCPU_STAT("instruction_diag_44", diagnose_44),
157 VCPU_STAT("instruction_diag_9c", diagnose_9c),
158 VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
159 VCPU_STAT("instruction_diag_258", diagnose_258),
160 VCPU_STAT("instruction_diag_308", diagnose_308),
161 VCPU_STAT("instruction_diag_500", diagnose_500),
162 VCPU_STAT("instruction_diag_other", diagnose_other),
163 { NULL }
164 };
165
166 struct kvm_s390_tod_clock_ext {
167 __u8 epoch_idx;
168 __u64 tod;
169 __u8 reserved[7];
170 } __packed;
171
172 /* allow nested virtualization in KVM (if enabled by user space) */
173 static int nested;
174 module_param(nested, int, S_IRUGO);
175 MODULE_PARM_DESC(nested, "Nested virtualization support");
176
177 /* allow 1m huge page guest backing, if !nested */
178 static int hpage;
179 module_param(hpage, int, 0444);
180 MODULE_PARM_DESC(hpage, "1m huge page backing support");
181
182 /* maximum percentage of steal time for polling. >100 is treated like 100 */
183 static u8 halt_poll_max_steal = 10;
184 module_param(halt_poll_max_steal, byte, 0644);
185 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
186
187 /* if set to true, the GISA will be initialized and used if available */
188 static bool use_gisa = true;
189 module_param(use_gisa, bool, 0644);
190 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
191
192 /*
193 * For now we handle at most 16 double words as this is what the s390 base
194 * kernel handles and stores in the prefix page. If we ever need to go beyond
195 * this, the code will need changes, but the external uapi can stay.
196 */
197 #define SIZE_INTERNAL 16
198
199 /*
200 * Base feature mask that defines default mask for facilities. Consists of the
201 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
202 */
203 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
204 /*
205 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
206 * and defines the facilities that can be enabled via a cpu model.
207 */
208 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
209
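/*
 * Number of facility double words that KVM handles internally. The
 * BUILD_BUG_ONs below make sure SIZE_INTERNAL never exceeds what the
 * architected facility mask/list sizes and the lowcore stfle area can hold.
 */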
210 static unsigned long kvm_s390_fac_size(void)
211 {
212 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
213 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
214 BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
215 sizeof(S390_lowcore.stfle_fac_list));
216
217 return SIZE_INTERNAL;
218 }
219
220 /* available cpu features supported by kvm */
221 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
222 /* available subfunctions indicated via query / "test bit" */
223 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
224
225 static struct gmap_notifier gmap_notifier;
226 static struct gmap_notifier vsie_gmap_notifier;
227 debug_info_t *kvm_s390_dbf;
228 debug_info_t *kvm_s390_dbf_uv;
229
230 /* Section: not file related */
231 int kvm_arch_hardware_enable(void)
232 {
233 /* every s390 is virtualization enabled ;-) */
234 return 0;
235 }
236
237 int kvm_arch_check_processor_compat(void *opaque)
238 {
239 return 0;
240 }
241
242 /* forward declarations */
243 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
244 unsigned long end);
245 static int sca_switch_to_extended(struct kvm *kvm);
246
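/*
 * Compensate a host TOD clock step of @delta in a SIE control block: add
 * -delta to the epoch so that the guest does not observe the jump. If the
 * multiple-epoch facility is in use (ECD_MEF), propagate a possible carry
 * into the epoch index (epdx) as well, treating epoch and epoch index as
 * one wider signed value.
 */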
247 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
248 {
249 u8 delta_idx = 0;
250
251 /*
252 * The TOD jumps by delta, we have to compensate this by adding
253 * -delta to the epoch.
254 */
255 delta = -delta;
256
257 /* sign-extension - we're adding to signed values below */
258 if ((s64)delta < 0)
259 delta_idx = -1;
260
261 scb->epoch += delta;
262 if (scb->ecd & ECD_MEF) {
263 scb->epdx += delta_idx;
264 if (scb->epoch < delta)
265 scb->epdx += 1;
266 }
267 }
268
269 /*
270 * This callback is executed during stop_machine(). All CPUs are therefore
271 * temporarily stopped. In order not to change guest behavior, we have to
272 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
273 * so a CPU won't be stopped while calculating with the epoch.
274 */
275 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
276 void *v)
277 {
278 struct kvm *kvm;
279 struct kvm_vcpu *vcpu;
280 int i;
281 unsigned long long *delta = v;
282
283 list_for_each_entry(kvm, &vm_list, vm_list) {
284 kvm_for_each_vcpu(i, vcpu, kvm) {
285 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
286 if (i == 0) {
287 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
288 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
289 }
290 if (vcpu->arch.cputm_enabled)
291 vcpu->arch.cputm_start += *delta;
292 if (vcpu->arch.vsie_block)
293 kvm_clock_sync_scb(vcpu->arch.vsie_block,
294 *delta);
295 }
296 }
297 return NOTIFY_OK;
298 }
299
300 static struct notifier_block kvm_clock_notifier = {
301 .notifier_call = kvm_clock_sync,
302 };
303
304 int kvm_arch_hardware_setup(void *opaque)
305 {
306 gmap_notifier.notifier_call = kvm_gmap_notifier;
307 gmap_register_pte_notifier(&gmap_notifier);
308 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
309 gmap_register_pte_notifier(&vsie_gmap_notifier);
310 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
311 &kvm_clock_notifier);
312 return 0;
313 }
314
315 void kvm_arch_hardware_unsetup(void)
316 {
317 gmap_unregister_pte_notifier(&gmap_notifier);
318 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
319 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
320 &kvm_clock_notifier);
321 }
322
323 static void allow_cpu_feat(unsigned long nr)
324 {
325 set_bit_inv(nr, kvm_s390_available_cpu_feat);
326 }
327
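/*
 * Check whether PERFORM LOCKED OPERATION function @nr is installed by
 * issuing PLO with the "test bit" flag (0x100) set in the function code;
 * condition code 0 means the function is available.
 */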
328 static inline int plo_test_bit(unsigned char nr)
329 {
330 unsigned long function = (unsigned long)nr | 0x100;
331 int cc;
332
333 asm volatile(
334 " lgr 0,%[function]\n"
335 /* Parameter registers are ignored for "test bit" */
336 " plo 0,0,0,0(0)\n"
337 " ipm %0\n"
338 " srl %0,28\n"
339 : "=d" (cc)
340 : [function] "d" (function)
341 : "cc", "0");
342 return cc == 0;
343 }
344
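/*
 * Run the query function of an RRF-format instruction (e.g. SORTL or
 * DFLTCC): GR0 = 0 selects the query subfunction and GR1 points to the
 * buffer that receives the installed-subfunction bitmap.
 */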
345 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
346 {
347 asm volatile(
348 " lghi 0,0\n"
349 " lgr 1,%[query]\n"
350 /* Parameter registers are ignored */
351 " .insn rrf,%[opc] << 16,2,4,6,0\n"
352 :
353 : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
354 : "cc", "memory", "0", "1");
355 }
356
357 #define INSN_SORTL 0xb938
358 #define INSN_DFLTCC 0xb939
359
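/*
 * Probe which PLO, PTFF and CPACF subfunctions the host provides and store
 * the results in kvm_s390_available_subfunc. Afterwards advertise the
 * SIE-related CPU features, but only if nested virtualization is enabled
 * and the minimum requirements (SIEF2, ESOP, 64-bit SCAO and IDTE) are met.
 */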
360 static void kvm_s390_cpu_feat_init(void)
361 {
362 int i;
363
364 for (i = 0; i < 256; ++i) {
365 if (plo_test_bit(i))
366 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
367 }
368
369 if (test_facility(28)) /* TOD-clock steering */
370 ptff(kvm_s390_available_subfunc.ptff,
371 sizeof(kvm_s390_available_subfunc.ptff),
372 PTFF_QAF);
373
374 if (test_facility(17)) { /* MSA */
375 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
376 kvm_s390_available_subfunc.kmac);
377 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
378 kvm_s390_available_subfunc.kmc);
379 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
380 kvm_s390_available_subfunc.km);
381 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
382 kvm_s390_available_subfunc.kimd);
383 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
384 kvm_s390_available_subfunc.klmd);
385 }
386 if (test_facility(76)) /* MSA3 */
387 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
388 kvm_s390_available_subfunc.pckmo);
389 if (test_facility(77)) { /* MSA4 */
390 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
391 kvm_s390_available_subfunc.kmctr);
392 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
393 kvm_s390_available_subfunc.kmf);
394 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
395 kvm_s390_available_subfunc.kmo);
396 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
397 kvm_s390_available_subfunc.pcc);
398 }
399 if (test_facility(57)) /* MSA5 */
400 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
401 kvm_s390_available_subfunc.ppno);
402
403 if (test_facility(146)) /* MSA8 */
404 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
405 kvm_s390_available_subfunc.kma);
406
407 if (test_facility(155)) /* MSA9 */
408 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
409 kvm_s390_available_subfunc.kdsa);
410
411 if (test_facility(150)) /* SORTL */
412 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
413
414 if (test_facility(151)) /* DFLTCC */
415 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
416
417 if (MACHINE_HAS_ESOP)
418 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
419 /*
420 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
421 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
422 */
423 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
424 !test_facility(3) || !nested)
425 return;
426 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
427 if (sclp.has_64bscao)
428 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
429 if (sclp.has_siif)
430 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
431 if (sclp.has_gpere)
432 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
433 if (sclp.has_gsls)
434 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
435 if (sclp.has_ib)
436 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
437 if (sclp.has_cei)
438 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
439 if (sclp.has_ibs)
440 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
441 if (sclp.has_kss)
442 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
443 /*
444 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
445 * all skey handling functions read/set the skey from the PGSTE
446 * instead of the real storage key.
447 *
448 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
449 * pages being detected as preserved although they are resident.
450 *
451 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
452 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
453 *
454 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
455 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
456 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
457 *
458 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
459 * cannot easily shadow the SCA because of the ipte lock.
460 */
461 }
462
463 int kvm_arch_init(void *opaque)
464 {
465 int rc = -ENOMEM;
466
467 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
468 if (!kvm_s390_dbf)
469 return -ENOMEM;
470
471 kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
472 if (!kvm_s390_dbf_uv)
473 goto out;
474
475 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
476 debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
477 goto out;
478
479 kvm_s390_cpu_feat_init();
480
481 /* Register floating interrupt controller interface. */
482 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
483 if (rc) {
484 pr_err("A FLIC registration call failed with rc=%d\n", rc);
485 goto out;
486 }
487
488 rc = kvm_s390_gib_init(GAL_ISC);
489 if (rc)
490 goto out;
491
492 return 0;
493
494 out:
495 kvm_arch_exit();
496 return rc;
497 }
498
499 void kvm_arch_exit(void)
500 {
501 kvm_s390_gib_destroy();
502 debug_unregister(kvm_s390_dbf);
503 debug_unregister(kvm_s390_dbf_uv);
504 }
505
506 /* Section: device related */
507 long kvm_arch_dev_ioctl(struct file *filp,
508 unsigned int ioctl, unsigned long arg)
509 {
510 if (ioctl == KVM_S390_ENABLE_SIE)
511 return s390_enable_sie();
512 return -EINVAL;
513 }
514
515 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
516 {
517 int r;
518
519 switch (ext) {
520 case KVM_CAP_S390_PSW:
521 case KVM_CAP_S390_GMAP:
522 case KVM_CAP_SYNC_MMU:
523 #ifdef CONFIG_KVM_S390_UCONTROL
524 case KVM_CAP_S390_UCONTROL:
525 #endif
526 case KVM_CAP_ASYNC_PF:
527 case KVM_CAP_SYNC_REGS:
528 case KVM_CAP_ONE_REG:
529 case KVM_CAP_ENABLE_CAP:
530 case KVM_CAP_S390_CSS_SUPPORT:
531 case KVM_CAP_IOEVENTFD:
532 case KVM_CAP_DEVICE_CTRL:
533 case KVM_CAP_S390_IRQCHIP:
534 case KVM_CAP_VM_ATTRIBUTES:
535 case KVM_CAP_MP_STATE:
536 case KVM_CAP_IMMEDIATE_EXIT:
537 case KVM_CAP_S390_INJECT_IRQ:
538 case KVM_CAP_S390_USER_SIGP:
539 case KVM_CAP_S390_USER_STSI:
540 case KVM_CAP_S390_SKEYS:
541 case KVM_CAP_S390_IRQ_STATE:
542 case KVM_CAP_S390_USER_INSTR0:
543 case KVM_CAP_S390_CMMA_MIGRATION:
544 case KVM_CAP_S390_AIS:
545 case KVM_CAP_S390_AIS_MIGRATION:
546 case KVM_CAP_S390_VCPU_RESETS:
547 case KVM_CAP_SET_GUEST_DEBUG:
548 case KVM_CAP_S390_DIAG318:
549 r = 1;
550 break;
551 case KVM_CAP_S390_HPAGE_1M:
552 r = 0;
553 if (hpage && !kvm_is_ucontrol(kvm))
554 r = 1;
555 break;
556 case KVM_CAP_S390_MEM_OP:
557 r = MEM_OP_MAX_SIZE;
558 break;
559 case KVM_CAP_NR_VCPUS:
560 case KVM_CAP_MAX_VCPUS:
561 case KVM_CAP_MAX_VCPU_ID:
562 r = KVM_S390_BSCA_CPU_SLOTS;
563 if (!kvm_s390_use_sca_entries())
564 r = KVM_MAX_VCPUS;
565 else if (sclp.has_esca && sclp.has_64bscao)
566 r = KVM_S390_ESCA_CPU_SLOTS;
567 break;
568 case KVM_CAP_S390_COW:
569 r = MACHINE_HAS_ESOP;
570 break;
571 case KVM_CAP_S390_VECTOR_REGISTERS:
572 r = MACHINE_HAS_VX;
573 break;
574 case KVM_CAP_S390_RI:
575 r = test_facility(64);
576 break;
577 case KVM_CAP_S390_GS:
578 r = test_facility(133);
579 break;
580 case KVM_CAP_S390_BPB:
581 r = test_facility(82);
582 break;
583 case KVM_CAP_S390_PROTECTED:
584 r = is_prot_virt_host();
585 break;
586 default:
587 r = 0;
588 }
589 return r;
590 }
591
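/*
 * Transfer the dirty state collected in the gmap into the KVM dirty bitmap
 * of @memslot. The slot is walked in segment-sized chunks (_PAGE_ENTRIES
 * pages at a time); between chunks we reschedule and bail out early if a
 * fatal signal is pending.
 */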
592 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
593 {
594 int i;
595 gfn_t cur_gfn, last_gfn;
596 unsigned long gaddr, vmaddr;
597 struct gmap *gmap = kvm->arch.gmap;
598 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
599
600 /* Loop over all guest segments */
601 cur_gfn = memslot->base_gfn;
602 last_gfn = memslot->base_gfn + memslot->npages;
603 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
604 gaddr = gfn_to_gpa(cur_gfn);
605 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
606 if (kvm_is_error_hva(vmaddr))
607 continue;
608
609 bitmap_zero(bitmap, _PAGE_ENTRIES);
610 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
611 for (i = 0; i < _PAGE_ENTRIES; i++) {
612 if (test_bit(i, bitmap))
613 mark_page_dirty(kvm, cur_gfn + i);
614 }
615
616 if (fatal_signal_pending(current))
617 return;
618 cond_resched();
619 }
620 }
621
622 /* Section: vm related */
623 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
624
625 /*
626 * Get (and clear) the dirty memory log for a memory slot.
627 */
628 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
629 struct kvm_dirty_log *log)
630 {
631 int r;
632 unsigned long n;
633 struct kvm_memory_slot *memslot;
634 int is_dirty;
635
636 if (kvm_is_ucontrol(kvm))
637 return -EINVAL;
638
639 mutex_lock(&kvm->slots_lock);
640
641 r = -EINVAL;
642 if (log->slot >= KVM_USER_MEM_SLOTS)
643 goto out;
644
645 r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
646 if (r)
647 goto out;
648
649 /* Clear the dirty log */
650 if (is_dirty) {
651 n = kvm_dirty_bitmap_bytes(memslot);
652 memset(memslot->dirty_bitmap, 0, n);
653 }
654 r = 0;
655 out:
656 mutex_unlock(&kvm->slots_lock);
657 return r;
658 }
659
660 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
661 {
662 unsigned int i;
663 struct kvm_vcpu *vcpu;
664
665 kvm_for_each_vcpu(i, vcpu, kvm) {
666 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
667 }
668 }
669
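/*
 * Enable an optional VM capability. Capabilities that affect the guest CPU
 * model or memory setup (vector registers, RI, AIS, GS, 1M huge pages) can
 * only be enabled before the first vCPU is created and return -EBUSY
 * otherwise.
 */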
670 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
671 {
672 int r;
673
674 if (cap->flags)
675 return -EINVAL;
676
677 switch (cap->cap) {
678 case KVM_CAP_S390_IRQCHIP:
679 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
680 kvm->arch.use_irqchip = 1;
681 r = 0;
682 break;
683 case KVM_CAP_S390_USER_SIGP:
684 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
685 kvm->arch.user_sigp = 1;
686 r = 0;
687 break;
688 case KVM_CAP_S390_VECTOR_REGISTERS:
689 mutex_lock(&kvm->lock);
690 if (kvm->created_vcpus) {
691 r = -EBUSY;
692 } else if (MACHINE_HAS_VX) {
693 set_kvm_facility(kvm->arch.model.fac_mask, 129);
694 set_kvm_facility(kvm->arch.model.fac_list, 129);
695 if (test_facility(134)) {
696 set_kvm_facility(kvm->arch.model.fac_mask, 134);
697 set_kvm_facility(kvm->arch.model.fac_list, 134);
698 }
699 if (test_facility(135)) {
700 set_kvm_facility(kvm->arch.model.fac_mask, 135);
701 set_kvm_facility(kvm->arch.model.fac_list, 135);
702 }
703 if (test_facility(148)) {
704 set_kvm_facility(kvm->arch.model.fac_mask, 148);
705 set_kvm_facility(kvm->arch.model.fac_list, 148);
706 }
707 if (test_facility(152)) {
708 set_kvm_facility(kvm->arch.model.fac_mask, 152);
709 set_kvm_facility(kvm->arch.model.fac_list, 152);
710 }
711 r = 0;
712 } else
713 r = -EINVAL;
714 mutex_unlock(&kvm->lock);
715 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
716 r ? "(not available)" : "(success)");
717 break;
718 case KVM_CAP_S390_RI:
719 r = -EINVAL;
720 mutex_lock(&kvm->lock);
721 if (kvm->created_vcpus) {
722 r = -EBUSY;
723 } else if (test_facility(64)) {
724 set_kvm_facility(kvm->arch.model.fac_mask, 64);
725 set_kvm_facility(kvm->arch.model.fac_list, 64);
726 r = 0;
727 }
728 mutex_unlock(&kvm->lock);
729 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
730 r ? "(not available)" : "(success)");
731 break;
732 case KVM_CAP_S390_AIS:
733 mutex_lock(&kvm->lock);
734 if (kvm->created_vcpus) {
735 r = -EBUSY;
736 } else {
737 set_kvm_facility(kvm->arch.model.fac_mask, 72);
738 set_kvm_facility(kvm->arch.model.fac_list, 72);
739 r = 0;
740 }
741 mutex_unlock(&kvm->lock);
742 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
743 r ? "(not available)" : "(success)");
744 break;
745 case KVM_CAP_S390_GS:
746 r = -EINVAL;
747 mutex_lock(&kvm->lock);
748 if (kvm->created_vcpus) {
749 r = -EBUSY;
750 } else if (test_facility(133)) {
751 set_kvm_facility(kvm->arch.model.fac_mask, 133);
752 set_kvm_facility(kvm->arch.model.fac_list, 133);
753 r = 0;
754 }
755 mutex_unlock(&kvm->lock);
756 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
757 r ? "(not available)" : "(success)");
758 break;
759 case KVM_CAP_S390_HPAGE_1M:
760 mutex_lock(&kvm->lock);
761 if (kvm->created_vcpus)
762 r = -EBUSY;
763 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
764 r = -EINVAL;
765 else {
766 r = 0;
767 mmap_write_lock(kvm->mm);
768 kvm->mm->context.allow_gmap_hpage_1m = 1;
769 mmap_write_unlock(kvm->mm);
770 /*
771 * We might have to create fake 4k page
772 * tables. To avoid that the hardware works on
773 * stale PGSTEs, we emulate these instructions.
774 */
775 kvm->arch.use_skf = 0;
776 kvm->arch.use_pfmfi = 0;
777 }
778 mutex_unlock(&kvm->lock);
779 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
780 r ? "(not available)" : "(success)");
781 break;
782 case KVM_CAP_S390_USER_STSI:
783 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
784 kvm->arch.user_stsi = 1;
785 r = 0;
786 break;
787 case KVM_CAP_S390_USER_INSTR0:
788 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
789 kvm->arch.user_instr0 = 1;
790 icpt_operexc_on_all_vcpus(kvm);
791 r = 0;
792 break;
793 default:
794 r = -EINVAL;
795 break;
796 }
797 return r;
798 }
799
800 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
801 {
802 int ret;
803
804 switch (attr->attr) {
805 case KVM_S390_VM_MEM_LIMIT_SIZE:
806 ret = 0;
807 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
808 kvm->arch.mem_limit);
809 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
810 ret = -EFAULT;
811 break;
812 default:
813 ret = -ENXIO;
814 break;
815 }
816 return ret;
817 }
818
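/*
 * Handle the KVM_S390_VM_MEM_CTRL attribute group: enable CMMA (requires
 * SCLP support, no vCPUs yet and no 1M huge page backing), reset the CMMA
 * state of all guest pages, or replace the gmap with one that enforces a
 * new memory limit. Roughly, enabling CMMA from userspace looks like this
 * (illustrative only, vm_fd being the VM file descriptor):
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_ENABLE_CMMA,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */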
819 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
820 {
821 int ret;
822 unsigned int idx;
823 switch (attr->attr) {
824 case KVM_S390_VM_MEM_ENABLE_CMMA:
825 ret = -ENXIO;
826 if (!sclp.has_cmma)
827 break;
828
829 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
830 mutex_lock(&kvm->lock);
831 if (kvm->created_vcpus)
832 ret = -EBUSY;
833 else if (kvm->mm->context.allow_gmap_hpage_1m)
834 ret = -EINVAL;
835 else {
836 kvm->arch.use_cmma = 1;
837 /* Not compatible with cmma. */
838 kvm->arch.use_pfmfi = 0;
839 ret = 0;
840 }
841 mutex_unlock(&kvm->lock);
842 break;
843 case KVM_S390_VM_MEM_CLR_CMMA:
844 ret = -ENXIO;
845 if (!sclp.has_cmma)
846 break;
847 ret = -EINVAL;
848 if (!kvm->arch.use_cmma)
849 break;
850
851 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
852 mutex_lock(&kvm->lock);
853 idx = srcu_read_lock(&kvm->srcu);
854 s390_reset_cmma(kvm->arch.gmap->mm);
855 srcu_read_unlock(&kvm->srcu, idx);
856 mutex_unlock(&kvm->lock);
857 ret = 0;
858 break;
859 case KVM_S390_VM_MEM_LIMIT_SIZE: {
860 unsigned long new_limit;
861
862 if (kvm_is_ucontrol(kvm))
863 return -EINVAL;
864
865 if (get_user(new_limit, (u64 __user *)attr->addr))
866 return -EFAULT;
867
868 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
869 new_limit > kvm->arch.mem_limit)
870 return -E2BIG;
871
872 if (!new_limit)
873 return -EINVAL;
874
875 /* gmap_create takes last usable address */
876 if (new_limit != KVM_S390_NO_MEM_LIMIT)
877 new_limit -= 1;
878
879 ret = -EBUSY;
880 mutex_lock(&kvm->lock);
881 if (!kvm->created_vcpus) {
882 /* gmap_create will round the limit up */
883 struct gmap *new = gmap_create(current->mm, new_limit);
884
885 if (!new) {
886 ret = -ENOMEM;
887 } else {
888 gmap_remove(kvm->arch.gmap);
889 new->private = kvm;
890 kvm->arch.gmap = new;
891 ret = 0;
892 }
893 }
894 mutex_unlock(&kvm->lock);
895 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
896 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
897 (void *) kvm->arch.gmap->asce);
898 break;
899 }
900 default:
901 ret = -ENXIO;
902 break;
903 }
904 return ret;
905 }
906
907 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
908
909 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
910 {
911 struct kvm_vcpu *vcpu;
912 int i;
913
914 kvm_s390_vcpu_block_all(kvm);
915
916 kvm_for_each_vcpu(i, vcpu, kvm) {
917 kvm_s390_vcpu_crypto_setup(vcpu);
918 /* recreate the shadow crycb by leaving the VSIE handler */
919 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
920 }
921
922 kvm_s390_vcpu_unblock_all(kvm);
923 }
924
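/*
 * Handle the KVM_S390_VM_CRYPTO attribute group: toggle AES/DEA protected
 * key wrapping (requires facility 76 / MSA3; enabling generates fresh
 * random wrapping keys) or AP instruction interpretation (APIE). All vCPUs
 * are blocked and their crypto control blocks rebuilt afterwards via
 * kvm_s390_vcpu_crypto_reset_all().
 */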
925 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
926 {
927 mutex_lock(&kvm->lock);
928 switch (attr->attr) {
929 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
930 if (!test_kvm_facility(kvm, 76)) {
931 mutex_unlock(&kvm->lock);
932 return -EINVAL;
933 }
934 get_random_bytes(
935 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
936 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
937 kvm->arch.crypto.aes_kw = 1;
938 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
939 break;
940 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
941 if (!test_kvm_facility(kvm, 76)) {
942 mutex_unlock(&kvm->lock);
943 return -EINVAL;
944 }
945 get_random_bytes(
946 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
947 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
948 kvm->arch.crypto.dea_kw = 1;
949 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
950 break;
951 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
952 if (!test_kvm_facility(kvm, 76)) {
953 mutex_unlock(&kvm->lock);
954 return -EINVAL;
955 }
956 kvm->arch.crypto.aes_kw = 0;
957 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
958 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
959 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
960 break;
961 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
962 if (!test_kvm_facility(kvm, 76)) {
963 mutex_unlock(&kvm->lock);
964 return -EINVAL;
965 }
966 kvm->arch.crypto.dea_kw = 0;
967 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
968 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
969 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
970 break;
971 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
972 if (!ap_instructions_available()) {
973 mutex_unlock(&kvm->lock);
974 return -EOPNOTSUPP;
975 }
976 kvm->arch.crypto.apie = 1;
977 break;
978 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
979 if (!ap_instructions_available()) {
980 mutex_unlock(&kvm->lock);
981 return -EOPNOTSUPP;
982 }
983 kvm->arch.crypto.apie = 0;
984 break;
985 default:
986 mutex_unlock(&kvm->lock);
987 return -ENXIO;
988 }
989
990 kvm_s390_vcpu_crypto_reset_all(kvm);
991 mutex_unlock(&kvm->lock);
992 return 0;
993 }
994
995 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
996 {
997 int cx;
998 struct kvm_vcpu *vcpu;
999
1000 kvm_for_each_vcpu(cx, vcpu, kvm)
1001 kvm_s390_sync_request(req, vcpu);
1002 }
1003
1004 /*
1005 * Must be called with kvm->srcu held to avoid races on memslots, and with
1006 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1007 */
1008 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1009 {
1010 struct kvm_memory_slot *ms;
1011 struct kvm_memslots *slots;
1012 unsigned long ram_pages = 0;
1013 int slotnr;
1014
1015 /* migration mode already enabled */
1016 if (kvm->arch.migration_mode)
1017 return 0;
1018 slots = kvm_memslots(kvm);
1019 if (!slots || !slots->used_slots)
1020 return -EINVAL;
1021
1022 if (!kvm->arch.use_cmma) {
1023 kvm->arch.migration_mode = 1;
1024 return 0;
1025 }
1026 /* mark all the pages in active slots as dirty */
1027 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1028 ms = slots->memslots + slotnr;
1029 if (!ms->dirty_bitmap)
1030 return -EINVAL;
1031 /*
1032 * The second half of the bitmap is only used on x86,
1033 * and would be wasted otherwise, so we put it to good
1034 * use here to keep track of the state of the storage
1035 * attributes.
1036 */
1037 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1038 ram_pages += ms->npages;
1039 }
1040 atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1041 kvm->arch.migration_mode = 1;
1042 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1043 return 0;
1044 }
1045
1046 /*
1047 * Must be called with kvm->slots_lock to avoid races with ourselves and
1048 * kvm_s390_vm_start_migration.
1049 */
1050 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1051 {
1052 /* migration mode already disabled */
1053 if (!kvm->arch.migration_mode)
1054 return 0;
1055 kvm->arch.migration_mode = 0;
1056 if (kvm->arch.use_cmma)
1057 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1058 return 0;
1059 }
1060
1061 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1062 struct kvm_device_attr *attr)
1063 {
1064 int res = -ENXIO;
1065
1066 mutex_lock(&kvm->slots_lock);
1067 switch (attr->attr) {
1068 case KVM_S390_VM_MIGRATION_START:
1069 res = kvm_s390_vm_start_migration(kvm);
1070 break;
1071 case KVM_S390_VM_MIGRATION_STOP:
1072 res = kvm_s390_vm_stop_migration(kvm);
1073 break;
1074 default:
1075 break;
1076 }
1077 mutex_unlock(&kvm->slots_lock);
1078
1079 return res;
1080 }
1081
1082 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1083 struct kvm_device_attr *attr)
1084 {
1085 u64 mig = kvm->arch.migration_mode;
1086
1087 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1088 return -ENXIO;
1089
1090 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1091 return -EFAULT;
1092 return 0;
1093 }
1094
1095 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
1096
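/*
 * Set the full guest TOD clock, including the epoch index, which is only
 * accepted if the multiple-epoch facility (139) is part of the guest CPU
 * model. A userspace caller would do something like (illustrative only,
 * tod and vm_fd being placeholders):
 *
 *	struct kvm_s390_vm_tod_clock gtod = { .epoch_idx = 0, .tod = tod };
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_EXT,
 *		.addr  = (__u64)&gtod,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */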
1097 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1098 {
1099 struct kvm_s390_vm_tod_clock gtod;
1100
1101 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1102 return -EFAULT;
1103
1104 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1105 return -EINVAL;
1106 __kvm_s390_set_tod_clock(kvm, &gtod);
1107
1108 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1109 gtod.epoch_idx, gtod.tod);
1110
1111 return 0;
1112 }
1113
1114 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1115 {
1116 u8 gtod_high;
1117
1118 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1119 sizeof(gtod_high)))
1120 return -EFAULT;
1121
1122 if (gtod_high != 0)
1123 return -EINVAL;
1124 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1125
1126 return 0;
1127 }
1128
1129 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1130 {
1131 struct kvm_s390_vm_tod_clock gtod = { 0 };
1132
1133 if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1134 sizeof(gtod.tod)))
1135 return -EFAULT;
1136
1137 __kvm_s390_set_tod_clock(kvm, &gtod);
1138 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1139 return 0;
1140 }
1141
1142 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1143 {
1144 int ret;
1145
1146 if (attr->flags)
1147 return -EINVAL;
1148
1149 mutex_lock(&kvm->lock);
1150 /*
1151 * For protected guests, the TOD is managed by the ultravisor, so trying
1152 * to change it will never bring the expected results.
1153 */
1154 if (kvm_s390_pv_is_protected(kvm)) {
1155 ret = -EOPNOTSUPP;
1156 goto out_unlock;
1157 }
1158
1159 switch (attr->attr) {
1160 case KVM_S390_VM_TOD_EXT:
1161 ret = kvm_s390_set_tod_ext(kvm, attr);
1162 break;
1163 case KVM_S390_VM_TOD_HIGH:
1164 ret = kvm_s390_set_tod_high(kvm, attr);
1165 break;
1166 case KVM_S390_VM_TOD_LOW:
1167 ret = kvm_s390_set_tod_low(kvm, attr);
1168 break;
1169 default:
1170 ret = -ENXIO;
1171 break;
1172 }
1173
1174 out_unlock:
1175 mutex_unlock(&kvm->lock);
1176 return ret;
1177 }
1178
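/*
 * Compute the guest view of the TOD clock: read the host TOD in extended
 * format and add the guest epoch. With the multiple-epoch facility a carry
 * out of the low 64 bits bumps the returned epoch index. Preemption is
 * disabled so that epoch and epdx are read consistently.
 */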
1179 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1180 struct kvm_s390_vm_tod_clock *gtod)
1181 {
1182 struct kvm_s390_tod_clock_ext htod;
1183
1184 preempt_disable();
1185
1186 get_tod_clock_ext((char *)&htod);
1187
1188 gtod->tod = htod.tod + kvm->arch.epoch;
1189 gtod->epoch_idx = 0;
1190 if (test_kvm_facility(kvm, 139)) {
1191 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1192 if (gtod->tod < htod.tod)
1193 gtod->epoch_idx += 1;
1194 }
1195
1196 preempt_enable();
1197 }
1198
1199 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1200 {
1201 struct kvm_s390_vm_tod_clock gtod;
1202
1203 memset(&gtod, 0, sizeof(gtod));
1204 kvm_s390_get_tod_clock(kvm, &gtod);
1205 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1206 return -EFAULT;
1207
1208 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1209 gtod.epoch_idx, gtod.tod);
1210 return 0;
1211 }
1212
1213 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1214 {
1215 u8 gtod_high = 0;
1216
1217 if (copy_to_user((void __user *)attr->addr, &gtod_high,
1218 sizeof(gtod_high)))
1219 return -EFAULT;
1220 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1221
1222 return 0;
1223 }
1224
1225 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1226 {
1227 u64 gtod;
1228
1229 gtod = kvm_s390_get_tod_clock_fast(kvm);
1230 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1231 return -EFAULT;
1232 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1233
1234 return 0;
1235 }
1236
1237 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1238 {
1239 int ret;
1240
1241 if (attr->flags)
1242 return -EINVAL;
1243
1244 switch (attr->attr) {
1245 case KVM_S390_VM_TOD_EXT:
1246 ret = kvm_s390_get_tod_ext(kvm, attr);
1247 break;
1248 case KVM_S390_VM_TOD_HIGH:
1249 ret = kvm_s390_get_tod_high(kvm, attr);
1250 break;
1251 case KVM_S390_VM_TOD_LOW:
1252 ret = kvm_s390_get_tod_low(kvm, attr);
1253 break;
1254 default:
1255 ret = -ENXIO;
1256 break;
1257 }
1258 return ret;
1259 }
1260
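/*
 * Set the guest CPU model (cpuid, IBC and facility list); only allowed
 * before the first vCPU exists. The requested IBC is clamped to the range
 * the machine reports: values above the unblocked IBC are capped, values
 * below the lowest supported IBC are raised.
 */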
1261 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1262 {
1263 struct kvm_s390_vm_cpu_processor *proc;
1264 u16 lowest_ibc, unblocked_ibc;
1265 int ret = 0;
1266
1267 mutex_lock(&kvm->lock);
1268 if (kvm->created_vcpus) {
1269 ret = -EBUSY;
1270 goto out;
1271 }
1272 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1273 if (!proc) {
1274 ret = -ENOMEM;
1275 goto out;
1276 }
1277 if (!copy_from_user(proc, (void __user *)attr->addr,
1278 sizeof(*proc))) {
1279 kvm->arch.model.cpuid = proc->cpuid;
1280 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1281 unblocked_ibc = sclp.ibc & 0xfff;
1282 if (lowest_ibc && proc->ibc) {
1283 if (proc->ibc > unblocked_ibc)
1284 kvm->arch.model.ibc = unblocked_ibc;
1285 else if (proc->ibc < lowest_ibc)
1286 kvm->arch.model.ibc = lowest_ibc;
1287 else
1288 kvm->arch.model.ibc = proc->ibc;
1289 }
1290 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1291 S390_ARCH_FAC_LIST_SIZE_BYTE);
1292 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1293 kvm->arch.model.ibc,
1294 kvm->arch.model.cpuid);
1295 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1296 kvm->arch.model.fac_list[0],
1297 kvm->arch.model.fac_list[1],
1298 kvm->arch.model.fac_list[2]);
1299 } else
1300 ret = -EFAULT;
1301 kfree(proc);
1302 out:
1303 mutex_unlock(&kvm->lock);
1304 return ret;
1305 }
1306
1307 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1308 struct kvm_device_attr *attr)
1309 {
1310 struct kvm_s390_vm_cpu_feat data;
1311
1312 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1313 return -EFAULT;
1314 if (!bitmap_subset((unsigned long *) data.feat,
1315 kvm_s390_available_cpu_feat,
1316 KVM_S390_VM_CPU_FEAT_NR_BITS))
1317 return -EINVAL;
1318
1319 mutex_lock(&kvm->lock);
1320 if (kvm->created_vcpus) {
1321 mutex_unlock(&kvm->lock);
1322 return -EBUSY;
1323 }
1324 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1325 KVM_S390_VM_CPU_FEAT_NR_BITS);
1326 mutex_unlock(&kvm->lock);
1327 VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1328 data.feat[0],
1329 data.feat[1],
1330 data.feat[2]);
1331 return 0;
1332 }
1333
1334 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1335 struct kvm_device_attr *attr)
1336 {
1337 mutex_lock(&kvm->lock);
1338 if (kvm->created_vcpus) {
1339 mutex_unlock(&kvm->lock);
1340 return -EBUSY;
1341 }
1342
1343 if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1344 sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1345 mutex_unlock(&kvm->lock);
1346 return -EFAULT;
1347 }
1348 mutex_unlock(&kvm->lock);
1349
1350 VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1351 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1352 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1353 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1354 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1355 VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1356 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1357 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1358 VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1359 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1360 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1361 VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1362 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1363 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1364 VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
1365 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1366 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1367 VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1368 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1369 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1370 VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1371 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1372 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1373 VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1374 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1375 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1376 VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1377 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1378 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1379 VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1380 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1381 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1382 VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1383 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1384 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1385 VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1386 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1387 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1388 VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1389 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1390 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1391 VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1392 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1393 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1394 VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1395 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1396 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1397 VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1398 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1399 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1400 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1401 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1402 VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1403 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1404 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1405 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1406 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1407
1408 return 0;
1409 }
1410
1411 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1412 {
1413 int ret = -ENXIO;
1414
1415 switch (attr->attr) {
1416 case KVM_S390_VM_CPU_PROCESSOR:
1417 ret = kvm_s390_set_processor(kvm, attr);
1418 break;
1419 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1420 ret = kvm_s390_set_processor_feat(kvm, attr);
1421 break;
1422 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1423 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1424 break;
1425 }
1426 return ret;
1427 }
1428
1429 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1430 {
1431 struct kvm_s390_vm_cpu_processor *proc;
1432 int ret = 0;
1433
1434 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1435 if (!proc) {
1436 ret = -ENOMEM;
1437 goto out;
1438 }
1439 proc->cpuid = kvm->arch.model.cpuid;
1440 proc->ibc = kvm->arch.model.ibc;
1441 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1442 S390_ARCH_FAC_LIST_SIZE_BYTE);
1443 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1444 kvm->arch.model.ibc,
1445 kvm->arch.model.cpuid);
1446 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1447 kvm->arch.model.fac_list[0],
1448 kvm->arch.model.fac_list[1],
1449 kvm->arch.model.fac_list[2]);
1450 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1451 ret = -EFAULT;
1452 kfree(proc);
1453 out:
1454 return ret;
1455 }
1456
1457 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1458 {
1459 struct kvm_s390_vm_cpu_machine *mach;
1460 int ret = 0;
1461
1462 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1463 if (!mach) {
1464 ret = -ENOMEM;
1465 goto out;
1466 }
1467 get_cpu_id((struct cpuid *) &mach->cpuid);
1468 mach->ibc = sclp.ibc;
1469 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1470 S390_ARCH_FAC_LIST_SIZE_BYTE);
1471 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1472 sizeof(S390_lowcore.stfle_fac_list));
1473 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1474 kvm->arch.model.ibc,
1475 kvm->arch.model.cpuid);
1476 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1477 mach->fac_mask[0],
1478 mach->fac_mask[1],
1479 mach->fac_mask[2]);
1480 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1481 mach->fac_list[0],
1482 mach->fac_list[1],
1483 mach->fac_list[2]);
1484 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1485 ret = -EFAULT;
1486 kfree(mach);
1487 out:
1488 return ret;
1489 }
1490
1491 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1492 struct kvm_device_attr *attr)
1493 {
1494 struct kvm_s390_vm_cpu_feat data;
1495
1496 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1497 KVM_S390_VM_CPU_FEAT_NR_BITS);
1498 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1499 return -EFAULT;
1500 VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1501 data.feat[0],
1502 data.feat[1],
1503 data.feat[2]);
1504 return 0;
1505 }
1506
1507 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1508 struct kvm_device_attr *attr)
1509 {
1510 struct kvm_s390_vm_cpu_feat data;
1511
1512 bitmap_copy((unsigned long *) data.feat,
1513 kvm_s390_available_cpu_feat,
1514 KVM_S390_VM_CPU_FEAT_NR_BITS);
1515 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1516 return -EFAULT;
1517 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1518 data.feat[0],
1519 data.feat[1],
1520 data.feat[2]);
1521 return 0;
1522 }
1523
1524 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1525 struct kvm_device_attr *attr)
1526 {
1527 if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1528 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1529 return -EFAULT;
1530
1531 VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1532 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1533 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1534 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1535 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1536 VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1537 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1538 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1539 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1540 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1541 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1542 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1543 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1544 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1545 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
1546 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1547 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1548 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1549 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1550 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1551 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1552 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1553 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1554 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1555 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1556 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1557 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1558 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1559 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1560 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1561 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1562 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1563 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1564 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1565 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1566 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1567 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1568 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1569 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1570 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1571 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1572 VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1573 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1574 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1575 VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1576 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1577 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1578 VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1579 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1580 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1581 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1582 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1583 VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1584 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1585 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1586 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1587 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1588
1589 return 0;
1590 }
1591
1592 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1593 struct kvm_device_attr *attr)
1594 {
1595 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1596 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1597 return -EFAULT;
1598
1599 VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1600 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1601 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1602 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1603 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1604 VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
1605 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1606 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1607 VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
1608 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1609 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1610 VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
1611 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1612 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1613 VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
1614 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1615 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1616 VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
1617 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1618 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1619 VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
1620 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1621 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1622 VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
1623 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1624 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1625 VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
1626 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1627 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1628 VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
1629 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1630 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1631 VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
1632 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1633 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1634 VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
1635 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1636 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1637 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
1638 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1639 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1640 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
1641 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1642 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1643 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
1644 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1645 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1646 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1647 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1648 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1649 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1650 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1651 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1652 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1653 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1654 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1655 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1656
1657 return 0;
1658 }
1659
1660 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1661 {
1662 int ret = -ENXIO;
1663
1664 switch (attr->attr) {
1665 case KVM_S390_VM_CPU_PROCESSOR:
1666 ret = kvm_s390_get_processor(kvm, attr);
1667 break;
1668 case KVM_S390_VM_CPU_MACHINE:
1669 ret = kvm_s390_get_machine(kvm, attr);
1670 break;
1671 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1672 ret = kvm_s390_get_processor_feat(kvm, attr);
1673 break;
1674 case KVM_S390_VM_CPU_MACHINE_FEAT:
1675 ret = kvm_s390_get_machine_feat(kvm, attr);
1676 break;
1677 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1678 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1679 break;
1680 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1681 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1682 break;
1683 }
1684 return ret;
1685 }
1686
1687 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1688 {
1689 int ret;
1690
1691 switch (attr->group) {
1692 case KVM_S390_VM_MEM_CTRL:
1693 ret = kvm_s390_set_mem_control(kvm, attr);
1694 break;
1695 case KVM_S390_VM_TOD:
1696 ret = kvm_s390_set_tod(kvm, attr);
1697 break;
1698 case KVM_S390_VM_CPU_MODEL:
1699 ret = kvm_s390_set_cpu_model(kvm, attr);
1700 break;
1701 case KVM_S390_VM_CRYPTO:
1702 ret = kvm_s390_vm_set_crypto(kvm, attr);
1703 break;
1704 case KVM_S390_VM_MIGRATION:
1705 ret = kvm_s390_vm_set_migration(kvm, attr);
1706 break;
1707 default:
1708 ret = -ENXIO;
1709 break;
1710 }
1711
1712 return ret;
1713 }
1714
1715 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1716 {
1717 int ret;
1718
1719 switch (attr->group) {
1720 case KVM_S390_VM_MEM_CTRL:
1721 ret = kvm_s390_get_mem_control(kvm, attr);
1722 break;
1723 case KVM_S390_VM_TOD:
1724 ret = kvm_s390_get_tod(kvm, attr);
1725 break;
1726 case KVM_S390_VM_CPU_MODEL:
1727 ret = kvm_s390_get_cpu_model(kvm, attr);
1728 break;
1729 case KVM_S390_VM_MIGRATION:
1730 ret = kvm_s390_vm_get_migration(kvm, attr);
1731 break;
1732 default:
1733 ret = -ENXIO;
1734 break;
1735 }
1736
1737 return ret;
1738 }
1739
1740 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1741 {
1742 int ret;
1743
1744 switch (attr->group) {
1745 case KVM_S390_VM_MEM_CTRL:
1746 switch (attr->attr) {
1747 case KVM_S390_VM_MEM_ENABLE_CMMA:
1748 case KVM_S390_VM_MEM_CLR_CMMA:
1749 ret = sclp.has_cmma ? 0 : -ENXIO;
1750 break;
1751 case KVM_S390_VM_MEM_LIMIT_SIZE:
1752 ret = 0;
1753 break;
1754 default:
1755 ret = -ENXIO;
1756 break;
1757 }
1758 break;
1759 case KVM_S390_VM_TOD:
1760 switch (attr->attr) {
1761 case KVM_S390_VM_TOD_LOW:
1762 case KVM_S390_VM_TOD_HIGH:
1763 ret = 0;
1764 break;
1765 default:
1766 ret = -ENXIO;
1767 break;
1768 }
1769 break;
1770 case KVM_S390_VM_CPU_MODEL:
1771 switch (attr->attr) {
1772 case KVM_S390_VM_CPU_PROCESSOR:
1773 case KVM_S390_VM_CPU_MACHINE:
1774 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1775 case KVM_S390_VM_CPU_MACHINE_FEAT:
1776 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1777 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1778 ret = 0;
1779 break;
1780 default:
1781 ret = -ENXIO;
1782 break;
1783 }
1784 break;
1785 case KVM_S390_VM_CRYPTO:
1786 switch (attr->attr) {
1787 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1788 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1789 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1790 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1791 ret = 0;
1792 break;
1793 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1794 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1795 ret = ap_instructions_available() ? 0 : -ENXIO;
1796 break;
1797 default:
1798 ret = -ENXIO;
1799 break;
1800 }
1801 break;
1802 case KVM_S390_VM_MIGRATION:
1803 ret = 0;
1804 break;
1805 default:
1806 ret = -ENXIO;
1807 break;
1808 }
1809
1810 return ret;
1811 }
1812
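/*
 * Read the storage keys for a range of guest frames and copy them to the
 * user buffer. Returns KVM_S390_GET_SKEYS_NONE if the guest does not use
 * storage keys at all.
 */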
1813 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1814 {
1815 uint8_t *keys;
1816 uint64_t hva;
1817 int srcu_idx, i, r = 0;
1818
1819 if (args->flags != 0)
1820 return -EINVAL;
1821
1822 /* Is this guest using storage keys? */
1823 if (!mm_uses_skeys(current->mm))
1824 return KVM_S390_GET_SKEYS_NONE;
1825
1826 /* Enforce sane limit on memory allocation */
1827 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1828 return -EINVAL;
1829
1830 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1831 if (!keys)
1832 return -ENOMEM;
1833
1834 mmap_read_lock(current->mm);
1835 srcu_idx = srcu_read_lock(&kvm->srcu);
1836 for (i = 0; i < args->count; i++) {
1837 hva = gfn_to_hva(kvm, args->start_gfn + i);
1838 if (kvm_is_error_hva(hva)) {
1839 r = -EFAULT;
1840 break;
1841 }
1842
1843 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1844 if (r)
1845 break;
1846 }
1847 srcu_read_unlock(&kvm->srcu, srcu_idx);
1848 mmap_read_unlock(current->mm);
1849
1850 if (!r) {
1851 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1852 sizeof(uint8_t) * args->count);
1853 if (r)
1854 r = -EFAULT;
1855 }
1856
1857 kvfree(keys);
1858 return r;
1859 }
1860
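/*
 * Copy storage keys from the user buffer and apply them to a range of guest
 * frames, enabling storage key handling for the guest if necessary.
 */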
1861 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1862 {
1863 uint8_t *keys;
1864 uint64_t hva;
1865 int srcu_idx, i, r = 0;
1866 bool unlocked;
1867
1868 if (args->flags != 0)
1869 return -EINVAL;
1870
1871 /* Enforce sane limit on memory allocation */
1872 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1873 return -EINVAL;
1874
1875 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1876 if (!keys)
1877 return -ENOMEM;
1878
1879 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1880 sizeof(uint8_t) * args->count);
1881 if (r) {
1882 r = -EFAULT;
1883 goto out;
1884 }
1885
1886 /* Enable storage key handling for the guest */
1887 r = s390_enable_skey();
1888 if (r)
1889 goto out;
1890
1891 i = 0;
1892 mmap_read_lock(current->mm);
1893 srcu_idx = srcu_read_lock(&kvm->srcu);
1894 while (i < args->count) {
1895 unlocked = false;
1896 hva = gfn_to_hva(kvm, args->start_gfn + i);
1897 if (kvm_is_error_hva(hva)) {
1898 r = -EFAULT;
1899 break;
1900 }
1901
1902 /* Lowest order bit is reserved */
1903 if (keys[i] & 0x01) {
1904 r = -EINVAL;
1905 break;
1906 }
1907
1908 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1909 if (r) {
1910 r = fixup_user_fault(current->mm, hva,
1911 FAULT_FLAG_WRITE, &unlocked);
1912 if (r)
1913 break;
1914 }
1915 if (!r)
1916 i++;
1917 }
1918 srcu_read_unlock(&kvm->srcu, srcu_idx);
1919 mmap_read_unlock(current->mm);
1920 out:
1921 kvfree(keys);
1922 return r;
1923 }
1924
1925 /*
1926 * Base address and length must be sent at the start of each block, therefore
1927 * it's cheaper to send some clean data, as long as it's less than the size of
1928 * two longs.
1929 */
1930 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1931 /* for consistency */
1932 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1933
1934 /*
1935 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1936 * address falls in a hole. In that case the index of one of the memslots
1937 * bordering the hole is returned.
1938 */
1939 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1940 {
1941 int start = 0, end = slots->used_slots;
1942 int slot = atomic_read(&slots->lru_slot);
1943 struct kvm_memory_slot *memslots = slots->memslots;
1944
1945 if (gfn >= memslots[slot].base_gfn &&
1946 gfn < memslots[slot].base_gfn + memslots[slot].npages)
1947 return slot;
1948
1949 while (start < end) {
1950 slot = start + (end - start) / 2;
1951
1952 if (gfn >= memslots[slot].base_gfn)
1953 end = slot;
1954 else
1955 start = slot + 1;
1956 }
1957
1958 if (start >= slots->used_slots)
1959 return slots->used_slots - 1;
1960
1961 if (gfn >= memslots[start].base_gfn &&
1962 gfn < memslots[start].base_gfn + memslots[start].npages) {
1963 atomic_set(&slots->lru_slot, start);
1964 }
1965
1966 return start;
1967 }
1968
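/*
 * Peek at the CMMA page states (pgste values) of consecutive guest frames
 * without touching the CMMA dirty tracking state.
 */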
1969 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1970 u8 *res, unsigned long bufsize)
1971 {
1972 unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1973
1974 args->count = 0;
1975 while (args->count < bufsize) {
1976 hva = gfn_to_hva(kvm, cur_gfn);
1977 /*
1978 * We return an error if the first value was invalid, but we
1979 * return successfully if at least one value was copied.
1980 */
1981 if (kvm_is_error_hva(hva))
1982 return args->count ? 0 : -EFAULT;
1983 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1984 pgstev = 0;
1985 res[args->count++] = (pgstev >> 24) & 0x43;
1986 cur_gfn++;
1987 }
1988
1989 return 0;
1990 }
1991
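/*
 * Return the guest frame number of the next page, at or after cur_gfn, that
 * has its bit set in the CMMA dirty bitmap.
 */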
1992 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1993 unsigned long cur_gfn)
1994 {
1995 int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1996 struct kvm_memory_slot *ms = slots->memslots + slotidx;
1997 unsigned long ofs = cur_gfn - ms->base_gfn;
1998
1999 if (ms->base_gfn + ms->npages <= cur_gfn) {
2000 slotidx--;
2001 /* If we are above the highest slot, wrap around */
2002 if (slotidx < 0)
2003 slotidx = slots->used_slots - 1;
2004
2005 ms = slots->memslots + slotidx;
2006 ofs = 0;
2007 }
2008
2009 if (cur_gfn < ms->base_gfn)
2010 ofs = 0;
2011
2012 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2013 while ((slotidx > 0) && (ofs >= ms->npages)) {
2014 slotidx--;
2015 ms = slots->memslots + slotidx;
2016 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2017 }
2018 return ms->base_gfn + ofs;
2019 }
2020
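/*
 * Harvest the CMMA page states of dirty pages into the result buffer,
 * clearing the corresponding bits in the CMMA dirty bitmap as it goes.
 */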
2021 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2022 u8 *res, unsigned long bufsize)
2023 {
2024 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2025 struct kvm_memslots *slots = kvm_memslots(kvm);
2026 struct kvm_memory_slot *ms;
2027
2028 if (unlikely(!slots->used_slots))
2029 return 0;
2030
2031 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2032 ms = gfn_to_memslot(kvm, cur_gfn);
2033 args->count = 0;
2034 args->start_gfn = cur_gfn;
2035 if (!ms)
2036 return 0;
2037 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2038 mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2039
2040 while (args->count < bufsize) {
2041 hva = gfn_to_hva(kvm, cur_gfn);
2042 if (kvm_is_error_hva(hva))
2043 return 0;
2044 /* Decrement only if we actually flipped the bit to 0 */
2045 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2046 atomic64_dec(&kvm->arch.cmma_dirty_pages);
2047 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2048 pgstev = 0;
2049 /* Save the value */
2050 res[args->count++] = (pgstev >> 24) & 0x43;
2051 /* If the next bit is too far away, stop. */
2052 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2053 return 0;
2054 /* If we reached the previous "next", find the next one */
2055 if (cur_gfn == next_gfn)
2056 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2057 /* Reached the end of memory or of the buffer, stop */
2058 if ((next_gfn >= mem_end) ||
2059 (next_gfn - args->start_gfn >= bufsize))
2060 return 0;
2061 cur_gfn++;
2062 /* Reached the end of the current memslot, take the next one. */
2063 if (cur_gfn - ms->base_gfn >= ms->npages) {
2064 ms = gfn_to_memslot(kvm, cur_gfn);
2065 if (!ms)
2066 return 0;
2067 }
2068 }
2069 return 0;
2070 }
2071
2072 /*
2073 * This function searches for the next page with dirty CMMA attributes, and
2074 * saves the attributes in the buffer up to either the end of the buffer or
2075 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2076 * no trailing clean bytes are saved.
2077 * In case no dirty bits were found, or if CMMA was not enabled or used, the
2078 * output buffer will indicate 0 as length.
2079 */
2080 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2081 struct kvm_s390_cmma_log *args)
2082 {
2083 unsigned long bufsize;
2084 int srcu_idx, peek, ret;
2085 u8 *values;
2086
2087 if (!kvm->arch.use_cmma)
2088 return -ENXIO;
2089 /* Invalid/unsupported flags were specified */
2090 if (args->flags & ~KVM_S390_CMMA_PEEK)
2091 return -EINVAL;
2092 /* Migration mode query, and we are not doing a migration */
2093 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2094 if (!peek && !kvm->arch.migration_mode)
2095 return -EINVAL;
2096 /* CMMA is disabled or was not used, or the buffer has length zero */
2097 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2098 if (!bufsize || !kvm->mm->context.uses_cmm) {
2099 memset(args, 0, sizeof(*args));
2100 return 0;
2101 }
2102 /* We are not peeking, and there are no dirty pages */
2103 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2104 memset(args, 0, sizeof(*args));
2105 return 0;
2106 }
2107
2108 values = vmalloc(bufsize);
2109 if (!values)
2110 return -ENOMEM;
2111
2112 mmap_read_lock(kvm->mm);
2113 srcu_idx = srcu_read_lock(&kvm->srcu);
2114 if (peek)
2115 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2116 else
2117 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2118 srcu_read_unlock(&kvm->srcu, srcu_idx);
2119 mmap_read_unlock(kvm->mm);
2120
2121 if (kvm->arch.migration_mode)
2122 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2123 else
2124 args->remaining = 0;
2125
2126 if (copy_to_user((void __user *)args->values, values, args->count))
2127 ret = -EFAULT;
2128
2129 vfree(values);
2130 return ret;
2131 }
2132
2133 /*
2134 * This function sets the CMMA attributes for the given pages. If the input
2135 * buffer has zero length, no action is taken, otherwise the attributes are
2136 * set and the mm->context.uses_cmm flag is set.
2137 */
2138 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2139 const struct kvm_s390_cmma_log *args)
2140 {
2141 unsigned long hva, mask, pgstev, i;
2142 uint8_t *bits;
2143 int srcu_idx, r = 0;
2144
2145 mask = args->mask;
2146
2147 if (!kvm->arch.use_cmma)
2148 return -ENXIO;
2149 /* invalid/unsupported flags */
2150 if (args->flags != 0)
2151 return -EINVAL;
2152 /* Enforce sane limit on memory allocation */
2153 if (args->count > KVM_S390_CMMA_SIZE_MAX)
2154 return -EINVAL;
2155 /* Nothing to do */
2156 if (args->count == 0)
2157 return 0;
2158
2159 bits = vmalloc(array_size(sizeof(*bits), args->count));
2160 if (!bits)
2161 return -ENOMEM;
2162
2163 r = copy_from_user(bits, (void __user *)args->values, args->count);
2164 if (r) {
2165 r = -EFAULT;
2166 goto out;
2167 }
2168
2169 mmap_read_lock(kvm->mm);
2170 srcu_idx = srcu_read_lock(&kvm->srcu);
2171 for (i = 0; i < args->count; i++) {
2172 hva = gfn_to_hva(kvm, args->start_gfn + i);
2173 if (kvm_is_error_hva(hva)) {
2174 r = -EFAULT;
2175 break;
2176 }
2177
2178 pgstev = bits[i];
2179 pgstev = pgstev << 24;
2180 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2181 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2182 }
2183 srcu_read_unlock(&kvm->srcu, srcu_idx);
2184 mmap_read_unlock(kvm->mm);
2185
2186 if (!kvm->mm->context.uses_cmm) {
2187 mmap_write_lock(kvm->mm);
2188 kvm->mm->context.uses_cmm = 1;
2189 mmap_write_unlock(kvm->mm);
2190 }
2191 out:
2192 vfree(bits);
2193 return r;
2194 }
2195
2196 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2197 {
2198 struct kvm_vcpu *vcpu;
2199 u16 rc, rrc;
2200 int ret = 0;
2201 int i;
2202
2203 /*
2204 * We ignore failures and try to destroy as many CPUs as possible.
2205 * At the same time we must not free the assigned resources when
2206 * this fails, as the ultravisor still has access to that memory.
2207 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2208 * behind.
2209 * We want to return the first failure rc and rrc, though.
2210 */
2211 kvm_for_each_vcpu(i, vcpu, kvm) {
2212 mutex_lock(&vcpu->mutex);
2213 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2214 *rcp = rc;
2215 *rrcp = rrc;
2216 ret = -EIO;
2217 }
2218 mutex_unlock(&vcpu->mutex);
2219 }
2220 return ret;
2221 }
2222
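/*
 * Convert all vCPUs of this VM to protected. On failure, roll back by
 * destroying the protected state of the vCPUs created so far.
 */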
2223 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2224 {
2225 int i, r = 0;
2226 u16 dummy;
2227
2228 struct kvm_vcpu *vcpu;
2229
2230 kvm_for_each_vcpu(i, vcpu, kvm) {
2231 mutex_lock(&vcpu->mutex);
2232 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2233 mutex_unlock(&vcpu->mutex);
2234 if (r)
2235 break;
2236 }
2237 if (r)
2238 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2239 return r;
2240 }
2241
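/* Dispatch the KVM_S390_PV_COMMAND subcommands (protected virtualization). */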
2242 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2243 {
2244 int r = 0;
2245 u16 dummy;
2246 void __user *argp = (void __user *)cmd->data;
2247
2248 switch (cmd->cmd) {
2249 case KVM_PV_ENABLE: {
2250 r = -EINVAL;
2251 if (kvm_s390_pv_is_protected(kvm))
2252 break;
2253
2254 /*
2255 * FMT 4 SIE needs esca. As we never switch back to bsca from
2256 * esca, we need no cleanup in the error cases below
2257 */
2258 r = sca_switch_to_extended(kvm);
2259 if (r)
2260 break;
2261
2262 mmap_write_lock(current->mm);
2263 r = gmap_mark_unmergeable();
2264 mmap_write_unlock(current->mm);
2265 if (r)
2266 break;
2267
2268 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2269 if (r)
2270 break;
2271
2272 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2273 if (r)
2274 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2275
2276 /* we need to block service interrupts from now on */
2277 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2278 break;
2279 }
2280 case KVM_PV_DISABLE: {
2281 r = -EINVAL;
2282 if (!kvm_s390_pv_is_protected(kvm))
2283 break;
2284
2285 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2286 /*
2287 * If a CPU could not be destroyed, destroy VM will also fail.
2288 * There is no point in trying to destroy it. Instead return
2289 * the rc and rrc from the first CPU that failed destroying.
2290 */
2291 if (r)
2292 break;
2293 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2294
2295 /* no need to block service interrupts any more */
2296 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2297 break;
2298 }
2299 case KVM_PV_SET_SEC_PARMS: {
2300 struct kvm_s390_pv_sec_parm parms = {};
2301 void *hdr;
2302
2303 r = -EINVAL;
2304 if (!kvm_s390_pv_is_protected(kvm))
2305 break;
2306
2307 r = -EFAULT;
2308 if (copy_from_user(&parms, argp, sizeof(parms)))
2309 break;
2310
2311 /* Currently restricted to 8KB */
2312 r = -EINVAL;
2313 if (parms.length > PAGE_SIZE * 2)
2314 break;
2315
2316 r = -ENOMEM;
2317 hdr = vmalloc(parms.length);
2318 if (!hdr)
2319 break;
2320
2321 r = -EFAULT;
2322 if (!copy_from_user(hdr, (void __user *)parms.origin,
2323 parms.length))
2324 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2325 &cmd->rc, &cmd->rrc);
2326
2327 vfree(hdr);
2328 break;
2329 }
2330 case KVM_PV_UNPACK: {
2331 struct kvm_s390_pv_unp unp = {};
2332
2333 r = -EINVAL;
2334 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2335 break;
2336
2337 r = -EFAULT;
2338 if (copy_from_user(&unp, argp, sizeof(unp)))
2339 break;
2340
2341 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2342 &cmd->rc, &cmd->rrc);
2343 break;
2344 }
2345 case KVM_PV_VERIFY: {
2346 r = -EINVAL;
2347 if (!kvm_s390_pv_is_protected(kvm))
2348 break;
2349
2350 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2351 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2352 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2353 cmd->rrc);
2354 break;
2355 }
2356 case KVM_PV_PREP_RESET: {
2357 r = -EINVAL;
2358 if (!kvm_s390_pv_is_protected(kvm))
2359 break;
2360
2361 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2362 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2363 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2364 cmd->rc, cmd->rrc);
2365 break;
2366 }
2367 case KVM_PV_UNSHARE_ALL: {
2368 r = -EINVAL;
2369 if (!kvm_s390_pv_is_protected(kvm))
2370 break;
2371
2372 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2373 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2374 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2375 cmd->rc, cmd->rrc);
2376 break;
2377 }
2378 default:
2379 r = -ENOTTY;
2380 }
2381 return r;
2382 }
2383
2384 long kvm_arch_vm_ioctl(struct file *filp,
2385 unsigned int ioctl, unsigned long arg)
2386 {
2387 struct kvm *kvm = filp->private_data;
2388 void __user *argp = (void __user *)arg;
2389 struct kvm_device_attr attr;
2390 int r;
2391
2392 switch (ioctl) {
2393 case KVM_S390_INTERRUPT: {
2394 struct kvm_s390_interrupt s390int;
2395
2396 r = -EFAULT;
2397 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2398 break;
2399 r = kvm_s390_inject_vm(kvm, &s390int);
2400 break;
2401 }
2402 case KVM_CREATE_IRQCHIP: {
2403 struct kvm_irq_routing_entry routing;
2404
2405 r = -EINVAL;
2406 if (kvm->arch.use_irqchip) {
2407 /* Set up dummy routing. */
2408 memset(&routing, 0, sizeof(routing));
2409 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2410 }
2411 break;
2412 }
2413 case KVM_SET_DEVICE_ATTR: {
2414 r = -EFAULT;
2415 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2416 break;
2417 r = kvm_s390_vm_set_attr(kvm, &attr);
2418 break;
2419 }
2420 case KVM_GET_DEVICE_ATTR: {
2421 r = -EFAULT;
2422 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2423 break;
2424 r = kvm_s390_vm_get_attr(kvm, &attr);
2425 break;
2426 }
2427 case KVM_HAS_DEVICE_ATTR: {
2428 r = -EFAULT;
2429 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2430 break;
2431 r = kvm_s390_vm_has_attr(kvm, &attr);
2432 break;
2433 }
2434 case KVM_S390_GET_SKEYS: {
2435 struct kvm_s390_skeys args;
2436
2437 r = -EFAULT;
2438 if (copy_from_user(&args, argp,
2439 sizeof(struct kvm_s390_skeys)))
2440 break;
2441 r = kvm_s390_get_skeys(kvm, &args);
2442 break;
2443 }
2444 case KVM_S390_SET_SKEYS: {
2445 struct kvm_s390_skeys args;
2446
2447 r = -EFAULT;
2448 if (copy_from_user(&args, argp,
2449 sizeof(struct kvm_s390_skeys)))
2450 break;
2451 r = kvm_s390_set_skeys(kvm, &args);
2452 break;
2453 }
2454 case KVM_S390_GET_CMMA_BITS: {
2455 struct kvm_s390_cmma_log args;
2456
2457 r = -EFAULT;
2458 if (copy_from_user(&args, argp, sizeof(args)))
2459 break;
2460 mutex_lock(&kvm->slots_lock);
2461 r = kvm_s390_get_cmma_bits(kvm, &args);
2462 mutex_unlock(&kvm->slots_lock);
2463 if (!r) {
2464 r = copy_to_user(argp, &args, sizeof(args));
2465 if (r)
2466 r = -EFAULT;
2467 }
2468 break;
2469 }
2470 case KVM_S390_SET_CMMA_BITS: {
2471 struct kvm_s390_cmma_log args;
2472
2473 r = -EFAULT;
2474 if (copy_from_user(&args, argp, sizeof(args)))
2475 break;
2476 mutex_lock(&kvm->slots_lock);
2477 r = kvm_s390_set_cmma_bits(kvm, &args);
2478 mutex_unlock(&kvm->slots_lock);
2479 break;
2480 }
2481 case KVM_S390_PV_COMMAND: {
2482 struct kvm_pv_cmd args;
2483
2484 /* protvirt means user sigp */
2485 kvm->arch.user_cpu_state_ctrl = 1;
2486 r = 0;
2487 if (!is_prot_virt_host()) {
2488 r = -EINVAL;
2489 break;
2490 }
2491 if (copy_from_user(&args, argp, sizeof(args))) {
2492 r = -EFAULT;
2493 break;
2494 }
2495 if (args.flags) {
2496 r = -EINVAL;
2497 break;
2498 }
2499 mutex_lock(&kvm->lock);
2500 r = kvm_s390_handle_pv(kvm, &args);
2501 mutex_unlock(&kvm->lock);
2502 if (copy_to_user(argp, &args, sizeof(args))) {
2503 r = -EFAULT;
2504 break;
2505 }
2506 break;
2507 }
2508 default:
2509 r = -ENOTTY;
2510 }
2511
2512 return r;
2513 }
2514
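/* Query whether the AP extended addressing (APXA) facility is installed. */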
2515 static int kvm_s390_apxa_installed(void)
2516 {
2517 struct ap_config_info info;
2518
2519 if (ap_instructions_available()) {
2520 if (ap_qci(&info) == 0)
2521 return info.apxa;
2522 }
2523
2524 return 0;
2525 }
2526
2527 /*
2528 * The format of the crypto control block (CRYCB) is specified in the 3 low
2529 * order bits of the CRYCB designation (CRYCBD) field as follows:
2530 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2531 * AP extended addressing (APXA) facility is installed.
2532 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2533 * Format 2: Both the APXA and MSAX3 facilities are installed.
2534 */
2535 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2536 {
2537 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2538
2539 /* Clear the CRYCB format bits - i.e., set format 0 by default */
2540 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2541
2542 /* Check whether MSAX3 is installed */
2543 if (!test_kvm_facility(kvm, 76))
2544 return;
2545
2546 if (kvm_s390_apxa_installed())
2547 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2548 else
2549 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2550 }
2551
2552 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2553 unsigned long *aqm, unsigned long *adm)
2554 {
2555 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2556
2557 mutex_lock(&kvm->lock);
2558 kvm_s390_vcpu_block_all(kvm);
2559
2560 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2561 case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2562 memcpy(crycb->apcb1.apm, apm, 32);
2563 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2564 apm[0], apm[1], apm[2], apm[3]);
2565 memcpy(crycb->apcb1.aqm, aqm, 32);
2566 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2567 aqm[0], aqm[1], aqm[2], aqm[3]);
2568 memcpy(crycb->apcb1.adm, adm, 32);
2569 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2570 adm[0], adm[1], adm[2], adm[3]);
2571 break;
2572 case CRYCB_FORMAT1:
2573 case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
2574 memcpy(crycb->apcb0.apm, apm, 8);
2575 memcpy(crycb->apcb0.aqm, aqm, 2);
2576 memcpy(crycb->apcb0.adm, adm, 2);
2577 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2578 apm[0], *((unsigned short *)aqm),
2579 *((unsigned short *)adm));
2580 break;
2581 default: /* Cannot happen */
2582 break;
2583 }
2584
2585 /* recreate the shadow crycb for each vcpu */
2586 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2587 kvm_s390_vcpu_unblock_all(kvm);
2588 mutex_unlock(&kvm->lock);
2589 }
2590 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2591
2592 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2593 {
2594 mutex_lock(&kvm->lock);
2595 kvm_s390_vcpu_block_all(kvm);
2596
2597 memset(&kvm->arch.crypto.crycb->apcb0, 0,
2598 sizeof(kvm->arch.crypto.crycb->apcb0));
2599 memset(&kvm->arch.crypto.crycb->apcb1, 0,
2600 sizeof(kvm->arch.crypto.crycb->apcb1));
2601
2602 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2603 /* recreate the shadow crycb for each vcpu */
2604 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2605 kvm_s390_vcpu_unblock_all(kvm);
2606 mutex_unlock(&kvm->lock);
2607 }
2608 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2609
2610 static u64 kvm_s390_get_initial_cpuid(void)
2611 {
2612 struct cpuid cpuid;
2613
2614 get_cpu_id(&cpuid);
2615 cpuid.version = 0xff;
2616 return *((u64 *) &cpuid);
2617 }
2618
2619 static void kvm_s390_crypto_init(struct kvm *kvm)
2620 {
2621 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2622 kvm_s390_set_crycb_format(kvm);
2623
2624 if (!test_kvm_facility(kvm, 76))
2625 return;
2626
2627 /* Enable AES/DEA protected key functions by default */
2628 kvm->arch.crypto.aes_kw = 1;
2629 kvm->arch.crypto.dea_kw = 1;
2630 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2631 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2632 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2633 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2634 }
2635
2636 static void sca_dispose(struct kvm *kvm)
2637 {
2638 if (kvm->arch.use_esca)
2639 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2640 else
2641 free_page((unsigned long)(kvm->arch.sca));
2642 kvm->arch.sca = NULL;
2643 }
2644
2645 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2646 {
2647 gfp_t alloc_flags = GFP_KERNEL;
2648 int i, rc;
2649 char debug_name[16];
2650 static unsigned long sca_offset;
2651
2652 rc = -EINVAL;
2653 #ifdef CONFIG_KVM_S390_UCONTROL
2654 if (type & ~KVM_VM_S390_UCONTROL)
2655 goto out_err;
2656 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2657 goto out_err;
2658 #else
2659 if (type)
2660 goto out_err;
2661 #endif
2662
2663 rc = s390_enable_sie();
2664 if (rc)
2665 goto out_err;
2666
2667 rc = -ENOMEM;
2668
2669 if (!sclp.has_64bscao)
2670 alloc_flags |= GFP_DMA;
2671 rwlock_init(&kvm->arch.sca_lock);
2672 /* start with basic SCA */
2673 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2674 if (!kvm->arch.sca)
2675 goto out_err;
2676 mutex_lock(&kvm_lock);
2677 sca_offset += 16;
2678 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2679 sca_offset = 0;
2680 kvm->arch.sca = (struct bsca_block *)
2681 ((char *) kvm->arch.sca + sca_offset);
2682 mutex_unlock(&kvm_lock);
2683
2684 sprintf(debug_name, "kvm-%u", current->pid);
2685
2686 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2687 if (!kvm->arch.dbf)
2688 goto out_err;
2689
2690 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2691 kvm->arch.sie_page2 =
2692 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2693 if (!kvm->arch.sie_page2)
2694 goto out_err;
2695
2696 kvm->arch.sie_page2->kvm = kvm;
2697 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2698
2699 for (i = 0; i < kvm_s390_fac_size(); i++) {
2700 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2701 (kvm_s390_fac_base[i] |
2702 kvm_s390_fac_ext[i]);
2703 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2704 kvm_s390_fac_base[i];
2705 }
2706 kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2707
2708 /* we are always in czam mode - even on pre z14 machines */
2709 set_kvm_facility(kvm->arch.model.fac_mask, 138);
2710 set_kvm_facility(kvm->arch.model.fac_list, 138);
2711 /* we emulate STHYI in kvm */
2712 set_kvm_facility(kvm->arch.model.fac_mask, 74);
2713 set_kvm_facility(kvm->arch.model.fac_list, 74);
2714 if (MACHINE_HAS_TLB_GUEST) {
2715 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2716 set_kvm_facility(kvm->arch.model.fac_list, 147);
2717 }
2718
2719 if (css_general_characteristics.aiv && test_facility(65))
2720 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2721
2722 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2723 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2724
2725 kvm_s390_crypto_init(kvm);
2726
2727 mutex_init(&kvm->arch.float_int.ais_lock);
2728 spin_lock_init(&kvm->arch.float_int.lock);
2729 for (i = 0; i < FIRQ_LIST_COUNT; i++)
2730 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2731 init_waitqueue_head(&kvm->arch.ipte_wq);
2732 mutex_init(&kvm->arch.ipte_mutex);
2733
2734 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2735 VM_EVENT(kvm, 3, "vm created with type %lu", type);
2736
2737 if (type & KVM_VM_S390_UCONTROL) {
2738 kvm->arch.gmap = NULL;
2739 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2740 } else {
2741 if (sclp.hamax == U64_MAX)
2742 kvm->arch.mem_limit = TASK_SIZE_MAX;
2743 else
2744 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2745 sclp.hamax + 1);
2746 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2747 if (!kvm->arch.gmap)
2748 goto out_err;
2749 kvm->arch.gmap->private = kvm;
2750 kvm->arch.gmap->pfault_enabled = 0;
2751 }
2752
2753 kvm->arch.use_pfmfi = sclp.has_pfmfi;
2754 kvm->arch.use_skf = sclp.has_skey;
2755 spin_lock_init(&kvm->arch.start_stop_lock);
2756 kvm_s390_vsie_init(kvm);
2757 if (use_gisa)
2758 kvm_s390_gisa_init(kvm);
2759 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2760
2761 return 0;
2762 out_err:
2763 free_page((unsigned long)kvm->arch.sie_page2);
2764 debug_unregister(kvm->arch.dbf);
2765 sca_dispose(kvm);
2766 KVM_EVENT(3, "creation of vm failed: %d", rc);
2767 return rc;
2768 }
2769
2770 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2771 {
2772 u16 rc, rrc;
2773
2774 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2775 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2776 kvm_s390_clear_local_irqs(vcpu);
2777 kvm_clear_async_pf_completion_queue(vcpu);
2778 if (!kvm_is_ucontrol(vcpu->kvm))
2779 sca_del_vcpu(vcpu);
2780
2781 if (kvm_is_ucontrol(vcpu->kvm))
2782 gmap_remove(vcpu->arch.gmap);
2783
2784 if (vcpu->kvm->arch.use_cmma)
2785 kvm_s390_vcpu_unsetup_cmma(vcpu);
2786 /* We cannot hold the vcpu mutex here; we are already dying */
2787 if (kvm_s390_pv_cpu_get_handle(vcpu))
2788 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2789 free_page((unsigned long)(vcpu->arch.sie_block));
2790 }
2791
2792 static void kvm_free_vcpus(struct kvm *kvm)
2793 {
2794 unsigned int i;
2795 struct kvm_vcpu *vcpu;
2796
2797 kvm_for_each_vcpu(i, vcpu, kvm)
2798 kvm_vcpu_destroy(vcpu);
2799
2800 mutex_lock(&kvm->lock);
2801 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2802 kvm->vcpus[i] = NULL;
2803
2804 atomic_set(&kvm->online_vcpus, 0);
2805 mutex_unlock(&kvm->lock);
2806 }
2807
2808 void kvm_arch_destroy_vm(struct kvm *kvm)
2809 {
2810 u16 rc, rrc;
2811
2812 kvm_free_vcpus(kvm);
2813 sca_dispose(kvm);
2814 kvm_s390_gisa_destroy(kvm);
2815 /*
2816 * We are already at the end of life and kvm->lock is not taken.
2817 * This is ok as the file descriptor is closed by now and nobody
2818 * can mess with the pv state. To avoid lockdep_assert_held from
2819 * complaining we do not use kvm_s390_pv_is_protected.
2820 */
2821 if (kvm_s390_pv_get_handle(kvm))
2822 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2823 debug_unregister(kvm->arch.dbf);
2824 free_page((unsigned long)kvm->arch.sie_page2);
2825 if (!kvm_is_ucontrol(kvm))
2826 gmap_remove(kvm->arch.gmap);
2827 kvm_s390_destroy_adapters(kvm);
2828 kvm_s390_clear_float_irqs(kvm);
2829 kvm_s390_vsie_destroy(kvm);
2830 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2831 }
2832
2833 /* Section: vcpu related */
2834 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2835 {
2836 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2837 if (!vcpu->arch.gmap)
2838 return -ENOMEM;
2839 vcpu->arch.gmap->private = vcpu->kvm;
2840
2841 return 0;
2842 }
2843
2844 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2845 {
2846 if (!kvm_s390_use_sca_entries())
2847 return;
2848 read_lock(&vcpu->kvm->arch.sca_lock);
2849 if (vcpu->kvm->arch.use_esca) {
2850 struct esca_block *sca = vcpu->kvm->arch.sca;
2851
2852 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2853 sca->cpu[vcpu->vcpu_id].sda = 0;
2854 } else {
2855 struct bsca_block *sca = vcpu->kvm->arch.sca;
2856
2857 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2858 sca->cpu[vcpu->vcpu_id].sda = 0;
2859 }
2860 read_unlock(&vcpu->kvm->arch.sca_lock);
2861 }
2862
2863 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2864 {
2865 if (!kvm_s390_use_sca_entries()) {
2866 struct bsca_block *sca = vcpu->kvm->arch.sca;
2867
2868 /* we still need the basic sca for the ipte control */
2869 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2870 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2871 return;
2872 }
2873 read_lock(&vcpu->kvm->arch.sca_lock);
2874 if (vcpu->kvm->arch.use_esca) {
2875 struct esca_block *sca = vcpu->kvm->arch.sca;
2876
2877 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2878 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2879 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2880 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2881 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2882 } else {
2883 struct bsca_block *sca = vcpu->kvm->arch.sca;
2884
2885 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2886 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2887 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2888 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2889 }
2890 read_unlock(&vcpu->kvm->arch.sca_lock);
2891 }
2892
2893 /* Basic SCA to Extended SCA data copy routines */
2894 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2895 {
2896 d->sda = s->sda;
2897 d->sigp_ctrl.c = s->sigp_ctrl.c;
2898 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2899 }
2900
2901 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2902 {
2903 int i;
2904
2905 d->ipte_control = s->ipte_control;
2906 d->mcn[0] = s->mcn;
2907 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2908 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2909 }
2910
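/*
 * Replace the basic SCA with an extended SCA and rewire all existing vCPUs
 * to it. The old basic SCA is freed afterwards.
 */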
2911 static int sca_switch_to_extended(struct kvm *kvm)
2912 {
2913 struct bsca_block *old_sca = kvm->arch.sca;
2914 struct esca_block *new_sca;
2915 struct kvm_vcpu *vcpu;
2916 unsigned int vcpu_idx;
2917 u32 scaol, scaoh;
2918
2919 if (kvm->arch.use_esca)
2920 return 0;
2921
2922 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2923 if (!new_sca)
2924 return -ENOMEM;
2925
2926 scaoh = (u32)((u64)(new_sca) >> 32);
2927 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2928
2929 kvm_s390_vcpu_block_all(kvm);
2930 write_lock(&kvm->arch.sca_lock);
2931
2932 sca_copy_b_to_e(new_sca, old_sca);
2933
2934 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2935 vcpu->arch.sie_block->scaoh = scaoh;
2936 vcpu->arch.sie_block->scaol = scaol;
2937 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2938 }
2939 kvm->arch.sca = new_sca;
2940 kvm->arch.use_esca = 1;
2941
2942 write_unlock(&kvm->arch.sca_lock);
2943 kvm_s390_vcpu_unblock_all(kvm);
2944
2945 free_page((unsigned long)old_sca);
2946
2947 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2948 old_sca, kvm->arch.sca);
2949 return 0;
2950 }
2951
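/*
 * Check whether a vCPU with the given id fits into the current SCA,
 * switching to the extended SCA if that is needed and possible.
 */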
2952 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2953 {
2954 int rc;
2955
2956 if (!kvm_s390_use_sca_entries()) {
2957 if (id < KVM_MAX_VCPUS)
2958 return true;
2959 return false;
2960 }
2961 if (id < KVM_S390_BSCA_CPU_SLOTS)
2962 return true;
2963 if (!sclp.has_esca || !sclp.has_64bscao)
2964 return false;
2965
2966 mutex_lock(&kvm->lock);
2967 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2968 mutex_unlock(&kvm->lock);
2969
2970 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2971 }
2972
2973 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2974 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2975 {
2976 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2977 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2978 vcpu->arch.cputm_start = get_tod_clock_fast();
2979 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2980 }
2981
2982 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2983 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2984 {
2985 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2986 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2987 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2988 vcpu->arch.cputm_start = 0;
2989 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2990 }
2991
2992 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2993 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2994 {
2995 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2996 vcpu->arch.cputm_enabled = true;
2997 __start_cpu_timer_accounting(vcpu);
2998 }
2999
3000 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3001 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3002 {
3003 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3004 __stop_cpu_timer_accounting(vcpu);
3005 vcpu->arch.cputm_enabled = false;
3006 }
3007
3008 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3009 {
3010 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3011 __enable_cpu_timer_accounting(vcpu);
3012 preempt_enable();
3013 }
3014
3015 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3016 {
3017 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3018 __disable_cpu_timer_accounting(vcpu);
3019 preempt_enable();
3020 }
3021
3022 /* set the cpu timer - may only be called from the VCPU thread itself */
3023 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3024 {
3025 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3026 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3027 if (vcpu->arch.cputm_enabled)
3028 vcpu->arch.cputm_start = get_tod_clock_fast();
3029 vcpu->arch.sie_block->cputm = cputm;
3030 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3031 preempt_enable();
3032 }
3033
3034 /* update and get the cpu timer - can also be called from other VCPU threads */
3035 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3036 {
3037 unsigned int seq;
3038 __u64 value;
3039
3040 if (unlikely(!vcpu->arch.cputm_enabled))
3041 return vcpu->arch.sie_block->cputm;
3042
3043 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3044 do {
3045 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3046 /*
3047 * If the writer would ever execute a read in the critical
3048 * section, e.g. in irq context, we have a deadlock.
3049 */
3050 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3051 value = vcpu->arch.sie_block->cputm;
3052 /* if cputm_start is 0, accounting is being started/stopped */
3053 if (likely(vcpu->arch.cputm_start))
3054 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3055 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3056 preempt_enable();
3057 return value;
3058 }
3059
3060 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3061 {
3062
3063 gmap_enable(vcpu->arch.enabled_gmap);
3064 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3065 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3066 __start_cpu_timer_accounting(vcpu);
3067 vcpu->cpu = cpu;
3068 }
3069
3070 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3071 {
3072 vcpu->cpu = -1;
3073 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3074 __stop_cpu_timer_accounting(vcpu);
3075 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3076 vcpu->arch.enabled_gmap = gmap_get_enabled();
3077 gmap_disable(vcpu->arch.enabled_gmap);
3078
3079 }
3080
3081 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3082 {
3083 mutex_lock(&vcpu->kvm->lock);
3084 preempt_disable();
3085 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3086 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3087 preempt_enable();
3088 mutex_unlock(&vcpu->kvm->lock);
3089 if (!kvm_is_ucontrol(vcpu->kvm)) {
3090 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3091 sca_add_vcpu(vcpu);
3092 }
3093 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3094 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3095 /* make vcpu_load load the right gmap on the first trigger */
3096 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3097 }
3098
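/* Test whether the given PCKMO subfunction is available to both host and guest. */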
3099 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3100 {
3101 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3102 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3103 return true;
3104 return false;
3105 }
3106
3107 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3108 {
3109 /* At least one ECC subfunction must be present */
3110 return kvm_has_pckmo_subfunc(kvm, 32) ||
3111 kvm_has_pckmo_subfunc(kvm, 33) ||
3112 kvm_has_pckmo_subfunc(kvm, 34) ||
3113 kvm_has_pckmo_subfunc(kvm, 40) ||
3114 kvm_has_pckmo_subfunc(kvm, 41);
3115
3116 }
3117
3118 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3119 {
3120 /*
3121 * If the AP instructions are not being interpreted and the MSAX3
3122 * facility is not configured for the guest, there is nothing to set up.
3123 */
3124 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3125 return;
3126
3127 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3128 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3129 vcpu->arch.sie_block->eca &= ~ECA_APIE;
3130 vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3131
3132 if (vcpu->kvm->arch.crypto.apie)
3133 vcpu->arch.sie_block->eca |= ECA_APIE;
3134
3135 /* Set up protected key support */
3136 if (vcpu->kvm->arch.crypto.aes_kw) {
3137 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3138 /* ecc is also wrapped with AES key */
3139 if (kvm_has_pckmo_ecc(vcpu->kvm))
3140 vcpu->arch.sie_block->ecd |= ECD_ECC;
3141 }
3142
3143 if (vcpu->kvm->arch.crypto.dea_kw)
3144 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3145 }
3146
3147 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3148 {
3149 free_page(vcpu->arch.sie_block->cbrlo);
3150 vcpu->arch.sie_block->cbrlo = 0;
3151 }
3152
3153 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3154 {
3155 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
3156 if (!vcpu->arch.sie_block->cbrlo)
3157 return -ENOMEM;
3158 return 0;
3159 }
3160
3161 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3162 {
3163 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3164
3165 vcpu->arch.sie_block->ibc = model->ibc;
3166 if (test_kvm_facility(vcpu->kvm, 7))
3167 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3168 }
3169
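/*
 * Initialize the SIE control block of a newly created vCPU according to the
 * VM's facilities and the host's capabilities.
 */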
3170 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3171 {
3172 int rc = 0;
3173 u16 uvrc, uvrrc;
3174
3175 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3176 CPUSTAT_SM |
3177 CPUSTAT_STOPPED);
3178
3179 if (test_kvm_facility(vcpu->kvm, 78))
3180 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3181 else if (test_kvm_facility(vcpu->kvm, 8))
3182 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3183
3184 kvm_s390_vcpu_setup_model(vcpu);
3185
3186 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3187 if (MACHINE_HAS_ESOP)
3188 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3189 if (test_kvm_facility(vcpu->kvm, 9))
3190 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3191 if (test_kvm_facility(vcpu->kvm, 73))
3192 vcpu->arch.sie_block->ecb |= ECB_TE;
3193
3194 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3195 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3196 if (test_kvm_facility(vcpu->kvm, 130))
3197 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3198 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3199 if (sclp.has_cei)
3200 vcpu->arch.sie_block->eca |= ECA_CEI;
3201 if (sclp.has_ib)
3202 vcpu->arch.sie_block->eca |= ECA_IB;
3203 if (sclp.has_siif)
3204 vcpu->arch.sie_block->eca |= ECA_SII;
3205 if (sclp.has_sigpif)
3206 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3207 if (test_kvm_facility(vcpu->kvm, 129)) {
3208 vcpu->arch.sie_block->eca |= ECA_VX;
3209 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3210 }
3211 if (test_kvm_facility(vcpu->kvm, 139))
3212 vcpu->arch.sie_block->ecd |= ECD_MEF;
3213 if (test_kvm_facility(vcpu->kvm, 156))
3214 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3215 if (vcpu->arch.sie_block->gd) {
3216 vcpu->arch.sie_block->eca |= ECA_AIV;
3217 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3218 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3219 }
3220 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3221 | SDNXC;
3222 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3223
3224 if (sclp.has_kss)
3225 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3226 else
3227 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3228
3229 if (vcpu->kvm->arch.use_cmma) {
3230 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3231 if (rc)
3232 return rc;
3233 }
3234 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3235 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3236
3237 vcpu->arch.sie_block->hpid = HPID_KVM;
3238
3239 kvm_s390_vcpu_crypto_setup(vcpu);
3240
3241 mutex_lock(&vcpu->kvm->lock);
3242 if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3243 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3244 if (rc)
3245 kvm_s390_vcpu_unsetup_cmma(vcpu);
3246 }
3247 mutex_unlock(&vcpu->kvm->lock);
3248
3249 return rc;
3250 }
3251
3252 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3253 {
3254 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3255 return -EINVAL;
3256 return 0;
3257 }
3258
3259 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3260 {
3261 struct sie_page *sie_page;
3262 int rc;
3263
3264 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3265 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3266 if (!sie_page)
3267 return -ENOMEM;
3268
3269 vcpu->arch.sie_block = &sie_page->sie_block;
3270 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3271
3272 /* the real guest size will always be smaller than msl */
3273 vcpu->arch.sie_block->mso = 0;
3274 vcpu->arch.sie_block->msl = sclp.hamax;
3275
3276 vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3277 spin_lock_init(&vcpu->arch.local_int.lock);
3278 vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3279 if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3280 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3281 seqcount_init(&vcpu->arch.cputm_seqcount);
3282
3283 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3284 kvm_clear_async_pf_completion_queue(vcpu);
3285 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3286 KVM_SYNC_GPRS |
3287 KVM_SYNC_ACRS |
3288 KVM_SYNC_CRS |
3289 KVM_SYNC_ARCH0 |
3290 KVM_SYNC_PFAULT |
3291 KVM_SYNC_DIAG318;
3292 kvm_s390_set_prefix(vcpu, 0);
3293 if (test_kvm_facility(vcpu->kvm, 64))
3294 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3295 if (test_kvm_facility(vcpu->kvm, 82))
3296 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3297 if (test_kvm_facility(vcpu->kvm, 133))
3298 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3299 if (test_kvm_facility(vcpu->kvm, 156))
3300 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3301 /* fprs can be synchronized via vrs, even if the guest has no vx. With
3302 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3303 */
3304 if (MACHINE_HAS_VX)
3305 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3306 else
3307 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3308
3309 if (kvm_is_ucontrol(vcpu->kvm)) {
3310 rc = __kvm_ucontrol_vcpu_init(vcpu);
3311 if (rc)
3312 goto out_free_sie_block;
3313 }
3314
3315 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3316 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3317 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3318
3319 rc = kvm_s390_vcpu_setup(vcpu);
3320 if (rc)
3321 goto out_ucontrol_uninit;
3322 return 0;
3323
3324 out_ucontrol_uninit:
3325 if (kvm_is_ucontrol(vcpu->kvm))
3326 gmap_remove(vcpu->arch.gmap);
3327 out_free_sie_block:
3328 free_page((unsigned long)(vcpu->arch.sie_block));
3329 return rc;
3330 }
3331
3332 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3333 {
3334 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3335 return kvm_s390_vcpu_has_irq(vcpu, 0);
3336 }
3337
3338 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3339 {
3340 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3341 }
3342
3343 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3344 {
3345 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3346 exit_sie(vcpu);
3347 }
3348
3349 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3350 {
3351 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3352 }
3353
3354 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3355 {
3356 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3357 exit_sie(vcpu);
3358 }
3359
3360 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3361 {
3362 return atomic_read(&vcpu->arch.sie_block->prog20) &
3363 (PROG_BLOCK_SIE | PROG_REQUEST);
3364 }
3365
3366 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3367 {
3368 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3369 }
3370
3371 /*
3372 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3373 * If the CPU is not running (e.g. waiting as idle) the function will
3374 * return immediately. */
3375 void exit_sie(struct kvm_vcpu *vcpu)
3376 {
3377 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3378 kvm_s390_vsie_kick(vcpu);
3379 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3380 cpu_relax();
3381 }
3382
3383 /* Kick a guest cpu out of SIE to process a request synchronously */
3384 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3385 {
3386 kvm_make_request(req, vcpu);
3387 kvm_s390_vcpu_request(vcpu);
3388 }
3389
3390 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3391 unsigned long end)
3392 {
3393 struct kvm *kvm = gmap->private;
3394 struct kvm_vcpu *vcpu;
3395 unsigned long prefix;
3396 int i;
3397
3398 if (gmap_is_shadow(gmap))
3399 return;
3400 if (start >= 1UL << 31)
3401 /* We are only interested in prefix pages */
3402 return;
3403 kvm_for_each_vcpu(i, vcpu, kvm) {
3404 /* match against both prefix pages */
3405 prefix = kvm_s390_get_prefix(vcpu);
3406 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3407 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3408 start, end);
3409 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3410 }
3411 }
3412 }
3413
3414 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3415 {
3416 /* do not poll with more than halt_poll_max_steal percent of steal time */
3417 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3418 halt_poll_max_steal) {
3419 vcpu->stat.halt_no_poll_steal++;
3420 return true;
3421 }
3422 return false;
3423 }
3424
3425 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3426 {
3427 /* kvm common code refers to this, but never calls it */
3428 BUG();
3429 return 0;
3430 }
3431
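/*
 * KVM_GET_ONE_REG handler: copy the requested s390 register (TOD
 * programmable register, epoch difference, CPU timer, ...) to user space.
 */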
3432 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3433 struct kvm_one_reg *reg)
3434 {
3435 int r = -EINVAL;
3436
3437 switch (reg->id) {
3438 case KVM_REG_S390_TODPR:
3439 r = put_user(vcpu->arch.sie_block->todpr,
3440 (u32 __user *)reg->addr);
3441 break;
3442 case KVM_REG_S390_EPOCHDIFF:
3443 r = put_user(vcpu->arch.sie_block->epoch,
3444 (u64 __user *)reg->addr);
3445 break;
3446 case KVM_REG_S390_CPU_TIMER:
3447 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3448 (u64 __user *)reg->addr);
3449 break;
3450 case KVM_REG_S390_CLOCK_COMP:
3451 r = put_user(vcpu->arch.sie_block->ckc,
3452 (u64 __user *)reg->addr);
3453 break;
3454 case KVM_REG_S390_PFTOKEN:
3455 r = put_user(vcpu->arch.pfault_token,
3456 (u64 __user *)reg->addr);
3457 break;
3458 case KVM_REG_S390_PFCOMPARE:
3459 r = put_user(vcpu->arch.pfault_compare,
3460 (u64 __user *)reg->addr);
3461 break;
3462 case KVM_REG_S390_PFSELECT:
3463 r = put_user(vcpu->arch.pfault_select,
3464 (u64 __user *)reg->addr);
3465 break;
3466 case KVM_REG_S390_PP:
3467 r = put_user(vcpu->arch.sie_block->pp,
3468 (u64 __user *)reg->addr);
3469 break;
3470 case KVM_REG_S390_GBEA:
3471 r = put_user(vcpu->arch.sie_block->gbea,
3472 (u64 __user *)reg->addr);
3473 break;
3474 default:
3475 break;
3476 }
3477
3478 return r;
3479 }
3480
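/*
 * KVM_SET_ONE_REG handler: read the new register value from user space
 * and update the vcpu or its SIE control block accordingly.
 */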
3481 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3482 struct kvm_one_reg *reg)
3483 {
3484 int r = -EINVAL;
3485 __u64 val;
3486
3487 switch (reg->id) {
3488 case KVM_REG_S390_TODPR:
3489 r = get_user(vcpu->arch.sie_block->todpr,
3490 (u32 __user *)reg->addr);
3491 break;
3492 case KVM_REG_S390_EPOCHDIFF:
3493 r = get_user(vcpu->arch.sie_block->epoch,
3494 (u64 __user *)reg->addr);
3495 break;
3496 case KVM_REG_S390_CPU_TIMER:
3497 r = get_user(val, (u64 __user *)reg->addr);
3498 if (!r)
3499 kvm_s390_set_cpu_timer(vcpu, val);
3500 break;
3501 case KVM_REG_S390_CLOCK_COMP:
3502 r = get_user(vcpu->arch.sie_block->ckc,
3503 (u64 __user *)reg->addr);
3504 break;
3505 case KVM_REG_S390_PFTOKEN:
3506 r = get_user(vcpu->arch.pfault_token,
3507 (u64 __user *)reg->addr);
3508 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3509 kvm_clear_async_pf_completion_queue(vcpu);
3510 break;
3511 case KVM_REG_S390_PFCOMPARE:
3512 r = get_user(vcpu->arch.pfault_compare,
3513 (u64 __user *)reg->addr);
3514 break;
3515 case KVM_REG_S390_PFSELECT:
3516 r = get_user(vcpu->arch.pfault_select,
3517 (u64 __user *)reg->addr);
3518 break;
3519 case KVM_REG_S390_PP:
3520 r = get_user(vcpu->arch.sie_block->pp,
3521 (u64 __user *)reg->addr);
3522 break;
3523 case KVM_REG_S390_GBEA:
3524 r = get_user(vcpu->arch.sie_block->gbea,
3525 (u64 __user *)reg->addr);
3526 break;
3527 default:
3528 break;
3529 }
3530
3531 return r;
3532 }
3533
3534 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3535 {
3536 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3537 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3538 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3539
3540 kvm_clear_async_pf_completion_queue(vcpu);
3541 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3542 kvm_s390_vcpu_stop(vcpu);
3543 kvm_s390_clear_local_irqs(vcpu);
3544 }
3545
3546 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3547 {
3548 /* Initial reset is a superset of the normal reset */
3549 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3550
3551 /*
3552 * This equals initial cpu reset in pop, but we don't switch to ESA.
3553 * We do not only reset the internal data, but also ...
3554 */
3555 vcpu->arch.sie_block->gpsw.mask = 0;
3556 vcpu->arch.sie_block->gpsw.addr = 0;
3557 kvm_s390_set_prefix(vcpu, 0);
3558 kvm_s390_set_cpu_timer(vcpu, 0);
3559 vcpu->arch.sie_block->ckc = 0;
3560 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3561 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3562 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3563
3564 /* ... the data in sync regs */
3565 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3566 vcpu->run->s.regs.ckc = 0;
3567 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3568 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3569 vcpu->run->psw_addr = 0;
3570 vcpu->run->psw_mask = 0;
3571 vcpu->run->s.regs.todpr = 0;
3572 vcpu->run->s.regs.cputm = 0;
3573 vcpu->run->s.regs.ckc = 0;
3574 vcpu->run->s.regs.pp = 0;
3575 vcpu->run->s.regs.gbea = 1;
3576 vcpu->run->s.regs.fpc = 0;
3577 /*
3578 * Do not reset these registers in the protected case, as some of
3579 * them are overlaid and they are not accessible in this case
3580 * anyway.
3581 */
3582 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3583 vcpu->arch.sie_block->gbea = 1;
3584 vcpu->arch.sie_block->pp = 0;
3585 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3586 vcpu->arch.sie_block->todpr = 0;
3587 }
3588 }
3589
3590 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3591 {
3592 struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3593
3594 /* Clear reset is a superset of the initial reset */
3595 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3596
3597 memset(&regs->gprs, 0, sizeof(regs->gprs));
3598 memset(&regs->vrs, 0, sizeof(regs->vrs));
3599 memset(&regs->acrs, 0, sizeof(regs->acrs));
3600 memset(&regs->gscb, 0, sizeof(regs->gscb));
3601
3602 regs->etoken = 0;
3603 regs->etoken_extension = 0;
3604 }
3605
3606 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3607 {
3608 vcpu_load(vcpu);
3609 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3610 vcpu_put(vcpu);
3611 return 0;
3612 }
3613
3614 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3615 {
3616 vcpu_load(vcpu);
3617 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3618 vcpu_put(vcpu);
3619 return 0;
3620 }
3621
3622 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3623 struct kvm_sregs *sregs)
3624 {
3625 vcpu_load(vcpu);
3626
3627 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3628 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3629
3630 vcpu_put(vcpu);
3631 return 0;
3632 }
3633
3634 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3635 struct kvm_sregs *sregs)
3636 {
3637 vcpu_load(vcpu);
3638
3639 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3640 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3641
3642 vcpu_put(vcpu);
3643 return 0;
3644 }
3645
3646 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3647 {
3648 int ret = 0;
3649
3650 vcpu_load(vcpu);
3651
3652 if (test_fp_ctl(fpu->fpc)) {
3653 ret = -EINVAL;
3654 goto out;
3655 }
3656 vcpu->run->s.regs.fpc = fpu->fpc;
3657 if (MACHINE_HAS_VX)
3658 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3659 (freg_t *) fpu->fprs);
3660 else
3661 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3662
3663 out:
3664 vcpu_put(vcpu);
3665 return ret;
3666 }
3667
3668 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3669 {
3670 vcpu_load(vcpu);
3671
3672 /* make sure we have the latest values */
3673 save_fpu_regs();
3674 if (MACHINE_HAS_VX)
3675 convert_vx_to_fp((freg_t *) fpu->fprs,
3676 (__vector128 *) vcpu->run->s.regs.vrs);
3677 else
3678 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3679 fpu->fpc = vcpu->run->s.regs.fpc;
3680
3681 vcpu_put(vcpu);
3682 return 0;
3683 }
3684
3685 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3686 {
3687 int rc = 0;
3688
3689 if (!is_vcpu_stopped(vcpu))
3690 rc = -EBUSY;
3691 else {
3692 vcpu->run->psw_mask = psw.mask;
3693 vcpu->run->psw_addr = psw.addr;
3694 }
3695 return rc;
3696 }
3697
3698 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3699 struct kvm_translation *tr)
3700 {
3701 return -EINVAL; /* not implemented yet */
3702 }
3703
3704 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3705 KVM_GUESTDBG_USE_HW_BP | \
3706 KVM_GUESTDBG_ENABLE)
3707
3708 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3709 struct kvm_guest_debug *dbg)
3710 {
3711 int rc = 0;
3712
3713 vcpu_load(vcpu);
3714
3715 vcpu->guest_debug = 0;
3716 kvm_s390_clear_bp_data(vcpu);
3717
3718 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3719 rc = -EINVAL;
3720 goto out;
3721 }
3722 if (!sclp.has_gpere) {
3723 rc = -EINVAL;
3724 goto out;
3725 }
3726
3727 if (dbg->control & KVM_GUESTDBG_ENABLE) {
3728 vcpu->guest_debug = dbg->control;
3729 /* enforce guest PER */
3730 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3731
3732 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3733 rc = kvm_s390_import_bp_data(vcpu, dbg);
3734 } else {
3735 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3736 vcpu->arch.guestdbg.last_bp = 0;
3737 }
3738
3739 if (rc) {
3740 vcpu->guest_debug = 0;
3741 kvm_s390_clear_bp_data(vcpu);
3742 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3743 }
3744
3745 out:
3746 vcpu_put(vcpu);
3747 return rc;
3748 }
3749
3750 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3751 struct kvm_mp_state *mp_state)
3752 {
3753 int ret;
3754
3755 vcpu_load(vcpu);
3756
3757 /* CHECK_STOP and LOAD are not supported yet */
3758 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3759 KVM_MP_STATE_OPERATING;
3760
3761 vcpu_put(vcpu);
3762 return ret;
3763 }
3764
3765 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3766 struct kvm_mp_state *mp_state)
3767 {
3768 int rc = 0;
3769
3770 vcpu_load(vcpu);
3771
3772 /* user space knows about this interface - let it control the state */
3773 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3774
3775 switch (mp_state->mp_state) {
3776 case KVM_MP_STATE_STOPPED:
3777 rc = kvm_s390_vcpu_stop(vcpu);
3778 break;
3779 case KVM_MP_STATE_OPERATING:
3780 rc = kvm_s390_vcpu_start(vcpu);
3781 break;
3782 case KVM_MP_STATE_LOAD:
3783 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3784 rc = -ENXIO;
3785 break;
3786 }
3787 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3788 break;
3789 case KVM_MP_STATE_CHECK_STOP:
3790 fallthrough; /* CHECK_STOP and LOAD are not supported yet */
3791 default:
3792 rc = -ENXIO;
3793 }
3794
3795 vcpu_put(vcpu);
3796 return rc;
3797 }
3798
3799 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3800 {
3801 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3802 }
3803
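/*
 * Process all requests pending for this vcpu (prefix re-protection, TLB
 * flush, IBS enable/disable, migration start/stop, ...) before the next
 * SIE entry.
 */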
3804 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3805 {
3806 retry:
3807 kvm_s390_vcpu_request_handled(vcpu);
3808 if (!kvm_request_pending(vcpu))
3809 return 0;
3810 /*
3811 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3812 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3813 * This ensures that the ipte instruction for this request has
3814 * already finished. We might race against a second unmapper that
3815 * wants to set the blocking bit. Let's just retry the request loop.
3816 */
3817 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3818 int rc;
3819 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3820 kvm_s390_get_prefix(vcpu),
3821 PAGE_SIZE * 2, PROT_WRITE);
3822 if (rc) {
3823 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3824 return rc;
3825 }
3826 goto retry;
3827 }
3828
3829 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3830 vcpu->arch.sie_block->ihcpu = 0xffff;
3831 goto retry;
3832 }
3833
3834 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3835 if (!ibs_enabled(vcpu)) {
3836 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3837 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3838 }
3839 goto retry;
3840 }
3841
3842 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3843 if (ibs_enabled(vcpu)) {
3844 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3845 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3846 }
3847 goto retry;
3848 }
3849
3850 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3851 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3852 goto retry;
3853 }
3854
3855 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3856 /*
3857 * Disable CMM virtualization; we will emulate the ESSA
3858 * instruction manually, in order to provide additional
3859 * functionalities needed for live migration.
3860 */
3861 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3862 goto retry;
3863 }
3864
3865 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3866 /*
3867 * Re-enable CMM virtualization if CMMA is available and
3868 * CMM has been used.
3869 */
3870 if ((vcpu->kvm->arch.use_cmma) &&
3871 (vcpu->kvm->mm->context.uses_cmm))
3872 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3873 goto retry;
3874 }
3875
3876 /* nothing to do, just clear the request */
3877 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3878 /* we left the vsie handler, nothing to do, just clear the request */
3879 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3880
3881 return 0;
3882 }
3883
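/*
 * Set the guest TOD clock: store the difference between the requested
 * guest TOD and the current host TOD as epoch (and epoch index, if the
 * multiple-epoch facility is available) in every vcpu's SIE block.
 */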
3884 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3885 {
3886 struct kvm_vcpu *vcpu;
3887 struct kvm_s390_tod_clock_ext htod;
3888 int i;
3889
3890 preempt_disable();
3891
3892 get_tod_clock_ext((char *)&htod);
3893
3894 kvm->arch.epoch = gtod->tod - htod.tod;
3895 kvm->arch.epdx = 0;
3896 if (test_kvm_facility(kvm, 139)) {
3897 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3898 if (kvm->arch.epoch > gtod->tod)
3899 kvm->arch.epdx -= 1;
3900 }
3901
3902 kvm_s390_vcpu_block_all(kvm);
3903 kvm_for_each_vcpu(i, vcpu, kvm) {
3904 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3905 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3906 }
3907
3908 kvm_s390_vcpu_unblock_all(kvm);
3909 preempt_enable();
3910 }
3911
3912 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3913 {
3914 if (!mutex_trylock(&kvm->lock))
3915 return 0;
3916 __kvm_s390_set_tod_clock(kvm, gtod);
3917 mutex_unlock(&kvm->lock);
3918 return 1;
3919 }
3920
3921 /**
3922 * kvm_arch_fault_in_page - fault-in guest page if necessary
3923 * @vcpu: The corresponding virtual cpu
3924 * @gpa: Guest physical address
3925 * @writable: Whether the page should be writable or not
3926 *
3927 * Make sure that a guest page has been faulted-in on the host.
3928 *
3929 * Return: Zero on success, negative error code otherwise.
3930 */
3931 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3932 {
3933 return gmap_fault(vcpu->arch.gmap, gpa,
3934 writable ? FAULT_FLAG_WRITE : 0);
3935 }
3936
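/*
 * Inject either the "pfault init" interrupt (per vcpu) or the
 * "pfault done" interrupt (per VM), carrying the given pfault token.
 */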
3937 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3938 unsigned long token)
3939 {
3940 struct kvm_s390_interrupt inti;
3941 struct kvm_s390_irq irq;
3942
3943 if (start_token) {
3944 irq.u.ext.ext_params2 = token;
3945 irq.type = KVM_S390_INT_PFAULT_INIT;
3946 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3947 } else {
3948 inti.type = KVM_S390_INT_PFAULT_DONE;
3949 inti.parm64 = token;
3950 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3951 }
3952 }
3953
3954 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3955 struct kvm_async_pf *work)
3956 {
3957 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3958 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3959
3960 return true;
3961 }
3962
3963 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3964 struct kvm_async_pf *work)
3965 {
3966 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3967 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3968 }
3969
3970 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3971 struct kvm_async_pf *work)
3972 {
3973 /* s390 will always inject the page directly */
3974 }
3975
3976 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3977 {
3978 /*
3979 * s390 will always inject the page directly,
3980 * but we still want check_async_completion to cleanup
3981 * but we still want check_async_completion to clean up
3982 return true;
3983 }
3984
3985 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3986 {
3987 hva_t hva;
3988 struct kvm_arch_async_pf arch;
3989
3990 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3991 return false;
3992 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3993 vcpu->arch.pfault_compare)
3994 return false;
3995 if (psw_extint_disabled(vcpu))
3996 return false;
3997 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3998 return false;
3999 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4000 return false;
4001 if (!vcpu->arch.gmap->pfault_enabled)
4002 return false;
4003
4004 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4005 hva += current->thread.gmap_addr & ~PAGE_MASK;
4006 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4007 return false;
4008
4009 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4010 }
4011
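/*
 * Per-iteration work before entering SIE: async pfault housekeeping,
 * interrupt delivery, request handling and guest debug setup.
 */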
4012 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4013 {
4014 int rc, cpuflags;
4015
4016 /*
4017 * On s390 notifications for arriving pages will be delivered directly
4018 * to the guest but the housekeeping for completed pfaults is
4019 * handled outside the worker.
4020 */
4021 kvm_check_async_pf_completion(vcpu);
4022
4023 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4024 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4025
4026 if (need_resched())
4027 schedule();
4028
4029 if (!kvm_is_ucontrol(vcpu->kvm)) {
4030 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4031 if (rc)
4032 return rc;
4033 }
4034
4035 rc = kvm_s390_handle_requests(vcpu);
4036 if (rc)
4037 return rc;
4038
4039 if (guestdbg_enabled(vcpu)) {
4040 kvm_s390_backup_guest_per_regs(vcpu);
4041 kvm_s390_patch_guest_per_regs(vcpu);
4042 }
4043
4044 clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask);
4045
4046 vcpu->arch.sie_block->icptcode = 0;
4047 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4048 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4049 trace_kvm_s390_sie_enter(vcpu, cpuflags);
4050
4051 return 0;
4052 }
4053
4054 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4055 {
4056 struct kvm_s390_pgm_info pgm_info = {
4057 .code = PGM_ADDRESSING,
4058 };
4059 u8 opcode, ilen;
4060 int rc;
4061
4062 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4063 trace_kvm_s390_sie_fault(vcpu);
4064
4065 /*
4066 * We want to inject an addressing exception, which is defined as a
4067 * suppressing or terminating exception. However, since we came here
4068 * by a DAT access exception, the PSW still points to the faulting
4069 * instruction since DAT exceptions are nullifying. So we've got
4070 * to look up the current opcode to get the length of the instruction
4071 * to be able to forward the PSW.
4072 */
4073 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4074 ilen = insn_length(opcode);
4075 if (rc < 0) {
4076 return rc;
4077 } else if (rc) {
4078 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4079 * Forward by arbitrary ilc, injection will take care of
4080 * nullification if necessary.
4081 */
4082 pgm_info = vcpu->arch.pgm;
4083 ilen = 4;
4084 }
4085 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4086 kvm_s390_forward_psw(vcpu, ilen);
4087 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4088 }
4089
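/*
 * Evaluate the SIE exit: reinject machine checks, handle intercepts and
 * guest faults, or prepare kvm_run so that userspace can take over.
 */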
4090 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4091 {
4092 struct mcck_volatile_info *mcck_info;
4093 struct sie_page *sie_page;
4094
4095 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4096 vcpu->arch.sie_block->icptcode);
4097 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4098
4099 if (guestdbg_enabled(vcpu))
4100 kvm_s390_restore_guest_per_regs(vcpu);
4101
4102 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4103 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4104
4105 if (exit_reason == -EINTR) {
4106 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4107 sie_page = container_of(vcpu->arch.sie_block,
4108 struct sie_page, sie_block);
4109 mcck_info = &sie_page->mcck_info;
4110 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4111 return 0;
4112 }
4113
4114 if (vcpu->arch.sie_block->icptcode > 0) {
4115 int rc = kvm_handle_sie_intercept(vcpu);
4116
4117 if (rc != -EOPNOTSUPP)
4118 return rc;
4119 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4120 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4121 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4122 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4123 return -EREMOTE;
4124 } else if (exit_reason != -EFAULT) {
4125 vcpu->stat.exit_null++;
4126 return 0;
4127 } else if (kvm_is_ucontrol(vcpu->kvm)) {
4128 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4129 vcpu->run->s390_ucontrol.trans_exc_code =
4130 current->thread.gmap_addr;
4131 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4132 return -EREMOTE;
4133 } else if (current->thread.gmap_pfault) {
4134 trace_kvm_s390_major_guest_pfault(vcpu);
4135 current->thread.gmap_pfault = 0;
4136 if (kvm_arch_setup_async_pf(vcpu))
4137 return 0;
4138 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4139 }
4140 return vcpu_post_run_fault_in_sie(vcpu);
4141 }
4142
4143 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4144 static int __vcpu_run(struct kvm_vcpu *vcpu)
4145 {
4146 int rc, exit_reason;
4147 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4148
4149 /*
4150 * We try to hold kvm->srcu during most of vcpu_run (except when run-
4151 * ning the guest), so that memslots (and other stuff) are protected
4152 */
4153 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4154
4155 do {
4156 rc = vcpu_pre_run(vcpu);
4157 if (rc)
4158 break;
4159
4160 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4161 /*
4162 * As PF_VCPU will be used in the fault handler, there should be
4163 * no uaccess between guest_enter and guest_exit.
4164 */
4165 local_irq_disable();
4166 guest_enter_irqoff();
4167 __disable_cpu_timer_accounting(vcpu);
4168 local_irq_enable();
4169 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4170 memcpy(sie_page->pv_grregs,
4171 vcpu->run->s.regs.gprs,
4172 sizeof(sie_page->pv_grregs));
4173 }
4174 exit_reason = sie64a(vcpu->arch.sie_block,
4175 vcpu->run->s.regs.gprs);
4176 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4177 memcpy(vcpu->run->s.regs.gprs,
4178 sie_page->pv_grregs,
4179 sizeof(sie_page->pv_grregs));
4180 /*
4181 * We're not allowed to inject interrupts on intercepts
4182 * that leave the guest state in an "in-between" state
4183 * where the next SIE entry will do a continuation.
4184 * Fence interrupts in our "internal" PSW.
4185 */
4186 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4187 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4188 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4189 }
4190 }
4191 local_irq_disable();
4192 __enable_cpu_timer_accounting(vcpu);
4193 guest_exit_irqoff();
4194 local_irq_enable();
4195 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4196
4197 rc = vcpu_post_run(vcpu, exit_reason);
4198 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4199
4200 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4201 return rc;
4202 }
4203
4204 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4205 {
4206 struct kvm_run *kvm_run = vcpu->run;
4207 struct runtime_instr_cb *riccb;
4208 struct gs_cb *gscb;
4209
4210 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4211 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4212 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4213 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4214 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4215 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4216 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4217 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4218 }
4219 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4220 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4221 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4222 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4223 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4224 kvm_clear_async_pf_completion_queue(vcpu);
4225 }
4226 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4227 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4228 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4229 }
4230 /*
4231 * If userspace sets the riccb (e.g. after migration) to a valid state,
4232 * we should enable RI here instead of doing the lazy enablement.
4233 */
4234 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4235 test_kvm_facility(vcpu->kvm, 64) &&
4236 riccb->v &&
4237 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4238 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4239 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4240 }
4241 /*
4242 * If userspace sets the gscb (e.g. after migration) to non-zero,
4243 * we should enable GS here instead of doing the lazy enablement.
4244 */
4245 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4246 test_kvm_facility(vcpu->kvm, 133) &&
4247 gscb->gssm &&
4248 !vcpu->arch.gs_enabled) {
4249 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4250 vcpu->arch.sie_block->ecb |= ECB_GS;
4251 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4252 vcpu->arch.gs_enabled = 1;
4253 }
4254 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4255 test_kvm_facility(vcpu->kvm, 82)) {
4256 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4257 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4258 }
4259 if (MACHINE_HAS_GS) {
4260 preempt_disable();
4261 __ctl_set_bit(2, 4);
4262 if (current->thread.gs_cb) {
4263 vcpu->arch.host_gscb = current->thread.gs_cb;
4264 save_gs_cb(vcpu->arch.host_gscb);
4265 }
4266 if (vcpu->arch.gs_enabled) {
4267 current->thread.gs_cb = (struct gs_cb *)
4268 &vcpu->run->s.regs.gscb;
4269 restore_gs_cb(current->thread.gs_cb);
4270 }
4271 preempt_enable();
4272 }
4273 /* SIE will load etoken directly from SDNX and therefore kvm_run */
4274 }
4275
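/*
 * Load the register state that userspace marked dirty in kvm_run into
 * the vcpu and switch FPU and access registers from host to guest.
 */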
4276 static void sync_regs(struct kvm_vcpu *vcpu)
4277 {
4278 struct kvm_run *kvm_run = vcpu->run;
4279
4280 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4281 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4282 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4283 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4284 /* some control register changes require a tlb flush */
4285 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4286 }
4287 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4288 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4289 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4290 }
4291 save_access_regs(vcpu->arch.host_acrs);
4292 restore_access_regs(vcpu->run->s.regs.acrs);
4293 /* save host (userspace) fprs/vrs */
4294 save_fpu_regs();
4295 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4296 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4297 if (MACHINE_HAS_VX)
4298 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4299 else
4300 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4301 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4302 if (test_fp_ctl(current->thread.fpu.fpc))
4303 /* User space provided an invalid FPC, let's clear it */
4304 current->thread.fpu.fpc = 0;
4305
4306 /* Sync fmt2 only data */
4307 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4308 sync_regs_fmt2(vcpu);
4309 } else {
4310 /*
4311 * In several places we have to modify our internal view to
4312 * not do things that are disallowed by the ultravisor. For
4313 * example we must not inject interrupts after specific exits
4314 * (e.g. 112 prefix page not secure). We do this by turning
4315 * off the machine check, external and I/O interrupt bits
4316 * of our PSW copy. To avoid getting validity intercepts, we
4317 * do only accept the condition code from userspace.
4318 */
4319 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4320 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4321 PSW_MASK_CC;
4322 }
4323
4324 kvm_run->kvm_dirty_regs = 0;
4325 }
4326
4327 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4328 {
4329 struct kvm_run *kvm_run = vcpu->run;
4330
4331 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4332 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4333 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4334 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4335 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4336 if (MACHINE_HAS_GS) {
4337 preempt_disable();
4338 __ctl_set_bit(2, 4);
4339 if (vcpu->arch.gs_enabled)
4340 save_gs_cb(current->thread.gs_cb);
4341 current->thread.gs_cb = vcpu->arch.host_gscb;
4342 restore_gs_cb(vcpu->arch.host_gscb);
4343 if (!vcpu->arch.host_gscb)
4344 __ctl_clear_bit(2, 4);
4345 vcpu->arch.host_gscb = NULL;
4346 preempt_enable();
4347 }
4348 /* SIE will save etoken directly into SDNX and therefore kvm_run */
4349 }
4350
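/*
 * Copy the guest register state back into kvm_run for userspace and
 * switch FPU and access registers back to the host values.
 */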
4351 static void store_regs(struct kvm_vcpu *vcpu)
4352 {
4353 struct kvm_run *kvm_run = vcpu->run;
4354
4355 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4356 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4357 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4358 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4359 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4360 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4361 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4362 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4363 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4364 save_access_regs(vcpu->run->s.regs.acrs);
4365 restore_access_regs(vcpu->arch.host_acrs);
4366 /* Save guest register state */
4367 save_fpu_regs();
4368 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4369 /* Restore will be done lazily at return */
4370 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4371 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4372 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4373 store_regs_fmt2(vcpu);
4374 }
4375
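/* Entry point for the KVM_RUN ioctl on s390. */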
4376 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4377 {
4378 struct kvm_run *kvm_run = vcpu->run;
4379 int rc;
4380
4381 if (kvm_run->immediate_exit)
4382 return -EINTR;
4383
4384 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4385 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4386 return -EINVAL;
4387
4388 vcpu_load(vcpu);
4389
4390 if (guestdbg_exit_pending(vcpu)) {
4391 kvm_s390_prepare_debug_exit(vcpu);
4392 rc = 0;
4393 goto out;
4394 }
4395
4396 kvm_sigset_activate(vcpu);
4397
4398 /*
4399 * no need to check the return value of vcpu_start as it can only have
4400 * an error for protvirt, but protvirt means user cpu state
4401 */
4402 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4403 kvm_s390_vcpu_start(vcpu);
4404 } else if (is_vcpu_stopped(vcpu)) {
4405 pr_err_ratelimited("can't run stopped vcpu %d\n",
4406 vcpu->vcpu_id);
4407 rc = -EINVAL;
4408 goto out;
4409 }
4410
4411 sync_regs(vcpu);
4412 enable_cpu_timer_accounting(vcpu);
4413
4414 might_fault();
4415 rc = __vcpu_run(vcpu);
4416
4417 if (signal_pending(current) && !rc) {
4418 kvm_run->exit_reason = KVM_EXIT_INTR;
4419 rc = -EINTR;
4420 }
4421
4422 if (guestdbg_exit_pending(vcpu) && !rc) {
4423 kvm_s390_prepare_debug_exit(vcpu);
4424 rc = 0;
4425 }
4426
4427 if (rc == -EREMOTE) {
4428 /* userspace support is needed, kvm_run has been prepared */
4429 rc = 0;
4430 }
4431
4432 disable_cpu_timer_accounting(vcpu);
4433 store_regs(vcpu);
4434
4435 kvm_sigset_deactivate(vcpu);
4436
4437 vcpu->stat.exit_userspace++;
4438 out:
4439 vcpu_put(vcpu);
4440 return rc;
4441 }
4442
4443 /*
4444 * store status at address
4445 * we have two special cases:
4446 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4447 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4448 */
4449 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4450 {
4451 unsigned char archmode = 1;
4452 freg_t fprs[NUM_FPRS];
4453 unsigned int px;
4454 u64 clkcomp, cputm;
4455 int rc;
4456
4457 px = kvm_s390_get_prefix(vcpu);
4458 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4459 if (write_guest_abs(vcpu, 163, &archmode, 1))
4460 return -EFAULT;
4461 gpa = 0;
4462 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4463 if (write_guest_real(vcpu, 163, &archmode, 1))
4464 return -EFAULT;
4465 gpa = px;
4466 } else
4467 gpa -= __LC_FPREGS_SAVE_AREA;
4468
4469 /* manually convert vector registers if necessary */
4470 if (MACHINE_HAS_VX) {
4471 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4472 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4473 fprs, 128);
4474 } else {
4475 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4476 vcpu->run->s.regs.fprs, 128);
4477 }
4478 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4479 vcpu->run->s.regs.gprs, 128);
4480 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4481 &vcpu->arch.sie_block->gpsw, 16);
4482 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4483 &px, 4);
4484 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4485 &vcpu->run->s.regs.fpc, 4);
4486 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4487 &vcpu->arch.sie_block->todpr, 4);
4488 cputm = kvm_s390_get_cpu_timer(vcpu);
4489 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4490 &cputm, 8);
4491 clkcomp = vcpu->arch.sie_block->ckc >> 8;
4492 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4493 &clkcomp, 8);
4494 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4495 &vcpu->run->s.regs.acrs, 64);
4496 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4497 &vcpu->arch.sie_block->gcr, 128);
4498 return rc ? -EFAULT : 0;
4499 }
4500
4501 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4502 {
4503 /*
4504 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4505 * switch in the run ioctl. Let's update our copies before we save
4506 * it into the save area
4507 */
4508 save_fpu_regs();
4509 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4510 save_access_regs(vcpu->run->s.regs.acrs);
4511
4512 return kvm_s390_store_status_unloaded(vcpu, addr);
4513 }
4514
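/* Cancel a pending ENABLE request, if any, and ask the vcpu to disable IBS. */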
4515 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4516 {
4517 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4518 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4519 }
4520
4521 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4522 {
4523 unsigned int i;
4524 struct kvm_vcpu *vcpu;
4525
4526 kvm_for_each_vcpu(i, vcpu, kvm) {
4527 __disable_ibs_on_vcpu(vcpu);
4528 }
4529 }
4530
4531 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4532 {
4533 if (!sclp.has_ibs)
4534 return;
4535 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4536 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4537 }
4538
4539 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4540 {
4541 int i, online_vcpus, r = 0, started_vcpus = 0;
4542
4543 if (!is_vcpu_stopped(vcpu))
4544 return 0;
4545
4546 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4547 /* Only one cpu at a time may enter/leave the STOPPED state. */
4548 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4549 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4550
4551 /* Let's tell the UV that we want to change into the operating state */
4552 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4553 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4554 if (r) {
4555 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4556 return r;
4557 }
4558 }
4559
4560 for (i = 0; i < online_vcpus; i++) {
4561 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4562 started_vcpus++;
4563 }
4564
4565 if (started_vcpus == 0) {
4566 /* we're the only active VCPU -> speed it up */
4567 __enable_ibs_on_vcpu(vcpu);
4568 } else if (started_vcpus == 1) {
4569 /*
4570 * As we are starting a second VCPU, we have to disable
4571 * the IBS facility on all VCPUs to remove potentially
4572 * outstanding ENABLE requests.
4573 */
4574 __disable_ibs_on_all_vcpus(vcpu->kvm);
4575 }
4576
4577 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4578 /*
4579 * The real PSW might have changed due to a RESTART interpreted by the
4580 * ultravisor. We block all interrupts and let the next sie exit
4581 * refresh our view.
4582 */
4583 if (kvm_s390_pv_cpu_is_protected(vcpu))
4584 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4585 /*
4586 * Another VCPU might have used IBS while we were offline.
4587 * Let's play safe and flush the VCPU at startup.
4588 */
4589 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4590 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4591 return 0;
4592 }
4593
4594 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4595 {
4596 int i, online_vcpus, r = 0, started_vcpus = 0;
4597 struct kvm_vcpu *started_vcpu = NULL;
4598
4599 if (is_vcpu_stopped(vcpu))
4600 return 0;
4601
4602 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4603 /* Only one cpu at a time may enter/leave the STOPPED state. */
4604 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4605 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4606
4607 /* Let's tell the UV that we want to change into the stopped state */
4608 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4609 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4610 if (r) {
4611 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4612 return r;
4613 }
4614 }
4615
4616 /*
4617 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
4618 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
4619 * have been fully processed. This will ensure that the VCPU
4620 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
4621 */
4622 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4623 kvm_s390_clear_stop_irq(vcpu);
4624
4625 __disable_ibs_on_vcpu(vcpu);
4626
4627 for (i = 0; i < online_vcpus; i++) {
4628 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4629 started_vcpus++;
4630 started_vcpu = vcpu->kvm->vcpus[i];
4631 }
4632 }
4633
4634 if (started_vcpus == 1) {
4635 /*
4636 * As we only have one VCPU left, we want to enable the
4637 * IBS facility for that VCPU to speed it up.
4638 */
4639 __enable_ibs_on_vcpu(started_vcpu);
4640 }
4641
4642 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4643 return 0;
4644 }
4645
4646 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4647 struct kvm_enable_cap *cap)
4648 {
4649 int r;
4650
4651 if (cap->flags)
4652 return -EINVAL;
4653
4654 switch (cap->cap) {
4655 case KVM_CAP_S390_CSS_SUPPORT:
4656 if (!vcpu->kvm->arch.css_support) {
4657 vcpu->kvm->arch.css_support = 1;
4658 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4659 trace_kvm_s390_enable_css(vcpu->kvm);
4660 }
4661 r = 0;
4662 break;
4663 default:
4664 r = -EINVAL;
4665 break;
4666 }
4667 return r;
4668 }
4669
4670 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4671 struct kvm_s390_mem_op *mop)
4672 {
4673 void __user *uaddr = (void __user *)mop->buf;
4674 int r = 0;
4675
4676 if (mop->flags || !mop->size)
4677 return -EINVAL;
4678 if (mop->size + mop->sida_offset < mop->size)
4679 return -EINVAL;
4680 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4681 return -E2BIG;
4682 if (!kvm_s390_pv_cpu_is_protected(vcpu))
4683 return -EINVAL;
4684
4685 switch (mop->op) {
4686 case KVM_S390_MEMOP_SIDA_READ:
4687 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4688 mop->sida_offset), mop->size))
4689 r = -EFAULT;
4690
4691 break;
4692 case KVM_S390_MEMOP_SIDA_WRITE:
4693 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4694 mop->sida_offset), uaddr, mop->size))
4695 r = -EFAULT;
4696 break;
4697 }
4698 return r;
4699 }
4700 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4701 struct kvm_s390_mem_op *mop)
4702 {
4703 void __user *uaddr = (void __user *)mop->buf;
4704 void *tmpbuf = NULL;
4705 int r = 0;
4706 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4707 | KVM_S390_MEMOP_F_CHECK_ONLY;
4708
4709 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4710 return -EINVAL;
4711
4712 if (mop->size > MEM_OP_MAX_SIZE)
4713 return -E2BIG;
4714
4715 if (kvm_s390_pv_cpu_is_protected(vcpu))
4716 return -EINVAL;
4717
4718 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4719 tmpbuf = vmalloc(mop->size);
4720 if (!tmpbuf)
4721 return -ENOMEM;
4722 }
4723
4724 switch (mop->op) {
4725 case KVM_S390_MEMOP_LOGICAL_READ:
4726 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4727 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4728 mop->size, GACC_FETCH);
4729 break;
4730 }
4731 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4732 if (r == 0) {
4733 if (copy_to_user(uaddr, tmpbuf, mop->size))
4734 r = -EFAULT;
4735 }
4736 break;
4737 case KVM_S390_MEMOP_LOGICAL_WRITE:
4738 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4739 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4740 mop->size, GACC_STORE);
4741 break;
4742 }
4743 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4744 r = -EFAULT;
4745 break;
4746 }
4747 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4748 break;
4749 }
4750
4751 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4752 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4753
4754 vfree(tmpbuf);
4755 return r;
4756 }
4757
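/*
 * Dispatch a KVM_S390_MEM_OP either to guest memory access or to the
 * secure instruction data area (SIDA) of a protected vcpu.
 */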
4758 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4759 struct kvm_s390_mem_op *mop)
4760 {
4761 int r, srcu_idx;
4762
4763 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4764
4765 switch (mop->op) {
4766 case KVM_S390_MEMOP_LOGICAL_READ:
4767 case KVM_S390_MEMOP_LOGICAL_WRITE:
4768 r = kvm_s390_guest_mem_op(vcpu, mop);
4769 break;
4770 case KVM_S390_MEMOP_SIDA_READ:
4771 case KVM_S390_MEMOP_SIDA_WRITE:
4772 /* we are locked against sida going away by the vcpu->mutex */
4773 r = kvm_s390_guest_sida_op(vcpu, mop);
4774 break;
4775 default:
4776 r = -EINVAL;
4777 }
4778
4779 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4780 return r;
4781 }
4782
4783 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4784 unsigned int ioctl, unsigned long arg)
4785 {
4786 struct kvm_vcpu *vcpu = filp->private_data;
4787 void __user *argp = (void __user *)arg;
4788
4789 switch (ioctl) {
4790 case KVM_S390_IRQ: {
4791 struct kvm_s390_irq s390irq;
4792
4793 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4794 return -EFAULT;
4795 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4796 }
4797 case KVM_S390_INTERRUPT: {
4798 struct kvm_s390_interrupt s390int;
4799 struct kvm_s390_irq s390irq = {};
4800
4801 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4802 return -EFAULT;
4803 if (s390int_to_s390irq(&s390int, &s390irq))
4804 return -EINVAL;
4805 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4806 }
4807 }
4808 return -ENOIOCTLCMD;
4809 }
4810
4811 long kvm_arch_vcpu_ioctl(struct file *filp,
4812 unsigned int ioctl, unsigned long arg)
4813 {
4814 struct kvm_vcpu *vcpu = filp->private_data;
4815 void __user *argp = (void __user *)arg;
4816 int idx;
4817 long r;
4818 u16 rc, rrc;
4819
4820 vcpu_load(vcpu);
4821
4822 switch (ioctl) {
4823 case KVM_S390_STORE_STATUS:
4824 idx = srcu_read_lock(&vcpu->kvm->srcu);
4825 r = kvm_s390_store_status_unloaded(vcpu, arg);
4826 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4827 break;
4828 case KVM_S390_SET_INITIAL_PSW: {
4829 psw_t psw;
4830
4831 r = -EFAULT;
4832 if (copy_from_user(&psw, argp, sizeof(psw)))
4833 break;
4834 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4835 break;
4836 }
4837 case KVM_S390_CLEAR_RESET:
4838 r = 0;
4839 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4840 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4841 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4842 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4843 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4844 rc, rrc);
4845 }
4846 break;
4847 case KVM_S390_INITIAL_RESET:
4848 r = 0;
4849 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4850 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4851 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4852 UVC_CMD_CPU_RESET_INITIAL,
4853 &rc, &rrc);
4854 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4855 rc, rrc);
4856 }
4857 break;
4858 case KVM_S390_NORMAL_RESET:
4859 r = 0;
4860 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4861 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4862 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4863 UVC_CMD_CPU_RESET, &rc, &rrc);
4864 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4865 rc, rrc);
4866 }
4867 break;
4868 case KVM_SET_ONE_REG:
4869 case KVM_GET_ONE_REG: {
4870 struct kvm_one_reg reg;
4871 r = -EINVAL;
4872 if (kvm_s390_pv_cpu_is_protected(vcpu))
4873 break;
4874 r = -EFAULT;
4875 if (copy_from_user(&reg, argp, sizeof(reg)))
4876 break;
4877 if (ioctl == KVM_SET_ONE_REG)
4878 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4879 else
4880 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4881 break;
4882 }
4883 #ifdef CONFIG_KVM_S390_UCONTROL
4884 case KVM_S390_UCAS_MAP: {
4885 struct kvm_s390_ucas_mapping ucasmap;
4886
4887 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4888 r = -EFAULT;
4889 break;
4890 }
4891
4892 if (!kvm_is_ucontrol(vcpu->kvm)) {
4893 r = -EINVAL;
4894 break;
4895 }
4896
4897 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4898 ucasmap.vcpu_addr, ucasmap.length);
4899 break;
4900 }
4901 case KVM_S390_UCAS_UNMAP: {
4902 struct kvm_s390_ucas_mapping ucasmap;
4903
4904 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4905 r = -EFAULT;
4906 break;
4907 }
4908
4909 if (!kvm_is_ucontrol(vcpu->kvm)) {
4910 r = -EINVAL;
4911 break;
4912 }
4913
4914 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4915 ucasmap.length);
4916 break;
4917 }
4918 #endif
4919 case KVM_S390_VCPU_FAULT: {
4920 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4921 break;
4922 }
4923 case KVM_ENABLE_CAP:
4924 {
4925 struct kvm_enable_cap cap;
4926 r = -EFAULT;
4927 if (copy_from_user(&cap, argp, sizeof(cap)))
4928 break;
4929 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4930 break;
4931 }
4932 case KVM_S390_MEM_OP: {
4933 struct kvm_s390_mem_op mem_op;
4934
4935 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4936 r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4937 else
4938 r = -EFAULT;
4939 break;
4940 }
4941 case KVM_S390_SET_IRQ_STATE: {
4942 struct kvm_s390_irq_state irq_state;
4943
4944 r = -EFAULT;
4945 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4946 break;
4947 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4948 irq_state.len == 0 ||
4949 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4950 r = -EINVAL;
4951 break;
4952 }
4953 /* do not use irq_state.flags, it will break old QEMUs */
4954 r = kvm_s390_set_irq_state(vcpu,
4955 (void __user *) irq_state.buf,
4956 irq_state.len);
4957 break;
4958 }
4959 case KVM_S390_GET_IRQ_STATE: {
4960 struct kvm_s390_irq_state irq_state;
4961
4962 r = -EFAULT;
4963 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4964 break;
4965 if (irq_state.len == 0) {
4966 r = -EINVAL;
4967 break;
4968 }
4969 /* do not use irq_state.flags, it will break old QEMUs */
4970 r = kvm_s390_get_irq_state(vcpu,
4971 (__u8 __user *) irq_state.buf,
4972 irq_state.len);
4973 break;
4974 }
4975 default:
4976 r = -ENOTTY;
4977 }
4978
4979 vcpu_put(vcpu);
4980 return r;
4981 }
4982
4983 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4984 {
4985 #ifdef CONFIG_KVM_S390_UCONTROL
4986 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4987 && (kvm_is_ucontrol(vcpu->kvm))) {
4988 vmf->page = virt_to_page(vcpu->arch.sie_block);
4989 get_page(vmf->page);
4990 return 0;
4991 }
4992 #endif
4993 return VM_FAULT_SIGBUS;
4994 }
4995
4996 /* Section: memory related */
4997 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4998 struct kvm_memory_slot *memslot,
4999 const struct kvm_userspace_memory_region *mem,
5000 enum kvm_mr_change change)
5001 {
5002 /* A few sanity checks. We can have memory slots which have to be
5003 located/ended at a segment boundary (1MB). The memory in userland is
5004 ok to be fragmented into various different vmas. It is okay to mmap()
5005 and munmap() stuff in this slot after doing this call at any time */
5006
5007 if (mem->userspace_addr & 0xffffful)
5008 return -EINVAL;
5009
5010 if (mem->memory_size & 0xffffful)
5011 return -EINVAL;
5012
5013 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
5014 return -EINVAL;
5015
5016 /* When we are protected, we should not change the memory slots */
5017 if (kvm_s390_pv_get_handle(kvm))
5018 return -EINVAL;
5019
5020 if (!kvm->arch.migration_mode)
5021 return 0;
5022
5023 /*
5024 * Turn off migration mode when:
5025 * - userspace creates a new memslot with dirty logging off,
5026 * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and
5027 * dirty logging is turned off.
5028 * Migration mode expects dirty page logging being enabled to store
5029 * its dirty bitmap.
5030 */
5031 if (change != KVM_MR_DELETE &&
5032 !(mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
5033 WARN(kvm_s390_vm_stop_migration(kvm),
5034 "Failed to stop migration mode");
5035
5036 return 0;
5037 }
5038
5039 void kvm_arch_commit_memory_region(struct kvm *kvm,
5040 const struct kvm_userspace_memory_region *mem,
5041 struct kvm_memory_slot *old,
5042 const struct kvm_memory_slot *new,
5043 enum kvm_mr_change change)
5044 {
5045 int rc = 0;
5046
5047 switch (change) {
5048 case KVM_MR_DELETE:
5049 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5050 old->npages * PAGE_SIZE);
5051 break;
5052 case KVM_MR_MOVE:
5053 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5054 old->npages * PAGE_SIZE);
5055 if (rc)
5056 break;
5057 fallthrough;
5058 case KVM_MR_CREATE:
5059 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5060 mem->guest_phys_addr, mem->memory_size);
5061 break;
5062 case KVM_MR_FLAGS_ONLY:
5063 break;
5064 default:
5065 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5066 }
5067 if (rc)
5068 pr_warn("failed to commit memory region\n");
5069 return;
5070 }
5071
5072 static inline unsigned long nonhyp_mask(int i)
5073 {
5074 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5075
5076 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5077 }
5078
5079 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5080 {
5081 vcpu->valid_wakeup = false;
5082 }
5083
5084 static int __init kvm_s390_init(void)
5085 {
5086 int i;
5087
5088 if (!sclp.has_sief2) {
5089 pr_info("SIE is not available\n");
5090 return -ENODEV;
5091 }
5092
5093 if (nested && hpage) {
5094 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5095 return -EINVAL;
5096 }
5097
5098 for (i = 0; i < 16; i++)
5099 kvm_s390_fac_base[i] |=
5100 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
5101
5102 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5103 }
5104
5105 static void __exit kvm_s390_exit(void)
5106 {
5107 kvm_exit();
5108 }
5109
5110 module_init(kvm_s390_init);
5111 module_exit(kvm_s390_exit);
5112
5113 /*
5114 * Enable autoloading of the kvm module.
5115 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5116 * since x86 takes a different approach.
5117 */
5118 #include <linux/miscdevice.h>
5119 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5120 MODULE_ALIAS("devname:kvm");
5121