1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * hosting IBM Z kernel virtual machines (s390x)
4 *
5 * Copyright IBM Corp. 2008, 2020
6 *
7 * Author(s): Carsten Otte <cotte@de.ibm.com>
8 * Christian Borntraeger <borntraeger@de.ibm.com>
9 * Heiko Carstens <heiko.carstens@de.ibm.com>
10 * Christian Ehrhardt <ehrhardt@de.ibm.com>
11 * Jason J. Herne <jjherne@us.ibm.com>
12 */
13
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
35
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include "kvm-s390.h"
49 #include "gaccess.h"
50
51 #define CREATE_TRACE_POINTS
52 #include "trace.h"
53 #include "trace-s390.h"
54
55 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
56 #define LOCAL_IRQS 32
57 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
58 (KVM_MAX_VCPUS + LOCAL_IRQS))
59
60 struct kvm_stats_debugfs_item debugfs_entries[] = {
61 VCPU_STAT("userspace_handled", exit_userspace),
62 VCPU_STAT("exit_null", exit_null),
63 VCPU_STAT("exit_validity", exit_validity),
64 VCPU_STAT("exit_stop_request", exit_stop_request),
65 VCPU_STAT("exit_external_request", exit_external_request),
66 VCPU_STAT("exit_io_request", exit_io_request),
67 VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
68 VCPU_STAT("exit_instruction", exit_instruction),
69 VCPU_STAT("exit_pei", exit_pei),
70 VCPU_STAT("exit_program_interruption", exit_program_interruption),
71 VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
72 VCPU_STAT("exit_operation_exception", exit_operation_exception),
73 VCPU_STAT("halt_successful_poll", halt_successful_poll),
74 VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
75 VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
76 VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
77 VCPU_STAT("halt_wakeup", halt_wakeup),
78 VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
79 VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
80 VCPU_STAT("instruction_lctlg", instruction_lctlg),
81 VCPU_STAT("instruction_lctl", instruction_lctl),
82 VCPU_STAT("instruction_stctl", instruction_stctl),
83 VCPU_STAT("instruction_stctg", instruction_stctg),
84 VCPU_STAT("deliver_ckc", deliver_ckc),
85 VCPU_STAT("deliver_cputm", deliver_cputm),
86 VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
87 VCPU_STAT("deliver_external_call", deliver_external_call),
88 VCPU_STAT("deliver_service_signal", deliver_service_signal),
89 VCPU_STAT("deliver_virtio", deliver_virtio),
90 VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
91 VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
92 VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
93 VCPU_STAT("deliver_program", deliver_program),
94 VCPU_STAT("deliver_io", deliver_io),
95 VCPU_STAT("deliver_machine_check", deliver_machine_check),
96 VCPU_STAT("exit_wait_state", exit_wait_state),
97 VCPU_STAT("inject_ckc", inject_ckc),
98 VCPU_STAT("inject_cputm", inject_cputm),
99 VCPU_STAT("inject_external_call", inject_external_call),
100 VM_STAT("inject_float_mchk", inject_float_mchk),
101 VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
102 VM_STAT("inject_io", inject_io),
103 VCPU_STAT("inject_mchk", inject_mchk),
104 VM_STAT("inject_pfault_done", inject_pfault_done),
105 VCPU_STAT("inject_program", inject_program),
106 VCPU_STAT("inject_restart", inject_restart),
107 VM_STAT("inject_service_signal", inject_service_signal),
108 VCPU_STAT("inject_set_prefix", inject_set_prefix),
109 VCPU_STAT("inject_stop_signal", inject_stop_signal),
110 VCPU_STAT("inject_pfault_init", inject_pfault_init),
111 VM_STAT("inject_virtio", inject_virtio),
112 VCPU_STAT("instruction_epsw", instruction_epsw),
113 VCPU_STAT("instruction_gs", instruction_gs),
114 VCPU_STAT("instruction_io_other", instruction_io_other),
115 VCPU_STAT("instruction_lpsw", instruction_lpsw),
116 VCPU_STAT("instruction_lpswe", instruction_lpswe),
117 VCPU_STAT("instruction_pfmf", instruction_pfmf),
118 VCPU_STAT("instruction_ptff", instruction_ptff),
119 VCPU_STAT("instruction_stidp", instruction_stidp),
120 VCPU_STAT("instruction_sck", instruction_sck),
121 VCPU_STAT("instruction_sckpf", instruction_sckpf),
122 VCPU_STAT("instruction_spx", instruction_spx),
123 VCPU_STAT("instruction_stpx", instruction_stpx),
124 VCPU_STAT("instruction_stap", instruction_stap),
125 VCPU_STAT("instruction_iske", instruction_iske),
126 VCPU_STAT("instruction_ri", instruction_ri),
127 VCPU_STAT("instruction_rrbe", instruction_rrbe),
128 VCPU_STAT("instruction_sske", instruction_sske),
129 VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
130 VCPU_STAT("instruction_essa", instruction_essa),
131 VCPU_STAT("instruction_stsi", instruction_stsi),
132 VCPU_STAT("instruction_stfl", instruction_stfl),
133 VCPU_STAT("instruction_tb", instruction_tb),
134 VCPU_STAT("instruction_tpi", instruction_tpi),
135 VCPU_STAT("instruction_tprot", instruction_tprot),
136 VCPU_STAT("instruction_tsch", instruction_tsch),
137 VCPU_STAT("instruction_sthyi", instruction_sthyi),
138 VCPU_STAT("instruction_sie", instruction_sie),
139 VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
140 VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
141 VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
142 VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
143 VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
144 VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
145 VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
146 VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
147 VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
148 VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
149 VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
150 VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
151 VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
152 VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
153 VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
154 VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
155 VCPU_STAT("instruction_diag_10", diagnose_10),
156 VCPU_STAT("instruction_diag_44", diagnose_44),
157 VCPU_STAT("instruction_diag_9c", diagnose_9c),
158 VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
159 VCPU_STAT("instruction_diag_258", diagnose_258),
160 VCPU_STAT("instruction_diag_308", diagnose_308),
161 VCPU_STAT("instruction_diag_500", diagnose_500),
162 VCPU_STAT("instruction_diag_other", diagnose_other),
163 { NULL }
164 };
165
166 struct kvm_s390_tod_clock_ext {
167 __u8 epoch_idx;
168 __u64 tod;
169 __u8 reserved[7];
170 } __packed;
171
172 /* allow nested virtualization in KVM (if enabled by user space) */
173 static int nested;
174 module_param(nested, int, S_IRUGO);
175 MODULE_PARM_DESC(nested, "Nested virtualization support");
176
177 /* allow 1m huge page guest backing, if !nested */
178 static int hpage;
179 module_param(hpage, int, 0444);
180 MODULE_PARM_DESC(hpage, "1m huge page backing support");
181
182 /* maximum percentage of steal time for polling. >100 is treated like 100 */
183 static u8 halt_poll_max_steal = 10;
184 module_param(halt_poll_max_steal, byte, 0644);
185 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
186
187 /* if set to true, the GISA will be initialized and used if available */
188 static bool use_gisa = true;
189 module_param(use_gisa, bool, 0644);
190 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
191
192 /*
193 * For now we handle at most 16 double words as this is what the s390 base
194 * kernel handles and stores in the prefix page. If we ever need to go beyond
195 * this, it requires changes to the code, but the external uapi can stay.
196 */
197 #define SIZE_INTERNAL 16
198
199 /*
200 * Base feature mask that defines default mask for facilities. Consists of the
201 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
202 */
203 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
204 /*
205 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
206 * and defines the facilities that can be enabled via a cpu model.
207 */
208 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
209
210 static unsigned long kvm_s390_fac_size(void)
211 {
212 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
213 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
214 BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
215 sizeof(S390_lowcore.stfle_fac_list));
216
217 return SIZE_INTERNAL;
218 }
219
220 /* available cpu features supported by kvm */
221 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
222 /* available subfunctions indicated via query / "test bit" */
223 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
224
225 static struct gmap_notifier gmap_notifier;
226 static struct gmap_notifier vsie_gmap_notifier;
227 debug_info_t *kvm_s390_dbf;
228 debug_info_t *kvm_s390_dbf_uv;
229
230 /* Section: not file related */
231 int kvm_arch_hardware_enable(void)
232 {
233 /* every s390 is virtualization enabled ;-) */
234 return 0;
235 }
236
237 int kvm_arch_check_processor_compat(void *opaque)
238 {
239 return 0;
240 }
241
242 /* forward declarations */
243 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
244 unsigned long end);
245 static int sca_switch_to_extended(struct kvm *kvm);
246
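/*
 * Adjust the epoch (and, with the multiple-epoch facility, the epoch
 * index) of a SIE control block after the host TOD clock changed by
 * @delta, so that the guest's view of the TOD clock stays constant.
 */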
247 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
248 {
249 u8 delta_idx = 0;
250
251 /*
252 * The TOD jumps by delta, we have to compensate this by adding
253 * -delta to the epoch.
254 */
255 delta = -delta;
256
257 /* sign-extension - we're adding to signed values below */
258 if ((s64)delta < 0)
259 delta_idx = -1;
260
261 scb->epoch += delta;
262 if (scb->ecd & ECD_MEF) {
263 scb->epdx += delta_idx;
264 if (scb->epoch < delta)
265 scb->epdx += 1;
266 }
267 }
268
269 /*
270 * This callback is executed during stop_machine(). All CPUs are therefore
271 * temporarily stopped. In order not to change guest behavior, we have to
272 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
273 * so a CPU won't be stopped while calculating with the epoch.
274 */
275 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
276 void *v)
277 {
278 struct kvm *kvm;
279 struct kvm_vcpu *vcpu;
280 int i;
281 unsigned long long *delta = v;
282
283 list_for_each_entry(kvm, &vm_list, vm_list) {
284 kvm_for_each_vcpu(i, vcpu, kvm) {
285 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
286 if (i == 0) {
287 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
288 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
289 }
290 if (vcpu->arch.cputm_enabled)
291 vcpu->arch.cputm_start += *delta;
292 if (vcpu->arch.vsie_block)
293 kvm_clock_sync_scb(vcpu->arch.vsie_block,
294 *delta);
295 }
296 }
297 return NOTIFY_OK;
298 }
299
300 static struct notifier_block kvm_clock_notifier = {
301 .notifier_call = kvm_clock_sync,
302 };
303
304 int kvm_arch_hardware_setup(void *opaque)
305 {
306 gmap_notifier.notifier_call = kvm_gmap_notifier;
307 gmap_register_pte_notifier(&gmap_notifier);
308 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
309 gmap_register_pte_notifier(&vsie_gmap_notifier);
310 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
311 &kvm_clock_notifier);
312 return 0;
313 }
314
315 void kvm_arch_hardware_unsetup(void)
316 {
317 gmap_unregister_pte_notifier(&gmap_notifier);
318 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
319 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
320 &kvm_clock_notifier);
321 }
322
323 static void allow_cpu_feat(unsigned long nr)
324 {
325 set_bit_inv(nr, kvm_s390_available_cpu_feat);
326 }
327
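/*
 * Query whether PERFORM LOCKED OPERATION function @nr is installed:
 * setting bit 0x100 in the function code selects the "test bit" form,
 * and condition code 0 indicates the function is available.
 */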
328 static inline int plo_test_bit(unsigned char nr)
329 {
330 unsigned long function = (unsigned long)nr | 0x100;
331 int cc;
332
333 asm volatile(
334 " lgr 0,%[function]\n"
335 /* Parameter registers are ignored for "test bit" */
336 " plo 0,0,0,0(0)\n"
337 " ipm %0\n"
338 " srl %0,28\n"
339 : "=d" (cc)
340 : [function] "d" (function)
341 : "cc", "0");
342 return cc == 0;
343 }
344
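/*
 * Execute the query function (function code 0 in GR0) of the
 * instruction given by @opcode and store the availability bitmap in
 * the buffer addressed by @query (GR1).
 */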
345 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
346 {
347 asm volatile(
348 " lghi 0,0\n"
349 " lgr 1,%[query]\n"
350 /* Parameter registers are ignored */
351 " .insn rrf,%[opc] << 16,2,4,6,0\n"
352 :
353 : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
354 : "cc", "memory", "0", "1");
355 }
356
357 #define INSN_SORTL 0xb938
358 #define INSN_DFLTCC 0xb939
359
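/*
 * Probe the host once at module load: record the installed PLO, PTFF
 * and CPACF subfunctions as well as the CPU features that may later be
 * offered to guests; the SIE-related features are only made available
 * when the nested= module parameter is set.
 */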
360 static void kvm_s390_cpu_feat_init(void)
361 {
362 int i;
363
364 for (i = 0; i < 256; ++i) {
365 if (plo_test_bit(i))
366 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
367 }
368
369 if (test_facility(28)) /* TOD-clock steering */
370 ptff(kvm_s390_available_subfunc.ptff,
371 sizeof(kvm_s390_available_subfunc.ptff),
372 PTFF_QAF);
373
374 if (test_facility(17)) { /* MSA */
375 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
376 kvm_s390_available_subfunc.kmac);
377 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
378 kvm_s390_available_subfunc.kmc);
379 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
380 kvm_s390_available_subfunc.km);
381 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
382 kvm_s390_available_subfunc.kimd);
383 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
384 kvm_s390_available_subfunc.klmd);
385 }
386 if (test_facility(76)) /* MSA3 */
387 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
388 kvm_s390_available_subfunc.pckmo);
389 if (test_facility(77)) { /* MSA4 */
390 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
391 kvm_s390_available_subfunc.kmctr);
392 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
393 kvm_s390_available_subfunc.kmf);
394 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
395 kvm_s390_available_subfunc.kmo);
396 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
397 kvm_s390_available_subfunc.pcc);
398 }
399 if (test_facility(57)) /* MSA5 */
400 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
401 kvm_s390_available_subfunc.ppno);
402
403 if (test_facility(146)) /* MSA8 */
404 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
405 kvm_s390_available_subfunc.kma);
406
407 if (test_facility(155)) /* MSA9 */
408 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
409 kvm_s390_available_subfunc.kdsa);
410
411 if (test_facility(150)) /* SORTL */
412 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
413
414 if (test_facility(151)) /* DFLTCC */
415 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
416
417 if (MACHINE_HAS_ESOP)
418 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
419 /*
420 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
421 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
422 */
423 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
424 !test_facility(3) || !nested)
425 return;
426 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
427 if (sclp.has_64bscao)
428 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
429 if (sclp.has_siif)
430 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
431 if (sclp.has_gpere)
432 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
433 if (sclp.has_gsls)
434 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
435 if (sclp.has_ib)
436 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
437 if (sclp.has_cei)
438 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
439 if (sclp.has_ibs)
440 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
441 if (sclp.has_kss)
442 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
443 /*
444 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
445 * all skey handling functions read/set the skey from the PGSTE
446 * instead of the real storage key.
447 *
448 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
449 * pages be detected as preserved although they are resident.
450 *
451 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
452 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
453 *
454 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
455 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
456 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
457 *
458 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
459 * cannot easily shadow the SCA because of the ipte lock.
460 */
461 }
462
463 int kvm_arch_init(void *opaque)
464 {
465 int rc = -ENOMEM;
466
467 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
468 if (!kvm_s390_dbf)
469 return -ENOMEM;
470
471 kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
472 if (!kvm_s390_dbf_uv)
473 goto out;
474
475 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
476 debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
477 goto out;
478
479 kvm_s390_cpu_feat_init();
480
481 /* Register floating interrupt controller interface. */
482 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
483 if (rc) {
484 pr_err("A FLIC registration call failed with rc=%d\n", rc);
485 goto out;
486 }
487
488 rc = kvm_s390_gib_init(GAL_ISC);
489 if (rc)
490 goto out;
491
492 return 0;
493
494 out:
495 kvm_arch_exit();
496 return rc;
497 }
498
499 void kvm_arch_exit(void)
500 {
501 kvm_s390_gib_destroy();
502 debug_unregister(kvm_s390_dbf);
503 debug_unregister(kvm_s390_dbf_uv);
504 }
505
506 /* Section: device related */
507 long kvm_arch_dev_ioctl(struct file *filp,
508 unsigned int ioctl, unsigned long arg)
509 {
510 if (ioctl == KVM_S390_ENABLE_SIE)
511 return s390_enable_sie();
512 return -EINVAL;
513 }
514
515 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
516 {
517 int r;
518
519 switch (ext) {
520 case KVM_CAP_S390_PSW:
521 case KVM_CAP_S390_GMAP:
522 case KVM_CAP_SYNC_MMU:
523 #ifdef CONFIG_KVM_S390_UCONTROL
524 case KVM_CAP_S390_UCONTROL:
525 #endif
526 case KVM_CAP_ASYNC_PF:
527 case KVM_CAP_SYNC_REGS:
528 case KVM_CAP_ONE_REG:
529 case KVM_CAP_ENABLE_CAP:
530 case KVM_CAP_S390_CSS_SUPPORT:
531 case KVM_CAP_IOEVENTFD:
532 case KVM_CAP_DEVICE_CTRL:
533 case KVM_CAP_S390_IRQCHIP:
534 case KVM_CAP_VM_ATTRIBUTES:
535 case KVM_CAP_MP_STATE:
536 case KVM_CAP_IMMEDIATE_EXIT:
537 case KVM_CAP_S390_INJECT_IRQ:
538 case KVM_CAP_S390_USER_SIGP:
539 case KVM_CAP_S390_USER_STSI:
540 case KVM_CAP_S390_SKEYS:
541 case KVM_CAP_S390_IRQ_STATE:
542 case KVM_CAP_S390_USER_INSTR0:
543 case KVM_CAP_S390_CMMA_MIGRATION:
544 case KVM_CAP_S390_AIS:
545 case KVM_CAP_S390_AIS_MIGRATION:
546 case KVM_CAP_S390_VCPU_RESETS:
547 case KVM_CAP_SET_GUEST_DEBUG:
548 case KVM_CAP_S390_DIAG318:
549 r = 1;
550 break;
551 case KVM_CAP_S390_HPAGE_1M:
552 r = 0;
553 if (hpage && !kvm_is_ucontrol(kvm))
554 r = 1;
555 break;
556 case KVM_CAP_S390_MEM_OP:
557 r = MEM_OP_MAX_SIZE;
558 break;
559 case KVM_CAP_NR_VCPUS:
560 case KVM_CAP_MAX_VCPUS:
561 case KVM_CAP_MAX_VCPU_ID:
562 r = KVM_S390_BSCA_CPU_SLOTS;
563 if (!kvm_s390_use_sca_entries())
564 r = KVM_MAX_VCPUS;
565 else if (sclp.has_esca && sclp.has_64bscao)
566 r = KVM_S390_ESCA_CPU_SLOTS;
567 break;
568 case KVM_CAP_S390_COW:
569 r = MACHINE_HAS_ESOP;
570 break;
571 case KVM_CAP_S390_VECTOR_REGISTERS:
572 r = MACHINE_HAS_VX;
573 break;
574 case KVM_CAP_S390_RI:
575 r = test_facility(64);
576 break;
577 case KVM_CAP_S390_GS:
578 r = test_facility(133);
579 break;
580 case KVM_CAP_S390_BPB:
581 r = test_facility(82);
582 break;
583 case KVM_CAP_S390_PROTECTED:
584 r = is_prot_virt_host();
585 break;
586 default:
587 r = 0;
588 }
589 return r;
590 }
591
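/*
 * Propagate dirty bits from the gmap into the memslot's dirty bitmap,
 * walking the slot one segment (_PAGE_ENTRIES pages) at a time and
 * marking every page reported dirty by gmap_sync_dirty_log_pmd().
 */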
592 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
593 {
594 int i;
595 gfn_t cur_gfn, last_gfn;
596 unsigned long gaddr, vmaddr;
597 struct gmap *gmap = kvm->arch.gmap;
598 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
599
600 /* Loop over all guest segments */
601 cur_gfn = memslot->base_gfn;
602 last_gfn = memslot->base_gfn + memslot->npages;
603 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
604 gaddr = gfn_to_gpa(cur_gfn);
605 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
606 if (kvm_is_error_hva(vmaddr))
607 continue;
608
609 bitmap_zero(bitmap, _PAGE_ENTRIES);
610 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
611 for (i = 0; i < _PAGE_ENTRIES; i++) {
612 if (test_bit(i, bitmap))
613 mark_page_dirty(kvm, cur_gfn + i);
614 }
615
616 if (fatal_signal_pending(current))
617 return;
618 cond_resched();
619 }
620 }
621
622 /* Section: vm related */
623 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
624
625 /*
626 * Get (and clear) the dirty memory log for a memory slot.
627 */
628 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
629 struct kvm_dirty_log *log)
630 {
631 int r;
632 unsigned long n;
633 struct kvm_memory_slot *memslot;
634 int is_dirty;
635
636 if (kvm_is_ucontrol(kvm))
637 return -EINVAL;
638
639 mutex_lock(&kvm->slots_lock);
640
641 r = -EINVAL;
642 if (log->slot >= KVM_USER_MEM_SLOTS)
643 goto out;
644
645 r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
646 if (r)
647 goto out;
648
649 /* Clear the dirty log */
650 if (is_dirty) {
651 n = kvm_dirty_bitmap_bytes(memslot);
652 memset(memslot->dirty_bitmap, 0, n);
653 }
654 r = 0;
655 out:
656 mutex_unlock(&kvm->slots_lock);
657 return r;
658 }
659
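/* Make every vCPU intercept operation exceptions (instruction 0x0000). */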
660 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
661 {
662 unsigned int i;
663 struct kvm_vcpu *vcpu;
664
665 kvm_for_each_vcpu(i, vcpu, kvm) {
666 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
667 }
668 }
669
670 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
671 {
672 int r;
673
674 if (cap->flags)
675 return -EINVAL;
676
677 switch (cap->cap) {
678 case KVM_CAP_S390_IRQCHIP:
679 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
680 kvm->arch.use_irqchip = 1;
681 r = 0;
682 break;
683 case KVM_CAP_S390_USER_SIGP:
684 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
685 kvm->arch.user_sigp = 1;
686 r = 0;
687 break;
688 case KVM_CAP_S390_VECTOR_REGISTERS:
689 mutex_lock(&kvm->lock);
690 if (kvm->created_vcpus) {
691 r = -EBUSY;
692 } else if (MACHINE_HAS_VX) {
693 set_kvm_facility(kvm->arch.model.fac_mask, 129);
694 set_kvm_facility(kvm->arch.model.fac_list, 129);
695 if (test_facility(134)) {
696 set_kvm_facility(kvm->arch.model.fac_mask, 134);
697 set_kvm_facility(kvm->arch.model.fac_list, 134);
698 }
699 if (test_facility(135)) {
700 set_kvm_facility(kvm->arch.model.fac_mask, 135);
701 set_kvm_facility(kvm->arch.model.fac_list, 135);
702 }
703 if (test_facility(148)) {
704 set_kvm_facility(kvm->arch.model.fac_mask, 148);
705 set_kvm_facility(kvm->arch.model.fac_list, 148);
706 }
707 if (test_facility(152)) {
708 set_kvm_facility(kvm->arch.model.fac_mask, 152);
709 set_kvm_facility(kvm->arch.model.fac_list, 152);
710 }
711 r = 0;
712 } else
713 r = -EINVAL;
714 mutex_unlock(&kvm->lock);
715 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
716 r ? "(not available)" : "(success)");
717 break;
718 case KVM_CAP_S390_RI:
719 r = -EINVAL;
720 mutex_lock(&kvm->lock);
721 if (kvm->created_vcpus) {
722 r = -EBUSY;
723 } else if (test_facility(64)) {
724 set_kvm_facility(kvm->arch.model.fac_mask, 64);
725 set_kvm_facility(kvm->arch.model.fac_list, 64);
726 r = 0;
727 }
728 mutex_unlock(&kvm->lock);
729 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
730 r ? "(not available)" : "(success)");
731 break;
732 case KVM_CAP_S390_AIS:
733 mutex_lock(&kvm->lock);
734 if (kvm->created_vcpus) {
735 r = -EBUSY;
736 } else {
737 set_kvm_facility(kvm->arch.model.fac_mask, 72);
738 set_kvm_facility(kvm->arch.model.fac_list, 72);
739 r = 0;
740 }
741 mutex_unlock(&kvm->lock);
742 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
743 r ? "(not available)" : "(success)");
744 break;
745 case KVM_CAP_S390_GS:
746 r = -EINVAL;
747 mutex_lock(&kvm->lock);
748 if (kvm->created_vcpus) {
749 r = -EBUSY;
750 } else if (test_facility(133)) {
751 set_kvm_facility(kvm->arch.model.fac_mask, 133);
752 set_kvm_facility(kvm->arch.model.fac_list, 133);
753 r = 0;
754 }
755 mutex_unlock(&kvm->lock);
756 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
757 r ? "(not available)" : "(success)");
758 break;
759 case KVM_CAP_S390_HPAGE_1M:
760 mutex_lock(&kvm->lock);
761 if (kvm->created_vcpus)
762 r = -EBUSY;
763 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
764 r = -EINVAL;
765 else {
766 r = 0;
767 mmap_write_lock(kvm->mm);
768 kvm->mm->context.allow_gmap_hpage_1m = 1;
769 mmap_write_unlock(kvm->mm);
770 /*
771 * We might have to create fake 4k page
772 * tables. To avoid that the hardware works on
773 * stale PGSTEs, we emulate these instructions.
774 */
775 kvm->arch.use_skf = 0;
776 kvm->arch.use_pfmfi = 0;
777 }
778 mutex_unlock(&kvm->lock);
779 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
780 r ? "(not available)" : "(success)");
781 break;
782 case KVM_CAP_S390_USER_STSI:
783 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
784 kvm->arch.user_stsi = 1;
785 r = 0;
786 break;
787 case KVM_CAP_S390_USER_INSTR0:
788 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
789 kvm->arch.user_instr0 = 1;
790 icpt_operexc_on_all_vcpus(kvm);
791 r = 0;
792 break;
793 default:
794 r = -EINVAL;
795 break;
796 }
797 return r;
798 }
799
800 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
801 {
802 int ret;
803
804 switch (attr->attr) {
805 case KVM_S390_VM_MEM_LIMIT_SIZE:
806 ret = 0;
807 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
808 kvm->arch.mem_limit);
809 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
810 ret = -EFAULT;
811 break;
812 default:
813 ret = -ENXIO;
814 break;
815 }
816 return ret;
817 }
818
819 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
820 {
821 int ret;
822 unsigned int idx;
823 switch (attr->attr) {
824 case KVM_S390_VM_MEM_ENABLE_CMMA:
825 ret = -ENXIO;
826 if (!sclp.has_cmma)
827 break;
828
829 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
830 mutex_lock(&kvm->lock);
831 if (kvm->created_vcpus)
832 ret = -EBUSY;
833 else if (kvm->mm->context.allow_gmap_hpage_1m)
834 ret = -EINVAL;
835 else {
836 kvm->arch.use_cmma = 1;
837 /* Not compatible with cmma. */
838 kvm->arch.use_pfmfi = 0;
839 ret = 0;
840 }
841 mutex_unlock(&kvm->lock);
842 break;
843 case KVM_S390_VM_MEM_CLR_CMMA:
844 ret = -ENXIO;
845 if (!sclp.has_cmma)
846 break;
847 ret = -EINVAL;
848 if (!kvm->arch.use_cmma)
849 break;
850
851 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
852 mutex_lock(&kvm->lock);
853 idx = srcu_read_lock(&kvm->srcu);
854 s390_reset_cmma(kvm->arch.gmap->mm);
855 srcu_read_unlock(&kvm->srcu, idx);
856 mutex_unlock(&kvm->lock);
857 ret = 0;
858 break;
859 case KVM_S390_VM_MEM_LIMIT_SIZE: {
860 unsigned long new_limit;
861
862 if (kvm_is_ucontrol(kvm))
863 return -EINVAL;
864
865 if (get_user(new_limit, (u64 __user *)attr->addr))
866 return -EFAULT;
867
868 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
869 new_limit > kvm->arch.mem_limit)
870 return -E2BIG;
871
872 if (!new_limit)
873 return -EINVAL;
874
875 /* gmap_create takes last usable address */
876 if (new_limit != KVM_S390_NO_MEM_LIMIT)
877 new_limit -= 1;
878
879 ret = -EBUSY;
880 mutex_lock(&kvm->lock);
881 if (!kvm->created_vcpus) {
882 /* gmap_create will round the limit up */
883 struct gmap *new = gmap_create(current->mm, new_limit);
884
885 if (!new) {
886 ret = -ENOMEM;
887 } else {
888 gmap_remove(kvm->arch.gmap);
889 new->private = kvm;
890 kvm->arch.gmap = new;
891 ret = 0;
892 }
893 }
894 mutex_unlock(&kvm->lock);
895 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
896 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
897 (void *) kvm->arch.gmap->asce);
898 break;
899 }
900 default:
901 ret = -ENXIO;
902 break;
903 }
904 return ret;
905 }
906
907 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
908
909 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
910 {
911 struct kvm_vcpu *vcpu;
912 int i;
913
914 kvm_s390_vcpu_block_all(kvm);
915
916 kvm_for_each_vcpu(i, vcpu, kvm) {
917 kvm_s390_vcpu_crypto_setup(vcpu);
918 /* recreate the shadow crycb by leaving the VSIE handler */
919 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
920 }
921
922 kvm_s390_vcpu_unblock_all(kvm);
923 }
924
925 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
926 {
927 mutex_lock(&kvm->lock);
928 switch (attr->attr) {
929 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
930 if (!test_kvm_facility(kvm, 76)) {
931 mutex_unlock(&kvm->lock);
932 return -EINVAL;
933 }
934 get_random_bytes(
935 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
936 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
937 kvm->arch.crypto.aes_kw = 1;
938 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
939 break;
940 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
941 if (!test_kvm_facility(kvm, 76)) {
942 mutex_unlock(&kvm->lock);
943 return -EINVAL;
944 }
945 get_random_bytes(
946 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
947 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
948 kvm->arch.crypto.dea_kw = 1;
949 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
950 break;
951 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
952 if (!test_kvm_facility(kvm, 76)) {
953 mutex_unlock(&kvm->lock);
954 return -EINVAL;
955 }
956 kvm->arch.crypto.aes_kw = 0;
957 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
958 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
959 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
960 break;
961 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
962 if (!test_kvm_facility(kvm, 76)) {
963 mutex_unlock(&kvm->lock);
964 return -EINVAL;
965 }
966 kvm->arch.crypto.dea_kw = 0;
967 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
968 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
969 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
970 break;
971 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
972 if (!ap_instructions_available()) {
973 mutex_unlock(&kvm->lock);
974 return -EOPNOTSUPP;
975 }
976 kvm->arch.crypto.apie = 1;
977 break;
978 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
979 if (!ap_instructions_available()) {
980 mutex_unlock(&kvm->lock);
981 return -EOPNOTSUPP;
982 }
983 kvm->arch.crypto.apie = 0;
984 break;
985 default:
986 mutex_unlock(&kvm->lock);
987 return -ENXIO;
988 }
989
990 kvm_s390_vcpu_crypto_reset_all(kvm);
991 mutex_unlock(&kvm->lock);
992 return 0;
993 }
994
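/* Deliver the synchronous request @req to every vCPU of the VM. */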
995 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
996 {
997 int cx;
998 struct kvm_vcpu *vcpu;
999
1000 kvm_for_each_vcpu(cx, vcpu, kvm)
1001 kvm_s390_sync_request(req, vcpu);
1002 }
1003
1004 /*
1005 * Must be called with kvm->srcu held to avoid races on memslots, and with
1006 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1007 */
1008 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1009 {
1010 struct kvm_memory_slot *ms;
1011 struct kvm_memslots *slots;
1012 unsigned long ram_pages = 0;
1013 int slotnr;
1014
1015 /* migration mode already enabled */
1016 if (kvm->arch.migration_mode)
1017 return 0;
1018 slots = kvm_memslots(kvm);
1019 if (!slots || !slots->used_slots)
1020 return -EINVAL;
1021
1022 if (!kvm->arch.use_cmma) {
1023 kvm->arch.migration_mode = 1;
1024 return 0;
1025 }
1026 /* mark all the pages in active slots as dirty */
1027 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1028 ms = slots->memslots + slotnr;
1029 if (!ms->dirty_bitmap)
1030 return -EINVAL;
1031 /*
1032 * The second half of the bitmap is only used on x86,
1033 * and would be wasted otherwise, so we put it to good
1034 * use here to keep track of the state of the storage
1035 * attributes.
1036 */
1037 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1038 ram_pages += ms->npages;
1039 }
1040 atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1041 kvm->arch.migration_mode = 1;
1042 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1043 return 0;
1044 }
1045
1046 /*
1047 * Must be called with kvm->slots_lock to avoid races with ourselves and
1048 * kvm_s390_vm_start_migration.
1049 */
1050 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1051 {
1052 /* migration mode already disabled */
1053 if (!kvm->arch.migration_mode)
1054 return 0;
1055 kvm->arch.migration_mode = 0;
1056 if (kvm->arch.use_cmma)
1057 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1058 return 0;
1059 }
1060
1061 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1062 struct kvm_device_attr *attr)
1063 {
1064 int res = -ENXIO;
1065
1066 mutex_lock(&kvm->slots_lock);
1067 switch (attr->attr) {
1068 case KVM_S390_VM_MIGRATION_START:
1069 res = kvm_s390_vm_start_migration(kvm);
1070 break;
1071 case KVM_S390_VM_MIGRATION_STOP:
1072 res = kvm_s390_vm_stop_migration(kvm);
1073 break;
1074 default:
1075 break;
1076 }
1077 mutex_unlock(&kvm->slots_lock);
1078
1079 return res;
1080 }
1081
1082 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1083 struct kvm_device_attr *attr)
1084 {
1085 u64 mig = kvm->arch.migration_mode;
1086
1087 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1088 return -ENXIO;
1089
1090 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1091 return -EFAULT;
1092 return 0;
1093 }
1094
1095 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1096 {
1097 struct kvm_s390_vm_tod_clock gtod;
1098
1099 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1100 return -EFAULT;
1101
1102 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1103 return -EINVAL;
1104 kvm_s390_set_tod_clock(kvm, &gtod);
1105
1106 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1107 gtod.epoch_idx, gtod.tod);
1108
1109 return 0;
1110 }
1111
1112 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1113 {
1114 u8 gtod_high;
1115
1116 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1117 sizeof(gtod_high)))
1118 return -EFAULT;
1119
1120 if (gtod_high != 0)
1121 return -EINVAL;
1122 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1123
1124 return 0;
1125 }
1126
1127 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1128 {
1129 struct kvm_s390_vm_tod_clock gtod = { 0 };
1130
1131 if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1132 sizeof(gtod.tod)))
1133 return -EFAULT;
1134
1135 kvm_s390_set_tod_clock(kvm, &gtod);
1136 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1137 return 0;
1138 }
1139
1140 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1141 {
1142 int ret;
1143
1144 if (attr->flags)
1145 return -EINVAL;
1146
1147 switch (attr->attr) {
1148 case KVM_S390_VM_TOD_EXT:
1149 ret = kvm_s390_set_tod_ext(kvm, attr);
1150 break;
1151 case KVM_S390_VM_TOD_HIGH:
1152 ret = kvm_s390_set_tod_high(kvm, attr);
1153 break;
1154 case KVM_S390_VM_TOD_LOW:
1155 ret = kvm_s390_set_tod_low(kvm, attr);
1156 break;
1157 default:
1158 ret = -ENXIO;
1159 break;
1160 }
1161 return ret;
1162 }
1163
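/*
 * Read the host TOD clock and add the guest epoch (and epoch index if
 * facility 139 is available), carrying into the epoch index on
 * overflow. Preemption is disabled so a concurrent clock sync cannot
 * change the epoch while the sum is computed.
 */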
1164 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1165 struct kvm_s390_vm_tod_clock *gtod)
1166 {
1167 struct kvm_s390_tod_clock_ext htod;
1168
1169 preempt_disable();
1170
1171 get_tod_clock_ext((char *)&htod);
1172
1173 gtod->tod = htod.tod + kvm->arch.epoch;
1174 gtod->epoch_idx = 0;
1175 if (test_kvm_facility(kvm, 139)) {
1176 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1177 if (gtod->tod < htod.tod)
1178 gtod->epoch_idx += 1;
1179 }
1180
1181 preempt_enable();
1182 }
1183
1184 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1185 {
1186 struct kvm_s390_vm_tod_clock gtod;
1187
1188 memset(&gtod, 0, sizeof(gtod));
1189 kvm_s390_get_tod_clock(kvm, &gtod);
1190 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1191 return -EFAULT;
1192
1193 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1194 gtod.epoch_idx, gtod.tod);
1195 return 0;
1196 }
1197
1198 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1199 {
1200 u8 gtod_high = 0;
1201
1202 if (copy_to_user((void __user *)attr->addr, &gtod_high,
1203 sizeof(gtod_high)))
1204 return -EFAULT;
1205 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1206
1207 return 0;
1208 }
1209
1210 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1211 {
1212 u64 gtod;
1213
1214 gtod = kvm_s390_get_tod_clock_fast(kvm);
1215 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1216 return -EFAULT;
1217 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1218
1219 return 0;
1220 }
1221
1222 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1223 {
1224 int ret;
1225
1226 if (attr->flags)
1227 return -EINVAL;
1228
1229 switch (attr->attr) {
1230 case KVM_S390_VM_TOD_EXT:
1231 ret = kvm_s390_get_tod_ext(kvm, attr);
1232 break;
1233 case KVM_S390_VM_TOD_HIGH:
1234 ret = kvm_s390_get_tod_high(kvm, attr);
1235 break;
1236 case KVM_S390_VM_TOD_LOW:
1237 ret = kvm_s390_get_tod_low(kvm, attr);
1238 break;
1239 default:
1240 ret = -ENXIO;
1241 break;
1242 }
1243 return ret;
1244 }
1245
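/*
 * Set the guest CPU model (cpuid, IBC and facility list). The
 * requested IBC is clamped to the range the machine supports, i.e.
 * between the lowest and the unblocked IBC reported by the SCLP.
 */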
1246 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1247 {
1248 struct kvm_s390_vm_cpu_processor *proc;
1249 u16 lowest_ibc, unblocked_ibc;
1250 int ret = 0;
1251
1252 mutex_lock(&kvm->lock);
1253 if (kvm->created_vcpus) {
1254 ret = -EBUSY;
1255 goto out;
1256 }
1257 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1258 if (!proc) {
1259 ret = -ENOMEM;
1260 goto out;
1261 }
1262 if (!copy_from_user(proc, (void __user *)attr->addr,
1263 sizeof(*proc))) {
1264 kvm->arch.model.cpuid = proc->cpuid;
1265 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1266 unblocked_ibc = sclp.ibc & 0xfff;
1267 if (lowest_ibc && proc->ibc) {
1268 if (proc->ibc > unblocked_ibc)
1269 kvm->arch.model.ibc = unblocked_ibc;
1270 else if (proc->ibc < lowest_ibc)
1271 kvm->arch.model.ibc = lowest_ibc;
1272 else
1273 kvm->arch.model.ibc = proc->ibc;
1274 }
1275 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1276 S390_ARCH_FAC_LIST_SIZE_BYTE);
1277 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1278 kvm->arch.model.ibc,
1279 kvm->arch.model.cpuid);
1280 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1281 kvm->arch.model.fac_list[0],
1282 kvm->arch.model.fac_list[1],
1283 kvm->arch.model.fac_list[2]);
1284 } else
1285 ret = -EFAULT;
1286 kfree(proc);
1287 out:
1288 mutex_unlock(&kvm->lock);
1289 return ret;
1290 }
1291
1292 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1293 struct kvm_device_attr *attr)
1294 {
1295 struct kvm_s390_vm_cpu_feat data;
1296
1297 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1298 return -EFAULT;
1299 if (!bitmap_subset((unsigned long *) data.feat,
1300 kvm_s390_available_cpu_feat,
1301 KVM_S390_VM_CPU_FEAT_NR_BITS))
1302 return -EINVAL;
1303
1304 mutex_lock(&kvm->lock);
1305 if (kvm->created_vcpus) {
1306 mutex_unlock(&kvm->lock);
1307 return -EBUSY;
1308 }
1309 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1310 KVM_S390_VM_CPU_FEAT_NR_BITS);
1311 mutex_unlock(&kvm->lock);
1312 VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1313 data.feat[0],
1314 data.feat[1],
1315 data.feat[2]);
1316 return 0;
1317 }
1318
1319 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1320 struct kvm_device_attr *attr)
1321 {
1322 mutex_lock(&kvm->lock);
1323 if (kvm->created_vcpus) {
1324 mutex_unlock(&kvm->lock);
1325 return -EBUSY;
1326 }
1327
1328 if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1329 sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1330 mutex_unlock(&kvm->lock);
1331 return -EFAULT;
1332 }
1333 mutex_unlock(&kvm->lock);
1334
1335 VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1336 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1337 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1338 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1339 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1340 VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1341 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1342 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1343 VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1344 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1345 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1346 VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1347 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1348 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1349 VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
1350 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1351 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1352 VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1353 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1354 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1355 VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1356 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1357 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1358 VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1359 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1360 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1361 VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1362 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1363 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1364 VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1365 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1366 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1367 VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1368 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1369 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1370 VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1371 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1372 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1373 VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1374 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1375 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1376 VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1377 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1378 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1379 VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1380 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1381 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1382 VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1383 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1384 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1385 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1386 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1387 VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1388 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1389 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1390 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1391 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1392
1393 return 0;
1394 }
1395
1396 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1397 {
1398 int ret = -ENXIO;
1399
1400 switch (attr->attr) {
1401 case KVM_S390_VM_CPU_PROCESSOR:
1402 ret = kvm_s390_set_processor(kvm, attr);
1403 break;
1404 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1405 ret = kvm_s390_set_processor_feat(kvm, attr);
1406 break;
1407 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1408 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1409 break;
1410 }
1411 return ret;
1412 }
1413
1414 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1415 {
1416 struct kvm_s390_vm_cpu_processor *proc;
1417 int ret = 0;
1418
1419 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1420 if (!proc) {
1421 ret = -ENOMEM;
1422 goto out;
1423 }
1424 proc->cpuid = kvm->arch.model.cpuid;
1425 proc->ibc = kvm->arch.model.ibc;
1426 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1427 S390_ARCH_FAC_LIST_SIZE_BYTE);
1428 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1429 kvm->arch.model.ibc,
1430 kvm->arch.model.cpuid);
1431 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1432 kvm->arch.model.fac_list[0],
1433 kvm->arch.model.fac_list[1],
1434 kvm->arch.model.fac_list[2]);
1435 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1436 ret = -EFAULT;
1437 kfree(proc);
1438 out:
1439 return ret;
1440 }
1441
1442 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1443 {
1444 struct kvm_s390_vm_cpu_machine *mach;
1445 int ret = 0;
1446
1447 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1448 if (!mach) {
1449 ret = -ENOMEM;
1450 goto out;
1451 }
1452 get_cpu_id((struct cpuid *) &mach->cpuid);
1453 mach->ibc = sclp.ibc;
1454 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1455 S390_ARCH_FAC_LIST_SIZE_BYTE);
1456 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1457 sizeof(S390_lowcore.stfle_fac_list));
1458 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1459 kvm->arch.model.ibc,
1460 kvm->arch.model.cpuid);
1461 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1462 mach->fac_mask[0],
1463 mach->fac_mask[1],
1464 mach->fac_mask[2]);
1465 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1466 mach->fac_list[0],
1467 mach->fac_list[1],
1468 mach->fac_list[2]);
1469 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1470 ret = -EFAULT;
1471 kfree(mach);
1472 out:
1473 return ret;
1474 }
1475
1476 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1477 struct kvm_device_attr *attr)
1478 {
1479 struct kvm_s390_vm_cpu_feat data;
1480
1481 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1482 KVM_S390_VM_CPU_FEAT_NR_BITS);
1483 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1484 return -EFAULT;
1485 VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1486 data.feat[0],
1487 data.feat[1],
1488 data.feat[2]);
1489 return 0;
1490 }
1491
1492 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1493 struct kvm_device_attr *attr)
1494 {
1495 struct kvm_s390_vm_cpu_feat data;
1496
1497 bitmap_copy((unsigned long *) data.feat,
1498 kvm_s390_available_cpu_feat,
1499 KVM_S390_VM_CPU_FEAT_NR_BITS);
1500 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1501 return -EFAULT;
1502 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1503 data.feat[0],
1504 data.feat[1],
1505 data.feat[2]);
1506 return 0;
1507 }
1508
1509 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1510 struct kvm_device_attr *attr)
1511 {
1512 if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1513 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1514 return -EFAULT;
1515
1516 VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1517 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1518 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1519 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1520 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1521 VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1522 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1523 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1524 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1525 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1526 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1527 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1528 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1529 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1530 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
1531 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1532 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1533 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1534 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1535 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1536 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1537 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1538 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1539 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1540 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1541 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1542 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1543 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1544 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1545 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1546 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1547 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1548 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1549 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1550 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1551 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1552 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1553 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1554 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1555 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1556 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1557 VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1558 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1559 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1560 VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1561 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1562 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1563 VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1564 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1565 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1566 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1567 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1568 VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1569 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1570 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1571 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1572 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1573
1574 return 0;
1575 }
1576
1577 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1578 struct kvm_device_attr *attr)
1579 {
1580 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1581 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1582 return -EFAULT;
1583
1584 VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1585 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1586 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1587 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1588 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1589 VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
1590 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1591 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1592 VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
1593 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1594 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1595 VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
1596 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1597 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1598 VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
1599 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1600 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1601 VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
1602 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1603 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1604 VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
1605 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1606 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1607 VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
1608 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1609 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1610 VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
1611 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1612 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1613 VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
1614 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1615 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1616 VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
1617 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1618 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1619 VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
1620 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1621 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1622 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
1623 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1624 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1625 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
1626 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1627 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1628 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
1629 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1630 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1631 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1632 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1633 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1634 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1635 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1636 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1637 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1638 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1639 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1640 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1641
1642 return 0;
1643 }
1644
1645 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1646 {
1647 int ret = -ENXIO;
1648
1649 switch (attr->attr) {
1650 case KVM_S390_VM_CPU_PROCESSOR:
1651 ret = kvm_s390_get_processor(kvm, attr);
1652 break;
1653 case KVM_S390_VM_CPU_MACHINE:
1654 ret = kvm_s390_get_machine(kvm, attr);
1655 break;
1656 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1657 ret = kvm_s390_get_processor_feat(kvm, attr);
1658 break;
1659 case KVM_S390_VM_CPU_MACHINE_FEAT:
1660 ret = kvm_s390_get_machine_feat(kvm, attr);
1661 break;
1662 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1663 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1664 break;
1665 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1666 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1667 break;
1668 }
1669 return ret;
1670 }
1671
1672 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1673 {
1674 int ret;
1675
1676 switch (attr->group) {
1677 case KVM_S390_VM_MEM_CTRL:
1678 ret = kvm_s390_set_mem_control(kvm, attr);
1679 break;
1680 case KVM_S390_VM_TOD:
1681 ret = kvm_s390_set_tod(kvm, attr);
1682 break;
1683 case KVM_S390_VM_CPU_MODEL:
1684 ret = kvm_s390_set_cpu_model(kvm, attr);
1685 break;
1686 case KVM_S390_VM_CRYPTO:
1687 ret = kvm_s390_vm_set_crypto(kvm, attr);
1688 break;
1689 case KVM_S390_VM_MIGRATION:
1690 ret = kvm_s390_vm_set_migration(kvm, attr);
1691 break;
1692 default:
1693 ret = -ENXIO;
1694 break;
1695 }
1696
1697 return ret;
1698 }
1699
1700 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1701 {
1702 int ret;
1703
1704 switch (attr->group) {
1705 case KVM_S390_VM_MEM_CTRL:
1706 ret = kvm_s390_get_mem_control(kvm, attr);
1707 break;
1708 case KVM_S390_VM_TOD:
1709 ret = kvm_s390_get_tod(kvm, attr);
1710 break;
1711 case KVM_S390_VM_CPU_MODEL:
1712 ret = kvm_s390_get_cpu_model(kvm, attr);
1713 break;
1714 case KVM_S390_VM_MIGRATION:
1715 ret = kvm_s390_vm_get_migration(kvm, attr);
1716 break;
1717 default:
1718 ret = -ENXIO;
1719 break;
1720 }
1721
1722 return ret;
1723 }
1724
1725 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1726 {
1727 int ret;
1728
1729 switch (attr->group) {
1730 case KVM_S390_VM_MEM_CTRL:
1731 switch (attr->attr) {
1732 case KVM_S390_VM_MEM_ENABLE_CMMA:
1733 case KVM_S390_VM_MEM_CLR_CMMA:
1734 ret = sclp.has_cmma ? 0 : -ENXIO;
1735 break;
1736 case KVM_S390_VM_MEM_LIMIT_SIZE:
1737 ret = 0;
1738 break;
1739 default:
1740 ret = -ENXIO;
1741 break;
1742 }
1743 break;
1744 case KVM_S390_VM_TOD:
1745 switch (attr->attr) {
1746 case KVM_S390_VM_TOD_LOW:
1747 case KVM_S390_VM_TOD_HIGH:
1748 ret = 0;
1749 break;
1750 default:
1751 ret = -ENXIO;
1752 break;
1753 }
1754 break;
1755 case KVM_S390_VM_CPU_MODEL:
1756 switch (attr->attr) {
1757 case KVM_S390_VM_CPU_PROCESSOR:
1758 case KVM_S390_VM_CPU_MACHINE:
1759 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1760 case KVM_S390_VM_CPU_MACHINE_FEAT:
1761 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1762 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1763 ret = 0;
1764 break;
1765 default:
1766 ret = -ENXIO;
1767 break;
1768 }
1769 break;
1770 case KVM_S390_VM_CRYPTO:
1771 switch (attr->attr) {
1772 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1773 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1774 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1775 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1776 ret = 0;
1777 break;
1778 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1779 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1780 ret = ap_instructions_available() ? 0 : -ENXIO;
1781 break;
1782 default:
1783 ret = -ENXIO;
1784 break;
1785 }
1786 break;
1787 case KVM_S390_VM_MIGRATION:
1788 ret = 0;
1789 break;
1790 default:
1791 ret = -ENXIO;
1792 break;
1793 }
1794
1795 return ret;
1796 }
1797
1798 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1799 {
1800 uint8_t *keys;
1801 uint64_t hva;
1802 int srcu_idx, i, r = 0;
1803
1804 if (args->flags != 0)
1805 return -EINVAL;
1806
1807 /* Is this guest using storage keys? */
1808 if (!mm_uses_skeys(current->mm))
1809 return KVM_S390_GET_SKEYS_NONE;
1810
1811 /* Enforce sane limit on memory allocation */
1812 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1813 return -EINVAL;
1814
1815 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1816 if (!keys)
1817 return -ENOMEM;
1818
1819 mmap_read_lock(current->mm);
1820 srcu_idx = srcu_read_lock(&kvm->srcu);
1821 for (i = 0; i < args->count; i++) {
1822 hva = gfn_to_hva(kvm, args->start_gfn + i);
1823 if (kvm_is_error_hva(hva)) {
1824 r = -EFAULT;
1825 break;
1826 }
1827
1828 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1829 if (r)
1830 break;
1831 }
1832 srcu_read_unlock(&kvm->srcu, srcu_idx);
1833 mmap_read_unlock(current->mm);
1834
1835 if (!r) {
1836 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1837 sizeof(uint8_t) * args->count);
1838 if (r)
1839 r = -EFAULT;
1840 }
1841
1842 kvfree(keys);
1843 return r;
1844 }
1845
1846 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1847 {
1848 uint8_t *keys;
1849 uint64_t hva;
1850 int srcu_idx, i, r = 0;
1851 bool unlocked;
1852
1853 if (args->flags != 0)
1854 return -EINVAL;
1855
1856 /* Enforce sane limit on memory allocation */
1857 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1858 return -EINVAL;
1859
1860 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1861 if (!keys)
1862 return -ENOMEM;
1863
1864 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1865 sizeof(uint8_t) * args->count);
1866 if (r) {
1867 r = -EFAULT;
1868 goto out;
1869 }
1870
1871 /* Enable storage key handling for the guest */
1872 r = s390_enable_skey();
1873 if (r)
1874 goto out;
1875
1876 i = 0;
1877 mmap_read_lock(current->mm);
1878 srcu_idx = srcu_read_lock(&kvm->srcu);
1879 while (i < args->count) {
1880 unlocked = false;
1881 hva = gfn_to_hva(kvm, args->start_gfn + i);
1882 if (kvm_is_error_hva(hva)) {
1883 r = -EFAULT;
1884 break;
1885 }
1886
1887 /* Lowest order bit is reserved */
1888 if (keys[i] & 0x01) {
1889 r = -EINVAL;
1890 break;
1891 }
1892
1893 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1894 if (r) {
1895 r = fixup_user_fault(current->mm, hva,
1896 FAULT_FLAG_WRITE, &unlocked);
1897 if (r)
1898 break;
1899 }
1900 if (!r)
1901 i++;
1902 }
1903 srcu_read_unlock(&kvm->srcu, srcu_idx);
1904 mmap_read_unlock(current->mm);
1905 out:
1906 kvfree(keys);
1907 return r;
1908 }
1909
1910 /*
1911 * Base address and length must be sent at the start of each block, therefore
1912 * it's cheaper to send some clean data, as long as it's less than the size of
1913 * two longs.
1914 */
1915 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1916 /* for consistency */
1917 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1918
1919 /*
1920 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1921 * address falls in a hole. In that case the index of one of the memslots
1922 * bordering the hole is returned.
1923 */
1924 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1925 {
1926 int start = 0, end = slots->used_slots;
1927 int slot = atomic_read(&slots->lru_slot);
1928 struct kvm_memory_slot *memslots = slots->memslots;
1929
1930 if (gfn >= memslots[slot].base_gfn &&
1931 gfn < memslots[slot].base_gfn + memslots[slot].npages)
1932 return slot;
1933
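	/* Binary search; the memslot array is kept sorted by descending base_gfn. */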
1934 while (start < end) {
1935 slot = start + (end - start) / 2;
1936
1937 if (gfn >= memslots[slot].base_gfn)
1938 end = slot;
1939 else
1940 start = slot + 1;
1941 }
1942
1943 if (start >= slots->used_slots)
1944 return slots->used_slots - 1;
1945
1946 if (gfn >= memslots[start].base_gfn &&
1947 gfn < memslots[start].base_gfn + memslots[start].npages) {
1948 atomic_set(&slots->lru_slot, start);
1949 }
1950
1951 return start;
1952 }
1953
1954 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1955 u8 *res, unsigned long bufsize)
1956 {
1957 unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1958
1959 args->count = 0;
1960 while (args->count < bufsize) {
1961 hva = gfn_to_hva(kvm, cur_gfn);
1962 /*
1963 * We return an error if the first value was invalid, but we
1964 * return successfully if at least one value was copied.
1965 */
1966 if (kvm_is_error_hva(hva))
1967 return args->count ? 0 : -EFAULT;
1968 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1969 pgstev = 0;
1970 res[args->count++] = (pgstev >> 24) & 0x43;
1971 cur_gfn++;
1972 }
1973
1974 return 0;
1975 }
1976
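/*
 * Find the guest frame number of the next page whose bit is set in the CMMA
 * migration bitmap (the second half of the memslot dirty bitmap), searching
 * from @cur_gfn towards higher addresses; if @cur_gfn lies past the last
 * memslot, the search restarts at the lowest one. When no bit is found, the
 * returned gfn lies past the end of guest memory, which the caller treats
 * as "stop".
 */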
1977 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1978 unsigned long cur_gfn)
1979 {
1980 int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1981 struct kvm_memory_slot *ms = slots->memslots + slotidx;
1982 unsigned long ofs = cur_gfn - ms->base_gfn;
1983
1984 if (ms->base_gfn + ms->npages <= cur_gfn) {
1985 slotidx--;
1986 /* If we are above the highest slot, wrap around */
1987 if (slotidx < 0)
1988 slotidx = slots->used_slots - 1;
1989
1990 ms = slots->memslots + slotidx;
1991 ofs = 0;
1992 }
1993 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1994 while ((slotidx > 0) && (ofs >= ms->npages)) {
1995 slotidx--;
1996 ms = slots->memslots + slotidx;
1997 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1998 }
1999 return ms->base_gfn + ofs;
2000 }
2001
2002 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2003 u8 *res, unsigned long bufsize)
2004 {
2005 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2006 struct kvm_memslots *slots = kvm_memslots(kvm);
2007 struct kvm_memory_slot *ms;
2008
2009 if (unlikely(!slots->used_slots))
2010 return 0;
2011
2012 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2013 ms = gfn_to_memslot(kvm, cur_gfn);
2014 args->count = 0;
2015 args->start_gfn = cur_gfn;
2016 if (!ms)
2017 return 0;
2018 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
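	/* memslots are sorted by descending base_gfn, so slot 0 marks the end of guest memory */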
2019 mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2020
2021 while (args->count < bufsize) {
2022 hva = gfn_to_hva(kvm, cur_gfn);
2023 if (kvm_is_error_hva(hva))
2024 return 0;
2025 /* Decrement only if we actually flipped the bit to 0 */
2026 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2027 atomic64_dec(&kvm->arch.cmma_dirty_pages);
2028 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2029 pgstev = 0;
2030 /* Save the value */
2031 res[args->count++] = (pgstev >> 24) & 0x43;
2032 /* If the next bit is too far away, stop. */
2033 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2034 return 0;
2035 /* If we reached the previous "next", find the next one */
2036 if (cur_gfn == next_gfn)
2037 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2038 /* Reached the end of memory or of the buffer, stop */
2039 if ((next_gfn >= mem_end) ||
2040 (next_gfn - args->start_gfn >= bufsize))
2041 return 0;
2042 cur_gfn++;
2043 /* Reached the end of the current memslot, take the next one. */
2044 if (cur_gfn - ms->base_gfn >= ms->npages) {
2045 ms = gfn_to_memslot(kvm, cur_gfn);
2046 if (!ms)
2047 return 0;
2048 }
2049 }
2050 return 0;
2051 }
2052
2053 /*
2054 * This function searches for the next page with dirty CMMA attributes, and
2055 * saves the attributes in the buffer up to either the end of the buffer or
2056 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2057 * no trailing clean bytes are saved.
2058 * In case no dirty bits were found, or if CMMA was not enabled or used, the
2059 * output buffer will indicate 0 as length.
2060 */
2061 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2062 struct kvm_s390_cmma_log *args)
2063 {
2064 unsigned long bufsize;
2065 int srcu_idx, peek, ret;
2066 u8 *values;
2067
2068 if (!kvm->arch.use_cmma)
2069 return -ENXIO;
2070 /* Invalid/unsupported flags were specified */
2071 if (args->flags & ~KVM_S390_CMMA_PEEK)
2072 return -EINVAL;
2073 /* Migration mode query, and we are not doing a migration */
2074 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2075 if (!peek && !kvm->arch.migration_mode)
2076 return -EINVAL;
2077 /* CMMA is disabled or was not used, or the buffer has length zero */
2078 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2079 if (!bufsize || !kvm->mm->context.uses_cmm) {
2080 memset(args, 0, sizeof(*args));
2081 return 0;
2082 }
2083 /* We are not peeking, and there are no dirty pages */
2084 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2085 memset(args, 0, sizeof(*args));
2086 return 0;
2087 }
2088
2089 values = vmalloc(bufsize);
2090 if (!values)
2091 return -ENOMEM;
2092
2093 mmap_read_lock(kvm->mm);
2094 srcu_idx = srcu_read_lock(&kvm->srcu);
2095 if (peek)
2096 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2097 else
2098 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2099 srcu_read_unlock(&kvm->srcu, srcu_idx);
2100 mmap_read_unlock(kvm->mm);
2101
2102 if (kvm->arch.migration_mode)
2103 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2104 else
2105 args->remaining = 0;
2106
2107 if (copy_to_user((void __user *)args->values, values, args->count))
2108 ret = -EFAULT;
2109
2110 vfree(values);
2111 return ret;
2112 }
2113
2114 /*
2115 * This function sets the CMMA attributes for the given pages. If the input
2116 * buffer has zero length, no action is taken, otherwise the attributes are
2117 * set and the mm->context.uses_cmm flag is set.
2118 */
2119 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2120 const struct kvm_s390_cmma_log *args)
2121 {
2122 unsigned long hva, mask, pgstev, i;
2123 uint8_t *bits;
2124 int srcu_idx, r = 0;
2125
2126 mask = args->mask;
2127
2128 if (!kvm->arch.use_cmma)
2129 return -ENXIO;
2130 /* invalid/unsupported flags */
2131 if (args->flags != 0)
2132 return -EINVAL;
2133 /* Enforce sane limit on memory allocation */
2134 if (args->count > KVM_S390_CMMA_SIZE_MAX)
2135 return -EINVAL;
2136 /* Nothing to do */
2137 if (args->count == 0)
2138 return 0;
2139
2140 bits = vmalloc(array_size(sizeof(*bits), args->count));
2141 if (!bits)
2142 return -ENOMEM;
2143
2144 r = copy_from_user(bits, (void __user *)args->values, args->count);
2145 if (r) {
2146 r = -EFAULT;
2147 goto out;
2148 }
2149
2150 mmap_read_lock(kvm->mm);
2151 srcu_idx = srcu_read_lock(&kvm->srcu);
2152 for (i = 0; i < args->count; i++) {
2153 hva = gfn_to_hva(kvm, args->start_gfn + i);
2154 if (kvm_is_error_hva(hva)) {
2155 r = -EFAULT;
2156 break;
2157 }
2158
2159 pgstev = bits[i];
2160 pgstev = pgstev << 24;
2161 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2162 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2163 }
2164 srcu_read_unlock(&kvm->srcu, srcu_idx);
2165 mmap_read_unlock(kvm->mm);
2166
2167 if (!kvm->mm->context.uses_cmm) {
2168 mmap_write_lock(kvm->mm);
2169 kvm->mm->context.uses_cmm = 1;
2170 mmap_write_unlock(kvm->mm);
2171 }
2172 out:
2173 vfree(bits);
2174 return r;
2175 }
2176
2177 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2178 {
2179 struct kvm_vcpu *vcpu;
2180 u16 rc, rrc;
2181 int ret = 0;
2182 int i;
2183
2184 /*
2185 * We ignore failures and try to destroy as many CPUs as possible.
2186 * At the same time we must not free the assigned resources when
2187 * this fails, as the ultravisor still has access to that memory. So
2188 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2189 * behind.
2190 * We want to return the first failure rc and rrc, though.
2191 */
2192 kvm_for_each_vcpu(i, vcpu, kvm) {
2193 mutex_lock(&vcpu->mutex);
2194 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2195 *rcp = rc;
2196 *rrcp = rrc;
2197 ret = -EIO;
2198 }
2199 mutex_unlock(&vcpu->mutex);
2200 }
2201 return ret;
2202 }
2203
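/*
 * Create the ultravisor (protected) representation for every existing VCPU.
 * On failure, all VCPUs are converted back to the non-protected state.
 */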
2204 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2205 {
2206 int i, r = 0;
2207 u16 dummy;
2208
2209 struct kvm_vcpu *vcpu;
2210
2211 kvm_for_each_vcpu(i, vcpu, kvm) {
2212 mutex_lock(&vcpu->mutex);
2213 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2214 mutex_unlock(&vcpu->mutex);
2215 if (r)
2216 break;
2217 }
2218 if (r)
2219 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2220 return r;
2221 }
2222
2223 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2224 {
2225 int r = 0;
2226 u16 dummy;
2227 void __user *argp = (void __user *)cmd->data;
2228
2229 switch (cmd->cmd) {
2230 case KVM_PV_ENABLE: {
2231 r = -EINVAL;
2232 if (kvm_s390_pv_is_protected(kvm))
2233 break;
2234
2235 /*
2236 * FMT 4 SIE needs esca. As we never switch back to bsca from
2237 * esca, we need no cleanup in the error cases below
2238 */
2239 r = sca_switch_to_extended(kvm);
2240 if (r)
2241 break;
2242
2243 mmap_write_lock(current->mm);
2244 r = gmap_mark_unmergeable();
2245 mmap_write_unlock(current->mm);
2246 if (r)
2247 break;
2248
2249 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2250 if (r)
2251 break;
2252
2253 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2254 if (r)
2255 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2256
2257 /* we need to block service interrupts from now on */
2258 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2259 break;
2260 }
2261 case KVM_PV_DISABLE: {
2262 r = -EINVAL;
2263 if (!kvm_s390_pv_is_protected(kvm))
2264 break;
2265
2266 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2267 /*
2268 * If a CPU could not be destroyed, destroy VM will also fail.
2269 * There is no point in trying to destroy it. Instead return
2270 * the rc and rrc from the first CPU that failed destroying.
2271 */
2272 if (r)
2273 break;
2274 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2275
2276 /* no need to block service interrupts any more */
2277 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2278 break;
2279 }
2280 case KVM_PV_SET_SEC_PARMS: {
2281 struct kvm_s390_pv_sec_parm parms = {};
2282 void *hdr;
2283
2284 r = -EINVAL;
2285 if (!kvm_s390_pv_is_protected(kvm))
2286 break;
2287
2288 r = -EFAULT;
2289 if (copy_from_user(&parms, argp, sizeof(parms)))
2290 break;
2291
2292 /* Currently restricted to 8KB */
2293 r = -EINVAL;
2294 if (parms.length > PAGE_SIZE * 2)
2295 break;
2296
2297 r = -ENOMEM;
2298 hdr = vmalloc(parms.length);
2299 if (!hdr)
2300 break;
2301
2302 r = -EFAULT;
2303 if (!copy_from_user(hdr, (void __user *)parms.origin,
2304 parms.length))
2305 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2306 &cmd->rc, &cmd->rrc);
2307
2308 vfree(hdr);
2309 break;
2310 }
2311 case KVM_PV_UNPACK: {
2312 struct kvm_s390_pv_unp unp = {};
2313
2314 r = -EINVAL;
2315 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2316 break;
2317
2318 r = -EFAULT;
2319 if (copy_from_user(&unp, argp, sizeof(unp)))
2320 break;
2321
2322 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2323 &cmd->rc, &cmd->rrc);
2324 break;
2325 }
2326 case KVM_PV_VERIFY: {
2327 r = -EINVAL;
2328 if (!kvm_s390_pv_is_protected(kvm))
2329 break;
2330
2331 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2332 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2333 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2334 cmd->rrc);
2335 break;
2336 }
2337 case KVM_PV_PREP_RESET: {
2338 r = -EINVAL;
2339 if (!kvm_s390_pv_is_protected(kvm))
2340 break;
2341
2342 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2343 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2344 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2345 cmd->rc, cmd->rrc);
2346 break;
2347 }
2348 case KVM_PV_UNSHARE_ALL: {
2349 r = -EINVAL;
2350 if (!kvm_s390_pv_is_protected(kvm))
2351 break;
2352
2353 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2354 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2355 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2356 cmd->rc, cmd->rrc);
2357 break;
2358 }
2359 default:
2360 r = -ENOTTY;
2361 }
2362 return r;
2363 }
2364
2365 long kvm_arch_vm_ioctl(struct file *filp,
2366 unsigned int ioctl, unsigned long arg)
2367 {
2368 struct kvm *kvm = filp->private_data;
2369 void __user *argp = (void __user *)arg;
2370 struct kvm_device_attr attr;
2371 int r;
2372
2373 switch (ioctl) {
2374 case KVM_S390_INTERRUPT: {
2375 struct kvm_s390_interrupt s390int;
2376
2377 r = -EFAULT;
2378 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2379 break;
2380 r = kvm_s390_inject_vm(kvm, &s390int);
2381 break;
2382 }
2383 case KVM_CREATE_IRQCHIP: {
2384 struct kvm_irq_routing_entry routing;
2385
2386 r = -EINVAL;
2387 if (kvm->arch.use_irqchip) {
2388 /* Set up dummy routing. */
2389 memset(&routing, 0, sizeof(routing));
2390 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2391 }
2392 break;
2393 }
2394 case KVM_SET_DEVICE_ATTR: {
2395 r = -EFAULT;
2396 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2397 break;
2398 r = kvm_s390_vm_set_attr(kvm, &attr);
2399 break;
2400 }
2401 case KVM_GET_DEVICE_ATTR: {
2402 r = -EFAULT;
2403 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2404 break;
2405 r = kvm_s390_vm_get_attr(kvm, &attr);
2406 break;
2407 }
2408 case KVM_HAS_DEVICE_ATTR: {
2409 r = -EFAULT;
2410 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2411 break;
2412 r = kvm_s390_vm_has_attr(kvm, &attr);
2413 break;
2414 }
2415 case KVM_S390_GET_SKEYS: {
2416 struct kvm_s390_skeys args;
2417
2418 r = -EFAULT;
2419 if (copy_from_user(&args, argp,
2420 sizeof(struct kvm_s390_skeys)))
2421 break;
2422 r = kvm_s390_get_skeys(kvm, &args);
2423 break;
2424 }
2425 case KVM_S390_SET_SKEYS: {
2426 struct kvm_s390_skeys args;
2427
2428 r = -EFAULT;
2429 if (copy_from_user(&args, argp,
2430 sizeof(struct kvm_s390_skeys)))
2431 break;
2432 r = kvm_s390_set_skeys(kvm, &args);
2433 break;
2434 }
2435 case KVM_S390_GET_CMMA_BITS: {
2436 struct kvm_s390_cmma_log args;
2437
2438 r = -EFAULT;
2439 if (copy_from_user(&args, argp, sizeof(args)))
2440 break;
2441 mutex_lock(&kvm->slots_lock);
2442 r = kvm_s390_get_cmma_bits(kvm, &args);
2443 mutex_unlock(&kvm->slots_lock);
2444 if (!r) {
2445 r = copy_to_user(argp, &args, sizeof(args));
2446 if (r)
2447 r = -EFAULT;
2448 }
2449 break;
2450 }
2451 case KVM_S390_SET_CMMA_BITS: {
2452 struct kvm_s390_cmma_log args;
2453
2454 r = -EFAULT;
2455 if (copy_from_user(&args, argp, sizeof(args)))
2456 break;
2457 mutex_lock(&kvm->slots_lock);
2458 r = kvm_s390_set_cmma_bits(kvm, &args);
2459 mutex_unlock(&kvm->slots_lock);
2460 break;
2461 }
2462 case KVM_S390_PV_COMMAND: {
2463 struct kvm_pv_cmd args;
2464
2465 /* protvirt means user sigp */
2466 kvm->arch.user_cpu_state_ctrl = 1;
2467 r = 0;
2468 if (!is_prot_virt_host()) {
2469 r = -EINVAL;
2470 break;
2471 }
2472 if (copy_from_user(&args, argp, sizeof(args))) {
2473 r = -EFAULT;
2474 break;
2475 }
2476 if (args.flags) {
2477 r = -EINVAL;
2478 break;
2479 }
2480 mutex_lock(&kvm->lock);
2481 r = kvm_s390_handle_pv(kvm, &args);
2482 mutex_unlock(&kvm->lock);
2483 if (copy_to_user(argp, &args, sizeof(args))) {
2484 r = -EFAULT;
2485 break;
2486 }
2487 break;
2488 }
2489 default:
2490 r = -ENOTTY;
2491 }
2492
2493 return r;
2494 }
2495
2496 static int kvm_s390_apxa_installed(void)
2497 {
2498 struct ap_config_info info;
2499
2500 if (ap_instructions_available()) {
2501 if (ap_qci(&info) == 0)
2502 return info.apxa;
2503 }
2504
2505 return 0;
2506 }
2507
2508 /*
2509 * The format of the crypto control block (CRYCB) is specified in the 3 low
2510 * order bits of the CRYCB designation (CRYCBD) field as follows:
2511 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2512 * AP extended addressing (APXA) facility are installed.
2513 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2514 * Format 2: Both the APXA and MSAX3 facilities are installed
2515 */
2516 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2517 {
2518 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2519
2520 /* Clear the CRYCB format bits - i.e., set format 0 by default */
2521 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2522
2523 /* Check whether MSAX3 is installed */
2524 if (!test_kvm_facility(kvm, 76))
2525 return;
2526
2527 if (kvm_s390_apxa_installed())
2528 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2529 else
2530 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2531 }
2532
2533 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2534 unsigned long *aqm, unsigned long *adm)
2535 {
2536 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2537
2538 mutex_lock(&kvm->lock);
2539 kvm_s390_vcpu_block_all(kvm);
2540
2541 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2542 case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2543 memcpy(crycb->apcb1.apm, apm, 32);
2544 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2545 apm[0], apm[1], apm[2], apm[3]);
2546 memcpy(crycb->apcb1.aqm, aqm, 32);
2547 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2548 aqm[0], aqm[1], aqm[2], aqm[3]);
2549 memcpy(crycb->apcb1.adm, adm, 32);
2550 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2551 adm[0], adm[1], adm[2], adm[3]);
2552 break;
2553 case CRYCB_FORMAT1:
2554 case CRYCB_FORMAT0: /* fall through - both use APCB0 */
2555 memcpy(crycb->apcb0.apm, apm, 8);
2556 memcpy(crycb->apcb0.aqm, aqm, 2);
2557 memcpy(crycb->apcb0.adm, adm, 2);
2558 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2559 apm[0], *((unsigned short *)aqm),
2560 *((unsigned short *)adm));
2561 break;
2562 default: /* Cannot happen */
2563 break;
2564 }
2565
2566 /* recreate the shadow crycb for each vcpu */
2567 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2568 kvm_s390_vcpu_unblock_all(kvm);
2569 mutex_unlock(&kvm->lock);
2570 }
2571 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2572
2573 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2574 {
2575 mutex_lock(&kvm->lock);
2576 kvm_s390_vcpu_block_all(kvm);
2577
2578 memset(&kvm->arch.crypto.crycb->apcb0, 0,
2579 sizeof(kvm->arch.crypto.crycb->apcb0));
2580 memset(&kvm->arch.crypto.crycb->apcb1, 0,
2581 sizeof(kvm->arch.crypto.crycb->apcb1));
2582
2583 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2584 /* recreate the shadow crycb for each vcpu */
2585 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2586 kvm_s390_vcpu_unblock_all(kvm);
2587 mutex_unlock(&kvm->lock);
2588 }
2589 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2590
2591 static u64 kvm_s390_get_initial_cpuid(void)
2592 {
2593 struct cpuid cpuid;
2594
2595 get_cpu_id(&cpuid);
2596 cpuid.version = 0xff;
2597 return *((u64 *) &cpuid);
2598 }
2599
2600 static void kvm_s390_crypto_init(struct kvm *kvm)
2601 {
2602 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2603 kvm_s390_set_crycb_format(kvm);
2604
2605 if (!test_kvm_facility(kvm, 76))
2606 return;
2607
2608 /* Enable AES/DEA protected key functions by default */
2609 kvm->arch.crypto.aes_kw = 1;
2610 kvm->arch.crypto.dea_kw = 1;
2611 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2612 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2613 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2614 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2615 }
2616
2617 static void sca_dispose(struct kvm *kvm)
2618 {
2619 if (kvm->arch.use_esca)
2620 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2621 else
2622 free_page((unsigned long)(kvm->arch.sca));
2623 kvm->arch.sca = NULL;
2624 }
2625
2626 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2627 {
2628 gfp_t alloc_flags = GFP_KERNEL;
2629 int i, rc;
2630 char debug_name[16];
2631 static unsigned long sca_offset;
2632
2633 rc = -EINVAL;
2634 #ifdef CONFIG_KVM_S390_UCONTROL
2635 if (type & ~KVM_VM_S390_UCONTROL)
2636 goto out_err;
2637 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2638 goto out_err;
2639 #else
2640 if (type)
2641 goto out_err;
2642 #endif
2643
2644 rc = s390_enable_sie();
2645 if (rc)
2646 goto out_err;
2647
2648 rc = -ENOMEM;
2649
2650 if (!sclp.has_64bscao)
2651 alloc_flags |= GFP_DMA;
2652 rwlock_init(&kvm->arch.sca_lock);
2653 /* start with basic SCA */
2654 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2655 if (!kvm->arch.sca)
2656 goto out_err;
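	/* Stagger each new VM's basic SCA by 16 bytes within its page; wrap when a full bsca_block would no longer fit. */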
2657 mutex_lock(&kvm_lock);
2658 sca_offset += 16;
2659 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2660 sca_offset = 0;
2661 kvm->arch.sca = (struct bsca_block *)
2662 ((char *) kvm->arch.sca + sca_offset);
2663 mutex_unlock(&kvm_lock);
2664
2665 sprintf(debug_name, "kvm-%u", current->pid);
2666
2667 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2668 if (!kvm->arch.dbf)
2669 goto out_err;
2670
2671 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2672 kvm->arch.sie_page2 =
2673 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2674 if (!kvm->arch.sie_page2)
2675 goto out_err;
2676
2677 kvm->arch.sie_page2->kvm = kvm;
2678 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2679
2680 for (i = 0; i < kvm_s390_fac_size(); i++) {
2681 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2682 (kvm_s390_fac_base[i] |
2683 kvm_s390_fac_ext[i]);
2684 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2685 kvm_s390_fac_base[i];
2686 }
2687 kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2688
2689 /* we are always in czam mode - even on pre z14 machines */
2690 set_kvm_facility(kvm->arch.model.fac_mask, 138);
2691 set_kvm_facility(kvm->arch.model.fac_list, 138);
2692 /* we emulate STHYI in kvm */
2693 set_kvm_facility(kvm->arch.model.fac_mask, 74);
2694 set_kvm_facility(kvm->arch.model.fac_list, 74);
2695 if (MACHINE_HAS_TLB_GUEST) {
2696 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2697 set_kvm_facility(kvm->arch.model.fac_list, 147);
2698 }
2699
2700 if (css_general_characteristics.aiv && test_facility(65))
2701 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2702
2703 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2704 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2705
2706 kvm_s390_crypto_init(kvm);
2707
2708 mutex_init(&kvm->arch.float_int.ais_lock);
2709 spin_lock_init(&kvm->arch.float_int.lock);
2710 for (i = 0; i < FIRQ_LIST_COUNT; i++)
2711 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2712 init_waitqueue_head(&kvm->arch.ipte_wq);
2713 mutex_init(&kvm->arch.ipte_mutex);
2714
2715 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2716 VM_EVENT(kvm, 3, "vm created with type %lu", type);
2717
2718 if (type & KVM_VM_S390_UCONTROL) {
2719 kvm->arch.gmap = NULL;
2720 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2721 } else {
2722 if (sclp.hamax == U64_MAX)
2723 kvm->arch.mem_limit = TASK_SIZE_MAX;
2724 else
2725 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2726 sclp.hamax + 1);
2727 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2728 if (!kvm->arch.gmap)
2729 goto out_err;
2730 kvm->arch.gmap->private = kvm;
2731 kvm->arch.gmap->pfault_enabled = 0;
2732 }
2733
2734 kvm->arch.use_pfmfi = sclp.has_pfmfi;
2735 kvm->arch.use_skf = sclp.has_skey;
2736 spin_lock_init(&kvm->arch.start_stop_lock);
2737 kvm_s390_vsie_init(kvm);
2738 if (use_gisa)
2739 kvm_s390_gisa_init(kvm);
2740 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2741
2742 return 0;
2743 out_err:
2744 free_page((unsigned long)kvm->arch.sie_page2);
2745 debug_unregister(kvm->arch.dbf);
2746 sca_dispose(kvm);
2747 KVM_EVENT(3, "creation of vm failed: %d", rc);
2748 return rc;
2749 }
2750
2751 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2752 {
2753 u16 rc, rrc;
2754
2755 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2756 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2757 kvm_s390_clear_local_irqs(vcpu);
2758 kvm_clear_async_pf_completion_queue(vcpu);
2759 if (!kvm_is_ucontrol(vcpu->kvm))
2760 sca_del_vcpu(vcpu);
2761
2762 if (kvm_is_ucontrol(vcpu->kvm))
2763 gmap_remove(vcpu->arch.gmap);
2764
2765 if (vcpu->kvm->arch.use_cmma)
2766 kvm_s390_vcpu_unsetup_cmma(vcpu);
2767 /* We cannot hold the vcpu mutex here; we are already dying */
2768 if (kvm_s390_pv_cpu_get_handle(vcpu))
2769 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2770 free_page((unsigned long)(vcpu->arch.sie_block));
2771 }
2772
2773 static void kvm_free_vcpus(struct kvm *kvm)
2774 {
2775 unsigned int i;
2776 struct kvm_vcpu *vcpu;
2777
2778 kvm_for_each_vcpu(i, vcpu, kvm)
2779 kvm_vcpu_destroy(vcpu);
2780
2781 mutex_lock(&kvm->lock);
2782 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2783 kvm->vcpus[i] = NULL;
2784
2785 atomic_set(&kvm->online_vcpus, 0);
2786 mutex_unlock(&kvm->lock);
2787 }
2788
2789 void kvm_arch_destroy_vm(struct kvm *kvm)
2790 {
2791 u16 rc, rrc;
2792
2793 kvm_free_vcpus(kvm);
2794 sca_dispose(kvm);
2795 kvm_s390_gisa_destroy(kvm);
2796 /*
2797 * We are already at the end of life and kvm->lock is not taken.
2798 * This is ok as the file descriptor is closed by now and nobody
2799 * can mess with the pv state. To avoid lockdep_assert_held from
2800 * complaining we do not use kvm_s390_pv_is_protected.
2801 */
2802 if (kvm_s390_pv_get_handle(kvm))
2803 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2804 debug_unregister(kvm->arch.dbf);
2805 free_page((unsigned long)kvm->arch.sie_page2);
2806 if (!kvm_is_ucontrol(kvm))
2807 gmap_remove(kvm->arch.gmap);
2808 kvm_s390_destroy_adapters(kvm);
2809 kvm_s390_clear_float_irqs(kvm);
2810 kvm_s390_vsie_destroy(kvm);
2811 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2812 }
2813
2814 /* Section: vcpu related */
2815 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2816 {
2817 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2818 if (!vcpu->arch.gmap)
2819 return -ENOMEM;
2820 vcpu->arch.gmap->private = vcpu->kvm;
2821
2822 return 0;
2823 }
2824
2825 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2826 {
2827 if (!kvm_s390_use_sca_entries())
2828 return;
2829 read_lock(&vcpu->kvm->arch.sca_lock);
2830 if (vcpu->kvm->arch.use_esca) {
2831 struct esca_block *sca = vcpu->kvm->arch.sca;
2832
2833 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2834 sca->cpu[vcpu->vcpu_id].sda = 0;
2835 } else {
2836 struct bsca_block *sca = vcpu->kvm->arch.sca;
2837
2838 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2839 sca->cpu[vcpu->vcpu_id].sda = 0;
2840 }
2841 read_unlock(&vcpu->kvm->arch.sca_lock);
2842 }
2843
2844 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2845 {
2846 if (!kvm_s390_use_sca_entries()) {
2847 struct bsca_block *sca = vcpu->kvm->arch.sca;
2848
2849 /* we still need the basic sca for the ipte control */
2850 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2851 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2852 return;
2853 }
2854 read_lock(&vcpu->kvm->arch.sca_lock);
2855 if (vcpu->kvm->arch.use_esca) {
2856 struct esca_block *sca = vcpu->kvm->arch.sca;
2857
2858 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2859 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2860 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2861 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2862 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2863 } else {
2864 struct bsca_block *sca = vcpu->kvm->arch.sca;
2865
2866 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2867 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2868 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2869 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2870 }
2871 read_unlock(&vcpu->kvm->arch.sca_lock);
2872 }
2873
2874 /* Basic SCA to Extended SCA data copy routines */
2875 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2876 {
2877 d->sda = s->sda;
2878 d->sigp_ctrl.c = s->sigp_ctrl.c;
2879 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2880 }
2881
2882 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2883 {
2884 int i;
2885
2886 d->ipte_control = s->ipte_control;
2887 d->mcn[0] = s->mcn;
2888 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2889 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2890 }
2891
2892 static int sca_switch_to_extended(struct kvm *kvm)
2893 {
2894 struct bsca_block *old_sca = kvm->arch.sca;
2895 struct esca_block *new_sca;
2896 struct kvm_vcpu *vcpu;
2897 unsigned int vcpu_idx;
2898 u32 scaol, scaoh;
2899
2900 if (kvm->arch.use_esca)
2901 return 0;
2902
2903 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2904 if (!new_sca)
2905 return -ENOMEM;
2906
2907 scaoh = (u32)((u64)(new_sca) >> 32);
2908 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2909
2910 kvm_s390_vcpu_block_all(kvm);
2911 write_lock(&kvm->arch.sca_lock);
2912
2913 sca_copy_b_to_e(new_sca, old_sca);
2914
2915 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2916 vcpu->arch.sie_block->scaoh = scaoh;
2917 vcpu->arch.sie_block->scaol = scaol;
2918 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2919 }
2920 kvm->arch.sca = new_sca;
2921 kvm->arch.use_esca = 1;
2922
2923 write_unlock(&kvm->arch.sca_lock);
2924 kvm_s390_vcpu_unblock_all(kvm);
2925
2926 free_page((unsigned long)old_sca);
2927
2928 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2929 old_sca, kvm->arch.sca);
2930 return 0;
2931 }
2932
2933 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2934 {
2935 int rc;
2936
2937 if (!kvm_s390_use_sca_entries()) {
2938 if (id < KVM_MAX_VCPUS)
2939 return true;
2940 return false;
2941 }
2942 if (id < KVM_S390_BSCA_CPU_SLOTS)
2943 return true;
2944 if (!sclp.has_esca || !sclp.has_64bscao)
2945 return false;
2946
2947 mutex_lock(&kvm->lock);
2948 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2949 mutex_unlock(&kvm->lock);
2950
2951 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2952 }
2953
2954 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2955 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2956 {
2957 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2958 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2959 vcpu->arch.cputm_start = get_tod_clock_fast();
2960 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2961 }
2962
2963 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2964 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2965 {
2966 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2967 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2968 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2969 vcpu->arch.cputm_start = 0;
2970 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2971 }
2972
2973 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2974 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2975 {
2976 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2977 vcpu->arch.cputm_enabled = true;
2978 __start_cpu_timer_accounting(vcpu);
2979 }
2980
2981 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2982 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2983 {
2984 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2985 __stop_cpu_timer_accounting(vcpu);
2986 vcpu->arch.cputm_enabled = false;
2987 }
2988
2989 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2990 {
2991 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2992 __enable_cpu_timer_accounting(vcpu);
2993 preempt_enable();
2994 }
2995
2996 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2997 {
2998 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2999 __disable_cpu_timer_accounting(vcpu);
3000 preempt_enable();
3001 }
3002
3003 /* set the cpu timer - may only be called from the VCPU thread itself */
3004 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3005 {
3006 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3007 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3008 if (vcpu->arch.cputm_enabled)
3009 vcpu->arch.cputm_start = get_tod_clock_fast();
3010 vcpu->arch.sie_block->cputm = cputm;
3011 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3012 preempt_enable();
3013 }
3014
3015 /* update and get the cpu timer - can also be called from other VCPU threads */
3016 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3017 {
3018 unsigned int seq;
3019 __u64 value;
3020
3021 if (unlikely(!vcpu->arch.cputm_enabled))
3022 return vcpu->arch.sie_block->cputm;
3023
3024 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3025 do {
3026 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3027 /*
3028 * If the writer ever executed a read in the critical
3029 * section, e.g. in irq context, we would have a deadlock.
3030 */
3031 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3032 value = vcpu->arch.sie_block->cputm;
3033 /* if cputm_start is 0, accounting is being started/stopped */
3034 if (likely(vcpu->arch.cputm_start))
3035 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3036 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3037 preempt_enable();
3038 return value;
3039 }
3040
3041 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3042 {
3043
3044 gmap_enable(vcpu->arch.enabled_gmap);
3045 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3046 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3047 __start_cpu_timer_accounting(vcpu);
3048 vcpu->cpu = cpu;
3049 }
3050
3051 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3052 {
3053 vcpu->cpu = -1;
3054 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3055 __stop_cpu_timer_accounting(vcpu);
3056 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3057 vcpu->arch.enabled_gmap = gmap_get_enabled();
3058 gmap_disable(vcpu->arch.enabled_gmap);
3059
3060 }
3061
3062 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3063 {
3064 mutex_lock(&vcpu->kvm->lock);
3065 preempt_disable();
3066 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3067 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3068 preempt_enable();
3069 mutex_unlock(&vcpu->kvm->lock);
3070 if (!kvm_is_ucontrol(vcpu->kvm)) {
3071 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3072 sca_add_vcpu(vcpu);
3073 }
3074 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3075 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3076 /* make vcpu_load load the right gmap on the first trigger */
3077 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3078 }
3079
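/* A PCKMO subfunction is only usable if it is part of the guest CPU model and also available on the host. */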
3080 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3081 {
3082 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3083 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3084 return true;
3085 return false;
3086 }
3087
3088 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3089 {
3090 /* At least one ECC subfunction must be present */
3091 return kvm_has_pckmo_subfunc(kvm, 32) ||
3092 kvm_has_pckmo_subfunc(kvm, 33) ||
3093 kvm_has_pckmo_subfunc(kvm, 34) ||
3094 kvm_has_pckmo_subfunc(kvm, 40) ||
3095 kvm_has_pckmo_subfunc(kvm, 41);
3096
3097 }
3098
3099 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3100 {
3101 /*
3102 * If the AP instructions are not being interpreted and the MSAX3
3103 * facility is not configured for the guest, there is nothing to set up.
3104 */
3105 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3106 return;
3107
3108 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3109 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3110 vcpu->arch.sie_block->eca &= ~ECA_APIE;
3111 vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3112
3113 if (vcpu->kvm->arch.crypto.apie)
3114 vcpu->arch.sie_block->eca |= ECA_APIE;
3115
3116 /* Set up protected key support */
3117 if (vcpu->kvm->arch.crypto.aes_kw) {
3118 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3119 /* ecc is also wrapped with AES key */
3120 if (kvm_has_pckmo_ecc(vcpu->kvm))
3121 vcpu->arch.sie_block->ecd |= ECD_ECC;
3122 }
3123
3124 if (vcpu->kvm->arch.crypto.dea_kw)
3125 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3126 }
3127
3128 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3129 {
3130 free_page(vcpu->arch.sie_block->cbrlo);
3131 vcpu->arch.sie_block->cbrlo = 0;
3132 }
3133
3134 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3135 {
3136 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
3137 if (!vcpu->arch.sie_block->cbrlo)
3138 return -ENOMEM;
3139 return 0;
3140 }
3141
3142 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3143 {
3144 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3145
3146 vcpu->arch.sie_block->ibc = model->ibc;
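	/* facility 7 is STFLE; only guests that have it in their model get the facility list */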
3147 if (test_kvm_facility(vcpu->kvm, 7))
3148 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3149 }
3150
3151 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3152 {
3153 int rc = 0;
3154 u16 uvrc, uvrrc;
3155
3156 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3157 CPUSTAT_SM |
3158 CPUSTAT_STOPPED);
3159
3160 if (test_kvm_facility(vcpu->kvm, 78))
3161 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3162 else if (test_kvm_facility(vcpu->kvm, 8))
3163 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3164
3165 kvm_s390_vcpu_setup_model(vcpu);
3166
3167 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3168 if (MACHINE_HAS_ESOP)
3169 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3170 if (test_kvm_facility(vcpu->kvm, 9))
3171 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3172 if (test_kvm_facility(vcpu->kvm, 73))
3173 vcpu->arch.sie_block->ecb |= ECB_TE;
3174
3175 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3176 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3177 if (test_kvm_facility(vcpu->kvm, 130))
3178 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3179 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3180 if (sclp.has_cei)
3181 vcpu->arch.sie_block->eca |= ECA_CEI;
3182 if (sclp.has_ib)
3183 vcpu->arch.sie_block->eca |= ECA_IB;
3184 if (sclp.has_siif)
3185 vcpu->arch.sie_block->eca |= ECA_SII;
3186 if (sclp.has_sigpif)
3187 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3188 if (test_kvm_facility(vcpu->kvm, 129)) {
3189 vcpu->arch.sie_block->eca |= ECA_VX;
3190 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3191 }
3192 if (test_kvm_facility(vcpu->kvm, 139))
3193 vcpu->arch.sie_block->ecd |= ECD_MEF;
3194 if (test_kvm_facility(vcpu->kvm, 156))
3195 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3196 if (vcpu->arch.sie_block->gd) {
3197 vcpu->arch.sie_block->eca |= ECA_AIV;
3198 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3199 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3200 }
3201 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3202 | SDNXC;
3203 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3204
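	/* The keyless subset (KSS) facility lets the guest start without storage keys; without it, intercept the storage key instructions (presumably so keys can be enabled lazily on first use). */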
3205 if (sclp.has_kss)
3206 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3207 else
3208 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3209
3210 if (vcpu->kvm->arch.use_cmma) {
3211 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3212 if (rc)
3213 return rc;
3214 }
3215 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3216 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3217
3218 vcpu->arch.sie_block->hpid = HPID_KVM;
3219
3220 kvm_s390_vcpu_crypto_setup(vcpu);
3221
3222 mutex_lock(&vcpu->kvm->lock);
3223 if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3224 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3225 if (rc)
3226 kvm_s390_vcpu_unsetup_cmma(vcpu);
3227 }
3228 mutex_unlock(&vcpu->kvm->lock);
3229
3230 return rc;
3231 }
3232
3233 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3234 {
3235 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3236 return -EINVAL;
3237 return 0;
3238 }
3239
3240 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3241 {
3242 struct sie_page *sie_page;
3243 int rc;
3244
3245 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3246 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3247 if (!sie_page)
3248 return -ENOMEM;
3249
3250 vcpu->arch.sie_block = &sie_page->sie_block;
3251 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3252
3253 /* the real guest size will always be smaller than msl */
3254 vcpu->arch.sie_block->mso = 0;
3255 vcpu->arch.sie_block->msl = sclp.hamax;
3256
3257 vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3258 spin_lock_init(&vcpu->arch.local_int.lock);
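	/* gd holds the GISA (guest interruption state area) designation; format-1 is flagged below when the GISA format facility is available */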
3259 vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3260 if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3261 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3262 seqcount_init(&vcpu->arch.cputm_seqcount);
3263
3264 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3265 kvm_clear_async_pf_completion_queue(vcpu);
3266 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3267 KVM_SYNC_GPRS |
3268 KVM_SYNC_ACRS |
3269 KVM_SYNC_CRS |
3270 KVM_SYNC_ARCH0 |
3271 KVM_SYNC_PFAULT |
3272 KVM_SYNC_DIAG318;
3273 kvm_s390_set_prefix(vcpu, 0);
3274 if (test_kvm_facility(vcpu->kvm, 64))
3275 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3276 if (test_kvm_facility(vcpu->kvm, 82))
3277 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3278 if (test_kvm_facility(vcpu->kvm, 133))
3279 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3280 if (test_kvm_facility(vcpu->kvm, 156))
3281 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3282 /* fprs can be synchronized via vrs, even if the guest has no vx. With
3283 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3284 */
3285 if (MACHINE_HAS_VX)
3286 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3287 else
3288 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3289
3290 if (kvm_is_ucontrol(vcpu->kvm)) {
3291 rc = __kvm_ucontrol_vcpu_init(vcpu);
3292 if (rc)
3293 goto out_free_sie_block;
3294 }
3295
3296 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3297 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3298 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3299
3300 rc = kvm_s390_vcpu_setup(vcpu);
3301 if (rc)
3302 goto out_ucontrol_uninit;
3303 return 0;
3304
3305 out_ucontrol_uninit:
3306 if (kvm_is_ucontrol(vcpu->kvm))
3307 gmap_remove(vcpu->arch.gmap);
3308 out_free_sie_block:
3309 free_page((unsigned long)(vcpu->arch.sie_block));
3310 return rc;
3311 }
3312
3313 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3314 {
3315 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3316 return kvm_s390_vcpu_has_irq(vcpu, 0);
3317 }
3318
3319 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3320 {
3321 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3322 }
3323
3324 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3325 {
3326 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3327 exit_sie(vcpu);
3328 }
3329
3330 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3331 {
3332 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3333 }
3334
3335 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3336 {
3337 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3338 exit_sie(vcpu);
3339 }
3340
3341 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3342 {
3343 return atomic_read(&vcpu->arch.sie_block->prog20) &
3344 (PROG_BLOCK_SIE | PROG_REQUEST);
3345 }
3346
3347 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3348 {
3349 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3350 }
3351
3352 /*
3353 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3354 * If the CPU is not running (e.g. waiting as idle) the function will
3355 * return immediately. */
3356 void exit_sie(struct kvm_vcpu *vcpu)
3357 {
3358 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3359 kvm_s390_vsie_kick(vcpu);
3360 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3361 cpu_relax();
3362 }
3363
3364 /* Kick a guest cpu out of SIE to process a request synchronously */
3365 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3366 {
3367 kvm_make_request(req, vcpu);
3368 kvm_s390_vcpu_request(vcpu);
3369 }
3370
3371 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3372 unsigned long end)
3373 {
3374 struct kvm *kvm = gmap->private;
3375 struct kvm_vcpu *vcpu;
3376 unsigned long prefix;
3377 int i;
3378
3379 if (gmap_is_shadow(gmap))
3380 return;
3381 if (start >= 1UL << 31)
3382 /* We are only interested in prefix pages */
3383 return;
3384 kvm_for_each_vcpu(i, vcpu, kvm) {
3385 /* match against both prefix pages */
3386 prefix = kvm_s390_get_prefix(vcpu);
3387 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3388 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3389 start, end);
3390 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3391 }
3392 }
3393 }
3394
3395 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3396 {
3397 /* do not poll with more than halt_poll_max_steal percent of steal time */
3398 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3399 halt_poll_max_steal) {
3400 vcpu->stat.halt_no_poll_steal++;
3401 return true;
3402 }
3403 return false;
3404 }
3405
3406 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3407 {
3408 /* kvm common code refers to this, but never calls it */
3409 BUG();
3410 return 0;
3411 }
3412
3413 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3414 struct kvm_one_reg *reg)
3415 {
3416 int r = -EINVAL;
3417
3418 switch (reg->id) {
3419 case KVM_REG_S390_TODPR:
3420 r = put_user(vcpu->arch.sie_block->todpr,
3421 (u32 __user *)reg->addr);
3422 break;
3423 case KVM_REG_S390_EPOCHDIFF:
3424 r = put_user(vcpu->arch.sie_block->epoch,
3425 (u64 __user *)reg->addr);
3426 break;
3427 case KVM_REG_S390_CPU_TIMER:
3428 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3429 (u64 __user *)reg->addr);
3430 break;
3431 case KVM_REG_S390_CLOCK_COMP:
3432 r = put_user(vcpu->arch.sie_block->ckc,
3433 (u64 __user *)reg->addr);
3434 break;
3435 case KVM_REG_S390_PFTOKEN:
3436 r = put_user(vcpu->arch.pfault_token,
3437 (u64 __user *)reg->addr);
3438 break;
3439 case KVM_REG_S390_PFCOMPARE:
3440 r = put_user(vcpu->arch.pfault_compare,
3441 (u64 __user *)reg->addr);
3442 break;
3443 case KVM_REG_S390_PFSELECT:
3444 r = put_user(vcpu->arch.pfault_select,
3445 (u64 __user *)reg->addr);
3446 break;
3447 case KVM_REG_S390_PP:
3448 r = put_user(vcpu->arch.sie_block->pp,
3449 (u64 __user *)reg->addr);
3450 break;
3451 case KVM_REG_S390_GBEA:
3452 r = put_user(vcpu->arch.sie_block->gbea,
3453 (u64 __user *)reg->addr);
3454 break;
3455 default:
3456 break;
3457 }
3458
3459 return r;
3460 }
3461
3462 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3463 struct kvm_one_reg *reg)
3464 {
3465 int r = -EINVAL;
3466 __u64 val;
3467
3468 switch (reg->id) {
3469 case KVM_REG_S390_TODPR:
3470 r = get_user(vcpu->arch.sie_block->todpr,
3471 (u32 __user *)reg->addr);
3472 break;
3473 case KVM_REG_S390_EPOCHDIFF:
3474 r = get_user(vcpu->arch.sie_block->epoch,
3475 (u64 __user *)reg->addr);
3476 break;
3477 case KVM_REG_S390_CPU_TIMER:
3478 r = get_user(val, (u64 __user *)reg->addr);
3479 if (!r)
3480 kvm_s390_set_cpu_timer(vcpu, val);
3481 break;
3482 case KVM_REG_S390_CLOCK_COMP:
3483 r = get_user(vcpu->arch.sie_block->ckc,
3484 (u64 __user *)reg->addr);
3485 break;
3486 case KVM_REG_S390_PFTOKEN:
3487 r = get_user(vcpu->arch.pfault_token,
3488 (u64 __user *)reg->addr);
3489 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3490 kvm_clear_async_pf_completion_queue(vcpu);
3491 break;
3492 case KVM_REG_S390_PFCOMPARE:
3493 r = get_user(vcpu->arch.pfault_compare,
3494 (u64 __user *)reg->addr);
3495 break;
3496 case KVM_REG_S390_PFSELECT:
3497 r = get_user(vcpu->arch.pfault_select,
3498 (u64 __user *)reg->addr);
3499 break;
3500 case KVM_REG_S390_PP:
3501 r = get_user(vcpu->arch.sie_block->pp,
3502 (u64 __user *)reg->addr);
3503 break;
3504 case KVM_REG_S390_GBEA:
3505 r = get_user(vcpu->arch.sie_block->gbea,
3506 (u64 __user *)reg->addr);
3507 break;
3508 default:
3509 break;
3510 }
3511
3512 return r;
3513 }
3514
3515 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3516 {
3517 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3518 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3519 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3520
3521 kvm_clear_async_pf_completion_queue(vcpu);
3522 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3523 kvm_s390_vcpu_stop(vcpu);
3524 kvm_s390_clear_local_irqs(vcpu);
3525 }
3526
3527 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3528 {
3529 /* Initial reset is a superset of the normal reset */
3530 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3531
3532 /*
3533 * This equals the initial cpu reset in the POP, but we don't switch to ESA.
3534 * We not only reset the internal data, but also ...
3535 */
3536 vcpu->arch.sie_block->gpsw.mask = 0;
3537 vcpu->arch.sie_block->gpsw.addr = 0;
3538 kvm_s390_set_prefix(vcpu, 0);
3539 kvm_s390_set_cpu_timer(vcpu, 0);
3540 vcpu->arch.sie_block->ckc = 0;
3541 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3542 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3543 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3544
3545 /* ... the data in sync regs */
3546 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3547 vcpu->run->s.regs.ckc = 0;
3548 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3549 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3550 vcpu->run->psw_addr = 0;
3551 vcpu->run->psw_mask = 0;
3552 vcpu->run->s.regs.todpr = 0;
3553 vcpu->run->s.regs.cputm = 0;
3554 vcpu->run->s.regs.ckc = 0;
3555 vcpu->run->s.regs.pp = 0;
3556 vcpu->run->s.regs.gbea = 1;
3557 vcpu->run->s.regs.fpc = 0;
3558 /*
3559 * Do not reset these registers in the protected case, as some of
3560 * them are overlaid and they are not accessible in this case
3561 * anyway.
3562 */
3563 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3564 vcpu->arch.sie_block->gbea = 1;
3565 vcpu->arch.sie_block->pp = 0;
3566 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3567 vcpu->arch.sie_block->todpr = 0;
3568 }
3569 }
3570
3571 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3572 {
3573 struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3574
3575 /* Clear reset is a superset of the initial reset */
3576 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3577
3578 memset(&regs->gprs, 0, sizeof(regs->gprs));
3579 memset(&regs->vrs, 0, sizeof(regs->vrs));
3580 memset(&regs->acrs, 0, sizeof(regs->acrs));
3581 memset(&regs->gscb, 0, sizeof(regs->gscb));
3582
3583 regs->etoken = 0;
3584 regs->etoken_extension = 0;
3585 }
3586
3587 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3588 {
3589 vcpu_load(vcpu);
3590 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3591 vcpu_put(vcpu);
3592 return 0;
3593 }
3594
3595 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3596 {
3597 vcpu_load(vcpu);
3598 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3599 vcpu_put(vcpu);
3600 return 0;
3601 }
3602
3603 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3604 struct kvm_sregs *sregs)
3605 {
3606 vcpu_load(vcpu);
3607
3608 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3609 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3610
3611 vcpu_put(vcpu);
3612 return 0;
3613 }
3614
3615 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3616 struct kvm_sregs *sregs)
3617 {
3618 vcpu_load(vcpu);
3619
3620 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3621 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3622
3623 vcpu_put(vcpu);
3624 return 0;
3625 }
3626
3627 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3628 {
3629 int ret = 0;
3630
3631 vcpu_load(vcpu);
3632
3633 if (test_fp_ctl(fpu->fpc)) {
3634 ret = -EINVAL;
3635 goto out;
3636 }
3637 vcpu->run->s.regs.fpc = fpu->fpc;
3638 if (MACHINE_HAS_VX)
3639 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3640 (freg_t *) fpu->fprs);
3641 else
3642 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3643
3644 out:
3645 vcpu_put(vcpu);
3646 return ret;
3647 }
3648
3649 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3650 {
3651 vcpu_load(vcpu);
3652
3653 /* make sure we have the latest values */
3654 save_fpu_regs();
3655 if (MACHINE_HAS_VX)
3656 convert_vx_to_fp((freg_t *) fpu->fprs,
3657 (__vector128 *) vcpu->run->s.regs.vrs);
3658 else
3659 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3660 fpu->fpc = vcpu->run->s.regs.fpc;
3661
3662 vcpu_put(vcpu);
3663 return 0;
3664 }
3665
3666 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3667 {
3668 int rc = 0;
3669
3670 if (!is_vcpu_stopped(vcpu))
3671 rc = -EBUSY;
3672 else {
3673 vcpu->run->psw_mask = psw.mask;
3674 vcpu->run->psw_addr = psw.addr;
3675 }
3676 return rc;
3677 }
3678
3679 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3680 struct kvm_translation *tr)
3681 {
3682 return -EINVAL; /* not implemented yet */
3683 }
3684
3685 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3686 KVM_GUESTDBG_USE_HW_BP | \
3687 KVM_GUESTDBG_ENABLE)
3688
3689 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3690 struct kvm_guest_debug *dbg)
3691 {
3692 int rc = 0;
3693
3694 vcpu_load(vcpu);
3695
3696 vcpu->guest_debug = 0;
3697 kvm_s390_clear_bp_data(vcpu);
3698
3699 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3700 rc = -EINVAL;
3701 goto out;
3702 }
3703 if (!sclp.has_gpere) {
3704 rc = -EINVAL;
3705 goto out;
3706 }
3707
3708 if (dbg->control & KVM_GUESTDBG_ENABLE) {
3709 vcpu->guest_debug = dbg->control;
3710 /* enforce guest PER */
3711 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3712
3713 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3714 rc = kvm_s390_import_bp_data(vcpu, dbg);
3715 } else {
3716 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3717 vcpu->arch.guestdbg.last_bp = 0;
3718 }
3719
3720 if (rc) {
3721 vcpu->guest_debug = 0;
3722 kvm_s390_clear_bp_data(vcpu);
3723 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3724 }
3725
3726 out:
3727 vcpu_put(vcpu);
3728 return rc;
3729 }
3730
3731 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3732 struct kvm_mp_state *mp_state)
3733 {
3734 int ret;
3735
3736 vcpu_load(vcpu);
3737
3738 /* CHECK_STOP and LOAD are not supported yet */
3739 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3740 KVM_MP_STATE_OPERATING;
3741
3742 vcpu_put(vcpu);
3743 return ret;
3744 }
3745
3746 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3747 struct kvm_mp_state *mp_state)
3748 {
3749 int rc = 0;
3750
3751 vcpu_load(vcpu);
3752
3753 /* user space knows about this interface - let it control the state */
3754 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3755
3756 switch (mp_state->mp_state) {
3757 case KVM_MP_STATE_STOPPED:
3758 rc = kvm_s390_vcpu_stop(vcpu);
3759 break;
3760 case KVM_MP_STATE_OPERATING:
3761 rc = kvm_s390_vcpu_start(vcpu);
3762 break;
3763 case KVM_MP_STATE_LOAD:
3764 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3765 rc = -ENXIO;
3766 break;
3767 }
3768 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3769 break;
3770 case KVM_MP_STATE_CHECK_STOP:
3771 fallthrough; /* CHECK_STOP and LOAD are not supported yet */
3772 default:
3773 rc = -ENXIO;
3774 }
3775
3776 vcpu_put(vcpu);
3777 return rc;
3778 }
3779
3780 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3781 {
3782 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3783 }
3784
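/*
 * Handle all requests pending for this vcpu before (re-)entering SIE.
 * Returns 0 once no request is left, or a negative error code if
 * re-arming the prefix ipte notifier failed.
 */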
3785 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3786 {
3787 retry:
3788 kvm_s390_vcpu_request_handled(vcpu);
3789 if (!kvm_request_pending(vcpu))
3790 return 0;
3791 /*
3792 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3793 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3794 * This ensures that the ipte instruction for this request has
3795 * already finished. We might race against a second unmapper that
3796 * wants to set the blocking bit. Let's just retry the request loop.
3797 */
3798 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3799 int rc;
3800 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3801 kvm_s390_get_prefix(vcpu),
3802 PAGE_SIZE * 2, PROT_WRITE);
3803 if (rc) {
3804 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3805 return rc;
3806 }
3807 goto retry;
3808 }
3809
3810 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3811 vcpu->arch.sie_block->ihcpu = 0xffff;
3812 goto retry;
3813 }
3814
3815 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3816 if (!ibs_enabled(vcpu)) {
3817 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3818 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3819 }
3820 goto retry;
3821 }
3822
3823 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3824 if (ibs_enabled(vcpu)) {
3825 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3826 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3827 }
3828 goto retry;
3829 }
3830
3831 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3832 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3833 goto retry;
3834 }
3835
3836 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3837 /*
3838 * Disable CMM virtualization; we will emulate the ESSA
3839 * instruction manually, in order to provide additional
3840 * functionalities needed for live migration.
3841 */
3842 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3843 goto retry;
3844 }
3845
3846 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3847 /*
3848 * Re-enable CMM virtualization if CMMA is available and
3849 * CMM has been used.
3850 */
3851 if ((vcpu->kvm->arch.use_cmma) &&
3852 (vcpu->kvm->mm->context.uses_cmm))
3853 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3854 goto retry;
3855 }
3856
3857 /* nothing to do, just clear the request */
3858 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3859 /* we left the vsie handler, nothing to do, just clear the request */
3860 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3861
3862 return 0;
3863 }
3864
3865 void kvm_s390_set_tod_clock(struct kvm *kvm,
3866 const struct kvm_s390_vm_tod_clock *gtod)
3867 {
3868 struct kvm_vcpu *vcpu;
3869 struct kvm_s390_tod_clock_ext htod;
3870 int i;
3871
3872 mutex_lock(&kvm->lock);
3873 preempt_disable();
3874
3875 get_tod_clock_ext((char *)&htod);
3876
3877 kvm->arch.epoch = gtod->tod - htod.tod;
3878 kvm->arch.epdx = 0;
3879 if (test_kvm_facility(kvm, 139)) {
3880 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3881 if (kvm->arch.epoch > gtod->tod)
3882 kvm->arch.epdx -= 1;
3883 }
3884
3885 kvm_s390_vcpu_block_all(kvm);
3886 kvm_for_each_vcpu(i, vcpu, kvm) {
3887 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3888 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3889 }
3890
3891 kvm_s390_vcpu_unblock_all(kvm);
3892 preempt_enable();
3893 mutex_unlock(&kvm->lock);
3894 }
3895
3896 /**
3897 * kvm_arch_fault_in_page - fault-in guest page if necessary
3898 * @vcpu: The corresponding virtual cpu
3899 * @gpa: Guest physical address
3900 * @writable: Whether the page should be writable or not
3901 *
3902 * Make sure that a guest page has been faulted-in on the host.
3903 *
3904 * Return: Zero on success, negative error code otherwise.
3905 */
3906 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3907 {
3908 return gmap_fault(vcpu->arch.gmap, gpa,
3909 writable ? FAULT_FLAG_WRITE : 0);
3910 }
3911
3912 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3913 unsigned long token)
3914 {
3915 struct kvm_s390_interrupt inti;
3916 struct kvm_s390_irq irq;
3917
3918 if (start_token) {
3919 irq.u.ext.ext_params2 = token;
3920 irq.type = KVM_S390_INT_PFAULT_INIT;
3921 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3922 } else {
3923 inti.type = KVM_S390_INT_PFAULT_DONE;
3924 inti.parm64 = token;
3925 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3926 }
3927 }
3928
3929 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3930 struct kvm_async_pf *work)
3931 {
3932 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3933 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3934
3935 return true;
3936 }
3937
3938 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3939 struct kvm_async_pf *work)
3940 {
3941 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3942 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3943 }
3944
3945 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3946 struct kvm_async_pf *work)
3947 {
3948 /* s390 will always inject the page directly */
3949 }
3950
3951 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3952 {
3953 /*
3954 * s390 will always inject the page directly,
3955 * but we still want check_async_completion to clean up
3956 */
3957 return true;
3958 }
3959
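/*
 * Check whether the host fault may be handled asynchronously: the pfault
 * handshake must be armed and usable (valid token, PSW mask matching the
 * pfault compare/select values, external interrupts and the service signal
 * subclass enabled), and no other interrupt may already be pending.
 */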
3960 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3961 {
3962 hva_t hva;
3963 struct kvm_arch_async_pf arch;
3964
3965 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3966 return false;
3967 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3968 vcpu->arch.pfault_compare)
3969 return false;
3970 if (psw_extint_disabled(vcpu))
3971 return false;
3972 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3973 return false;
3974 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3975 return false;
3976 if (!vcpu->arch.gmap->pfault_enabled)
3977 return false;
3978
3979 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3980 hva += current->thread.gmap_addr & ~PAGE_MASK;
3981 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3982 return false;
3983
3984 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3985 }
3986
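/*
 * Prepare the vcpu for the next SIE entry: flush completed async pfaults,
 * deliver pending interrupts, process requests and, with guest debugging
 * enabled, patch the guest PER state.
 */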
3987 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3988 {
3989 int rc, cpuflags;
3990
3991 /*
3992 * On s390 notifications for arriving pages will be delivered directly
3993 * to the guest but the housekeeping for completed pfaults is
3994 * handled outside the worker.
3995 */
3996 kvm_check_async_pf_completion(vcpu);
3997
3998 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3999 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4000
4001 if (need_resched())
4002 schedule();
4003
4004 if (!kvm_is_ucontrol(vcpu->kvm)) {
4005 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4006 if (rc)
4007 return rc;
4008 }
4009
4010 rc = kvm_s390_handle_requests(vcpu);
4011 if (rc)
4012 return rc;
4013
4014 if (guestdbg_enabled(vcpu)) {
4015 kvm_s390_backup_guest_per_regs(vcpu);
4016 kvm_s390_patch_guest_per_regs(vcpu);
4017 }
4018
4019 clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask);
4020
4021 vcpu->arch.sie_block->icptcode = 0;
4022 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4023 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4024 trace_kvm_s390_sie_enter(vcpu, cpuflags);
4025
4026 return 0;
4027 }
4028
4029 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4030 {
4031 struct kvm_s390_pgm_info pgm_info = {
4032 .code = PGM_ADDRESSING,
4033 };
4034 u8 opcode, ilen;
4035 int rc;
4036
4037 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4038 trace_kvm_s390_sie_fault(vcpu);
4039
4040 /*
4041 * We want to inject an addressing exception, which is defined as a
4042 * suppressing or terminating exception. However, since we came here
4043 * by a DAT access exception, the PSW still points to the faulting
4044 * instruction since DAT exceptions are nullifying. So we've got
4045 * to look up the current opcode to get the length of the instruction
4046 * to be able to forward the PSW.
4047 */
4048 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4049 ilen = insn_length(opcode);
4050 if (rc < 0) {
4051 return rc;
4052 } else if (rc) {
4053 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4054 * Forward by arbitrary ilc, injection will take care of
4055 * nullification if necessary.
4056 */
4057 pgm_info = vcpu->arch.pgm;
4058 ilen = 4;
4059 }
4060 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4061 kvm_s390_forward_psw(vcpu, ilen);
4062 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4063 }
4064
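/*
 * Post-process a SIE exit: -EINTR signals a machine check that gets
 * reinjected into the guest, intercepts are handed to the intercept
 * handlers, and anything that needs userspace involvement is turned into
 * -EREMOTE with the exit_reason prepared in kvm_run.
 */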
4065 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4066 {
4067 struct mcck_volatile_info *mcck_info;
4068 struct sie_page *sie_page;
4069
4070 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4071 vcpu->arch.sie_block->icptcode);
4072 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4073
4074 if (guestdbg_enabled(vcpu))
4075 kvm_s390_restore_guest_per_regs(vcpu);
4076
4077 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4078 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4079
4080 if (exit_reason == -EINTR) {
4081 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4082 sie_page = container_of(vcpu->arch.sie_block,
4083 struct sie_page, sie_block);
4084 mcck_info = &sie_page->mcck_info;
4085 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4086 return 0;
4087 }
4088
4089 if (vcpu->arch.sie_block->icptcode > 0) {
4090 int rc = kvm_handle_sie_intercept(vcpu);
4091
4092 if (rc != -EOPNOTSUPP)
4093 return rc;
4094 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4095 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4096 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4097 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4098 return -EREMOTE;
4099 } else if (exit_reason != -EFAULT) {
4100 vcpu->stat.exit_null++;
4101 return 0;
4102 } else if (kvm_is_ucontrol(vcpu->kvm)) {
4103 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4104 vcpu->run->s390_ucontrol.trans_exc_code =
4105 current->thread.gmap_addr;
4106 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4107 return -EREMOTE;
4108 } else if (current->thread.gmap_pfault) {
4109 trace_kvm_s390_major_guest_pfault(vcpu);
4110 current->thread.gmap_pfault = 0;
4111 if (kvm_arch_setup_async_pf(vcpu))
4112 return 0;
4113 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4114 }
4115 return vcpu_post_run_fault_in_sie(vcpu);
4116 }
4117
4118 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4119 static int __vcpu_run(struct kvm_vcpu *vcpu)
4120 {
4121 int rc, exit_reason;
4122 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4123
4124 /*
4125 * We try to hold kvm->srcu during most of vcpu_run (except when running
4126 * the guest), so that memslots (and other stuff) are protected
4127 */
4128 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4129
4130 do {
4131 rc = vcpu_pre_run(vcpu);
4132 if (rc)
4133 break;
4134
4135 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4136 /*
4137 * As PF_VCPU will be used in the fault handler, there must be
4138 * no uaccess between guest_enter and guest_exit.
4139 */
4140 local_irq_disable();
4141 guest_enter_irqoff();
4142 __disable_cpu_timer_accounting(vcpu);
4143 local_irq_enable();
4144 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4145 memcpy(sie_page->pv_grregs,
4146 vcpu->run->s.regs.gprs,
4147 sizeof(sie_page->pv_grregs));
4148 }
4149 exit_reason = sie64a(vcpu->arch.sie_block,
4150 vcpu->run->s.regs.gprs);
4151 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4152 memcpy(vcpu->run->s.regs.gprs,
4153 sie_page->pv_grregs,
4154 sizeof(sie_page->pv_grregs));
4155 /*
4156 * We're not allowed to inject interrupts on intercepts
4157 * that leave the guest state in an "in-between" state
4158 * where the next SIE entry will do a continuation.
4159 * Fence interrupts in our "internal" PSW.
4160 */
4161 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4162 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4163 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4164 }
4165 }
4166 local_irq_disable();
4167 __enable_cpu_timer_accounting(vcpu);
4168 guest_exit_irqoff();
4169 local_irq_enable();
4170 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4171
4172 rc = vcpu_post_run(vcpu, exit_reason);
4173 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4174
4175 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4176 return rc;
4177 }
4178
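/*
 * Sync the format-2 parts of the register state from kvm_run into the SIE
 * block; called from sync_regs() for non-protected guests only.
 */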
4179 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4180 {
4181 struct kvm_run *kvm_run = vcpu->run;
4182 struct runtime_instr_cb *riccb;
4183 struct gs_cb *gscb;
4184
4185 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4186 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4187 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4188 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4189 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4190 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4191 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4192 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4193 }
4194 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4195 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4196 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4197 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4198 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4199 kvm_clear_async_pf_completion_queue(vcpu);
4200 }
4201 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4202 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4203 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4204 }
4205 /*
4206 * If userspace sets the riccb (e.g. after migration) to a valid state,
4207 * we should enable RI here instead of doing the lazy enablement.
4208 */
4209 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4210 test_kvm_facility(vcpu->kvm, 64) &&
4211 riccb->v &&
4212 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4213 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4214 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4215 }
4216 /*
4217 * If userspace sets the gscb (e.g. after migration) to non-zero,
4218 * we should enable GS here instead of doing the lazy enablement.
4219 */
4220 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4221 test_kvm_facility(vcpu->kvm, 133) &&
4222 gscb->gssm &&
4223 !vcpu->arch.gs_enabled) {
4224 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4225 vcpu->arch.sie_block->ecb |= ECB_GS;
4226 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4227 vcpu->arch.gs_enabled = 1;
4228 }
4229 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4230 test_kvm_facility(vcpu->kvm, 82)) {
4231 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4232 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4233 }
4234 if (MACHINE_HAS_GS) {
4235 preempt_disable();
4236 __ctl_set_bit(2, 4);
4237 if (current->thread.gs_cb) {
4238 vcpu->arch.host_gscb = current->thread.gs_cb;
4239 save_gs_cb(vcpu->arch.host_gscb);
4240 }
4241 if (vcpu->arch.gs_enabled) {
4242 current->thread.gs_cb = (struct gs_cb *)
4243 &vcpu->run->s.regs.gscb;
4244 restore_gs_cb(current->thread.gs_cb);
4245 }
4246 preempt_enable();
4247 }
4248 /* SIE will load etoken directly from SDNX and therefore kvm_run */
4249 }
4250
4251 static void sync_regs(struct kvm_vcpu *vcpu)
4252 {
4253 struct kvm_run *kvm_run = vcpu->run;
4254
4255 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4256 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4257 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4258 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4259 /* some control register changes require a tlb flush */
4260 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4261 }
4262 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4263 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4264 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4265 }
4266 save_access_regs(vcpu->arch.host_acrs);
4267 restore_access_regs(vcpu->run->s.regs.acrs);
4268 /* save host (userspace) fprs/vrs */
4269 save_fpu_regs();
4270 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4271 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4272 if (MACHINE_HAS_VX)
4273 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4274 else
4275 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4276 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4277 if (test_fp_ctl(current->thread.fpu.fpc))
4278 /* User space provided an invalid FPC, let's clear it */
4279 current->thread.fpu.fpc = 0;
4280
4281 /* Sync fmt2 only data */
4282 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4283 sync_regs_fmt2(vcpu);
4284 } else {
4285 /*
4286 * In several places we have to modify our internal view to
4287 * not do things that are disallowed by the ultravisor. For
4288 * example we must not inject interrupts after specific exits
4289 * (e.g. 112 prefix page not secure). We do this by turning
4290 * off the machine check, external and I/O interrupt bits
4291 * of our PSW copy. To avoid getting validity intercepts, we
4292 * do only accept the condition code from userspace.
4293 */
4294 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4295 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4296 PSW_MASK_CC;
4297 }
4298
4299 kvm_run->kvm_dirty_regs = 0;
4300 }
4301
4302 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4303 {
4304 struct kvm_run *kvm_run = vcpu->run;
4305
4306 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4307 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4308 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4309 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4310 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4311 if (MACHINE_HAS_GS) {
4312 preempt_disable();
4313 __ctl_set_bit(2, 4);
4314 if (vcpu->arch.gs_enabled)
4315 save_gs_cb(current->thread.gs_cb);
4316 current->thread.gs_cb = vcpu->arch.host_gscb;
4317 restore_gs_cb(vcpu->arch.host_gscb);
4318 if (!vcpu->arch.host_gscb)
4319 __ctl_clear_bit(2, 4);
4320 vcpu->arch.host_gscb = NULL;
4321 preempt_enable();
4322 }
4323 /* SIE will save etoken directly into SDNX and therefore kvm_run */
4324 }
4325
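/*
 * Copy the register state visible to userspace from the SIE block back
 * into kvm_run after a run, restoring the host access and floating point
 * registers on the way.
 */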
4326 static void store_regs(struct kvm_vcpu *vcpu)
4327 {
4328 struct kvm_run *kvm_run = vcpu->run;
4329
4330 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4331 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4332 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4333 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4334 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4335 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4336 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4337 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4338 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4339 save_access_regs(vcpu->run->s.regs.acrs);
4340 restore_access_regs(vcpu->arch.host_acrs);
4341 /* Save guest register state */
4342 save_fpu_regs();
4343 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4344 /* Restore will be done lazily at return */
4345 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4346 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4347 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4348 store_regs_fmt2(vcpu);
4349 }
4350
4351 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4352 {
4353 struct kvm_run *kvm_run = vcpu->run;
4354 int rc;
4355
4356 if (kvm_run->immediate_exit)
4357 return -EINTR;
4358
4359 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4360 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4361 return -EINVAL;
4362
4363 vcpu_load(vcpu);
4364
4365 if (guestdbg_exit_pending(vcpu)) {
4366 kvm_s390_prepare_debug_exit(vcpu);
4367 rc = 0;
4368 goto out;
4369 }
4370
4371 kvm_sigset_activate(vcpu);
4372
4373 /*
4374 * no need to check the return value of vcpu_start as it can only have
4375 * an error for protvirt, but protvirt means user-controlled cpu state
4376 */
4377 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4378 kvm_s390_vcpu_start(vcpu);
4379 } else if (is_vcpu_stopped(vcpu)) {
4380 pr_err_ratelimited("can't run stopped vcpu %d\n",
4381 vcpu->vcpu_id);
4382 rc = -EINVAL;
4383 goto out;
4384 }
4385
4386 sync_regs(vcpu);
4387 enable_cpu_timer_accounting(vcpu);
4388
4389 might_fault();
4390 rc = __vcpu_run(vcpu);
4391
4392 if (signal_pending(current) && !rc) {
4393 kvm_run->exit_reason = KVM_EXIT_INTR;
4394 rc = -EINTR;
4395 }
4396
4397 if (guestdbg_exit_pending(vcpu) && !rc) {
4398 kvm_s390_prepare_debug_exit(vcpu);
4399 rc = 0;
4400 }
4401
4402 if (rc == -EREMOTE) {
4403 /* userspace support is needed, kvm_run has been prepared */
4404 rc = 0;
4405 }
4406
4407 disable_cpu_timer_accounting(vcpu);
4408 store_regs(vcpu);
4409
4410 kvm_sigset_deactivate(vcpu);
4411
4412 vcpu->stat.exit_userspace++;
4413 out:
4414 vcpu_put(vcpu);
4415 return rc;
4416 }
4417
4418 /*
4419 * store status at address
4420 * we have two special cases:
4421 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4422 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4423 */
4424 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4425 {
4426 unsigned char archmode = 1;
4427 freg_t fprs[NUM_FPRS];
4428 unsigned int px;
4429 u64 clkcomp, cputm;
4430 int rc;
4431
4432 px = kvm_s390_get_prefix(vcpu);
4433 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4434 if (write_guest_abs(vcpu, 163, &archmode, 1))
4435 return -EFAULT;
4436 gpa = 0;
4437 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4438 if (write_guest_real(vcpu, 163, &archmode, 1))
4439 return -EFAULT;
4440 gpa = px;
4441 } else
4442 gpa -= __LC_FPREGS_SAVE_AREA;
4443
4444 /* manually convert vector registers if necessary */
4445 if (MACHINE_HAS_VX) {
4446 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4447 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4448 fprs, 128);
4449 } else {
4450 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4451 vcpu->run->s.regs.fprs, 128);
4452 }
4453 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4454 vcpu->run->s.regs.gprs, 128);
4455 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4456 &vcpu->arch.sie_block->gpsw, 16);
4457 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4458 &px, 4);
4459 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4460 &vcpu->run->s.regs.fpc, 4);
4461 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4462 &vcpu->arch.sie_block->todpr, 4);
4463 cputm = kvm_s390_get_cpu_timer(vcpu);
4464 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4465 &cputm, 8);
4466 clkcomp = vcpu->arch.sie_block->ckc >> 8;
4467 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4468 &clkcomp, 8);
4469 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4470 &vcpu->run->s.regs.acrs, 64);
4471 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4472 &vcpu->arch.sie_block->gcr, 128);
4473 return rc ? -EFAULT : 0;
4474 }
4475
4476 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4477 {
4478 /*
4479 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4480 * switch in the run ioctl. Let's update our copies before we save
4481 * them into the save area
4482 */
4483 save_fpu_regs();
4484 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4485 save_access_regs(vcpu->run->s.regs.acrs);
4486
4487 return kvm_s390_store_status_unloaded(vcpu, addr);
4488 }
4489
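/*
 * IBS is only kept enabled while a single vcpu is running (see
 * kvm_s390_vcpu_start/stop); these helpers queue the ENABLE/DISABLE
 * requests and cancel a still pending request of the opposite kind.
 */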
4490 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4491 {
4492 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4493 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4494 }
4495
4496 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4497 {
4498 unsigned int i;
4499 struct kvm_vcpu *vcpu;
4500
4501 kvm_for_each_vcpu(i, vcpu, kvm) {
4502 __disable_ibs_on_vcpu(vcpu);
4503 }
4504 }
4505
4506 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4507 {
4508 if (!sclp.has_ibs)
4509 return;
4510 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4511 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4512 }
4513
4514 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4515 {
4516 int i, online_vcpus, r = 0, started_vcpus = 0;
4517
4518 if (!is_vcpu_stopped(vcpu))
4519 return 0;
4520
4521 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4522 /* Only one cpu at a time may enter/leave the STOPPED state. */
4523 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4524 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4525
4526 /* Let's tell the UV that we want to change into the operating state */
4527 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4528 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4529 if (r) {
4530 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4531 return r;
4532 }
4533 }
4534
4535 for (i = 0; i < online_vcpus; i++) {
4536 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4537 started_vcpus++;
4538 }
4539
4540 if (started_vcpus == 0) {
4541 /* we're the only active VCPU -> speed it up */
4542 __enable_ibs_on_vcpu(vcpu);
4543 } else if (started_vcpus == 1) {
4544 /*
4545 * As we are starting a second VCPU, we have to disable
4546 * the IBS facility on all VCPUs to remove potentially
4547 * outstanding ENABLE requests.
4548 */
4549 __disable_ibs_on_all_vcpus(vcpu->kvm);
4550 }
4551
4552 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4553 /*
4554 * The real PSW might have changed due to a RESTART interpreted by the
4555 * ultravisor. We block all interrupts and let the next sie exit
4556 * refresh our view.
4557 */
4558 if (kvm_s390_pv_cpu_is_protected(vcpu))
4559 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4560 /*
4561 * Another VCPU might have used IBS while we were offline.
4562 * Let's play safe and flush the VCPU at startup.
4563 */
4564 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4565 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4566 return 0;
4567 }
4568
4569 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4570 {
4571 int i, online_vcpus, r = 0, started_vcpus = 0;
4572 struct kvm_vcpu *started_vcpu = NULL;
4573
4574 if (is_vcpu_stopped(vcpu))
4575 return 0;
4576
4577 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4578 /* Only one cpu at a time may enter/leave the STOPPED state. */
4579 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4580 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4581
4582 /* Let's tell the UV that we want to change into the stopped state */
4583 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4584 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4585 if (r) {
4586 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4587 return r;
4588 }
4589 }
4590
4591 /*
4592 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
4593 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
4594 * have been fully processed. This will ensure that the VCPU
4595 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
4596 */
4597 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4598 kvm_s390_clear_stop_irq(vcpu);
4599
4600 __disable_ibs_on_vcpu(vcpu);
4601
4602 for (i = 0; i < online_vcpus; i++) {
4603 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4604 started_vcpus++;
4605 started_vcpu = vcpu->kvm->vcpus[i];
4606 }
4607 }
4608
4609 if (started_vcpus == 1) {
4610 /*
4611 * As we only have one VCPU left, we want to enable the
4612 * IBS facility for that VCPU to speed it up.
4613 */
4614 __enable_ibs_on_vcpu(started_vcpu);
4615 }
4616
4617 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4618 return 0;
4619 }
4620
4621 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4622 struct kvm_enable_cap *cap)
4623 {
4624 int r;
4625
4626 if (cap->flags)
4627 return -EINVAL;
4628
4629 switch (cap->cap) {
4630 case KVM_CAP_S390_CSS_SUPPORT:
4631 if (!vcpu->kvm->arch.css_support) {
4632 vcpu->kvm->arch.css_support = 1;
4633 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4634 trace_kvm_s390_enable_css(vcpu->kvm);
4635 }
4636 r = 0;
4637 break;
4638 default:
4639 r = -EINVAL;
4640 break;
4641 }
4642 return r;
4643 }
4644
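/*
 * KVM_S390_MEM_OP access to the secure instruction data area (sida) of a
 * protected vcpu; rejects requests that are out of bounds or that target
 * a non-protected vcpu.
 */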
4645 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4646 struct kvm_s390_mem_op *mop)
4647 {
4648 void __user *uaddr = (void __user *)mop->buf;
4649 int r = 0;
4650
4651 if (mop->flags || !mop->size)
4652 return -EINVAL;
4653 if (mop->size + mop->sida_offset < mop->size)
4654 return -EINVAL;
4655 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4656 return -E2BIG;
4657 if (!kvm_s390_pv_cpu_is_protected(vcpu))
4658 return -EINVAL;
4659
4660 switch (mop->op) {
4661 case KVM_S390_MEMOP_SIDA_READ:
4662 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4663 mop->sida_offset), mop->size))
4664 r = -EFAULT;
4665
4666 break;
4667 case KVM_S390_MEMOP_SIDA_WRITE:
4668 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4669 mop->sida_offset), uaddr, mop->size))
4670 r = -EFAULT;
4671 break;
4672 }
4673 return r;
4674 }
4675 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4676 struct kvm_s390_mem_op *mop)
4677 {
4678 void __user *uaddr = (void __user *)mop->buf;
4679 void *tmpbuf = NULL;
4680 int r = 0;
4681 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4682 | KVM_S390_MEMOP_F_CHECK_ONLY;
4683
4684 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4685 return -EINVAL;
4686
4687 if (mop->size > MEM_OP_MAX_SIZE)
4688 return -E2BIG;
4689
4690 if (kvm_s390_pv_cpu_is_protected(vcpu))
4691 return -EINVAL;
4692
4693 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4694 tmpbuf = vmalloc(mop->size);
4695 if (!tmpbuf)
4696 return -ENOMEM;
4697 }
4698
4699 switch (mop->op) {
4700 case KVM_S390_MEMOP_LOGICAL_READ:
4701 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4702 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4703 mop->size, GACC_FETCH);
4704 break;
4705 }
4706 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4707 if (r == 0) {
4708 if (copy_to_user(uaddr, tmpbuf, mop->size))
4709 r = -EFAULT;
4710 }
4711 break;
4712 case KVM_S390_MEMOP_LOGICAL_WRITE:
4713 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4714 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4715 mop->size, GACC_STORE);
4716 break;
4717 }
4718 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4719 r = -EFAULT;
4720 break;
4721 }
4722 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4723 break;
4724 }
4725
4726 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4727 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4728
4729 vfree(tmpbuf);
4730 return r;
4731 }
4732
4733 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4734 struct kvm_s390_mem_op *mop)
4735 {
4736 int r, srcu_idx;
4737
4738 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4739
4740 switch (mop->op) {
4741 case KVM_S390_MEMOP_LOGICAL_READ:
4742 case KVM_S390_MEMOP_LOGICAL_WRITE:
4743 r = kvm_s390_guest_mem_op(vcpu, mop);
4744 break;
4745 case KVM_S390_MEMOP_SIDA_READ:
4746 case KVM_S390_MEMOP_SIDA_WRITE:
4747 /* we are locked against sida going away by the vcpu->mutex */
4748 r = kvm_s390_guest_sida_op(vcpu, mop);
4749 break;
4750 default:
4751 r = -EINVAL;
4752 }
4753
4754 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4755 return r;
4756 }
4757
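/*
 * Interrupt injection ioctls are handled here without vcpu_load(), unlike
 * the ioctls dispatched via kvm_arch_vcpu_ioctl() below.
 */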
4758 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4759 unsigned int ioctl, unsigned long arg)
4760 {
4761 struct kvm_vcpu *vcpu = filp->private_data;
4762 void __user *argp = (void __user *)arg;
4763
4764 switch (ioctl) {
4765 case KVM_S390_IRQ: {
4766 struct kvm_s390_irq s390irq;
4767
4768 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4769 return -EFAULT;
4770 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4771 }
4772 case KVM_S390_INTERRUPT: {
4773 struct kvm_s390_interrupt s390int;
4774 struct kvm_s390_irq s390irq = {};
4775
4776 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4777 return -EFAULT;
4778 if (s390int_to_s390irq(&s390int, &s390irq))
4779 return -EINVAL;
4780 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4781 }
4782 }
4783 return -ENOIOCTLCMD;
4784 }
4785
4786 long kvm_arch_vcpu_ioctl(struct file *filp,
4787 unsigned int ioctl, unsigned long arg)
4788 {
4789 struct kvm_vcpu *vcpu = filp->private_data;
4790 void __user *argp = (void __user *)arg;
4791 int idx;
4792 long r;
4793 u16 rc, rrc;
4794
4795 vcpu_load(vcpu);
4796
4797 switch (ioctl) {
4798 case KVM_S390_STORE_STATUS:
4799 idx = srcu_read_lock(&vcpu->kvm->srcu);
4800 r = kvm_s390_store_status_unloaded(vcpu, arg);
4801 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4802 break;
4803 case KVM_S390_SET_INITIAL_PSW: {
4804 psw_t psw;
4805
4806 r = -EFAULT;
4807 if (copy_from_user(&psw, argp, sizeof(psw)))
4808 break;
4809 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4810 break;
4811 }
4812 case KVM_S390_CLEAR_RESET:
4813 r = 0;
4814 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4815 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4816 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4817 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4818 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4819 rc, rrc);
4820 }
4821 break;
4822 case KVM_S390_INITIAL_RESET:
4823 r = 0;
4824 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4825 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4826 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4827 UVC_CMD_CPU_RESET_INITIAL,
4828 &rc, &rrc);
4829 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4830 rc, rrc);
4831 }
4832 break;
4833 case KVM_S390_NORMAL_RESET:
4834 r = 0;
4835 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4836 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4837 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4838 UVC_CMD_CPU_RESET, &rc, &rrc);
4839 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4840 rc, rrc);
4841 }
4842 break;
4843 case KVM_SET_ONE_REG:
4844 case KVM_GET_ONE_REG: {
4845 struct kvm_one_reg reg;
4846 r = -EINVAL;
4847 if (kvm_s390_pv_cpu_is_protected(vcpu))
4848 break;
4849 r = -EFAULT;
4850 if (copy_from_user(&reg, argp, sizeof(reg)))
4851 break;
4852 if (ioctl == KVM_SET_ONE_REG)
4853 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4854 else
4855 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4856 break;
4857 }
4858 #ifdef CONFIG_KVM_S390_UCONTROL
4859 case KVM_S390_UCAS_MAP: {
4860 struct kvm_s390_ucas_mapping ucasmap;
4861
4862 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4863 r = -EFAULT;
4864 break;
4865 }
4866
4867 if (!kvm_is_ucontrol(vcpu->kvm)) {
4868 r = -EINVAL;
4869 break;
4870 }
4871
4872 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4873 ucasmap.vcpu_addr, ucasmap.length);
4874 break;
4875 }
4876 case KVM_S390_UCAS_UNMAP: {
4877 struct kvm_s390_ucas_mapping ucasmap;
4878
4879 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4880 r = -EFAULT;
4881 break;
4882 }
4883
4884 if (!kvm_is_ucontrol(vcpu->kvm)) {
4885 r = -EINVAL;
4886 break;
4887 }
4888
4889 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4890 ucasmap.length);
4891 break;
4892 }
4893 #endif
4894 case KVM_S390_VCPU_FAULT: {
4895 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4896 break;
4897 }
4898 case KVM_ENABLE_CAP:
4899 {
4900 struct kvm_enable_cap cap;
4901 r = -EFAULT;
4902 if (copy_from_user(&cap, argp, sizeof(cap)))
4903 break;
4904 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4905 break;
4906 }
4907 case KVM_S390_MEM_OP: {
4908 struct kvm_s390_mem_op mem_op;
4909
4910 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4911 r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4912 else
4913 r = -EFAULT;
4914 break;
4915 }
4916 case KVM_S390_SET_IRQ_STATE: {
4917 struct kvm_s390_irq_state irq_state;
4918
4919 r = -EFAULT;
4920 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4921 break;
4922 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4923 irq_state.len == 0 ||
4924 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4925 r = -EINVAL;
4926 break;
4927 }
4928 /* do not use irq_state.flags, it will break old QEMUs */
4929 r = kvm_s390_set_irq_state(vcpu,
4930 (void __user *) irq_state.buf,
4931 irq_state.len);
4932 break;
4933 }
4934 case KVM_S390_GET_IRQ_STATE: {
4935 struct kvm_s390_irq_state irq_state;
4936
4937 r = -EFAULT;
4938 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4939 break;
4940 if (irq_state.len == 0) {
4941 r = -EINVAL;
4942 break;
4943 }
4944 /* do not use irq_state.flags, it will break old QEMUs */
4945 r = kvm_s390_get_irq_state(vcpu,
4946 (__u8 __user *) irq_state.buf,
4947 irq_state.len);
4948 break;
4949 }
4950 default:
4951 r = -ENOTTY;
4952 }
4953
4954 vcpu_put(vcpu);
4955 return r;
4956 }
4957
4958 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4959 {
4960 #ifdef CONFIG_KVM_S390_UCONTROL
4961 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4962 && (kvm_is_ucontrol(vcpu->kvm))) {
4963 vmf->page = virt_to_page(vcpu->arch.sie_block);
4964 get_page(vmf->page);
4965 return 0;
4966 }
4967 #endif
4968 return VM_FAULT_SIGBUS;
4969 }
4970
4971 /* Section: memory related */
4972 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4973 struct kvm_memory_slot *memslot,
4974 const struct kvm_userspace_memory_region *mem,
4975 enum kvm_mr_change change)
4976 {
4977 /* A few sanity checks. Memory slots have to start and end on a segment
4978 boundary (1MB). The memory in userland may be fragmented into various
4979 different vmas. It is okay to mmap() and munmap() stuff in this slot
4980 after doing this call at any time */
4981
4982 if (mem->userspace_addr & 0xffffful)
4983 return -EINVAL;
4984
4985 if (mem->memory_size & 0xffffful)
4986 return -EINVAL;
4987
4988 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4989 return -EINVAL;
4990
4991 /* When we are protected, we should not change the memory slots */
4992 if (kvm_s390_pv_get_handle(kvm))
4993 return -EINVAL;
4994 return 0;
4995 }
4996
4997 void kvm_arch_commit_memory_region(struct kvm *kvm,
4998 const struct kvm_userspace_memory_region *mem,
4999 struct kvm_memory_slot *old,
5000 const struct kvm_memory_slot *new,
5001 enum kvm_mr_change change)
5002 {
5003 int rc = 0;
5004
5005 switch (change) {
5006 case KVM_MR_DELETE:
5007 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5008 old->npages * PAGE_SIZE);
5009 break;
5010 case KVM_MR_MOVE:
5011 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5012 old->npages * PAGE_SIZE);
5013 if (rc)
5014 break;
5015 fallthrough;
5016 case KVM_MR_CREATE:
5017 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5018 mem->guest_phys_addr, mem->memory_size);
5019 break;
5020 case KVM_MR_FLAGS_ONLY:
5021 break;
5022 default:
5023 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5024 }
5025 if (rc)
5026 pr_warn("failed to commit memory region\n");
5027 return;
5028 }
5029
5030 static inline unsigned long nonhyp_mask(int i)
5031 {
5032 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5033
5034 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5035 }
5036
5037 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5038 {
5039 vcpu->valid_wakeup = false;
5040 }
5041
5042 static int __init kvm_s390_init(void)
5043 {
5044 int i;
5045
5046 if (!sclp.has_sief2) {
5047 pr_info("SIE is not available\n");
5048 return -ENODEV;
5049 }
5050
5051 if (nested && hpage) {
5052 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5053 return -EINVAL;
5054 }
5055
5056 for (i = 0; i < 16; i++)
5057 kvm_s390_fac_base[i] |=
5058 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
5059
5060 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5061 }
5062
5063 static void __exit kvm_s390_exit(void)
5064 {
5065 kvm_exit();
5066 }
5067
5068 module_init(kvm_s390_init);
5069 module_exit(kvm_s390_exit);
5070
5071 /*
5072 * Enable autoloading of the kvm module.
5073 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5074 * since x86 takes a different approach.
5075 */
5076 #include <linux/miscdevice.h>
5077 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5078 MODULE_ALIAS("devname:kvm");
5079