// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS(),
	STATS_DESC_COUNTER(VM, inject_io),
	STATS_DESC_COUNTER(VM, inject_float_mchk),
	STATS_DESC_COUNTER(VM, inject_pfault_done),
	STATS_DESC_COUNTER(VM, inject_service_signal),
	STATS_DESC_COUNTER(VM, inject_virtio)
};

const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};

const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, exit_userspace),
	STATS_DESC_COUNTER(VCPU, exit_null),
	STATS_DESC_COUNTER(VCPU, exit_external_request),
	STATS_DESC_COUNTER(VCPU, exit_io_request),
	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
	STATS_DESC_COUNTER(VCPU, exit_stop_request),
	STATS_DESC_COUNTER(VCPU, exit_validity),
	STATS_DESC_COUNTER(VCPU, exit_instruction),
	STATS_DESC_COUNTER(VCPU, exit_pei),
	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
	STATS_DESC_COUNTER(VCPU, instruction_lctl),
	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
	STATS_DESC_COUNTER(VCPU, instruction_stctl),
	STATS_DESC_COUNTER(VCPU, instruction_stctg),
	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
	STATS_DESC_COUNTER(VCPU, deliver_ckc),
	STATS_DESC_COUNTER(VCPU, deliver_cputm),
	STATS_DESC_COUNTER(VCPU, deliver_external_call),
	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
	STATS_DESC_COUNTER(VCPU, deliver_virtio),
	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
	STATS_DESC_COUNTER(VCPU, deliver_program),
	STATS_DESC_COUNTER(VCPU, deliver_io),
	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
	STATS_DESC_COUNTER(VCPU, exit_wait_state),
	STATS_DESC_COUNTER(VCPU, inject_ckc),
	STATS_DESC_COUNTER(VCPU, inject_cputm),
	STATS_DESC_COUNTER(VCPU, inject_external_call),
	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
	STATS_DESC_COUNTER(VCPU, inject_mchk),
	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
	STATS_DESC_COUNTER(VCPU, inject_program),
	STATS_DESC_COUNTER(VCPU, inject_restart),
	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
	STATS_DESC_COUNTER(VCPU, instruction_epsw),
	STATS_DESC_COUNTER(VCPU, instruction_gs),
	STATS_DESC_COUNTER(VCPU, instruction_io_other),
	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
	STATS_DESC_COUNTER(VCPU, instruction_ptff),
	STATS_DESC_COUNTER(VCPU, instruction_sck),
	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
	STATS_DESC_COUNTER(VCPU, instruction_stidp),
	STATS_DESC_COUNTER(VCPU, instruction_spx),
	STATS_DESC_COUNTER(VCPU, instruction_stpx),
	STATS_DESC_COUNTER(VCPU, instruction_stap),
	STATS_DESC_COUNTER(VCPU, instruction_iske),
	STATS_DESC_COUNTER(VCPU, instruction_ri),
	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
	STATS_DESC_COUNTER(VCPU, instruction_sske),
	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
	STATS_DESC_COUNTER(VCPU, instruction_stsi),
	STATS_DESC_COUNTER(VCPU, instruction_stfl),
	STATS_DESC_COUNTER(VCPU, instruction_tb),
	STATS_DESC_COUNTER(VCPU, instruction_tpi),
	STATS_DESC_COUNTER(VCPU, instruction_tprot),
	STATS_DESC_COUNTER(VCPU, instruction_tsch),
	STATS_DESC_COUNTER(VCPU, instruction_sie),
	STATS_DESC_COUNTER(VCPU, instruction_essa),
	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
	STATS_DESC_COUNTER(VCPU, pfault_sync)
};

const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling.  >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/* maximum diag9c forwarding per second */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		     sizeof(stfle_fac_list));

	return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}

/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);

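/*
 * Adjust the epoch (and, with the multiple-epoch facility, the epoch index)
 * of a SIE control block after the host TOD clock changed by @delta.
 */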
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void *opaque)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

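/* mark a cpu feature as available to guests (bit numbers use MSB-0 ordering) */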
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

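/* test whether PERFORM LOCKED OPERATION function code @nr is installed ("test bit" form) */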
static inline int plo_test_bit(unsigned char nr)
{
	unsigned long function = (unsigned long)nr | 0x100;
	int cc;

	asm volatile(
		"	lgr	0,%[function]\n"
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: [function] "d" (function)
		: "cc", "0");
	return cc == 0;
}

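/*
 * Execute the query function (GR0 = 0) of the 32-bit instruction @opcode and
 * store the returned parameter block at @query.
 */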
static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	asm volatile(
		"	lghi	0,0\n"
		"	lgr	1,%[query]\n"
		/* Parameter registers are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
		: "cc", "memory", "0", "1");
}

#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939

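/* probe the host for the CPU features and subfunctions that can be offered to guests */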
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (test_facility(155)) /* MSA9 */
		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kdsa);

	if (test_facility(150)) /* SORTL */
		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

	if (test_facility(151)) /* DFLTCC */
		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

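/*
 * Module initialization: set up the debug logs, probe host CPU features,
 * register the FLIC device ops and initialize the GIB.
 */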
int kvm_arch_init(void *opaque)
{
	int rc = -ENOMEM;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf_uv)
		goto out;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
		goto out;

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("A FLIC registration call failed with rc=%d\n", rc);
		goto out;
	}

	rc = kvm_s390_gib_init(GAL_ISC);
	if (rc)
		goto out;

	return 0;

out:
	kvm_arch_exit();
	return rc;
}

void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	debug_unregister(kvm_s390_dbf);
	debug_unregister(kvm_s390_dbf_uv);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
	case KVM_CAP_S390_VCPU_RESETS:
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_S390_DIAG318:
		r = 1;
		break;
	case KVM_CAP_SET_GUEST_DEBUG2:
		r = KVM_GUESTDBG_VALID_MASK;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	case KVM_CAP_S390_PROTECTED:
		r = is_prot_virt_host();
		break;
	default:
		r = 0;
	}
	return r;
}

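/*
 * Collect the dirty bits from the gmap, one segment (256 pages) at a time,
 * and transfer them into the memslot's dirty bitmap.
 */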
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memory_slot *memslot;
	int is_dirty;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

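/* request interception of operation exceptions on all VCPUs */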
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			if (test_facility(148)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 148);
				set_kvm_facility(kvm->arch.model.fac_list, 148);
			}
			if (test_facility(152)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 152);
				set_kvm_facility(kvm->arch.model.fac_list, 152);
			}
			if (test_facility(192)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 192);
				set_kvm_facility(kvm->arch.model.fac_list, 192);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_HPAGE_1M:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			r = -EBUSY;
		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
			r = -EINVAL;
		else {
			r = 0;
			mmap_write_lock(kvm->mm);
			kvm->mm->context.allow_gmap_hpage_1m = 1;
			mmap_write_unlock(kvm->mm);
			/*
			 * We might have to create fake 4k page
			 * tables. To avoid that the hardware works on
			 * stale PGSTEs, we emulate these instructions.
			 */
			kvm->arch.use_skf = 0;
			kvm->arch.use_pfmfi = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			ret = -EBUSY;
		else if (kvm->mm->context.allow_gmap_hpage_1m)
			ret = -EINVAL;
		else {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

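/*
 * Block all VCPUs, reapply the crypto configuration to each of them and
 * kick them out of VSIE so that shadow CRYCBs get recreated.
 */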
void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}

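/* make a synchronous request pending on every VCPU of the VM */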
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
		ms = slots->memslots + slotnr;
		if (!ms->dirty_bitmap)
			return -EINVAL;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	__kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	__kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	mutex_lock(&kvm->lock);
	/*
	 * For protected guests, the TOD is managed by the ultravisor, so trying
	 * to change it will never bring the expected results.
	 */
	if (kvm_s390_pv_is_protected(kvm)) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

out_unlock:
	mutex_unlock(&kvm->lock);
	return ret;
}

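/*
 * Read the host TOD clock and translate it into the guest's view by applying
 * the VM's epoch (and epoch index, if the multiple-epoch facility is in use).
 */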
static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	union tod_clock clk;

	preempt_disable();

	store_tod_clock_ext(&clk);

	gtod->tod = clk.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
		if (gtod->tod < clk.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

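/*
 * Set the guest CPU model (cpuid, IBC and facility list) from user space;
 * only possible while no VCPUs have been created yet.
 */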
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}

	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
		mutex_unlock(&kvm->lock);
		return -EFAULT;
	}
	mutex_unlock(&kvm->lock);

	VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

kvm_s390_get_processor(struct kvm * kvm,struct kvm_device_attr * attr)1454 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1455 {
1456 struct kvm_s390_vm_cpu_processor *proc;
1457 int ret = 0;
1458
1459 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1460 if (!proc) {
1461 ret = -ENOMEM;
1462 goto out;
1463 }
1464 proc->cpuid = kvm->arch.model.cpuid;
1465 proc->ibc = kvm->arch.model.ibc;
1466 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1467 S390_ARCH_FAC_LIST_SIZE_BYTE);
1468 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1469 kvm->arch.model.ibc,
1470 kvm->arch.model.cpuid);
1471 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1472 kvm->arch.model.fac_list[0],
1473 kvm->arch.model.fac_list[1],
1474 kvm->arch.model.fac_list[2]);
1475 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1476 ret = -EFAULT;
1477 kfree(proc);
1478 out:
1479 return ret;
1480 }
1481
kvm_s390_get_machine(struct kvm * kvm,struct kvm_device_attr * attr)1482 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1483 {
1484 struct kvm_s390_vm_cpu_machine *mach;
1485 int ret = 0;
1486
1487 mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1488 if (!mach) {
1489 ret = -ENOMEM;
1490 goto out;
1491 }
1492 get_cpu_id((struct cpuid *) &mach->cpuid);
1493 mach->ibc = sclp.ibc;
1494 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1495 S390_ARCH_FAC_LIST_SIZE_BYTE);
1496 memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1497 sizeof(stfle_fac_list));
1498 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1499 kvm->arch.model.ibc,
1500 kvm->arch.model.cpuid);
1501 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1502 mach->fac_mask[0],
1503 mach->fac_mask[1],
1504 mach->fac_mask[2]);
1505 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1506 mach->fac_list[0],
1507 mach->fac_list[1],
1508 mach->fac_list[2]);
1509 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1510 ret = -EFAULT;
1511 kfree(mach);
1512 out:
1513 return ret;
1514 }
1515
kvm_s390_get_processor_feat(struct kvm * kvm,struct kvm_device_attr * attr)1516 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1517 struct kvm_device_attr *attr)
1518 {
1519 struct kvm_s390_vm_cpu_feat data;
1520
1521 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1522 KVM_S390_VM_CPU_FEAT_NR_BITS);
1523 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1524 return -EFAULT;
1525 VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1526 data.feat[0],
1527 data.feat[1],
1528 data.feat[2]);
1529 return 0;
1530 }
1531
kvm_s390_get_machine_feat(struct kvm * kvm,struct kvm_device_attr * attr)1532 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1533 struct kvm_device_attr *attr)
1534 {
1535 struct kvm_s390_vm_cpu_feat data;
1536
1537 bitmap_copy((unsigned long *) data.feat,
1538 kvm_s390_available_cpu_feat,
1539 KVM_S390_VM_CPU_FEAT_NR_BITS);
1540 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1541 return -EFAULT;
1542 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1543 data.feat[0],
1544 data.feat[1],
1545 data.feat[2]);
1546 return 0;
1547 }
1548
kvm_s390_get_processor_subfunc(struct kvm * kvm,struct kvm_device_attr * attr)1549 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1550 struct kvm_device_attr *attr)
1551 {
1552 if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1553 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1554 return -EFAULT;
1555
1556 VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1557 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1558 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1559 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1560 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1561 VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1562 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1563 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1564 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1565 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1566 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1567 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1568 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1569 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1570 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
1571 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1572 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1573 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1574 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1575 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1576 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1577 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1578 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1579 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1580 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1581 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1582 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1583 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1584 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1585 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1586 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1587 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1588 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1589 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1590 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1591 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1592 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1593 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1594 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1595 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1596 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1597 VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1598 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1599 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1600 VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1601 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1602 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1603 VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1604 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1605 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1606 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1607 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1608 VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1609 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1610 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1611 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1612 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1613
1614 return 0;
1615 }
1616
kvm_s390_get_machine_subfunc(struct kvm * kvm,struct kvm_device_attr * attr)1617 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1618 struct kvm_device_attr *attr)
1619 {
1620 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1621 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1622 return -EFAULT;
1623
1624 VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1625 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1626 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1627 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1628 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1629 VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
1630 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1631 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1632 VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
1633 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1634 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1635 VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
1636 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1637 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1638 VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
1639 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1640 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1641 VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
1642 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1643 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1644 VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
1645 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1646 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1647 VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
1648 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1649 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1650 VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
1651 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1652 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1653 VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
1654 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1655 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1656 VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
1657 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1658 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1659 VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
1660 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1661 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1662 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
1663 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1664 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1665 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
1666 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1667 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1668 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
1669 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1670 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1671 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1672 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1673 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1674 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1675 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1676 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1677 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1678 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1679 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1680 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1681
1682 return 0;
1683 }
1684
1685 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1686 {
1687 int ret = -ENXIO;
1688
1689 switch (attr->attr) {
1690 case KVM_S390_VM_CPU_PROCESSOR:
1691 ret = kvm_s390_get_processor(kvm, attr);
1692 break;
1693 case KVM_S390_VM_CPU_MACHINE:
1694 ret = kvm_s390_get_machine(kvm, attr);
1695 break;
1696 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1697 ret = kvm_s390_get_processor_feat(kvm, attr);
1698 break;
1699 case KVM_S390_VM_CPU_MACHINE_FEAT:
1700 ret = kvm_s390_get_machine_feat(kvm, attr);
1701 break;
1702 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1703 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1704 break;
1705 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1706 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1707 break;
1708 }
1709 return ret;
1710 }
1711
1712 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1713 {
1714 int ret;
1715
1716 switch (attr->group) {
1717 case KVM_S390_VM_MEM_CTRL:
1718 ret = kvm_s390_set_mem_control(kvm, attr);
1719 break;
1720 case KVM_S390_VM_TOD:
1721 ret = kvm_s390_set_tod(kvm, attr);
1722 break;
1723 case KVM_S390_VM_CPU_MODEL:
1724 ret = kvm_s390_set_cpu_model(kvm, attr);
1725 break;
1726 case KVM_S390_VM_CRYPTO:
1727 ret = kvm_s390_vm_set_crypto(kvm, attr);
1728 break;
1729 case KVM_S390_VM_MIGRATION:
1730 ret = kvm_s390_vm_set_migration(kvm, attr);
1731 break;
1732 default:
1733 ret = -ENXIO;
1734 break;
1735 }
1736
1737 return ret;
1738 }
1739
1740 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1741 {
1742 int ret;
1743
1744 switch (attr->group) {
1745 case KVM_S390_VM_MEM_CTRL:
1746 ret = kvm_s390_get_mem_control(kvm, attr);
1747 break;
1748 case KVM_S390_VM_TOD:
1749 ret = kvm_s390_get_tod(kvm, attr);
1750 break;
1751 case KVM_S390_VM_CPU_MODEL:
1752 ret = kvm_s390_get_cpu_model(kvm, attr);
1753 break;
1754 case KVM_S390_VM_MIGRATION:
1755 ret = kvm_s390_vm_get_migration(kvm, attr);
1756 break;
1757 default:
1758 ret = -ENXIO;
1759 break;
1760 }
1761
1762 return ret;
1763 }
1764
1765 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1766 {
1767 int ret;
1768
1769 switch (attr->group) {
1770 case KVM_S390_VM_MEM_CTRL:
1771 switch (attr->attr) {
1772 case KVM_S390_VM_MEM_ENABLE_CMMA:
1773 case KVM_S390_VM_MEM_CLR_CMMA:
1774 ret = sclp.has_cmma ? 0 : -ENXIO;
1775 break;
1776 case KVM_S390_VM_MEM_LIMIT_SIZE:
1777 ret = 0;
1778 break;
1779 default:
1780 ret = -ENXIO;
1781 break;
1782 }
1783 break;
1784 case KVM_S390_VM_TOD:
1785 switch (attr->attr) {
1786 case KVM_S390_VM_TOD_LOW:
1787 case KVM_S390_VM_TOD_HIGH:
1788 ret = 0;
1789 break;
1790 default:
1791 ret = -ENXIO;
1792 break;
1793 }
1794 break;
1795 case KVM_S390_VM_CPU_MODEL:
1796 switch (attr->attr) {
1797 case KVM_S390_VM_CPU_PROCESSOR:
1798 case KVM_S390_VM_CPU_MACHINE:
1799 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1800 case KVM_S390_VM_CPU_MACHINE_FEAT:
1801 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1802 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1803 ret = 0;
1804 break;
1805 default:
1806 ret = -ENXIO;
1807 break;
1808 }
1809 break;
1810 case KVM_S390_VM_CRYPTO:
1811 switch (attr->attr) {
1812 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1813 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1814 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1815 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1816 ret = 0;
1817 break;
1818 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1819 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1820 ret = ap_instructions_available() ? 0 : -ENXIO;
1821 break;
1822 default:
1823 ret = -ENXIO;
1824 break;
1825 }
1826 break;
1827 case KVM_S390_VM_MIGRATION:
1828 ret = 0;
1829 break;
1830 default:
1831 ret = -ENXIO;
1832 break;
1833 }
1834
1835 return ret;
1836 }
1837
1838 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1839 {
1840 uint8_t *keys;
1841 uint64_t hva;
1842 int srcu_idx, i, r = 0;
1843
1844 if (args->flags != 0)
1845 return -EINVAL;
1846
1847 /* Is this guest using storage keys? */
1848 if (!mm_uses_skeys(current->mm))
1849 return KVM_S390_GET_SKEYS_NONE;
1850
1851 /* Enforce sane limit on memory allocation */
1852 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1853 return -EINVAL;
1854
1855 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1856 if (!keys)
1857 return -ENOMEM;
1858
1859 mmap_read_lock(current->mm);
1860 srcu_idx = srcu_read_lock(&kvm->srcu);
1861 for (i = 0; i < args->count; i++) {
1862 hva = gfn_to_hva(kvm, args->start_gfn + i);
1863 if (kvm_is_error_hva(hva)) {
1864 r = -EFAULT;
1865 break;
1866 }
1867
1868 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1869 if (r)
1870 break;
1871 }
1872 srcu_read_unlock(&kvm->srcu, srcu_idx);
1873 mmap_read_unlock(current->mm);
1874
1875 if (!r) {
1876 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1877 sizeof(uint8_t) * args->count);
1878 if (r)
1879 r = -EFAULT;
1880 }
1881
1882 kvfree(keys);
1883 return r;
1884 }
1885
1886 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1887 {
1888 uint8_t *keys;
1889 uint64_t hva;
1890 int srcu_idx, i, r = 0;
1891 bool unlocked;
1892
1893 if (args->flags != 0)
1894 return -EINVAL;
1895
1896 /* Enforce sane limit on memory allocation */
1897 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1898 return -EINVAL;
1899
1900 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1901 if (!keys)
1902 return -ENOMEM;
1903
1904 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1905 sizeof(uint8_t) * args->count);
1906 if (r) {
1907 r = -EFAULT;
1908 goto out;
1909 }
1910
1911 /* Enable storage key handling for the guest */
1912 r = s390_enable_skey();
1913 if (r)
1914 goto out;
1915
1916 i = 0;
1917 mmap_read_lock(current->mm);
1918 srcu_idx = srcu_read_lock(&kvm->srcu);
1919 while (i < args->count) {
1920 unlocked = false;
1921 hva = gfn_to_hva(kvm, args->start_gfn + i);
1922 if (kvm_is_error_hva(hva)) {
1923 r = -EFAULT;
1924 break;
1925 }
1926
1927 /* Lowest order bit is reserved */
1928 if (keys[i] & 0x01) {
1929 r = -EINVAL;
1930 break;
1931 }
1932
1933 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1934 if (r) {
1935 r = fixup_user_fault(current->mm, hva,
1936 FAULT_FLAG_WRITE, &unlocked);
1937 if (r)
1938 break;
1939 }
1940 if (!r)
1941 i++;
1942 }
1943 srcu_read_unlock(&kvm->srcu, srcu_idx);
1944 mmap_read_unlock(current->mm);
1945 out:
1946 kvfree(keys);
1947 return r;
1948 }
1949
1950 /*
1951 * Base address and length must be sent at the start of each block, therefore
1952 * it's cheaper to send some clean data, as long as it's less than the size of
1953 * two longs.
1954 */
1955 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1956 /* for consistency */
1957 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1958
1959 /*
1960 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1961 * address falls in a hole. In that case the index of one of the memslots
1962 * bordering the hole is returned.
1963 */
1964 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1965 {
1966 int start = 0, end = slots->used_slots;
1967 int slot = atomic_read(&slots->last_used_slot);
1968 struct kvm_memory_slot *memslots = slots->memslots;
1969
1970 if (gfn >= memslots[slot].base_gfn &&
1971 gfn < memslots[slot].base_gfn + memslots[slot].npages)
1972 return slot;
1973
1974 while (start < end) {
1975 slot = start + (end - start) / 2;
1976
1977 if (gfn >= memslots[slot].base_gfn)
1978 end = slot;
1979 else
1980 start = slot + 1;
1981 }
1982
1983 if (start >= slots->used_slots)
1984 return slots->used_slots - 1;
1985
1986 if (gfn >= memslots[start].base_gfn &&
1987 gfn < memslots[start].base_gfn + memslots[start].npages) {
1988 atomic_set(&slots->last_used_slot, start);
1989 }
1990
1991 return start;
1992 }
1993
1994 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1995 u8 *res, unsigned long bufsize)
1996 {
1997 unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1998
1999 args->count = 0;
2000 while (args->count < bufsize) {
2001 hva = gfn_to_hva(kvm, cur_gfn);
2002 /*
2003 * We return an error if the first value was invalid, but we
2004 * return successfully if at least one value was copied.
2005 */
2006 if (kvm_is_error_hva(hva))
2007 return args->count ? 0 : -EFAULT;
2008 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2009 pgstev = 0;
2010 res[args->count++] = (pgstev >> 24) & 0x43;
2011 cur_gfn++;
2012 }
2013
2014 return 0;
2015 }
2016
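/*
 * Return the guest frame number of the next page with the CMMA dirty bit
 * set, starting the search at cur_gfn and continuing across memslots.
 * kvm_s390_get_cmma() uses the result to decide how far to read ahead and
 * when to stop collecting attributes.
 */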
2017 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2018 unsigned long cur_gfn)
2019 {
2020 int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
2021 struct kvm_memory_slot *ms = slots->memslots + slotidx;
2022 unsigned long ofs = cur_gfn - ms->base_gfn;
2023
2024 if (ms->base_gfn + ms->npages <= cur_gfn) {
2025 slotidx--;
2026 /* If we are above the highest slot, wrap around */
2027 if (slotidx < 0)
2028 slotidx = slots->used_slots - 1;
2029
2030 ms = slots->memslots + slotidx;
2031 ofs = 0;
2032 }
2033
2034 if (cur_gfn < ms->base_gfn)
2035 ofs = 0;
2036
2037 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2038 while ((slotidx > 0) && (ofs >= ms->npages)) {
2039 slotidx--;
2040 ms = slots->memslots + slotidx;
2041 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2042 }
2043 return ms->base_gfn + ofs;
2044 }
2045
2046 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2047 u8 *res, unsigned long bufsize)
2048 {
2049 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2050 struct kvm_memslots *slots = kvm_memslots(kvm);
2051 struct kvm_memory_slot *ms;
2052
2053 if (unlikely(!slots->used_slots))
2054 return 0;
2055
2056 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2057 ms = gfn_to_memslot(kvm, cur_gfn);
2058 args->count = 0;
2059 args->start_gfn = cur_gfn;
2060 if (!ms)
2061 return 0;
2062 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2063 mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2064
2065 while (args->count < bufsize) {
2066 hva = gfn_to_hva(kvm, cur_gfn);
2067 if (kvm_is_error_hva(hva))
2068 return 0;
2069 /* Decrement only if we actually flipped the bit to 0 */
2070 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2071 atomic64_dec(&kvm->arch.cmma_dirty_pages);
2072 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2073 pgstev = 0;
2074 /* Save the value */
2075 res[args->count++] = (pgstev >> 24) & 0x43;
2076 /* If the next bit is too far away, stop. */
2077 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2078 return 0;
2079 /* If we reached the previous "next", find the next one */
2080 if (cur_gfn == next_gfn)
2081 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2082 /* Reached the end of memory or of the buffer, stop */
2083 if ((next_gfn >= mem_end) ||
2084 (next_gfn - args->start_gfn >= bufsize))
2085 return 0;
2086 cur_gfn++;
2087 /* Reached the end of the current memslot, take the next one. */
2088 if (cur_gfn - ms->base_gfn >= ms->npages) {
2089 ms = gfn_to_memslot(kvm, cur_gfn);
2090 if (!ms)
2091 return 0;
2092 }
2093 }
2094 return 0;
2095 }
2096
2097 /*
2098 * This function searches for the next page with dirty CMMA attributes, and
2099 * saves the attributes in the buffer up to either the end of the buffer or
2100 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2101 * no trailing clean bytes are saved.
2102 * In case no dirty bits were found, or if CMMA was not enabled or used, the
2103 * output buffer will indicate 0 as length.
2104 */
2105 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2106 struct kvm_s390_cmma_log *args)
2107 {
2108 unsigned long bufsize;
2109 int srcu_idx, peek, ret;
2110 u8 *values;
2111
2112 if (!kvm->arch.use_cmma)
2113 return -ENXIO;
2114 /* Invalid/unsupported flags were specified */
2115 if (args->flags & ~KVM_S390_CMMA_PEEK)
2116 return -EINVAL;
2117 /* Migration mode query, and we are not doing a migration */
2118 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2119 if (!peek && !kvm->arch.migration_mode)
2120 return -EINVAL;
2121 /* CMMA is disabled or was not used, or the buffer has length zero */
2122 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2123 if (!bufsize || !kvm->mm->context.uses_cmm) {
2124 memset(args, 0, sizeof(*args));
2125 return 0;
2126 }
2127 /* We are not peeking, and there are no dirty pages */
2128 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2129 memset(args, 0, sizeof(*args));
2130 return 0;
2131 }
2132
2133 values = vmalloc(bufsize);
2134 if (!values)
2135 return -ENOMEM;
2136
2137 mmap_read_lock(kvm->mm);
2138 srcu_idx = srcu_read_lock(&kvm->srcu);
2139 if (peek)
2140 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2141 else
2142 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2143 srcu_read_unlock(&kvm->srcu, srcu_idx);
2144 mmap_read_unlock(kvm->mm);
2145
2146 if (kvm->arch.migration_mode)
2147 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2148 else
2149 args->remaining = 0;
2150
2151 if (copy_to_user((void __user *)args->values, values, args->count))
2152 ret = -EFAULT;
2153
2154 vfree(values);
2155 return ret;
2156 }
2157
2158 /*
2159 * This function sets the CMMA attributes for the given pages. If the input
2160 * buffer has zero length, no action is taken, otherwise the attributes are
2161 * set and the mm->context.uses_cmm flag is set.
2162 */
2163 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2164 const struct kvm_s390_cmma_log *args)
2165 {
2166 unsigned long hva, mask, pgstev, i;
2167 uint8_t *bits;
2168 int srcu_idx, r = 0;
2169
2170 mask = args->mask;
2171
2172 if (!kvm->arch.use_cmma)
2173 return -ENXIO;
2174 /* invalid/unsupported flags */
2175 if (args->flags != 0)
2176 return -EINVAL;
2177 /* Enforce sane limit on memory allocation */
2178 if (args->count > KVM_S390_CMMA_SIZE_MAX)
2179 return -EINVAL;
2180 /* Nothing to do */
2181 if (args->count == 0)
2182 return 0;
2183
2184 bits = vmalloc(array_size(sizeof(*bits), args->count));
2185 if (!bits)
2186 return -ENOMEM;
2187
2188 r = copy_from_user(bits, (void __user *)args->values, args->count);
2189 if (r) {
2190 r = -EFAULT;
2191 goto out;
2192 }
2193
2194 mmap_read_lock(kvm->mm);
2195 srcu_idx = srcu_read_lock(&kvm->srcu);
2196 for (i = 0; i < args->count; i++) {
2197 hva = gfn_to_hva(kvm, args->start_gfn + i);
2198 if (kvm_is_error_hva(hva)) {
2199 r = -EFAULT;
2200 break;
2201 }
2202
2203 pgstev = bits[i];
2204 pgstev = pgstev << 24;
2205 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2206 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2207 }
2208 srcu_read_unlock(&kvm->srcu, srcu_idx);
2209 mmap_read_unlock(kvm->mm);
2210
2211 if (!kvm->mm->context.uses_cmm) {
2212 mmap_write_lock(kvm->mm);
2213 kvm->mm->context.uses_cmm = 1;
2214 mmap_write_unlock(kvm->mm);
2215 }
2216 out:
2217 vfree(bits);
2218 return r;
2219 }
2220
2221 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2222 {
2223 struct kvm_vcpu *vcpu;
2224 u16 rc, rrc;
2225 int ret = 0;
2226 int i;
2227
2228 /*
2229 * We ignore failures and try to destroy as many CPUs as possible.
2230 * At the same time we must not free the assigned resources when
2231 * this fails, as the ultravisor still has access to that memory.
2232 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2233 * behind.
2234 * We want to return the first failure rc and rrc, though.
2235 */
2236 kvm_for_each_vcpu(i, vcpu, kvm) {
2237 mutex_lock(&vcpu->mutex);
2238 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2239 *rcp = rc;
2240 *rrcp = rrc;
2241 ret = -EIO;
2242 }
2243 mutex_unlock(&vcpu->mutex);
2244 }
2245 return ret;
2246 }
2247
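/*
 * Switch every VCPU of the guest into protected (PV) mode. The vcpu mutex
 * is held around each conversion. If creating any CPU fails, all CPUs that
 * were already converted are destroyed again via kvm_s390_cpus_from_pv().
 */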
2248 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2249 {
2250 int i, r = 0;
2251 u16 dummy;
2252
2253 struct kvm_vcpu *vcpu;
2254
2255 kvm_for_each_vcpu(i, vcpu, kvm) {
2256 mutex_lock(&vcpu->mutex);
2257 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2258 mutex_unlock(&vcpu->mutex);
2259 if (r)
2260 break;
2261 }
2262 if (r)
2263 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2264 return r;
2265 }
2266
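/*
 * Dispatcher for the KVM_S390_PV_COMMAND ioctl. The caller
 * (kvm_arch_vm_ioctl) holds kvm->lock, so the protected-virtualization
 * state of the VM cannot change underneath a running subcommand.
 */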
2267 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2268 {
2269 int r = 0;
2270 u16 dummy;
2271 void __user *argp = (void __user *)cmd->data;
2272
2273 switch (cmd->cmd) {
2274 case KVM_PV_ENABLE: {
2275 r = -EINVAL;
2276 if (kvm_s390_pv_is_protected(kvm))
2277 break;
2278
2279 /*
2280 * FMT 4 SIE needs esca. As we never switch back to bsca from
2281 * esca, we need no cleanup in the error cases below
2282 */
2283 r = sca_switch_to_extended(kvm);
2284 if (r)
2285 break;
2286
2287 mmap_write_lock(current->mm);
2288 r = gmap_mark_unmergeable();
2289 mmap_write_unlock(current->mm);
2290 if (r)
2291 break;
2292
2293 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2294 if (r)
2295 break;
2296
2297 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2298 if (r)
2299 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2300
2301 /* we need to block service interrupts from now on */
2302 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2303 break;
2304 }
2305 case KVM_PV_DISABLE: {
2306 r = -EINVAL;
2307 if (!kvm_s390_pv_is_protected(kvm))
2308 break;
2309
2310 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2311 /*
2312 * If a CPU could not be destroyed, destroy VM will also fail.
2313 * There is no point in trying to destroy it. Instead return
2314 * the rc and rrc from the first CPU that failed destroying.
2315 */
2316 if (r)
2317 break;
2318 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2319
2320 /* no need to block service interrupts any more */
2321 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2322 break;
2323 }
2324 case KVM_PV_SET_SEC_PARMS: {
2325 struct kvm_s390_pv_sec_parm parms = {};
2326 void *hdr;
2327
2328 r = -EINVAL;
2329 if (!kvm_s390_pv_is_protected(kvm))
2330 break;
2331
2332 r = -EFAULT;
2333 if (copy_from_user(&parms, argp, sizeof(parms)))
2334 break;
2335
2336 /* Currently restricted to 8KB */
2337 r = -EINVAL;
2338 if (parms.length > PAGE_SIZE * 2)
2339 break;
2340
2341 r = -ENOMEM;
2342 hdr = vmalloc(parms.length);
2343 if (!hdr)
2344 break;
2345
2346 r = -EFAULT;
2347 if (!copy_from_user(hdr, (void __user *)parms.origin,
2348 parms.length))
2349 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2350 &cmd->rc, &cmd->rrc);
2351
2352 vfree(hdr);
2353 break;
2354 }
2355 case KVM_PV_UNPACK: {
2356 struct kvm_s390_pv_unp unp = {};
2357
2358 r = -EINVAL;
2359 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2360 break;
2361
2362 r = -EFAULT;
2363 if (copy_from_user(&unp, argp, sizeof(unp)))
2364 break;
2365
2366 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2367 &cmd->rc, &cmd->rrc);
2368 break;
2369 }
2370 case KVM_PV_VERIFY: {
2371 r = -EINVAL;
2372 if (!kvm_s390_pv_is_protected(kvm))
2373 break;
2374
2375 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2376 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2377 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2378 cmd->rrc);
2379 break;
2380 }
2381 case KVM_PV_PREP_RESET: {
2382 r = -EINVAL;
2383 if (!kvm_s390_pv_is_protected(kvm))
2384 break;
2385
2386 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2387 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2388 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2389 cmd->rc, cmd->rrc);
2390 break;
2391 }
2392 case KVM_PV_UNSHARE_ALL: {
2393 r = -EINVAL;
2394 if (!kvm_s390_pv_is_protected(kvm))
2395 break;
2396
2397 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2398 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2399 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2400 cmd->rc, cmd->rrc);
2401 break;
2402 }
2403 default:
2404 r = -ENOTTY;
2405 }
2406 return r;
2407 }
2408
2409 long kvm_arch_vm_ioctl(struct file *filp,
2410 unsigned int ioctl, unsigned long arg)
2411 {
2412 struct kvm *kvm = filp->private_data;
2413 void __user *argp = (void __user *)arg;
2414 struct kvm_device_attr attr;
2415 int r;
2416
2417 switch (ioctl) {
2418 case KVM_S390_INTERRUPT: {
2419 struct kvm_s390_interrupt s390int;
2420
2421 r = -EFAULT;
2422 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2423 break;
2424 r = kvm_s390_inject_vm(kvm, &s390int);
2425 break;
2426 }
2427 case KVM_CREATE_IRQCHIP: {
2428 struct kvm_irq_routing_entry routing;
2429
2430 r = -EINVAL;
2431 if (kvm->arch.use_irqchip) {
2432 /* Set up dummy routing. */
2433 memset(&routing, 0, sizeof(routing));
2434 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2435 }
2436 break;
2437 }
2438 case KVM_SET_DEVICE_ATTR: {
2439 r = -EFAULT;
2440 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2441 break;
2442 r = kvm_s390_vm_set_attr(kvm, &attr);
2443 break;
2444 }
2445 case KVM_GET_DEVICE_ATTR: {
2446 r = -EFAULT;
2447 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2448 break;
2449 r = kvm_s390_vm_get_attr(kvm, &attr);
2450 break;
2451 }
2452 case KVM_HAS_DEVICE_ATTR: {
2453 r = -EFAULT;
2454 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2455 break;
2456 r = kvm_s390_vm_has_attr(kvm, &attr);
2457 break;
2458 }
2459 case KVM_S390_GET_SKEYS: {
2460 struct kvm_s390_skeys args;
2461
2462 r = -EFAULT;
2463 if (copy_from_user(&args, argp,
2464 sizeof(struct kvm_s390_skeys)))
2465 break;
2466 r = kvm_s390_get_skeys(kvm, &args);
2467 break;
2468 }
2469 case KVM_S390_SET_SKEYS: {
2470 struct kvm_s390_skeys args;
2471
2472 r = -EFAULT;
2473 if (copy_from_user(&args, argp,
2474 sizeof(struct kvm_s390_skeys)))
2475 break;
2476 r = kvm_s390_set_skeys(kvm, &args);
2477 break;
2478 }
2479 case KVM_S390_GET_CMMA_BITS: {
2480 struct kvm_s390_cmma_log args;
2481
2482 r = -EFAULT;
2483 if (copy_from_user(&args, argp, sizeof(args)))
2484 break;
2485 mutex_lock(&kvm->slots_lock);
2486 r = kvm_s390_get_cmma_bits(kvm, &args);
2487 mutex_unlock(&kvm->slots_lock);
2488 if (!r) {
2489 r = copy_to_user(argp, &args, sizeof(args));
2490 if (r)
2491 r = -EFAULT;
2492 }
2493 break;
2494 }
2495 case KVM_S390_SET_CMMA_BITS: {
2496 struct kvm_s390_cmma_log args;
2497
2498 r = -EFAULT;
2499 if (copy_from_user(&args, argp, sizeof(args)))
2500 break;
2501 mutex_lock(&kvm->slots_lock);
2502 r = kvm_s390_set_cmma_bits(kvm, &args);
2503 mutex_unlock(&kvm->slots_lock);
2504 break;
2505 }
2506 case KVM_S390_PV_COMMAND: {
2507 struct kvm_pv_cmd args;
2508
2509 /* protvirt means user sigp */
2510 kvm->arch.user_cpu_state_ctrl = 1;
2511 r = 0;
2512 if (!is_prot_virt_host()) {
2513 r = -EINVAL;
2514 break;
2515 }
2516 if (copy_from_user(&args, argp, sizeof(args))) {
2517 r = -EFAULT;
2518 break;
2519 }
2520 if (args.flags) {
2521 r = -EINVAL;
2522 break;
2523 }
2524 mutex_lock(&kvm->lock);
2525 r = kvm_s390_handle_pv(kvm, &args);
2526 mutex_unlock(&kvm->lock);
2527 if (copy_to_user(argp, &args, sizeof(args))) {
2528 r = -EFAULT;
2529 break;
2530 }
2531 break;
2532 }
2533 default:
2534 r = -ENOTTY;
2535 }
2536
2537 return r;
2538 }
2539
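/*
 * Query the AP configuration and report whether the AP extended addressing
 * (APXA) facility is installed; 0 is returned when the AP instructions are
 * unavailable or the query fails.
 */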
2540 static int kvm_s390_apxa_installed(void)
2541 {
2542 struct ap_config_info info;
2543
2544 if (ap_instructions_available()) {
2545 if (ap_qci(&info) == 0)
2546 return info.apxa;
2547 }
2548
2549 return 0;
2550 }
2551
2552 /*
2553 * The format of the crypto control block (CRYCB) is specified in the 3 low
2554 * order bits of the CRYCB designation (CRYCBD) field as follows:
2555 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2556 * AP extended addressing (APXA) facility is installed.
2557 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2558 * Format 2: Both the APXA and MSAX3 facilities are installed.
2559 */
2560 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2561 {
2562 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2563
2564 /* Clear the CRYCB format bits - i.e., set format 0 by default */
2565 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2566
2567 /* Check whether MSAX3 is installed */
2568 if (!test_kvm_facility(kvm, 76))
2569 return;
2570
2571 if (kvm_s390_apxa_installed())
2572 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2573 else
2574 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2575 }
2576
2577 /*
2578 * kvm_arch_crypto_set_masks
2579 *
2580 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2581 * to be set.
2582 * @apm: the mask identifying the accessible AP adapters
2583 * @aqm: the mask identifying the accessible AP domains
2584 * @adm: the mask identifying the accessible AP control domains
2585 *
2586 * Set the masks that identify the adapters, domains and control domains to
2587 * which the KVM guest is granted access.
2588 *
2589 * Note: The kvm->lock mutex must be locked by the caller before invoking this
2590 * function.
2591 */
2592 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2593 unsigned long *aqm, unsigned long *adm)
2594 {
2595 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2596
2597 kvm_s390_vcpu_block_all(kvm);
2598
2599 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2600 case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2601 memcpy(crycb->apcb1.apm, apm, 32);
2602 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2603 apm[0], apm[1], apm[2], apm[3]);
2604 memcpy(crycb->apcb1.aqm, aqm, 32);
2605 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2606 aqm[0], aqm[1], aqm[2], aqm[3]);
2607 memcpy(crycb->apcb1.adm, adm, 32);
2608 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2609 adm[0], adm[1], adm[2], adm[3]);
2610 break;
2611 case CRYCB_FORMAT1:
2612 case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
2613 memcpy(crycb->apcb0.apm, apm, 8);
2614 memcpy(crycb->apcb0.aqm, aqm, 2);
2615 memcpy(crycb->apcb0.adm, adm, 2);
2616 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2617 apm[0], *((unsigned short *)aqm),
2618 *((unsigned short *)adm));
2619 break;
2620 default: /* Cannot happen */
2621 break;
2622 }
2623
2624 /* recreate the shadow crycb for each vcpu */
2625 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2626 kvm_s390_vcpu_unblock_all(kvm);
2627 }
2628 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2629
2630 /*
2631 * kvm_arch_crypto_clear_masks
2632 *
2633 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2634 * to be cleared.
2635 *
2636 * Clear the masks that identify the adapters, domains and control domains to
2637 * which the KVM guest is granted access.
2638 *
2639 * Note: The kvm->lock mutex must be locked by the caller before invoking this
2640 * function.
2641 */
2642 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2643 {
2644 kvm_s390_vcpu_block_all(kvm);
2645
2646 memset(&kvm->arch.crypto.crycb->apcb0, 0,
2647 sizeof(kvm->arch.crypto.crycb->apcb0));
2648 memset(&kvm->arch.crypto.crycb->apcb1, 0,
2649 sizeof(kvm->arch.crypto.crycb->apcb1));
2650
2651 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2652 /* recreate the shadow crycb for each vcpu */
2653 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2654 kvm_s390_vcpu_unblock_all(kvm);
2655 }
2656 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2657
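/*
 * The initial cpuid is the host cpuid with the version field forced to
 * 0xff (presumably to avoid exposing the host's version code to guests).
 */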
2658 static u64 kvm_s390_get_initial_cpuid(void)
2659 {
2660 struct cpuid cpuid;
2661
2662 get_cpu_id(&cpuid);
2663 cpuid.version = 0xff;
2664 return *((u64 *) &cpuid);
2665 }
2666
2667 static void kvm_s390_crypto_init(struct kvm *kvm)
2668 {
2669 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2670 kvm_s390_set_crycb_format(kvm);
2671 init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
2672
2673 if (!test_kvm_facility(kvm, 76))
2674 return;
2675
2676 /* Enable AES/DEA protected key functions by default */
2677 kvm->arch.crypto.aes_kw = 1;
2678 kvm->arch.crypto.dea_kw = 1;
2679 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2680 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2681 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2682 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2683 }
2684
2685 static void sca_dispose(struct kvm *kvm)
2686 {
2687 if (kvm->arch.use_esca)
2688 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2689 else
2690 free_page((unsigned long)(kvm->arch.sca));
2691 kvm->arch.sca = NULL;
2692 }
2693
2694 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2695 {
2696 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2697 int i, rc;
2698 char debug_name[16];
2699 static unsigned long sca_offset;
2700
2701 rc = -EINVAL;
2702 #ifdef CONFIG_KVM_S390_UCONTROL
2703 if (type & ~KVM_VM_S390_UCONTROL)
2704 goto out_err;
2705 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2706 goto out_err;
2707 #else
2708 if (type)
2709 goto out_err;
2710 #endif
2711
2712 rc = s390_enable_sie();
2713 if (rc)
2714 goto out_err;
2715
2716 rc = -ENOMEM;
2717
2718 if (!sclp.has_64bscao)
2719 alloc_flags |= GFP_DMA;
2720 rwlock_init(&kvm->arch.sca_lock);
2721 /* start with basic SCA */
2722 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2723 if (!kvm->arch.sca)
2724 goto out_err;
2725 mutex_lock(&kvm_lock);
2726 sca_offset += 16;
2727 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2728 sca_offset = 0;
2729 kvm->arch.sca = (struct bsca_block *)
2730 ((char *) kvm->arch.sca + sca_offset);
2731 mutex_unlock(&kvm_lock);
2732
2733 sprintf(debug_name, "kvm-%u", current->pid);
2734
2735 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2736 if (!kvm->arch.dbf)
2737 goto out_err;
2738
2739 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2740 kvm->arch.sie_page2 =
2741 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2742 if (!kvm->arch.sie_page2)
2743 goto out_err;
2744
2745 kvm->arch.sie_page2->kvm = kvm;
2746 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2747
2748 for (i = 0; i < kvm_s390_fac_size(); i++) {
2749 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2750 (kvm_s390_fac_base[i] |
2751 kvm_s390_fac_ext[i]);
2752 kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2753 kvm_s390_fac_base[i];
2754 }
2755 kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2756
2757 /* we are always in czam mode - even on pre z14 machines */
2758 set_kvm_facility(kvm->arch.model.fac_mask, 138);
2759 set_kvm_facility(kvm->arch.model.fac_list, 138);
2760 /* we emulate STHYI in kvm */
2761 set_kvm_facility(kvm->arch.model.fac_mask, 74);
2762 set_kvm_facility(kvm->arch.model.fac_list, 74);
2763 if (MACHINE_HAS_TLB_GUEST) {
2764 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2765 set_kvm_facility(kvm->arch.model.fac_list, 147);
2766 }
2767
2768 if (css_general_characteristics.aiv && test_facility(65))
2769 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2770
2771 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2772 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2773
2774 kvm_s390_crypto_init(kvm);
2775
2776 mutex_init(&kvm->arch.float_int.ais_lock);
2777 spin_lock_init(&kvm->arch.float_int.lock);
2778 for (i = 0; i < FIRQ_LIST_COUNT; i++)
2779 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2780 init_waitqueue_head(&kvm->arch.ipte_wq);
2781 mutex_init(&kvm->arch.ipte_mutex);
2782
2783 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2784 VM_EVENT(kvm, 3, "vm created with type %lu", type);
2785
2786 if (type & KVM_VM_S390_UCONTROL) {
2787 kvm->arch.gmap = NULL;
2788 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2789 } else {
2790 if (sclp.hamax == U64_MAX)
2791 kvm->arch.mem_limit = TASK_SIZE_MAX;
2792 else
2793 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2794 sclp.hamax + 1);
2795 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2796 if (!kvm->arch.gmap)
2797 goto out_err;
2798 kvm->arch.gmap->private = kvm;
2799 kvm->arch.gmap->pfault_enabled = 0;
2800 }
2801
2802 kvm->arch.use_pfmfi = sclp.has_pfmfi;
2803 kvm->arch.use_skf = sclp.has_skey;
2804 spin_lock_init(&kvm->arch.start_stop_lock);
2805 kvm_s390_vsie_init(kvm);
2806 if (use_gisa)
2807 kvm_s390_gisa_init(kvm);
2808 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2809
2810 return 0;
2811 out_err:
2812 free_page((unsigned long)kvm->arch.sie_page2);
2813 debug_unregister(kvm->arch.dbf);
2814 sca_dispose(kvm);
2815 KVM_EVENT(3, "creation of vm failed: %d", rc);
2816 return rc;
2817 }
2818
2819 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2820 {
2821 u16 rc, rrc;
2822
2823 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2824 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2825 kvm_s390_clear_local_irqs(vcpu);
2826 kvm_clear_async_pf_completion_queue(vcpu);
2827 if (!kvm_is_ucontrol(vcpu->kvm))
2828 sca_del_vcpu(vcpu);
2829
2830 if (kvm_is_ucontrol(vcpu->kvm))
2831 gmap_remove(vcpu->arch.gmap);
2832
2833 if (vcpu->kvm->arch.use_cmma)
2834 kvm_s390_vcpu_unsetup_cmma(vcpu);
2835 /* We cannot hold the vcpu mutex here; we are already dying */
2836 if (kvm_s390_pv_cpu_get_handle(vcpu))
2837 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2838 free_page((unsigned long)(vcpu->arch.sie_block));
2839 }
2840
2841 static void kvm_free_vcpus(struct kvm *kvm)
2842 {
2843 unsigned int i;
2844 struct kvm_vcpu *vcpu;
2845
2846 kvm_for_each_vcpu(i, vcpu, kvm)
2847 kvm_vcpu_destroy(vcpu);
2848
2849 mutex_lock(&kvm->lock);
2850 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2851 kvm->vcpus[i] = NULL;
2852
2853 atomic_set(&kvm->online_vcpus, 0);
2854 mutex_unlock(&kvm->lock);
2855 }
2856
2857 void kvm_arch_destroy_vm(struct kvm *kvm)
2858 {
2859 u16 rc, rrc;
2860
2861 kvm_free_vcpus(kvm);
2862 sca_dispose(kvm);
2863 kvm_s390_gisa_destroy(kvm);
2864 /*
2865 * We are already at the end of life and kvm->lock is not taken.
2866 * This is ok as the file descriptor is closed by now and nobody
2867 * can mess with the pv state. To avoid lockdep_assert_held from
2868 * complaining we do not use kvm_s390_pv_is_protected.
2869 */
2870 if (kvm_s390_pv_get_handle(kvm))
2871 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2872 debug_unregister(kvm->arch.dbf);
2873 free_page((unsigned long)kvm->arch.sie_page2);
2874 if (!kvm_is_ucontrol(kvm))
2875 gmap_remove(kvm->arch.gmap);
2876 kvm_s390_destroy_adapters(kvm);
2877 kvm_s390_clear_float_irqs(kvm);
2878 kvm_s390_vsie_destroy(kvm);
2879 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2880 }
2881
2882 /* Section: vcpu related */
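/*
 * ucontrol VMs have no VM-wide gmap; each VCPU gets its own guest address
 * space of unlimited size instead.
 */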
2883 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2884 {
2885 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2886 if (!vcpu->arch.gmap)
2887 return -ENOMEM;
2888 vcpu->arch.gmap->private = vcpu->kvm;
2889
2890 return 0;
2891 }
2892
2893 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2894 {
2895 if (!kvm_s390_use_sca_entries())
2896 return;
2897 read_lock(&vcpu->kvm->arch.sca_lock);
2898 if (vcpu->kvm->arch.use_esca) {
2899 struct esca_block *sca = vcpu->kvm->arch.sca;
2900
2901 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2902 sca->cpu[vcpu->vcpu_id].sda = 0;
2903 } else {
2904 struct bsca_block *sca = vcpu->kvm->arch.sca;
2905
2906 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2907 sca->cpu[vcpu->vcpu_id].sda = 0;
2908 }
2909 read_unlock(&vcpu->kvm->arch.sca_lock);
2910 }
2911
2912 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2913 {
2914 if (!kvm_s390_use_sca_entries()) {
2915 struct bsca_block *sca = vcpu->kvm->arch.sca;
2916
2917 /* we still need the basic sca for the ipte control */
2918 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2919 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2920 return;
2921 }
2922 read_lock(&vcpu->kvm->arch.sca_lock);
2923 if (vcpu->kvm->arch.use_esca) {
2924 struct esca_block *sca = vcpu->kvm->arch.sca;
2925
2926 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2927 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2928 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2929 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2930 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2931 } else {
2932 struct bsca_block *sca = vcpu->kvm->arch.sca;
2933
2934 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2935 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2936 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2937 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2938 }
2939 read_unlock(&vcpu->kvm->arch.sca_lock);
2940 }
2941
2942 /* Basic SCA to Extended SCA data copy routines */
2943 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2944 {
2945 d->sda = s->sda;
2946 d->sigp_ctrl.c = s->sigp_ctrl.c;
2947 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2948 }
2949
2950 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2951 {
2952 int i;
2953
2954 d->ipte_control = s->ipte_control;
2955 d->mcn[0] = s->mcn;
2956 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2957 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2958 }
2959
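/*
 * Replace the basic SCA with an extended SCA. All VCPUs are blocked while
 * the SCA origin in every SIE control block is rewritten; the old basic
 * SCA page is freed once the switch is complete.
 */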
2960 static int sca_switch_to_extended(struct kvm *kvm)
2961 {
2962 struct bsca_block *old_sca = kvm->arch.sca;
2963 struct esca_block *new_sca;
2964 struct kvm_vcpu *vcpu;
2965 unsigned int vcpu_idx;
2966 u32 scaol, scaoh;
2967
2968 if (kvm->arch.use_esca)
2969 return 0;
2970
2971 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2972 if (!new_sca)
2973 return -ENOMEM;
2974
2975 scaoh = (u32)((u64)(new_sca) >> 32);
2976 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2977
2978 kvm_s390_vcpu_block_all(kvm);
2979 write_lock(&kvm->arch.sca_lock);
2980
2981 sca_copy_b_to_e(new_sca, old_sca);
2982
2983 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2984 vcpu->arch.sie_block->scaoh = scaoh;
2985 vcpu->arch.sie_block->scaol = scaol;
2986 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2987 }
2988 kvm->arch.sca = new_sca;
2989 kvm->arch.use_esca = 1;
2990
2991 write_unlock(&kvm->arch.sca_lock);
2992 kvm_s390_vcpu_unblock_all(kvm);
2993
2994 free_page((unsigned long)old_sca);
2995
2996 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2997 old_sca, kvm->arch.sca);
2998 return 0;
2999 }
3000
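/*
 * Check whether a VCPU with the given id still fits into the SCA,
 * switching from the basic to the extended SCA on demand when the
 * hardware supports it.
 */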
3001 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
3002 {
3003 int rc;
3004
3005 if (!kvm_s390_use_sca_entries()) {
3006 if (id < KVM_MAX_VCPUS)
3007 return true;
3008 return false;
3009 }
3010 if (id < KVM_S390_BSCA_CPU_SLOTS)
3011 return true;
3012 if (!sclp.has_esca || !sclp.has_64bscao)
3013 return false;
3014
3015 mutex_lock(&kvm->lock);
3016 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3017 mutex_unlock(&kvm->lock);
3018
3019 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3020 }
3021
3022 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3023 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3024 {
3025 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3026 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3027 vcpu->arch.cputm_start = get_tod_clock_fast();
3028 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3029 }
3030
3031 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3032 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3033 {
3034 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3035 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3036 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3037 vcpu->arch.cputm_start = 0;
3038 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3039 }
3040
3041 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3042 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3043 {
3044 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3045 vcpu->arch.cputm_enabled = true;
3046 __start_cpu_timer_accounting(vcpu);
3047 }
3048
3049 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3050 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3051 {
3052 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3053 __stop_cpu_timer_accounting(vcpu);
3054 vcpu->arch.cputm_enabled = false;
3055 }
3056
3057 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3058 {
3059 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3060 __enable_cpu_timer_accounting(vcpu);
3061 preempt_enable();
3062 }
3063
3064 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3065 {
3066 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3067 __disable_cpu_timer_accounting(vcpu);
3068 preempt_enable();
3069 }
3070
3071 /* set the cpu timer - may only be called from the VCPU thread itself */
3072 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3073 {
3074 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3075 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3076 if (vcpu->arch.cputm_enabled)
3077 vcpu->arch.cputm_start = get_tod_clock_fast();
3078 vcpu->arch.sie_block->cputm = cputm;
3079 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3080 preempt_enable();
3081 }
3082
3083 /* update and get the cpu timer - can also be called from other VCPU threads */
3084 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3085 {
3086 unsigned int seq;
3087 __u64 value;
3088
3089 if (unlikely(!vcpu->arch.cputm_enabled))
3090 return vcpu->arch.sie_block->cputm;
3091
3092 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3093 do {
3094 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3095 /*
3096 * If the writer would ever execute a read in the critical
3097 * section, e.g. in irq context, we have a deadlock.
3098 */
3099 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3100 value = vcpu->arch.sie_block->cputm;
3101 /* if cputm_start is 0, accounting is being started/stopped */
3102 if (likely(vcpu->arch.cputm_start))
3103 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3104 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3105 preempt_enable();
3106 return value;
3107 }
3108
3109 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3110 {
3111
3112 gmap_enable(vcpu->arch.enabled_gmap);
3113 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3114 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3115 __start_cpu_timer_accounting(vcpu);
3116 vcpu->cpu = cpu;
3117 }
3118
3119 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3120 {
3121 vcpu->cpu = -1;
3122 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3123 __stop_cpu_timer_accounting(vcpu);
3124 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3125 vcpu->arch.enabled_gmap = gmap_get_enabled();
3126 gmap_disable(vcpu->arch.enabled_gmap);
3127
3128 }
3129
3130 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3131 {
3132 mutex_lock(&vcpu->kvm->lock);
3133 preempt_disable();
3134 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3135 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3136 preempt_enable();
3137 mutex_unlock(&vcpu->kvm->lock);
3138 if (!kvm_is_ucontrol(vcpu->kvm)) {
3139 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3140 sca_add_vcpu(vcpu);
3141 }
3142 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3143 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3144 /* make vcpu_load load the right gmap on the first trigger */
3145 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3146 }
3147
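/*
 * A PCKMO subfunction is usable by the guest only if it is part of the
 * configured CPU model and also available on the host.
 */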
3148 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3149 {
3150 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3151 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3152 return true;
3153 return false;
3154 }
3155
3156 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3157 {
3158 /* At least one ECC subfunction must be present */
3159 return kvm_has_pckmo_subfunc(kvm, 32) ||
3160 kvm_has_pckmo_subfunc(kvm, 33) ||
3161 kvm_has_pckmo_subfunc(kvm, 34) ||
3162 kvm_has_pckmo_subfunc(kvm, 40) ||
3163 kvm_has_pckmo_subfunc(kvm, 41);
3164
3165 }
3166
3167 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3168 {
3169 /*
3170 * If the AP instructions are not being interpreted and the MSAX3
3171 * facility is not configured for the guest, there is nothing to set up.
3172 */
3173 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3174 return;
3175
3176 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3177 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3178 vcpu->arch.sie_block->eca &= ~ECA_APIE;
3179 vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3180
3181 if (vcpu->kvm->arch.crypto.apie)
3182 vcpu->arch.sie_block->eca |= ECA_APIE;
3183
3184 /* Set up protected key support */
3185 if (vcpu->kvm->arch.crypto.aes_kw) {
3186 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3187 /* ecc is also wrapped with AES key */
3188 if (kvm_has_pckmo_ecc(vcpu->kvm))
3189 vcpu->arch.sie_block->ecd |= ECD_ECC;
3190 }
3191
3192 if (vcpu->kvm->arch.crypto.dea_kw)
3193 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3194 }
3195
3196 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3197 {
3198 free_page(vcpu->arch.sie_block->cbrlo);
3199 vcpu->arch.sie_block->cbrlo = 0;
3200 }
3201
3202 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3203 {
3204 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3205 if (!vcpu->arch.sie_block->cbrlo)
3206 return -ENOMEM;
3207 return 0;
3208 }
3209
3210 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3211 {
3212 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3213
3214 vcpu->arch.sie_block->ibc = model->ibc;
3215 if (test_kvm_facility(vcpu->kvm, 7))
3216 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3217 }
3218
3219 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3220 {
3221 int rc = 0;
3222 u16 uvrc, uvrrc;
3223
3224 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3225 CPUSTAT_SM |
3226 CPUSTAT_STOPPED);
3227
3228 if (test_kvm_facility(vcpu->kvm, 78))
3229 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3230 else if (test_kvm_facility(vcpu->kvm, 8))
3231 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3232
3233 kvm_s390_vcpu_setup_model(vcpu);
3234
3235 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3236 if (MACHINE_HAS_ESOP)
3237 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3238 if (test_kvm_facility(vcpu->kvm, 9))
3239 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3240 if (test_kvm_facility(vcpu->kvm, 73))
3241 vcpu->arch.sie_block->ecb |= ECB_TE;
3242 if (!kvm_is_ucontrol(vcpu->kvm))
3243 vcpu->arch.sie_block->ecb |= ECB_SPECI;
3244
3245 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3246 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3247 if (test_kvm_facility(vcpu->kvm, 130))
3248 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3249 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3250 if (sclp.has_cei)
3251 vcpu->arch.sie_block->eca |= ECA_CEI;
3252 if (sclp.has_ib)
3253 vcpu->arch.sie_block->eca |= ECA_IB;
3254 if (sclp.has_siif)
3255 vcpu->arch.sie_block->eca |= ECA_SII;
3256 if (sclp.has_sigpif)
3257 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3258 if (test_kvm_facility(vcpu->kvm, 129)) {
3259 vcpu->arch.sie_block->eca |= ECA_VX;
3260 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3261 }
3262 if (test_kvm_facility(vcpu->kvm, 139))
3263 vcpu->arch.sie_block->ecd |= ECD_MEF;
3264 if (test_kvm_facility(vcpu->kvm, 156))
3265 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3266 if (vcpu->arch.sie_block->gd) {
3267 vcpu->arch.sie_block->eca |= ECA_AIV;
3268 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3269 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3270 }
3271 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3272 | SDNXC;
3273 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3274
3275 if (sclp.has_kss)
3276 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3277 else
3278 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3279
3280 if (vcpu->kvm->arch.use_cmma) {
3281 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3282 if (rc)
3283 return rc;
3284 }
3285 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3286 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3287
3288 vcpu->arch.sie_block->hpid = HPID_KVM;
3289
3290 kvm_s390_vcpu_crypto_setup(vcpu);
3291
3292 mutex_lock(&vcpu->kvm->lock);
3293 if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3294 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3295 if (rc)
3296 kvm_s390_vcpu_unsetup_cmma(vcpu);
3297 }
3298 mutex_unlock(&vcpu->kvm->lock);
3299
3300 return rc;
3301 }
3302
3303 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3304 {
3305 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3306 return -EINVAL;
3307 return 0;
3308 }
3309
3310 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3311 {
3312 struct sie_page *sie_page;
3313 int rc;
3314
3315 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3316 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3317 if (!sie_page)
3318 return -ENOMEM;
3319
3320 vcpu->arch.sie_block = &sie_page->sie_block;
3321 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3322
3323 /* the real guest size will always be smaller than msl */
3324 vcpu->arch.sie_block->mso = 0;
3325 vcpu->arch.sie_block->msl = sclp.hamax;
3326
3327 vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3328 spin_lock_init(&vcpu->arch.local_int.lock);
3329 vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3330 if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3331 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3332 seqcount_init(&vcpu->arch.cputm_seqcount);
3333
3334 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3335 kvm_clear_async_pf_completion_queue(vcpu);
3336 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3337 KVM_SYNC_GPRS |
3338 KVM_SYNC_ACRS |
3339 KVM_SYNC_CRS |
3340 KVM_SYNC_ARCH0 |
3341 KVM_SYNC_PFAULT |
3342 KVM_SYNC_DIAG318;
3343 kvm_s390_set_prefix(vcpu, 0);
3344 if (test_kvm_facility(vcpu->kvm, 64))
3345 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3346 if (test_kvm_facility(vcpu->kvm, 82))
3347 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3348 if (test_kvm_facility(vcpu->kvm, 133))
3349 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3350 if (test_kvm_facility(vcpu->kvm, 156))
3351 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3352 /* fprs can be synchronized via vrs, even if the guest has no vx. With
3353 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3354 */
3355 if (MACHINE_HAS_VX)
3356 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3357 else
3358 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3359
3360 if (kvm_is_ucontrol(vcpu->kvm)) {
3361 rc = __kvm_ucontrol_vcpu_init(vcpu);
3362 if (rc)
3363 goto out_free_sie_block;
3364 }
3365
3366 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3367 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3368 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3369
3370 rc = kvm_s390_vcpu_setup(vcpu);
3371 if (rc)
3372 goto out_ucontrol_uninit;
3373 return 0;
3374
3375 out_ucontrol_uninit:
3376 if (kvm_is_ucontrol(vcpu->kvm))
3377 gmap_remove(vcpu->arch.gmap);
3378 out_free_sie_block:
3379 free_page((unsigned long)(vcpu->arch.sie_block));
3380 return rc;
3381 }
3382
3383 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3384 {
3385 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3386 return kvm_s390_vcpu_has_irq(vcpu, 0);
3387 }
3388
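/* The guest is in kernel mode when the problem-state bit of its PSW is not set. */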
3389 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3390 {
3391 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3392 }
3393
3394 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3395 {
3396 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3397 exit_sie(vcpu);
3398 }
3399
3400 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3401 {
3402 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3403 }
3404
3405 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3406 {
3407 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3408 exit_sie(vcpu);
3409 }
3410
3411 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3412 {
3413 return atomic_read(&vcpu->arch.sie_block->prog20) &
3414 (PROG_BLOCK_SIE | PROG_REQUEST);
3415 }
3416
3417 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3418 {
3419 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3420 }
3421
3422 /*
3423 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3424 * If the CPU is not running (e.g. waiting as idle) the function will
3425 * return immediately. */
3426 void exit_sie(struct kvm_vcpu *vcpu)
3427 {
3428 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3429 kvm_s390_vsie_kick(vcpu);
3430 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3431 cpu_relax();
3432 }
3433
3434 /* Kick a guest cpu out of SIE to process a request synchronously */
3435 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3436 {
3437 kvm_make_request(req, vcpu);
3438 kvm_s390_vcpu_request(vcpu);
3439 }
3440
3441 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3442 unsigned long end)
3443 {
3444 struct kvm *kvm = gmap->private;
3445 struct kvm_vcpu *vcpu;
3446 unsigned long prefix;
3447 int i;
3448
3449 if (gmap_is_shadow(gmap))
3450 return;
3451 if (start >= 1UL << 31)
3452 /* We are only interested in prefix pages */
3453 return;
3454 kvm_for_each_vcpu(i, vcpu, kvm) {
3455 /* match against both prefix pages */
3456 prefix = kvm_s390_get_prefix(vcpu);
3457 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3458 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3459 start, end);
3460 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3461 }
3462 }
3463 }
3464
3465 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3466 {
3467 /* do not poll with more than halt_poll_max_steal percent of steal time */
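/*
 * avg_steal_timer is accounted per timer tick in CPU-timer units
 * (4096 units per microsecond); dividing by TICK_USEC << 12 gives
 * the stolen share of the last tick, scaled to a percentage by * 100.
 */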
3468 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3469 READ_ONCE(halt_poll_max_steal)) {
3470 vcpu->stat.halt_no_poll_steal++;
3471 return true;
3472 }
3473 return false;
3474 }
3475
3476 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3477 {
3478 /* kvm common code refers to this, but never calls it */
3479 BUG();
3480 return 0;
3481 }
3482
3483 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3484 struct kvm_one_reg *reg)
3485 {
3486 int r = -EINVAL;
3487
3488 switch (reg->id) {
3489 case KVM_REG_S390_TODPR:
3490 r = put_user(vcpu->arch.sie_block->todpr,
3491 (u32 __user *)reg->addr);
3492 break;
3493 case KVM_REG_S390_EPOCHDIFF:
3494 r = put_user(vcpu->arch.sie_block->epoch,
3495 (u64 __user *)reg->addr);
3496 break;
3497 case KVM_REG_S390_CPU_TIMER:
3498 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3499 (u64 __user *)reg->addr);
3500 break;
3501 case KVM_REG_S390_CLOCK_COMP:
3502 r = put_user(vcpu->arch.sie_block->ckc,
3503 (u64 __user *)reg->addr);
3504 break;
3505 case KVM_REG_S390_PFTOKEN:
3506 r = put_user(vcpu->arch.pfault_token,
3507 (u64 __user *)reg->addr);
3508 break;
3509 case KVM_REG_S390_PFCOMPARE:
3510 r = put_user(vcpu->arch.pfault_compare,
3511 (u64 __user *)reg->addr);
3512 break;
3513 case KVM_REG_S390_PFSELECT:
3514 r = put_user(vcpu->arch.pfault_select,
3515 (u64 __user *)reg->addr);
3516 break;
3517 case KVM_REG_S390_PP:
3518 r = put_user(vcpu->arch.sie_block->pp,
3519 (u64 __user *)reg->addr);
3520 break;
3521 case KVM_REG_S390_GBEA:
3522 r = put_user(vcpu->arch.sie_block->gbea,
3523 (u64 __user *)reg->addr);
3524 break;
3525 default:
3526 break;
3527 }
3528
3529 return r;
3530 }
3531
3532 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3533 struct kvm_one_reg *reg)
3534 {
3535 int r = -EINVAL;
3536 __u64 val;
3537
3538 switch (reg->id) {
3539 case KVM_REG_S390_TODPR:
3540 r = get_user(vcpu->arch.sie_block->todpr,
3541 (u32 __user *)reg->addr);
3542 break;
3543 case KVM_REG_S390_EPOCHDIFF:
3544 r = get_user(vcpu->arch.sie_block->epoch,
3545 (u64 __user *)reg->addr);
3546 break;
3547 case KVM_REG_S390_CPU_TIMER:
3548 r = get_user(val, (u64 __user *)reg->addr);
3549 if (!r)
3550 kvm_s390_set_cpu_timer(vcpu, val);
3551 break;
3552 case KVM_REG_S390_CLOCK_COMP:
3553 r = get_user(vcpu->arch.sie_block->ckc,
3554 (u64 __user *)reg->addr);
3555 break;
3556 case KVM_REG_S390_PFTOKEN:
3557 r = get_user(vcpu->arch.pfault_token,
3558 (u64 __user *)reg->addr);
3559 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3560 kvm_clear_async_pf_completion_queue(vcpu);
3561 break;
3562 case KVM_REG_S390_PFCOMPARE:
3563 r = get_user(vcpu->arch.pfault_compare,
3564 (u64 __user *)reg->addr);
3565 break;
3566 case KVM_REG_S390_PFSELECT:
3567 r = get_user(vcpu->arch.pfault_select,
3568 (u64 __user *)reg->addr);
3569 break;
3570 case KVM_REG_S390_PP:
3571 r = get_user(vcpu->arch.sie_block->pp,
3572 (u64 __user *)reg->addr);
3573 break;
3574 case KVM_REG_S390_GBEA:
3575 r = get_user(vcpu->arch.sie_block->gbea,
3576 (u64 __user *)reg->addr);
3577 break;
3578 default:
3579 break;
3580 }
3581
3582 return r;
3583 }
3584
3585 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3586 {
3587 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3588 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3589 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3590
3591 kvm_clear_async_pf_completion_queue(vcpu);
3592 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3593 kvm_s390_vcpu_stop(vcpu);
3594 kvm_s390_clear_local_irqs(vcpu);
3595 }
3596
3597 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3598 {
3599 /* Initial reset is a superset of the normal reset */
3600 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3601
3602 /*
3603 * This equals initial cpu reset in pop, but we don't switch to ESA.
3604 * We do not only reset the internal data, but also ...
3605 */
3606 vcpu->arch.sie_block->gpsw.mask = 0;
3607 vcpu->arch.sie_block->gpsw.addr = 0;
3608 kvm_s390_set_prefix(vcpu, 0);
3609 kvm_s390_set_cpu_timer(vcpu, 0);
3610 vcpu->arch.sie_block->ckc = 0;
3611 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3612 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3613 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3614
3615 /* ... the data in sync regs */
3616 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3617 vcpu->run->s.regs.ckc = 0;
3618 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3619 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3620 vcpu->run->psw_addr = 0;
3621 vcpu->run->psw_mask = 0;
3622 vcpu->run->s.regs.todpr = 0;
3623 vcpu->run->s.regs.cputm = 0;
3624 vcpu->run->s.regs.ckc = 0;
3625 vcpu->run->s.regs.pp = 0;
3626 vcpu->run->s.regs.gbea = 1;
3627 vcpu->run->s.regs.fpc = 0;
3628 /*
3629 * Do not reset these registers in the protected case, as some of
3630 * them are overlaid and they are not accessible in this case
3631 * anyway.
3632 */
3633 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3634 vcpu->arch.sie_block->gbea = 1;
3635 vcpu->arch.sie_block->pp = 0;
3636 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3637 vcpu->arch.sie_block->todpr = 0;
3638 }
3639 }
3640
3641 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3642 {
3643 struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3644
3645 /* Clear reset is a superset of the initial reset */
3646 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3647
3648 memset(&regs->gprs, 0, sizeof(regs->gprs));
3649 memset(&regs->vrs, 0, sizeof(regs->vrs));
3650 memset(&regs->acrs, 0, sizeof(regs->acrs));
3651 memset(&regs->gscb, 0, sizeof(regs->gscb));
3652
3653 regs->etoken = 0;
3654 regs->etoken_extension = 0;
3655 }
3656
3657 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3658 {
3659 vcpu_load(vcpu);
3660 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3661 vcpu_put(vcpu);
3662 return 0;
3663 }
3664
3665 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3666 {
3667 vcpu_load(vcpu);
3668 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3669 vcpu_put(vcpu);
3670 return 0;
3671 }
3672
3673 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3674 struct kvm_sregs *sregs)
3675 {
3676 vcpu_load(vcpu);
3677
3678 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3679 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3680
3681 vcpu_put(vcpu);
3682 return 0;
3683 }
3684
3685 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3686 struct kvm_sregs *sregs)
3687 {
3688 vcpu_load(vcpu);
3689
3690 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3691 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3692
3693 vcpu_put(vcpu);
3694 return 0;
3695 }
3696
3697 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3698 {
3699 int ret = 0;
3700
3701 vcpu_load(vcpu);
3702
3703 vcpu->run->s.regs.fpc = fpu->fpc;
3704 if (MACHINE_HAS_VX)
3705 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3706 (freg_t *) fpu->fprs);
3707 else
3708 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3709
3710 vcpu_put(vcpu);
3711 return ret;
3712 }
3713
3714 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3715 {
3716 vcpu_load(vcpu);
3717
3718 /* make sure we have the latest values */
3719 save_fpu_regs();
3720 if (MACHINE_HAS_VX)
3721 convert_vx_to_fp((freg_t *) fpu->fprs,
3722 (__vector128 *) vcpu->run->s.regs.vrs);
3723 else
3724 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3725 fpu->fpc = vcpu->run->s.regs.fpc;
3726
3727 vcpu_put(vcpu);
3728 return 0;
3729 }
3730
3731 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3732 {
3733 int rc = 0;
3734
3735 if (!is_vcpu_stopped(vcpu))
3736 rc = -EBUSY;
3737 else {
3738 vcpu->run->psw_mask = psw.mask;
3739 vcpu->run->psw_addr = psw.addr;
3740 }
3741 return rc;
3742 }
3743
3744 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3745 struct kvm_translation *tr)
3746 {
3747 return -EINVAL; /* not implemented yet */
3748 }
3749
3750 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3751 KVM_GUESTDBG_USE_HW_BP | \
3752 KVM_GUESTDBG_ENABLE)
3753
3754 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3755 struct kvm_guest_debug *dbg)
3756 {
3757 int rc = 0;
3758
3759 vcpu_load(vcpu);
3760
3761 vcpu->guest_debug = 0;
3762 kvm_s390_clear_bp_data(vcpu);
3763
3764 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3765 rc = -EINVAL;
3766 goto out;
3767 }
3768 if (!sclp.has_gpere) {
3769 rc = -EINVAL;
3770 goto out;
3771 }
3772
3773 if (dbg->control & KVM_GUESTDBG_ENABLE) {
3774 vcpu->guest_debug = dbg->control;
3775 /* enforce guest PER */
3776 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3777
3778 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3779 rc = kvm_s390_import_bp_data(vcpu, dbg);
3780 } else {
3781 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3782 vcpu->arch.guestdbg.last_bp = 0;
3783 }
3784
3785 if (rc) {
3786 vcpu->guest_debug = 0;
3787 kvm_s390_clear_bp_data(vcpu);
3788 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3789 }
3790
3791 out:
3792 vcpu_put(vcpu);
3793 return rc;
3794 }
3795
3796 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3797 struct kvm_mp_state *mp_state)
3798 {
3799 int ret;
3800
3801 vcpu_load(vcpu);
3802
3803 /* CHECK_STOP and LOAD are not supported yet */
3804 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3805 KVM_MP_STATE_OPERATING;
3806
3807 vcpu_put(vcpu);
3808 return ret;
3809 }
3810
3811 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3812 struct kvm_mp_state *mp_state)
3813 {
3814 int rc = 0;
3815
3816 vcpu_load(vcpu);
3817
3818 /* user space knows about this interface - let it control the state */
3819 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3820
3821 switch (mp_state->mp_state) {
3822 case KVM_MP_STATE_STOPPED:
3823 rc = kvm_s390_vcpu_stop(vcpu);
3824 break;
3825 case KVM_MP_STATE_OPERATING:
3826 rc = kvm_s390_vcpu_start(vcpu);
3827 break;
3828 case KVM_MP_STATE_LOAD:
3829 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3830 rc = -ENXIO;
3831 break;
3832 }
3833 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3834 break;
3835 case KVM_MP_STATE_CHECK_STOP:
3836 fallthrough; /* CHECK_STOP and LOAD are not supported yet */
3837 default:
3838 rc = -ENXIO;
3839 }
3840
3841 vcpu_put(vcpu);
3842 return rc;
3843 }
3844
3845 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3846 {
3847 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3848 }
3849
3850 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3851 {
3852 retry:
3853 kvm_s390_vcpu_request_handled(vcpu);
3854 if (!kvm_request_pending(vcpu))
3855 return 0;
3856 /*
3857 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3858 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3859 * This ensures that the ipte instruction for this request has
3860 * already finished. We might race against a second unmapper that
3861 * wants to set the blocking bit. Let's just retry the request loop.
3862 */
3863 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3864 int rc;
3865 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3866 kvm_s390_get_prefix(vcpu),
3867 PAGE_SIZE * 2, PROT_WRITE);
3868 if (rc) {
3869 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3870 return rc;
3871 }
3872 goto retry;
3873 }
3874
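/*
 * A TLB flush is requested by invalidating ihcpu, which makes SIE
 * rebuild the guest TLB on the next entry.
 */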
3875 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3876 vcpu->arch.sie_block->ihcpu = 0xffff;
3877 goto retry;
3878 }
3879
3880 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3881 if (!ibs_enabled(vcpu)) {
3882 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3883 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3884 }
3885 goto retry;
3886 }
3887
3888 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3889 if (ibs_enabled(vcpu)) {
3890 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3891 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3892 }
3893 goto retry;
3894 }
3895
3896 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3897 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3898 goto retry;
3899 }
3900
3901 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3902 /*
3903 * Disable CMM virtualization; we will emulate the ESSA
3904 * instruction manually, in order to provide additional
3905 * functionalities needed for live migration.
3906 */
3907 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3908 goto retry;
3909 }
3910
3911 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3912 /*
3913 * Re-enable CMM virtualization if CMMA is available and
3914 * CMM has been used.
3915 */
3916 if ((vcpu->kvm->arch.use_cmma) &&
3917 (vcpu->kvm->mm->context.uses_cmm))
3918 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3919 goto retry;
3920 }
3921
3922 /* nothing to do, just clear the request */
3923 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3924 /* we left the vsie handler, nothing to do, just clear the request */
3925 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3926
3927 return 0;
3928 }
3929
3930 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3931 {
3932 struct kvm_vcpu *vcpu;
3933 union tod_clock clk;
3934 int i;
3935
3936 preempt_disable();
3937
3938 store_tod_clock_ext(&clk);
3939
3940 kvm->arch.epoch = gtod->tod - clk.tod;
3941 kvm->arch.epdx = 0;
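/*
 * With the multiple-epoch facility (139) the epoch difference has an
 * additional high part (epdx). If the 64-bit subtraction above
 * wrapped around (epoch > gtod->tod), borrow one from the epoch index.
 */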
3942 if (test_kvm_facility(kvm, 139)) {
3943 kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3944 if (kvm->arch.epoch > gtod->tod)
3945 kvm->arch.epdx -= 1;
3946 }
3947
3948 kvm_s390_vcpu_block_all(kvm);
3949 kvm_for_each_vcpu(i, vcpu, kvm) {
3950 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3951 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3952 }
3953
3954 kvm_s390_vcpu_unblock_all(kvm);
3955 preempt_enable();
3956 }
3957
3958 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3959 {
3960 if (!mutex_trylock(&kvm->lock))
3961 return 0;
3962 __kvm_s390_set_tod_clock(kvm, gtod);
3963 mutex_unlock(&kvm->lock);
3964 return 1;
3965 }
3966
3967 /**
3968 * kvm_arch_fault_in_page - fault-in guest page if necessary
3969 * @vcpu: The corresponding virtual cpu
3970 * @gpa: Guest physical address
3971 * @writable: Whether the page should be writable or not
3972 *
3973 * Make sure that a guest page has been faulted-in on the host.
3974 *
3975 * Return: Zero on success, negative error code otherwise.
3976 */
3977 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3978 {
3979 return gmap_fault(vcpu->arch.gmap, gpa,
3980 writable ? FAULT_FLAG_WRITE : 0);
3981 }
3982
3983 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3984 unsigned long token)
3985 {
3986 struct kvm_s390_interrupt inti;
3987 struct kvm_s390_irq irq;
3988
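/*
 * The INIT notification goes to the vcpu that triggered the pseudo
 * page fault, while the DONE completion is injected as a floating
 * (VM-wide) interrupt.
 */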
3989 if (start_token) {
3990 irq.u.ext.ext_params2 = token;
3991 irq.type = KVM_S390_INT_PFAULT_INIT;
3992 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3993 } else {
3994 inti.type = KVM_S390_INT_PFAULT_DONE;
3995 inti.parm64 = token;
3996 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3997 }
3998 }
3999
4000 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4001 struct kvm_async_pf *work)
4002 {
4003 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4004 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4005
4006 return true;
4007 }
4008
4009 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4010 struct kvm_async_pf *work)
4011 {
4012 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4013 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4014 }
4015
4016 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
4017 struct kvm_async_pf *work)
4018 {
4019 /* s390 will always inject the page directly */
4020 }
4021
4022 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4023 {
4024 /*
4025 * s390 will always inject the page directly,
4026 * but we still want check_async_completion to clean up
4027 */
4028 return true;
4029 }
4030
4031 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4032 {
4033 hva_t hva;
4034 struct kvm_arch_async_pf arch;
4035
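/*
 * Only hand out an async page fault if the guest has armed the
 * pfault mechanism (valid token, matching PSW mask/compare bits),
 * can take the external interrupt and has no other interrupt pending.
 */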
4036 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4037 return false;
4038 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4039 vcpu->arch.pfault_compare)
4040 return false;
4041 if (psw_extint_disabled(vcpu))
4042 return false;
4043 if (kvm_s390_vcpu_has_irq(vcpu, 0))
4044 return false;
4045 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4046 return false;
4047 if (!vcpu->arch.gmap->pfault_enabled)
4048 return false;
4049
4050 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4051 hva += current->thread.gmap_addr & ~PAGE_MASK;
4052 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4053 return false;
4054
4055 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4056 }
4057
4058 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4059 {
4060 int rc, cpuflags;
4061
4062 /*
4063 * On s390 notifications for arriving pages will be delivered directly
4064 * to the guest but the housekeeping for completed pfaults is
4065 * handled outside the worker.
4066 */
4067 kvm_check_async_pf_completion(vcpu);
4068
4069 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4070 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4071
4072 if (need_resched())
4073 schedule();
4074
4075 if (!kvm_is_ucontrol(vcpu->kvm)) {
4076 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4077 if (rc)
4078 return rc;
4079 }
4080
4081 rc = kvm_s390_handle_requests(vcpu);
4082 if (rc)
4083 return rc;
4084
4085 if (guestdbg_enabled(vcpu)) {
4086 kvm_s390_backup_guest_per_regs(vcpu);
4087 kvm_s390_patch_guest_per_regs(vcpu);
4088 }
4089
4090 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4091
4092 vcpu->arch.sie_block->icptcode = 0;
4093 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4094 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4095 trace_kvm_s390_sie_enter(vcpu, cpuflags);
4096
4097 return 0;
4098 }
4099
4100 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4101 {
4102 struct kvm_s390_pgm_info pgm_info = {
4103 .code = PGM_ADDRESSING,
4104 };
4105 u8 opcode, ilen;
4106 int rc;
4107
4108 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4109 trace_kvm_s390_sie_fault(vcpu);
4110
4111 /*
4112 * We want to inject an addressing exception, which is defined as a
4113 * suppressing or terminating exception. However, since we came here
4114 * by a DAT access exception, the PSW still points to the faulting
4115 * instruction since DAT exceptions are nullifying. So we've got
4116 * to look up the current opcode to get the length of the instruction
4117 * to be able to forward the PSW.
4118 */
4119 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4120 ilen = insn_length(opcode);
4121 if (rc < 0) {
4122 return rc;
4123 } else if (rc) {
4124 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4125 * Forward by arbitrary ilc, injection will take care of
4126 * nullification if necessary.
4127 */
4128 pgm_info = vcpu->arch.pgm;
4129 ilen = 4;
4130 }
4131 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4132 kvm_s390_forward_psw(vcpu, ilen);
4133 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4134 }
4135
4136 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4137 {
4138 struct mcck_volatile_info *mcck_info;
4139 struct sie_page *sie_page;
4140
4141 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4142 vcpu->arch.sie_block->icptcode);
4143 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4144
4145 if (guestdbg_enabled(vcpu))
4146 kvm_s390_restore_guest_per_regs(vcpu);
4147
4148 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4149 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4150
4151 if (exit_reason == -EINTR) {
4152 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4153 sie_page = container_of(vcpu->arch.sie_block,
4154 struct sie_page, sie_block);
4155 mcck_info = &sie_page->mcck_info;
4156 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4157 return 0;
4158 }
4159
4160 if (vcpu->arch.sie_block->icptcode > 0) {
4161 int rc = kvm_handle_sie_intercept(vcpu);
4162
4163 if (rc != -EOPNOTSUPP)
4164 return rc;
4165 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4166 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4167 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4168 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4169 return -EREMOTE;
4170 } else if (exit_reason != -EFAULT) {
4171 vcpu->stat.exit_null++;
4172 return 0;
4173 } else if (kvm_is_ucontrol(vcpu->kvm)) {
4174 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4175 vcpu->run->s390_ucontrol.trans_exc_code =
4176 current->thread.gmap_addr;
4177 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4178 return -EREMOTE;
4179 } else if (current->thread.gmap_pfault) {
4180 trace_kvm_s390_major_guest_pfault(vcpu);
4181 current->thread.gmap_pfault = 0;
4182 if (kvm_arch_setup_async_pf(vcpu))
4183 return 0;
4184 vcpu->stat.pfault_sync++;
4185 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4186 }
4187 return vcpu_post_run_fault_in_sie(vcpu);
4188 }
4189
4190 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4191 static int __vcpu_run(struct kvm_vcpu *vcpu)
4192 {
4193 int rc, exit_reason;
4194 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4195
4196 /*
4197 * We try to hold kvm->srcu during most of vcpu_run (except when
4198 * running the guest), so that memslots (and other stuff) are protected
4199 */
4200 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4201
4202 do {
4203 rc = vcpu_pre_run(vcpu);
4204 if (rc)
4205 break;
4206
4207 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4208 /*
4209 * As PF_VCPU will be used in fault handler, between
4210 * guest_enter and guest_exit should be no uaccess.
4211 */
4212 local_irq_disable();
4213 guest_enter_irqoff();
4214 __disable_cpu_timer_accounting(vcpu);
4215 local_irq_enable();
4216 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4217 memcpy(sie_page->pv_grregs,
4218 vcpu->run->s.regs.gprs,
4219 sizeof(sie_page->pv_grregs));
4220 }
4221 if (test_cpu_flag(CIF_FPU))
4222 load_fpu_regs();
4223 exit_reason = sie64a(vcpu->arch.sie_block,
4224 vcpu->run->s.regs.gprs);
4225 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4226 memcpy(vcpu->run->s.regs.gprs,
4227 sie_page->pv_grregs,
4228 sizeof(sie_page->pv_grregs));
4229 /*
4230 * We're not allowed to inject interrupts on intercepts
4231 * that leave the guest state in an "in-between" state
4232 * where the next SIE entry will do a continuation.
4233 * Fence interrupts in our "internal" PSW.
4234 */
4235 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4236 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4237 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4238 }
4239 }
4240 local_irq_disable();
4241 __enable_cpu_timer_accounting(vcpu);
4242 guest_exit_irqoff();
4243 local_irq_enable();
4244 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4245
4246 rc = vcpu_post_run(vcpu, exit_reason);
4247 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4248
4249 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4250 return rc;
4251 }
4252
4253 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4254 {
4255 struct kvm_run *kvm_run = vcpu->run;
4256 struct runtime_instr_cb *riccb;
4257 struct gs_cb *gscb;
4258
4259 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4260 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4261 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4262 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4263 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4264 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4265 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4266 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4267 }
4268 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4269 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4270 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4271 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4272 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4273 kvm_clear_async_pf_completion_queue(vcpu);
4274 }
4275 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4276 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4277 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4278 }
4279 /*
4280 * If userspace sets the riccb (e.g. after migration) to a valid state,
4281 * we should enable RI here instead of doing the lazy enablement.
4282 */
4283 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4284 test_kvm_facility(vcpu->kvm, 64) &&
4285 riccb->v &&
4286 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4287 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4288 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4289 }
4290 /*
4291 * If userspace sets the gscb (e.g. after migration) to non-zero,
4292 * we should enable GS here instead of doing the lazy enablement.
4293 */
4294 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4295 test_kvm_facility(vcpu->kvm, 133) &&
4296 gscb->gssm &&
4297 !vcpu->arch.gs_enabled) {
4298 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4299 vcpu->arch.sie_block->ecb |= ECB_GS;
4300 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4301 vcpu->arch.gs_enabled = 1;
4302 }
4303 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4304 test_kvm_facility(vcpu->kvm, 82)) {
4305 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4306 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4307 }
4308 if (MACHINE_HAS_GS) {
4309 preempt_disable();
4310 __ctl_set_bit(2, 4);
4311 if (current->thread.gs_cb) {
4312 vcpu->arch.host_gscb = current->thread.gs_cb;
4313 save_gs_cb(vcpu->arch.host_gscb);
4314 }
4315 if (vcpu->arch.gs_enabled) {
4316 current->thread.gs_cb = (struct gs_cb *)
4317 &vcpu->run->s.regs.gscb;
4318 restore_gs_cb(current->thread.gs_cb);
4319 }
4320 preempt_enable();
4321 }
4322 /* SIE will load etoken directly from SDNX and therefore kvm_run */
4323 }
4324
4325 static void sync_regs(struct kvm_vcpu *vcpu)
4326 {
4327 struct kvm_run *kvm_run = vcpu->run;
4328
4329 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4330 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4331 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4332 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4333 /* some control register changes require a tlb flush */
4334 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4335 }
4336 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4337 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4338 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4339 }
4340 save_access_regs(vcpu->arch.host_acrs);
4341 restore_access_regs(vcpu->run->s.regs.acrs);
4342 /* save host (userspace) fprs/vrs */
4343 save_fpu_regs();
4344 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4345 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4346 if (MACHINE_HAS_VX)
4347 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4348 else
4349 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4350 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4351 if (test_fp_ctl(current->thread.fpu.fpc))
4352 /* User space provided an invalid FPC, let's clear it */
4353 current->thread.fpu.fpc = 0;
4354
4355 /* Sync fmt2 only data */
4356 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4357 sync_regs_fmt2(vcpu);
4358 } else {
4359 /*
4360 * In several places we have to modify our internal view to
4361 * not do things that are disallowed by the ultravisor. For
4362 * example we must not inject interrupts after specific exits
4363 * (e.g. 112 prefix page not secure). We do this by turning
4364 * off the machine check, external and I/O interrupt bits
4365 * of our PSW copy. To avoid getting validity intercepts, we
4366 * do only accept the condition code from userspace.
4367 */
4368 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4369 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4370 PSW_MASK_CC;
4371 }
4372
4373 kvm_run->kvm_dirty_regs = 0;
4374 }
4375
4376 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4377 {
4378 struct kvm_run *kvm_run = vcpu->run;
4379
4380 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4381 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4382 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4383 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4384 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4385 if (MACHINE_HAS_GS) {
4386 preempt_disable();
4387 __ctl_set_bit(2, 4);
4388 if (vcpu->arch.gs_enabled)
4389 save_gs_cb(current->thread.gs_cb);
4390 current->thread.gs_cb = vcpu->arch.host_gscb;
4391 restore_gs_cb(vcpu->arch.host_gscb);
4392 if (!vcpu->arch.host_gscb)
4393 __ctl_clear_bit(2, 4);
4394 vcpu->arch.host_gscb = NULL;
4395 preempt_enable();
4396 }
4397 /* SIE will save etoken directly into SDNX and therefore kvm_run */
4398 }
4399
4400 static void store_regs(struct kvm_vcpu *vcpu)
4401 {
4402 struct kvm_run *kvm_run = vcpu->run;
4403
4404 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4405 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4406 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4407 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4408 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4409 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4410 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4411 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4412 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4413 save_access_regs(vcpu->run->s.regs.acrs);
4414 restore_access_regs(vcpu->arch.host_acrs);
4415 /* Save guest register state */
4416 save_fpu_regs();
4417 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4418 /* Restore will be done lazily at return */
4419 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4420 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4421 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4422 store_regs_fmt2(vcpu);
4423 }
4424
4425 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4426 {
4427 struct kvm_run *kvm_run = vcpu->run;
4428 int rc;
4429
4430 if (kvm_run->immediate_exit)
4431 return -EINTR;
4432
4433 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4434 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4435 return -EINVAL;
4436
4437 vcpu_load(vcpu);
4438
4439 if (guestdbg_exit_pending(vcpu)) {
4440 kvm_s390_prepare_debug_exit(vcpu);
4441 rc = 0;
4442 goto out;
4443 }
4444
4445 kvm_sigset_activate(vcpu);
4446
4447 /*
4448 * no need to check the return value of vcpu_start as it can only have
4449 * an error for protvirt, but protvirt means user cpu state
4450 */
4451 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4452 kvm_s390_vcpu_start(vcpu);
4453 } else if (is_vcpu_stopped(vcpu)) {
4454 pr_err_ratelimited("can't run stopped vcpu %d\n",
4455 vcpu->vcpu_id);
4456 rc = -EINVAL;
4457 goto out;
4458 }
4459
4460 sync_regs(vcpu);
4461 enable_cpu_timer_accounting(vcpu);
4462
4463 might_fault();
4464 rc = __vcpu_run(vcpu);
4465
4466 if (signal_pending(current) && !rc) {
4467 kvm_run->exit_reason = KVM_EXIT_INTR;
4468 rc = -EINTR;
4469 }
4470
4471 if (guestdbg_exit_pending(vcpu) && !rc) {
4472 kvm_s390_prepare_debug_exit(vcpu);
4473 rc = 0;
4474 }
4475
4476 if (rc == -EREMOTE) {
4477 /* userspace support is needed, kvm_run has been prepared */
4478 rc = 0;
4479 }
4480
4481 disable_cpu_timer_accounting(vcpu);
4482 store_regs(vcpu);
4483
4484 kvm_sigset_deactivate(vcpu);
4485
4486 vcpu->stat.exit_userspace++;
4487 out:
4488 vcpu_put(vcpu);
4489 return rc;
4490 }
4491
4492 /*
4493 * store status at address
4494 * we have two special cases:
4495 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4496 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4497 */
4498 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4499 {
4500 unsigned char archmode = 1;
4501 freg_t fprs[NUM_FPRS];
4502 unsigned int px;
4503 u64 clkcomp, cputm;
4504 int rc;
4505
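/*
 * Byte 163 of the save area holds the architectural-mode id;
 * storing 1 marks the status as saved in z/Architecture format.
 */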
4506 px = kvm_s390_get_prefix(vcpu);
4507 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4508 if (write_guest_abs(vcpu, 163, &archmode, 1))
4509 return -EFAULT;
4510 gpa = 0;
4511 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4512 if (write_guest_real(vcpu, 163, &archmode, 1))
4513 return -EFAULT;
4514 gpa = px;
4515 } else
4516 gpa -= __LC_FPREGS_SAVE_AREA;
4517
4518 /* manually convert vector registers if necessary */
4519 if (MACHINE_HAS_VX) {
4520 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4521 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4522 fprs, 128);
4523 } else {
4524 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4525 vcpu->run->s.regs.fprs, 128);
4526 }
4527 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4528 vcpu->run->s.regs.gprs, 128);
4529 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4530 &vcpu->arch.sie_block->gpsw, 16);
4531 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4532 &px, 4);
4533 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4534 &vcpu->run->s.regs.fpc, 4);
4535 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4536 &vcpu->arch.sie_block->todpr, 4);
4537 cputm = kvm_s390_get_cpu_timer(vcpu);
4538 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4539 &cputm, 8);
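/*
 * Only bits 0-55 of the clock comparator are stored, so shift the
 * value right by 8 before writing it to the save area.
 */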
4540 clkcomp = vcpu->arch.sie_block->ckc >> 8;
4541 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4542 &clkcomp, 8);
4543 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4544 &vcpu->run->s.regs.acrs, 64);
4545 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4546 &vcpu->arch.sie_block->gcr, 128);
4547 return rc ? -EFAULT : 0;
4548 }
4549
4550 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4551 {
4552 /*
4553 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4554 * switch in the run ioctl. Let's update our copies before we save
4555 * them into the save area
4556 */
4557 save_fpu_regs();
4558 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4559 save_access_regs(vcpu->run->s.regs.acrs);
4560
4561 return kvm_s390_store_status_unloaded(vcpu, addr);
4562 }
4563
4564 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4565 {
4566 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4567 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4568 }
4569
4570 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4571 {
4572 unsigned int i;
4573 struct kvm_vcpu *vcpu;
4574
4575 kvm_for_each_vcpu(i, vcpu, kvm) {
4576 __disable_ibs_on_vcpu(vcpu);
4577 }
4578 }
4579
4580 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4581 {
4582 if (!sclp.has_ibs)
4583 return;
4584 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4585 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4586 }
4587
4588 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4589 {
4590 int i, online_vcpus, r = 0, started_vcpus = 0;
4591
4592 if (!is_vcpu_stopped(vcpu))
4593 return 0;
4594
4595 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4596 /* Only one cpu at a time may enter/leave the STOPPED state. */
4597 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4598 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4599
4600 /* Let's tell the UV that we want to change into the operating state */
4601 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4602 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4603 if (r) {
4604 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4605 return r;
4606 }
4607 }
4608
4609 for (i = 0; i < online_vcpus; i++) {
4610 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4611 started_vcpus++;
4612 }
4613
4614 if (started_vcpus == 0) {
4615 /* we're the only active VCPU -> speed it up */
4616 __enable_ibs_on_vcpu(vcpu);
4617 } else if (started_vcpus == 1) {
4618 /*
4619 * As we are starting a second VCPU, we have to disable
4620 * the IBS facility on all VCPUs to remove potentially
4621 * outstanding ENABLE requests.
4622 */
4623 __disable_ibs_on_all_vcpus(vcpu->kvm);
4624 }
4625
4626 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4627 /*
4628 * The real PSW might have changed due to a RESTART interpreted by the
4629 * ultravisor. We block all interrupts and let the next sie exit
4630 * refresh our view.
4631 */
4632 if (kvm_s390_pv_cpu_is_protected(vcpu))
4633 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4634 /*
4635 * Another VCPU might have used IBS while we were offline.
4636 * Let's play safe and flush the VCPU at startup.
4637 */
4638 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4639 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4640 return 0;
4641 }
4642
4643 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4644 {
4645 int i, online_vcpus, r = 0, started_vcpus = 0;
4646 struct kvm_vcpu *started_vcpu = NULL;
4647
4648 if (is_vcpu_stopped(vcpu))
4649 return 0;
4650
4651 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4652 /* Only one cpu at a time may enter/leave the STOPPED state. */
4653 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4654 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4655
4656 /* Let's tell the UV that we want to change into the stopped state */
4657 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4658 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4659 if (r) {
4660 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4661 return r;
4662 }
4663 }
4664
4665 /*
4666 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
4667 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
4668 * have been fully processed. This will ensure that the VCPU
4669 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
4670 */
4671 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4672 kvm_s390_clear_stop_irq(vcpu);
4673
4674 __disable_ibs_on_vcpu(vcpu);
4675
4676 for (i = 0; i < online_vcpus; i++) {
4677 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4678 started_vcpus++;
4679 started_vcpu = vcpu->kvm->vcpus[i];
4680 }
4681 }
4682
4683 if (started_vcpus == 1) {
4684 /*
4685 * As we only have one VCPU left, we want to enable the
4686 * IBS facility for that VCPU to speed it up.
4687 */
4688 __enable_ibs_on_vcpu(started_vcpu);
4689 }
4690
4691 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4692 return 0;
4693 }
4694
4695 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4696 struct kvm_enable_cap *cap)
4697 {
4698 int r;
4699
4700 if (cap->flags)
4701 return -EINVAL;
4702
4703 switch (cap->cap) {
4704 case KVM_CAP_S390_CSS_SUPPORT:
4705 if (!vcpu->kvm->arch.css_support) {
4706 vcpu->kvm->arch.css_support = 1;
4707 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4708 trace_kvm_s390_enable_css(vcpu->kvm);
4709 }
4710 r = 0;
4711 break;
4712 default:
4713 r = -EINVAL;
4714 break;
4715 }
4716 return r;
4717 }
4718
4719 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4720 struct kvm_s390_mem_op *mop)
4721 {
4722 void __user *uaddr = (void __user *)mop->buf;
4723 int r = 0;
4724
4725 if (mop->flags || !mop->size)
4726 return -EINVAL;
4727 if (mop->size + mop->sida_offset < mop->size)
4728 return -EINVAL;
4729 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4730 return -E2BIG;
4731 if (!kvm_s390_pv_cpu_is_protected(vcpu))
4732 return -EINVAL;
4733
4734 switch (mop->op) {
4735 case KVM_S390_MEMOP_SIDA_READ:
4736 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4737 mop->sida_offset), mop->size))
4738 r = -EFAULT;
4739
4740 break;
4741 case KVM_S390_MEMOP_SIDA_WRITE:
4742 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4743 mop->sida_offset), uaddr, mop->size))
4744 r = -EFAULT;
4745 break;
4746 }
4747 return r;
4748 }
4749 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4750 struct kvm_s390_mem_op *mop)
4751 {
4752 void __user *uaddr = (void __user *)mop->buf;
4753 void *tmpbuf = NULL;
4754 int r = 0;
4755 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4756 | KVM_S390_MEMOP_F_CHECK_ONLY;
4757
4758 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4759 return -EINVAL;
4760
4761 if (mop->size > MEM_OP_MAX_SIZE)
4762 return -E2BIG;
4763
4764 if (kvm_s390_pv_cpu_is_protected(vcpu))
4765 return -EINVAL;
4766
4767 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4768 tmpbuf = vmalloc(mop->size);
4769 if (!tmpbuf)
4770 return -ENOMEM;
4771 }
4772
4773 switch (mop->op) {
4774 case KVM_S390_MEMOP_LOGICAL_READ:
4775 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4776 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4777 mop->size, GACC_FETCH);
4778 break;
4779 }
4780 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4781 if (r == 0) {
4782 if (copy_to_user(uaddr, tmpbuf, mop->size))
4783 r = -EFAULT;
4784 }
4785 break;
4786 case KVM_S390_MEMOP_LOGICAL_WRITE:
4787 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4788 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4789 mop->size, GACC_STORE);
4790 break;
4791 }
4792 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4793 r = -EFAULT;
4794 break;
4795 }
4796 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4797 break;
4798 }
4799
4800 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4801 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4802
4803 vfree(tmpbuf);
4804 return r;
4805 }
4806
4807 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4808 struct kvm_s390_mem_op *mop)
4809 {
4810 int r, srcu_idx;
4811
4812 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4813
4814 switch (mop->op) {
4815 case KVM_S390_MEMOP_LOGICAL_READ:
4816 case KVM_S390_MEMOP_LOGICAL_WRITE:
4817 r = kvm_s390_guest_mem_op(vcpu, mop);
4818 break;
4819 case KVM_S390_MEMOP_SIDA_READ:
4820 case KVM_S390_MEMOP_SIDA_WRITE:
4821 /* we are locked against sida going away by the vcpu->mutex */
4822 r = kvm_s390_guest_sida_op(vcpu, mop);
4823 break;
4824 default:
4825 r = -EINVAL;
4826 }
4827
4828 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4829 return r;
4830 }
4831
4832 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4833 unsigned int ioctl, unsigned long arg)
4834 {
4835 struct kvm_vcpu *vcpu = filp->private_data;
4836 void __user *argp = (void __user *)arg;
4837
4838 switch (ioctl) {
4839 case KVM_S390_IRQ: {
4840 struct kvm_s390_irq s390irq;
4841
4842 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4843 return -EFAULT;
4844 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4845 }
4846 case KVM_S390_INTERRUPT: {
4847 struct kvm_s390_interrupt s390int;
4848 struct kvm_s390_irq s390irq = {};
4849
4850 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4851 return -EFAULT;
4852 if (s390int_to_s390irq(&s390int, &s390irq))
4853 return -EINVAL;
4854 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4855 }
4856 }
4857 return -ENOIOCTLCMD;
4858 }
4859
4860 long kvm_arch_vcpu_ioctl(struct file *filp,
4861 unsigned int ioctl, unsigned long arg)
4862 {
4863 struct kvm_vcpu *vcpu = filp->private_data;
4864 void __user *argp = (void __user *)arg;
4865 int idx;
4866 long r;
4867 u16 rc, rrc;
4868
4869 vcpu_load(vcpu);
4870
4871 switch (ioctl) {
4872 case KVM_S390_STORE_STATUS:
4873 idx = srcu_read_lock(&vcpu->kvm->srcu);
4874 r = kvm_s390_store_status_unloaded(vcpu, arg);
4875 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4876 break;
4877 case KVM_S390_SET_INITIAL_PSW: {
4878 psw_t psw;
4879
4880 r = -EFAULT;
4881 if (copy_from_user(&psw, argp, sizeof(psw)))
4882 break;
4883 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4884 break;
4885 }
4886 case KVM_S390_CLEAR_RESET:
4887 r = 0;
4888 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4889 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4890 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4891 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4892 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4893 rc, rrc);
4894 }
4895 break;
4896 case KVM_S390_INITIAL_RESET:
4897 r = 0;
4898 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4899 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4900 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4901 UVC_CMD_CPU_RESET_INITIAL,
4902 &rc, &rrc);
4903 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4904 rc, rrc);
4905 }
4906 break;
4907 case KVM_S390_NORMAL_RESET:
4908 r = 0;
4909 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4910 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4911 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4912 UVC_CMD_CPU_RESET, &rc, &rrc);
4913 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4914 rc, rrc);
4915 }
4916 break;
4917 case KVM_SET_ONE_REG:
4918 case KVM_GET_ONE_REG: {
4919 struct kvm_one_reg reg;
4920 r = -EINVAL;
4921 if (kvm_s390_pv_cpu_is_protected(vcpu))
4922 break;
4923 r = -EFAULT;
4924 if (copy_from_user(&reg, argp, sizeof(reg)))
4925 break;
4926 if (ioctl == KVM_SET_ONE_REG)
4927 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4928 else
4929 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4930 break;
4931 }
4932 #ifdef CONFIG_KVM_S390_UCONTROL
4933 case KVM_S390_UCAS_MAP: {
4934 struct kvm_s390_ucas_mapping ucasmap;
4935
4936 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4937 r = -EFAULT;
4938 break;
4939 }
4940
4941 if (!kvm_is_ucontrol(vcpu->kvm)) {
4942 r = -EINVAL;
4943 break;
4944 }
4945
4946 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4947 ucasmap.vcpu_addr, ucasmap.length);
4948 break;
4949 }
4950 case KVM_S390_UCAS_UNMAP: {
4951 struct kvm_s390_ucas_mapping ucasmap;
4952
4953 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4954 r = -EFAULT;
4955 break;
4956 }
4957
4958 if (!kvm_is_ucontrol(vcpu->kvm)) {
4959 r = -EINVAL;
4960 break;
4961 }
4962
4963 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4964 ucasmap.length);
4965 break;
4966 }
4967 #endif
4968 case KVM_S390_VCPU_FAULT: {
4969 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4970 break;
4971 }
4972 case KVM_ENABLE_CAP:
4973 {
4974 struct kvm_enable_cap cap;
4975 r = -EFAULT;
4976 if (copy_from_user(&cap, argp, sizeof(cap)))
4977 break;
4978 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4979 break;
4980 }
4981 case KVM_S390_MEM_OP: {
4982 struct kvm_s390_mem_op mem_op;
4983
4984 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4985 r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4986 else
4987 r = -EFAULT;
4988 break;
4989 }
4990 case KVM_S390_SET_IRQ_STATE: {
4991 struct kvm_s390_irq_state irq_state;
4992
4993 r = -EFAULT;
4994 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4995 break;
4996 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4997 irq_state.len == 0 ||
4998 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4999 r = -EINVAL;
5000 break;
5001 }
5002 /* do not use irq_state.flags, it will break old QEMUs */
5003 r = kvm_s390_set_irq_state(vcpu,
5004 (void __user *) irq_state.buf,
5005 irq_state.len);
5006 break;
5007 }
5008 case KVM_S390_GET_IRQ_STATE: {
5009 struct kvm_s390_irq_state irq_state;
5010
5011 r = -EFAULT;
5012 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5013 break;
5014 if (irq_state.len == 0) {
5015 r = -EINVAL;
5016 break;
5017 }
5018 /* do not use irq_state.flags, it will break old QEMUs */
5019 r = kvm_s390_get_irq_state(vcpu,
5020 (__u8 __user *) irq_state.buf,
5021 irq_state.len);
5022 break;
5023 }
5024 default:
5025 r = -ENOTTY;
5026 }
5027
5028 vcpu_put(vcpu);
5029 return r;
5030 }
5031
5032 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5033 {
5034 #ifdef CONFIG_KVM_S390_UCONTROL
5035 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5036 && (kvm_is_ucontrol(vcpu->kvm))) {
5037 vmf->page = virt_to_page(vcpu->arch.sie_block);
5038 get_page(vmf->page);
5039 return 0;
5040 }
5041 #endif
5042 return VM_FAULT_SIGBUS;
5043 }
5044
5045 /* Section: memory related */
5046 int kvm_arch_prepare_memory_region(struct kvm *kvm,
5047 struct kvm_memory_slot *memslot,
5048 const struct kvm_userspace_memory_region *mem,
5049 enum kvm_mr_change change)
5050 {
5051 /* A few sanity checks. Memory slots have to start and end on a
5052 segment boundary (1 MB). The memory in userland may be fragmented
5053 into various different vmas. It is okay to mmap() and munmap()
5054 stuff in this slot at any time after doing this call */
5055
5056 if (mem->userspace_addr & 0xffffful)
5057 return -EINVAL;
5058
5059 if (mem->memory_size & 0xffffful)
5060 return -EINVAL;
5061
5062 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
5063 return -EINVAL;
5064
5065 /* When we are protected, we should not change the memory slots */
5066 if (kvm_s390_pv_get_handle(kvm))
5067 return -EINVAL;
5068
5069 if (!kvm->arch.migration_mode)
5070 return 0;
5071
5072 /*
5073 * Turn off migration mode when:
5074 * - userspace creates a new memslot with dirty logging off,
5075 * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and
5076 * dirty logging is turned off.
5077 * Migration mode expects dirty page logging being enabled to store
5078 * its dirty bitmap.
5079 */
5080 if (change != KVM_MR_DELETE &&
5081 !(mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
5082 WARN(kvm_s390_vm_stop_migration(kvm),
5083 "Failed to stop migration mode");
5084
5085 return 0;
5086 }
5087
5088 void kvm_arch_commit_memory_region(struct kvm *kvm,
5089 const struct kvm_userspace_memory_region *mem,
5090 struct kvm_memory_slot *old,
5091 const struct kvm_memory_slot *new,
5092 enum kvm_mr_change change)
5093 {
5094 int rc = 0;
5095
5096 switch (change) {
5097 case KVM_MR_DELETE:
5098 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5099 old->npages * PAGE_SIZE);
5100 break;
5101 case KVM_MR_MOVE:
5102 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5103 old->npages * PAGE_SIZE);
5104 if (rc)
5105 break;
5106 fallthrough;
5107 case KVM_MR_CREATE:
5108 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5109 mem->guest_phys_addr, mem->memory_size);
5110 break;
5111 case KVM_MR_FLAGS_ONLY:
5112 break;
5113 default:
5114 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5115 }
5116 if (rc)
5117 pr_warn("failed to commit memory region\n");
5118 return;
5119 }
5120
5121 static inline unsigned long nonhyp_mask(int i)
5122 {
5123 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5124
5125 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5126 }
5127
5128 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5129 {
5130 vcpu->valid_wakeup = false;
5131 }
5132
5133 static int __init kvm_s390_init(void)
5134 {
5135 int i;
5136
5137 if (!sclp.has_sief2) {
5138 pr_info("SIE is not available\n");
5139 return -ENODEV;
5140 }
5141
5142 if (nested && hpage) {
5143 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5144 return -EINVAL;
5145 }
5146
5147 for (i = 0; i < 16; i++)
5148 kvm_s390_fac_base[i] |=
5149 stfle_fac_list[i] & nonhyp_mask(i);
5150
5151 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5152 }
5153
5154 static void __exit kvm_s390_exit(void)
5155 {
5156 kvm_exit();
5157 }
5158
5159 module_init(kvm_s390_init);
5160 module_exit(kvm_s390_exit);
5161
5162 /*
5163 * Enable autoloading of the kvm module.
5164 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5165 * since x86 takes a different approach.
5166 */
5167 #include <linux/miscdevice.h>
5168 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5169 MODULE_ALIAS("devname:kvm");
5170