1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * hosting IBM Z kernel virtual machines (s390x)
4 *
5 * Copyright IBM Corp. 2008, 2020
6 *
7 * Author(s): Carsten Otte <cotte@de.ibm.com>
8 * Christian Borntraeger <borntraeger@de.ibm.com>
9 * Heiko Carstens <heiko.carstens@de.ibm.com>
10 * Christian Ehrhardt <ehrhardt@de.ibm.com>
11 * Jason J. Herne <jjherne@us.ibm.com>
12 */
13
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
35
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include <asm/fpu/api.h>
49 #include "kvm-s390.h"
50 #include "gaccess.h"
51
52 #define CREATE_TRACE_POINTS
53 #include "trace.h"
54 #include "trace-s390.h"
55
56 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
57 #define LOCAL_IRQS 32
58 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
59 (KVM_MAX_VCPUS + LOCAL_IRQS))
60
61 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
62 KVM_GENERIC_VM_STATS(),
63 STATS_DESC_COUNTER(VM, inject_io),
64 STATS_DESC_COUNTER(VM, inject_float_mchk),
65 STATS_DESC_COUNTER(VM, inject_pfault_done),
66 STATS_DESC_COUNTER(VM, inject_service_signal),
67 STATS_DESC_COUNTER(VM, inject_virtio)
68 };
69
70 const struct kvm_stats_header kvm_vm_stats_header = {
71 .name_size = KVM_STATS_NAME_SIZE,
72 .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
73 .id_offset = sizeof(struct kvm_stats_header),
74 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
75 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
76 sizeof(kvm_vm_stats_desc),
77 };
78
79 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
80 KVM_GENERIC_VCPU_STATS(),
81 STATS_DESC_COUNTER(VCPU, exit_userspace),
82 STATS_DESC_COUNTER(VCPU, exit_null),
83 STATS_DESC_COUNTER(VCPU, exit_external_request),
84 STATS_DESC_COUNTER(VCPU, exit_io_request),
85 STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
86 STATS_DESC_COUNTER(VCPU, exit_stop_request),
87 STATS_DESC_COUNTER(VCPU, exit_validity),
88 STATS_DESC_COUNTER(VCPU, exit_instruction),
89 STATS_DESC_COUNTER(VCPU, exit_pei),
90 STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
91 STATS_DESC_COUNTER(VCPU, instruction_lctl),
92 STATS_DESC_COUNTER(VCPU, instruction_lctlg),
93 STATS_DESC_COUNTER(VCPU, instruction_stctl),
94 STATS_DESC_COUNTER(VCPU, instruction_stctg),
95 STATS_DESC_COUNTER(VCPU, exit_program_interruption),
96 STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
97 STATS_DESC_COUNTER(VCPU, exit_operation_exception),
98 STATS_DESC_COUNTER(VCPU, deliver_ckc),
99 STATS_DESC_COUNTER(VCPU, deliver_cputm),
100 STATS_DESC_COUNTER(VCPU, deliver_external_call),
101 STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
102 STATS_DESC_COUNTER(VCPU, deliver_service_signal),
103 STATS_DESC_COUNTER(VCPU, deliver_virtio),
104 STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
105 STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
106 STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
107 STATS_DESC_COUNTER(VCPU, deliver_program),
108 STATS_DESC_COUNTER(VCPU, deliver_io),
109 STATS_DESC_COUNTER(VCPU, deliver_machine_check),
110 STATS_DESC_COUNTER(VCPU, exit_wait_state),
111 STATS_DESC_COUNTER(VCPU, inject_ckc),
112 STATS_DESC_COUNTER(VCPU, inject_cputm),
113 STATS_DESC_COUNTER(VCPU, inject_external_call),
114 STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
115 STATS_DESC_COUNTER(VCPU, inject_mchk),
116 STATS_DESC_COUNTER(VCPU, inject_pfault_init),
117 STATS_DESC_COUNTER(VCPU, inject_program),
118 STATS_DESC_COUNTER(VCPU, inject_restart),
119 STATS_DESC_COUNTER(VCPU, inject_set_prefix),
120 STATS_DESC_COUNTER(VCPU, inject_stop_signal),
121 STATS_DESC_COUNTER(VCPU, instruction_epsw),
122 STATS_DESC_COUNTER(VCPU, instruction_gs),
123 STATS_DESC_COUNTER(VCPU, instruction_io_other),
124 STATS_DESC_COUNTER(VCPU, instruction_lpsw),
125 STATS_DESC_COUNTER(VCPU, instruction_lpswe),
126 STATS_DESC_COUNTER(VCPU, instruction_pfmf),
127 STATS_DESC_COUNTER(VCPU, instruction_ptff),
128 STATS_DESC_COUNTER(VCPU, instruction_sck),
129 STATS_DESC_COUNTER(VCPU, instruction_sckpf),
130 STATS_DESC_COUNTER(VCPU, instruction_stidp),
131 STATS_DESC_COUNTER(VCPU, instruction_spx),
132 STATS_DESC_COUNTER(VCPU, instruction_stpx),
133 STATS_DESC_COUNTER(VCPU, instruction_stap),
134 STATS_DESC_COUNTER(VCPU, instruction_iske),
135 STATS_DESC_COUNTER(VCPU, instruction_ri),
136 STATS_DESC_COUNTER(VCPU, instruction_rrbe),
137 STATS_DESC_COUNTER(VCPU, instruction_sske),
138 STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
139 STATS_DESC_COUNTER(VCPU, instruction_stsi),
140 STATS_DESC_COUNTER(VCPU, instruction_stfl),
141 STATS_DESC_COUNTER(VCPU, instruction_tb),
142 STATS_DESC_COUNTER(VCPU, instruction_tpi),
143 STATS_DESC_COUNTER(VCPU, instruction_tprot),
144 STATS_DESC_COUNTER(VCPU, instruction_tsch),
145 STATS_DESC_COUNTER(VCPU, instruction_sie),
146 STATS_DESC_COUNTER(VCPU, instruction_essa),
147 STATS_DESC_COUNTER(VCPU, instruction_sthyi),
148 STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
149 STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
150 STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
151 STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
152 STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
153 STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
154 STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
155 STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
156 STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
157 STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
158 STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
159 STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
160 STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
161 STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
162 STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
163 STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
164 STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
165 STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
166 STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
167 STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
168 STATS_DESC_COUNTER(VCPU, diag_9c_forward),
169 STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
170 STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
171 STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
172 STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
173 STATS_DESC_COUNTER(VCPU, pfault_sync)
174 };
175
176 const struct kvm_stats_header kvm_vcpu_stats_header = {
177 .name_size = KVM_STATS_NAME_SIZE,
178 .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
179 .id_offset = sizeof(struct kvm_stats_header),
180 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
181 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
182 sizeof(kvm_vcpu_stats_desc),
183 };
184
185 /* allow nested virtualization in KVM (if enabled by user space) */
186 static int nested;
187 module_param(nested, int, S_IRUGO);
188 MODULE_PARM_DESC(nested, "Nested virtualization support");
189
190 /* allow 1m huge page guest backing, if !nested */
191 static int hpage;
192 module_param(hpage, int, 0444);
193 MODULE_PARM_DESC(hpage, "1m huge page backing support");
194
195 /* maximum percentage of steal time for polling. >100 is treated like 100 */
196 static u8 halt_poll_max_steal = 10;
197 module_param(halt_poll_max_steal, byte, 0644);
198 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
199
200 /* if set to true, the GISA will be initialized and used if available */
201 static bool use_gisa = true;
202 module_param(use_gisa, bool, 0644);
203 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
204
205 /* maximum diag9c forwarding per second */
206 unsigned int diag9c_forwarding_hz;
207 module_param(diag9c_forwarding_hz, uint, 0644);
208 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
209
210 /*
211 * For now we handle at most 16 double words as this is what the s390 base
212 * kernel handles and stores in the prefix page. If we ever need to go beyond
213 * this, this requires changes to code, but the external uapi can stay.
214 */
215 #define SIZE_INTERNAL 16
216
217 /*
218 * Base feature mask that defines default mask for facilities. Consists of the
219 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
220 */
221 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
222 /*
223 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
224 * and defines the facilities that can be enabled via a cpu model.
225 */
226 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
227
static unsigned long kvm_s390_fac_size(void)
229 {
230 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
231 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
232 BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
233 sizeof(stfle_fac_list));
234
235 return SIZE_INTERNAL;
236 }
237
238 /* available cpu features supported by kvm */
239 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
240 /* available subfunctions indicated via query / "test bit" */
241 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
242
243 static struct gmap_notifier gmap_notifier;
244 static struct gmap_notifier vsie_gmap_notifier;
245 debug_info_t *kvm_s390_dbf;
246 debug_info_t *kvm_s390_dbf_uv;
247
248 /* Section: not file related */
int kvm_arch_hardware_enable(void)
250 {
251 /* every s390 is virtualization enabled ;-) */
252 return 0;
253 }
254
int kvm_arch_check_processor_compat(void *opaque)
256 {
257 return 0;
258 }
259
260 /* forward declarations */
261 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
262 unsigned long end);
263 static int sca_switch_to_extended(struct kvm *kvm);
264
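/*
 * Adjust the epoch (and, if the multiple-epoch facility is in use, the
 * epoch index) of a SIE control block after the host TOD clock has been
 * changed by @delta.
 */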
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
266 {
267 u8 delta_idx = 0;
268
269 /*
270 * The TOD jumps by delta, we have to compensate this by adding
271 * -delta to the epoch.
272 */
273 delta = -delta;
274
275 /* sign-extension - we're adding to signed values below */
276 if ((s64)delta < 0)
277 delta_idx = -1;
278
279 scb->epoch += delta;
280 if (scb->ecd & ECD_MEF) {
281 scb->epdx += delta_idx;
282 if (scb->epoch < delta)
283 scb->epdx += 1;
284 }
285 }
286
287 /*
288 * This callback is executed during stop_machine(). All CPUs are therefore
289 * temporarily stopped. In order not to change guest behavior, we have to
290 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
291 * so a CPU won't be stopped while calculating with the epoch.
292 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
                          void *v)
295 {
296 struct kvm *kvm;
297 struct kvm_vcpu *vcpu;
298 unsigned long i;
299 unsigned long long *delta = v;
300
301 list_for_each_entry(kvm, &vm_list, vm_list) {
302 kvm_for_each_vcpu(i, vcpu, kvm) {
303 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
304 if (i == 0) {
305 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
306 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
307 }
308 if (vcpu->arch.cputm_enabled)
309 vcpu->arch.cputm_start += *delta;
310 if (vcpu->arch.vsie_block)
311 kvm_clock_sync_scb(vcpu->arch.vsie_block,
312 *delta);
313 }
314 }
315 return NOTIFY_OK;
316 }
317
318 static struct notifier_block kvm_clock_notifier = {
319 .notifier_call = kvm_clock_sync,
320 };
321
int kvm_arch_hardware_setup(void *opaque)
323 {
324 gmap_notifier.notifier_call = kvm_gmap_notifier;
325 gmap_register_pte_notifier(&gmap_notifier);
326 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
327 gmap_register_pte_notifier(&vsie_gmap_notifier);
328 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
329 &kvm_clock_notifier);
330 return 0;
331 }
332
void kvm_arch_hardware_unsetup(void)
334 {
335 gmap_unregister_pte_notifier(&gmap_notifier);
336 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
337 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
338 &kvm_clock_notifier);
339 }
340
static void allow_cpu_feat(unsigned long nr)
342 {
343 set_bit_inv(nr, kvm_s390_available_cpu_feat);
344 }
345
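/*
 * Test whether the PLO (PERFORM LOCKED OPERATION) function code @nr is
 * available, using the "test bit" form of PLO (function code ORed with
 * 0x100). Returns 1 if the function is installed.
 */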
static inline int plo_test_bit(unsigned char nr)
347 {
348 unsigned long function = (unsigned long)nr | 0x100;
349 int cc;
350
351 asm volatile(
352 " lgr 0,%[function]\n"
353 /* Parameter registers are ignored for "test bit" */
354 " plo 0,0,0,0(0)\n"
355 " ipm %0\n"
356 " srl %0,28\n"
357 : "=d" (cc)
358 : [function] "d" (function)
359 : "cc", "0");
360 return cc == 0;
361 }
362
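/*
 * Execute the RRF-format instruction @opcode with function code 0 (query)
 * in GR0, so that it stores its subfunction availability mask at the
 * address in @query.
 */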
static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
364 {
365 asm volatile(
366 " lghi 0,0\n"
367 " lgr 1,%[query]\n"
368 /* Parameter registers are ignored */
369 " .insn rrf,%[opc] << 16,2,4,6,0\n"
370 :
371 : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
372 : "cc", "memory", "0", "1");
373 }
374
375 #define INSN_SORTL 0xb938
376 #define INSN_DFLTCC 0xb939
377
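/*
 * Probe the host for PLO and CPACF subfunctions as well as the SIE features
 * that can be offered to guests, and record the results in
 * kvm_s390_available_subfunc and kvm_s390_available_cpu_feat.
 */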
static void kvm_s390_cpu_feat_init(void)
379 {
380 int i;
381
382 for (i = 0; i < 256; ++i) {
383 if (plo_test_bit(i))
384 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
385 }
386
387 if (test_facility(28)) /* TOD-clock steering */
388 ptff(kvm_s390_available_subfunc.ptff,
389 sizeof(kvm_s390_available_subfunc.ptff),
390 PTFF_QAF);
391
392 if (test_facility(17)) { /* MSA */
393 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
394 kvm_s390_available_subfunc.kmac);
395 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
396 kvm_s390_available_subfunc.kmc);
397 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
398 kvm_s390_available_subfunc.km);
399 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
400 kvm_s390_available_subfunc.kimd);
401 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
402 kvm_s390_available_subfunc.klmd);
403 }
404 if (test_facility(76)) /* MSA3 */
405 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
406 kvm_s390_available_subfunc.pckmo);
407 if (test_facility(77)) { /* MSA4 */
408 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
409 kvm_s390_available_subfunc.kmctr);
410 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
411 kvm_s390_available_subfunc.kmf);
412 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
413 kvm_s390_available_subfunc.kmo);
414 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
415 kvm_s390_available_subfunc.pcc);
416 }
417 if (test_facility(57)) /* MSA5 */
418 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
419 kvm_s390_available_subfunc.ppno);
420
421 if (test_facility(146)) /* MSA8 */
422 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
423 kvm_s390_available_subfunc.kma);
424
425 if (test_facility(155)) /* MSA9 */
426 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
427 kvm_s390_available_subfunc.kdsa);
428
429 if (test_facility(150)) /* SORTL */
430 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
431
432 if (test_facility(151)) /* DFLTCC */
433 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
434
435 if (MACHINE_HAS_ESOP)
436 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
437 /*
438 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
439 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
440 */
441 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
442 !test_facility(3) || !nested)
443 return;
444 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
445 if (sclp.has_64bscao)
446 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
447 if (sclp.has_siif)
448 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
449 if (sclp.has_gpere)
450 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
451 if (sclp.has_gsls)
452 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
453 if (sclp.has_ib)
454 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
455 if (sclp.has_cei)
456 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
457 if (sclp.has_ibs)
458 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
459 if (sclp.has_kss)
460 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
461 /*
462 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
463 * all skey handling functions read/set the skey from the PGSTE
464 * instead of the real storage key.
465 *
466 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
467 * pages being detected as preserved although they are resident.
468 *
469 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
470 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
471 *
472 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
473 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
474 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
475 *
476 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
477 * cannot easily shadow the SCA because of the ipte lock.
478 */
479 }
480
int kvm_arch_init(void *opaque)
482 {
483 int rc = -ENOMEM;
484
485 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
486 if (!kvm_s390_dbf)
487 return -ENOMEM;
488
489 kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
490 if (!kvm_s390_dbf_uv)
491 goto out;
492
493 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
494 debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
495 goto out;
496
497 kvm_s390_cpu_feat_init();
498
499 /* Register floating interrupt controller interface. */
500 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
501 if (rc) {
502 pr_err("A FLIC registration call failed with rc=%d\n", rc);
503 goto out;
504 }
505
506 rc = kvm_s390_gib_init(GAL_ISC);
507 if (rc)
508 goto out;
509
510 return 0;
511
512 out:
513 kvm_arch_exit();
514 return rc;
515 }
516
void kvm_arch_exit(void)
518 {
519 kvm_s390_gib_destroy();
520 debug_unregister(kvm_s390_dbf);
521 debug_unregister(kvm_s390_dbf_uv);
522 }
523
524 /* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
527 {
528 if (ioctl == KVM_S390_ENABLE_SIE)
529 return s390_enable_sie();
530 return -EINVAL;
531 }
532
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
534 {
535 int r;
536
537 switch (ext) {
538 case KVM_CAP_S390_PSW:
539 case KVM_CAP_S390_GMAP:
540 case KVM_CAP_SYNC_MMU:
541 #ifdef CONFIG_KVM_S390_UCONTROL
542 case KVM_CAP_S390_UCONTROL:
543 #endif
544 case KVM_CAP_ASYNC_PF:
545 case KVM_CAP_SYNC_REGS:
546 case KVM_CAP_ONE_REG:
547 case KVM_CAP_ENABLE_CAP:
548 case KVM_CAP_S390_CSS_SUPPORT:
549 case KVM_CAP_IOEVENTFD:
550 case KVM_CAP_DEVICE_CTRL:
551 case KVM_CAP_S390_IRQCHIP:
552 case KVM_CAP_VM_ATTRIBUTES:
553 case KVM_CAP_MP_STATE:
554 case KVM_CAP_IMMEDIATE_EXIT:
555 case KVM_CAP_S390_INJECT_IRQ:
556 case KVM_CAP_S390_USER_SIGP:
557 case KVM_CAP_S390_USER_STSI:
558 case KVM_CAP_S390_SKEYS:
559 case KVM_CAP_S390_IRQ_STATE:
560 case KVM_CAP_S390_USER_INSTR0:
561 case KVM_CAP_S390_CMMA_MIGRATION:
562 case KVM_CAP_S390_AIS:
563 case KVM_CAP_S390_AIS_MIGRATION:
564 case KVM_CAP_S390_VCPU_RESETS:
565 case KVM_CAP_SET_GUEST_DEBUG:
566 case KVM_CAP_S390_DIAG318:
567 r = 1;
568 break;
569 case KVM_CAP_SET_GUEST_DEBUG2:
570 r = KVM_GUESTDBG_VALID_MASK;
571 break;
572 case KVM_CAP_S390_HPAGE_1M:
573 r = 0;
574 if (hpage && !kvm_is_ucontrol(kvm))
575 r = 1;
576 break;
577 case KVM_CAP_S390_MEM_OP:
578 r = MEM_OP_MAX_SIZE;
579 break;
580 case KVM_CAP_NR_VCPUS:
581 case KVM_CAP_MAX_VCPUS:
582 case KVM_CAP_MAX_VCPU_ID:
583 r = KVM_S390_BSCA_CPU_SLOTS;
584 if (!kvm_s390_use_sca_entries())
585 r = KVM_MAX_VCPUS;
586 else if (sclp.has_esca && sclp.has_64bscao)
587 r = KVM_S390_ESCA_CPU_SLOTS;
588 break;
589 case KVM_CAP_S390_COW:
590 r = MACHINE_HAS_ESOP;
591 break;
592 case KVM_CAP_S390_VECTOR_REGISTERS:
593 r = MACHINE_HAS_VX;
594 break;
595 case KVM_CAP_S390_RI:
596 r = test_facility(64);
597 break;
598 case KVM_CAP_S390_GS:
599 r = test_facility(133);
600 break;
601 case KVM_CAP_S390_BPB:
602 r = test_facility(82);
603 break;
604 case KVM_CAP_S390_PROTECTED:
605 r = is_prot_virt_host();
606 break;
607 default:
608 r = 0;
609 }
610 return r;
611 }
612
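/*
 * Transfer the dirty state from the gmap to the memslot's dirty bitmap,
 * one 1 MB segment (_PAGE_ENTRIES pages) at a time.
 */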
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
614 {
615 int i;
616 gfn_t cur_gfn, last_gfn;
617 unsigned long gaddr, vmaddr;
618 struct gmap *gmap = kvm->arch.gmap;
619 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
620
621 /* Loop over all guest segments */
622 cur_gfn = memslot->base_gfn;
623 last_gfn = memslot->base_gfn + memslot->npages;
624 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
625 gaddr = gfn_to_gpa(cur_gfn);
626 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
627 if (kvm_is_error_hva(vmaddr))
628 continue;
629
630 bitmap_zero(bitmap, _PAGE_ENTRIES);
631 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
632 for (i = 0; i < _PAGE_ENTRIES; i++) {
633 if (test_bit(i, bitmap))
634 mark_page_dirty(kvm, cur_gfn + i);
635 }
636
637 if (fatal_signal_pending(current))
638 return;
639 cond_resched();
640 }
641 }
642
643 /* Section: vm related */
644 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
645
646 /*
647 * Get (and clear) the dirty memory log for a memory slot.
648 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                               struct kvm_dirty_log *log)
651 {
652 int r;
653 unsigned long n;
654 struct kvm_memory_slot *memslot;
655 int is_dirty;
656
657 if (kvm_is_ucontrol(kvm))
658 return -EINVAL;
659
660 mutex_lock(&kvm->slots_lock);
661
662 r = -EINVAL;
663 if (log->slot >= KVM_USER_MEM_SLOTS)
664 goto out;
665
666 r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
667 if (r)
668 goto out;
669
670 /* Clear the dirty log */
671 if (is_dirty) {
672 n = kvm_dirty_bitmap_bytes(memslot);
673 memset(memslot->dirty_bitmap, 0, n);
674 }
675 r = 0;
676 out:
677 mutex_unlock(&kvm->slots_lock);
678 return r;
679 }
680
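/* Request an operation exception intercept (KVM_REQ_ICPT_OPEREXC) on all vcpus. */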
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
682 {
683 unsigned long i;
684 struct kvm_vcpu *vcpu;
685
686 kvm_for_each_vcpu(i, vcpu, kvm) {
687 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
688 }
689 }
690
int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
692 {
693 int r;
694
695 if (cap->flags)
696 return -EINVAL;
697
698 switch (cap->cap) {
699 case KVM_CAP_S390_IRQCHIP:
700 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
701 kvm->arch.use_irqchip = 1;
702 r = 0;
703 break;
704 case KVM_CAP_S390_USER_SIGP:
705 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
706 kvm->arch.user_sigp = 1;
707 r = 0;
708 break;
709 case KVM_CAP_S390_VECTOR_REGISTERS:
710 mutex_lock(&kvm->lock);
711 if (kvm->created_vcpus) {
712 r = -EBUSY;
713 } else if (MACHINE_HAS_VX) {
714 set_kvm_facility(kvm->arch.model.fac_mask, 129);
715 set_kvm_facility(kvm->arch.model.fac_list, 129);
716 if (test_facility(134)) {
717 set_kvm_facility(kvm->arch.model.fac_mask, 134);
718 set_kvm_facility(kvm->arch.model.fac_list, 134);
719 }
720 if (test_facility(135)) {
721 set_kvm_facility(kvm->arch.model.fac_mask, 135);
722 set_kvm_facility(kvm->arch.model.fac_list, 135);
723 }
724 if (test_facility(148)) {
725 set_kvm_facility(kvm->arch.model.fac_mask, 148);
726 set_kvm_facility(kvm->arch.model.fac_list, 148);
727 }
728 if (test_facility(152)) {
729 set_kvm_facility(kvm->arch.model.fac_mask, 152);
730 set_kvm_facility(kvm->arch.model.fac_list, 152);
731 }
732 if (test_facility(192)) {
733 set_kvm_facility(kvm->arch.model.fac_mask, 192);
734 set_kvm_facility(kvm->arch.model.fac_list, 192);
735 }
736 r = 0;
737 } else
738 r = -EINVAL;
739 mutex_unlock(&kvm->lock);
740 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
741 r ? "(not available)" : "(success)");
742 break;
743 case KVM_CAP_S390_RI:
744 r = -EINVAL;
745 mutex_lock(&kvm->lock);
746 if (kvm->created_vcpus) {
747 r = -EBUSY;
748 } else if (test_facility(64)) {
749 set_kvm_facility(kvm->arch.model.fac_mask, 64);
750 set_kvm_facility(kvm->arch.model.fac_list, 64);
751 r = 0;
752 }
753 mutex_unlock(&kvm->lock);
754 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
755 r ? "(not available)" : "(success)");
756 break;
757 case KVM_CAP_S390_AIS:
758 mutex_lock(&kvm->lock);
759 if (kvm->created_vcpus) {
760 r = -EBUSY;
761 } else {
762 set_kvm_facility(kvm->arch.model.fac_mask, 72);
763 set_kvm_facility(kvm->arch.model.fac_list, 72);
764 r = 0;
765 }
766 mutex_unlock(&kvm->lock);
767 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
768 r ? "(not available)" : "(success)");
769 break;
770 case KVM_CAP_S390_GS:
771 r = -EINVAL;
772 mutex_lock(&kvm->lock);
773 if (kvm->created_vcpus) {
774 r = -EBUSY;
775 } else if (test_facility(133)) {
776 set_kvm_facility(kvm->arch.model.fac_mask, 133);
777 set_kvm_facility(kvm->arch.model.fac_list, 133);
778 r = 0;
779 }
780 mutex_unlock(&kvm->lock);
781 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
782 r ? "(not available)" : "(success)");
783 break;
784 case KVM_CAP_S390_HPAGE_1M:
785 mutex_lock(&kvm->lock);
786 if (kvm->created_vcpus)
787 r = -EBUSY;
788 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
789 r = -EINVAL;
790 else {
791 r = 0;
792 mmap_write_lock(kvm->mm);
793 kvm->mm->context.allow_gmap_hpage_1m = 1;
794 mmap_write_unlock(kvm->mm);
795 /*
796 * We might have to create fake 4k page
797 * tables. To avoid that the hardware works on
798 * stale PGSTEs, we emulate these instructions.
799 */
800 kvm->arch.use_skf = 0;
801 kvm->arch.use_pfmfi = 0;
802 }
803 mutex_unlock(&kvm->lock);
804 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
805 r ? "(not available)" : "(success)");
806 break;
807 case KVM_CAP_S390_USER_STSI:
808 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
809 kvm->arch.user_stsi = 1;
810 r = 0;
811 break;
812 case KVM_CAP_S390_USER_INSTR0:
813 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
814 kvm->arch.user_instr0 = 1;
815 icpt_operexc_on_all_vcpus(kvm);
816 r = 0;
817 break;
818 default:
819 r = -EINVAL;
820 break;
821 }
822 return r;
823 }
824
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
826 {
827 int ret;
828
829 switch (attr->attr) {
830 case KVM_S390_VM_MEM_LIMIT_SIZE:
831 ret = 0;
832 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
833 kvm->arch.mem_limit);
834 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
835 ret = -EFAULT;
836 break;
837 default:
838 ret = -ENXIO;
839 break;
840 }
841 return ret;
842 }
843
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
845 {
846 int ret;
847 unsigned int idx;
848 switch (attr->attr) {
849 case KVM_S390_VM_MEM_ENABLE_CMMA:
850 ret = -ENXIO;
851 if (!sclp.has_cmma)
852 break;
853
854 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
855 mutex_lock(&kvm->lock);
856 if (kvm->created_vcpus)
857 ret = -EBUSY;
858 else if (kvm->mm->context.allow_gmap_hpage_1m)
859 ret = -EINVAL;
860 else {
861 kvm->arch.use_cmma = 1;
862 /* Not compatible with cmma. */
863 kvm->arch.use_pfmfi = 0;
864 ret = 0;
865 }
866 mutex_unlock(&kvm->lock);
867 break;
868 case KVM_S390_VM_MEM_CLR_CMMA:
869 ret = -ENXIO;
870 if (!sclp.has_cmma)
871 break;
872 ret = -EINVAL;
873 if (!kvm->arch.use_cmma)
874 break;
875
876 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
877 mutex_lock(&kvm->lock);
878 idx = srcu_read_lock(&kvm->srcu);
879 s390_reset_cmma(kvm->arch.gmap->mm);
880 srcu_read_unlock(&kvm->srcu, idx);
881 mutex_unlock(&kvm->lock);
882 ret = 0;
883 break;
884 case KVM_S390_VM_MEM_LIMIT_SIZE: {
885 unsigned long new_limit;
886
887 if (kvm_is_ucontrol(kvm))
888 return -EINVAL;
889
890 if (get_user(new_limit, (u64 __user *)attr->addr))
891 return -EFAULT;
892
893 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
894 new_limit > kvm->arch.mem_limit)
895 return -E2BIG;
896
897 if (!new_limit)
898 return -EINVAL;
899
900 /* gmap_create takes last usable address */
901 if (new_limit != KVM_S390_NO_MEM_LIMIT)
902 new_limit -= 1;
903
904 ret = -EBUSY;
905 mutex_lock(&kvm->lock);
906 if (!kvm->created_vcpus) {
907 /* gmap_create will round the limit up */
908 struct gmap *new = gmap_create(current->mm, new_limit);
909
910 if (!new) {
911 ret = -ENOMEM;
912 } else {
913 gmap_remove(kvm->arch.gmap);
914 new->private = kvm;
915 kvm->arch.gmap = new;
916 ret = 0;
917 }
918 }
919 mutex_unlock(&kvm->lock);
920 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
921 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
922 (void *) kvm->arch.gmap->asce);
923 break;
924 }
925 default:
926 ret = -ENXIO;
927 break;
928 }
929 return ret;
930 }
931
932 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
933
void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
935 {
936 struct kvm_vcpu *vcpu;
937 unsigned long i;
938
939 kvm_s390_vcpu_block_all(kvm);
940
941 kvm_for_each_vcpu(i, vcpu, kvm) {
942 kvm_s390_vcpu_crypto_setup(vcpu);
943 /* recreate the shadow crycb by leaving the VSIE handler */
944 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
945 }
946
947 kvm_s390_vcpu_unblock_all(kvm);
948 }
949
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
951 {
952 mutex_lock(&kvm->lock);
953 switch (attr->attr) {
954 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
955 if (!test_kvm_facility(kvm, 76)) {
956 mutex_unlock(&kvm->lock);
957 return -EINVAL;
958 }
959 get_random_bytes(
960 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
961 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
962 kvm->arch.crypto.aes_kw = 1;
963 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
964 break;
965 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
966 if (!test_kvm_facility(kvm, 76)) {
967 mutex_unlock(&kvm->lock);
968 return -EINVAL;
969 }
970 get_random_bytes(
971 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
972 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
973 kvm->arch.crypto.dea_kw = 1;
974 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
975 break;
976 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
977 if (!test_kvm_facility(kvm, 76)) {
978 mutex_unlock(&kvm->lock);
979 return -EINVAL;
980 }
981 kvm->arch.crypto.aes_kw = 0;
982 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
983 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
984 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
985 break;
986 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
987 if (!test_kvm_facility(kvm, 76)) {
988 mutex_unlock(&kvm->lock);
989 return -EINVAL;
990 }
991 kvm->arch.crypto.dea_kw = 0;
992 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
993 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
994 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
995 break;
996 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
997 if (!ap_instructions_available()) {
998 mutex_unlock(&kvm->lock);
999 return -EOPNOTSUPP;
1000 }
1001 kvm->arch.crypto.apie = 1;
1002 break;
1003 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1004 if (!ap_instructions_available()) {
1005 mutex_unlock(&kvm->lock);
1006 return -EOPNOTSUPP;
1007 }
1008 kvm->arch.crypto.apie = 0;
1009 break;
1010 default:
1011 mutex_unlock(&kvm->lock);
1012 return -ENXIO;
1013 }
1014
1015 kvm_s390_vcpu_crypto_reset_all(kvm);
1016 mutex_unlock(&kvm->lock);
1017 return 0;
1018 }
1019
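/* Make a synchronous request on every vcpu of the VM. */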
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1021 {
1022 unsigned long cx;
1023 struct kvm_vcpu *vcpu;
1024
1025 kvm_for_each_vcpu(cx, vcpu, kvm)
1026 kvm_s390_sync_request(req, vcpu);
1027 }
1028
1029 /*
1030 * Must be called with kvm->srcu held to avoid races on memslots, and with
1031 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1032 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
1034 {
1035 struct kvm_memory_slot *ms;
1036 struct kvm_memslots *slots;
1037 unsigned long ram_pages = 0;
1038 int slotnr;
1039
1040 /* migration mode already enabled */
1041 if (kvm->arch.migration_mode)
1042 return 0;
1043 slots = kvm_memslots(kvm);
1044 if (!slots || !slots->used_slots)
1045 return -EINVAL;
1046
1047 if (!kvm->arch.use_cmma) {
1048 kvm->arch.migration_mode = 1;
1049 return 0;
1050 }
1051 /* mark all the pages in active slots as dirty */
1052 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1053 ms = slots->memslots + slotnr;
1054 if (!ms->dirty_bitmap)
1055 return -EINVAL;
1056 /*
1057 * The second half of the bitmap is only used on x86,
1058 * and would be wasted otherwise, so we put it to good
1059 * use here to keep track of the state of the storage
1060 * attributes.
1061 */
1062 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1063 ram_pages += ms->npages;
1064 }
1065 atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1066 kvm->arch.migration_mode = 1;
1067 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1068 return 0;
1069 }
1070
1071 /*
1072 * Must be called with kvm->slots_lock to avoid races with ourselves and
1073 * kvm_s390_vm_start_migration.
1074 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1076 {
1077 /* migration mode already disabled */
1078 if (!kvm->arch.migration_mode)
1079 return 0;
1080 kvm->arch.migration_mode = 0;
1081 if (kvm->arch.use_cmma)
1082 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1083 return 0;
1084 }
1085
static int kvm_s390_vm_set_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
1088 {
1089 int res = -ENXIO;
1090
1091 mutex_lock(&kvm->slots_lock);
1092 switch (attr->attr) {
1093 case KVM_S390_VM_MIGRATION_START:
1094 res = kvm_s390_vm_start_migration(kvm);
1095 break;
1096 case KVM_S390_VM_MIGRATION_STOP:
1097 res = kvm_s390_vm_stop_migration(kvm);
1098 break;
1099 default:
1100 break;
1101 }
1102 mutex_unlock(&kvm->slots_lock);
1103
1104 return res;
1105 }
1106
static int kvm_s390_vm_get_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
1109 {
1110 u64 mig = kvm->arch.migration_mode;
1111
1112 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1113 return -ENXIO;
1114
1115 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1116 return -EFAULT;
1117 return 0;
1118 }
1119
1120 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
1121
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1123 {
1124 struct kvm_s390_vm_tod_clock gtod;
1125
if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1127 return -EFAULT;
1128
1129 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1130 return -EINVAL;
__kvm_s390_set_tod_clock(kvm, &gtod);
1132
1133 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1134 gtod.epoch_idx, gtod.tod);
1135
1136 return 0;
1137 }
1138
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1140 {
1141 u8 gtod_high;
1142
if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1144 sizeof(gtod_high)))
1145 return -EFAULT;
1146
1147 if (gtod_high != 0)
1148 return -EINVAL;
1149 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1150
1151 return 0;
1152 }
1153
static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1155 {
1156 struct kvm_s390_vm_tod_clock gtod = { 0 };
1157
if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1159 sizeof(gtod.tod)))
1160 return -EFAULT;
1161
__kvm_s390_set_tod_clock(kvm, &gtod);
1163 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1164 return 0;
1165 }
1166
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1168 {
1169 int ret;
1170
1171 if (attr->flags)
1172 return -EINVAL;
1173
1174 mutex_lock(&kvm->lock);
1175 /*
1176 * For protected guests, the TOD is managed by the ultravisor, so trying
1177 * to change it will never bring the expected results.
1178 */
1179 if (kvm_s390_pv_is_protected(kvm)) {
1180 ret = -EOPNOTSUPP;
1181 goto out_unlock;
1182 }
1183
1184 switch (attr->attr) {
1185 case KVM_S390_VM_TOD_EXT:
1186 ret = kvm_s390_set_tod_ext(kvm, attr);
1187 break;
1188 case KVM_S390_VM_TOD_HIGH:
1189 ret = kvm_s390_set_tod_high(kvm, attr);
1190 break;
1191 case KVM_S390_VM_TOD_LOW:
1192 ret = kvm_s390_set_tod_low(kvm, attr);
1193 break;
1194 default:
1195 ret = -ENXIO;
1196 break;
1197 }
1198
1199 out_unlock:
1200 mutex_unlock(&kvm->lock);
1201 return ret;
1202 }
1203
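/* Read the guest TOD clock: the host TOD clock plus the guest's epoch (and epoch index). */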
static void kvm_s390_get_tod_clock(struct kvm *kvm,
                                   struct kvm_s390_vm_tod_clock *gtod)
1206 {
1207 union tod_clock clk;
1208
1209 preempt_disable();
1210
1211 store_tod_clock_ext(&clk);
1212
1213 gtod->tod = clk.tod + kvm->arch.epoch;
1214 gtod->epoch_idx = 0;
1215 if (test_kvm_facility(kvm, 139)) {
1216 gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1217 if (gtod->tod < clk.tod)
1218 gtod->epoch_idx += 1;
1219 }
1220
1221 preempt_enable();
1222 }
1223
static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1225 {
1226 struct kvm_s390_vm_tod_clock gtod;
1227
memset(&gtod, 0, sizeof(gtod));
kvm_s390_get_tod_clock(kvm, &gtod);
if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1231 return -EFAULT;
1232
1233 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1234 gtod.epoch_idx, gtod.tod);
1235 return 0;
1236 }
1237
static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1239 {
1240 u8 gtod_high = 0;
1241
if (copy_to_user((void __user *)attr->addr, &gtod_high,
1243 sizeof(gtod_high)))
1244 return -EFAULT;
1245 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1246
1247 return 0;
1248 }
1249
static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1251 {
1252 u64 gtod;
1253
1254 gtod = kvm_s390_get_tod_clock_fast(kvm);
if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1256 return -EFAULT;
1257 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1258
1259 return 0;
1260 }
1261
static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1263 {
1264 int ret;
1265
1266 if (attr->flags)
1267 return -EINVAL;
1268
1269 switch (attr->attr) {
1270 case KVM_S390_VM_TOD_EXT:
1271 ret = kvm_s390_get_tod_ext(kvm, attr);
1272 break;
1273 case KVM_S390_VM_TOD_HIGH:
1274 ret = kvm_s390_get_tod_high(kvm, attr);
1275 break;
1276 case KVM_S390_VM_TOD_LOW:
1277 ret = kvm_s390_get_tod_low(kvm, attr);
1278 break;
1279 default:
1280 ret = -ENXIO;
1281 break;
1282 }
1283 return ret;
1284 }
1285
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1287 {
1288 struct kvm_s390_vm_cpu_processor *proc;
1289 u16 lowest_ibc, unblocked_ibc;
1290 int ret = 0;
1291
1292 mutex_lock(&kvm->lock);
1293 if (kvm->created_vcpus) {
1294 ret = -EBUSY;
1295 goto out;
1296 }
1297 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1298 if (!proc) {
1299 ret = -ENOMEM;
1300 goto out;
1301 }
1302 if (!copy_from_user(proc, (void __user *)attr->addr,
1303 sizeof(*proc))) {
1304 kvm->arch.model.cpuid = proc->cpuid;
1305 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1306 unblocked_ibc = sclp.ibc & 0xfff;
1307 if (lowest_ibc && proc->ibc) {
1308 if (proc->ibc > unblocked_ibc)
1309 kvm->arch.model.ibc = unblocked_ibc;
1310 else if (proc->ibc < lowest_ibc)
1311 kvm->arch.model.ibc = lowest_ibc;
1312 else
1313 kvm->arch.model.ibc = proc->ibc;
1314 }
1315 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1316 S390_ARCH_FAC_LIST_SIZE_BYTE);
1317 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1318 kvm->arch.model.ibc,
1319 kvm->arch.model.cpuid);
1320 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1321 kvm->arch.model.fac_list[0],
1322 kvm->arch.model.fac_list[1],
1323 kvm->arch.model.fac_list[2]);
1324 } else
1325 ret = -EFAULT;
1326 kfree(proc);
1327 out:
1328 mutex_unlock(&kvm->lock);
1329 return ret;
1330 }
1331
static int kvm_s390_set_processor_feat(struct kvm *kvm,
                                       struct kvm_device_attr *attr)
1334 {
1335 struct kvm_s390_vm_cpu_feat data;
1336
1337 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1338 return -EFAULT;
1339 if (!bitmap_subset((unsigned long *) data.feat,
1340 kvm_s390_available_cpu_feat,
1341 KVM_S390_VM_CPU_FEAT_NR_BITS))
1342 return -EINVAL;
1343
1344 mutex_lock(&kvm->lock);
1345 if (kvm->created_vcpus) {
1346 mutex_unlock(&kvm->lock);
1347 return -EBUSY;
1348 }
1349 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1350 KVM_S390_VM_CPU_FEAT_NR_BITS);
1351 mutex_unlock(&kvm->lock);
1352 VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1353 data.feat[0],
1354 data.feat[1],
1355 data.feat[2]);
1356 return 0;
1357 }
1358
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
1361 {
1362 mutex_lock(&kvm->lock);
1363 if (kvm->created_vcpus) {
1364 mutex_unlock(&kvm->lock);
1365 return -EBUSY;
1366 }
1367
1368 if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1369 sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1370 mutex_unlock(&kvm->lock);
1371 return -EFAULT;
1372 }
1373 mutex_unlock(&kvm->lock);
1374
1375 VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1376 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1377 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1378 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1379 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1380 VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1381 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1382 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1383 VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1384 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1385 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1386 VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1387 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1388 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1389 VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
1390 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1391 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1392 VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1393 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1394 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1395 VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1396 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1397 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1398 VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1399 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1400 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1401 VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1402 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1403 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1404 VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1405 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1406 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1407 VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1408 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1409 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1410 VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1411 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1412 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1413 VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1414 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1415 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1416 VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1417 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1418 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1419 VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1420 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1421 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1422 VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1423 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1424 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1425 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1426 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1427 VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1428 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1429 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1430 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1431 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1432
1433 return 0;
1434 }
1435
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1437 {
1438 int ret = -ENXIO;
1439
1440 switch (attr->attr) {
1441 case KVM_S390_VM_CPU_PROCESSOR:
1442 ret = kvm_s390_set_processor(kvm, attr);
1443 break;
1444 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1445 ret = kvm_s390_set_processor_feat(kvm, attr);
1446 break;
1447 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1448 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1449 break;
1450 }
1451 return ret;
1452 }
1453
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1455 {
1456 struct kvm_s390_vm_cpu_processor *proc;
1457 int ret = 0;
1458
1459 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1460 if (!proc) {
1461 ret = -ENOMEM;
1462 goto out;
1463 }
1464 proc->cpuid = kvm->arch.model.cpuid;
1465 proc->ibc = kvm->arch.model.ibc;
1466 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1467 S390_ARCH_FAC_LIST_SIZE_BYTE);
1468 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1469 kvm->arch.model.ibc,
1470 kvm->arch.model.cpuid);
1471 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1472 kvm->arch.model.fac_list[0],
1473 kvm->arch.model.fac_list[1],
1474 kvm->arch.model.fac_list[2]);
1475 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1476 ret = -EFAULT;
1477 kfree(proc);
1478 out:
1479 return ret;
1480 }
1481
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1483 {
1484 struct kvm_s390_vm_cpu_machine *mach;
1485 int ret = 0;
1486
1487 mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1488 if (!mach) {
1489 ret = -ENOMEM;
1490 goto out;
1491 }
1492 get_cpu_id((struct cpuid *) &mach->cpuid);
1493 mach->ibc = sclp.ibc;
1494 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1495 S390_ARCH_FAC_LIST_SIZE_BYTE);
1496 memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1497 sizeof(stfle_fac_list));
1498 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1499 kvm->arch.model.ibc,
1500 kvm->arch.model.cpuid);
1501 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1502 mach->fac_mask[0],
1503 mach->fac_mask[1],
1504 mach->fac_mask[2]);
1505 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1506 mach->fac_list[0],
1507 mach->fac_list[1],
1508 mach->fac_list[2]);
1509 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1510 ret = -EFAULT;
1511 kfree(mach);
1512 out:
1513 return ret;
1514 }
1515
static int kvm_s390_get_processor_feat(struct kvm *kvm,
                                       struct kvm_device_attr *attr)
1518 {
1519 struct kvm_s390_vm_cpu_feat data;
1520
1521 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1522 KVM_S390_VM_CPU_FEAT_NR_BITS);
1523 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1524 return -EFAULT;
1525 VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1526 data.feat[0],
1527 data.feat[1],
1528 data.feat[2]);
1529 return 0;
1530 }
1531
static int kvm_s390_get_machine_feat(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
1534 {
1535 struct kvm_s390_vm_cpu_feat data;
1536
1537 bitmap_copy((unsigned long *) data.feat,
1538 kvm_s390_available_cpu_feat,
1539 KVM_S390_VM_CPU_FEAT_NR_BITS);
1540 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1541 return -EFAULT;
1542 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1543 data.feat[0],
1544 data.feat[1],
1545 data.feat[2]);
1546 return 0;
1547 }
1548
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
1551 {
1552 if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1553 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1554 return -EFAULT;
1555
1556 VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1557 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1558 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1559 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1560 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1561 VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1562 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1563 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1564 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1565 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1566 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1567 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1568 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1569 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1570 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
1571 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1572 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1573 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1574 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1575 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1576 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1577 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1578 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1579 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1580 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1581 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1582 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1583 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1584 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1585 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1586 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1587 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1588 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1589 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1590 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1591 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1592 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1593 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1594 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1595 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1596 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1597 VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1598 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1599 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1600 VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1601 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1602 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1603 VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1604 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1605 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1606 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1607 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1608 VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1609 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1610 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1611 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1612 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1613
1614 return 0;
1615 }
1616
static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
                                        struct kvm_device_attr *attr)
1619 {
1620 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1621 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1622 return -EFAULT;
1623
1624 VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1625 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1626 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1627 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1628 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1629 VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
1630 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1631 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1632 VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
1633 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1634 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1635 VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
1636 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1637 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1638 VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
1639 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1640 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1641 VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
1642 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1643 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1644 VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
1645 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1646 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1647 VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
1648 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1649 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1650 VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
1651 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1652 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1653 VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
1654 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1655 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1656 VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
1657 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1658 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1659 VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
1660 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1661 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1662 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
1663 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1664 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1665 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
1666 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1667 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1668 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
1669 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1670 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1671 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1672 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1673 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1674 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1675 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1676 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1677 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1678 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1679 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1680 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1681
1682 return 0;
1683 }
1684
1685 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1686 {
1687 int ret = -ENXIO;
1688
1689 switch (attr->attr) {
1690 case KVM_S390_VM_CPU_PROCESSOR:
1691 ret = kvm_s390_get_processor(kvm, attr);
1692 break;
1693 case KVM_S390_VM_CPU_MACHINE:
1694 ret = kvm_s390_get_machine(kvm, attr);
1695 break;
1696 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1697 ret = kvm_s390_get_processor_feat(kvm, attr);
1698 break;
1699 case KVM_S390_VM_CPU_MACHINE_FEAT:
1700 ret = kvm_s390_get_machine_feat(kvm, attr);
1701 break;
1702 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1703 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1704 break;
1705 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1706 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1707 break;
1708 }
1709 return ret;
1710 }
1711
1712 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1713 {
1714 int ret;
1715
1716 switch (attr->group) {
1717 case KVM_S390_VM_MEM_CTRL:
1718 ret = kvm_s390_set_mem_control(kvm, attr);
1719 break;
1720 case KVM_S390_VM_TOD:
1721 ret = kvm_s390_set_tod(kvm, attr);
1722 break;
1723 case KVM_S390_VM_CPU_MODEL:
1724 ret = kvm_s390_set_cpu_model(kvm, attr);
1725 break;
1726 case KVM_S390_VM_CRYPTO:
1727 ret = kvm_s390_vm_set_crypto(kvm, attr);
1728 break;
1729 case KVM_S390_VM_MIGRATION:
1730 ret = kvm_s390_vm_set_migration(kvm, attr);
1731 break;
1732 default:
1733 ret = -ENXIO;
1734 break;
1735 }
1736
1737 return ret;
1738 }
1739
1740 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1741 {
1742 int ret;
1743
1744 switch (attr->group) {
1745 case KVM_S390_VM_MEM_CTRL:
1746 ret = kvm_s390_get_mem_control(kvm, attr);
1747 break;
1748 case KVM_S390_VM_TOD:
1749 ret = kvm_s390_get_tod(kvm, attr);
1750 break;
1751 case KVM_S390_VM_CPU_MODEL:
1752 ret = kvm_s390_get_cpu_model(kvm, attr);
1753 break;
1754 case KVM_S390_VM_MIGRATION:
1755 ret = kvm_s390_vm_get_migration(kvm, attr);
1756 break;
1757 default:
1758 ret = -ENXIO;
1759 break;
1760 }
1761
1762 return ret;
1763 }
1764
1765 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1766 {
1767 int ret;
1768
1769 switch (attr->group) {
1770 case KVM_S390_VM_MEM_CTRL:
1771 switch (attr->attr) {
1772 case KVM_S390_VM_MEM_ENABLE_CMMA:
1773 case KVM_S390_VM_MEM_CLR_CMMA:
1774 ret = sclp.has_cmma ? 0 : -ENXIO;
1775 break;
1776 case KVM_S390_VM_MEM_LIMIT_SIZE:
1777 ret = 0;
1778 break;
1779 default:
1780 ret = -ENXIO;
1781 break;
1782 }
1783 break;
1784 case KVM_S390_VM_TOD:
1785 switch (attr->attr) {
1786 case KVM_S390_VM_TOD_LOW:
1787 case KVM_S390_VM_TOD_HIGH:
1788 ret = 0;
1789 break;
1790 default:
1791 ret = -ENXIO;
1792 break;
1793 }
1794 break;
1795 case KVM_S390_VM_CPU_MODEL:
1796 switch (attr->attr) {
1797 case KVM_S390_VM_CPU_PROCESSOR:
1798 case KVM_S390_VM_CPU_MACHINE:
1799 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1800 case KVM_S390_VM_CPU_MACHINE_FEAT:
1801 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1802 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1803 ret = 0;
1804 break;
1805 default:
1806 ret = -ENXIO;
1807 break;
1808 }
1809 break;
1810 case KVM_S390_VM_CRYPTO:
1811 switch (attr->attr) {
1812 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1813 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1814 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1815 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1816 ret = 0;
1817 break;
1818 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1819 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1820 ret = ap_instructions_available() ? 0 : -ENXIO;
1821 break;
1822 default:
1823 ret = -ENXIO;
1824 break;
1825 }
1826 break;
1827 case KVM_S390_VM_MIGRATION:
1828 ret = 0;
1829 break;
1830 default:
1831 ret = -ENXIO;
1832 break;
1833 }
1834
1835 return ret;
1836 }
1837
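/*
 * Read the storage keys of the guest frames [start_gfn, start_gfn + count)
 * and copy them to userspace. Returns KVM_S390_GET_SKEYS_NONE if the guest
 * does not use storage keys at all.
 */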
1838 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1839 {
1840 uint8_t *keys;
1841 uint64_t hva;
1842 int srcu_idx, i, r = 0;
1843
1844 if (args->flags != 0)
1845 return -EINVAL;
1846
1847 /* Is this guest using storage keys? */
1848 if (!mm_uses_skeys(current->mm))
1849 return KVM_S390_GET_SKEYS_NONE;
1850
1851 /* Enforce sane limit on memory allocation */
1852 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1853 return -EINVAL;
1854
1855 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1856 if (!keys)
1857 return -ENOMEM;
1858
1859 mmap_read_lock(current->mm);
1860 srcu_idx = srcu_read_lock(&kvm->srcu);
1861 for (i = 0; i < args->count; i++) {
1862 hva = gfn_to_hva(kvm, args->start_gfn + i);
1863 if (kvm_is_error_hva(hva)) {
1864 r = -EFAULT;
1865 break;
1866 }
1867
1868 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1869 if (r)
1870 break;
1871 }
1872 srcu_read_unlock(&kvm->srcu, srcu_idx);
1873 mmap_read_unlock(current->mm);
1874
1875 if (!r) {
1876 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1877 sizeof(uint8_t) * args->count);
1878 if (r)
1879 r = -EFAULT;
1880 }
1881
1882 kvfree(keys);
1883 return r;
1884 }
1885
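/*
 * Copy the storage keys provided by userspace into the guest frames starting
 * at start_gfn. Storage key handling is enabled on demand via
 * s390_enable_skey(); pages that are not yet mapped are faulted in writably.
 */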
1886 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1887 {
1888 uint8_t *keys;
1889 uint64_t hva;
1890 int srcu_idx, i, r = 0;
1891 bool unlocked;
1892
1893 if (args->flags != 0)
1894 return -EINVAL;
1895
1896 /* Enforce sane limit on memory allocation */
1897 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1898 return -EINVAL;
1899
1900 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1901 if (!keys)
1902 return -ENOMEM;
1903
1904 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1905 sizeof(uint8_t) * args->count);
1906 if (r) {
1907 r = -EFAULT;
1908 goto out;
1909 }
1910
1911 /* Enable storage key handling for the guest */
1912 r = s390_enable_skey();
1913 if (r)
1914 goto out;
1915
1916 i = 0;
1917 mmap_read_lock(current->mm);
1918 srcu_idx = srcu_read_lock(&kvm->srcu);
1919 while (i < args->count) {
1920 unlocked = false;
1921 hva = gfn_to_hva(kvm, args->start_gfn + i);
1922 if (kvm_is_error_hva(hva)) {
1923 r = -EFAULT;
1924 break;
1925 }
1926
1927 /* Lowest order bit is reserved */
1928 if (keys[i] & 0x01) {
1929 r = -EINVAL;
1930 break;
1931 }
1932
1933 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1934 if (r) {
1935 r = fixup_user_fault(current->mm, hva,
1936 FAULT_FLAG_WRITE, &unlocked);
1937 if (r)
1938 break;
1939 }
1940 if (!r)
1941 i++;
1942 }
1943 srcu_read_unlock(&kvm->srcu, srcu_idx);
1944 mmap_read_unlock(current->mm);
1945 out:
1946 kvfree(keys);
1947 return r;
1948 }
1949
1950 /*
1951 * Base address and length must be sent at the start of each block, therefore
1952 * it's cheaper to send some clean data, as long as it's less than the size of
1953 * two longs.
1954 */
1955 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1956 /* for consistency */
1957 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1958
1959 /*
1960 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1961 * address falls in a hole. In that case the index of one of the memslots
1962 * bordering the hole is returned.
1963 */
1964 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1965 {
1966 int start = 0, end = slots->used_slots;
1967 int slot = atomic_read(&slots->last_used_slot);
1968 struct kvm_memory_slot *memslots = slots->memslots;
1969
1970 if (gfn >= memslots[slot].base_gfn &&
1971 gfn < memslots[slot].base_gfn + memslots[slot].npages)
1972 return slot;
1973
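/*
 * Memslots are sorted by descending base_gfn, so a binary search for the
 * first slot with base_gfn <= gfn finds the slot containing gfn, or a slot
 * bordering the hole if gfn is unbacked.
 */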
1974 while (start < end) {
1975 slot = start + (end - start) / 2;
1976
1977 if (gfn >= memslots[slot].base_gfn)
1978 end = slot;
1979 else
1980 start = slot + 1;
1981 }
1982
1983 if (start >= slots->used_slots)
1984 return slots->used_slots - 1;
1985
1986 if (gfn >= memslots[start].base_gfn &&
1987 gfn < memslots[start].base_gfn + memslots[start].npages) {
1988 atomic_set(&slots->last_used_slot, start);
1989 }
1990
1991 return start;
1992 }
1993
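/*
 * Peek mode: read the CMMA attributes of bufsize consecutive guest frames
 * starting at args->start_gfn without consuming any dirty bits.
 */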
1994 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1995 u8 *res, unsigned long bufsize)
1996 {
1997 unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1998
1999 args->count = 0;
2000 while (args->count < bufsize) {
2001 hva = gfn_to_hva(kvm, cur_gfn);
2002 /*
2003 * We return an error if the first value was invalid, but we
2004 * return successfully if at least one value was copied.
2005 */
2006 if (kvm_is_error_hva(hva))
2007 return args->count ? 0 : -EFAULT;
2008 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2009 pgstev = 0;
2010 res[args->count++] = (pgstev >> 24) & 0x43;
2011 cur_gfn++;
2012 }
2013
2014 return 0;
2015 }
2016
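/*
 * Find the guest frame number of the next page whose CMMA dirty bit is set,
 * starting the search at cur_gfn.
 */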
2017 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2018 unsigned long cur_gfn)
2019 {
2020 int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
2021 struct kvm_memory_slot *ms = slots->memslots + slotidx;
2022 unsigned long ofs = cur_gfn - ms->base_gfn;
2023
2024 if (ms->base_gfn + ms->npages <= cur_gfn) {
2025 slotidx--;
2026 /* If we are above the highest slot, wrap around */
2027 if (slotidx < 0)
2028 slotidx = slots->used_slots - 1;
2029
2030 ms = slots->memslots + slotidx;
2031 ofs = 0;
2032 }
2033
2034 if (cur_gfn < ms->base_gfn)
2035 ofs = 0;
2036
2037 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2038 while ((slotidx > 0) && (ofs >= ms->npages)) {
2039 slotidx--;
2040 ms = slots->memslots + slotidx;
2041 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2042 }
2043 return ms->base_gfn + ofs;
2044 }
2045
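/*
 * Migration mode: harvest the CMMA attributes of dirty pages, clearing their
 * dirty bits, until the buffer is full, the end of memory is reached, or the
 * next dirty page is more than KVM_S390_MAX_BIT_DISTANCE frames away.
 */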
2046 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2047 u8 *res, unsigned long bufsize)
2048 {
2049 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2050 struct kvm_memslots *slots = kvm_memslots(kvm);
2051 struct kvm_memory_slot *ms;
2052
2053 if (unlikely(!slots->used_slots))
2054 return 0;
2055
2056 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2057 ms = gfn_to_memslot(kvm, cur_gfn);
2058 args->count = 0;
2059 args->start_gfn = cur_gfn;
2060 if (!ms)
2061 return 0;
2062 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2063 mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2064
2065 while (args->count < bufsize) {
2066 hva = gfn_to_hva(kvm, cur_gfn);
2067 if (kvm_is_error_hva(hva))
2068 return 0;
2069 /* Decrement only if we actually flipped the bit to 0 */
2070 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2071 atomic64_dec(&kvm->arch.cmma_dirty_pages);
2072 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2073 pgstev = 0;
2074 /* Save the value */
2075 res[args->count++] = (pgstev >> 24) & 0x43;
2076 /* If the next bit is too far away, stop. */
2077 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2078 return 0;
2079 /* If we reached the previous "next", find the next one */
2080 if (cur_gfn == next_gfn)
2081 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2082 /* Reached the end of memory or of the buffer, stop */
2083 if ((next_gfn >= mem_end) ||
2084 (next_gfn - args->start_gfn >= bufsize))
2085 return 0;
2086 cur_gfn++;
2087 /* Reached the end of the current memslot, take the next one. */
2088 if (cur_gfn - ms->base_gfn >= ms->npages) {
2089 ms = gfn_to_memslot(kvm, cur_gfn);
2090 if (!ms)
2091 return 0;
2092 }
2093 }
2094 return 0;
2095 }
2096
2097 /*
2098 * This function searches for the next page with dirty CMMA attributes, and
2099 * saves the attributes in the buffer up to either the end of the buffer or
2100 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2101 * no trailing clean bytes are saved.
2102 * In case no dirty bits were found, or if CMMA was not enabled or used, the
2103 * output buffer will indicate 0 as length.
2104 */
2105 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2106 struct kvm_s390_cmma_log *args)
2107 {
2108 unsigned long bufsize;
2109 int srcu_idx, peek, ret;
2110 u8 *values;
2111
2112 if (!kvm->arch.use_cmma)
2113 return -ENXIO;
2114 /* Invalid/unsupported flags were specified */
2115 if (args->flags & ~KVM_S390_CMMA_PEEK)
2116 return -EINVAL;
2117 /* Migration mode query, and we are not doing a migration */
2118 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2119 if (!peek && !kvm->arch.migration_mode)
2120 return -EINVAL;
2121 /* CMMA is disabled or was not used, or the buffer has length zero */
2122 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2123 if (!bufsize || !kvm->mm->context.uses_cmm) {
2124 memset(args, 0, sizeof(*args));
2125 return 0;
2126 }
2127 /* We are not peeking, and there are no dirty pages */
2128 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2129 memset(args, 0, sizeof(*args));
2130 return 0;
2131 }
2132
2133 values = vmalloc(bufsize);
2134 if (!values)
2135 return -ENOMEM;
2136
2137 mmap_read_lock(kvm->mm);
2138 srcu_idx = srcu_read_lock(&kvm->srcu);
2139 if (peek)
2140 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2141 else
2142 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2143 srcu_read_unlock(&kvm->srcu, srcu_idx);
2144 mmap_read_unlock(kvm->mm);
2145
2146 if (kvm->arch.migration_mode)
2147 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2148 else
2149 args->remaining = 0;
2150
2151 if (copy_to_user((void __user *)args->values, values, args->count))
2152 ret = -EFAULT;
2153
2154 vfree(values);
2155 return ret;
2156 }
2157
2158 /*
2159 * This function sets the CMMA attributes for the given pages. If the input
2160 * buffer has zero length, no action is taken, otherwise the attributes are
2161 * set and the mm->context.uses_cmm flag is set.
2162 */
2163 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2164 const struct kvm_s390_cmma_log *args)
2165 {
2166 unsigned long hva, mask, pgstev, i;
2167 uint8_t *bits;
2168 int srcu_idx, r = 0;
2169
2170 mask = args->mask;
2171
2172 if (!kvm->arch.use_cmma)
2173 return -ENXIO;
2174 /* invalid/unsupported flags */
2175 if (args->flags != 0)
2176 return -EINVAL;
2177 /* Enforce sane limit on memory allocation */
2178 if (args->count > KVM_S390_CMMA_SIZE_MAX)
2179 return -EINVAL;
2180 /* Nothing to do */
2181 if (args->count == 0)
2182 return 0;
2183
2184 bits = vmalloc(array_size(sizeof(*bits), args->count));
2185 if (!bits)
2186 return -ENOMEM;
2187
2188 r = copy_from_user(bits, (void __user *)args->values, args->count);
2189 if (r) {
2190 r = -EFAULT;
2191 goto out;
2192 }
2193
2194 mmap_read_lock(kvm->mm);
2195 srcu_idx = srcu_read_lock(&kvm->srcu);
2196 for (i = 0; i < args->count; i++) {
2197 hva = gfn_to_hva(kvm, args->start_gfn + i);
2198 if (kvm_is_error_hva(hva)) {
2199 r = -EFAULT;
2200 break;
2201 }
2202
2203 pgstev = bits[i];
2204 pgstev = pgstev << 24;
2205 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2206 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2207 }
2208 srcu_read_unlock(&kvm->srcu, srcu_idx);
2209 mmap_read_unlock(kvm->mm);
2210
2211 if (!kvm->mm->context.uses_cmm) {
2212 mmap_write_lock(kvm->mm);
2213 kvm->mm->context.uses_cmm = 1;
2214 mmap_write_unlock(kvm->mm);
2215 }
2216 out:
2217 vfree(bits);
2218 return r;
2219 }
2220
2221 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2222 {
2223 struct kvm_vcpu *vcpu;
2224 u16 rc, rrc;
2225 int ret = 0;
2226 unsigned long i;
2227
2228 /*
2229 * We ignore failures and try to destroy as many CPUs as possible.
2230 * At the same time we must not free the assigned resources when
2231 * this fails, as the ultravisor still has access to that memory.
2232 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2233 * behind.
2234 * We want to return the first failure rc and rrc, though.
2235 */
2236 kvm_for_each_vcpu(i, vcpu, kvm) {
2237 mutex_lock(&vcpu->mutex);
2238 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2239 *rcp = rc;
2240 *rrcp = rrc;
2241 ret = -EIO;
2242 }
2243 mutex_unlock(&vcpu->mutex);
2244 }
2245 return ret;
2246 }
2247
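/*
 * Convert all vcpus of this VM into protected (secure) vcpus. If creating
 * one of them fails, the already converted vcpus are rolled back.
 */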
2248 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2249 {
2250 unsigned long i;
2251 int r = 0;
2252 u16 dummy;
2253
2254 struct kvm_vcpu *vcpu;
2255
2256 kvm_for_each_vcpu(i, vcpu, kvm) {
2257 mutex_lock(&vcpu->mutex);
2258 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2259 mutex_unlock(&vcpu->mutex);
2260 if (r)
2261 break;
2262 }
2263 if (r)
2264 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2265 return r;
2266 }
2267
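/*
 * Dispatch the KVM_S390_PV_COMMAND subcommands: transitions into and out of
 * protected mode, loading of the SE header and image, verification, reset
 * preparation and unsharing of all pages.
 */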
2268 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2269 {
2270 int r = 0;
2271 u16 dummy;
2272 void __user *argp = (void __user *)cmd->data;
2273
2274 switch (cmd->cmd) {
2275 case KVM_PV_ENABLE: {
2276 r = -EINVAL;
2277 if (kvm_s390_pv_is_protected(kvm))
2278 break;
2279
2280 /*
2281 * FMT 4 SIE needs esca. As we never switch back to bsca from
2282 * esca, we need no cleanup in the error cases below.
2283 */
2284 r = sca_switch_to_extended(kvm);
2285 if (r)
2286 break;
2287
2288 mmap_write_lock(current->mm);
2289 r = gmap_mark_unmergeable();
2290 mmap_write_unlock(current->mm);
2291 if (r)
2292 break;
2293
2294 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2295 if (r)
2296 break;
2297
2298 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2299 if (r)
2300 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2301
2302 /* we need to block service interrupts from now on */
2303 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2304 break;
2305 }
2306 case KVM_PV_DISABLE: {
2307 r = -EINVAL;
2308 if (!kvm_s390_pv_is_protected(kvm))
2309 break;
2310
2311 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2312 /*
2313 * If a CPU could not be destroyed, destroying the VM will also fail.
2314 * There is no point in trying. Instead, return
2315 * the rc and rrc from the first CPU that failed to be destroyed.
2316 */
2317 if (r)
2318 break;
2319 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2320
2321 /* no need to block service interrupts any more */
2322 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2323 break;
2324 }
2325 case KVM_PV_SET_SEC_PARMS: {
2326 struct kvm_s390_pv_sec_parm parms = {};
2327 void *hdr;
2328
2329 r = -EINVAL;
2330 if (!kvm_s390_pv_is_protected(kvm))
2331 break;
2332
2333 r = -EFAULT;
2334 if (copy_from_user(&parms, argp, sizeof(parms)))
2335 break;
2336
2337 /* Currently restricted to 8KB */
2338 r = -EINVAL;
2339 if (parms.length > PAGE_SIZE * 2)
2340 break;
2341
2342 r = -ENOMEM;
2343 hdr = vmalloc(parms.length);
2344 if (!hdr)
2345 break;
2346
2347 r = -EFAULT;
2348 if (!copy_from_user(hdr, (void __user *)parms.origin,
2349 parms.length))
2350 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2351 &cmd->rc, &cmd->rrc);
2352
2353 vfree(hdr);
2354 break;
2355 }
2356 case KVM_PV_UNPACK: {
2357 struct kvm_s390_pv_unp unp = {};
2358
2359 r = -EINVAL;
2360 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2361 break;
2362
2363 r = -EFAULT;
2364 if (copy_from_user(&unp, argp, sizeof(unp)))
2365 break;
2366
2367 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2368 &cmd->rc, &cmd->rrc);
2369 break;
2370 }
2371 case KVM_PV_VERIFY: {
2372 r = -EINVAL;
2373 if (!kvm_s390_pv_is_protected(kvm))
2374 break;
2375
2376 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2377 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2378 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2379 cmd->rrc);
2380 break;
2381 }
2382 case KVM_PV_PREP_RESET: {
2383 r = -EINVAL;
2384 if (!kvm_s390_pv_is_protected(kvm))
2385 break;
2386
2387 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2388 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2389 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2390 cmd->rc, cmd->rrc);
2391 break;
2392 }
2393 case KVM_PV_UNSHARE_ALL: {
2394 r = -EINVAL;
2395 if (!kvm_s390_pv_is_protected(kvm))
2396 break;
2397
2398 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2399 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2400 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2401 cmd->rc, cmd->rrc);
2402 break;
2403 }
2404 default:
2405 r = -ENOTTY;
2406 }
2407 return r;
2408 }
2409
2410 long kvm_arch_vm_ioctl(struct file *filp,
2411 unsigned int ioctl, unsigned long arg)
2412 {
2413 struct kvm *kvm = filp->private_data;
2414 void __user *argp = (void __user *)arg;
2415 struct kvm_device_attr attr;
2416 int r;
2417
2418 switch (ioctl) {
2419 case KVM_S390_INTERRUPT: {
2420 struct kvm_s390_interrupt s390int;
2421
2422 r = -EFAULT;
2423 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2424 break;
2425 r = kvm_s390_inject_vm(kvm, &s390int);
2426 break;
2427 }
2428 case KVM_CREATE_IRQCHIP: {
2429 struct kvm_irq_routing_entry routing;
2430
2431 r = -EINVAL;
2432 if (kvm->arch.use_irqchip) {
2433 /* Set up dummy routing. */
2434 memset(&routing, 0, sizeof(routing));
2435 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2436 }
2437 break;
2438 }
2439 case KVM_SET_DEVICE_ATTR: {
2440 r = -EFAULT;
2441 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2442 break;
2443 r = kvm_s390_vm_set_attr(kvm, &attr);
2444 break;
2445 }
2446 case KVM_GET_DEVICE_ATTR: {
2447 r = -EFAULT;
2448 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2449 break;
2450 r = kvm_s390_vm_get_attr(kvm, &attr);
2451 break;
2452 }
2453 case KVM_HAS_DEVICE_ATTR: {
2454 r = -EFAULT;
2455 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2456 break;
2457 r = kvm_s390_vm_has_attr(kvm, &attr);
2458 break;
2459 }
2460 case KVM_S390_GET_SKEYS: {
2461 struct kvm_s390_skeys args;
2462
2463 r = -EFAULT;
2464 if (copy_from_user(&args, argp,
2465 sizeof(struct kvm_s390_skeys)))
2466 break;
2467 r = kvm_s390_get_skeys(kvm, &args);
2468 break;
2469 }
2470 case KVM_S390_SET_SKEYS: {
2471 struct kvm_s390_skeys args;
2472
2473 r = -EFAULT;
2474 if (copy_from_user(&args, argp,
2475 sizeof(struct kvm_s390_skeys)))
2476 break;
2477 r = kvm_s390_set_skeys(kvm, &args);
2478 break;
2479 }
2480 case KVM_S390_GET_CMMA_BITS: {
2481 struct kvm_s390_cmma_log args;
2482
2483 r = -EFAULT;
2484 if (copy_from_user(&args, argp, sizeof(args)))
2485 break;
2486 mutex_lock(&kvm->slots_lock);
2487 r = kvm_s390_get_cmma_bits(kvm, &args);
2488 mutex_unlock(&kvm->slots_lock);
2489 if (!r) {
2490 r = copy_to_user(argp, &args, sizeof(args));
2491 if (r)
2492 r = -EFAULT;
2493 }
2494 break;
2495 }
2496 case KVM_S390_SET_CMMA_BITS: {
2497 struct kvm_s390_cmma_log args;
2498
2499 r = -EFAULT;
2500 if (copy_from_user(&args, argp, sizeof(args)))
2501 break;
2502 mutex_lock(&kvm->slots_lock);
2503 r = kvm_s390_set_cmma_bits(kvm, &args);
2504 mutex_unlock(&kvm->slots_lock);
2505 break;
2506 }
2507 case KVM_S390_PV_COMMAND: {
2508 struct kvm_pv_cmd args;
2509
2510 /* protvirt means user sigp */
2511 kvm->arch.user_cpu_state_ctrl = 1;
2512 r = 0;
2513 if (!is_prot_virt_host()) {
2514 r = -EINVAL;
2515 break;
2516 }
2517 if (copy_from_user(&args, argp, sizeof(args))) {
2518 r = -EFAULT;
2519 break;
2520 }
2521 if (args.flags) {
2522 r = -EINVAL;
2523 break;
2524 }
2525 mutex_lock(&kvm->lock);
2526 r = kvm_s390_handle_pv(kvm, &args);
2527 mutex_unlock(&kvm->lock);
2528 if (copy_to_user(argp, &args, sizeof(args))) {
2529 r = -EFAULT;
2530 break;
2531 }
2532 break;
2533 }
2534 default:
2535 r = -ENOTTY;
2536 }
2537
2538 return r;
2539 }
2540
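/*
 * Query the AP configuration to find out whether the AP extended addressing
 * (APXA) facility is installed. Returns 0 if the AP instructions are not
 * available or the query fails.
 */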
2541 static int kvm_s390_apxa_installed(void)
2542 {
2543 struct ap_config_info info;
2544
2545 if (ap_instructions_available()) {
2546 if (ap_qci(&info) == 0)
2547 return info.apxa;
2548 }
2549
2550 return 0;
2551 }
2552
2553 /*
2554 * The format of the crypto control block (CRYCB) is specified in the 3 low
2555 * order bits of the CRYCB designation (CRYCBD) field as follows:
2556 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2557 * AP extended addressing (APXA) facility is installed.
2558 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2559 * Format 2: Both the APXA and MSAX3 facilities are installed.
2560 */
2561 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2562 {
2563 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2564
2565 /* Clear the CRYCB format bits - i.e., set format 0 by default */
2566 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2567
2568 /* Check whether MSAX3 is installed */
2569 if (!test_kvm_facility(kvm, 76))
2570 return;
2571
2572 if (kvm_s390_apxa_installed())
2573 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2574 else
2575 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2576 }
2577
2578 /*
2579 * kvm_arch_crypto_set_masks
2580 *
2581 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2582 * to be set.
2583 * @apm: the mask identifying the accessible AP adapters
2584 * @aqm: the mask identifying the accessible AP domains
2585 * @adm: the mask identifying the accessible AP control domains
2586 *
2587 * Set the masks that identify the adapters, domains and control domains to
2588 * which the KVM guest is granted access.
2589 *
2590 * Note: The kvm->lock mutex must be locked by the caller before invoking this
2591 * function.
2592 */
2593 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2594 unsigned long *aqm, unsigned long *adm)
2595 {
2596 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2597
2598 kvm_s390_vcpu_block_all(kvm);
2599
2600 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2601 case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2602 memcpy(crycb->apcb1.apm, apm, 32);
2603 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2604 apm[0], apm[1], apm[2], apm[3]);
2605 memcpy(crycb->apcb1.aqm, aqm, 32);
2606 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2607 aqm[0], aqm[1], aqm[2], aqm[3]);
2608 memcpy(crycb->apcb1.adm, adm, 32);
2609 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2610 adm[0], adm[1], adm[2], adm[3]);
2611 break;
2612 case CRYCB_FORMAT1:
2613 case CRYCB_FORMAT0: /* Fall through; both use APCB0 */
2614 memcpy(crycb->apcb0.apm, apm, 8);
2615 memcpy(crycb->apcb0.aqm, aqm, 2);
2616 memcpy(crycb->apcb0.adm, adm, 2);
2617 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2618 apm[0], *((unsigned short *)aqm),
2619 *((unsigned short *)adm));
2620 break;
2621 default: /* Cannot happen */
2622 break;
2623 }
2624
2625 /* recreate the shadow crycb for each vcpu */
2626 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2627 kvm_s390_vcpu_unblock_all(kvm);
2628 }
2629 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2630
2631 /*
2632 * kvm_arch_crypto_clear_masks
2633 *
2634 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2635 * to be cleared.
2636 *
2637 * Clear the masks that identify the adapters, domains and control domains to
2638 * which the KVM guest is granted access.
2639 *
2640 * Note: The kvm->lock mutex must be locked by the caller before invoking this
2641 * function.
2642 */
2643 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2644 {
2645 kvm_s390_vcpu_block_all(kvm);
2646
2647 memset(&kvm->arch.crypto.crycb->apcb0, 0,
2648 sizeof(kvm->arch.crypto.crycb->apcb0));
2649 memset(&kvm->arch.crypto.crycb->apcb1, 0,
2650 sizeof(kvm->arch.crypto.crycb->apcb1));
2651
2652 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2653 /* recreate the shadow crycb for each vcpu */
2654 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2655 kvm_s390_vcpu_unblock_all(kvm);
2656 }
2657 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2658
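/* Base the guest cpuid on the host cpuid, with the version byte set to 0xff. */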
2659 static u64 kvm_s390_get_initial_cpuid(void)
2660 {
2661 struct cpuid cpuid;
2662
2663 get_cpu_id(&cpuid);
2664 cpuid.version = 0xff;
2665 return *((u64 *) &cpuid);
2666 }
2667
2668 static void kvm_s390_crypto_init(struct kvm *kvm)
2669 {
2670 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2671 kvm_s390_set_crycb_format(kvm);
2672 init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
2673
2674 if (!test_kvm_facility(kvm, 76))
2675 return;
2676
2677 /* Enable AES/DEA protected key functions by default */
2678 kvm->arch.crypto.aes_kw = 1;
2679 kvm->arch.crypto.dea_kw = 1;
2680 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2681 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2682 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2683 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2684 }
2685
2686 static void sca_dispose(struct kvm *kvm)
2687 {
2688 if (kvm->arch.use_esca)
2689 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2690 else
2691 free_page((unsigned long)(kvm->arch.sca));
2692 kvm->arch.sca = NULL;
2693 }
2694
2695 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2696 {
2697 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2698 int i, rc;
2699 char debug_name[16];
2700 static unsigned long sca_offset;
2701
2702 rc = -EINVAL;
2703 #ifdef CONFIG_KVM_S390_UCONTROL
2704 if (type & ~KVM_VM_S390_UCONTROL)
2705 goto out_err;
2706 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2707 goto out_err;
2708 #else
2709 if (type)
2710 goto out_err;
2711 #endif
2712
2713 rc = s390_enable_sie();
2714 if (rc)
2715 goto out_err;
2716
2717 rc = -ENOMEM;
2718
2719 if (!sclp.has_64bscao)
2720 alloc_flags |= GFP_DMA;
2721 rwlock_init(&kvm->arch.sca_lock);
2722 /* start with basic SCA */
2723 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2724 if (!kvm->arch.sca)
2725 goto out_err;
2726 mutex_lock(&kvm_lock);
2727 sca_offset += 16;
2728 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2729 sca_offset = 0;
2730 kvm->arch.sca = (struct bsca_block *)
2731 ((char *) kvm->arch.sca + sca_offset);
2732 mutex_unlock(&kvm_lock);
2733
2734 sprintf(debug_name, "kvm-%u", current->pid);
2735
2736 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2737 if (!kvm->arch.dbf)
2738 goto out_err;
2739
2740 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2741 kvm->arch.sie_page2 =
2742 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2743 if (!kvm->arch.sie_page2)
2744 goto out_err;
2745
2746 kvm->arch.sie_page2->kvm = kvm;
2747 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2748
2749 for (i = 0; i < kvm_s390_fac_size(); i++) {
2750 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2751 (kvm_s390_fac_base[i] |
2752 kvm_s390_fac_ext[i]);
2753 kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2754 kvm_s390_fac_base[i];
2755 }
2756 kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2757
2758 /* we are always in czam mode - even on pre-z14 machines */
2759 set_kvm_facility(kvm->arch.model.fac_mask, 138);
2760 set_kvm_facility(kvm->arch.model.fac_list, 138);
2761 /* we emulate STHYI in kvm */
2762 set_kvm_facility(kvm->arch.model.fac_mask, 74);
2763 set_kvm_facility(kvm->arch.model.fac_list, 74);
2764 if (MACHINE_HAS_TLB_GUEST) {
2765 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2766 set_kvm_facility(kvm->arch.model.fac_list, 147);
2767 }
2768
2769 if (css_general_characteristics.aiv && test_facility(65))
2770 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2771
2772 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2773 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2774
2775 kvm_s390_crypto_init(kvm);
2776
2777 mutex_init(&kvm->arch.float_int.ais_lock);
2778 spin_lock_init(&kvm->arch.float_int.lock);
2779 for (i = 0; i < FIRQ_LIST_COUNT; i++)
2780 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2781 init_waitqueue_head(&kvm->arch.ipte_wq);
2782 mutex_init(&kvm->arch.ipte_mutex);
2783
2784 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2785 VM_EVENT(kvm, 3, "vm created with type %lu", type);
2786
2787 if (type & KVM_VM_S390_UCONTROL) {
2788 kvm->arch.gmap = NULL;
2789 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2790 } else {
2791 if (sclp.hamax == U64_MAX)
2792 kvm->arch.mem_limit = TASK_SIZE_MAX;
2793 else
2794 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2795 sclp.hamax + 1);
2796 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2797 if (!kvm->arch.gmap)
2798 goto out_err;
2799 kvm->arch.gmap->private = kvm;
2800 kvm->arch.gmap->pfault_enabled = 0;
2801 }
2802
2803 kvm->arch.use_pfmfi = sclp.has_pfmfi;
2804 kvm->arch.use_skf = sclp.has_skey;
2805 spin_lock_init(&kvm->arch.start_stop_lock);
2806 kvm_s390_vsie_init(kvm);
2807 if (use_gisa)
2808 kvm_s390_gisa_init(kvm);
2809 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2810
2811 return 0;
2812 out_err:
2813 free_page((unsigned long)kvm->arch.sie_page2);
2814 debug_unregister(kvm->arch.dbf);
2815 sca_dispose(kvm);
2816 KVM_EVENT(3, "creation of vm failed: %d", rc);
2817 return rc;
2818 }
2819
2820 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2821 {
2822 u16 rc, rrc;
2823
2824 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2825 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2826 kvm_s390_clear_local_irqs(vcpu);
2827 kvm_clear_async_pf_completion_queue(vcpu);
2828 if (!kvm_is_ucontrol(vcpu->kvm))
2829 sca_del_vcpu(vcpu);
2830
2831 if (kvm_is_ucontrol(vcpu->kvm))
2832 gmap_remove(vcpu->arch.gmap);
2833
2834 if (vcpu->kvm->arch.use_cmma)
2835 kvm_s390_vcpu_unsetup_cmma(vcpu);
2836 /* We cannot hold the vcpu mutex here; we are already dying */
2837 if (kvm_s390_pv_cpu_get_handle(vcpu))
2838 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2839 free_page((unsigned long)(vcpu->arch.sie_block));
2840 }
2841
2842 void kvm_arch_destroy_vm(struct kvm *kvm)
2843 {
2844 u16 rc, rrc;
2845
2846 kvm_destroy_vcpus(kvm);
2847 sca_dispose(kvm);
2848 kvm_s390_gisa_destroy(kvm);
2849 /*
2850 * We are already at the end of life and kvm->lock is not taken.
2851 * This is ok as the file descriptor is closed by now and nobody
2852 * can mess with the pv state. To avoid lockdep_assert_held from
2853 * complaining we do not use kvm_s390_pv_is_protected.
2854 */
2855 if (kvm_s390_pv_get_handle(kvm))
2856 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2857 debug_unregister(kvm->arch.dbf);
2858 free_page((unsigned long)kvm->arch.sie_page2);
2859 if (!kvm_is_ucontrol(kvm))
2860 gmap_remove(kvm->arch.gmap);
2861 kvm_s390_destroy_adapters(kvm);
2862 kvm_s390_clear_float_irqs(kvm);
2863 kvm_s390_vsie_destroy(kvm);
2864 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2865 }
2866
2867 /* Section: vcpu related */
2868 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2869 {
2870 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2871 if (!vcpu->arch.gmap)
2872 return -ENOMEM;
2873 vcpu->arch.gmap->private = vcpu->kvm;
2874
2875 return 0;
2876 }
2877
2878 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2879 {
2880 if (!kvm_s390_use_sca_entries())
2881 return;
2882 read_lock(&vcpu->kvm->arch.sca_lock);
2883 if (vcpu->kvm->arch.use_esca) {
2884 struct esca_block *sca = vcpu->kvm->arch.sca;
2885
2886 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2887 sca->cpu[vcpu->vcpu_id].sda = 0;
2888 } else {
2889 struct bsca_block *sca = vcpu->kvm->arch.sca;
2890
2891 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2892 sca->cpu[vcpu->vcpu_id].sda = 0;
2893 }
2894 read_unlock(&vcpu->kvm->arch.sca_lock);
2895 }
2896
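/*
 * Register the vcpu's SIE block in the system control area and let the
 * SIE block point at the (E)SCA origin.
 */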
2897 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2898 {
2899 if (!kvm_s390_use_sca_entries()) {
2900 struct bsca_block *sca = vcpu->kvm->arch.sca;
2901
2902 /* we still need the basic sca for the ipte control */
2903 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2904 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2905 return;
2906 }
2907 read_lock(&vcpu->kvm->arch.sca_lock);
2908 if (vcpu->kvm->arch.use_esca) {
2909 struct esca_block *sca = vcpu->kvm->arch.sca;
2910
2911 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2912 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2913 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2914 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2915 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2916 } else {
2917 struct bsca_block *sca = vcpu->kvm->arch.sca;
2918
2919 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2920 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2921 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2922 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2923 }
2924 read_unlock(&vcpu->kvm->arch.sca_lock);
2925 }
2926
2927 /* Basic SCA to Extended SCA data copy routines */
2928 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2929 {
2930 d->sda = s->sda;
2931 d->sigp_ctrl.c = s->sigp_ctrl.c;
2932 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2933 }
2934
2935 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2936 {
2937 int i;
2938
2939 d->ipte_control = s->ipte_control;
2940 d->mcn[0] = s->mcn;
2941 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2942 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2943 }
2944
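/*
 * Replace the basic SCA by an extended SCA, which provides room for more
 * vcpus. All vcpus are blocked while their SCA pointers are rewritten.
 */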
2945 static int sca_switch_to_extended(struct kvm *kvm)
2946 {
2947 struct bsca_block *old_sca = kvm->arch.sca;
2948 struct esca_block *new_sca;
2949 struct kvm_vcpu *vcpu;
2950 unsigned long vcpu_idx;
2951 u32 scaol, scaoh;
2952
2953 if (kvm->arch.use_esca)
2954 return 0;
2955
2956 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2957 if (!new_sca)
2958 return -ENOMEM;
2959
2960 scaoh = (u32)((u64)(new_sca) >> 32);
2961 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2962
2963 kvm_s390_vcpu_block_all(kvm);
2964 write_lock(&kvm->arch.sca_lock);
2965
2966 sca_copy_b_to_e(new_sca, old_sca);
2967
2968 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2969 vcpu->arch.sie_block->scaoh = scaoh;
2970 vcpu->arch.sie_block->scaol = scaol;
2971 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2972 }
2973 kvm->arch.sca = new_sca;
2974 kvm->arch.use_esca = 1;
2975
2976 write_unlock(&kvm->arch.sca_lock);
2977 kvm_s390_vcpu_unblock_all(kvm);
2978
2979 free_page((unsigned long)old_sca);
2980
2981 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2982 old_sca, kvm->arch.sca);
2983 return 0;
2984 }
2985
2986 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2987 {
2988 int rc;
2989
2990 if (!kvm_s390_use_sca_entries()) {
2991 if (id < KVM_MAX_VCPUS)
2992 return true;
2993 return false;
2994 }
2995 if (id < KVM_S390_BSCA_CPU_SLOTS)
2996 return true;
2997 if (!sclp.has_esca || !sclp.has_64bscao)
2998 return false;
2999
3000 mutex_lock(&kvm->lock);
3001 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3002 mutex_unlock(&kvm->lock);
3003
3004 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3005 }
3006
3007 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3008 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3009 {
3010 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3011 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3012 vcpu->arch.cputm_start = get_tod_clock_fast();
3013 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3014 }
3015
3016 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3017 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3018 {
3019 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3020 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3021 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3022 vcpu->arch.cputm_start = 0;
3023 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3024 }
3025
3026 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3027 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3028 {
3029 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3030 vcpu->arch.cputm_enabled = true;
3031 __start_cpu_timer_accounting(vcpu);
3032 }
3033
3034 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3035 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3036 {
3037 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3038 __stop_cpu_timer_accounting(vcpu);
3039 vcpu->arch.cputm_enabled = false;
3040 }
3041
3042 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3043 {
3044 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3045 __enable_cpu_timer_accounting(vcpu);
3046 preempt_enable();
3047 }
3048
3049 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3050 {
3051 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3052 __disable_cpu_timer_accounting(vcpu);
3053 preempt_enable();
3054 }
3055
3056 /* set the cpu timer - may only be called from the VCPU thread itself */
3057 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3058 {
3059 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3060 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3061 if (vcpu->arch.cputm_enabled)
3062 vcpu->arch.cputm_start = get_tod_clock_fast();
3063 vcpu->arch.sie_block->cputm = cputm;
3064 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3065 preempt_enable();
3066 }
3067
3068 /* update and get the cpu timer - can also be called from other VCPU threads */
3069 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3070 {
3071 unsigned int seq;
3072 __u64 value;
3073
3074 if (unlikely(!vcpu->arch.cputm_enabled))
3075 return vcpu->arch.sie_block->cputm;
3076
3077 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3078 do {
3079 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3080 /*
3081 * If the writer would ever execute a read in the critical
3082 * section, e.g. in irq context, we have a deadlock.
3083 */
3084 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3085 value = vcpu->arch.sie_block->cputm;
3086 /* if cputm_start is 0, accounting is being started/stopped */
3087 if (likely(vcpu->arch.cputm_start))
3088 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3089 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3090 preempt_enable();
3091 return value;
3092 }
3093
3094 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3095 {
3096
3097 gmap_enable(vcpu->arch.enabled_gmap);
3098 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3099 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3100 __start_cpu_timer_accounting(vcpu);
3101 vcpu->cpu = cpu;
3102 }
3103
3104 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3105 {
3106 vcpu->cpu = -1;
3107 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3108 __stop_cpu_timer_accounting(vcpu);
3109 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3110 vcpu->arch.enabled_gmap = gmap_get_enabled();
3111 gmap_disable(vcpu->arch.enabled_gmap);
3112
3113 }
3114
3115 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3116 {
3117 mutex_lock(&vcpu->kvm->lock);
3118 preempt_disable();
3119 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3120 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3121 preempt_enable();
3122 mutex_unlock(&vcpu->kvm->lock);
3123 if (!kvm_is_ucontrol(vcpu->kvm)) {
3124 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3125 sca_add_vcpu(vcpu);
3126 }
3127 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3128 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3129 /* make vcpu_load load the right gmap on the first trigger */
3130 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3131 }
3132
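/*
 * A PCKMO subfunction is usable by the guest only if it is part of the
 * guest's CPU model and available on the host.
 */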
3133 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3134 {
3135 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3136 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3137 return true;
3138 return false;
3139 }
3140
3141 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3142 {
3143 /* At least one ECC subfunction must be present */
3144 return kvm_has_pckmo_subfunc(kvm, 32) ||
3145 kvm_has_pckmo_subfunc(kvm, 33) ||
3146 kvm_has_pckmo_subfunc(kvm, 34) ||
3147 kvm_has_pckmo_subfunc(kvm, 40) ||
3148 kvm_has_pckmo_subfunc(kvm, 41);
3149
3150 }
3151
3152 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3153 {
3154 /*
3155 * If the AP instructions are not being interpreted and the MSAX3
3156 * facility is not configured for the guest, there is nothing to set up.
3157 */
3158 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3159 return;
3160
3161 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3162 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3163 vcpu->arch.sie_block->eca &= ~ECA_APIE;
3164 vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3165
3166 if (vcpu->kvm->arch.crypto.apie)
3167 vcpu->arch.sie_block->eca |= ECA_APIE;
3168
3169 /* Set up protected key support */
3170 if (vcpu->kvm->arch.crypto.aes_kw) {
3171 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3172 /* ecc is also wrapped with AES key */
3173 if (kvm_has_pckmo_ecc(vcpu->kvm))
3174 vcpu->arch.sie_block->ecd |= ECD_ECC;
3175 }
3176
3177 if (vcpu->kvm->arch.crypto.dea_kw)
3178 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3179 }
3180
3181 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3182 {
3183 free_page(vcpu->arch.sie_block->cbrlo);
3184 vcpu->arch.sie_block->cbrlo = 0;
3185 }
3186
3187 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3188 {
3189 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3190 if (!vcpu->arch.sie_block->cbrlo)
3191 return -ENOMEM;
3192 return 0;
3193 }
3194
3195 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3196 {
3197 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3198
3199 vcpu->arch.sie_block->ibc = model->ibc;
3200 if (test_kvm_facility(vcpu->kvm, 7))
3201 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3202 }
3203
3204 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3205 {
3206 int rc = 0;
3207 u16 uvrc, uvrrc;
3208
3209 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3210 CPUSTAT_SM |
3211 CPUSTAT_STOPPED);
3212
3213 if (test_kvm_facility(vcpu->kvm, 78))
3214 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3215 else if (test_kvm_facility(vcpu->kvm, 8))
3216 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3217
3218 kvm_s390_vcpu_setup_model(vcpu);
3219
3220 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3221 if (MACHINE_HAS_ESOP)
3222 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3223 if (test_kvm_facility(vcpu->kvm, 9))
3224 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3225 if (test_kvm_facility(vcpu->kvm, 73))
3226 vcpu->arch.sie_block->ecb |= ECB_TE;
3227 if (!kvm_is_ucontrol(vcpu->kvm))
3228 vcpu->arch.sie_block->ecb |= ECB_SPECI;
3229
3230 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3231 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3232 if (test_kvm_facility(vcpu->kvm, 130))
3233 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3234 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3235 if (sclp.has_cei)
3236 vcpu->arch.sie_block->eca |= ECA_CEI;
3237 if (sclp.has_ib)
3238 vcpu->arch.sie_block->eca |= ECA_IB;
3239 if (sclp.has_siif)
3240 vcpu->arch.sie_block->eca |= ECA_SII;
3241 if (sclp.has_sigpif)
3242 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3243 if (test_kvm_facility(vcpu->kvm, 129)) {
3244 vcpu->arch.sie_block->eca |= ECA_VX;
3245 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3246 }
3247 if (test_kvm_facility(vcpu->kvm, 139))
3248 vcpu->arch.sie_block->ecd |= ECD_MEF;
3249 if (test_kvm_facility(vcpu->kvm, 156))
3250 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3251 if (vcpu->arch.sie_block->gd) {
3252 vcpu->arch.sie_block->eca |= ECA_AIV;
3253 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3254 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3255 }
3256 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3257 | SDNXC;
3258 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3259
3260 if (sclp.has_kss)
3261 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3262 else
3263 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3264
3265 if (vcpu->kvm->arch.use_cmma) {
3266 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3267 if (rc)
3268 return rc;
3269 }
3270 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3271 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3272
3273 vcpu->arch.sie_block->hpid = HPID_KVM;
3274
3275 kvm_s390_vcpu_crypto_setup(vcpu);
3276
3277 mutex_lock(&vcpu->kvm->lock);
3278 if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3279 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3280 if (rc)
3281 kvm_s390_vcpu_unsetup_cmma(vcpu);
3282 }
3283 mutex_unlock(&vcpu->kvm->lock);
3284
3285 return rc;
3286 }
3287
3288 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3289 {
3290 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3291 return -EINVAL;
3292 return 0;
3293 }
3294
3295 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3296 {
3297 struct sie_page *sie_page;
3298 int rc;
3299
3300 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3301 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3302 if (!sie_page)
3303 return -ENOMEM;
3304
3305 vcpu->arch.sie_block = &sie_page->sie_block;
3306 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3307
3308 /* the real guest size will always be smaller than msl */
3309 vcpu->arch.sie_block->mso = 0;
3310 vcpu->arch.sie_block->msl = sclp.hamax;
3311
3312 vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3313 spin_lock_init(&vcpu->arch.local_int.lock);
3314 vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3315 if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3316 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3317 seqcount_init(&vcpu->arch.cputm_seqcount);
3318
3319 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3320 kvm_clear_async_pf_completion_queue(vcpu);
3321 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3322 KVM_SYNC_GPRS |
3323 KVM_SYNC_ACRS |
3324 KVM_SYNC_CRS |
3325 KVM_SYNC_ARCH0 |
3326 KVM_SYNC_PFAULT |
3327 KVM_SYNC_DIAG318;
3328 kvm_s390_set_prefix(vcpu, 0);
3329 if (test_kvm_facility(vcpu->kvm, 64))
3330 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3331 if (test_kvm_facility(vcpu->kvm, 82))
3332 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3333 if (test_kvm_facility(vcpu->kvm, 133))
3334 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3335 if (test_kvm_facility(vcpu->kvm, 156))
3336 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3337 /* fprs can be synchronized via vrs, even if the guest has no vx. With
3338 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3339 */
3340 if (MACHINE_HAS_VX)
3341 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3342 else
3343 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3344
3345 if (kvm_is_ucontrol(vcpu->kvm)) {
3346 rc = __kvm_ucontrol_vcpu_init(vcpu);
3347 if (rc)
3348 goto out_free_sie_block;
3349 }
3350
3351 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3352 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3353 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3354
3355 rc = kvm_s390_vcpu_setup(vcpu);
3356 if (rc)
3357 goto out_ucontrol_uninit;
3358 return 0;
3359
3360 out_ucontrol_uninit:
3361 if (kvm_is_ucontrol(vcpu->kvm))
3362 gmap_remove(vcpu->arch.gmap);
3363 out_free_sie_block:
3364 free_page((unsigned long)(vcpu->arch.sie_block));
3365 return rc;
3366 }
3367
3368 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3369 {
3370 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3371 return kvm_s390_vcpu_has_irq(vcpu, 0);
3372 }
3373
3374 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3375 {
3376 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3377 }
3378
3379 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3380 {
3381 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3382 exit_sie(vcpu);
3383 }
3384
3385 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3386 {
3387 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3388 }
3389
3390 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3391 {
3392 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3393 exit_sie(vcpu);
3394 }
3395
3396 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3397 {
3398 return atomic_read(&vcpu->arch.sie_block->prog20) &
3399 (PROG_BLOCK_SIE | PROG_REQUEST);
3400 }
3401
3402 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3403 {
3404 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3405 }
3406
3407 /*
3408 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3409 * If the CPU is not running (e.g. waiting as idle) the function will
3410 * return immediately. */
3411 void exit_sie(struct kvm_vcpu *vcpu)
3412 {
3413 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3414 kvm_s390_vsie_kick(vcpu);
3415 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3416 cpu_relax();
3417 }
3418
3419 /* Kick a guest cpu out of SIE to process a request synchronously */
3420 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3421 {
3422 kvm_make_request(req, vcpu);
3423 kvm_s390_vcpu_request(vcpu);
3424 }
3425
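/*
 * gmap notifier callback, invoked when protection of a guest mapping in the
 * range [start, end] changes. Only the two prefix pages matter here: every
 * vcpu whose prefix falls into the range gets a KVM_REQ_MMU_RELOAD so the
 * prefix mapping is re-protected before the next SIE entry.
 */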
3426 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3427 unsigned long end)
3428 {
3429 struct kvm *kvm = gmap->private;
3430 struct kvm_vcpu *vcpu;
3431 unsigned long prefix;
3432 unsigned long i;
3433
3434 if (gmap_is_shadow(gmap))
3435 return;
3436 if (start >= 1UL << 31)
3437 /* We are only interested in prefix pages */
3438 return;
3439 kvm_for_each_vcpu(i, vcpu, kvm) {
3440 /* match against both prefix pages */
3441 prefix = kvm_s390_get_prefix(vcpu);
3442 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3443 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3444 start, end);
3445 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3446 }
3447 }
3448 }
3449
3450 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3451 {
3452 /* do not poll with more than halt_poll_max_steal percent of steal time */
3453 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3454 READ_ONCE(halt_poll_max_steal)) {
3455 vcpu->stat.halt_no_poll_steal++;
3456 return true;
3457 }
3458 return false;
3459 }
3460
3461 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3462 {
3463 /* kvm common code refers to this, but never calls it */
3464 BUG();
3465 return 0;
3466 }
3467
3468 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3469 struct kvm_one_reg *reg)
3470 {
3471 int r = -EINVAL;
3472
3473 switch (reg->id) {
3474 case KVM_REG_S390_TODPR:
3475 r = put_user(vcpu->arch.sie_block->todpr,
3476 (u32 __user *)reg->addr);
3477 break;
3478 case KVM_REG_S390_EPOCHDIFF:
3479 r = put_user(vcpu->arch.sie_block->epoch,
3480 (u64 __user *)reg->addr);
3481 break;
3482 case KVM_REG_S390_CPU_TIMER:
3483 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3484 (u64 __user *)reg->addr);
3485 break;
3486 case KVM_REG_S390_CLOCK_COMP:
3487 r = put_user(vcpu->arch.sie_block->ckc,
3488 (u64 __user *)reg->addr);
3489 break;
3490 case KVM_REG_S390_PFTOKEN:
3491 r = put_user(vcpu->arch.pfault_token,
3492 (u64 __user *)reg->addr);
3493 break;
3494 case KVM_REG_S390_PFCOMPARE:
3495 r = put_user(vcpu->arch.pfault_compare,
3496 (u64 __user *)reg->addr);
3497 break;
3498 case KVM_REG_S390_PFSELECT:
3499 r = put_user(vcpu->arch.pfault_select,
3500 (u64 __user *)reg->addr);
3501 break;
3502 case KVM_REG_S390_PP:
3503 r = put_user(vcpu->arch.sie_block->pp,
3504 (u64 __user *)reg->addr);
3505 break;
3506 case KVM_REG_S390_GBEA:
3507 r = put_user(vcpu->arch.sie_block->gbea,
3508 (u64 __user *)reg->addr);
3509 break;
3510 default:
3511 break;
3512 }
3513
3514 return r;
3515 }
3516
3517 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3518 struct kvm_one_reg *reg)
3519 {
3520 int r = -EINVAL;
3521 __u64 val;
3522
3523 switch (reg->id) {
3524 case KVM_REG_S390_TODPR:
3525 r = get_user(vcpu->arch.sie_block->todpr,
3526 (u32 __user *)reg->addr);
3527 break;
3528 case KVM_REG_S390_EPOCHDIFF:
3529 r = get_user(vcpu->arch.sie_block->epoch,
3530 (u64 __user *)reg->addr);
3531 break;
3532 case KVM_REG_S390_CPU_TIMER:
3533 r = get_user(val, (u64 __user *)reg->addr);
3534 if (!r)
3535 kvm_s390_set_cpu_timer(vcpu, val);
3536 break;
3537 case KVM_REG_S390_CLOCK_COMP:
3538 r = get_user(vcpu->arch.sie_block->ckc,
3539 (u64 __user *)reg->addr);
3540 break;
3541 case KVM_REG_S390_PFTOKEN:
3542 r = get_user(vcpu->arch.pfault_token,
3543 (u64 __user *)reg->addr);
3544 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3545 kvm_clear_async_pf_completion_queue(vcpu);
3546 break;
3547 case KVM_REG_S390_PFCOMPARE:
3548 r = get_user(vcpu->arch.pfault_compare,
3549 (u64 __user *)reg->addr);
3550 break;
3551 case KVM_REG_S390_PFSELECT:
3552 r = get_user(vcpu->arch.pfault_select,
3553 (u64 __user *)reg->addr);
3554 break;
3555 case KVM_REG_S390_PP:
3556 r = get_user(vcpu->arch.sie_block->pp,
3557 (u64 __user *)reg->addr);
3558 break;
3559 case KVM_REG_S390_GBEA:
3560 r = get_user(vcpu->arch.sie_block->gbea,
3561 (u64 __user *)reg->addr);
3562 break;
3563 default:
3564 break;
3565 }
3566
3567 return r;
3568 }
3569
3570 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3571 {
3572 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3573 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3574 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3575
3576 kvm_clear_async_pf_completion_queue(vcpu);
3577 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3578 kvm_s390_vcpu_stop(vcpu);
3579 kvm_s390_clear_local_irqs(vcpu);
3580 }
3581
3582 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3583 {
3584 /* Initial reset is a superset of the normal reset */
3585 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3586
3587 /*
3588 * This equals initial cpu reset in pop, but we don't switch to ESA.
3589 * We do not only reset the internal data, but also ...
3590 */
3591 vcpu->arch.sie_block->gpsw.mask = 0;
3592 vcpu->arch.sie_block->gpsw.addr = 0;
3593 kvm_s390_set_prefix(vcpu, 0);
3594 kvm_s390_set_cpu_timer(vcpu, 0);
3595 vcpu->arch.sie_block->ckc = 0;
3596 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3597 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3598 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3599
3600 /* ... the data in sync regs */
3601 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3602 vcpu->run->s.regs.ckc = 0;
3603 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3604 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3605 vcpu->run->psw_addr = 0;
3606 vcpu->run->psw_mask = 0;
3607 vcpu->run->s.regs.todpr = 0;
3608 vcpu->run->s.regs.cputm = 0;
3609 vcpu->run->s.regs.ckc = 0;
3610 vcpu->run->s.regs.pp = 0;
3611 vcpu->run->s.regs.gbea = 1;
3612 vcpu->run->s.regs.fpc = 0;
3613 /*
3614 * Do not reset these registers in the protected case, as some of
3615 * them are overlayed and they are not accessible in this case
3616 * anyway.
3617 */
3618 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3619 vcpu->arch.sie_block->gbea = 1;
3620 vcpu->arch.sie_block->pp = 0;
3621 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3622 vcpu->arch.sie_block->todpr = 0;
3623 }
3624 }
3625
3626 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3627 {
3628 struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3629
3630 /* Clear reset is a superset of the initial reset */
3631 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3632
3633 memset(&regs->gprs, 0, sizeof(regs->gprs));
3634 memset(&regs->vrs, 0, sizeof(regs->vrs));
3635 memset(&regs->acrs, 0, sizeof(regs->acrs));
3636 memset(&regs->gscb, 0, sizeof(regs->gscb));
3637
3638 regs->etoken = 0;
3639 regs->etoken_extension = 0;
3640 }
3641
3642 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3643 {
3644 vcpu_load(vcpu);
3645 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3646 vcpu_put(vcpu);
3647 return 0;
3648 }
3649
3650 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3651 {
3652 vcpu_load(vcpu);
3653 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3654 vcpu_put(vcpu);
3655 return 0;
3656 }
3657
3658 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3659 struct kvm_sregs *sregs)
3660 {
3661 vcpu_load(vcpu);
3662
3663 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3664 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3665
3666 vcpu_put(vcpu);
3667 return 0;
3668 }
3669
3670 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3671 struct kvm_sregs *sregs)
3672 {
3673 vcpu_load(vcpu);
3674
3675 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3676 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3677
3678 vcpu_put(vcpu);
3679 return 0;
3680 }
3681
3682 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3683 {
3684 int ret = 0;
3685
3686 vcpu_load(vcpu);
3687
3688 vcpu->run->s.regs.fpc = fpu->fpc;
3689 if (MACHINE_HAS_VX)
3690 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3691 (freg_t *) fpu->fprs);
3692 else
3693 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3694
3695 vcpu_put(vcpu);
3696 return ret;
3697 }
3698
3699 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3700 {
3701 vcpu_load(vcpu);
3702
3703 /* make sure we have the latest values */
3704 save_fpu_regs();
3705 if (MACHINE_HAS_VX)
3706 convert_vx_to_fp((freg_t *) fpu->fprs,
3707 (__vector128 *) vcpu->run->s.regs.vrs);
3708 else
3709 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3710 fpu->fpc = vcpu->run->s.regs.fpc;
3711
3712 vcpu_put(vcpu);
3713 return 0;
3714 }
3715
3716 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3717 {
3718 int rc = 0;
3719
3720 if (!is_vcpu_stopped(vcpu))
3721 rc = -EBUSY;
3722 else {
3723 vcpu->run->psw_mask = psw.mask;
3724 vcpu->run->psw_addr = psw.addr;
3725 }
3726 return rc;
3727 }
3728
3729 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3730 struct kvm_translation *tr)
3731 {
3732 return -EINVAL; /* not implemented yet */
3733 }
3734
3735 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3736 KVM_GUESTDBG_USE_HW_BP | \
3737 KVM_GUESTDBG_ENABLE)
3738
3739 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3740 struct kvm_guest_debug *dbg)
3741 {
3742 int rc = 0;
3743
3744 vcpu_load(vcpu);
3745
3746 vcpu->guest_debug = 0;
3747 kvm_s390_clear_bp_data(vcpu);
3748
3749 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3750 rc = -EINVAL;
3751 goto out;
3752 }
3753 if (!sclp.has_gpere) {
3754 rc = -EINVAL;
3755 goto out;
3756 }
3757
3758 if (dbg->control & KVM_GUESTDBG_ENABLE) {
3759 vcpu->guest_debug = dbg->control;
3760 /* enforce guest PER */
3761 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3762
3763 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3764 rc = kvm_s390_import_bp_data(vcpu, dbg);
3765 } else {
3766 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3767 vcpu->arch.guestdbg.last_bp = 0;
3768 }
3769
3770 if (rc) {
3771 vcpu->guest_debug = 0;
3772 kvm_s390_clear_bp_data(vcpu);
3773 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3774 }
3775
3776 out:
3777 vcpu_put(vcpu);
3778 return rc;
3779 }
3780
3781 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3782 struct kvm_mp_state *mp_state)
3783 {
3784 int ret;
3785
3786 vcpu_load(vcpu);
3787
3788 /* CHECK_STOP and LOAD are not supported yet */
3789 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3790 KVM_MP_STATE_OPERATING;
3791
3792 vcpu_put(vcpu);
3793 return ret;
3794 }
3795
3796 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3797 struct kvm_mp_state *mp_state)
3798 {
3799 int rc = 0;
3800
3801 vcpu_load(vcpu);
3802
3803 /* user space knows about this interface - let it control the state */
3804 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3805
3806 switch (mp_state->mp_state) {
3807 case KVM_MP_STATE_STOPPED:
3808 rc = kvm_s390_vcpu_stop(vcpu);
3809 break;
3810 case KVM_MP_STATE_OPERATING:
3811 rc = kvm_s390_vcpu_start(vcpu);
3812 break;
3813 case KVM_MP_STATE_LOAD:
3814 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3815 rc = -ENXIO;
3816 break;
3817 }
3818 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3819 break;
3820 case KVM_MP_STATE_CHECK_STOP:
3821 fallthrough; /* CHECK_STOP and LOAD are not supported yet */
3822 default:
3823 rc = -ENXIO;
3824 }
3825
3826 vcpu_put(vcpu);
3827 return rc;
3828 }
3829
3830 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3831 {
3832 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3833 }
3834
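/*
 * Process all pending vcpu requests before entering SIE: re-protect the
 * prefix pages (KVM_REQ_MMU_RELOAD), flush the guest TLB, toggle IBS, and
 * switch CMM interpretation on/off around migration. Returns 0 on success
 * or a negative error code if the prefix pages could not be re-protected.
 */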
3835 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3836 {
3837 retry:
3838 kvm_s390_vcpu_request_handled(vcpu);
3839 if (!kvm_request_pending(vcpu))
3840 return 0;
3841 /*
3842 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3843 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3844 * This ensures that the ipte instruction for this request has
3845 * already finished. We might race against a second unmapper that
3846 * wants to set the blocking bit. Let's just retry the request loop.
3847 */
3848 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3849 int rc;
3850 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3851 kvm_s390_get_prefix(vcpu),
3852 PAGE_SIZE * 2, PROT_WRITE);
3853 if (rc) {
3854 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3855 return rc;
3856 }
3857 goto retry;
3858 }
3859
3860 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3861 vcpu->arch.sie_block->ihcpu = 0xffff;
3862 goto retry;
3863 }
3864
3865 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3866 if (!ibs_enabled(vcpu)) {
3867 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3868 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3869 }
3870 goto retry;
3871 }
3872
3873 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3874 if (ibs_enabled(vcpu)) {
3875 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3876 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3877 }
3878 goto retry;
3879 }
3880
3881 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3882 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3883 goto retry;
3884 }
3885
3886 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3887 /*
3888 * Disable CMM virtualization; we will emulate the ESSA
3889 * instruction manually, in order to provide additional
3890 * functionalities needed for live migration.
3891 */
3892 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3893 goto retry;
3894 }
3895
3896 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3897 /*
3898 * Re-enable CMM virtualization if CMMA is available and
3899 * CMM has been used.
3900 */
3901 if ((vcpu->kvm->arch.use_cmma) &&
3902 (vcpu->kvm->mm->context.uses_cmm))
3903 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3904 goto retry;
3905 }
3906
3907 /* nothing to do, just clear the request */
3908 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3909 /* we left the vsie handler, nothing to do, just clear the request */
3910 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3911
3912 return 0;
3913 }
3914
3915 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3916 {
3917 struct kvm_vcpu *vcpu;
3918 union tod_clock clk;
3919 unsigned long i;
3920
3921 preempt_disable();
3922
3923 store_tod_clock_ext(&clk);
3924
3925 kvm->arch.epoch = gtod->tod - clk.tod;
3926 kvm->arch.epdx = 0;
3927 if (test_kvm_facility(kvm, 139)) {
3928 kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3929 if (kvm->arch.epoch > gtod->tod)
3930 kvm->arch.epdx -= 1;
3931 }
3932
3933 kvm_s390_vcpu_block_all(kvm);
3934 kvm_for_each_vcpu(i, vcpu, kvm) {
3935 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3936 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3937 }
3938
3939 kvm_s390_vcpu_unblock_all(kvm);
3940 preempt_enable();
3941 }
3942
3943 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3944 {
3945 if (!mutex_trylock(&kvm->lock))
3946 return 0;
3947 __kvm_s390_set_tod_clock(kvm, gtod);
3948 mutex_unlock(&kvm->lock);
3949 return 1;
3950 }
3951
3952 /**
3953 * kvm_arch_fault_in_page - fault-in guest page if necessary
3954 * @vcpu: The corresponding virtual cpu
3955 * @gpa: Guest physical address
3956 * @writable: Whether the page should be writable or not
3957 *
3958 * Make sure that a guest page has been faulted-in on the host.
3959 *
3960 * Return: Zero on success, negative error code otherwise.
3961 */
3962 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3963 {
3964 return gmap_fault(vcpu->arch.gmap, gpa,
3965 writable ? FAULT_FLAG_WRITE : 0);
3966 }
3967
3968 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3969 unsigned long token)
3970 {
3971 struct kvm_s390_interrupt inti;
3972 struct kvm_s390_irq irq;
3973
3974 if (start_token) {
3975 irq.u.ext.ext_params2 = token;
3976 irq.type = KVM_S390_INT_PFAULT_INIT;
3977 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3978 } else {
3979 inti.type = KVM_S390_INT_PFAULT_DONE;
3980 inti.parm64 = token;
3981 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3982 }
3983 }
3984
3985 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3986 struct kvm_async_pf *work)
3987 {
3988 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3989 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3990
3991 return true;
3992 }
3993
3994 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3995 struct kvm_async_pf *work)
3996 {
3997 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3998 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3999 }
4000
4001 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
4002 struct kvm_async_pf *work)
4003 {
4004 /* s390 will always inject the page directly */
4005 }
4006
4007 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4008 {
4009 /*
4010 * s390 will always inject the page directly,
4011 * but we still want check_async_completion to clean up
4012 */
4013 return true;
4014 }
4015
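/*
 * Check whether the current host fault may be handled asynchronously via the
 * pfault interface: a valid token must be set, the guest PSW must match the
 * configured pfault select/compare values, external interrupts and the
 * service-signal subclass must be enabled, no interrupt may be pending and
 * pfault must be enabled on the gmap. If so, queue an async page fault for
 * the faulting guest address.
 */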
4016 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4017 {
4018 hva_t hva;
4019 struct kvm_arch_async_pf arch;
4020
4021 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4022 return false;
4023 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4024 vcpu->arch.pfault_compare)
4025 return false;
4026 if (psw_extint_disabled(vcpu))
4027 return false;
4028 if (kvm_s390_vcpu_has_irq(vcpu, 0))
4029 return false;
4030 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4031 return false;
4032 if (!vcpu->arch.gmap->pfault_enabled)
4033 return false;
4034
4035 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4036 hva += current->thread.gmap_addr & ~PAGE_MASK;
4037 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4038 return false;
4039
4040 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4041 }
4042
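/*
 * Prepare a vcpu for SIE entry: complete async pfaults, deliver pending
 * interrupts, process vcpu requests and, if guest debugging is enabled,
 * patch the guest PER configuration. gprs 14/15 are staged in the SIE block.
 */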
4043 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4044 {
4045 int rc, cpuflags;
4046
4047 /*
4048 * On s390 notifications for arriving pages will be delivered directly
4049 * to the guest but the housekeeping for completed pfaults is
4050 * handled outside the worker.
4051 */
4052 kvm_check_async_pf_completion(vcpu);
4053
4054 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4055 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4056
4057 if (need_resched())
4058 schedule();
4059
4060 if (!kvm_is_ucontrol(vcpu->kvm)) {
4061 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4062 if (rc)
4063 return rc;
4064 }
4065
4066 rc = kvm_s390_handle_requests(vcpu);
4067 if (rc)
4068 return rc;
4069
4070 if (guestdbg_enabled(vcpu)) {
4071 kvm_s390_backup_guest_per_regs(vcpu);
4072 kvm_s390_patch_guest_per_regs(vcpu);
4073 }
4074
4075 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4076
4077 vcpu->arch.sie_block->icptcode = 0;
4078 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4079 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4080 trace_kvm_s390_sie_enter(vcpu, cpuflags);
4081
4082 return 0;
4083 }
4084
4085 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4086 {
4087 struct kvm_s390_pgm_info pgm_info = {
4088 .code = PGM_ADDRESSING,
4089 };
4090 u8 opcode, ilen;
4091 int rc;
4092
4093 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4094 trace_kvm_s390_sie_fault(vcpu);
4095
4096 /*
4097 * We want to inject an addressing exception, which is defined as a
4098 * suppressing or terminating exception. However, since we came here
4099 * by a DAT access exception, the PSW still points to the faulting
4100 * instruction since DAT exceptions are nullifying. So we've got
4101 * to look up the current opcode to get the length of the instruction
4102 * to be able to forward the PSW.
4103 */
4104 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4105 ilen = insn_length(opcode);
4106 if (rc < 0) {
4107 return rc;
4108 } else if (rc) {
4109 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4110 * Forward by arbitrary ilc, injection will take care of
4111 * nullification if necessary.
4112 */
4113 pgm_info = vcpu->arch.pgm;
4114 ilen = 4;
4115 }
4116 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4117 kvm_s390_forward_psw(vcpu, ilen);
4118 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4119 }
4120
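/*
 * Handle the outcome of a SIE exit: -EINTR means a machine check was taken
 * while in SIE and is reinjected; a non-zero intercept code is handled in
 * kernel or forwarded to userspace as KVM_EXIT_S390_SIEIC; -EFAULT covers
 * ucontrol translation exceptions, async pfault setup and synchronous
 * fault-in of the faulting guest page.
 */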
4121 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4122 {
4123 struct mcck_volatile_info *mcck_info;
4124 struct sie_page *sie_page;
4125
4126 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4127 vcpu->arch.sie_block->icptcode);
4128 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4129
4130 if (guestdbg_enabled(vcpu))
4131 kvm_s390_restore_guest_per_regs(vcpu);
4132
4133 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4134 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4135
4136 if (exit_reason == -EINTR) {
4137 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4138 sie_page = container_of(vcpu->arch.sie_block,
4139 struct sie_page, sie_block);
4140 mcck_info = &sie_page->mcck_info;
4141 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4142 return 0;
4143 }
4144
4145 if (vcpu->arch.sie_block->icptcode > 0) {
4146 int rc = kvm_handle_sie_intercept(vcpu);
4147
4148 if (rc != -EOPNOTSUPP)
4149 return rc;
4150 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4151 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4152 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4153 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4154 return -EREMOTE;
4155 } else if (exit_reason != -EFAULT) {
4156 vcpu->stat.exit_null++;
4157 return 0;
4158 } else if (kvm_is_ucontrol(vcpu->kvm)) {
4159 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4160 vcpu->run->s390_ucontrol.trans_exc_code =
4161 current->thread.gmap_addr;
4162 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4163 return -EREMOTE;
4164 } else if (current->thread.gmap_pfault) {
4165 trace_kvm_s390_major_guest_pfault(vcpu);
4166 current->thread.gmap_pfault = 0;
4167 if (kvm_arch_setup_async_pf(vcpu))
4168 return 0;
4169 vcpu->stat.pfault_sync++;
4170 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4171 }
4172 return vcpu_post_run_fault_in_sie(vcpu);
4173 }
4174
4175 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
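/*
 * The inner run loop: vcpu_pre_run(), sie64a() and vcpu_post_run() are
 * repeated until a signal is pending, a guest debug exit is requested or an
 * error occurs. kvm->srcu is dropped while the guest actually runs.
 */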
4176 static int __vcpu_run(struct kvm_vcpu *vcpu)
4177 {
4178 int rc, exit_reason;
4179 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4180
4181 /*
4182 * We try to hold kvm->srcu during most of vcpu_run (except when run-
4183 * ning the guest), so that memslots (and other stuff) are protected
4184 */
4185 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4186
4187 do {
4188 rc = vcpu_pre_run(vcpu);
4189 if (rc)
4190 break;
4191
4192 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4193 /*
4194 * As PF_VCPU will be used in the fault handler, there must be no
4195 * uaccess between guest_enter and guest_exit.
4196 */
4197 local_irq_disable();
4198 guest_enter_irqoff();
4199 __disable_cpu_timer_accounting(vcpu);
4200 local_irq_enable();
4201 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4202 memcpy(sie_page->pv_grregs,
4203 vcpu->run->s.regs.gprs,
4204 sizeof(sie_page->pv_grregs));
4205 }
4206 if (test_cpu_flag(CIF_FPU))
4207 load_fpu_regs();
4208 exit_reason = sie64a(vcpu->arch.sie_block,
4209 vcpu->run->s.regs.gprs);
4210 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4211 memcpy(vcpu->run->s.regs.gprs,
4212 sie_page->pv_grregs,
4213 sizeof(sie_page->pv_grregs));
4214 /*
4215 * We're not allowed to inject interrupts on intercepts
4216 * that leave the guest state in an "in-between" state
4217 * where the next SIE entry will do a continuation.
4218 * Fence interrupts in our "internal" PSW.
4219 */
4220 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4221 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4222 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4223 }
4224 }
4225 local_irq_disable();
4226 __enable_cpu_timer_accounting(vcpu);
4227 guest_exit_irqoff();
4228 local_irq_enable();
4229 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4230
4231 rc = vcpu_post_run(vcpu, exit_reason);
4232 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4233
4234 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4235 return rc;
4236 }
4237
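/*
 * Sync the format-2 (non-protected guest) parts of the sync-regs area from
 * kvm_run into the SIE block, including lazy enablement of runtime
 * instrumentation and guarded storage when userspace provides valid
 * control blocks.
 */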
4238 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4239 {
4240 struct kvm_run *kvm_run = vcpu->run;
4241 struct runtime_instr_cb *riccb;
4242 struct gs_cb *gscb;
4243
4244 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4245 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4246 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4247 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4248 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4249 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4250 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4251 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4252 }
4253 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4254 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4255 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4256 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4257 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4258 kvm_clear_async_pf_completion_queue(vcpu);
4259 }
4260 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4261 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4262 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4263 }
4264 /*
4265 * If userspace sets the riccb (e.g. after migration) to a valid state,
4266 * we should enable RI here instead of doing the lazy enablement.
4267 */
4268 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4269 test_kvm_facility(vcpu->kvm, 64) &&
4270 riccb->v &&
4271 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4272 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4273 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4274 }
4275 /*
4276 * If userspace sets the gscb (e.g. after migration) to non-zero,
4277 * we should enable GS here instead of doing the lazy enablement.
4278 */
4279 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4280 test_kvm_facility(vcpu->kvm, 133) &&
4281 gscb->gssm &&
4282 !vcpu->arch.gs_enabled) {
4283 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4284 vcpu->arch.sie_block->ecb |= ECB_GS;
4285 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4286 vcpu->arch.gs_enabled = 1;
4287 }
4288 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4289 test_kvm_facility(vcpu->kvm, 82)) {
4290 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4291 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4292 }
4293 if (MACHINE_HAS_GS) {
4294 preempt_disable();
4295 __ctl_set_bit(2, 4);
4296 if (current->thread.gs_cb) {
4297 vcpu->arch.host_gscb = current->thread.gs_cb;
4298 save_gs_cb(vcpu->arch.host_gscb);
4299 }
4300 if (vcpu->arch.gs_enabled) {
4301 current->thread.gs_cb = (struct gs_cb *)
4302 &vcpu->run->s.regs.gscb;
4303 restore_gs_cb(current->thread.gs_cb);
4304 }
4305 preempt_enable();
4306 }
4307 /* SIE will load etoken directly from SDNX and therefore kvm_run */
4308 }
4309
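/*
 * Copy the register state userspace marked dirty from kvm_run into the vcpu
 * and swap in the guest access and floating point/vector registers. For
 * protected guests only the condition code of the PSW mask is taken from
 * userspace.
 */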
4310 static void sync_regs(struct kvm_vcpu *vcpu)
4311 {
4312 struct kvm_run *kvm_run = vcpu->run;
4313
4314 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4315 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4316 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4317 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4318 /* some control register changes require a tlb flush */
4319 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4320 }
4321 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4322 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4323 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4324 }
4325 save_access_regs(vcpu->arch.host_acrs);
4326 restore_access_regs(vcpu->run->s.regs.acrs);
4327 /* save host (userspace) fprs/vrs */
4328 save_fpu_regs();
4329 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4330 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4331 if (MACHINE_HAS_VX)
4332 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4333 else
4334 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4335 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4336 if (test_fp_ctl(current->thread.fpu.fpc))
4337 /* User space provided an invalid FPC, let's clear it */
4338 current->thread.fpu.fpc = 0;
4339
4340 /* Sync fmt2 only data */
4341 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4342 sync_regs_fmt2(vcpu);
4343 } else {
4344 /*
4345 * In several places we have to modify our internal view to
4346 * not do things that are disallowed by the ultravisor. For
4347 * example we must not inject interrupts after specific exits
4348 * (e.g. 112 prefix page not secure). We do this by turning
4349 * off the machine check, external and I/O interrupt bits
4350 * of our PSW copy. To avoid getting validity intercepts, we
4351 * only accept the condition code from userspace.
4352 */
4353 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4354 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4355 PSW_MASK_CC;
4356 }
4357
4358 kvm_run->kvm_dirty_regs = 0;
4359 }
4360
4361 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4362 {
4363 struct kvm_run *kvm_run = vcpu->run;
4364
4365 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4366 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4367 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4368 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4369 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4370 if (MACHINE_HAS_GS) {
4371 preempt_disable();
4372 __ctl_set_bit(2, 4);
4373 if (vcpu->arch.gs_enabled)
4374 save_gs_cb(current->thread.gs_cb);
4375 current->thread.gs_cb = vcpu->arch.host_gscb;
4376 restore_gs_cb(vcpu->arch.host_gscb);
4377 if (!vcpu->arch.host_gscb)
4378 __ctl_clear_bit(2, 4);
4379 vcpu->arch.host_gscb = NULL;
4380 preempt_enable();
4381 }
4382 /* SIE will save etoken directly into SDNX and therefore kvm_run */
4383 }
4384
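/*
 * Counterpart of sync_regs(): copy the guest register state back into
 * kvm_run and restore the host access and floating point/vector registers
 * after leaving the run loop.
 */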
4385 static void store_regs(struct kvm_vcpu *vcpu)
4386 {
4387 struct kvm_run *kvm_run = vcpu->run;
4388
4389 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4390 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4391 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4392 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4393 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4394 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4395 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4396 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4397 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4398 save_access_regs(vcpu->run->s.regs.acrs);
4399 restore_access_regs(vcpu->arch.host_acrs);
4400 /* Save guest register state */
4401 save_fpu_regs();
4402 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4403 /* Restore will be done lazily at return */
4404 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4405 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4406 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4407 store_regs_fmt2(vcpu);
4408 }
4409
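/*
 * Entry point for the KVM_RUN ioctl: validate the sync-regs flags, start the
 * vcpu if user space does not control the cpu state, sync registers, run the
 * SIE loop and store the registers back before returning to userspace.
 */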
4410 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4411 {
4412 struct kvm_run *kvm_run = vcpu->run;
4413 int rc;
4414
4415 if (kvm_run->immediate_exit)
4416 return -EINTR;
4417
4418 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4419 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4420 return -EINVAL;
4421
4422 vcpu_load(vcpu);
4423
4424 if (guestdbg_exit_pending(vcpu)) {
4425 kvm_s390_prepare_debug_exit(vcpu);
4426 rc = 0;
4427 goto out;
4428 }
4429
4430 kvm_sigset_activate(vcpu);
4431
4432 /*
4433 * no need to check the return value of vcpu_start as it can only fail
4434 * for protvirt, and protvirt implies user-controlled cpu state
4435 */
4436 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4437 kvm_s390_vcpu_start(vcpu);
4438 } else if (is_vcpu_stopped(vcpu)) {
4439 pr_err_ratelimited("can't run stopped vcpu %d\n",
4440 vcpu->vcpu_id);
4441 rc = -EINVAL;
4442 goto out;
4443 }
4444
4445 sync_regs(vcpu);
4446 enable_cpu_timer_accounting(vcpu);
4447
4448 might_fault();
4449 rc = __vcpu_run(vcpu);
4450
4451 if (signal_pending(current) && !rc) {
4452 kvm_run->exit_reason = KVM_EXIT_INTR;
4453 rc = -EINTR;
4454 }
4455
4456 if (guestdbg_exit_pending(vcpu) && !rc) {
4457 kvm_s390_prepare_debug_exit(vcpu);
4458 rc = 0;
4459 }
4460
4461 if (rc == -EREMOTE) {
4462 /* userspace support is needed, kvm_run has been prepared */
4463 rc = 0;
4464 }
4465
4466 disable_cpu_timer_accounting(vcpu);
4467 store_regs(vcpu);
4468
4469 kvm_sigset_deactivate(vcpu);
4470
4471 vcpu->stat.exit_userspace++;
4472 out:
4473 vcpu_put(vcpu);
4474 return rc;
4475 }
4476
4477 /*
4478 * store status at address
4479 * we have two special cases:
4480 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4481 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4482 */
4483 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4484 {
4485 unsigned char archmode = 1;
4486 freg_t fprs[NUM_FPRS];
4487 unsigned int px;
4488 u64 clkcomp, cputm;
4489 int rc;
4490
4491 px = kvm_s390_get_prefix(vcpu);
4492 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4493 if (write_guest_abs(vcpu, 163, &archmode, 1))
4494 return -EFAULT;
4495 gpa = 0;
4496 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4497 if (write_guest_real(vcpu, 163, &archmode, 1))
4498 return -EFAULT;
4499 gpa = px;
4500 } else
4501 gpa -= __LC_FPREGS_SAVE_AREA;
4502
4503 /* manually convert vector registers if necessary */
4504 if (MACHINE_HAS_VX) {
4505 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4506 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4507 fprs, 128);
4508 } else {
4509 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4510 vcpu->run->s.regs.fprs, 128);
4511 }
4512 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4513 vcpu->run->s.regs.gprs, 128);
4514 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4515 &vcpu->arch.sie_block->gpsw, 16);
4516 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4517 &px, 4);
4518 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4519 &vcpu->run->s.regs.fpc, 4);
4520 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4521 &vcpu->arch.sie_block->todpr, 4);
4522 cputm = kvm_s390_get_cpu_timer(vcpu);
4523 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4524 &cputm, 8);
4525 clkcomp = vcpu->arch.sie_block->ckc >> 8;
4526 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4527 &clkcomp, 8);
4528 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4529 &vcpu->run->s.regs.acrs, 64);
4530 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4531 &vcpu->arch.sie_block->gcr, 128);
4532 return rc ? -EFAULT : 0;
4533 }
4534
4535 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4536 {
4537 /*
4538 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4539 * switch in the run ioctl. Let's update our copies before we save
4540 * them into the save area
4541 */
4542 save_fpu_regs();
4543 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4544 save_access_regs(vcpu->run->s.regs.acrs);
4545
4546 return kvm_s390_store_status_unloaded(vcpu, addr);
4547 }
4548
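/*
 * IBS is only used to speed up a guest while a single VCPU is running; these
 * helpers synchronously request enabling or disabling it on a vcpu.
 */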
4549 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4550 {
4551 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4552 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4553 }
4554
4555 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4556 {
4557 unsigned long i;
4558 struct kvm_vcpu *vcpu;
4559
4560 kvm_for_each_vcpu(i, vcpu, kvm) {
4561 __disable_ibs_on_vcpu(vcpu);
4562 }
4563 }
4564
4565 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4566 {
4567 if (!sclp.has_ibs)
4568 return;
4569 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4570 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4571 }
4572
4573 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4574 {
4575 int i, online_vcpus, r = 0, started_vcpus = 0;
4576
4577 if (!is_vcpu_stopped(vcpu))
4578 return 0;
4579
4580 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4581 /* Only one cpu at a time may enter/leave the STOPPED state. */
4582 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4583 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4584
4585 /* Let's tell the UV that we want to change into the operating state */
4586 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4587 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4588 if (r) {
4589 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4590 return r;
4591 }
4592 }
4593
4594 for (i = 0; i < online_vcpus; i++) {
4595 if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
4596 started_vcpus++;
4597 }
4598
4599 if (started_vcpus == 0) {
4600 /* we're the only active VCPU -> speed it up */
4601 __enable_ibs_on_vcpu(vcpu);
4602 } else if (started_vcpus == 1) {
4603 /*
4604 * As we are starting a second VCPU, we have to disable
4605 * the IBS facility on all VCPUs to remove potentially
4606 * outstanding ENABLE requests.
4607 */
4608 __disable_ibs_on_all_vcpus(vcpu->kvm);
4609 }
4610
4611 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4612 /*
4613 * The real PSW might have changed due to a RESTART interpreted by the
4614 * ultravisor. We block all interrupts and let the next sie exit
4615 * refresh our view.
4616 */
4617 if (kvm_s390_pv_cpu_is_protected(vcpu))
4618 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4619 /*
4620 * Another VCPU might have used IBS while we were offline.
4621 * Let's play safe and flush the VCPU at startup.
4622 */
4623 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4624 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4625 return 0;
4626 }
4627
4628 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4629 {
4630 int i, online_vcpus, r = 0, started_vcpus = 0;
4631 struct kvm_vcpu *started_vcpu = NULL;
4632
4633 if (is_vcpu_stopped(vcpu))
4634 return 0;
4635
4636 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4637 /* Only one cpu at a time may enter/leave the STOPPED state. */
4638 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4639 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4640
4641 /* Let's tell the UV that we want to change into the stopped state */
4642 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4643 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4644 if (r) {
4645 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4646 return r;
4647 }
4648 }
4649
4650 /*
4651 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
4652 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
4653 * have been fully processed. This will ensure that the VCPU
4654 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
4655 */
4656 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4657 kvm_s390_clear_stop_irq(vcpu);
4658
4659 __disable_ibs_on_vcpu(vcpu);
4660
4661 for (i = 0; i < online_vcpus; i++) {
4662 struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
4663
4664 if (!is_vcpu_stopped(tmp)) {
4665 started_vcpus++;
4666 started_vcpu = tmp;
4667 }
4668 }
4669
4670 if (started_vcpus == 1) {
4671 /*
4672 * As we only have one VCPU left, we want to enable the
4673 * IBS facility for that VCPU to speed it up.
4674 */
4675 __enable_ibs_on_vcpu(started_vcpu);
4676 }
4677
4678 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4679 return 0;
4680 }
4681
4682 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4683 struct kvm_enable_cap *cap)
4684 {
4685 int r;
4686
4687 if (cap->flags)
4688 return -EINVAL;
4689
4690 switch (cap->cap) {
4691 case KVM_CAP_S390_CSS_SUPPORT:
4692 if (!vcpu->kvm->arch.css_support) {
4693 vcpu->kvm->arch.css_support = 1;
4694 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4695 trace_kvm_s390_enable_css(vcpu->kvm);
4696 }
4697 r = 0;
4698 break;
4699 default:
4700 r = -EINVAL;
4701 break;
4702 }
4703 return r;
4704 }
4705
4706 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4707 struct kvm_s390_mem_op *mop)
4708 {
4709 void __user *uaddr = (void __user *)mop->buf;
4710 int r = 0;
4711
4712 if (mop->flags || !mop->size)
4713 return -EINVAL;
4714 if (mop->size + mop->sida_offset < mop->size)
4715 return -EINVAL;
4716 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4717 return -E2BIG;
4718 if (!kvm_s390_pv_cpu_is_protected(vcpu))
4719 return -EINVAL;
4720
4721 switch (mop->op) {
4722 case KVM_S390_MEMOP_SIDA_READ:
4723 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4724 mop->sida_offset), mop->size))
4725 r = -EFAULT;
4726
4727 break;
4728 case KVM_S390_MEMOP_SIDA_WRITE:
4729 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4730 mop->sida_offset), uaddr, mop->size))
4731 r = -EFAULT;
4732 break;
4733 }
4734 return r;
4735 }
4736 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4737 struct kvm_s390_mem_op *mop)
4738 {
4739 void __user *uaddr = (void __user *)mop->buf;
4740 void *tmpbuf = NULL;
4741 int r = 0;
4742 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4743 | KVM_S390_MEMOP_F_CHECK_ONLY;
4744
4745 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4746 return -EINVAL;
4747
4748 if (mop->size > MEM_OP_MAX_SIZE)
4749 return -E2BIG;
4750
4751 if (kvm_s390_pv_cpu_is_protected(vcpu))
4752 return -EINVAL;
4753
4754 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4755 tmpbuf = vmalloc(mop->size);
4756 if (!tmpbuf)
4757 return -ENOMEM;
4758 }
4759
4760 switch (mop->op) {
4761 case KVM_S390_MEMOP_LOGICAL_READ:
4762 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4763 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4764 mop->size, GACC_FETCH);
4765 break;
4766 }
4767 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4768 if (r == 0) {
4769 if (copy_to_user(uaddr, tmpbuf, mop->size))
4770 r = -EFAULT;
4771 }
4772 break;
4773 case KVM_S390_MEMOP_LOGICAL_WRITE:
4774 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4775 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4776 mop->size, GACC_STORE);
4777 break;
4778 }
4779 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4780 r = -EFAULT;
4781 break;
4782 }
4783 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4784 break;
4785 }
4786
4787 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4788 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4789
4790 vfree(tmpbuf);
4791 return r;
4792 }
4793
4794 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4795 struct kvm_s390_mem_op *mop)
4796 {
4797 int r, srcu_idx;
4798
4799 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4800
4801 switch (mop->op) {
4802 case KVM_S390_MEMOP_LOGICAL_READ:
4803 case KVM_S390_MEMOP_LOGICAL_WRITE:
4804 r = kvm_s390_guest_mem_op(vcpu, mop);
4805 break;
4806 case KVM_S390_MEMOP_SIDA_READ:
4807 case KVM_S390_MEMOP_SIDA_WRITE:
4808 /* we are locked against sida going away by the vcpu->mutex */
4809 r = kvm_s390_guest_sida_op(vcpu, mop);
4810 break;
4811 default:
4812 r = -EINVAL;
4813 }
4814
4815 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4816 return r;
4817 }
4818
4819 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4820 unsigned int ioctl, unsigned long arg)
4821 {
4822 struct kvm_vcpu *vcpu = filp->private_data;
4823 void __user *argp = (void __user *)arg;
4824
4825 switch (ioctl) {
4826 case KVM_S390_IRQ: {
4827 struct kvm_s390_irq s390irq;
4828
4829 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4830 return -EFAULT;
4831 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4832 }
4833 case KVM_S390_INTERRUPT: {
4834 struct kvm_s390_interrupt s390int;
4835 struct kvm_s390_irq s390irq = {};
4836
4837 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4838 return -EFAULT;
4839 if (s390int_to_s390irq(&s390int, &s390irq))
4840 return -EINVAL;
4841 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4842 }
4843 }
4844 return -ENOIOCTLCMD;
4845 }
4846
4847 long kvm_arch_vcpu_ioctl(struct file *filp,
4848 unsigned int ioctl, unsigned long arg)
4849 {
4850 struct kvm_vcpu *vcpu = filp->private_data;
4851 void __user *argp = (void __user *)arg;
4852 int idx;
4853 long r;
4854 u16 rc, rrc;
4855
4856 vcpu_load(vcpu);
4857
4858 switch (ioctl) {
4859 case KVM_S390_STORE_STATUS:
4860 idx = srcu_read_lock(&vcpu->kvm->srcu);
4861 r = kvm_s390_store_status_unloaded(vcpu, arg);
4862 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4863 break;
4864 case KVM_S390_SET_INITIAL_PSW: {
4865 psw_t psw;
4866
4867 r = -EFAULT;
4868 if (copy_from_user(&psw, argp, sizeof(psw)))
4869 break;
4870 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4871 break;
4872 }
4873 case KVM_S390_CLEAR_RESET:
4874 r = 0;
4875 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4876 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4877 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4878 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4879 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4880 rc, rrc);
4881 }
4882 break;
4883 case KVM_S390_INITIAL_RESET:
4884 r = 0;
4885 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4886 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4887 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4888 UVC_CMD_CPU_RESET_INITIAL,
4889 &rc, &rrc);
4890 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4891 rc, rrc);
4892 }
4893 break;
4894 case KVM_S390_NORMAL_RESET:
4895 r = 0;
4896 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4897 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4898 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4899 UVC_CMD_CPU_RESET, &rc, &rrc);
4900 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4901 rc, rrc);
4902 }
4903 break;
4904 case KVM_SET_ONE_REG:
4905 case KVM_GET_ONE_REG: {
4906 struct kvm_one_reg reg;
4907 r = -EINVAL;
4908 if (kvm_s390_pv_cpu_is_protected(vcpu))
4909 break;
4910 r = -EFAULT;
4911 if (copy_from_user(&reg, argp, sizeof(reg)))
4912 break;
4913 if (ioctl == KVM_SET_ONE_REG)
4914 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4915 else
4916 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4917 break;
4918 }
4919 #ifdef CONFIG_KVM_S390_UCONTROL
4920 case KVM_S390_UCAS_MAP: {
4921 struct kvm_s390_ucas_mapping ucasmap;
4922
4923 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4924 r = -EFAULT;
4925 break;
4926 }
4927
4928 if (!kvm_is_ucontrol(vcpu->kvm)) {
4929 r = -EINVAL;
4930 break;
4931 }
4932
4933 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4934 ucasmap.vcpu_addr, ucasmap.length);
4935 break;
4936 }
4937 case KVM_S390_UCAS_UNMAP: {
4938 struct kvm_s390_ucas_mapping ucasmap;
4939
4940 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4941 r = -EFAULT;
4942 break;
4943 }
4944
4945 if (!kvm_is_ucontrol(vcpu->kvm)) {
4946 r = -EINVAL;
4947 break;
4948 }
4949
4950 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4951 ucasmap.length);
4952 break;
4953 }
4954 #endif
4955 case KVM_S390_VCPU_FAULT: {
4956 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4957 break;
4958 }
4959 case KVM_ENABLE_CAP:
4960 {
4961 struct kvm_enable_cap cap;
4962 r = -EFAULT;
4963 if (copy_from_user(&cap, argp, sizeof(cap)))
4964 break;
4965 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4966 break;
4967 }
4968 case KVM_S390_MEM_OP: {
4969 struct kvm_s390_mem_op mem_op;
4970
4971 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4972 r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4973 else
4974 r = -EFAULT;
4975 break;
4976 }
4977 case KVM_S390_SET_IRQ_STATE: {
4978 struct kvm_s390_irq_state irq_state;
4979
4980 r = -EFAULT;
4981 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4982 break;
4983 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4984 irq_state.len == 0 ||
4985 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4986 r = -EINVAL;
4987 break;
4988 }
4989 /* do not use irq_state.flags, it will break old QEMUs */
4990 r = kvm_s390_set_irq_state(vcpu,
4991 (void __user *) irq_state.buf,
4992 irq_state.len);
4993 break;
4994 }
4995 case KVM_S390_GET_IRQ_STATE: {
4996 struct kvm_s390_irq_state irq_state;
4997
4998 r = -EFAULT;
4999 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5000 break;
5001 if (irq_state.len == 0) {
5002 r = -EINVAL;
5003 break;
5004 }
5005 /* do not use irq_state.flags, it will break old QEMUs */
5006 r = kvm_s390_get_irq_state(vcpu,
5007 (__u8 __user *) irq_state.buf,
5008 irq_state.len);
5009 break;
5010 }
5011 default:
5012 r = -ENOTTY;
5013 }
5014
5015 vcpu_put(vcpu);
5016 return r;
5017 }
5018
5019 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5020 {
5021 #ifdef CONFIG_KVM_S390_UCONTROL
5022 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5023 && (kvm_is_ucontrol(vcpu->kvm))) {
5024 vmf->page = virt_to_page(vcpu->arch.sie_block);
5025 get_page(vmf->page);
5026 return 0;
5027 }
5028 #endif
5029 return VM_FAULT_SIGBUS;
5030 }
5031
5032 /* Section: memory related */
5033 int kvm_arch_prepare_memory_region(struct kvm *kvm,
5034 struct kvm_memory_slot *memslot,
5035 const struct kvm_userspace_memory_region *mem,
5036 enum kvm_mr_change change)
5037 {
5038 /* A few sanity checks. We can have memory slots which have to be
5039 located/ended at a segment boundary (1MB). The memory in userland is
5040 ok to be fragmented into various different vmas. It is okay to mmap()
5041 and munmap() stuff in this slot after doing this call at any time */
5042
5043 if (mem->userspace_addr & 0xffffful)
5044 return -EINVAL;
5045
5046 if (mem->memory_size & 0xffffful)
5047 return -EINVAL;
5048
5049 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
5050 return -EINVAL;
5051
5052 /* When we are protected, we should not change the memory slots */
5053 if (kvm_s390_pv_get_handle(kvm))
5054 return -EINVAL;
5055
5056 if (!kvm->arch.migration_mode)
5057 return 0;
5058
5059 /*
5060 * Turn off migration mode when:
5061 * - userspace creates a new memslot with dirty logging off,
5062 * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and
5063 * dirty logging is turned off.
5064 * Migration mode expects dirty page logging being enabled to store
5065 * its dirty bitmap.
5066 */
5067 if (change != KVM_MR_DELETE &&
5068 !(mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
5069 WARN(kvm_s390_vm_stop_migration(kvm),
5070 "Failed to stop migration mode");
5071
5072 return 0;
5073 }
5074
5075 void kvm_arch_commit_memory_region(struct kvm *kvm,
5076 const struct kvm_userspace_memory_region *mem,
5077 struct kvm_memory_slot *old,
5078 const struct kvm_memory_slot *new,
5079 enum kvm_mr_change change)
5080 {
5081 int rc = 0;
5082
5083 switch (change) {
5084 case KVM_MR_DELETE:
5085 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5086 old->npages * PAGE_SIZE);
5087 break;
5088 case KVM_MR_MOVE:
5089 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5090 old->npages * PAGE_SIZE);
5091 if (rc)
5092 break;
5093 fallthrough;
5094 case KVM_MR_CREATE:
5095 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5096 mem->guest_phys_addr, mem->memory_size);
5097 break;
5098 case KVM_MR_FLAGS_ONLY:
5099 break;
5100 default:
5101 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5102 }
5103 if (rc)
5104 pr_warn("failed to commit memory region\n");
5105 return;
5106 }
5107
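/*
 * Derive a per-facility-word mask from the SCLP hmfai field; at module init
 * only the host facility bits that pass this mask are copied into
 * kvm_s390_fac_base.
 */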
5108 static inline unsigned long nonhyp_mask(int i)
5109 {
5110 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5111
5112 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5113 }
5114
5115 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5116 {
5117 vcpu->valid_wakeup = false;
5118 }
5119
5120 static int __init kvm_s390_init(void)
5121 {
5122 int i;
5123
5124 if (!sclp.has_sief2) {
5125 pr_info("SIE is not available\n");
5126 return -ENODEV;
5127 }
5128
5129 if (nested && hpage) {
5130 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5131 return -EINVAL;
5132 }
5133
5134 for (i = 0; i < 16; i++)
5135 kvm_s390_fac_base[i] |=
5136 stfle_fac_list[i] & nonhyp_mask(i);
5137
5138 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5139 }
5140
5141 static void __exit kvm_s390_exit(void)
5142 {
5143 kvm_exit();
5144 }
5145
5146 module_init(kvm_s390_init);
5147 module_exit(kvm_s390_exit);
5148
5149 /*
5150 * Enable autoloading of the kvm module.
5151 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5152 * since x86 takes a different approach.
5153 */
5154 #include <linux/miscdevice.h>
5155 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5156 MODULE_ALIAS("devname:kvm");
5157