1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (C) 2021 Google LLC
4 * Author: Fuad Tabba <tabba@google.com>
5 */
6
7 #include <linux/kvm_host.h>
8 #include <linux/mm.h>
9
10 #include <kvm/arm_hypercalls.h>
11 #include <kvm/arm_psci.h>
12 #include <kvm/device.h>
13
14 #include <asm/kvm_emulate.h>
15 #include <hyp/adjust_pc.h>
16
17 #include <nvhe/alloc.h>
18 #include <nvhe/ffa.h>
19 #include <nvhe/mem_protect.h>
20 #include <nvhe/memory.h>
21 #include <nvhe/modules.h>
22 #include <nvhe/mm.h>
23 #include <nvhe/pkvm.h>
24 #include <nvhe/pviommu.h>
25 #include <nvhe/pviommu-host.h>
26 #include <nvhe/rwlock.h>
27 #include <nvhe/trap_handler.h>
28
29 /* Used by icache_is_aliasing(). */
30 unsigned long __icache_flags;
31
32 /* Used by kvm_get_vttbr(). */
33 unsigned int kvm_arm_vmid_bits;
34
35 unsigned int kvm_sve_max_vl;
36
37 unsigned int kvm_host_sve_max_vl;
38
39 /*
40 * The currently loaded hyp vCPU for each physical CPU. Used only when
41 * protected KVM is enabled, but for both protected and non-protected VMs.
42 */
43 static DEFINE_PER_CPU(struct pkvm_hyp_vcpu *, loaded_hyp_vcpu);
44
45 static LIST_HEAD(running_vms);
46 struct ffa_mem_transfer *find_transfer_by_handle(u64 ffa_handle, struct kvm_ffa_buffers *buf);
47
48 static void pkvm_vcpu_reset_hcr(struct kvm_vcpu *vcpu)
49 {
50 vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
51
52 if (has_hvhe())
53 vcpu->arch.hcr_el2 |= HCR_E2H;
54
55 if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
56 /* route synchronous external abort exceptions to EL2 */
57 vcpu->arch.hcr_el2 |= HCR_TEA;
58 /* trap error record accesses */
59 vcpu->arch.hcr_el2 |= HCR_TERR;
60 }
61
62 if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
63 vcpu->arch.hcr_el2 |= HCR_FWB;
64
65 if (cpus_have_final_cap(ARM64_HAS_EVT) &&
66 !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
67 vcpu->arch.hcr_el2 |= HCR_TID4;
68 else
69 vcpu->arch.hcr_el2 |= HCR_TID2;
70
71 if (vcpu_has_ptrauth(vcpu))
72 vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);
73
74 if (kvm_has_mte(vcpu->kvm))
75 vcpu->arch.hcr_el2 |= HCR_ATA;
76 }
77
78 static void pkvm_vcpu_reset_hcrx(struct pkvm_hyp_vcpu *hyp_vcpu)
79 {
80 struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
81 struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
82
83 if (!cpus_have_final_cap(ARM64_HAS_HCX))
84 return;
85
86 /*
87 * In general, all HCRX_EL2 bits are gated by a feature.
88 * The only reason we can set SMPME without checking any
89 * feature is that its effects are not directly observable
90 * from the guest.
91 */
92 vcpu->arch.hcrx_el2 = HCRX_EL2_SMPME;
93
94 /*
95 * For non-protected VMs, the host is responsible for the guest's
96 * features, so use the remaining host HCRX_EL2 bits.
97 */
98 if (!pkvm_hyp_vcpu_is_protected(hyp_vcpu))
99 vcpu->arch.hcrx_el2 |= host_vcpu->arch.hcrx_el2;
100 }
101
102 static void pvm_init_traps_hcr(struct kvm_vcpu *vcpu)
103 {
104 struct kvm *kvm = vcpu->kvm;
105 u64 val = vcpu->arch.hcr_el2;
106
107 /* No support for AArch32. */
108 val |= HCR_RW;
109
110 /*
111 * Always trap:
112 * - Feature id registers: to control features exposed to guests
113 * - Implementation-defined features
114 */
115 val |= HCR_TACR | HCR_TIDCP | HCR_TID3 | HCR_TID1;
116
117 if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP)) {
118 val |= HCR_TERR | HCR_TEA;
119 val &= ~(HCR_FIEN);
120 }
121
122 if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, IMP))
123 val &= ~(HCR_AMVOFFEN);
124
125 if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, MTE, IMP)) {
126 val |= HCR_TID5;
127 val &= ~(HCR_DCT | HCR_ATA);
128 }
129
130 if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, LO, IMP))
131 val |= HCR_TLOR;
132
133 vcpu->arch.hcr_el2 = val;
134 }
135
136 static void pvm_init_traps_hcrx(struct kvm_vcpu *vcpu)
137 {
138 struct kvm *kvm = vcpu->kvm;
139 u64 hcrx_set = 0;
140
141 if (!cpus_have_final_cap(ARM64_HAS_HCX))
142 return;
143
144 if (kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP))
145 hcrx_set |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2);
146
147 if (kvm_has_feat(kvm, ID_AA64MMFR3_EL1, TCRX, IMP))
148 hcrx_set |= HCRX_EL2_TCR2En;
149
150 if (kvm_has_fpmr(kvm))
151 hcrx_set |= HCRX_EL2_EnFPM;
152
153 vcpu->arch.hcrx_el2 |= hcrx_set;
154 }
155
156 static void pvm_init_traps_mdcr(struct kvm_vcpu *vcpu)
157 {
158 struct kvm *kvm = vcpu->kvm;
159 u64 val = vcpu->arch.mdcr_el2;
160
161 if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, IMP)) {
162 val |= MDCR_EL2_TPM | MDCR_EL2_TPMCR;
163 val &= ~(MDCR_EL2_HPME | MDCR_EL2_MTPME | MDCR_EL2_HPMN_MASK);
164 }
165
166 if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, DebugVer, IMP))
167 val |= MDCR_EL2_TDRA | MDCR_EL2_TDA;
168
169 if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, DoubleLock, IMP))
170 val |= MDCR_EL2_TDOSA;
171
172 if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, IMP)) {
173 val |= MDCR_EL2_TPMS;
174 val &= ~(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT);
175 }
176
177 if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceFilt, IMP))
178 val |= MDCR_EL2_TTRF;
179
180 if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, ExtTrcBuff, IMP))
181 val |= MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT;
182
183 /* Trap Debug Communications Channel registers */
184 if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, FGT, IMP))
185 val |= MDCR_EL2_TDCC;
186
187 vcpu->arch.mdcr_el2 = val;
188 }
189
190 /*
191 * Check that cpu features that are neither trapped nor supported are not
192 * enabled for protected VMs.
193 */
194 static int pkvm_check_pvm_cpu_features(struct kvm_vcpu *vcpu)
195 {
196 struct kvm *kvm = vcpu->kvm;
197
198 /* Protected KVM does not support AArch32 guests. */
199 if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL0, AARCH32) ||
200 kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL1, AARCH32))
201 return -EINVAL;
202
203 /*
204 * Linux guests assume support for floating-point and Advanced SIMD. Do
205 * not change the trapping behavior for these from the KVM default.
206 */
207 if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, FP, IMP) ||
208 !kvm_has_feat(kvm, ID_AA64PFR0_EL1, AdvSIMD, IMP))
209 return -EINVAL;
210
211 /* No SME support in KVM right now. Check to catch if it changes. */
212 if (kvm_has_feat(kvm, ID_AA64PFR1_EL1, SME, IMP))
213 return -EINVAL;
214
215 return 0;
216 }
217
218 /*
219 * Initialize trap register values in protected mode.
220 */
221 static int pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu)
222 {
223 struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
224 int ret;
225
226 vcpu->arch.mdcr_el2 = 0;
227
228 pkvm_vcpu_reset_hcr(vcpu);
229 pkvm_vcpu_reset_hcrx(hyp_vcpu);
230
231 if (!pkvm_hyp_vcpu_is_protected(hyp_vcpu))
232 return 0;
233
234 ret = pkvm_check_pvm_cpu_features(vcpu);
235 if (ret)
236 return ret;
237
238 pvm_init_traps_hcr(vcpu);
239 pvm_init_traps_hcrx(vcpu);
240 pvm_init_traps_mdcr(vcpu);
241
242 return 0;
243 }
244
245 /*
246 * Start the VM table handle at the offset defined instead of at 0.
247 * Mainly for sanity checking and debugging.
248 */
249 #define HANDLE_OFFSET 0x1000
250
251 static unsigned int vm_handle_to_idx(pkvm_handle_t handle)
252 {
253 return handle - HANDLE_OFFSET;
254 }
255
256 static pkvm_handle_t idx_to_vm_handle(unsigned int idx)
257 {
258 return idx + HANDLE_OFFSET;
259 }
260
261 /* Rwlock for protecting state related to the VM table. */
262 static DEFINE_HYP_RWLOCK(vm_table_lock);
263
264 /*
265 * The table of VM entries for protected VMs in hyp.
266 * Allocated at hyp initialization and setup.
267 */
268 static struct pkvm_hyp_vm **vm_table;
269
270 void pkvm_hyp_vm_table_init(void *tbl)
271 {
272 WARN_ON(vm_table);
273 vm_table = tbl;
274 }
275
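/*
 * Map memory donated by the host into hyp: check page alignment and donate
 * the backing pages from the host to hyp without clearing their contents.
 * Returns the hyp VA on success, NULL on failure.
 */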
276 static void *map_donated_memory_noclear(unsigned long host_va, size_t size)
277 {
278 void *va = (void *)kern_hyp_va(host_va);
279
280 if (!PAGE_ALIGNED(va))
281 return NULL;
282
283 if (__pkvm_host_donate_hyp(hyp_virt_to_pfn(va),
284 PAGE_ALIGN(size) >> PAGE_SHIFT))
285 return NULL;
286
287 return va;
288 }
289
290 static void __unmap_donated_memory(void *va, size_t size)
291 {
292 kvm_flush_dcache_to_poc(va, size);
293 WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(va),
294 PAGE_ALIGN(size) >> PAGE_SHIFT));
295 }
296
297 static void unmap_donated_memory(void *va, size_t size)
298 {
299 if (!va)
300 return;
301
302 memset(va, 0, size);
303 __unmap_donated_memory(va, size);
304 }
305
306 static void unmap_donated_memory_noclear(void *va, size_t size)
307 {
308 if (!va)
309 return;
310
311 __unmap_donated_memory(va, size);
312 }
313
314 /*
315 * Return the hyp vm structure corresponding to the handle.
316 */
317 static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle)
318 {
319 unsigned int idx = vm_handle_to_idx(handle);
320
321 if (unlikely(idx >= KVM_MAX_PVMS))
322 return NULL;
323
324 return vm_table[idx];
325 }
326
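/*
 * Look up a VM by handle and take a reference on it. Returns NULL if the
 * handle is invalid or the VM is dying. The reference is dropped with
 * put_pkvm_hyp_vm().
 */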
327 struct pkvm_hyp_vm *get_pkvm_hyp_vm(pkvm_handle_t handle)
328 {
329 struct pkvm_hyp_vm *hyp_vm;
330
331 hyp_read_lock(&vm_table_lock);
332
333 hyp_vm = get_vm_by_handle(handle);
334 if (!hyp_vm)
335 goto unlock;
336 if (hyp_vm->is_dying)
337 hyp_vm = NULL;
338 else
339 hyp_refcount_inc(hyp_vm->refcount);
340
341 unlock:
342 hyp_read_unlock(&vm_table_lock);
343
344 return hyp_vm;
345 }
346
347 void put_pkvm_hyp_vm(struct pkvm_hyp_vm *hyp_vm)
348 {
349 hyp_refcount_dec(hyp_vm->refcount);
350 }
351
352 struct pkvm_hyp_vm *get_np_pkvm_hyp_vm(pkvm_handle_t handle)
353 {
354 struct pkvm_hyp_vm *hyp_vm = get_pkvm_hyp_vm(handle);
355
356 if (hyp_vm && pkvm_hyp_vm_is_protected(hyp_vm)) {
357 put_pkvm_hyp_vm(hyp_vm);
358 hyp_vm = NULL;
359 }
360
361 return hyp_vm;
362 }
363
364 int __pkvm_reclaim_dying_guest_page(pkvm_handle_t handle, u64 pfn, u64 gfn, u8 order)
365 {
366 struct pkvm_hyp_vm *hyp_vm;
367 int ret = -EINVAL;
368
369 hyp_read_lock(&vm_table_lock);
370 hyp_vm = get_vm_by_handle(handle);
371 if (!hyp_vm || !hyp_vm->is_dying)
372 goto unlock;
373
374 ret = __pkvm_host_reclaim_page(hyp_vm, pfn, gfn << PAGE_SHIFT, order);
375 if (ret)
376 goto unlock;
377
378 drain_hyp_pool(&hyp_vm->pool, &hyp_vm->host_kvm->arch.pkvm.stage2_teardown_mc);
379 unlock:
380 hyp_read_unlock(&vm_table_lock);
381
382 return ret;
383 }
384
385 int __pkvm_reclaim_dying_guest_ffa_resources(pkvm_handle_t handle)
386 {
387 struct pkvm_hyp_vm *hyp_vm;
388 int ret = -EINVAL;
389
390 hyp_read_lock(&vm_table_lock);
391 hyp_vm = get_vm_by_handle(handle);
392 if (hyp_vm && hyp_vm->is_dying)
393 ret = kvm_dying_guest_reclaim_ffa_resources(hyp_vm);
394 hyp_read_unlock(&vm_table_lock);
395
396 return ret;
397 }
398
399 int __pkvm_notify_guest_vm_avail(pkvm_handle_t handle)
400 {
401 struct pkvm_hyp_vm *hyp_vm;
402 int ret = 0;
403
404 hyp_read_lock(&vm_table_lock);
405 hyp_vm = get_vm_by_handle(handle);
406 if (!hyp_vm || !hyp_vm->kvm.arch.pkvm.ffa_support) {
407 ret = -EBUSY;
408 goto unlock;
409 }
410
411 ret = kvm_guest_notify_availability(vm_handle_to_ffa_handle(handle), &hyp_vm->ffa_buf,
412 hyp_vm->is_dying);
413 unlock:
414 hyp_read_unlock(&vm_table_lock);
415 return ret;
416 }
417
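/*
 * Load the hyp vCPU identified by @handle/@vcpu_idx onto this physical CPU.
 * Takes a reference on the VM and marks the vCPU as loaded so that it cannot
 * be loaded on another CPU concurrently; both are released by
 * pkvm_put_hyp_vcpu(). Returns NULL on failure (unknown handle, dying VM,
 * out-of-range index, uninitialised or already-loaded vCPU).
 */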
418 struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
419 unsigned int vcpu_idx)
420 {
421 struct pkvm_hyp_vcpu *hyp_vcpu = NULL;
422 struct pkvm_hyp_vm *hyp_vm;
423
424 /* Cannot load a new vcpu without putting the old one first. */
425 if (__this_cpu_read(loaded_hyp_vcpu))
426 return NULL;
427
428 hyp_read_lock(&vm_table_lock);
429 hyp_vm = get_vm_by_handle(handle);
430 if (!hyp_vm || hyp_vm->is_dying || hyp_vm->kvm.created_vcpus <= vcpu_idx)
431 goto unlock;
432
433 /*
434 * Synchronise with concurrent vCPU initialisation by relying on
435 * dependency ordering from the vCPU pointer.
436 */
437 hyp_vcpu = READ_ONCE(hyp_vm->vcpus[vcpu_idx]);
438 if (!hyp_vcpu)
439 goto unlock;
440
441 /* Ensure vcpu isn't loaded on more than one cpu simultaneously. */
442 if (unlikely(cmpxchg_relaxed(&hyp_vcpu->loaded_hyp_vcpu, NULL,
443 this_cpu_ptr(&loaded_hyp_vcpu)))) {
444 hyp_vcpu = NULL;
445 goto unlock;
446 }
447
448 hyp_refcount_inc(hyp_vm->refcount);
449 unlock:
450 hyp_read_unlock(&vm_table_lock);
451
452 if (hyp_vcpu)
453 __this_cpu_write(loaded_hyp_vcpu, hyp_vcpu);
454 return hyp_vcpu;
455 }
456
457 void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
458 {
459 struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
460
461 __this_cpu_write(loaded_hyp_vcpu, NULL);
462
463 /*
464 * Clearing the 'loaded_hyp_vcpu' field allows the 'hyp_vcpu' to
465 * be loaded by another physical CPU, so make sure we're done
466 * with the vCPU before letting somebody else play with it.
467 */
468 smp_store_release(&hyp_vcpu->loaded_hyp_vcpu, NULL);
469
470 /*
471 * We don't hold the 'vm_table_lock'. Once the refcount hits
472 * zero, VM teardown can destroy the VM's data structures and
473 * so this must come last.
474 */
475 smp_wmb();
476 hyp_refcount_dec(hyp_vm->refcount);
477 }
478
479 struct pkvm_hyp_vcpu *pkvm_get_loaded_hyp_vcpu(void)
480 {
481 return __this_cpu_read(loaded_hyp_vcpu);
482 }
483
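/*
 * Initialise the hyp view of the VM's features. Non-protected VMs inherit
 * the host's flags and vCPU features unchanged; protected VMs are restricted
 * to the intersection of what the host requested and what pKVM allows.
 */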
484 static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struct kvm *host_kvm)
485 {
486 struct kvm *kvm = &hyp_vm->kvm;
487 unsigned long host_arch_flags = READ_ONCE(host_kvm->arch.flags);
488
489 /* No restrictions for non-protected VMs. */
490 if (!kvm_vm_is_protected(kvm)) {
491 hyp_vm->kvm.arch.flags = host_arch_flags;
492
493 bitmap_copy(kvm->arch.vcpu_features,
494 host_kvm->arch.vcpu_features,
495 KVM_VCPU_MAX_FEATURES);
496 return;
497 }
498
499 kvm->arch.vcpu_features[0] = pvm_supported_vcpu_features() &
500 host_kvm->arch.vcpu_features[0];
501
502 if (kvm_pvm_ext_allowed(KVM_CAP_ARM_SVE) && kvm_has_sve(host_kvm))
503 set_bit(KVM_ARCH_FLAG_GUEST_HAS_SVE, &kvm->arch.flags);
504
505 if (kvm_pvm_ext_allowed(KVM_CAP_ARM_MTE) && kvm_has_mte(host_kvm))
506 set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags);
507 }
508
509 static int pkvm_vcpu_init_psci(struct pkvm_hyp_vcpu *hyp_vcpu, u32 mp_state)
510 {
511 struct vcpu_reset_state *reset_state = &hyp_vcpu->vcpu.arch.reset_state;
512 struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
513
514 if (!pkvm_hyp_vcpu_is_protected(hyp_vcpu)) {
515 /*
516 * The host is responsible for managing the vcpu state.
517 * Treat it as always on as far as hyp is concerned.
518 */
519 hyp_vcpu->power_state = PSCI_0_2_AFFINITY_LEVEL_ON;
520 return 0;
521 }
522
523 if (mp_state == KVM_MP_STATE_STOPPED) {
524 reset_state->reset = false;
525 hyp_vcpu->power_state = PSCI_0_2_AFFINITY_LEVEL_OFF;
526 } else if (pkvm_hyp_vm_has_pvmfw(hyp_vm)) {
527 if (hyp_vm->pvmfw_entry_vcpu)
528 return -EINVAL;
529
530 hyp_vm->pvmfw_entry_vcpu = hyp_vcpu;
531 reset_state->reset = true;
532 hyp_vcpu->power_state = PSCI_0_2_AFFINITY_LEVEL_ON_PENDING;
533 } else {
534 struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
535
536 reset_state->pc = READ_ONCE(host_vcpu->arch.ctxt.regs.pc);
537 reset_state->r0 = READ_ONCE(host_vcpu->arch.ctxt.regs.regs[0]);
538 reset_state->reset = true;
539 hyp_vcpu->power_state = PSCI_0_2_AFFINITY_LEVEL_ON_PENDING;
540 }
541
542 return 0;
543 }
544
545 static void unpin_host_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
546 {
547 struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
548 void *hyp_reqs = hyp_vcpu->vcpu.arch.hyp_reqs;
549
550 if (host_vcpu)
551 hyp_unpin_shared_mem(host_vcpu, host_vcpu + 1);
552 if (hyp_reqs)
553 hyp_unpin_shared_mem(hyp_reqs, hyp_reqs + 1);
554 }
555
556 static void unpin_host_sve_state(struct pkvm_hyp_vcpu *hyp_vcpu)
557 {
558 void *sve_state;
559
560 if (!vcpu_has_feature(&hyp_vcpu->vcpu, KVM_ARM_VCPU_SVE))
561 return;
562
563 sve_state = kern_hyp_va(hyp_vcpu->vcpu.arch.sve_state);
564 hyp_unpin_shared_mem(sve_state,
565 sve_state + vcpu_sve_state_size(&hyp_vcpu->vcpu));
566 }
567
568 static void teardown_sve_state(struct pkvm_hyp_vcpu *hyp_vcpu)
569 {
570 struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
571 void *sve_state = hyp_vcpu->vcpu.arch.sve_state;
572
573 if (sve_state)
574 hyp_free_account(sve_state, hyp_vm->host_kvm);
575 }
576
577 static void unpin_host_vcpus(struct pkvm_hyp_vcpu *hyp_vcpus[],
578 unsigned int nr_vcpus)
579 {
580 int i;
581
582 for (i = 0; i < nr_vcpus; i++) {
583 struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vcpus[i];
584
585 if (!hyp_vcpu)
586 continue;
587
588 unpin_host_vcpu(hyp_vcpu);
589
590 if (!pkvm_hyp_vcpu_is_protected(hyp_vcpu))
591 unpin_host_sve_state(hyp_vcpu);
592 }
593 }
594
595 static size_t pkvm_get_last_ran_size(void)
596 {
597 return array_size(hyp_nr_cpus, sizeof(int));
598 }
599
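/*
 * Initialise the hyp shadow of the host's struct kvm: stage-2 VTCR, pvmfw
 * load address, FF-A/SMC-forwarding settings, the per-CPU last_vcpu_ran
 * array and the feature state derived from the host.
 */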
600 static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm,
601 int *last_ran, unsigned int nr_vcpus)
602 {
603 u64 pvmfw_load_addr = PVMFW_INVALID_LOAD_ADDR;
604
605 hyp_vm->host_kvm = host_kvm;
606 hyp_vm->kvm.created_vcpus = nr_vcpus;
607 hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
608 hyp_vm->kvm.arch.pkvm.enabled = READ_ONCE(host_kvm->arch.pkvm.enabled);
609 hyp_vm->kvm.arch.flags = 0;
610
611 if (hyp_vm->kvm.arch.pkvm.enabled)
612 pvmfw_load_addr = READ_ONCE(host_kvm->arch.pkvm.pvmfw_load_addr);
613 hyp_vm->kvm.arch.pkvm.pvmfw_load_addr = pvmfw_load_addr;
614
615 hyp_vm->kvm.arch.pkvm.ffa_support = READ_ONCE(host_kvm->arch.pkvm.ffa_support);
616 hyp_vm->kvm.arch.pkvm.smc_forwarded = READ_ONCE(host_kvm->arch.pkvm.smc_forwarded);
617 hyp_vm->kvm.arch.mmu.last_vcpu_ran = (int __percpu *)last_ran;
618 memset(last_ran, -1, pkvm_get_last_ran_size());
619 pkvm_init_features_from_host(hyp_vm, host_kvm);
620 hyp_spin_lock_init(&hyp_vm->vcpus_lock);
621 INIT_LIST_HEAD(&hyp_vm->ffa_buf.xfer_list);
622 }
623
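/*
 * Set up the vCPU's SVE state: for protected vCPUs the buffer is allocated
 * in hyp (and accounted to the host), for non-protected vCPUs the host's
 * buffer is pinned and reused. No-op if the vCPU doesn't have SVE.
 */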
624 static int pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *host_vcpu)
625 {
626 struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
627 unsigned int sve_max_vl;
628 size_t sve_state_size;
629 void *sve_state;
630
631 if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE))
632 return 0;
633
634 /* Limit guest vector length to the maximum supported by the host. */
635 sve_max_vl = min(READ_ONCE(host_vcpu->arch.sve_max_vl), kvm_host_sve_max_vl);
636 sve_state_size = sve_state_size(sve_max_vl);
637 sve_state = kern_hyp_va(READ_ONCE(host_vcpu->arch.sve_state));
638
639 if (!sve_state && !pkvm_hyp_vcpu_is_protected(hyp_vcpu))
640 return -EINVAL;
641
642 if (!sve_state_size || (sve_max_vl > kvm_sve_max_vl))
643 return -EINVAL;
644
645 if (pkvm_hyp_vcpu_is_protected(hyp_vcpu)) {
646 struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
647
648 sve_state = hyp_alloc_account(sve_state_size,
649 hyp_vm->host_kvm);
650 if (!sve_state)
651 return hyp_alloc_errno();
652 } else {
653 int ret;
654
655 ret = hyp_pin_shared_mem(sve_state, sve_state + sve_state_size);
656 if (ret)
657 return ret;
658 }
659
660 vcpu->arch.sve_state = sve_state;
661 vcpu->arch.sve_max_vl = sve_max_vl;
662 vcpu_set_flag(vcpu, VCPU_SVE_FINALIZED);
663
664 return 0;
665 }
666
667 static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
668 struct pkvm_hyp_vm *hyp_vm,
669 struct kvm_vcpu *host_vcpu)
670 {
671 int ret = 0;
672 u32 mp_state;
673 struct kvm_hyp_req *hyp_reqs;
674
675 if (hyp_pin_shared_mem(host_vcpu, host_vcpu + 1))
676 return -EBUSY;
677
678 hyp_reqs = READ_ONCE(host_vcpu->arch.hyp_reqs);
679 if (!PAGE_ALIGNED(hyp_reqs)) {
680 hyp_unpin_shared_mem(host_vcpu, host_vcpu + 1);
681 return -EINVAL;
682 }
683
684 hyp_vcpu->vcpu.arch.hyp_reqs = kern_hyp_va(hyp_reqs);
685 if (hyp_pin_shared_mem(hyp_vcpu->vcpu.arch.hyp_reqs,
686 hyp_vcpu->vcpu.arch.hyp_reqs + 1)) {
687 hyp_unpin_shared_mem(host_vcpu, host_vcpu + 1);
688 return -EBUSY;
689 }
690
691 mp_state = READ_ONCE(host_vcpu->arch.mp_state.mp_state);
692 if (mp_state != KVM_MP_STATE_RUNNABLE && mp_state != KVM_MP_STATE_STOPPED) {
693 ret = -EINVAL;
694 goto done;
695 }
696
697 hyp_vcpu->host_vcpu = host_vcpu;
698
699 hyp_vcpu->vcpu.kvm = &hyp_vm->kvm;
700 hyp_vcpu->vcpu.vcpu_id = READ_ONCE(host_vcpu->vcpu_id);
701 hyp_vcpu->vcpu.vcpu_idx = READ_ONCE(host_vcpu->vcpu_idx);
702
703 hyp_vcpu->vcpu.arch.hw_mmu = &hyp_vm->kvm.arch.mmu;
704 hyp_vcpu->vcpu.arch.cflags = READ_ONCE(host_vcpu->arch.cflags);
705 hyp_vcpu->vcpu.arch.debug_ptr = &host_vcpu->arch.vcpu_debug_state;
706 hyp_vcpu->vcpu.arch.hyp_reqs->type = KVM_HYP_LAST_REQ;
707
708 if (pkvm_hyp_vcpu_is_protected(hyp_vcpu)) {
709 kvm_init_pvm_id_regs(&hyp_vcpu->vcpu);
710 kvm_reset_pvm_sys_regs(&hyp_vcpu->vcpu);
711 }
712
713 ret = pkvm_vcpu_init_traps(hyp_vcpu);
714 if (ret)
715 goto done;
716
717 ret = pkvm_vcpu_init_sve(hyp_vcpu, host_vcpu);
718 if (ret)
719 goto done;
720
721 ret = pkvm_vcpu_init_psci(hyp_vcpu, mp_state);
722 if (ret)
723 goto done;
724 done:
725 if (ret)
726 unpin_host_vcpu(hyp_vcpu);
727 return ret;
728 }
729
730 static int find_free_vm_table_entry(struct kvm *host_kvm)
731 {
732 int i;
733
734 for (i = 0; i < KVM_MAX_PVMS; ++i) {
735 if (!vm_table[i])
736 return i;
737 }
738
739 return -ENOMEM;
740 }
741
742 /*
743 * Allocate a VM table entry and insert a pointer to the new vm.
744 *
745 * Return a unique handle to the protected VM on success,
746 * negative error code on failure.
747 */
748 static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm,
749 struct pkvm_hyp_vm *hyp_vm)
750 {
751 struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu;
752 int idx;
753
754 hyp_assert_write_lock_held(&vm_table_lock);
755
756 /*
757 * Initializing protected state might have failed, yet a malicious
758 * host could trigger this function. Thus, ensure that 'vm_table'
759 * exists.
760 */
761 if (unlikely(!vm_table))
762 return -EINVAL;
763
764 idx = find_free_vm_table_entry(host_kvm);
765 if (idx < 0)
766 return idx;
767
768 hyp_vm->kvm.arch.pkvm.handle = idx_to_vm_handle(idx);
769
770 /* VMID 0 is reserved for the host */
771 atomic64_set(&mmu->vmid.id, idx + 1);
772
773 mmu->arch = &hyp_vm->kvm.arch;
774 mmu->pgt = &hyp_vm->pgt;
775
776 vm_table[idx] = hyp_vm;
777 list_add(&hyp_vm->vm_list, &running_vms);
778 return hyp_vm->kvm.arch.pkvm.handle;
779 }
780
781 /*
782 * Deallocate and remove the VM table entry corresponding to the handle.
783 */
784 static void remove_vm_table_entry(pkvm_handle_t handle)
785 {
786 struct pkvm_hyp_vm *hyp_vm;
787
788 hyp_assert_write_lock_held(&vm_table_lock);
789 hyp_vm = vm_table[vm_handle_to_idx(handle)];
790
791 /*
792 * If we haven't sent the destruction message yet, leak the VMID to
793 * prevent it from being reused.
794 */
795 if (hyp_vm->kvm.arch.pkvm.ffa_support &&
796 hyp_vm->ffa_buf.vm_avail_bitmap) {
797 vm_table[vm_handle_to_idx(handle)] = (void *)0xdeadbeef;
798 list_del(&hyp_vm->vm_list);
799 return;
800 }
801
802 vm_table[vm_handle_to_idx(handle)] = NULL;
803 list_del(&hyp_vm->vm_list);
804 }
805
806 static size_t pkvm_get_hyp_vm_size(unsigned int nr_vcpus)
807 {
808 return size_add(sizeof(struct pkvm_hyp_vm),
809 size_mul(sizeof(struct pkvm_hyp_vcpu *), nr_vcpus));
810 }
811
812 /*
813 * Initialize the hypervisor copy of the protected VM state using the
814 * memory donated by the host.
815 *
816 * Unmaps the donated memory from the host at stage 2.
817 *
818 * host_kvm: A pointer to the host's struct kvm.
819 * pgd_hva: The host va of the area being donated for the stage-2 PGD for
820 * the VM. Must be page aligned. Its size is implied by the VM's
821 * VTCR.
822 * Return a unique handle to the protected VM on success,
823 * negative error code on failure.
824 */
825 int __pkvm_init_vm(struct kvm *host_kvm, unsigned long pgd_hva)
826 {
827 struct pkvm_hyp_vm *hyp_vm = NULL;
828 int *last_ran = NULL;
829 unsigned int nr_vcpus;
830 void *pgd = NULL;
831 size_t pgd_size;
832 int ret;
833
834 ret = hyp_pin_shared_mem(host_kvm, host_kvm + 1);
835 if (ret)
836 return ret;
837
838 nr_vcpus = READ_ONCE(host_kvm->created_vcpus);
839 if (nr_vcpus < 1) {
840 ret = -EINVAL;
841 goto err_unpin_kvm;
842 }
843
844 hyp_vm = hyp_alloc_account(pkvm_get_hyp_vm_size(nr_vcpus),
845 host_kvm);
846 if (!hyp_vm) {
847 ret = hyp_alloc_errno();
848 goto err_unpin_kvm;
849 }
850
851 last_ran = hyp_alloc_account(pkvm_get_last_ran_size(), host_kvm);
852 if (!last_ran) {
853 ret = hyp_alloc_errno();
854 goto err_free_vm;
855 }
856
857 ret = -EINVAL;
858
859 pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.mmu.vtcr);
860 if (!IS_ALIGNED(pgd_hva, pgd_size))
861 goto err_free_last_ran;
862 pgd = map_donated_memory_noclear(pgd_hva, pgd_size);
863 if (!pgd)
864 goto err_free_last_ran;
865
866 init_pkvm_hyp_vm(host_kvm, hyp_vm, last_ran, nr_vcpus);
867
868 hyp_write_lock(&vm_table_lock);
869 ret = insert_vm_table_entry(host_kvm, hyp_vm);
870 if (ret < 0)
871 goto err_unlock;
872
873 ret = kvm_guest_prepare_stage2(hyp_vm, pgd);
874 if (ret)
875 goto err_remove_vm_table_entry;
876
877 ret = pkvm_pviommu_finalise(hyp_vm);
878 if (ret)
879 goto err_remove_vm_table_entry;
880
881 hyp_write_unlock(&vm_table_lock);
882
883 return hyp_vm->kvm.arch.pkvm.handle;
884
885 err_remove_vm_table_entry:
886 remove_vm_table_entry(hyp_vm->kvm.arch.pkvm.handle);
887 err_unlock:
888 hyp_write_unlock(&vm_table_lock);
889 unmap_donated_memory(pgd, pgd_size);
890 err_free_last_ran:
891 hyp_free_account(last_ran, host_kvm);
892 err_free_vm:
893 hyp_free_account(hyp_vm, host_kvm);
894 err_unpin_kvm:
895 hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
896 return ret;
897 }
898
899 struct ffa_mem_transfer *__pkvm_get_vm_ffa_transfer(u16 handle)
900 {
901 struct pkvm_hyp_vm *vm;
902 struct ffa_mem_transfer *transfer = NULL;
903
904 hyp_read_lock(&vm_table_lock);
905 list_for_each_entry(vm, &running_vms, vm_list) {
906 transfer = find_transfer_by_handle(handle, &vm->ffa_buf);
907 if (transfer)
908 goto unlock;
909 }
910 unlock:
911 hyp_read_unlock(&vm_table_lock);
912 return transfer;
913 }
914
915 /*
916 * Initialize the hypervisor copy of the protected vCPU state using the
917 * memory donated by the host.
918 *
919 * handle: The handle for the protected vm.
920 * host_vcpu: A pointer to the corresponding host vcpu.
921 *
922 * Return 0 on success, negative error code on failure.
923 */
924 int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu)
925 {
926 struct pkvm_hyp_vcpu *hyp_vcpu;
927 struct pkvm_hyp_vm *hyp_vm;
928 unsigned int idx;
929 int ret;
930
931 hyp_read_lock(&vm_table_lock);
932
933 hyp_vm = get_vm_by_handle(handle);
934 if (!hyp_vm) {
935 ret = -ENOENT;
936 goto unlock_vm;
937 }
938
939 hyp_vcpu = hyp_alloc_account(sizeof(*hyp_vcpu), hyp_vm->host_kvm);
940 if (!hyp_vcpu) {
941 ret = hyp_alloc_errno();
942 goto unlock_vm;
943 }
944
945 hyp_spin_lock(&hyp_vm->vcpus_lock);
946 ret = init_pkvm_hyp_vcpu(hyp_vcpu, hyp_vm, host_vcpu);
947 if (ret)
948 goto unlock_vcpus;
949
950 idx = hyp_vcpu->vcpu.vcpu_idx;
951 if (idx >= hyp_vm->kvm.created_vcpus) {
952 ret = -EINVAL;
953 goto unlock_vcpus;
954 }
955
956 if (hyp_vm->vcpus[idx]) {
957 ret = -EINVAL;
958 goto unlock_vcpus;
959 }
960
961 /*
962 * Ensure the hyp_vcpu is initialised before publishing it to
963 * the vCPU-load path via 'hyp_vm->vcpus[]'.
964 */
965 smp_store_release(&hyp_vm->vcpus[idx], hyp_vcpu);
966
967 unlock_vcpus:
968 hyp_spin_unlock(&hyp_vm->vcpus_lock);
969
970 if (ret)
971 hyp_free_account(hyp_vcpu, hyp_vm->host_kvm);
972
973 unlock_vm:
974 hyp_read_unlock(&vm_table_lock);
975
976 return ret;
977 }
978
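/*
 * Mark the VM as dying so that no new vCPUs can be loaded and no new
 * references taken. Fails with -EBUSY while the VM still holds references
 * (e.g. a loaded vCPU).
 */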
979 int __pkvm_start_teardown_vm(pkvm_handle_t handle)
980 {
981 struct pkvm_hyp_vm *hyp_vm;
982 int ret = 0;
983
984 hyp_write_lock(&vm_table_lock);
985 hyp_vm = get_vm_by_handle(handle);
986 if (!hyp_vm) {
987 ret = -ENOENT;
988 goto unlock;
989 } else if (WARN_ON(hyp_refcount_get(hyp_vm->refcount))) {
990 ret = -EBUSY;
991 goto unlock;
992 } else if (hyp_vm->is_dying) {
993 ret = -EINVAL;
994 goto unlock;
995 }
996
997 hyp_vm->is_dying = true;
998
999 unlock:
1000 hyp_write_unlock(&vm_table_lock);
1001
1002 return ret;
1003 }
1004
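/*
 * Final stage of VM teardown: detach the VM from the table, reclaim any
 * remaining FF-A resources, tear down devices and pvIOMMUs, destroy the
 * stage-2 page-table, and return all metadata pages to the host.
 */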
1005 int __pkvm_finalize_teardown_vm(pkvm_handle_t handle)
1006 {
1007 struct kvm_hyp_memcache *mc;
1008 struct pkvm_hyp_vm *hyp_vm;
1009 struct kvm *host_kvm;
1010 unsigned int idx;
1011 int err;
1012
1013 hyp_write_lock(&vm_table_lock);
1014 hyp_vm = get_vm_by_handle(handle);
1015 if (!hyp_vm) {
1016 err = -ENOENT;
1017 goto err_unlock;
1018 } else if (!hyp_vm->is_dying) {
1019 err = -EBUSY;
1020 goto err_unlock;
1021 }
1022
1023 host_kvm = hyp_vm->host_kvm;
1024
1025 /* Ensure the VMID is clean before it can be reallocated */
1026 __kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu);
1027 remove_vm_table_entry(handle);
1028 hyp_write_unlock(&vm_table_lock);
1029
1030 /* A well-behaved host will have reclaimed all FF-A resources already */
1031 do {
1032 err = kvm_dying_guest_reclaim_ffa_resources(hyp_vm);
1033 } while (err == -EAGAIN);
1034 WARN_ON(err);
1035
1036 pkvm_devices_teardown(hyp_vm);
1037
1038 pkvm_pviommu_teardown(hyp_vm);
1039
1040 /*
1041 * At this point all page tables have been destroyed and pushed to the pool;
1042 * the only place that might still hold memory is the memcache, which is
1043 * drained from the host side as it hasn't been donated yet.
1044 */
1045 drain_hyp_pool(&hyp_vm->iommu_pool, &host_kvm->arch.pkvm.teardown_iommu_mc);
1046
1047 /*
1048 * At this point, the VM has been detached from the VM table and
1049 * has a refcount of 0 so we're free to tear it down without
1050 * worrying about anybody else.
1051 */
1052
1053 mc = &host_kvm->arch.pkvm.stage2_teardown_mc;
1054 destroy_hyp_vm_pgt(hyp_vm);
1055 drain_hyp_pool(&hyp_vm->pool, mc);
1056 unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->kvm.created_vcpus);
1057
1058 /* Push the metadata pages to the teardown memcache */
1059 for (idx = 0; idx < hyp_vm->kvm.created_vcpus; ++idx) {
1060 struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vm->vcpus[idx];
1061 struct kvm_hyp_memcache *vcpu_mc;
1062 void *addr;
1063
1064 if (!hyp_vcpu)
1065 continue;
1066
1067 vcpu_mc = &hyp_vcpu->vcpu.arch.stage2_mc;
1068 while (vcpu_mc->nr_pages) {
1069 unsigned long order;
1070
1071 addr = pop_hyp_memcache(vcpu_mc, hyp_phys_to_virt, &order);
1072 /* We don't expect vcpu to have higher order pages. */
1073 WARN_ON(order);
1074 push_hyp_memcache(mc, addr, hyp_virt_to_phys, order);
1075 unmap_donated_memory_noclear(addr, PAGE_SIZE);
1076 }
1077
1078 if (pkvm_hyp_vcpu_is_protected(hyp_vcpu))
1079 teardown_sve_state(hyp_vcpu);
1080
1081 hyp_free_account(hyp_vcpu, host_kvm);
1082 }
1083
1084 hyp_free_account((__force void *)hyp_vm->kvm.arch.mmu.last_vcpu_ran,
1085 host_kvm);
1086 hyp_free_account(hyp_vm, host_kvm);
1087 hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
1088 return 0;
1089
1090 err_unlock:
1091 hyp_write_unlock(&vm_table_lock);
1092 return err;
1093 }
1094
1095 int pkvm_load_pvmfw_pages(struct pkvm_hyp_vm *vm, u64 ipa, phys_addr_t phys,
1096 u64 size)
1097 {
1098 struct kvm_protected_vm *pkvm = &vm->kvm.arch.pkvm;
1099 u64 npages, offset = ipa - pkvm->pvmfw_load_addr;
1100 void *src = hyp_phys_to_virt(pvmfw_base) + offset;
1101
1102 if (offset >= pvmfw_size)
1103 return -EINVAL;
1104
1105 size = min(size, pvmfw_size - offset);
1106 if (!PAGE_ALIGNED(size) || !PAGE_ALIGNED(src))
1107 return -EINVAL;
1108
1109 npages = size >> PAGE_SHIFT;
1110 while (npages--) {
1111 /*
1112 * No need for cache maintenance here, as the pgtable code will
1113 * take care of this when installing the pte in the guest's
1114 * stage-2 page table.
1115 */
1116 memcpy(hyp_fixmap_map(phys), src, PAGE_SIZE);
1117 hyp_fixmap_unmap();
1118
1119 src += PAGE_SIZE;
1120 phys += PAGE_SIZE;
1121 }
1122
1123 return 0;
1124 }
1125
1126 void pkvm_poison_pvmfw_pages(void)
1127 {
1128 u64 npages = pvmfw_size >> PAGE_SHIFT;
1129 phys_addr_t addr = pvmfw_base;
1130
1131 while (npages--) {
1132 hyp_poison_page(addr, PAGE_SIZE);
1133 addr += PAGE_SIZE;
1134 }
1135 }
1136
1137 /*
1138 * This function sets the registers on the vcpu to their architecturally defined
1139 * reset values.
1140 *
1141 * Note: Can only be called by the vcpu on itself, after it has been turned on.
1142 */
1143 void pkvm_reset_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
1144 {
1145 struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
1146 struct vcpu_reset_state *reset_state = &vcpu->arch.reset_state;
1147 struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
1148
1149 WARN_ON(!reset_state->reset);
1150
1151 kvm_reset_vcpu_core(vcpu);
1152 kvm_reset_pvm_sys_regs(vcpu);
1153
1154 /* Must be done after resetting sys registers. */
1155 kvm_reset_vcpu_psci(vcpu, reset_state);
1156 if (hyp_vm->pvmfw_entry_vcpu == hyp_vcpu) {
1157 struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
1158 u64 entry = hyp_vm->kvm.arch.pkvm.pvmfw_load_addr;
1159 int i;
1160
1161 /* X0 - X14 provided by the VMM (preserved) */
1162 for (i = 0; i <= 14; ++i) {
1163 u64 val = vcpu_get_reg(host_vcpu, i);
1164
1165 vcpu_set_reg(&hyp_vcpu->vcpu, i, val);
1166 }
1167
1168 /* X15: Boot protocol version */
1169 vcpu_set_reg(&hyp_vcpu->vcpu, 15, 0);
1170
1171 /* PC: IPA of pvmfw base */
1172 *vcpu_pc(&hyp_vcpu->vcpu) = entry;
1173 hyp_vm->pvmfw_entry_vcpu = NULL;
1174
1175 /* Auto enroll MMIO guard */
1176 set_bit(KVM_ARCH_FLAG_MMIO_GUARD, &hyp_vm->kvm.arch.flags);
1177 }
1178
1179 if (pkvm_hyp_vcpu_is_protected(hyp_vcpu) && vcpu_has_sve(vcpu))
1180 memset(vcpu->arch.sve_state, 0, vcpu_sve_state_size(vcpu));
1181
1182 reset_state->reset = false;
1183
1184 hyp_vcpu->exit_code = 0;
1185
1186 WARN_ON(hyp_vcpu->power_state != PSCI_0_2_AFFINITY_LEVEL_ON_PENDING);
1187 WRITE_ONCE(hyp_vcpu->power_state, PSCI_0_2_AFFINITY_LEVEL_ON);
1188 }
1189
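/*
 * Reserve the next free slot in the vCPU's shared hyp-request page and tag
 * it with @type. Returns NULL if no slot is available; the final slot is
 * always kept as a LAST_REQ terminator.
 */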
1190 struct kvm_hyp_req *pkvm_hyp_req_reserve(struct pkvm_hyp_vcpu *hyp_vcpu, u8 type)
1191 {
1192 struct kvm_hyp_req *next, *hyp_req = hyp_vcpu->vcpu.arch.hyp_reqs;
1193 int i;
1194
1195 for (i = 0; i < KVM_HYP_REQ_MAX; i++) {
1196 if (hyp_req->type == KVM_HYP_LAST_REQ)
1197 break;
1198 hyp_req++;
1199 }
1200
1201 /* The last entry of the page _must_ be a LAST_REQ */
1202 WARN_ON(i >= KVM_HYP_REQ_MAX);
1203
1204 /* We need at least one empty slot to write LAST_REQ */
1205 if (i + 1 >= KVM_HYP_REQ_MAX)
1206 return NULL;
1207
1208 hyp_req->type = type;
1209
1210 next = hyp_req + 1;
1211 next->type = KVM_HYP_LAST_REQ;
1212
1213 return hyp_req;
1214 }
1215
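/* Find the vCPU whose MPIDR affinity matches @mpidr, or NULL if none does. */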
1216 struct pkvm_hyp_vcpu *pkvm_mpidr_to_hyp_vcpu(struct pkvm_hyp_vm *hyp_vm,
1217 u64 mpidr)
1218 {
1219 struct pkvm_hyp_vcpu *hyp_vcpu;
1220 int i;
1221
1222 mpidr &= MPIDR_HWID_BITMASK;
1223
1224 hyp_spin_lock(&hyp_vm->vcpus_lock);
1225 for (i = 0; i < hyp_vm->kvm.created_vcpus; i++) {
1226 hyp_vcpu = hyp_vm->vcpus[i];
1227 if (!hyp_vcpu)
1228 continue;
1229
1230 if (mpidr == kvm_vcpu_get_mpidr_aff(&hyp_vcpu->vcpu))
1231 goto unlock;
1232 }
1233 hyp_vcpu = NULL;
1234 unlock:
1235 hyp_spin_unlock(&hyp_vm->vcpus_lock);
1236 return hyp_vcpu;
1237 }
1238
1239 /*
1240 * Returns true if the hypervisor has handled the PSCI call, and control should
1241 * go back to the guest, or false if the host needs to do some additional work
1242 * (i.e., wake up the vcpu).
1243 */
1244 static bool pvm_psci_vcpu_on(struct pkvm_hyp_vcpu *hyp_vcpu)
1245 {
1246 struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
1247 struct vcpu_reset_state *reset_state;
1248 struct pkvm_hyp_vcpu *target;
1249 unsigned long cpu_id, ret;
1250 int power_state;
1251
1252 cpu_id = smccc_get_arg1(&hyp_vcpu->vcpu);
1253 if (!kvm_psci_valid_affinity(&hyp_vcpu->vcpu, cpu_id)) {
1254 ret = PSCI_RET_INVALID_PARAMS;
1255 goto error;
1256 }
1257
1258 target = pkvm_mpidr_to_hyp_vcpu(hyp_vm, cpu_id);
1259 if (!target) {
1260 ret = PSCI_RET_INVALID_PARAMS;
1261 goto error;
1262 }
1263
1264 /*
1265 * Make sure the requested vcpu is not on to begin with.
1266 * Atomic to avoid race between vcpus trying to power on the same vcpu.
1267 */
1268 power_state = cmpxchg(&target->power_state,
1269 PSCI_0_2_AFFINITY_LEVEL_OFF,
1270 PSCI_0_2_AFFINITY_LEVEL_ON_PENDING);
1271 switch (power_state) {
1272 case PSCI_0_2_AFFINITY_LEVEL_ON_PENDING:
1273 ret = PSCI_RET_ON_PENDING;
1274 goto error;
1275 case PSCI_0_2_AFFINITY_LEVEL_ON:
1276 ret = PSCI_RET_ALREADY_ON;
1277 goto error;
1278 case PSCI_0_2_AFFINITY_LEVEL_OFF:
1279 break;
1280 default:
1281 ret = PSCI_RET_INTERNAL_FAILURE;
1282 goto error;
1283 }
1284
1285 reset_state = &target->vcpu.arch.reset_state;
1286 reset_state->pc = smccc_get_arg2(&hyp_vcpu->vcpu);
1287 reset_state->r0 = smccc_get_arg3(&hyp_vcpu->vcpu);
1288 /* Propagate caller endianness */
1289 reset_state->be = kvm_vcpu_is_be(&hyp_vcpu->vcpu);
1290 reset_state->reset = true;
1291
1292 /*
1293 * Return to the host, which should make the KVM_REQ_VCPU_RESET request
1294 * as well as kvm_vcpu_wake_up() to schedule the vcpu.
1295 */
1296 return false;
1297
1298 error:
1299 /* If there's an error go back straight to the guest. */
1300 smccc_set_retval(&hyp_vcpu->vcpu, ret, 0, 0, 0);
1301 return true;
1302 }
1303
1304 static bool pvm_psci_vcpu_affinity_info(struct pkvm_hyp_vcpu *hyp_vcpu)
1305 {
1306 unsigned long target_affinity_mask, target_affinity, lowest_affinity_level;
1307 struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
1308 struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
1309 unsigned long mpidr, ret;
1310 int i, matching_cpus = 0;
1311
1312 target_affinity = smccc_get_arg1(vcpu);
1313 lowest_affinity_level = smccc_get_arg2(vcpu);
1314 if (!kvm_psci_valid_affinity(vcpu, target_affinity)) {
1315 ret = PSCI_RET_INVALID_PARAMS;
1316 goto done;
1317 }
1318
1319 /* Determine target affinity mask */
1320 target_affinity_mask = psci_affinity_mask(lowest_affinity_level);
1321 if (!target_affinity_mask) {
1322 ret = PSCI_RET_INVALID_PARAMS;
1323 goto done;
1324 }
1325
1326 /* Ignore other bits of target affinity */
1327 target_affinity &= target_affinity_mask;
1328 ret = PSCI_0_2_AFFINITY_LEVEL_OFF;
1329
1330 /*
1331 * If at least one vcpu matching target affinity is ON then return ON;
1332 * otherwise, if at least one is PENDING_ON then return PENDING_ON.
1333 * Otherwise, return OFF.
1334 */
1335 hyp_spin_lock(&hyp_vm->vcpus_lock);
1336 for (i = 0; i < hyp_vm->kvm.created_vcpus; i++) {
1337 struct pkvm_hyp_vcpu *target = hyp_vm->vcpus[i];
1338
1339 if (!target)
1340 continue;
1341
1342 mpidr = kvm_vcpu_get_mpidr_aff(&target->vcpu);
1343
1344 if ((mpidr & target_affinity_mask) == target_affinity) {
1345 int power_state;
1346
1347 matching_cpus++;
1348 power_state = READ_ONCE(target->power_state);
1349 switch (power_state) {
1350 case PSCI_0_2_AFFINITY_LEVEL_ON_PENDING:
1351 ret = PSCI_0_2_AFFINITY_LEVEL_ON_PENDING;
1352 break;
1353 case PSCI_0_2_AFFINITY_LEVEL_ON:
1354 ret = PSCI_0_2_AFFINITY_LEVEL_ON;
1355 goto unlock;
1356 case PSCI_0_2_AFFINITY_LEVEL_OFF:
1357 break;
1358 default:
1359 ret = PSCI_RET_INTERNAL_FAILURE;
1360 goto unlock;
1361 }
1362 }
1363 }
1364
1365 if (!matching_cpus)
1366 ret = PSCI_RET_INVALID_PARAMS;
1367 unlock:
1368 hyp_spin_unlock(&hyp_vm->vcpus_lock);
1369 done:
1370 /* Nothing to be handled by the host. Go back to the guest. */
1371 smccc_set_retval(vcpu, ret, 0, 0, 0);
1372 return true;
1373 }
1374
1375 /*
1376 * Returns true if the hypervisor has handled the PSCI call, and control should
1377 * go back to the guest, or false if the host needs to do some additional work
1378 * (e.g., turn off and update vcpu scheduling status).
1379 */
1380 static bool pvm_psci_vcpu_off(struct pkvm_hyp_vcpu *hyp_vcpu)
1381 {
1382 WARN_ON(hyp_vcpu->power_state != PSCI_0_2_AFFINITY_LEVEL_ON);
1383 WRITE_ONCE(hyp_vcpu->power_state, PSCI_0_2_AFFINITY_LEVEL_OFF);
1384
1385 /* Return to the host so that it can finish powering off the vcpu. */
1386 return false;
1387 }
1388
1389 static bool pvm_psci_version(struct pkvm_hyp_vcpu *hyp_vcpu)
1390 {
1391 /* Nothing to be handled by the host. Go back to the guest. */
1392 smccc_set_retval(&hyp_vcpu->vcpu, KVM_ARM_PSCI_1_1, 0, 0, 0);
1393 return true;
1394 }
1395
1396 static bool pvm_psci_not_supported(struct pkvm_hyp_vcpu *hyp_vcpu)
1397 {
1398 /* Nothing to be handled by the host. Go back to the guest. */
1399 smccc_set_retval(&hyp_vcpu->vcpu, PSCI_RET_NOT_SUPPORTED, 0, 0, 0);
1400 return true;
1401 }
1402
1403 static bool pvm_psci_features(struct pkvm_hyp_vcpu *hyp_vcpu)
1404 {
1405 struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
1406 u32 feature = smccc_get_arg1(vcpu);
1407 unsigned long val;
1408
1409 switch (feature) {
1410 case PSCI_0_2_FN_PSCI_VERSION:
1411 case PSCI_0_2_FN_CPU_SUSPEND:
1412 case PSCI_0_2_FN64_CPU_SUSPEND:
1413 case PSCI_0_2_FN_CPU_OFF:
1414 case PSCI_0_2_FN_CPU_ON:
1415 case PSCI_0_2_FN64_CPU_ON:
1416 case PSCI_0_2_FN_AFFINITY_INFO:
1417 case PSCI_0_2_FN64_AFFINITY_INFO:
1418 case PSCI_0_2_FN_SYSTEM_OFF:
1419 case PSCI_0_2_FN_SYSTEM_RESET:
1420 case PSCI_1_0_FN_PSCI_FEATURES:
1421 case PSCI_1_1_FN_SYSTEM_RESET2:
1422 case PSCI_1_1_FN64_SYSTEM_RESET2:
1423 case ARM_SMCCC_VERSION_FUNC_ID:
1424 val = PSCI_RET_SUCCESS;
1425 break;
1426 default:
1427 val = PSCI_RET_NOT_SUPPORTED;
1428 break;
1429 }
1430
1431 /* Nothing to be handled by the host. Go back to the guest. */
1432 smccc_set_retval(vcpu, val, 0, 0, 0);
1433 return true;
1434 }
1435
1436 static bool pkvm_handle_psci(struct pkvm_hyp_vcpu *hyp_vcpu)
1437 {
1438 struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
1439 u32 psci_fn = smccc_get_function(vcpu);
1440
1441 switch (psci_fn) {
1442 case PSCI_0_2_FN_CPU_ON:
1443 kvm_psci_narrow_to_32bit(vcpu);
1444 fallthrough;
1445 case PSCI_0_2_FN64_CPU_ON:
1446 return pvm_psci_vcpu_on(hyp_vcpu);
1447 case PSCI_0_2_FN_CPU_OFF:
1448 return pvm_psci_vcpu_off(hyp_vcpu);
1449 case PSCI_0_2_FN_AFFINITY_INFO:
1450 kvm_psci_narrow_to_32bit(vcpu);
1451 fallthrough;
1452 case PSCI_0_2_FN64_AFFINITY_INFO:
1453 return pvm_psci_vcpu_affinity_info(hyp_vcpu);
1454 case PSCI_0_2_FN_PSCI_VERSION:
1455 return pvm_psci_version(hyp_vcpu);
1456 case PSCI_1_0_FN_PSCI_FEATURES:
1457 return pvm_psci_features(hyp_vcpu);
1458 case PSCI_0_2_FN_SYSTEM_RESET:
1459 case PSCI_0_2_FN_CPU_SUSPEND:
1460 case PSCI_0_2_FN64_CPU_SUSPEND:
1461 case PSCI_0_2_FN_SYSTEM_OFF:
1462 case PSCI_1_1_FN_SYSTEM_RESET2:
1463 case PSCI_1_1_FN64_SYSTEM_RESET2:
1464 return false; /* Handled by the host. */
1465 default:
1466 break;
1467 }
1468
1469 return pvm_psci_not_supported(hyp_vcpu);
1470 }
1471
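/*
 * Ask the host to refill the vCPU memcache: queue a MEM request and rewind
 * ELR_EL2 by one instruction so that the guest replays the trapping HVC once
 * the host has provided the memory.
 */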
1472 int pkvm_handle_empty_memcache(struct pkvm_hyp_vcpu *hyp_vcpu, u64 *exit_code)
1473 {
1474 struct kvm_hyp_req *req;
1475
1476 req = pkvm_hyp_req_reserve(hyp_vcpu, KVM_HYP_REQ_TYPE_MEM);
1477 if (!req)
1478 return -ENOMEM;
1479
1480 req->mem.dest = REQ_MEM_DEST_VCPU_MEMCACHE;
1481 req->mem.nr_pages = kvm_mmu_cache_min_pages(&hyp_vcpu->vcpu.kvm->arch.mmu);
1482
1483 write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR);
1484
1485 *exit_code = ARM_EXCEPTION_HYP_REQ;
1486
1487 return 0;
1488 }
1489
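/*
 * Handle the MEM_SHARE hypercall: share guest pages starting at the given
 * IPA with the host. On -EFAULT or -ENOMEM, request a mapping and/or memory
 * from the host so the call can be replayed; otherwise return the number of
 * pages shared (or an error) to the guest.
 */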
1490 static bool pkvm_memshare_call(struct pkvm_hyp_vcpu *hyp_vcpu, u64 *exit_code)
1491 {
1492 struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
1493 struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
1494 u64 ipa = smccc_get_arg1(vcpu);
1495 u64 nr_pages = smccc_get_arg2(vcpu);
1496 u64 arg3 = smccc_get_arg3(vcpu);
1497 struct kvm_hyp_req *req;
1498 u64 nr_shared;
1499 int err;
1500
1501 /* Legacy guests have arg2 set to 0 */
1502 if (nr_pages == 0)
1503 nr_pages = 1;
1504
1505 if (arg3 || !PAGE_ALIGNED(ipa))
1506 goto out_guest_err;
1507
1508 err = __pkvm_guest_share_host(hyp_vcpu, ipa, nr_pages, &nr_shared);
1509 switch (err) {
1510 case 0:
1511 atomic64_add(nr_shared * PAGE_SIZE,
1512 &hyp_vm->host_kvm->stat.protected_shared_mem);
1513 smccc_set_retval(vcpu, SMCCC_RET_SUCCESS, nr_shared, 0, 0);
1514
1515 return true;
1516 case -EFAULT:
1517 req = pkvm_hyp_req_reserve(hyp_vcpu, KVM_HYP_REQ_TYPE_MAP);
1518 if (!req)
1519 goto out_guest_err;
1520
1521 req->map.guest_ipa = ipa;
1522 req->map.size = nr_pages << PAGE_SHIFT;
1523
1524 /*
1525 * We're about to go back to the host... let's not waste time
1526 * and check for the memcache while at it.
1527 */
1528 fallthrough;
1529 case -ENOMEM:
1530 if (pkvm_handle_empty_memcache(hyp_vcpu, exit_code))
1531 goto out_guest_err;
1532
1533 goto out_host;
1534 }
1535
1536 out_guest_err:
1537 smccc_set_retval(vcpu, SMCCC_RET_INVALID_PARAMETER, 0, 0, 0);
1538 return true;
1539
1540 out_host:
1541 return false;
1542 }
1543
1544 static bool pkvm_memunshare_call(struct pkvm_hyp_vcpu *hyp_vcpu)
1545 {
1546 struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
1547 struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
1548 u64 ipa = smccc_get_arg1(vcpu);
1549 u64 nr_pages = smccc_get_arg2(vcpu);
1550 u64 arg3 = smccc_get_arg3(vcpu);
1551 u64 nr_unshared;
1552 int err;
1553
1554 /* Legacy guests have arg2 set to 0 */
1555 if (nr_pages == 0)
1556 nr_pages = 1;
1557
1558 if (arg3 || !PAGE_ALIGNED(ipa))
1559 goto out_guest_err;
1560
1561 err = __pkvm_guest_unshare_host(hyp_vcpu, ipa, nr_pages, &nr_unshared);
1562 if (err)
1563 goto out_guest_err;
1564
1565 atomic64_sub(nr_unshared * PAGE_SIZE,
1566 &hyp_vm->host_kvm->stat.protected_shared_mem);
1567 smccc_set_retval(vcpu, SMCCC_RET_SUCCESS, nr_unshared, 0, 0);
1568 return true;
1569
1570 out_guest_err:
1571 smccc_set_retval(vcpu, SMCCC_RET_INVALID_PARAMETER, 0, 0, 0);
1572 return true;
1573 }
1574
1575 static bool pkvm_install_ioguard_page(struct pkvm_hyp_vcpu *hyp_vcpu,
1576 u64 *exit_code)
1577 {
1578 u64 ipa = smccc_get_arg1(&hyp_vcpu->vcpu);
1579 u64 nr_pages = smccc_get_arg2(&hyp_vcpu->vcpu);
1580 u32 fn = smccc_get_function(&hyp_vcpu->vcpu);
1581 u64 retval = SMCCC_RET_SUCCESS;
1582 u64 nr_guarded = 0;
1583 int ret = -EINVAL;
1584
1585 /* Legacy non-range version, arg2|arg3 might be garbage */
1586 if (fn == ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_MAP_FUNC_ID)
1587 nr_pages = 1;
1588 else if (smccc_get_arg3(&hyp_vcpu->vcpu))
1589 goto out_guest_err;
1590
1591 ret = __pkvm_install_ioguard_page(hyp_vcpu, ipa, nr_pages, &nr_guarded);
1592 if (ret == -ENOMEM && !pkvm_handle_empty_memcache(hyp_vcpu, exit_code))
1593 return false;
1594
1595 out_guest_err:
1596 if (ret)
1597 retval = SMCCC_RET_INVALID_PARAMETER;
1598
1599 smccc_set_retval(&hyp_vcpu->vcpu, retval, nr_guarded, 0, 0);
1600 return true;
1601 }
1602
1603 static bool pkvm_remove_ioguard_page(struct pkvm_hyp_vcpu *hyp_vcpu,
1604 u64 *exit_code)
1605 {
1606 struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
1607 u64 nr_pages = smccc_get_arg2(&hyp_vcpu->vcpu);
1608 u32 fn = smccc_get_function(&hyp_vcpu->vcpu);
1609 u64 retval = SMCCC_RET_INVALID_PARAMETER;
1610
1611 /* Legacy non-range version, arg2|arg3 might be garbage */
1612 if (fn == ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_UNMAP_FUNC_ID)
1613 nr_pages = 1;
1614 else if (smccc_get_arg3(&hyp_vcpu->vcpu))
1615 goto out_guest_err;
1616
1617 if (!test_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vm->kvm.arch.flags))
1618 goto out_guest_err;
1619
1620 /*
1621 * Guests older than 6.12 could issue unmap HVCs. However, this operation
1622 * is not necessary:
1623 * - ioguard is only there to let the hypervisor know where the
1624 * MMIO regions are.
1625 * - MMIO_GUARD_MAP will not fail on multiple calls for the same
1626 * region.
1627 *
1628 * Keep the HVCs for compatibility reasons, but do nothing.
1629 */
1630 retval = SMCCC_RET_SUCCESS;
1631
1632 out_guest_err:
1633 smccc_set_retval(&hyp_vcpu->vcpu, retval, nr_pages, 0, 0);
1634 return true;
1635 }
1636
1637 static bool pkvm_meminfo_call(struct pkvm_hyp_vcpu *hyp_vcpu)
1638 {
1639 struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
1640 u64 arg1 = smccc_get_arg1(vcpu);
1641 u64 arg2 = smccc_get_arg2(vcpu);
1642 u64 arg3 = smccc_get_arg3(vcpu);
1643
1644 if (arg1 || arg2 || arg3)
1645 goto out_guest_err;
1646
1647 smccc_set_retval(vcpu, PAGE_SIZE, KVM_FUNC_HAS_RANGE, 0, 0);
1648 return true;
1649
1650 out_guest_err:
1651 smccc_set_retval(vcpu, SMCCC_RET_INVALID_PARAMETER, 0, 0, 0);
1652 return true;
1653 }
1654
1655 static bool pkvm_memrelinquish_call(struct pkvm_hyp_vcpu *hyp_vcpu,
1656 u64 *exit_code)
1657 {
1658 struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
1659 u64 ipa = smccc_get_arg1(vcpu);
1660 u64 arg2 = smccc_get_arg2(vcpu);
1661 u64 arg3 = smccc_get_arg3(vcpu);
1662 u64 pa = 0;
1663 int ret;
1664
1665 if (arg2 || arg3)
1666 goto out_guest_err;
1667
1668 ret = __pkvm_guest_relinquish_to_host(hyp_vcpu, ipa, &pa);
1669 if (ret == -E2BIG) {
1670 struct kvm_hyp_req *req = pkvm_hyp_req_reserve(hyp_vcpu, KVM_HYP_REQ_TYPE_SPLIT);
1671
1672 if (!req) {
1673 ret = -ENOMEM;
1674 goto out_guest_err;
1675 }
1676
1677 req->split.guest_ipa = ALIGN_DOWN(ipa, PMD_SIZE);
1678 req->split.size = PMD_SIZE;
1679
1680 write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR);
1681 *exit_code = ARM_EXCEPTION_HYP_REQ;
1682
1683 return false;
1684 } else if (ret) {
1685 goto out_guest_err;
1686 }
1687
1688 if (pa != 0) {
1689 /* Now pass to host. */
1690 return false;
1691 }
1692
1693 /* This was a NOP as no page was actually mapped at the IPA. */
1694 smccc_set_retval(vcpu, 0, 0, 0, 0);
1695 return true;
1696
1697 out_guest_err:
1698 smccc_set_retval(vcpu, SMCCC_RET_INVALID_PARAMETER, 0, 0, 0);
1699 return true;
1700 }
1701
1702 bool smccc_trng_available;
1703
1704 static bool pkvm_forward_trng(struct kvm_vcpu *vcpu)
1705 {
1706 u32 fn = smccc_get_function(vcpu);
1707 struct arm_smccc_res res;
1708 unsigned long arg1 = 0;
1709
1710 /*
1711 * Forward TRNG calls to EL3, as we can't trust the host to handle
1712 * these for us.
1713 */
1714 switch (fn) {
1715 case ARM_SMCCC_TRNG_FEATURES:
1716 case ARM_SMCCC_TRNG_RND32:
1717 case ARM_SMCCC_TRNG_RND64:
1718 arg1 = smccc_get_arg1(vcpu);
1719 fallthrough;
1720 case ARM_SMCCC_TRNG_VERSION:
1721 case ARM_SMCCC_TRNG_GET_UUID:
1722 arm_smccc_1_1_smc(fn, arg1, &res);
1723 smccc_set_retval(vcpu, res.a0, res.a1, res.a2, res.a3);
1724 memzero_explicit(&res, sizeof(res));
1725 break;
1726 }
1727
1728 return true;
1729 }
1730
1731 #define ARM_SMCCC_TRNG_VER_1_0 (1ULL << 16 | 0ULL)
1732 #define ARM_SMCCC_TRNG_INVALID_PARAMETERS ULL(-2)
1733 #define ARM_SMCCC_TRNG_SMC64_BITS 192
1734
1735 static bool module_handle_guest_trng_rng(struct kvm_vcpu *vcpu)
1736 {
1737 u64 ret;
1738 u64 entropy[DIV_ROUND_UP(ARM_SMCCC_TRNG_SMC64_BITS, 64)];
1739 u64 nbits;
1740
1741 nbits = smccc_get_arg1(vcpu);
1742 if (nbits == 0 || nbits > ARM_SMCCC_TRNG_SMC64_BITS) {
1743 ret = ARM_SMCCC_TRNG_INVALID_PARAMETERS;
1744 goto err;
1745 }
1746
1747 memset(entropy, 0, sizeof(entropy));
1748
1749 ret = module_get_guest_trng_rng(entropy, nbits);
1750 if (ret == SMCCC_RET_SUCCESS) {
1751 smccc_set_retval(vcpu, SMCCC_RET_SUCCESS, entropy[2],
1752 entropy[1], entropy[0]);
1753 return true;
1754 }
1755
1756 err:
1757 smccc_set_retval(vcpu, ret, 0, 0, 0);
1758 return true;
1759 }
1760
1761 static bool module_handle_guest_trng(struct kvm_vcpu *vcpu)
1762 {
1763 u32 fn;
1764 u64 ret = SMCCC_RET_NOT_SUPPORTED;
1765 const uuid_t *uuid;
1766
1767 fn = smccc_get_function(vcpu);
1768 uuid = module_get_guest_trng_uuid();
1769 if (!uuid)
1770 return false;
1771
1772 switch (fn) {
1773 case ARM_SMCCC_TRNG_VERSION:
1774 ret = ARM_SMCCC_TRNG_VER_1_0;
1775 break;
1776 case ARM_SMCCC_TRNG_FEATURES:
1777 switch (smccc_get_arg1(vcpu)) {
1778 case ARM_SMCCC_TRNG_VERSION:
1779 case ARM_SMCCC_TRNG_FEATURES:
1780 case ARM_SMCCC_TRNG_GET_UUID:
1781 case ARM_SMCCC_TRNG_RND64:
1782 ret = SMCCC_RET_SUCCESS;
1783 break;
1784 }
1785 break;
1786 case ARM_SMCCC_TRNG_GET_UUID:
1787 smccc_set_retval(vcpu, le32_to_cpu(((u32 *)uuid->b)[0]),
1788 le32_to_cpu(((u32 *)uuid->b)[1]),
1789 le32_to_cpu(((u32 *)uuid->b)[2]),
1790 le32_to_cpu(((u32 *)uuid->b)[3]));
1791 return true;
1792 case ARM_SMCCC_TRNG_RND64:
1793 return module_handle_guest_trng_rng(vcpu);
1794 default:
1795 return false;
1796 }
1797
1798 smccc_set_retval(vcpu, ret, 0, 0, 0);
1799 return true;
1800 }
1801
1802
1803 static bool is_standard_secure_service_call(u64 func_id)
1804 {
1805 return (func_id >= PSCI_0_2_FN_BASE && func_id <= ARM_CCA_FUNC_END) ||
1806 (func_id >= PSCI_0_2_FN64_BASE && func_id <= ARM_CCA_64BIT_FUNC_END);
1807 }
1808
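/*
 * Handle an SMC issued by a protected guest. Standard secure service calls
 * are not handled here; other SMCs are forwarded to a module handler when
 * SMC forwarding is enabled for the VM, with X0 set to -1 if the module
 * declines the call.
 */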
1809 bool kvm_handle_pvm_smc64(struct kvm_vcpu *vcpu, u64 *exit_code)
1810 {
1811 bool handled = false;
1812 struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
1813 struct pkvm_hyp_vm *vm;
1814 struct pkvm_hyp_vcpu *hyp_vcpu;
1815 struct arm_smccc_1_2_regs regs;
1816 struct arm_smccc_1_2_regs res;
1817 DECLARE_REG(u64, func_id, ctxt, 0);
1818
1819 hyp_vcpu = container_of(vcpu, struct pkvm_hyp_vcpu, vcpu);
1820 vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
1821
1822 if (is_standard_secure_service_call(func_id))
1823 return false;
1824
1825 if (!vm->kvm.arch.pkvm.smc_forwarded)
1826 return false;
1827
1828 memcpy(&regs, &ctxt->regs, sizeof(regs));
1829 handled = module_handle_guest_smc(&regs, &res, vm->kvm.arch.pkvm.handle);
1830 if (handled)
1831 memcpy(&ctxt->regs.regs[0], &res, sizeof(res));
1832 else
1833 ctxt->regs.regs[0] = -1;
1834
1835 __kvm_skip_instr(vcpu);
1836
1837 return handled;
1838 }
1839
1840 /*
1841 * Handler for protected VM HVC calls.
1842 *
1843 * Returns true if the hypervisor has handled the exit, and control should go
1844 * back to the guest, or false if it hasn't.
1845 */
1846 bool kvm_handle_pvm_hvc64(struct kvm_vcpu *vcpu, u64 *exit_code)
1847 {
1848 u64 val[4] = { SMCCC_RET_NOT_SUPPORTED };
1849 u32 fn = smccc_get_function(vcpu);
1850 struct pkvm_hyp_vcpu *hyp_vcpu;
1851
1852 hyp_vcpu = container_of(vcpu, struct pkvm_hyp_vcpu, vcpu);
1853
1854 switch (fn) {
1855 case ARM_SMCCC_VERSION_FUNC_ID:
1856 /* Nothing to be handled by the host. Go back to the guest. */
1857 val[0] = ARM_SMCCC_VERSION_1_2;
1858 break;
1859 case ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID:
1860 val[0] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0;
1861 val[1] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1;
1862 val[2] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2;
1863 val[3] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3;
1864 break;
1865 case ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID:
1866 val[0] = BIT(ARM_SMCCC_KVM_FUNC_FEATURES);
1867 val[0] |= BIT(ARM_SMCCC_KVM_FUNC_HYP_MEMINFO);
1868 val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MEM_SHARE);
1869 val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MEM_UNSHARE);
1870 val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_INFO);
1871 val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_ENROLL);
1872 val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_MAP);
1873 val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_UNMAP);
1874 val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_RGUARD_MAP);
1875 val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_RGUARD_UNMAP);
1876 val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MEM_RELINQUISH);
1877 break;
1878 case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_ENROLL_FUNC_ID:
1879 set_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vcpu->kvm->arch.flags);
1880 val[0] = SMCCC_RET_SUCCESS;
1881 break;
1882 case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_MAP_FUNC_ID:
1883 case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_RGUARD_MAP_FUNC_ID:
1884 return pkvm_install_ioguard_page(hyp_vcpu, exit_code);
1885 case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_UNMAP_FUNC_ID:
1886 case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_RGUARD_UNMAP_FUNC_ID:
1887 return pkvm_remove_ioguard_page(hyp_vcpu, exit_code);
1888 case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_INFO_FUNC_ID:
1889 case ARM_SMCCC_VENDOR_HYP_KVM_HYP_MEMINFO_FUNC_ID:
1890 return pkvm_meminfo_call(hyp_vcpu);
1891 case ARM_SMCCC_VENDOR_HYP_KVM_MEM_SHARE_FUNC_ID:
1892 return pkvm_memshare_call(hyp_vcpu, exit_code);
1893 case ARM_SMCCC_VENDOR_HYP_KVM_MEM_UNSHARE_FUNC_ID:
1894 return pkvm_memunshare_call(hyp_vcpu);
1895 case ARM_SMCCC_VENDOR_HYP_KVM_MEM_RELINQUISH_FUNC_ID:
1896 return pkvm_memrelinquish_call(hyp_vcpu, exit_code);
1897 case ARM_SMCCC_TRNG_VERSION ... ARM_SMCCC_TRNG_RND32:
1898 case ARM_SMCCC_TRNG_RND64:
1899 if (module_handle_guest_trng(vcpu))
1900 return true;
1901 if (smccc_trng_available)
1902 return pkvm_forward_trng(vcpu);
1903 break;
1904 case ARM_SMCCC_VENDOR_HYP_KVM_PVIOMMU_OP_FUNC_ID:
1905 return kvm_handle_pviommu_hvc(vcpu, exit_code);
1906 case ARM_SMCCC_VENDOR_HYP_KVM_DEV_REQ_MMIO_FUNC_ID:
1907 return pkvm_device_request_mmio(hyp_vcpu, exit_code);
1908 case ARM_SMCCC_VENDOR_HYP_KVM_DEV_REQ_DMA_FUNC_ID:
1909 return pkvm_device_request_dma(hyp_vcpu, exit_code);
1910 default:
1911 if (is_ffa_call(fn))
1912 return kvm_guest_ffa_handler(hyp_vcpu, exit_code);
1913 else
1914 return pkvm_handle_psci(hyp_vcpu);
1915 }
1916
1917 smccc_set_retval(vcpu, val[0], val[1], val[2], val[3]);
1918 return true;
1919 }
1920
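/*
 * Translate a pKVM VM handle into an FF-A endpoint ID: the host uses
 * HOST_FFA_ID, guests use their VM table index + 1.
 */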
1921 u32 vm_handle_to_ffa_handle(pkvm_handle_t vm_handle)
1922 {
1923 if (!vm_handle)
1924 return HOST_FFA_ID;
1925 else
1926 return vm_handle_to_idx(vm_handle) + 1;
1927 }
1928
1929 u32 hyp_vcpu_to_ffa_handle(struct pkvm_hyp_vcpu *hyp_vcpu)
1930 {
1931 pkvm_handle_t vm_handle;
1932
1933 if (!hyp_vcpu)
1934 return HOST_FFA_ID;
1935
1936 vm_handle = hyp_vcpu->vcpu.kvm->arch.pkvm.handle;
1937 return vm_handle_to_ffa_handle(vm_handle);
1938 }
1939