1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2021 Google LLC
4  * Author: Fuad Tabba <tabba@google.com>
5  */
6 
7 #include <linux/kvm_host.h>
8 #include <linux/mm.h>
9 
10 #include <kvm/arm_hypercalls.h>
11 #include <kvm/arm_psci.h>
12 #include <kvm/device.h>
13 
14 #include <asm/kvm_emulate.h>
15 #include <hyp/adjust_pc.h>
16 
17 #include <nvhe/alloc.h>
18 #include <nvhe/ffa.h>
19 #include <nvhe/mem_protect.h>
20 #include <nvhe/memory.h>
21 #include <nvhe/modules.h>
22 #include <nvhe/mm.h>
23 #include <nvhe/pkvm.h>
24 #include <nvhe/pviommu.h>
25 #include <nvhe/pviommu-host.h>
26 #include <nvhe/rwlock.h>
27 #include <nvhe/trap_handler.h>
28 
29 /* Used by icache_is_aliasing(). */
30 unsigned long __icache_flags;
31 
32 /* Used by kvm_get_vttbr(). */
33 unsigned int kvm_arm_vmid_bits;
34 
35 unsigned int kvm_sve_max_vl;
36 
37 unsigned int kvm_host_sve_max_vl;
38 
39 /*
40  * The currently loaded hyp vCPU for each physical CPU. Used only when
41  * protected KVM is enabled, but for both protected and non-protected VMs.
42  */
43 static DEFINE_PER_CPU(struct pkvm_hyp_vcpu *, loaded_hyp_vcpu);
44 
45 static LIST_HEAD(running_vms);
46 struct ffa_mem_transfer *find_transfer_by_handle(u64 ffa_handle, struct kvm_ffa_buffers *buf);
47 
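/* Reset HCR_EL2 to the baseline guest configuration for this vCPU. */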
48 static void pkvm_vcpu_reset_hcr(struct kvm_vcpu *vcpu)
49 {
50 	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
51 
52 	if (has_hvhe())
53 		vcpu->arch.hcr_el2 |= HCR_E2H;
54 
55 	if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
56 		/* route synchronous external abort exceptions to EL2 */
57 		vcpu->arch.hcr_el2 |= HCR_TEA;
58 		/* trap error record accesses */
59 		vcpu->arch.hcr_el2 |= HCR_TERR;
60 	}
61 
62 	if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
63 		vcpu->arch.hcr_el2 |= HCR_FWB;
64 
65 	if (cpus_have_final_cap(ARM64_HAS_EVT) &&
66 	    !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
67 		vcpu->arch.hcr_el2 |= HCR_TID4;
68 	else
69 		vcpu->arch.hcr_el2 |= HCR_TID2;
70 
71 	if (vcpu_has_ptrauth(vcpu))
72 		vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);
73 
74 	if (kvm_has_mte(vcpu->kvm))
75 		vcpu->arch.hcr_el2 |= HCR_ATA;
76 }
77 
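/*
 * Reset HCRX_EL2. Non-protected VMs additionally inherit the host vCPU's
 * HCRX_EL2 bits, since the host manages their feature set.
 */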
78 static void pkvm_vcpu_reset_hcrx(struct pkvm_hyp_vcpu *hyp_vcpu)
79 {
80 	struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
81 	struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
82 
83 	if (!cpus_have_final_cap(ARM64_HAS_HCX))
84 		return;
85 
86 	/*
87 	 * In general, all HCRX_EL2 bits are gated by a feature.
88 	 * The only reason we can set SMPME without checking any
89 	 * feature is that its effects are not directly observable
90 	 * from the guest.
91 	 */
92 	vcpu->arch.hcrx_el2 = HCRX_EL2_SMPME;
93 
94 	/*
95 	 * For non-protected VMs, the host is responsible for the guest's
96 	 * features, so use the remaining host HCRX_EL2 bits.
97 	 */
98 	if (!pkvm_hyp_vcpu_is_protected(hyp_vcpu))
99 		vcpu->arch.hcrx_el2 |= host_vcpu->arch.hcrx_el2;
100 }
101 
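/* Tighten the HCR_EL2 traps of a protected VM according to its feature set. */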
102 static void pvm_init_traps_hcr(struct kvm_vcpu *vcpu)
103 {
104 	struct kvm *kvm = vcpu->kvm;
105 	u64 val = vcpu->arch.hcr_el2;
106 
107 	/* No support for AArch32. */
108 	val |= HCR_RW;
109 
110 	/*
111 	 * Always trap:
112 	 * - Feature id registers: to control features exposed to guests
113 	 * - Implementation-defined features
114 	 */
115 	val |= HCR_TACR | HCR_TIDCP | HCR_TID3 | HCR_TID1;
116 
117 	if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP)) {
118 		val |= HCR_TERR | HCR_TEA;
119 		val &= ~(HCR_FIEN);
120 	}
121 
122 	if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, IMP))
123 		val &= ~(HCR_AMVOFFEN);
124 
125 	if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, MTE, IMP)) {
126 		val |= HCR_TID5;
127 		val &= ~(HCR_DCT | HCR_ATA);
128 	}
129 
130 	if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, LO, IMP))
131 		val |= HCR_TLOR;
132 
133 	vcpu->arch.hcr_el2 = val;
134 }
135 
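/* Set the HCRX_EL2 enable bits for features exposed to the protected VM. */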
136 static void pvm_init_traps_hcrx(struct kvm_vcpu *vcpu)
137 {
138 	struct kvm *kvm = vcpu->kvm;
139 	u64 hcrx_set = 0;
140 
141 	if (!cpus_have_final_cap(ARM64_HAS_HCX))
142 		return;
143 
144 	if (kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP))
145 		hcrx_set |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2);
146 
147 	if (kvm_has_feat(kvm, ID_AA64MMFR3_EL1, TCRX, IMP))
148 		hcrx_set |= HCRX_EL2_TCR2En;
149 
150 	if (kvm_has_fpmr(kvm))
151 		hcrx_set |= HCRX_EL2_EnFPM;
152 
153 	vcpu->arch.hcrx_el2 |= hcrx_set;
154 }
155 
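/* Trap debug, PMU, SPE and trace features not exposed to the protected VM. */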
156 static void pvm_init_traps_mdcr(struct kvm_vcpu *vcpu)
157 {
158 	struct kvm *kvm = vcpu->kvm;
159 	u64 val = vcpu->arch.mdcr_el2;
160 
161 	if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, IMP)) {
162 		val |= MDCR_EL2_TPM | MDCR_EL2_TPMCR;
163 		val &= ~(MDCR_EL2_HPME | MDCR_EL2_MTPME | MDCR_EL2_HPMN_MASK);
164 	}
165 
166 	if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, DebugVer, IMP))
167 		val |= MDCR_EL2_TDRA | MDCR_EL2_TDA;
168 
169 	if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, DoubleLock, IMP))
170 		val |= MDCR_EL2_TDOSA;
171 
172 	if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, IMP)) {
173 		val |= MDCR_EL2_TPMS;
174 		val &= ~(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT);
175 	}
176 
177 	if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceFilt, IMP))
178 		val |= MDCR_EL2_TTRF;
179 
180 	if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, ExtTrcBuff, IMP))
181 		val |= MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT;
182 
183 	/* Trap Debug Communications Channel registers */
184 	if (!kvm_has_feat(kvm, ID_AA64MMFR0_EL1, FGT, IMP))
185 		val |= MDCR_EL2_TDCC;
186 
187 	vcpu->arch.mdcr_el2 = val;
188 }
189 
190 /*
191  * Check that cpu features that are neither trapped nor supported are not
192  * enabled for protected VMs.
193  */
194 static int pkvm_check_pvm_cpu_features(struct kvm_vcpu *vcpu)
195 {
196 	struct kvm *kvm = vcpu->kvm;
197 
198 	/* Protected KVM does not support AArch32 guests. */
199 	if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL0, AARCH32) ||
200 	    kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL1, AARCH32))
201 		return -EINVAL;
202 
203 	/*
204 	 * Linux guests assume support for floating-point and Advanced SIMD. Do
205 	 * not change the trapping behavior for these from the KVM default.
206 	 */
207 	if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, FP, IMP) ||
208 	    !kvm_has_feat(kvm, ID_AA64PFR0_EL1, AdvSIMD, IMP))
209 		return -EINVAL;
210 
211 	/* No SME support in KVM right now. Check to catch if it changes. */
212 	if (kvm_has_feat(kvm, ID_AA64PFR1_EL1, SME, IMP))
213 		return -EINVAL;
214 
215 	return 0;
216 }
217 
218 /*
219  * Initialize trap register values in protected mode.
220  */
221 static int pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu)
222 {
223 	struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
224 	int ret;
225 
226 	vcpu->arch.mdcr_el2 = 0;
227 
228 	pkvm_vcpu_reset_hcr(vcpu);
229 	pkvm_vcpu_reset_hcrx(hyp_vcpu);
230 
231 	if (!pkvm_hyp_vcpu_is_protected(hyp_vcpu))
232 		return 0;
233 
234 	ret = pkvm_check_pvm_cpu_features(vcpu);
235 	if (ret)
236 		return ret;
237 
238 	pvm_init_traps_hcr(vcpu);
239 	pvm_init_traps_hcrx(vcpu);
240 	pvm_init_traps_mdcr(vcpu);
241 
242 	return 0;
243 }
244 
245 /*
246  * Start VM table handles at the defined offset instead of at 0.
247  * Mainly for sanity checking and debugging.
248  */
249 #define HANDLE_OFFSET 0x1000
250 
251 static unsigned int vm_handle_to_idx(pkvm_handle_t handle)
252 {
253 	return handle - HANDLE_OFFSET;
254 }
255 
256 static pkvm_handle_t idx_to_vm_handle(unsigned int idx)
257 {
258 	return idx + HANDLE_OFFSET;
259 }
260 
261 /* Rwlock for protecting state related to the VM table. */
262 static DEFINE_HYP_RWLOCK(vm_table_lock);
263 
264 /*
265  * The table of VM entries for protected VMs in hyp.
266  * Allocated at hyp initialization and setup.
267  */
268 static struct pkvm_hyp_vm **vm_table;
269 
270 void pkvm_hyp_vm_table_init(void *tbl)
271 {
272 	WARN_ON(vm_table);
273 	vm_table = tbl;
274 }
275 
276 static void *map_donated_memory_noclear(unsigned long host_va, size_t size)
277 {
278 	void *va = (void *)kern_hyp_va(host_va);
279 
280 	if (!PAGE_ALIGNED(va))
281 		return NULL;
282 
283 	if (__pkvm_host_donate_hyp(hyp_virt_to_pfn(va),
284 				   PAGE_ALIGN(size) >> PAGE_SHIFT))
285 		return NULL;
286 
287 	return va;
288 }
289 
290 static void __unmap_donated_memory(void *va, size_t size)
291 {
292 	kvm_flush_dcache_to_poc(va, size);
293 	WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(va),
294 				       PAGE_ALIGN(size) >> PAGE_SHIFT));
295 }
296 
297 static void unmap_donated_memory(void *va, size_t size)
298 {
299 	if (!va)
300 		return;
301 
302 	memset(va, 0, size);
303 	__unmap_donated_memory(va, size);
304 }
305 
306 static void unmap_donated_memory_noclear(void *va, size_t size)
307 {
308 	if (!va)
309 		return;
310 
311 	__unmap_donated_memory(va, size);
312 }
313 
314 /*
315  * Return the hyp vm structure corresponding to the handle.
316  */
317 static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle)
318 {
319 	unsigned int idx = vm_handle_to_idx(handle);
320 
321 	if (unlikely(idx >= KVM_MAX_PVMS))
322 		return NULL;
323 
324 	return vm_table[idx];
325 }
326 
327 struct pkvm_hyp_vm *get_pkvm_hyp_vm(pkvm_handle_t handle)
328 {
329 	struct pkvm_hyp_vm *hyp_vm;
330 
331 	hyp_read_lock(&vm_table_lock);
332 
333 	hyp_vm = get_vm_by_handle(handle);
334 	if (!hyp_vm)
335 		goto unlock;
336 	if (hyp_vm->is_dying)
337 		hyp_vm = NULL;
338 	else
339 		hyp_refcount_inc(hyp_vm->refcount);
340 
341 unlock:
342 	hyp_read_unlock(&vm_table_lock);
343 
344 	return hyp_vm;
345 }
346 
347 void put_pkvm_hyp_vm(struct pkvm_hyp_vm *hyp_vm)
348 {
349 	hyp_refcount_dec(hyp_vm->refcount);
350 }
351 
352 struct pkvm_hyp_vm *get_np_pkvm_hyp_vm(pkvm_handle_t handle)
353 {
354 	struct pkvm_hyp_vm *hyp_vm = get_pkvm_hyp_vm(handle);
355 
356 	if (hyp_vm && pkvm_hyp_vm_is_protected(hyp_vm)) {
357 		put_pkvm_hyp_vm(hyp_vm);
358 		hyp_vm = NULL;
359 	}
360 
361 	return hyp_vm;
362 }
363 
364 int __pkvm_reclaim_dying_guest_page(pkvm_handle_t handle, u64 pfn, u64 gfn, u8 order)
365 {
366 	struct pkvm_hyp_vm *hyp_vm;
367 	int ret = -EINVAL;
368 
369 	hyp_read_lock(&vm_table_lock);
370 	hyp_vm = get_vm_by_handle(handle);
371 	if (!hyp_vm || !hyp_vm->is_dying)
372 		goto unlock;
373 
374 	ret = __pkvm_host_reclaim_page(hyp_vm, pfn, gfn << PAGE_SHIFT, order);
375 	if (ret)
376 		goto unlock;
377 
378 	drain_hyp_pool(&hyp_vm->pool, &hyp_vm->host_kvm->arch.pkvm.stage2_teardown_mc);
379 unlock:
380 	hyp_read_unlock(&vm_table_lock);
381 
382 	return ret;
383 }
384 
385 int __pkvm_reclaim_dying_guest_ffa_resources(pkvm_handle_t handle)
386 {
387 	struct pkvm_hyp_vm *hyp_vm;
388 	int ret = -EINVAL;
389 
390 	hyp_read_lock(&vm_table_lock);
391 	hyp_vm = get_vm_by_handle(handle);
392 	if (hyp_vm && hyp_vm->is_dying)
393 		ret = kvm_dying_guest_reclaim_ffa_resources(hyp_vm);
394 	hyp_read_unlock(&vm_table_lock);
395 
396 	return ret;
397 }
398 
399 int __pkvm_notify_guest_vm_avail(pkvm_handle_t handle)
400 {
401 	struct pkvm_hyp_vm *hyp_vm;
402 	int ret = 0;
403 
404 	hyp_read_lock(&vm_table_lock);
405 	hyp_vm = get_vm_by_handle(handle);
406 	if (!hyp_vm || !hyp_vm->kvm.arch.pkvm.ffa_support) {
407 		ret = -EBUSY;
408 		goto unlock;
409 	}
410 
411 	ret = kvm_guest_notify_availability(vm_handle_to_ffa_handle(handle), &hyp_vm->ffa_buf,
412 					    hyp_vm->is_dying);
413 unlock:
414 	hyp_read_unlock(&vm_table_lock);
415 	return ret;
416 }
417 
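/*
 * Load the vCPU 'vcpu_idx' of the VM identified by 'handle' onto this physical
 * CPU, taking a reference on the VM. Fails if another vCPU is already loaded
 * here, if this vCPU is loaded elsewhere, or if the VM is dying.
 */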
418 struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
419 					 unsigned int vcpu_idx)
420 {
421 	struct pkvm_hyp_vcpu *hyp_vcpu = NULL;
422 	struct pkvm_hyp_vm *hyp_vm;
423 
424 	/* Cannot load a new vcpu without putting the old one first. */
425 	if (__this_cpu_read(loaded_hyp_vcpu))
426 		return NULL;
427 
428 	hyp_read_lock(&vm_table_lock);
429 	hyp_vm = get_vm_by_handle(handle);
430 	if (!hyp_vm || hyp_vm->is_dying || hyp_vm->kvm.created_vcpus <= vcpu_idx)
431 		goto unlock;
432 
433 	/*
434 	 * Synchronise with concurrent vCPU initialisation by relying on
435 	 * dependency ordering from the vCPU pointer.
436 	 */
437 	hyp_vcpu = READ_ONCE(hyp_vm->vcpus[vcpu_idx]);
438 	if (!hyp_vcpu)
439 		goto unlock;
440 
441 	/* Ensure vcpu isn't loaded on more than one cpu simultaneously. */
442 	if (unlikely(cmpxchg_relaxed(&hyp_vcpu->loaded_hyp_vcpu, NULL,
443 				     this_cpu_ptr(&loaded_hyp_vcpu)))) {
444 		hyp_vcpu = NULL;
445 		goto unlock;
446 	}
447 
448 	hyp_refcount_inc(hyp_vm->refcount);
449 unlock:
450 	hyp_read_unlock(&vm_table_lock);
451 
452 	if (hyp_vcpu)
453 		__this_cpu_write(loaded_hyp_vcpu, hyp_vcpu);
454 	return hyp_vcpu;
455 }
456 
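/* Unload the vCPU from this physical CPU and drop the reference on its VM. */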
457 void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
458 {
459 	struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
460 
461 	__this_cpu_write(loaded_hyp_vcpu, NULL);
462 
463 	/*
464 	 * Clearing the 'loaded_hyp_vcpu' field allows the 'hyp_vcpu' to
465 	 * be loaded by another physical CPU, so make sure we're done
466 	 * with the vCPU before letting somebody else play with it.
467 	 */
468 	smp_store_release(&hyp_vcpu->loaded_hyp_vcpu, NULL);
469 
470 	/*
471 	 * We don't hold the 'vm_table_lock'. Once the refcount hits
472 	 * zero, VM teardown can destroy the VM's data structures and
473 	 * so this must come last.
474 	 */
475 	smp_wmb();
476 	hyp_refcount_dec(hyp_vm->refcount);
477 }
478 
479 struct pkvm_hyp_vcpu *pkvm_get_loaded_hyp_vcpu(void)
480 {
481 	return __this_cpu_read(loaded_hyp_vcpu);
482 }
483 
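/*
 * Copy the vCPU feature set from the host. Protected VMs are restricted to the
 * features supported by pKVM.
 */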
484 static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struct kvm *host_kvm)
485 {
486 	struct kvm *kvm = &hyp_vm->kvm;
487 	unsigned long host_arch_flags = READ_ONCE(host_kvm->arch.flags);
488 
489 	/* No restrictions for non-protected VMs. */
490 	if (!kvm_vm_is_protected(kvm)) {
491 		hyp_vm->kvm.arch.flags = host_arch_flags;
492 
493 		bitmap_copy(kvm->arch.vcpu_features,
494 			    host_kvm->arch.vcpu_features,
495 			    KVM_VCPU_MAX_FEATURES);
496 		return;
497 	}
498 
499 	kvm->arch.vcpu_features[0] = pvm_supported_vcpu_features() &
500 				     host_kvm->arch.vcpu_features[0];
501 
502 	if (kvm_pvm_ext_allowed(KVM_CAP_ARM_SVE) && kvm_has_sve(host_kvm))
503 		set_bit(KVM_ARCH_FLAG_GUEST_HAS_SVE, &kvm->arch.flags);
504 
505 	if (kvm_pvm_ext_allowed(KVM_CAP_ARM_MTE) && kvm_has_mte(host_kvm))
506 		set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags);
507 }
508 
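/* Derive the vCPU's initial PSCI power state and reset state from 'mp_state'. */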
509 static int pkvm_vcpu_init_psci(struct pkvm_hyp_vcpu *hyp_vcpu, u32 mp_state)
510 {
511 	struct vcpu_reset_state *reset_state = &hyp_vcpu->vcpu.arch.reset_state;
512 	struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
513 
514 	if (!pkvm_hyp_vcpu_is_protected(hyp_vcpu)) {
515 		/*
516 		 * The host is responsible for managing the vcpu state.
517 		 * Treat it as always on as far as hyp is concerned.
518 		 */
519 		hyp_vcpu->power_state = PSCI_0_2_AFFINITY_LEVEL_ON;
520 		return 0;
521 	}
522 
523 	if (mp_state == KVM_MP_STATE_STOPPED) {
524 		reset_state->reset = false;
525 		hyp_vcpu->power_state = PSCI_0_2_AFFINITY_LEVEL_OFF;
526 	} else if (pkvm_hyp_vm_has_pvmfw(hyp_vm)) {
527 		if (hyp_vm->pvmfw_entry_vcpu)
528 			return -EINVAL;
529 
530 		hyp_vm->pvmfw_entry_vcpu = hyp_vcpu;
531 		reset_state->reset = true;
532 		hyp_vcpu->power_state = PSCI_0_2_AFFINITY_LEVEL_ON_PENDING;
533 	} else {
534 		struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
535 
536 		reset_state->pc = READ_ONCE(host_vcpu->arch.ctxt.regs.pc);
537 		reset_state->r0 = READ_ONCE(host_vcpu->arch.ctxt.regs.regs[0]);
538 		reset_state->reset = true;
539 		hyp_vcpu->power_state = PSCI_0_2_AFFINITY_LEVEL_ON_PENDING;
540 	}
541 
542 	return 0;
543 }
544 
545 static void unpin_host_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
546 {
547 	struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
548 	void *hyp_reqs = hyp_vcpu->vcpu.arch.hyp_reqs;
549 
550 	if (host_vcpu)
551 		hyp_unpin_shared_mem(host_vcpu, host_vcpu + 1);
552 	if (hyp_reqs)
553 		hyp_unpin_shared_mem(hyp_reqs, hyp_reqs + 1);
554 }
555 
556 static void unpin_host_sve_state(struct pkvm_hyp_vcpu *hyp_vcpu)
557 {
558 	void *sve_state;
559 
560 	if (!vcpu_has_feature(&hyp_vcpu->vcpu, KVM_ARM_VCPU_SVE))
561 		return;
562 
563 	sve_state = kern_hyp_va(hyp_vcpu->vcpu.arch.sve_state);
564 	hyp_unpin_shared_mem(sve_state,
565 			     sve_state + vcpu_sve_state_size(&hyp_vcpu->vcpu));
566 }
567 
568 static void teardown_sve_state(struct pkvm_hyp_vcpu *hyp_vcpu)
569 {
570 	struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
571 	void *sve_state = hyp_vcpu->vcpu.arch.sve_state;
572 
573 	if (sve_state)
574 		hyp_free_account(sve_state, hyp_vm->host_kvm);
575 }
576 
577 static void unpin_host_vcpus(struct pkvm_hyp_vcpu *hyp_vcpus[],
578 			     unsigned int nr_vcpus)
579 {
580 	int i;
581 
582 	for (i = 0; i < nr_vcpus; i++) {
583 		struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vcpus[i];
584 
585 		if (!hyp_vcpu)
586 			continue;
587 
588 		unpin_host_vcpu(hyp_vcpu);
589 
590 		if (!pkvm_hyp_vcpu_is_protected(hyp_vcpu))
591 			unpin_host_sve_state(hyp_vcpu);
592 	}
593 }
594 
595 static size_t pkvm_get_last_ran_size(void)
596 {
597 	return array_size(hyp_nr_cpus, sizeof(int));
598 }
599 
600 static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm,
601 			     int *last_ran, unsigned int nr_vcpus)
602 {
603 	u64 pvmfw_load_addr = PVMFW_INVALID_LOAD_ADDR;
604 
605 	hyp_vm->host_kvm = host_kvm;
606 	hyp_vm->kvm.created_vcpus = nr_vcpus;
607 	hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
608 	hyp_vm->kvm.arch.pkvm.enabled = READ_ONCE(host_kvm->arch.pkvm.enabled);
609 	hyp_vm->kvm.arch.flags = 0;
610 
611 	if (hyp_vm->kvm.arch.pkvm.enabled)
612 		pvmfw_load_addr = READ_ONCE(host_kvm->arch.pkvm.pvmfw_load_addr);
613 	hyp_vm->kvm.arch.pkvm.pvmfw_load_addr = pvmfw_load_addr;
614 
615 	hyp_vm->kvm.arch.pkvm.ffa_support = READ_ONCE(host_kvm->arch.pkvm.ffa_support);
616 	hyp_vm->kvm.arch.pkvm.smc_forwarded = READ_ONCE(host_kvm->arch.pkvm.smc_forwarded);
617 	hyp_vm->kvm.arch.mmu.last_vcpu_ran = (int __percpu *)last_ran;
618 	memset(last_ran, -1, pkvm_get_last_ran_size());
619 	pkvm_init_features_from_host(hyp_vm, host_kvm);
620 	hyp_spin_lock_init(&hyp_vm->vcpus_lock);
621 	INIT_LIST_HEAD(&hyp_vm->ffa_buf.xfer_list);
622 }
623 
624 static int pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *host_vcpu)
625 {
626 	struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
627 	unsigned int sve_max_vl;
628 	size_t sve_state_size;
629 	void *sve_state;
630 
631 	if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE))
632 		return 0;
633 
634 	/* Limit guest vector length to the maximum supported by the host. */
635 	sve_max_vl = min(READ_ONCE(host_vcpu->arch.sve_max_vl), kvm_host_sve_max_vl);
636 	sve_state_size = sve_state_size(sve_max_vl);
637 	sve_state = kern_hyp_va(READ_ONCE(host_vcpu->arch.sve_state));
638 
639 	if (!sve_state && !pkvm_hyp_vcpu_is_protected(hyp_vcpu))
640 		return -EINVAL;
641 
642 	if (!sve_state_size || (sve_max_vl > kvm_sve_max_vl))
643 		return -EINVAL;
644 
645 	if (pkvm_hyp_vcpu_is_protected(hyp_vcpu)) {
646 		struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
647 
648 		sve_state = hyp_alloc_account(sve_state_size,
649 					      hyp_vm->host_kvm);
650 		if (!sve_state)
651 			return hyp_alloc_errno();
652 	} else {
653 		int ret;
654 
655 		ret = hyp_pin_shared_mem(sve_state, sve_state + sve_state_size);
656 		if (ret)
657 			return ret;
658 	}
659 
660 	vcpu->arch.sve_state = sve_state;
661 	vcpu->arch.sve_max_vl = sve_max_vl;
662 	vcpu_set_flag(vcpu, VCPU_SVE_FINALIZED);
663 
664 	return 0;
665 }
666 
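/*
 * Initialise a hyp vCPU from its host counterpart: pin the shared structures,
 * copy the vCPU identity, and set up traps, SVE and PSCI state. The host vCPU
 * is unpinned again on failure.
 */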
667 static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
668 			      struct pkvm_hyp_vm *hyp_vm,
669 			      struct kvm_vcpu *host_vcpu)
670 {
671 	int ret = 0;
672 	u32 mp_state;
673 	struct kvm_hyp_req *hyp_reqs;
674 
675 	if (hyp_pin_shared_mem(host_vcpu, host_vcpu + 1))
676 		return -EBUSY;
677 
678 	hyp_reqs = READ_ONCE(host_vcpu->arch.hyp_reqs);
679 	if (!PAGE_ALIGNED(hyp_reqs)) {
680 		hyp_unpin_shared_mem(host_vcpu, host_vcpu + 1);
681 		return -EINVAL;
682 	}
683 
684 	hyp_vcpu->vcpu.arch.hyp_reqs = kern_hyp_va(hyp_reqs);
685 	if (hyp_pin_shared_mem(hyp_vcpu->vcpu.arch.hyp_reqs,
686 			       hyp_vcpu->vcpu.arch.hyp_reqs + 1)) {
687 		hyp_unpin_shared_mem(host_vcpu, host_vcpu + 1);
688 		return -EBUSY;
689 	}
690 
691 	mp_state = READ_ONCE(host_vcpu->arch.mp_state.mp_state);
692 	if (mp_state != KVM_MP_STATE_RUNNABLE && mp_state != KVM_MP_STATE_STOPPED) {
693 		ret = -EINVAL;
694 		goto done;
695 	}
696 
697 	hyp_vcpu->host_vcpu = host_vcpu;
698 
699 	hyp_vcpu->vcpu.kvm = &hyp_vm->kvm;
700 	hyp_vcpu->vcpu.vcpu_id = READ_ONCE(host_vcpu->vcpu_id);
701 	hyp_vcpu->vcpu.vcpu_idx = READ_ONCE(host_vcpu->vcpu_idx);
702 
703 	hyp_vcpu->vcpu.arch.hw_mmu = &hyp_vm->kvm.arch.mmu;
704 	hyp_vcpu->vcpu.arch.cflags = READ_ONCE(host_vcpu->arch.cflags);
705 	hyp_vcpu->vcpu.arch.debug_ptr = &host_vcpu->arch.vcpu_debug_state;
706 	hyp_vcpu->vcpu.arch.hyp_reqs->type = KVM_HYP_LAST_REQ;
707 
708 	if (pkvm_hyp_vcpu_is_protected(hyp_vcpu)) {
709 		kvm_init_pvm_id_regs(&hyp_vcpu->vcpu);
710 		kvm_reset_pvm_sys_regs(&hyp_vcpu->vcpu);
711 	}
712 
713 	ret = pkvm_vcpu_init_traps(hyp_vcpu);
714 	if (ret)
715 		goto done;
716 
717 	ret = pkvm_vcpu_init_sve(hyp_vcpu, host_vcpu);
718 	if (ret)
719 		goto done;
720 
721 	ret = pkvm_vcpu_init_psci(hyp_vcpu, mp_state);
722 	if (ret)
723 		goto done;
724 done:
725 	if (ret)
726 		unpin_host_vcpu(hyp_vcpu);
727 	return ret;
728 }
729 
730 static int find_free_vm_table_entry(struct kvm *host_kvm)
731 {
732 	int i;
733 
734 	for (i = 0; i < KVM_MAX_PVMS; ++i) {
735 		if (!vm_table[i])
736 			return i;
737 	}
738 
739 	return -ENOMEM;
740 }
741 
742 /*
743  * Allocate a VM table entry and insert a pointer to the new vm.
744  *
745  * Return a unique handle to the protected VM on success,
746  * negative error code on failure.
747  */
748 static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm,
749 					   struct pkvm_hyp_vm *hyp_vm)
750 {
751 	struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu;
752 	int idx;
753 
754 	hyp_assert_write_lock_held(&vm_table_lock);
755 
756 	/*
757 	 * Initializing protected state might have failed, yet a malicious
758 	 * host could trigger this function. Thus, ensure that 'vm_table'
759 	 * exists.
760 	 */
761 	if (unlikely(!vm_table))
762 		return -EINVAL;
763 
764 	idx = find_free_vm_table_entry(host_kvm);
765 	if (idx < 0)
766 		return idx;
767 
768 	hyp_vm->kvm.arch.pkvm.handle = idx_to_vm_handle(idx);
769 
770 	/* VMID 0 is reserved for the host */
771 	atomic64_set(&mmu->vmid.id, idx + 1);
772 
773 	mmu->arch = &hyp_vm->kvm.arch;
774 	mmu->pgt = &hyp_vm->pgt;
775 
776 	vm_table[idx] = hyp_vm;
777 	list_add(&hyp_vm->vm_list, &running_vms);
778 	return hyp_vm->kvm.arch.pkvm.handle;
779 }
780 
781 /*
782  * Deallocate and remove the VM table entry corresponding to the handle.
783  */
784 static void remove_vm_table_entry(pkvm_handle_t handle)
785 {
786 	struct pkvm_hyp_vm *hyp_vm;
787 
788 	hyp_assert_write_lock_held(&vm_table_lock);
789 	hyp_vm = vm_table[vm_handle_to_idx(handle)];
790 
791 	/*
792 	 * If we didn't send the destruction message, leak the VMID to
793 	 * prevent others from using it.
794 	 */
795 	if (hyp_vm->kvm.arch.pkvm.ffa_support &&
796 	    hyp_vm->ffa_buf.vm_avail_bitmap) {
797 		vm_table[vm_handle_to_idx(handle)] = (void *)0xdeadbeef;
798 		list_del(&hyp_vm->vm_list);
799 		return;
800 	}
801 
802 	vm_table[vm_handle_to_idx(handle)] = NULL;
803 	list_del(&hyp_vm->vm_list);
804 }
805 
806 static size_t pkvm_get_hyp_vm_size(unsigned int nr_vcpus)
807 {
808 	return size_add(sizeof(struct pkvm_hyp_vm),
809 		size_mul(sizeof(struct pkvm_hyp_vcpu *), nr_vcpus));
810 }
811 
812 /*
813  * Initialize the hypervisor copy of the protected VM state using the
814  * memory donated by the host.
815  *
816  * Unmaps the donated memory from the host at stage 2.
817  *
818  * host_kvm: A pointer to the host's struct kvm.
819  * pgd_hva: The host va of the area being donated for the stage-2 PGD for
820  *	    the VM. Must be page aligned. Its size is implied by the VM's
821  *	    VTCR.
822  * Return a unique handle to the protected VM on success,
823  * negative error code on failure.
824  */
825 int __pkvm_init_vm(struct kvm *host_kvm, unsigned long pgd_hva)
826 {
827 	struct pkvm_hyp_vm *hyp_vm = NULL;
828 	int *last_ran = NULL;
829 	unsigned int nr_vcpus;
830 	void *pgd = NULL;
831 	size_t pgd_size;
832 	int ret;
833 
834 	ret = hyp_pin_shared_mem(host_kvm, host_kvm + 1);
835 	if (ret)
836 		return ret;
837 
838 	nr_vcpus = READ_ONCE(host_kvm->created_vcpus);
839 	if (nr_vcpus < 1) {
840 		ret = -EINVAL;
841 		goto err_unpin_kvm;
842 	}
843 
844 	hyp_vm = hyp_alloc_account(pkvm_get_hyp_vm_size(nr_vcpus),
845 				   host_kvm);
846 	if (!hyp_vm) {
847 		ret = hyp_alloc_errno();
848 		goto err_unpin_kvm;
849 	}
850 
851 	last_ran = hyp_alloc_account(pkvm_get_last_ran_size(), host_kvm);
852 	if (!last_ran) {
853 		ret = hyp_alloc_errno();
854 		goto err_free_vm;
855 	}
856 
857 	ret = -EINVAL;
858 
859 	pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.mmu.vtcr);
860 	if (!IS_ALIGNED(pgd_hva, pgd_size))
861 		goto err_free_last_ran;
862 	pgd = map_donated_memory_noclear(pgd_hva, pgd_size);
863 	if (!pgd)
864 		goto err_free_last_ran;
865 
866 	init_pkvm_hyp_vm(host_kvm, hyp_vm, last_ran, nr_vcpus);
867 
868 	hyp_write_lock(&vm_table_lock);
869 	ret = insert_vm_table_entry(host_kvm, hyp_vm);
870 	if (ret < 0)
871 		goto err_unlock;
872 
873 	ret = kvm_guest_prepare_stage2(hyp_vm, pgd);
874 	if (ret)
875 		goto err_remove_vm_table_entry;
876 
877 	ret = pkvm_pviommu_finalise(hyp_vm);
878 	if (ret)
879 		goto err_remove_vm_table_entry;
880 
881 	hyp_write_unlock(&vm_table_lock);
882 
883 	return hyp_vm->kvm.arch.pkvm.handle;
884 
885 err_remove_vm_table_entry:
886 	remove_vm_table_entry(hyp_vm->kvm.arch.pkvm.handle);
887 err_unlock:
888 	hyp_write_unlock(&vm_table_lock);
889 	unmap_donated_memory(pgd, pgd_size);
890 err_free_last_ran:
891 	hyp_free_account(last_ran, host_kvm);
892 err_free_vm:
893 	hyp_free_account(hyp_vm, host_kvm);
894 err_unpin_kvm:
895 	hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
896 	return ret;
897 }
898 
899 struct ffa_mem_transfer *__pkvm_get_vm_ffa_transfer(u16 handle)
900 {
901 	struct pkvm_hyp_vm *vm;
902 	struct ffa_mem_transfer *transfer = NULL;
903 
904 	hyp_read_lock(&vm_table_lock);
905 	list_for_each_entry(vm, &running_vms, vm_list) {
906 		transfer = find_transfer_by_handle(handle, &vm->ffa_buf);
907 		if (transfer)
908 			goto unlock;
909 	}
910 unlock:
911 	hyp_read_unlock(&vm_table_lock);
912 	return transfer;
913 }
914 
915 /*
916  * Initialize the hypervisor copy of the protected vCPU state using the
917  * memory donated by the host.
918  *
919  * handle: The handle for the protected vm.
920  * host_vcpu: A pointer to the corresponding host vcpu.
921  *
922  * Return 0 on success, negative error code on failure.
923  */
924 int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu)
925 {
926 	struct pkvm_hyp_vcpu *hyp_vcpu;
927 	struct pkvm_hyp_vm *hyp_vm;
928 	unsigned int idx;
929 	int ret;
930 
931 	hyp_read_lock(&vm_table_lock);
932 
933 	hyp_vm = get_vm_by_handle(handle);
934 	if (!hyp_vm) {
935 		ret = -ENOENT;
936 		goto unlock_vm;
937 	}
938 
939 	hyp_vcpu = hyp_alloc_account(sizeof(*hyp_vcpu), hyp_vm->host_kvm);
940 	if (!hyp_vcpu) {
941 		ret = hyp_alloc_errno();
942 		goto unlock_vm;
943 	}
944 
945 	hyp_spin_lock(&hyp_vm->vcpus_lock);
946 	ret = init_pkvm_hyp_vcpu(hyp_vcpu, hyp_vm, host_vcpu);
947 	if (ret)
948 		goto unlock_vcpus;
949 
950 	idx = hyp_vcpu->vcpu.vcpu_idx;
951 	if (idx >= hyp_vm->kvm.created_vcpus) {
952 		ret = -EINVAL;
953 		goto unlock_vcpus;
954 	}
955 
956 	if (hyp_vm->vcpus[idx]) {
957 		ret = -EINVAL;
958 		goto unlock_vcpus;
959 	}
960 
961 	/*
962 	 * Ensure the hyp_vcpu is initialised before publishing it to
963 	 * the vCPU-load path via 'hyp_vm->vcpus[]'.
964 	 */
965 	smp_store_release(&hyp_vm->vcpus[idx], hyp_vcpu);
966 
967 unlock_vcpus:
968 	hyp_spin_unlock(&hyp_vm->vcpus_lock);
969 
970 	if (ret)
971 		hyp_free_account(hyp_vcpu, hyp_vm->host_kvm);
972 
973 unlock_vm:
974 	hyp_read_unlock(&vm_table_lock);
975 
976 	return ret;
977 }
978 
979 int __pkvm_start_teardown_vm(pkvm_handle_t handle)
980 {
981 	struct pkvm_hyp_vm *hyp_vm;
982 	int ret = 0;
983 
984 	hyp_write_lock(&vm_table_lock);
985 	hyp_vm = get_vm_by_handle(handle);
986 	if (!hyp_vm) {
987 		ret = -ENOENT;
988 		goto unlock;
989 	} else if (WARN_ON(hyp_refcount_get(hyp_vm->refcount))) {
990 		ret = -EBUSY;
991 		goto unlock;
992 	} else if (hyp_vm->is_dying) {
993 		ret = -EINVAL;
994 		goto unlock;
995 	}
996 
997 	hyp_vm->is_dying = true;
998 
999 unlock:
1000 	hyp_write_unlock(&vm_table_lock);
1001 
1002 	return ret;
1003 }
1004 
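/*
 * Tear down a dying VM: reclaim its FF-A and IOMMU resources, destroy its
 * stage-2 page-table, hand donated memory back to the host and release the
 * hypervisor-side allocations.
 */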
1005 int __pkvm_finalize_teardown_vm(pkvm_handle_t handle)
1006 {
1007 	struct kvm_hyp_memcache *mc;
1008 	struct pkvm_hyp_vm *hyp_vm;
1009 	struct kvm *host_kvm;
1010 	unsigned int idx;
1011 	int err;
1012 
1013 	hyp_write_lock(&vm_table_lock);
1014 	hyp_vm = get_vm_by_handle(handle);
1015 	if (!hyp_vm) {
1016 		err = -ENOENT;
1017 		goto err_unlock;
1018 	} else if (!hyp_vm->is_dying) {
1019 		err = -EBUSY;
1020 		goto err_unlock;
1021 	}
1022 
1023 	host_kvm = hyp_vm->host_kvm;
1024 
1025 	/* Ensure the VMID is clean before it can be reallocated */
1026 	__kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu);
1027 	remove_vm_table_entry(handle);
1028 	hyp_write_unlock(&vm_table_lock);
1029 
1030 	/* A well-behaved host will have reclaimed all FF-A resources already */
1031 	do {
1032 		err = kvm_dying_guest_reclaim_ffa_resources(hyp_vm);
1033 	} while (err == -EAGAIN);
1034 	WARN_ON(err);
1035 
1036 	pkvm_devices_teardown(hyp_vm);
1037 
1038 	pkvm_pviommu_teardown(hyp_vm);
1039 
1040 	/*
1041 	 * At this point, all page tables are destroyed and their pages should have
1042 	 * been pushed to the pool. The only place that might still hold memory is
1043 	 * the mc, which will be drained from the host as it hasn't been donated yet.
1044 	 */
1045 	drain_hyp_pool(&hyp_vm->iommu_pool, &host_kvm->arch.pkvm.teardown_iommu_mc);
1046 
1047 	/*
1048 	 * At this point, the VM has been detached from the VM table and
1049 	 * has a refcount of 0 so we're free to tear it down without
1050 	 * worrying about anybody else.
1051 	 */
1052 
1053 	mc = &host_kvm->arch.pkvm.stage2_teardown_mc;
1054 	destroy_hyp_vm_pgt(hyp_vm);
1055 	drain_hyp_pool(&hyp_vm->pool, mc);
1056 	unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->kvm.created_vcpus);
1057 
1058 	/* Push the metadata pages to the teardown memcache */
1059 	for (idx = 0; idx < hyp_vm->kvm.created_vcpus; ++idx) {
1060 		struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vm->vcpus[idx];
1061 		struct kvm_hyp_memcache *vcpu_mc;
1062 		void *addr;
1063 
1064 		if (!hyp_vcpu)
1065 			continue;
1066 
1067 		vcpu_mc = &hyp_vcpu->vcpu.arch.stage2_mc;
1068 		while (vcpu_mc->nr_pages) {
1069 			unsigned long order;
1070 
1071 			addr = pop_hyp_memcache(vcpu_mc, hyp_phys_to_virt, &order);
1072 			/* We don't expect the vcpu memcache to contain higher-order pages. */
1073 			WARN_ON(order);
1074 			push_hyp_memcache(mc, addr, hyp_virt_to_phys, order);
1075 			unmap_donated_memory_noclear(addr, PAGE_SIZE);
1076 		}
1077 
1078 		if (pkvm_hyp_vcpu_is_protected(hyp_vcpu))
1079 			teardown_sve_state(hyp_vcpu);
1080 
1081 		hyp_free_account(hyp_vcpu, host_kvm);
1082 	}
1083 
1084 	hyp_free_account((__force void *)hyp_vm->kvm.arch.mmu.last_vcpu_ran,
1085 			 host_kvm);
1086 	hyp_free_account(hyp_vm, host_kvm);
1087 	hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
1088 	return 0;
1089 
1090 err_unlock:
1091 	hyp_write_unlock(&vm_table_lock);
1092 	return err;
1093 }
1094 
1095 int pkvm_load_pvmfw_pages(struct pkvm_hyp_vm *vm, u64 ipa, phys_addr_t phys,
1096 			  u64 size)
1097 {
1098 	struct kvm_protected_vm *pkvm = &vm->kvm.arch.pkvm;
1099 	u64 npages, offset = ipa - pkvm->pvmfw_load_addr;
1100 	void *src = hyp_phys_to_virt(pvmfw_base) + offset;
1101 
1102 	if (offset >= pvmfw_size)
1103 		return -EINVAL;
1104 
1105 	size = min(size, pvmfw_size - offset);
1106 	if (!PAGE_ALIGNED(size) || !PAGE_ALIGNED(src))
1107 		return -EINVAL;
1108 
1109 	npages = size >> PAGE_SHIFT;
1110 	while (npages--) {
1111 		/*
1112 		 * No need for cache maintenance here, as the pgtable code will
1113 		 * take care of this when installing the pte in the guest's
1114 		 * stage-2 page table.
1115 		 */
1116 		memcpy(hyp_fixmap_map(phys), src, PAGE_SIZE);
1117 		hyp_fixmap_unmap();
1118 
1119 		src += PAGE_SIZE;
1120 		phys += PAGE_SIZE;
1121 	}
1122 
1123 	return 0;
1124 }
1125 
1126 void pkvm_poison_pvmfw_pages(void)
1127 {
1128 	u64 npages = pvmfw_size >> PAGE_SHIFT;
1129 	phys_addr_t addr = pvmfw_base;
1130 
1131 	while (npages--) {
1132 		hyp_poison_page(addr, PAGE_SIZE);
1133 		addr += PAGE_SIZE;
1134 	}
1135 }
1136 
1137 /*
1138  * This function sets the registers on the vcpu to their architecturally defined
1139  * reset values.
1140  *
1141  * Note: Can only be called by the vcpu on itself, after it has been turned on.
1142  */
1143 void pkvm_reset_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
1144 {
1145 	struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
1146 	struct vcpu_reset_state *reset_state = &vcpu->arch.reset_state;
1147 	struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
1148 
1149 	WARN_ON(!reset_state->reset);
1150 
1151 	kvm_reset_vcpu_core(vcpu);
1152 	kvm_reset_pvm_sys_regs(vcpu);
1153 
1154 	/* Must be done after resetting sys registers. */
1155 	kvm_reset_vcpu_psci(vcpu, reset_state);
1156 	if (hyp_vm->pvmfw_entry_vcpu == hyp_vcpu) {
1157 		struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
1158 		u64 entry = hyp_vm->kvm.arch.pkvm.pvmfw_load_addr;
1159 		int i;
1160 
1161 		/* X0 - X14 provided by the VMM (preserved) */
1162 		for (i = 0; i <= 14; ++i) {
1163 			u64 val = vcpu_get_reg(host_vcpu, i);
1164 
1165 			vcpu_set_reg(&hyp_vcpu->vcpu, i, val);
1166 		}
1167 
1168 		/* X15: Boot protocol version */
1169 		vcpu_set_reg(&hyp_vcpu->vcpu, 15, 0);
1170 
1171 		/* PC: IPA of pvmfw base */
1172 		*vcpu_pc(&hyp_vcpu->vcpu) = entry;
1173 		hyp_vm->pvmfw_entry_vcpu = NULL;
1174 
1175 		/* Auto enroll MMIO guard */
1176 		set_bit(KVM_ARCH_FLAG_MMIO_GUARD, &hyp_vm->kvm.arch.flags);
1177 	}
1178 
1179 	if (pkvm_hyp_vcpu_is_protected(hyp_vcpu) && vcpu_has_sve(vcpu))
1180 		memset(vcpu->arch.sve_state, 0, vcpu_sve_state_size(vcpu));
1181 
1182 	reset_state->reset = false;
1183 
1184 	hyp_vcpu->exit_code = 0;
1185 
1186 	WARN_ON(hyp_vcpu->power_state != PSCI_0_2_AFFINITY_LEVEL_ON_PENDING);
1187 	WRITE_ONCE(hyp_vcpu->power_state, PSCI_0_2_AFFINITY_LEVEL_ON);
1188 }
1189 
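/*
 * Reserve the next free slot in the vCPU's hyp request page and tag it with
 * 'type'. Returns NULL if no slot is left before the terminating LAST_REQ.
 */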
1190 struct kvm_hyp_req *pkvm_hyp_req_reserve(struct pkvm_hyp_vcpu *hyp_vcpu, u8 type)
1191 {
1192 	struct kvm_hyp_req *next, *hyp_req = hyp_vcpu->vcpu.arch.hyp_reqs;
1193 	int i;
1194 
1195 	for (i = 0; i < KVM_HYP_REQ_MAX; i++) {
1196 		if (hyp_req->type == KVM_HYP_LAST_REQ)
1197 			break;
1198 		hyp_req++;
1199 	}
1200 
1201 	/* The last entry of the page _must_ be a LAST_REQ */
1202 	WARN_ON(i >= KVM_HYP_REQ_MAX);
1203 
1204 	/* We need at least one empty slot to write LAST_REQ */
1205 	if (i + 1 >= KVM_HYP_REQ_MAX)
1206 		return NULL;
1207 
1208 	hyp_req->type = type;
1209 
1210 	next = hyp_req + 1;
1211 	next->type = KVM_HYP_LAST_REQ;
1212 
1213 	return hyp_req;
1214 }
1215 
1216 struct pkvm_hyp_vcpu *pkvm_mpidr_to_hyp_vcpu(struct pkvm_hyp_vm *hyp_vm,
1217 					     u64 mpidr)
1218 {
1219 	struct pkvm_hyp_vcpu *hyp_vcpu;
1220 	int i;
1221 
1222 	mpidr &= MPIDR_HWID_BITMASK;
1223 
1224 	hyp_spin_lock(&hyp_vm->vcpus_lock);
1225 	for (i = 0; i < hyp_vm->kvm.created_vcpus; i++) {
1226 		hyp_vcpu = hyp_vm->vcpus[i];
1227 		if (!hyp_vcpu)
1228 			continue;
1229 
1230 		if (mpidr == kvm_vcpu_get_mpidr_aff(&hyp_vcpu->vcpu))
1231 			goto unlock;
1232 	}
1233 	hyp_vcpu = NULL;
1234 unlock:
1235 	hyp_spin_unlock(&hyp_vm->vcpus_lock);
1236 	return hyp_vcpu;
1237 }
1238 
1239 /*
1240  * Returns true if the hypervisor has handled the PSCI call, and control should
1241  * go back to the guest, or false if the host needs to do some additional work
1242  * (i.e., wake up the vcpu).
1243  */
1244 static bool pvm_psci_vcpu_on(struct pkvm_hyp_vcpu *hyp_vcpu)
1245 {
1246 	struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
1247 	struct vcpu_reset_state *reset_state;
1248 	struct pkvm_hyp_vcpu *target;
1249 	unsigned long cpu_id, ret;
1250 	int power_state;
1251 
1252 	cpu_id = smccc_get_arg1(&hyp_vcpu->vcpu);
1253 	if (!kvm_psci_valid_affinity(&hyp_vcpu->vcpu, cpu_id)) {
1254 		ret = PSCI_RET_INVALID_PARAMS;
1255 		goto error;
1256 	}
1257 
1258 	target = pkvm_mpidr_to_hyp_vcpu(hyp_vm, cpu_id);
1259 	if (!target) {
1260 		ret = PSCI_RET_INVALID_PARAMS;
1261 		goto error;
1262 	}
1263 
1264 	/*
1265 	 * Make sure the requested vcpu is not on to begin with.
1266 	 * Atomic to avoid race between vcpus trying to power on the same vcpu.
1267 	 */
1268 	power_state = cmpxchg(&target->power_state,
1269 			      PSCI_0_2_AFFINITY_LEVEL_OFF,
1270 			      PSCI_0_2_AFFINITY_LEVEL_ON_PENDING);
1271 	switch (power_state) {
1272 	case PSCI_0_2_AFFINITY_LEVEL_ON_PENDING:
1273 		ret = PSCI_RET_ON_PENDING;
1274 		goto error;
1275 	case PSCI_0_2_AFFINITY_LEVEL_ON:
1276 		ret = PSCI_RET_ALREADY_ON;
1277 		goto error;
1278 	case PSCI_0_2_AFFINITY_LEVEL_OFF:
1279 		break;
1280 	default:
1281 		ret = PSCI_RET_INTERNAL_FAILURE;
1282 		goto error;
1283 	}
1284 
1285 	reset_state = &target->vcpu.arch.reset_state;
1286 	reset_state->pc = smccc_get_arg2(&hyp_vcpu->vcpu);
1287 	reset_state->r0 = smccc_get_arg3(&hyp_vcpu->vcpu);
1288 	/* Propagate caller endianness */
1289 	reset_state->be = kvm_vcpu_is_be(&hyp_vcpu->vcpu);
1290 	reset_state->reset = true;
1291 
1292 	/*
1293 	 * Return to the host, which should make the KVM_REQ_VCPU_RESET request
1294 	 * as well as kvm_vcpu_wake_up() to schedule the vcpu.
1295 	 */
1296 	return false;
1297 
1298 error:
1299 	/* If there's an error go back straight to the guest. */
1300 	smccc_set_retval(&hyp_vcpu->vcpu, ret, 0, 0, 0);
1301 	return true;
1302 }
1303 
1304 static bool pvm_psci_vcpu_affinity_info(struct pkvm_hyp_vcpu *hyp_vcpu)
1305 {
1306 	unsigned long target_affinity_mask, target_affinity, lowest_affinity_level;
1307 	struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
1308 	struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
1309 	unsigned long mpidr, ret;
1310 	int i, matching_cpus = 0;
1311 
1312 	target_affinity = smccc_get_arg1(vcpu);
1313 	lowest_affinity_level = smccc_get_arg2(vcpu);
1314 	if (!kvm_psci_valid_affinity(vcpu, target_affinity)) {
1315 		ret = PSCI_RET_INVALID_PARAMS;
1316 		goto done;
1317 	}
1318 
1319 	/* Determine target affinity mask */
1320 	target_affinity_mask = psci_affinity_mask(lowest_affinity_level);
1321 	if (!target_affinity_mask) {
1322 		ret = PSCI_RET_INVALID_PARAMS;
1323 		goto done;
1324 	}
1325 
1326 	/* Ignore other bits of target affinity */
1327 	target_affinity &= target_affinity_mask;
1328 	ret = PSCI_0_2_AFFINITY_LEVEL_OFF;
1329 
1330 	/*
1331 	 * If at least one vcpu matching the target affinity is ON, return ON.
1332 	 * Otherwise, if at least one is ON_PENDING, return ON_PENDING.
1333 	 * Otherwise, return OFF.
1334 	 */
1335 	hyp_spin_lock(&hyp_vm->vcpus_lock);
1336 	for (i = 0; i < hyp_vm->kvm.created_vcpus; i++) {
1337 		struct pkvm_hyp_vcpu *target = hyp_vm->vcpus[i];
1338 
1339 		if (!target)
1340 			continue;
1341 
1342 		mpidr = kvm_vcpu_get_mpidr_aff(&target->vcpu);
1343 
1344 		if ((mpidr & target_affinity_mask) == target_affinity) {
1345 			int power_state;
1346 
1347 			matching_cpus++;
1348 			power_state = READ_ONCE(target->power_state);
1349 			switch (power_state) {
1350 			case PSCI_0_2_AFFINITY_LEVEL_ON_PENDING:
1351 				ret = PSCI_0_2_AFFINITY_LEVEL_ON_PENDING;
1352 				break;
1353 			case PSCI_0_2_AFFINITY_LEVEL_ON:
1354 				ret = PSCI_0_2_AFFINITY_LEVEL_ON;
1355 				goto unlock;
1356 			case PSCI_0_2_AFFINITY_LEVEL_OFF:
1357 				break;
1358 			default:
1359 				ret = PSCI_RET_INTERNAL_FAILURE;
1360 				goto unlock;
1361 			}
1362 		}
1363 	}
1364 
1365 	if (!matching_cpus)
1366 		ret = PSCI_RET_INVALID_PARAMS;
1367 unlock:
1368 	hyp_spin_unlock(&hyp_vm->vcpus_lock);
1369 done:
1370 	/* Nothing to be handled by the host. Go back to the guest. */
1371 	smccc_set_retval(vcpu, ret, 0, 0, 0);
1372 	return true;
1373 }
1374 
1375 /*
1376  * Returns true if the hypervisor has handled the PSCI call, and control should
1377  * go back to the guest, or false if the host needs to do some additional work
1378  * (e.g., turn off and update vcpu scheduling status).
1379  */
1380 static bool pvm_psci_vcpu_off(struct pkvm_hyp_vcpu *hyp_vcpu)
1381 {
1382 	WARN_ON(hyp_vcpu->power_state != PSCI_0_2_AFFINITY_LEVEL_ON);
1383 	WRITE_ONCE(hyp_vcpu->power_state, PSCI_0_2_AFFINITY_LEVEL_OFF);
1384 
1385 	/* Return to the host so that it can finish powering off the vcpu. */
1386 	return false;
1387 }
1388 
1389 static bool pvm_psci_version(struct pkvm_hyp_vcpu *hyp_vcpu)
1390 {
1391 	/* Nothing to be handled by the host. Go back to the guest. */
1392 	smccc_set_retval(&hyp_vcpu->vcpu, KVM_ARM_PSCI_1_1, 0, 0, 0);
1393 	return true;
1394 }
1395 
1396 static bool pvm_psci_not_supported(struct pkvm_hyp_vcpu *hyp_vcpu)
1397 {
1398 	/* Nothing to be handled by the host. Go back to the guest. */
1399 	smccc_set_retval(&hyp_vcpu->vcpu, PSCI_RET_NOT_SUPPORTED, 0, 0, 0);
1400 	return true;
1401 }
1402 
1403 static bool pvm_psci_features(struct pkvm_hyp_vcpu *hyp_vcpu)
1404 {
1405 	struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
1406 	u32 feature = smccc_get_arg1(vcpu);
1407 	unsigned long val;
1408 
1409 	switch (feature) {
1410 	case PSCI_0_2_FN_PSCI_VERSION:
1411 	case PSCI_0_2_FN_CPU_SUSPEND:
1412 	case PSCI_0_2_FN64_CPU_SUSPEND:
1413 	case PSCI_0_2_FN_CPU_OFF:
1414 	case PSCI_0_2_FN_CPU_ON:
1415 	case PSCI_0_2_FN64_CPU_ON:
1416 	case PSCI_0_2_FN_AFFINITY_INFO:
1417 	case PSCI_0_2_FN64_AFFINITY_INFO:
1418 	case PSCI_0_2_FN_SYSTEM_OFF:
1419 	case PSCI_0_2_FN_SYSTEM_RESET:
1420 	case PSCI_1_0_FN_PSCI_FEATURES:
1421 	case PSCI_1_1_FN_SYSTEM_RESET2:
1422 	case PSCI_1_1_FN64_SYSTEM_RESET2:
1423 	case ARM_SMCCC_VERSION_FUNC_ID:
1424 		val = PSCI_RET_SUCCESS;
1425 		break;
1426 	default:
1427 		val = PSCI_RET_NOT_SUPPORTED;
1428 		break;
1429 	}
1430 
1431 	/* Nothing to be handled by the host. Go back to the guest. */
1432 	smccc_set_retval(vcpu, val, 0, 0, 0);
1433 	return true;
1434 }
1435 
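/*
 * Dispatch a PSCI call from a protected guest. Returns true if the call was
 * fully handled at EL2, or false if the host must complete it.
 */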
1436 static bool pkvm_handle_psci(struct pkvm_hyp_vcpu *hyp_vcpu)
1437 {
1438 	struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
1439 	u32 psci_fn = smccc_get_function(vcpu);
1440 
1441 	switch (psci_fn) {
1442 	case PSCI_0_2_FN_CPU_ON:
1443 		kvm_psci_narrow_to_32bit(vcpu);
1444 		fallthrough;
1445 	case PSCI_0_2_FN64_CPU_ON:
1446 		return pvm_psci_vcpu_on(hyp_vcpu);
1447 	case PSCI_0_2_FN_CPU_OFF:
1448 		return pvm_psci_vcpu_off(hyp_vcpu);
1449 	case PSCI_0_2_FN_AFFINITY_INFO:
1450 		kvm_psci_narrow_to_32bit(vcpu);
1451 		fallthrough;
1452 	case PSCI_0_2_FN64_AFFINITY_INFO:
1453 		return pvm_psci_vcpu_affinity_info(hyp_vcpu);
1454 	case PSCI_0_2_FN_PSCI_VERSION:
1455 		return pvm_psci_version(hyp_vcpu);
1456 	case PSCI_1_0_FN_PSCI_FEATURES:
1457 		return pvm_psci_features(hyp_vcpu);
1458 	case PSCI_0_2_FN_SYSTEM_RESET:
1459 	case PSCI_0_2_FN_CPU_SUSPEND:
1460 	case PSCI_0_2_FN64_CPU_SUSPEND:
1461 	case PSCI_0_2_FN_SYSTEM_OFF:
1462 	case PSCI_1_1_FN_SYSTEM_RESET2:
1463 	case PSCI_1_1_FN64_SYSTEM_RESET2:
1464 		return false; /* Handled by the host. */
1465 	default:
1466 		break;
1467 	}
1468 
1469 	return pvm_psci_not_supported(hyp_vcpu);
1470 }
1471 
1472 int pkvm_handle_empty_memcache(struct pkvm_hyp_vcpu *hyp_vcpu, u64 *exit_code)
1473 {
1474 	struct kvm_hyp_req *req;
1475 
1476 	req = pkvm_hyp_req_reserve(hyp_vcpu, KVM_HYP_REQ_TYPE_MEM);
1477 	if (!req)
1478 		return -ENOMEM;
1479 
1480 	req->mem.dest = REQ_MEM_DEST_VCPU_MEMCACHE;
1481 	req->mem.nr_pages = kvm_mmu_cache_min_pages(&hyp_vcpu->vcpu.kvm->arch.mmu);
1482 
1483 	write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR);
1484 
1485 	*exit_code = ARM_EXCEPTION_HYP_REQ;
1486 
1487 	return 0;
1488 }
1489 
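/*
 * Handle the MEM_SHARE hypercall: share guest pages with the host, asking the
 * host for a stage-2 mapping or more memcache pages when required.
 */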
1490 static bool pkvm_memshare_call(struct pkvm_hyp_vcpu *hyp_vcpu, u64 *exit_code)
1491 {
1492 	struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
1493 	struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
1494 	u64 ipa = smccc_get_arg1(vcpu);
1495 	u64 nr_pages = smccc_get_arg2(vcpu);
1496 	u64 arg3 = smccc_get_arg3(vcpu);
1497 	struct kvm_hyp_req *req;
1498 	u64 nr_shared;
1499 	int err;
1500 
1501 	/* Legacy guests have arg2 set to 0 */
1502 	if (nr_pages == 0)
1503 		nr_pages = 1;
1504 
1505 	if (arg3 || !PAGE_ALIGNED(ipa))
1506 		goto out_guest_err;
1507 
1508 	err = __pkvm_guest_share_host(hyp_vcpu, ipa, nr_pages, &nr_shared);
1509 	switch (err) {
1510 	case 0:
1511 		atomic64_add(nr_shared * PAGE_SIZE,
1512 			     &hyp_vm->host_kvm->stat.protected_shared_mem);
1513 		smccc_set_retval(vcpu, SMCCC_RET_SUCCESS, nr_shared, 0, 0);
1514 
1515 		return true;
1516 	case -EFAULT:
1517 		req = pkvm_hyp_req_reserve(hyp_vcpu, KVM_HYP_REQ_TYPE_MAP);
1518 		if (!req)
1519 			goto out_guest_err;
1520 
1521 		req->map.guest_ipa = ipa;
1522 		req->map.size = nr_pages << PAGE_SHIFT;
1523 
1524 		/*
1525 		 * We're about to go back to the host... let's not waste time
1526 		 * and check the memcache while we're at it.
1527 		 */
1528 		fallthrough;
1529 	case -ENOMEM:
1530 		if (pkvm_handle_empty_memcache(hyp_vcpu, exit_code))
1531 			goto out_guest_err;
1532 
1533 		goto out_host;
1534 	}
1535 
1536 out_guest_err:
1537 	smccc_set_retval(vcpu, SMCCC_RET_INVALID_PARAMETER, 0, 0, 0);
1538 	return true;
1539 
1540 out_host:
1541 	return false;
1542 }
1543 
1544 static bool pkvm_memunshare_call(struct pkvm_hyp_vcpu *hyp_vcpu)
1545 {
1546 	struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
1547 	struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
1548 	u64 ipa = smccc_get_arg1(vcpu);
1549 	u64 nr_pages = smccc_get_arg2(vcpu);
1550 	u64 arg3 = smccc_get_arg3(vcpu);
1551 	u64 nr_unshared;
1552 	int err;
1553 
1554 	/* Legacy guests have arg2 set to 0 */
1555 	if (nr_pages == 0)
1556 		nr_pages = 1;
1557 
1558 	if (arg3 || !PAGE_ALIGNED(ipa))
1559 		goto out_guest_err;
1560 
1561 	err = __pkvm_guest_unshare_host(hyp_vcpu, ipa, nr_pages, &nr_unshared);
1562 	if (err)
1563 		goto out_guest_err;
1564 
1565 	atomic64_add(nr_unshared * PAGE_SIZE,
1566 		     &hyp_vm->host_kvm->stat.protected_shared_mem);
1567 	smccc_set_retval(vcpu, SMCCC_RET_SUCCESS, nr_unshared, 0, 0);
1568 	return true;
1569 
1570 out_guest_err:
1571 	smccc_set_retval(vcpu, SMCCC_RET_INVALID_PARAMETER, 0, 0, 0);
1572 	return true;
1573 }
1574 
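/* Handle the MMIO guard map hypercalls, registering guest MMIO ranges with hyp. */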
1575 static bool pkvm_install_ioguard_page(struct pkvm_hyp_vcpu *hyp_vcpu,
1576 				      u64 *exit_code)
1577 {
1578 	u64 ipa = smccc_get_arg1(&hyp_vcpu->vcpu);
1579 	u64 nr_pages = smccc_get_arg2(&hyp_vcpu->vcpu);
1580 	u32 fn = smccc_get_function(&hyp_vcpu->vcpu);
1581 	u64 retval = SMCCC_RET_SUCCESS;
1582 	u64 nr_guarded = 0;
1583 	int ret = -EINVAL;
1584 
1585 	/* Legacy non-range version, arg2|arg3 might be garbage */
1586 	if (fn == ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_MAP_FUNC_ID)
1587 		nr_pages = 1;
1588 	else if (smccc_get_arg3(&hyp_vcpu->vcpu))
1589 		goto out_guest_err;
1590 
1591 	ret = __pkvm_install_ioguard_page(hyp_vcpu, ipa, nr_pages, &nr_guarded);
1592 	if (ret == -ENOMEM && !pkvm_handle_empty_memcache(hyp_vcpu, exit_code))
1593 		return false;
1594 
1595 out_guest_err:
1596 	if (ret)
1597 		retval = SMCCC_RET_INVALID_PARAMETER;
1598 
1599 	smccc_set_retval(&hyp_vcpu->vcpu, retval, nr_guarded, 0, 0);
1600 	return true;
1601 }
1602 
1603 static bool pkvm_remove_ioguard_page(struct pkvm_hyp_vcpu *hyp_vcpu,
1604 				     u64 *exit_code)
1605 {
1606 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
1607 	u64 nr_pages = smccc_get_arg2(&hyp_vcpu->vcpu);
1608 	u32 fn = smccc_get_function(&hyp_vcpu->vcpu);
1609 	u64 retval = SMCCC_RET_INVALID_PARAMETER;
1610 
1611 	/* Legacy non-range version, arg2|arg3 might be garbage */
1612 	if (fn == ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_UNMAP_FUNC_ID)
1613 		nr_pages = 1;
1614 	else if (smccc_get_arg3(&hyp_vcpu->vcpu))
1615 		goto out_guest_err;
1616 
1617 	if (!test_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vm->kvm.arch.flags))
1618 		goto out_guest_err;
1619 
1620 	/*
1621 	 * Guests older than 6.12 could issue unmap HVCs. However, this operation
1622 	 * is not necessary:
1623 	 *   - ioguard is only there to let the hypervisor know where the
1624 	 *   MMIO regions are.
1625 	 *   - MMIO_GUARD_MAP will not fail on multiple calls for the same
1626 	 *   region.
1627 	 *
1628 	 * Keep the HVCs for compatibility reasons, but do not do anything.
1629 	 */
1630 	retval = SMCCC_RET_SUCCESS;
1631 
1632 out_guest_err:
1633 	smccc_set_retval(&hyp_vcpu->vcpu, retval, nr_pages, 0, 0);
1634 	return true;
1635 }
1636 
1637 static bool pkvm_meminfo_call(struct pkvm_hyp_vcpu *hyp_vcpu)
1638 {
1639 	struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
1640 	u64 arg1 = smccc_get_arg1(vcpu);
1641 	u64 arg2 = smccc_get_arg2(vcpu);
1642 	u64 arg3 = smccc_get_arg3(vcpu);
1643 
1644 	if (arg1 || arg2 || arg3)
1645 		goto out_guest_err;
1646 
1647 	smccc_set_retval(vcpu, PAGE_SIZE, KVM_FUNC_HAS_RANGE, 0, 0);
1648 	return true;
1649 
1650 out_guest_err:
1651 	smccc_set_retval(vcpu, SMCCC_RET_INVALID_PARAMETER, 0, 0, 0);
1652 	return true;
1653 }
1654 
1655 static bool pkvm_memrelinquish_call(struct pkvm_hyp_vcpu *hyp_vcpu,
1656 				    u64 *exit_code)
1657 {
1658 	struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;
1659 	u64 ipa = smccc_get_arg1(vcpu);
1660 	u64 arg2 = smccc_get_arg2(vcpu);
1661 	u64 arg3 = smccc_get_arg3(vcpu);
1662 	u64 pa = 0;
1663 	int ret;
1664 
1665 	if (arg2 || arg3)
1666 		goto out_guest_err;
1667 
1668 	ret = __pkvm_guest_relinquish_to_host(hyp_vcpu, ipa, &pa);
1669 	if (ret == -E2BIG) {
1670 		struct kvm_hyp_req *req = pkvm_hyp_req_reserve(hyp_vcpu, KVM_HYP_REQ_TYPE_SPLIT);
1671 
1672 		if (!req) {
1673 			ret = -ENOMEM;
1674 			goto out_guest_err;
1675 		}
1676 
1677 		req->split.guest_ipa = ALIGN_DOWN(ipa, PMD_SIZE);
1678 		req->split.size = PMD_SIZE;
1679 
1680 		write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR);
1681 		*exit_code = ARM_EXCEPTION_HYP_REQ;
1682 
1683 		return false;
1684 	} else if (ret) {
1685 		goto out_guest_err;
1686 	}
1687 
1688 	if (pa != 0) {
1689 		/* Now pass to host. */
1690 		return false;
1691 	}
1692 
1693 	/* This was a NOP as no page was actually mapped at the IPA. */
1694 	smccc_set_retval(vcpu, 0, 0, 0, 0);
1695 	return true;
1696 
1697 out_guest_err:
1698 	smccc_set_retval(vcpu, SMCCC_RET_INVALID_PARAMETER, 0, 0, 0);
1699 	return true;
1700 }
1701 
1702 bool smccc_trng_available;
1703 
1704 static bool pkvm_forward_trng(struct kvm_vcpu *vcpu)
1705 {
1706 	u32 fn = smccc_get_function(vcpu);
1707 	struct arm_smccc_res res;
1708 	unsigned long arg1 = 0;
1709 
1710 	/*
1711 	 * Forward TRNG calls to EL3, as we can't trust the host to handle
1712 	 * these for us.
1713 	 */
1714 	switch (fn) {
1715 	case ARM_SMCCC_TRNG_FEATURES:
1716 	case ARM_SMCCC_TRNG_RND32:
1717 	case ARM_SMCCC_TRNG_RND64:
1718 		arg1 = smccc_get_arg1(vcpu);
1719 		fallthrough;
1720 	case ARM_SMCCC_TRNG_VERSION:
1721 	case ARM_SMCCC_TRNG_GET_UUID:
1722 		arm_smccc_1_1_smc(fn, arg1, &res);
1723 		smccc_set_retval(vcpu, res.a0, res.a1, res.a2, res.a3);
1724 		memzero_explicit(&res, sizeof(res));
1725 		break;
1726 	}
1727 
1728 	return true;
1729 }
1730 
1731 #define ARM_SMCCC_TRNG_VER_1_0			(1ULL << 16 | 0ULL)
1732 #define ARM_SMCCC_TRNG_INVALID_PARAMETERS	ULL(-2)
1733 #define ARM_SMCCC_TRNG_SMC64_BITS		192
1734 
1735 static bool module_handle_guest_trng_rng(struct kvm_vcpu *vcpu)
1736 {
1737 	u64 ret;
1738 	u64 entropy[DIV_ROUND_UP(ARM_SMCCC_TRNG_SMC64_BITS, 64)];
1739 	u64 nbits;
1740 
1741 	nbits = smccc_get_arg1(vcpu);
1742 	if (nbits == 0 || nbits > ARM_SMCCC_TRNG_SMC64_BITS) {
1743 		ret = ARM_SMCCC_TRNG_INVALID_PARAMETERS;
1744 		goto err;
1745 	}
1746 
1747 	memset(entropy, 0, sizeof(entropy));
1748 
1749 	ret = module_get_guest_trng_rng(entropy, nbits);
1750 	if (ret == SMCCC_RET_SUCCESS) {
1751 		smccc_set_retval(vcpu, SMCCC_RET_SUCCESS, entropy[2],
1752 				 entropy[1], entropy[0]);
1753 		return true;
1754 	}
1755 
1756 err:
1757 	smccc_set_retval(vcpu, ret, 0, 0, 0);
1758 	return true;
1759 }
1760 
1761 static bool module_handle_guest_trng(struct kvm_vcpu *vcpu)
1762 {
1763 	u32 fn;
1764 	u64 ret = SMCCC_RET_NOT_SUPPORTED;
1765 	const uuid_t *uuid;
1766 
1767 	fn = smccc_get_function(vcpu);
1768 	uuid = module_get_guest_trng_uuid();
1769 	if (!uuid)
1770 		return false;
1771 
1772 	switch (fn) {
1773 	case ARM_SMCCC_TRNG_VERSION:
1774 		ret = ARM_SMCCC_TRNG_VER_1_0;
1775 		break;
1776 	case ARM_SMCCC_TRNG_FEATURES:
1777 		switch (smccc_get_arg1(vcpu)) {
1778 		case ARM_SMCCC_TRNG_VERSION:
1779 		case ARM_SMCCC_TRNG_FEATURES:
1780 		case ARM_SMCCC_TRNG_GET_UUID:
1781 		case ARM_SMCCC_TRNG_RND64:
1782 			ret = SMCCC_RET_SUCCESS;
1783 			break;
1784 		}
1785 		break;
1786 	case ARM_SMCCC_TRNG_GET_UUID:
1787 		smccc_set_retval(vcpu, le32_to_cpu(((u32 *)uuid->b)[0]),
1788 				 le32_to_cpu(((u32 *)uuid->b)[1]),
1789 				 le32_to_cpu(((u32 *)uuid->b)[2]),
1790 				 le32_to_cpu(((u32 *)uuid->b)[3]));
1791 		return true;
1792 	case ARM_SMCCC_TRNG_RND64:
1793 		return module_handle_guest_trng_rng(vcpu);
1794 	default:
1795 		return false;
1796 	}
1797 
1798 	smccc_set_retval(vcpu, ret, 0, 0, 0);
1799 	return true;
1800 }
1801 
1802 
1803 static bool is_standard_secure_service_call(u64 func_id)
1804 {
1805 	return (func_id >= PSCI_0_2_FN_BASE && func_id <= ARM_CCA_FUNC_END) ||
1806 	       (func_id >= PSCI_0_2_FN64_BASE && func_id <= ARM_CCA_64BIT_FUNC_END);
1807 }
1808 
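/*
 * Give a hypervisor module the chance to handle a non-standard SMC issued by a
 * protected guest, provided SMC forwarding is enabled for the VM.
 */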
1809 bool kvm_handle_pvm_smc64(struct kvm_vcpu *vcpu, u64 *exit_code)
1810 {
1811 	bool handled = false;
1812 	struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
1813 	struct pkvm_hyp_vm *vm;
1814 	struct pkvm_hyp_vcpu *hyp_vcpu;
1815 	struct arm_smccc_1_2_regs regs;
1816 	struct arm_smccc_1_2_regs res;
1817 	DECLARE_REG(u64, func_id, ctxt, 0);
1818 
1819 	hyp_vcpu = container_of(vcpu, struct pkvm_hyp_vcpu, vcpu);
1820 	vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
1821 
1822 	if (is_standard_secure_service_call(func_id))
1823 		return false;
1824 
1825 	if (!vm->kvm.arch.pkvm.smc_forwarded)
1826 		return false;
1827 
1828 	memcpy(&regs, &ctxt->regs, sizeof(regs));
1829 	handled = module_handle_guest_smc(&regs, &res, vm->kvm.arch.pkvm.handle);
1830 	if (handled)
1831 		memcpy(&ctxt->regs.regs[0], &res, sizeof(res));
1832 	else
1833 		ctxt->regs.regs[0] = -1;
1834 
1835 	__kvm_skip_instr(vcpu);
1836 
1837 	return handled;
1838 }
1839 
1840 /*
1841  * Handler for protected VM HVC calls.
1842  *
1843  * Returns true if the hypervisor has handled the exit, and control should go
1844  * back to the guest, or false if it hasn't.
1845  */
1846 bool kvm_handle_pvm_hvc64(struct kvm_vcpu *vcpu, u64 *exit_code)
1847 {
1848 	u64 val[4] = { SMCCC_RET_NOT_SUPPORTED };
1849 	u32 fn = smccc_get_function(vcpu);
1850 	struct pkvm_hyp_vcpu *hyp_vcpu;
1851 
1852 	hyp_vcpu = container_of(vcpu, struct pkvm_hyp_vcpu, vcpu);
1853 
1854 	switch (fn) {
1855 	case ARM_SMCCC_VERSION_FUNC_ID:
1856 		/* Nothing to be handled by the host. Go back to the guest. */
1857 		val[0] = ARM_SMCCC_VERSION_1_2;
1858 		break;
1859 	case ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID:
1860 		val[0] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0;
1861 		val[1] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1;
1862 		val[2] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2;
1863 		val[3] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3;
1864 		break;
1865 	case ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID:
1866 		val[0] = BIT(ARM_SMCCC_KVM_FUNC_FEATURES);
1867 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_HYP_MEMINFO);
1868 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MEM_SHARE);
1869 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MEM_UNSHARE);
1870 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_INFO);
1871 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_ENROLL);
1872 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_MAP);
1873 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_UNMAP);
1874 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_RGUARD_MAP);
1875 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_RGUARD_UNMAP);
1876 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MEM_RELINQUISH);
1877 		break;
1878 	case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_ENROLL_FUNC_ID:
1879 		set_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vcpu->kvm->arch.flags);
1880 		val[0] = SMCCC_RET_SUCCESS;
1881 		break;
1882 	case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_MAP_FUNC_ID:
1883 	case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_RGUARD_MAP_FUNC_ID:
1884 		return pkvm_install_ioguard_page(hyp_vcpu, exit_code);
1885 	case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_UNMAP_FUNC_ID:
1886 	case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_RGUARD_UNMAP_FUNC_ID:
1887 		return pkvm_remove_ioguard_page(hyp_vcpu, exit_code);
1888 	case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_INFO_FUNC_ID:
1889 	case ARM_SMCCC_VENDOR_HYP_KVM_HYP_MEMINFO_FUNC_ID:
1890 		return pkvm_meminfo_call(hyp_vcpu);
1891 	case ARM_SMCCC_VENDOR_HYP_KVM_MEM_SHARE_FUNC_ID:
1892 		return pkvm_memshare_call(hyp_vcpu, exit_code);
1893 	case ARM_SMCCC_VENDOR_HYP_KVM_MEM_UNSHARE_FUNC_ID:
1894 		return pkvm_memunshare_call(hyp_vcpu);
1895 	case ARM_SMCCC_VENDOR_HYP_KVM_MEM_RELINQUISH_FUNC_ID:
1896 		return pkvm_memrelinquish_call(hyp_vcpu, exit_code);
1897 	case ARM_SMCCC_TRNG_VERSION ... ARM_SMCCC_TRNG_RND32:
1898 	case ARM_SMCCC_TRNG_RND64:
1899 		if (module_handle_guest_trng(vcpu))
1900 			return true;
1901 		if (smccc_trng_available)
1902 			return pkvm_forward_trng(vcpu);
1903 		break;
1904 	case ARM_SMCCC_VENDOR_HYP_KVM_PVIOMMU_OP_FUNC_ID:
1905 		return kvm_handle_pviommu_hvc(vcpu, exit_code);
1906 	case ARM_SMCCC_VENDOR_HYP_KVM_DEV_REQ_MMIO_FUNC_ID:
1907 		return pkvm_device_request_mmio(hyp_vcpu, exit_code);
1908 	case ARM_SMCCC_VENDOR_HYP_KVM_DEV_REQ_DMA_FUNC_ID:
1909 		return pkvm_device_request_dma(hyp_vcpu, exit_code);
1910 	default:
1911 		if (is_ffa_call(fn))
1912 			return kvm_guest_ffa_handler(hyp_vcpu, exit_code);
1913 		else
1914 			return pkvm_handle_psci(hyp_vcpu);
1915 	}
1916 
1917 	smccc_set_retval(vcpu, val[0], val[1], val[2], val[3]);
1918 	return true;
1919 }
1920 
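/*
 * Map a pKVM VM handle to the ID used by the hypervisor's FF-A layer; a zero
 * handle denotes the host itself.
 */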
1921 u32 vm_handle_to_ffa_handle(pkvm_handle_t vm_handle)
1922 {
1923 	if (!vm_handle)
1924 		return HOST_FFA_ID;
1925 	else
1926 		return vm_handle_to_idx(vm_handle) + 1;
1927 }
1928 
1929 u32 hyp_vcpu_to_ffa_handle(struct pkvm_hyp_vcpu *hyp_vcpu)
1930 {
1931 	pkvm_handle_t vm_handle;
1932 
1933 	if (!hyp_vcpu)
1934 		return HOST_FFA_ID;
1935 
1936 	vm_handle = hyp_vcpu->vcpu.kvm->arch.pkvm.handle;
1937 	return vm_handle_to_ffa_handle(vm_handle);
1938 }
1939