1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2021 Google LLC
4  * Author: Fuad Tabba <tabba@google.com>
5  */
6 
7 #include <asm/kvm_arm.h>
8 #include <asm/kvm_asm.h>
9 #include <asm/kvm_host.h>
10 #include <asm/kvm_mmu.h>
11 #include <asm/memory.h>
12 
13 #include <linux/kvm_host.h>
14 #include <linux/mm.h>
15 
16 #include <kvm/arm_hypercalls.h>
17 #include <kvm/arm_psci.h>
18 
19 #include <nvhe/mem_protect.h>
20 #include <nvhe/mm.h>
21 #include <nvhe/pkvm.h>
22 #include <nvhe/trap_handler.h>
23 
24 /* Used by icache_is_vpipt(). */
25 unsigned long __icache_flags;
26 
27 /*
28  * Set trap register values based on features in ID_AA64PFR0.
29  */
30 static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
31 {
32 	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR0_EL1);
33 	u64 hcr_set = HCR_RW;
34 	u64 hcr_clear = 0;
35 	u64 cptr_set = 0;
36 
37 	/* Protected KVM does not support AArch32 guests. */
38 	BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0),
39 		PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY);
40 	BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1),
41 		PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY);
42 
43 	/*
44 	 * Linux guests assume support for floating-point and Advanced SIMD. Do
45 	 * not change the trapping behavior for these from the KVM default.
46 	 */
47 	BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP),
48 				PVM_ID_AA64PFR0_ALLOW));
49 	BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD),
50 				PVM_ID_AA64PFR0_ALLOW));
51 
52 	/* Trap RAS unless all current versions are supported */
53 	if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), feature_ids) <
54 	    ID_AA64PFR0_RAS_V1P1) {
55 		hcr_set |= HCR_TERR | HCR_TEA;
56 		hcr_clear |= HCR_FIEN;
57 	}
58 
59 	/* Trap AMU */
60 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_AMU), feature_ids)) {
61 		hcr_clear |= HCR_AMVOFFEN;
62 		cptr_set |= CPTR_EL2_TAM;
63 	}
64 
65 	/* Trap SVE */
66 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_SVE), feature_ids))
67 		cptr_set |= CPTR_EL2_TZ;
68 
69 	vcpu->arch.hcr_el2 |= hcr_set;
70 	vcpu->arch.hcr_el2 &= ~hcr_clear;
71 	vcpu->arch.cptr_el2 |= cptr_set;
72 }
73 
74 /*
75  * Set trap register values based on features in ID_AA64PFR1.
76  */
77 static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu)
78 {
79 	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR1_EL1);
80 	u64 hcr_set = 0;
81 	u64 hcr_clear = 0;
82 
83 	/* Memory Tagging: Trap and Treat as Untagged if not supported. */
84 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), feature_ids)) {
85 		hcr_set |= HCR_TID5;
86 		hcr_clear |= HCR_DCT | HCR_ATA;
87 	}
88 
89 	vcpu->arch.hcr_el2 |= hcr_set;
90 	vcpu->arch.hcr_el2 &= ~hcr_clear;
91 }
92 
93 /*
94  * Set trap register values based on features in ID_AA64DFR0.
95  */
96 static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
97 {
98 	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64DFR0_EL1);
99 	u64 mdcr_set = 0;
100 	u64 mdcr_clear = 0;
101 	u64 cptr_set = 0;
102 
103 	/* Trap/constrain PMU */
104 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMUVER), feature_ids)) {
105 		mdcr_set |= MDCR_EL2_TPM | MDCR_EL2_TPMCR;
106 		mdcr_clear |= MDCR_EL2_HPME | MDCR_EL2_MTPME |
107 			      MDCR_EL2_HPMN_MASK;
108 	}
109 
110 	/* Trap Debug */
111 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), feature_ids))
112 		mdcr_set |= MDCR_EL2_TDRA | MDCR_EL2_TDA;
113 
114 	/* Trap OS Double Lock */
115 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DOUBLELOCK), feature_ids))
116 		mdcr_set |= MDCR_EL2_TDOSA;
117 
118 	/* Trap SPE */
119 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVER), feature_ids)) {
120 		mdcr_set |= MDCR_EL2_TPMS;
121 		mdcr_clear |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
122 	}
123 
124 	/* Trap Trace Filter */
125 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACE_FILT), feature_ids))
126 		mdcr_set |= MDCR_EL2_TTRF;
127 
128 	/* Trap Trace */
129 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACEVER), feature_ids))
130 		cptr_set |= CPTR_EL2_TTA;
131 
132 	vcpu->arch.mdcr_el2 |= mdcr_set;
133 	vcpu->arch.mdcr_el2 &= ~mdcr_clear;
134 	vcpu->arch.cptr_el2 |= cptr_set;
135 }
136 
137 /*
138  * Set trap register values based on features in ID_AA64MMFR0.
139  */
140 static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu)
141 {
142 	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR0_EL1);
143 	u64 mdcr_set = 0;
144 
145 	/* Trap Debug Communications Channel registers */
146 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_FGT), feature_ids))
147 		mdcr_set |= MDCR_EL2_TDCC;
148 
149 	vcpu->arch.mdcr_el2 |= mdcr_set;
150 }
151 
152 /*
153  * Set trap register values based on features in ID_AA64MMFR1.
154  */
155 static void pvm_init_traps_aa64mmfr1(struct kvm_vcpu *vcpu)
156 {
157 	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR1_EL1);
158 	u64 hcr_set = 0;
159 
160 	/* Trap LOR */
161 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_LOR), feature_ids))
162 		hcr_set |= HCR_TLOR;
163 
164 	vcpu->arch.hcr_el2 |= hcr_set;
165 }
166 
167 /*
168  * Set baseline trap register values.
169  */
170 static void pvm_init_trap_regs(struct kvm_vcpu *vcpu)
171 {
172 	vcpu->arch.cptr_el2 = CPTR_EL2_DEFAULT;
173 	vcpu->arch.mdcr_el2 = 0;
174 
175 	/*
176 	 * Always trap:
177 	 * - Feature id registers: to control features exposed to guests
178 	 * - Implementation-defined features
179 	 */
180 	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS |
181 			     HCR_TID3 | HCR_TACR | HCR_TIDCP | HCR_TID1;
182 
183 	if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) {
184 		/* route synchronous external abort exceptions to EL2 */
185 		vcpu->arch.hcr_el2 |= HCR_TEA;
186 		/* trap error record accesses */
187 		vcpu->arch.hcr_el2 |= HCR_TERR;
188 	}
189 
190 	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
191 		vcpu->arch.hcr_el2 |= HCR_FWB;
192 
193 	if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE))
194 		vcpu->arch.hcr_el2 |= HCR_TID2;
195 }
196 
197 /*
198  * Initialize trap register values for protected VMs.
199  */
200 static void pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu)
201 {
202 	pvm_init_trap_regs(vcpu);
203 	pvm_init_traps_aa64pfr0(vcpu);
204 	pvm_init_traps_aa64pfr1(vcpu);
205 	pvm_init_traps_aa64dfr0(vcpu);
206 	pvm_init_traps_aa64mmfr0(vcpu);
207 	pvm_init_traps_aa64mmfr1(vcpu);
208 }
209 
210 /*
211  * Start shadow table handles at the offset defined below instead of at 0,
212  * mainly as a sanity-checking and debugging aid.
213  */
214 #define HANDLE_OFFSET 0x1000
215 
216 static int shadow_handle_to_index(int shadow_handle)
217 {
218 	return shadow_handle - HANDLE_OFFSET;
219 }
220 
221 static int index_to_shadow_handle(int index)
222 {
223 	return index + HANDLE_OFFSET;
224 }
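/*
 * For illustration: with HANDLE_OFFSET of 0x1000, shadow index 0 maps to
 * handle 0x1000 and index (KVM_MAX_PVMS - 1) maps to handle
 * 0x1000 + KVM_MAX_PVMS - 1; the two helpers above are exact inverses.
 */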
225 
226 extern unsigned long hyp_nr_cpus;
227 
228 /*
229  * Track the vcpu most recently loaded on each physical CPU.
230  */
231 static DEFINE_PER_CPU(struct kvm_vcpu *, last_loaded_vcpu);
232 
233 /*
234  * Spinlock for protecting the shadow table related state.
235  * Protects writes to shadow_table, num_shadow_entries, and next_shadow_alloc,
236  * as well as reads and writes to last_loaded_vcpu.
237  */
238 static DEFINE_HYP_SPINLOCK(shadow_lock);
239 
240 /*
241  * The table of shadow entries for protected VMs in hyp.
242  * Allocated at hyp initialization and setup.
243  */
244 static struct kvm_shadow_vm **shadow_table;
245 
246 /* Current number of vms in the shadow table. */
247 static int num_shadow_entries;
248 
249 /* The next entry index to try to allocate from. */
250 static int next_shadow_alloc;
251 
252 void hyp_shadow_table_init(void *tbl)
253 {
254 	WARN_ON(shadow_table);
255 	shadow_table = tbl;
256 }
257 
258 /*
259  * Return the shadow vm corresponding to the handle.
260  */
261 static struct kvm_shadow_vm *find_shadow_by_handle(int shadow_handle)
262 {
263 	int shadow_index = shadow_handle_to_index(shadow_handle);
264 
265 	if (unlikely(shadow_index < 0 || shadow_index >= KVM_MAX_PVMS))
266 		return NULL;
267 
268 	return shadow_table[shadow_index];
269 }
270 
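/*
 * Look up and load the shadow vcpu identified by (shadow_handle, vcpu_idx).
 *
 * On success, the shadow vm's page refcount is raised and the vcpu is marked
 * as loaded on this physical CPU; the caller must balance this with a call to
 * put_shadow_vcpu(). Returns NULL if the handle or index is invalid, or if
 * the vcpu is already marked as loaded.
 */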
271 struct kvm_vcpu *get_shadow_vcpu(int shadow_handle, unsigned int vcpu_idx)
272 {
273 	struct kvm_vcpu *vcpu = NULL;
274 	struct kvm_shadow_vm *vm;
275 	bool flush_context = false;
276 
277 	hyp_spin_lock(&shadow_lock);
278 	vm = find_shadow_by_handle(shadow_handle);
279 	if (!vm || vm->nr_vcpus <= vcpu_idx)
280 		goto unlock;
281 	vcpu = &vm->shadow_vcpus[vcpu_idx]->vcpu;
282 
283 	/* Ensure vcpu isn't loaded on more than one cpu simultaneously. */
284 	if (unlikely(vcpu->arch.pkvm.loaded_on_cpu)) {
285 		vcpu = NULL;
286 		goto unlock;
287 	}
288 
289 	/*
290 	 * Guarantee that both TLBs and I-cache are private to each vcpu.
291 	 * The check below is conservative and could lead to over-invalidation,
292 	 * because there is no need to nuke the contexts if the vcpu belongs to
293 	 * a different vm.
294 	 */
295 	if (vcpu != __this_cpu_read(last_loaded_vcpu)) {
296 		flush_context = true;
297 		__this_cpu_write(last_loaded_vcpu, vcpu);
298 	}
299 
300 	vcpu->arch.pkvm.loaded_on_cpu = true;
301 
302 	hyp_page_ref_inc(hyp_virt_to_page(vm));
303 unlock:
304 	hyp_spin_unlock(&shadow_lock);
305 
306 	/* No need for the lock while flushing the context. */
307 	if (flush_context)
308 		__kvm_flush_cpu_context(vcpu->arch.hw_mmu);
309 
310 	return vcpu;
311 }
312 
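/*
 * Mark the vcpu as no longer loaded and drop the shadow vm reference taken by
 * get_shadow_vcpu().
 */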
313 void put_shadow_vcpu(struct kvm_vcpu *vcpu)
314 {
315 	struct kvm_shadow_vm *vm = vcpu->arch.pkvm.shadow_vm;
316 
317 	hyp_spin_lock(&shadow_lock);
318 	vcpu->arch.pkvm.loaded_on_cpu = false;
319 	hyp_page_ref_dec(hyp_virt_to_page(vm));
320 	hyp_spin_unlock(&shadow_lock);
321 }
322 
323 /* Check and copy the supported features for the vcpu from the host. */
324 static int copy_features(struct kvm_vcpu *shadow_vcpu, struct kvm_vcpu *host_vcpu)
325 {
326 	DECLARE_BITMAP(allowed_features, KVM_VCPU_MAX_FEATURES);
327 
328 	bitmap_zero(allowed_features, KVM_VCPU_MAX_FEATURES);
329 
330 	/*
331 	 * Always allowed:
332 	 * - CPU starting in poweroff state
333 	 * - PSCI v0.2
334 	 */
335 	set_bit(KVM_ARM_VCPU_POWER_OFF, allowed_features);
336 	set_bit(KVM_ARM_VCPU_PSCI_0_2, allowed_features);
337 
338 	/*
339 	 * Check if remaining features are allowed:
340 	 * - Performance Monitoring
341 	 * - Pointer Authentication
342 	 */
343 	if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMUVER), PVM_ID_AA64DFR0_ALLOW))
344 	        set_bit(KVM_ARM_VCPU_PMU_V3, allowed_features);
345 
346 	if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_API), PVM_ID_AA64ISAR1_ALLOW) &&
347 	    FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA), PVM_ID_AA64ISAR1_ALLOW))
348 	        set_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, allowed_features);
349 
350 	if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI), PVM_ID_AA64ISAR1_ALLOW) &&
351 	    FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA), PVM_ID_AA64ISAR1_ALLOW))
352 	        set_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, allowed_features);
353 
354 	bitmap_and(shadow_vcpu->arch.features, host_vcpu->arch.features,
355 		allowed_features, KVM_VCPU_MAX_FEATURES);
356 
357 	/*
358 	 * Check for system support for address/generic pointer authentication
359 	 * features if either are enabled.
360 	 */
361 	if ((test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, shadow_vcpu->arch.features) ||
362 	     test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, shadow_vcpu->arch.features)) &&
363 	    !system_has_full_ptr_auth())
364 		return -EINVAL;
365 
366 	return 0;
367 }
368 
369 static void unpin_host_vcpu(struct shadow_vcpu_state *shadow_vcpu)
370 {
371 	struct kvm_vcpu *host_vcpu = shadow_vcpu->vcpu.arch.pkvm.host_vcpu;
372 	size_t sve_state_size;
373 	void *sve_state = shadow_vcpu->vcpu.arch.sve_state;
374 
375 	hyp_unpin_shared_mem(host_vcpu, host_vcpu + 1);
376 
377 	if (!sve_state)
378 		return;
379 
380 	sve_state = kern_hyp_va(sve_state);
381 	sve_state_size = vcpu_sve_state_size(&shadow_vcpu->vcpu);
382 	hyp_unpin_shared_mem(sve_state, sve_state + sve_state_size);
383 }
384 
385 static void unpin_host_vcpus(struct shadow_vcpu_state *shadow_vcpus[], int nr_vcpus)
386 {
387 	int i;
388 
389 	for (i = 0; i < nr_vcpus; i++)
390 		unpin_host_vcpu(shadow_vcpus[i]);
391 }
392 
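/*
 * Enable pointer authentication for the shadow vcpu if either the address or
 * the generic ptrauth feature was requested.
 */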
393 static int init_ptrauth(struct kvm_vcpu *shadow_vcpu)
394 {
395 	int ret = 0;
396 	if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, shadow_vcpu->arch.features) ||
397 	    test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, shadow_vcpu->arch.features))
398 		ret = kvm_vcpu_enable_ptrauth(shadow_vcpu);
399 	return ret;
400 }
401 
402 static void init_shadow_vm(struct kvm *kvm, struct kvm_shadow_vm *vm,
403 			   int nr_vcpus)
404 {
405 	vm->host_kvm = kvm;
406 	vm->created_vcpus = nr_vcpus;
407 	vm->arch.pkvm.pvmfw_load_addr = kvm->arch.pkvm.pvmfw_load_addr;
408 	vm->arch.pkvm.enabled = READ_ONCE(kvm->arch.pkvm.enabled);
409 }
410 
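/*
 * Initialize a shadow vcpu from its host counterpart: pin the host vcpu, copy
 * the allowed feature set, set up pointer authentication and SVE state, and
 * initialize the trap registers, system registers and PSCI power state.
 * The host vcpu is unpinned again on failure.
 */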
411 static int init_shadow_vcpu(struct shadow_vcpu_state *shadow_state,
412 			    struct kvm_vcpu *host_vcpu,
413 			    struct kvm_shadow_vm *vm, int vcpu_idx)
414 {
415 	struct kvm_vcpu *shadow_vcpu = &shadow_state->vcpu;
416 	int ret;
417 
418 	host_vcpu = kern_hyp_va(host_vcpu);
419 	if (hyp_pin_shared_mem(host_vcpu, host_vcpu + 1))
420 		return -EBUSY;
421 
422 	if (host_vcpu->vcpu_idx != vcpu_idx) {
423 		ret = -EINVAL;
424 		goto done;
425 	}
426 
427 	shadow_vcpu->arch.pkvm.host_vcpu = host_vcpu;
428 	shadow_vcpu->kvm = vm->host_kvm;
429 	shadow_vcpu->vcpu_id = host_vcpu->vcpu_id;
430 	shadow_vcpu->vcpu_idx = vcpu_idx;
431 
432 	ret = copy_features(shadow_vcpu, host_vcpu);
433 	if (ret)
434 		goto done;
435 
436 	ret = init_ptrauth(shadow_vcpu);
437 	if (ret)
438 		goto done;
439 
440 	if (test_bit(KVM_ARM_VCPU_SVE, shadow_vcpu->arch.features)) {
441 		size_t sve_state_size;
442 		void *sve_state;
443 
444 		shadow_vcpu->arch.sve_state =
445 			READ_ONCE(host_vcpu->arch.sve_state);
446 		shadow_vcpu->arch.sve_max_vl =
447 			READ_ONCE(host_vcpu->arch.sve_max_vl);
448 
449 		sve_state = kern_hyp_va(shadow_vcpu->arch.sve_state);
450 		sve_state_size = vcpu_sve_state_size(shadow_vcpu);
451 
452 		if (!shadow_vcpu->arch.sve_state || !sve_state_size ||
453 		    hyp_pin_shared_mem(sve_state, sve_state + sve_state_size)) {
454 			clear_bit(KVM_ARM_VCPU_SVE, shadow_vcpu->arch.features);
455 			shadow_vcpu->arch.sve_state = NULL;
456 			shadow_vcpu->arch.sve_max_vl = 0;
457 			ret = -EINVAL;
458 			goto done;
459 		}
460 	}
461 
462 	if (vm->arch.pkvm.enabled)
463 		pkvm_vcpu_init_traps(shadow_vcpu);
464 	kvm_reset_pvm_sys_regs(shadow_vcpu);
465 
466 	vm->vcpus[vcpu_idx] = shadow_vcpu;
467 	shadow_state->vm = vm;
468 
469 	shadow_vcpu->arch.hw_mmu = &vm->arch.mmu;
470 	shadow_vcpu->arch.pkvm.shadow_vm = vm;
471 	shadow_vcpu->arch.power_off = true;
472 
473 	if (test_bit(KVM_ARM_VCPU_POWER_OFF, shadow_vcpu->arch.features)) {
474 		shadow_vcpu->arch.pkvm.power_state =
475 			PSCI_0_2_AFFINITY_LEVEL_OFF;
476 	} else if (pvm_has_pvmfw(vm)) {
477 		if (vm->pvmfw_entry_vcpu) {
478 			ret = -EINVAL;
479 			goto done;
480 		}
481 
482 		vm->pvmfw_entry_vcpu = shadow_vcpu;
483 		shadow_vcpu->arch.reset_state.reset = true;
484 		shadow_vcpu->arch.pkvm.power_state =
485 			PSCI_0_2_AFFINITY_LEVEL_ON_PENDING;
486 	} else {
487 		struct vcpu_reset_state *reset_state =
488 			&shadow_vcpu->arch.reset_state;
489 
490 		reset_state->pc = *vcpu_pc(host_vcpu);
491 		reset_state->r0 = vcpu_get_reg(host_vcpu, 0);
492 		reset_state->reset = true;
493 		shadow_vcpu->arch.pkvm.power_state =
494 			PSCI_0_2_AFFINITY_LEVEL_ON_PENDING;
495 	}
496 
497 done:
498 	if (ret)
499 		unpin_host_vcpu(shadow_state);
500 
501 	return ret;
502 }
503 
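/*
 * Return true if a shadow vm has already been created for this host kvm
 * struct. Walks at most num_shadow_entries live entries of the shadow table.
 */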
504 static bool __exists_shadow(struct kvm *host_kvm)
505 {
506 	int i;
507 	int num_checked = 0;
508 
509 	for (i = 0; i < KVM_MAX_PVMS && num_checked < num_shadow_entries; i++) {
510 		if (!shadow_table[i])
511 			continue;
512 
513 		if (unlikely(shadow_table[i]->host_kvm == host_kvm))
514 			return true;
515 
516 		num_checked++;
517 	}
518 
519 	return false;
520 }
521 
522 /*
523  * Allocate a shadow table entry and insert a pointer to the shadow vm.
524  *
525  * Return a unique handle to the protected VM on success,
526  * negative error code on failure.
527  */
528 static int insert_shadow_table(struct kvm *kvm, struct kvm_shadow_vm *vm,
529 			       size_t shadow_size)
530 {
531 	struct kvm_s2_mmu *mmu = &vm->arch.mmu;
532 	int shadow_handle;
533 	int vmid;
534 
535 	hyp_assert_lock_held(&shadow_lock);
536 
537 	if (unlikely(num_shadow_entries >= KVM_MAX_PVMS))
538 		return -ENOMEM;
539 
540 	/*
541 	 * Initializing protected state might have failed, yet a malicious host
542 	 * could trigger this function. Thus, ensure that shadow_table exists.
543 	 */
544 	if (unlikely(!shadow_table))
545 		return -EINVAL;
546 
547 	/* Check that a shadow hasn't been created before for this host KVM. */
548 	if (unlikely(__exists_shadow(kvm)))
549 		return -EEXIST;
550 
551 	/* Find the next free entry in the shadow table. */
552 	while (shadow_table[next_shadow_alloc])
553 		next_shadow_alloc = (next_shadow_alloc + 1) % KVM_MAX_PVMS;
554 	shadow_handle = index_to_shadow_handle(next_shadow_alloc);
555 
556 	vm->shadow_handle = shadow_handle;
557 	vm->shadow_area_size = shadow_size;
558 
559 	/* VMID 0 is reserved for the host */
560 	vmid = next_shadow_alloc + 1;
561 	if (vmid > 0xff)
562 		return -ENOMEM;
563 
564 	mmu->vmid.vmid = vmid;
565 	mmu->vmid.vmid_gen = 0;
566 	mmu->arch = &vm->arch;
567 	mmu->pgt = &vm->pgt;
568 
569 	shadow_table[next_shadow_alloc] = vm;
570 	next_shadow_alloc = (next_shadow_alloc + 1) % KVM_MAX_PVMS;
571 	num_shadow_entries++;
572 
573 	return shadow_handle;
574 }
575 
576 /*
577  * Deallocate and remove the shadow table entry corresponding to the handle.
578  */
579 static void remove_shadow_table(int shadow_handle)
580 {
581 	hyp_assert_lock_held(&shadow_lock);
582 	shadow_table[shadow_handle_to_index(shadow_handle)] = NULL;
583 	num_shadow_entries--;
584 }
585 
586 static size_t pkvm_get_shadow_size(int num_vcpus)
587 {
588 	/* Shadow space for the vm struct and all of its vcpu states. */
589 	return sizeof(struct kvm_shadow_vm) +
590 	       sizeof(struct shadow_vcpu_state *) * num_vcpus;
591 }
592 
593 /*
594  * Check whether the size of the area donated by the host is sufficient for
595  * the shadow structures required for nr_vcpus as well as the shadow vm.
596  */
597 static int check_shadow_size(int nr_vcpus, size_t shadow_size)
598 {
599 	if (nr_vcpus < 1 || nr_vcpus > KVM_MAX_VCPUS)
600 		return -EINVAL;
601 
602 	/*
603 	 * Shadow size is rounded up when allocated and donated by the host,
604 	 * so it's likely to be larger than the sum of the struct sizes.
605 	 */
606 	if (shadow_size < pkvm_get_shadow_size(nr_vcpus))
607 		return -EINVAL;
608 
609 	return 0;
610 }
611 
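/*
 * Return every page in each shadow vcpu's memcache to the host, pushing it
 * onto the teardown memcache so that the host can reclaim it.
 */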
612 static void drain_shadow_vcpus(struct shadow_vcpu_state *shadow_vcpus[],
613 			       unsigned int nr_vcpus,
614 			       struct kvm_hyp_memcache *mc)
615 {
616 	int i;
617 
618 	for (i = 0; i < nr_vcpus; i++) {
619 		struct kvm_vcpu *shadow_vcpu = &shadow_vcpus[i]->vcpu;
620 		struct kvm_hyp_memcache *vcpu_mc = &shadow_vcpu->arch.pkvm_memcache;
621 		void *addr;
622 
623 		while (vcpu_mc->nr_pages) {
624 			addr = pop_hyp_memcache(vcpu_mc, hyp_phys_to_virt);
625 			push_hyp_memcache(mc, addr, hyp_virt_to_phys);
626 			WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
627 		}
628 	}
629 }
630 
631 /*
632  * Initialize the shadow copy of the protected VM state using the memory
633  * donated by the host.
634  *
635  * Unmaps the donated memory from the host at stage 2.
636  *
637  * kvm: A pointer to the host's struct kvm (host va).
638  * shadow_va: The host va of the area being donated for the shadow state.
639  * 	      Must be page aligned.
640  * shadow_size: The size of the area being donated for the shadow state.
641  * 		Must be a multiple of the page size.
642  * pgd: The host va of the area being donated for the stage-2 PGD for the VM.
643  * 	Must be page aligned. Its size is implied by the VM's VTCR.
644  *
645  * Return a unique handle to the protected VM on success,
646  * negative error code on failure.
647  */
648 int __pkvm_init_shadow(struct kvm *kvm,
649 		       void *shadow_va,
650 		       size_t shadow_size,
651 		       void *pgd)
652 {
653 	struct kvm_shadow_vm *vm = kern_hyp_va(shadow_va);
654 	phys_addr_t shadow_pa = hyp_virt_to_phys(vm);
655 	u64 pfn = hyp_phys_to_pfn(shadow_pa);
656 	u64 nr_shadow_pages = shadow_size >> PAGE_SHIFT;
657 	u64 nr_pgd_pages;
658 	size_t pgd_size;
659 	int nr_vcpus = 0;
660 	int ret = 0;
661 
662 	/* Check that the donated memory is aligned to page boundaries. */
663 	if (!PAGE_ALIGNED(shadow_va) ||
664 	    !PAGE_ALIGNED(shadow_size) ||
665 	    !PAGE_ALIGNED(pgd))
666 		return -EINVAL;
667 
668 	kvm = kern_hyp_va(kvm);
669 	pgd = kern_hyp_va(pgd);
670 
671 	ret = hyp_pin_shared_mem(kvm, kvm + 1);
672 	if (ret)
673 		return ret;
674 
675 	/* Ensure the host has donated enough memory for the shadow structs. */
676 	nr_vcpus = kvm->created_vcpus;
677 	ret = check_shadow_size(nr_vcpus, shadow_size);
678 	if (ret)
679 		goto err;
680 
681 	ret = __pkvm_host_donate_hyp(pfn, nr_shadow_pages);
682 	if (ret)
683 		goto err;
684 
685 	/* Ensure we're working with a clean slate. */
686 	memset(vm, 0, shadow_size);
687 
688 	vm->arch.vtcr = host_kvm.arch.vtcr;
689 	pgd_size = kvm_pgtable_stage2_pgd_size(host_kvm.arch.vtcr);
690 	nr_pgd_pages = pgd_size >> PAGE_SHIFT;
691 	ret = __pkvm_host_donate_hyp(hyp_virt_to_pfn(pgd), nr_pgd_pages);
692 	if (ret)
693 		goto err_remove_mappings;
694 
695 	init_shadow_vm(kvm, vm, nr_vcpus);
696 
697 	/* Add the entry to the shadow table. */
698 	hyp_spin_lock(&shadow_lock);
699 	ret = insert_shadow_table(kvm, vm, shadow_size);
700 	if (ret < 0)
701 		goto err_unlock;
702 
703 	ret = kvm_guest_prepare_stage2(vm, pgd);
704 	if (ret)
705 		goto err_remove_shadow_table;
706 
707 	hyp_spin_unlock(&shadow_lock);
708 	return vm->shadow_handle;
709 
710 err_remove_shadow_table:
711 	remove_shadow_table(vm->shadow_handle);
712 err_unlock:
713 	hyp_spin_unlock(&shadow_lock);
714 	WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(pgd), nr_pgd_pages));
715 err_remove_mappings:
716 	/* Clear the donated shadow memory on failure to avoid data leaks. */
717 	memset(vm, 0, shadow_size);
718 	WARN_ON(__pkvm_hyp_donate_host(hyp_phys_to_pfn(shadow_pa),
719 				       shadow_size >> PAGE_SHIFT));
720 err:
721 	hyp_unpin_shared_mem(kvm, kvm + 1);
722 	return ret;
723 }
724 
725 /*
726  * Initialize the protected vcpu state shadow copy in host-donated memory.
727  *
728  * shadow_handle: The handle for the protected vm.
729  * host_vcpu: A pointer to the corresponding host vcpu (host va).
730  * shadow_vcpu_hva: The host va of the area being donated for the vcpu state.
731  *                 Must be page aligned. The size of the area must be equal to
732  *                 the page-aligned size of struct shadow_vcpu_state.
733  *
734  * Return 0 on success, negative error code on failure.
735  */
736 int __pkvm_init_shadow_vcpu(unsigned int shadow_handle,
737 			    struct kvm_vcpu *host_vcpu,
738 			    void *shadow_vcpu_hva)
739 {
740 	struct kvm_shadow_vm *vm;
741 	struct shadow_vcpu_state *shadow_state = kern_hyp_va(shadow_vcpu_hva);
742 	size_t vcpu_state_sz = sizeof(*shadow_state);
743 	u64 nr_pages = PAGE_ALIGN(vcpu_state_sz) >> PAGE_SHIFT;
744 	unsigned int idx;
745 	int ret;
746 
747 	if (!PAGE_ALIGNED(shadow_vcpu_hva))
748 		return -EINVAL;
749 
750 	ret = __pkvm_host_donate_hyp(hyp_virt_to_pfn(shadow_state),
751 				     nr_pages);
752 	if (ret)
753 		return ret;
754 
755 	memset(shadow_state, 0, vcpu_state_sz);
756 
757 	hyp_spin_lock(&shadow_lock);
758 
759 	vm = find_shadow_by_handle(shadow_handle);
760 	if (!vm) {
761 		ret = -ENOENT;
762 		goto unlock;
763 	}
764 
765 	idx = vm->nr_vcpus;
766 	if (idx >= vm->created_vcpus) {
767 		ret = -EINVAL;
768 		goto unlock;
769 	}
770 
771 	ret = init_shadow_vcpu(shadow_state, host_vcpu, vm, idx);
772 	if (ret)
773 		goto unlock;
774 
775 	vm->shadow_vcpus[idx] = shadow_state;
776 	vm->nr_vcpus++;
777 unlock:
778 	hyp_spin_unlock(&shadow_lock);
779 
780 	if (ret) {
781 		memset(shadow_state, 0, vcpu_state_sz);
782 		WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(shadow_state),
783 					       nr_pages));
784 	}
785 
786 	return ret;
787 }
788 
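/*
 * Scrub and return a range of previously donated hyp memory to the host:
 * zero it, clean it to the PoC, push each page onto the teardown memcache,
 * and transfer ownership of the pages back to the host.
 */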
789 static void teardown_donated_memory(struct kvm_hyp_memcache *mc, void *addr,
790 				    size_t size)
791 {
792 	u64 pfn = hyp_phys_to_pfn(__hyp_pa(addr));
793 	u64 nr_pages = size >> PAGE_SHIFT;
794 	void *start;
795 
796 	memset(addr, 0, size);
797 	kvm_flush_dcache_to_poc(addr, size);
798 
799 	for (start = addr; start < addr + size; start += PAGE_SIZE)
800 		push_hyp_memcache(mc, start, hyp_virt_to_phys);
801 
802 	WARN_ON(__pkvm_hyp_donate_host(pfn, nr_pages));
803 }
804 
805 int __pkvm_teardown_shadow(int shadow_handle)
806 {
807 	struct kvm_hyp_memcache *mc;
808 	struct kvm_shadow_vm *vm;
809 	struct kvm *host_kvm;
810 	unsigned int nr_vcpus;
811 	int err;
812 	int i;
813 
814 	/* Lookup then remove entry from the shadow table. */
815 	hyp_spin_lock(&shadow_lock);
816 	vm = find_shadow_by_handle(shadow_handle);
817 	if (!vm) {
818 		err = -ENOENT;
819 		goto err_unlock;
820 	}
821 
822 	if (WARN_ON(hyp_page_count(vm))) {
823 		err = -EBUSY;
824 		goto err_unlock;
825 	}
826 
827 	host_kvm = vm->host_kvm;
828 	nr_vcpus = vm->nr_vcpus;
829 
830 	/*
831 	 * Clear the tracking for last_loaded_vcpu for all cpus for this vm in
832 	 * case the same addresses for those vcpus are reused for future vms.
833 	 */
834 	for (i = 0; i < hyp_nr_cpus; i++) {
835 		struct kvm_vcpu **last_loaded_vcpu_ptr =
836 			per_cpu_ptr(&last_loaded_vcpu, i);
837 		struct kvm_vcpu *vcpu = *last_loaded_vcpu_ptr;
838 
839 		if (vcpu && vcpu->arch.pkvm.shadow_vm == vm)
840 			*last_loaded_vcpu_ptr = NULL;
841 	}
842 
843 	/* Ensure the VMID is clean before it can be reallocated */
844 	__kvm_tlb_flush_vmid(&vm->arch.mmu);
845 	remove_shadow_table(shadow_handle);
846 	hyp_spin_unlock(&shadow_lock);
847 
848 	/* Reclaim guest pages and page-table pages */
849 	mc = &host_kvm->arch.pkvm.teardown_mc;
850 	reclaim_guest_pages(vm, mc);
851 	drain_shadow_vcpus(vm->shadow_vcpus, nr_vcpus, mc);
852 	unpin_host_vcpus(vm->shadow_vcpus, nr_vcpus);
853 
854 	for (i = 0; i < nr_vcpus; i++)
855 		teardown_donated_memory(mc, vm->shadow_vcpus[i],
856 					PAGE_ALIGN(sizeof(*vm->shadow_vcpus[i])));
857 	teardown_donated_memory(mc, vm, vm->shadow_area_size);
858 
859 	hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
860 	return 0;
861 
862 err_unlock:
863 	hyp_spin_unlock(&shadow_lock);
864 	return err;
865 }
866 
867 int pkvm_load_pvmfw_pages(struct kvm_shadow_vm *vm, u64 ipa, phys_addr_t phys,
868 			  u64 size)
869 {
870 	struct kvm_protected_vm *pkvm = &vm->arch.pkvm;
871 	u64 npages, offset = ipa - pkvm->pvmfw_load_addr;
872 	void *src = hyp_phys_to_virt(pvmfw_base) + offset;
873 
874 	if (offset >= pvmfw_size)
875 		return -EINVAL;
876 
877 	size = min(size, pvmfw_size - offset);
878 	if (!PAGE_ALIGNED(size) || !PAGE_ALIGNED(src))
879 		return -EINVAL;
880 
881 	npages = size >> PAGE_SHIFT;
882 	while (npages--) {
883 		void *dst;
884 
885 		dst = hyp_fixmap_map(phys);
886 		if (!dst)
887 			return -EINVAL;
888 
889 		/*
890 		 * No need for cache maintenance here, as the pgtable code will
891 		 * take care of this when installing the pte in the guest's
892 		 * stage-2 page table.
893 		 */
894 		memcpy(dst, src, PAGE_SIZE);
895 
896 		hyp_fixmap_unmap();
897 		src += PAGE_SIZE;
898 		phys += PAGE_SIZE;
899 	}
900 
901 	return 0;
902 }
903 
904 void pkvm_clear_pvmfw_pages(void)
905 {
906 	void *addr = hyp_phys_to_virt(pvmfw_base);
907 
908 	memset(addr, 0, pvmfw_size);
909 	kvm_flush_dcache_to_poc(addr, pvmfw_size);
910 }
911 
912 /*
913  * This function sets the registers on the vcpu to their architecturally defined
914  * reset values.
915  *
916  * Note: Can only be called by the vcpu on itself, after it has been turned on.
917  */
918 void pkvm_reset_vcpu(struct kvm_vcpu *vcpu)
919 {
920 	struct vcpu_reset_state *reset_state = &vcpu->arch.reset_state;
921 	struct kvm_shadow_vm *vm = vcpu->arch.pkvm.shadow_vm;
922 
923 	WARN_ON(!reset_state->reset);
924 
925 	init_ptrauth(vcpu);
926 
927 	/* Reset core registers */
928 	memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu)));
929 	memset(&vcpu->arch.ctxt.fp_regs, 0, sizeof(vcpu->arch.ctxt.fp_regs));
930 	vcpu_gp_regs(vcpu)->pstate = VCPU_RESET_PSTATE_EL1;
931 
932 	/* Reset system registers */
933 	kvm_reset_pvm_sys_regs(vcpu);
934 
935 	/* Propagate initiator's endianness, after kvm_reset_pvm_sys_regs. */
936 	if (reset_state->be)
937 		kvm_vcpu_set_be(vcpu);
938 
939 	if (vm->pvmfw_entry_vcpu == vcpu) {
940 		struct kvm_vcpu *host_vcpu = vcpu->arch.pkvm.host_vcpu;
941 		u64 entry = vm->arch.pkvm.pvmfw_load_addr;
942 		int i;
943 
944 		/* X0 - X14 provided by the VMM (preserved) */
945 		for (i = 0; i <= 14; ++i)
946 			vcpu_set_reg(vcpu, i, vcpu_get_reg(host_vcpu, i));
947 
948 		/* X15: Boot protocol version */
949 		vcpu_set_reg(vcpu, 15, 0);
950 
951 		/* PC: IPA of pvmfw base */
952 		*vcpu_pc(vcpu) = entry;
953 
954 		vm->pvmfw_entry_vcpu = NULL;
955 
956 		/* Auto enroll MMIO guard */
957 		set_bit(KVM_ARCH_FLAG_MMIO_GUARD,
958 			&vcpu->arch.pkvm.shadow_vm->arch.flags);
959 	} else {
960 		*vcpu_pc(vcpu) = reset_state->pc;
961 		vcpu_set_reg(vcpu, 0, reset_state->r0);
962 	}
963 
964 	reset_state->reset = false;
965 
966 	vcpu->arch.pkvm.exit_code = 0;
967 
968 	WARN_ON(vcpu->arch.pkvm.power_state != PSCI_0_2_AFFINITY_LEVEL_ON_PENDING);
969 	WRITE_ONCE(vcpu->arch.power_off, false);
970 	WRITE_ONCE(vcpu->arch.pkvm.power_state, PSCI_0_2_AFFINITY_LEVEL_ON);
971 }
972 
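/*
 * Return the shadow vcpu whose MPIDR affinity matches the given value, or
 * NULL if no such vcpu exists in the vm.
 */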
973 struct kvm_vcpu *pvm_mpidr_to_vcpu(struct kvm_shadow_vm *vm, unsigned long mpidr)
974 {
975 	struct kvm_vcpu *vcpu;
976 	int i;
977 
978 	mpidr &= MPIDR_HWID_BITMASK;
979 
980 	for (i = 0; i < READ_ONCE(vm->nr_vcpus); i++) {
981 		vcpu = vm->vcpus[i];
982 
983 		if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
984 			return vcpu;
985 	}
986 
987 	return NULL;
988 }
989 
990 /*
991  * Returns true if the hypervisor has handled the PSCI call, and control should
992  * go back to the guest, or false if the host needs to do some additional work
993  * (i.e., wake up the vcpu).
994  */
995 static bool pvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
996 {
997 	struct kvm_shadow_vm *vm = source_vcpu->arch.pkvm.shadow_vm;
998 	struct kvm_vcpu *vcpu;
999 	struct vcpu_reset_state *reset_state;
1000 	unsigned long cpu_id;
1001 	unsigned long hvc_ret_val;
1002 	int power_state;
1003 
1004 	cpu_id = smccc_get_arg1(source_vcpu);
1005 	if (!kvm_psci_valid_affinity(source_vcpu, cpu_id)) {
1006 		hvc_ret_val = PSCI_RET_INVALID_PARAMS;
1007 		goto error;
1008 	}
1009 
1010 	vcpu = pvm_mpidr_to_vcpu(vm, cpu_id);
1011 
1012 	/* Make sure the caller requested a valid vcpu. */
1013 	if (!vcpu) {
1014 		hvc_ret_val = PSCI_RET_INVALID_PARAMS;
1015 		goto error;
1016 	}
1017 
1018 	/*
1019 	 * Make sure the requested vcpu is not on to begin with.
1020 	 * Atomic to avoid race between vcpus trying to power on the same vcpu.
1021 	 */
1022 	power_state = cmpxchg(&vcpu->arch.pkvm.power_state,
1023 		PSCI_0_2_AFFINITY_LEVEL_OFF,
1024 		PSCI_0_2_AFFINITY_LEVEL_ON_PENDING);
1025 	switch (power_state) {
1026 	case PSCI_0_2_AFFINITY_LEVEL_ON_PENDING:
1027 		hvc_ret_val = PSCI_RET_ON_PENDING;
1028 		goto error;
1029 	case PSCI_0_2_AFFINITY_LEVEL_ON:
1030 		hvc_ret_val = PSCI_RET_ALREADY_ON;
1031 		goto error;
1032 	case PSCI_0_2_AFFINITY_LEVEL_OFF:
1033 		break;
1034 	default:
1035 		hvc_ret_val = PSCI_RET_INTERNAL_FAILURE;
1036 		goto error;
1037 	}
1038 
1039 	reset_state = &vcpu->arch.reset_state;
1040 
1041 	reset_state->pc = smccc_get_arg2(source_vcpu);
1042 	reset_state->r0 = smccc_get_arg3(source_vcpu);
1043 
1044 	/* Propagate caller endianness */
1045 	reset_state->be = kvm_vcpu_is_be(source_vcpu);
1046 
1047 	reset_state->reset = true;
1048 
1049 	/*
1050 	 * Return to the host, which should make the KVM_REQ_VCPU_RESET request
1051 	 * and call kvm_vcpu_wake_up() to schedule the vcpu.
1052 	 */
1053 	return false;
1054 
1055 error:
1056 	/* If there's an error go back straight to the guest. */
1057 	smccc_set_retval(source_vcpu, hvc_ret_val, 0, 0, 0);
1058 	return true;
1059 }
1060 
1061 static bool pvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
1062 {
1063 	int i, matching_cpus = 0;
1064 	unsigned long mpidr;
1065 	unsigned long target_affinity;
1066 	unsigned long target_affinity_mask;
1067 	unsigned long lowest_affinity_level;
1068 	struct kvm_shadow_vm *vm = vcpu->arch.pkvm.shadow_vm;
1069 	struct kvm_vcpu *tmp;
1070 	unsigned long hvc_ret_val;
1071 
1072 	target_affinity = smccc_get_arg1(vcpu);
1073 	lowest_affinity_level = smccc_get_arg2(vcpu);
1074 
1075 	if (!kvm_psci_valid_affinity(vcpu, target_affinity)) {
1076 		hvc_ret_val = PSCI_RET_INVALID_PARAMS;
1077 		goto done;
1078 	}
1079 
1080 	/* Determine target affinity mask */
1081 	target_affinity_mask = psci_affinity_mask(lowest_affinity_level);
1082 	if (!target_affinity_mask) {
1083 		hvc_ret_val = PSCI_RET_INVALID_PARAMS;
1084 		goto done;
1085 	}
1086 
1087 	/* Ignore other bits of target affinity */
1088 	target_affinity &= target_affinity_mask;
1089 
1090 	hvc_ret_val = PSCI_0_2_AFFINITY_LEVEL_OFF;
1091 
1092 	/*
1093 	 * If at least one vcpu matching the target affinity is ON then return ON;
1094 	 * otherwise, if at least one is PENDING_ON then return PENDING_ON.
1095 	 * Otherwise, return OFF.
1096 	 */
1097 	for (i = 0; i < READ_ONCE(vm->nr_vcpus); i++) {
1098 		tmp = vm->vcpus[i];
1099 		mpidr = kvm_vcpu_get_mpidr_aff(tmp);
1100 
1101 		if ((mpidr & target_affinity_mask) == target_affinity) {
1102 			int power_state;
1103 
1104 			matching_cpus++;
1105 			power_state = READ_ONCE(tmp->arch.pkvm.power_state);
1106 			switch (power_state) {
1107 			case PSCI_0_2_AFFINITY_LEVEL_ON_PENDING:
1108 				hvc_ret_val = PSCI_0_2_AFFINITY_LEVEL_ON_PENDING;
1109 				break;
1110 			case PSCI_0_2_AFFINITY_LEVEL_ON:
1111 				hvc_ret_val = PSCI_0_2_AFFINITY_LEVEL_ON;
1112 				goto done;
1113 			case PSCI_0_2_AFFINITY_LEVEL_OFF:
1114 				break;
1115 			default:
1116 				hvc_ret_val = PSCI_RET_INTERNAL_FAILURE;
1117 				goto done;
1118 			}
1119 		}
1120 	}
1121 
1122 	if (!matching_cpus)
1123 		hvc_ret_val = PSCI_RET_INVALID_PARAMS;
1124 
1125 done:
1126 	/* Nothing to be handled by the host. Go back to the guest. */
1127 	smccc_set_retval(vcpu, hvc_ret_val, 0, 0, 0);
1128 	return true;
1129 }
1130 
1131 /*
1132  * Returns true if the hypervisor has handled the PSCI call, and control should
1133  * go back to the guest, or false if the host needs to do some additional work
1134  * (e.g., turn off and update vcpu scheduling status).
1135  */
1136 static bool pvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
1137 {
1138 	WARN_ON(vcpu->arch.power_off);
1139 	WARN_ON(vcpu->arch.pkvm.power_state != PSCI_0_2_AFFINITY_LEVEL_ON);
1140 
1141 	WRITE_ONCE(vcpu->arch.power_off, true);
1142 	WRITE_ONCE(vcpu->arch.pkvm.power_state, PSCI_0_2_AFFINITY_LEVEL_OFF);
1143 
1144 	/* Return to the host so that it can finish powering off the vcpu. */
1145 	return false;
1146 }
1147 
1148 static bool pvm_psci_version(struct kvm_vcpu *vcpu)
1149 {
1150 	/* Nothing to be handled by the host. Go back to the guest. */
1151 	smccc_set_retval(vcpu, KVM_ARM_PSCI_1_1, 0, 0, 0);
1152 	return true;
1153 }
1154 
1155 static bool pvm_psci_not_supported(struct kvm_vcpu *vcpu)
1156 {
1157 	/* Nothing to be handled by the host. Go back to the guest. */
1158 	smccc_set_retval(vcpu, PSCI_RET_NOT_SUPPORTED, 0, 0, 0);
1159 	return true;
1160 }
1161 
1162 static bool pvm_psci_features(struct kvm_vcpu *vcpu)
1163 {
1164 	u32 feature = smccc_get_arg1(vcpu);
1165 	unsigned long val;
1166 
1167 	switch (feature) {
1168 	case PSCI_0_2_FN_PSCI_VERSION:
1169 	case PSCI_0_2_FN_CPU_SUSPEND:
1170 	case PSCI_0_2_FN64_CPU_SUSPEND:
1171 	case PSCI_0_2_FN_CPU_OFF:
1172 	case PSCI_0_2_FN_CPU_ON:
1173 	case PSCI_0_2_FN64_CPU_ON:
1174 	case PSCI_0_2_FN_AFFINITY_INFO:
1175 	case PSCI_0_2_FN64_AFFINITY_INFO:
1176 	case PSCI_0_2_FN_SYSTEM_OFF:
1177 	case PSCI_0_2_FN_SYSTEM_RESET:
1178 	case PSCI_1_0_FN_PSCI_FEATURES:
1179 	case PSCI_1_1_FN_SYSTEM_RESET2:
1180 	case PSCI_1_1_FN64_SYSTEM_RESET2:
1181 	case ARM_SMCCC_VERSION_FUNC_ID:
1182 		val = PSCI_RET_SUCCESS;
1183 		break;
1184 	default:
1185 		val = PSCI_RET_NOT_SUPPORTED;
1186 		break;
1187 	}
1188 
1189 	/* Nothing to be handled by the host. Go back to the guest. */
1190 	smccc_set_retval(vcpu, val, 0, 0, 0);
1191 	return true;
1192 }
1193 
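/*
 * Dispatch a PSCI call from a protected guest. Returns true if the call was
 * handled entirely at hyp and control should go back to the guest, or false
 * if the host needs to complete the operation.
 */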
1194 static bool pkvm_handle_psci(struct kvm_vcpu *vcpu)
1195 {
1196 	u32 psci_fn = smccc_get_function(vcpu);
1197 
1198 	switch (psci_fn) {
1199 	case PSCI_0_2_FN_CPU_ON:
1200 		kvm_psci_narrow_to_32bit(vcpu);
1201 		fallthrough;
1202 	case PSCI_0_2_FN64_CPU_ON:
1203 		return pvm_psci_vcpu_on(vcpu);
1204 	case PSCI_0_2_FN_CPU_OFF:
1205 		return pvm_psci_vcpu_off(vcpu);
1206 	case PSCI_0_2_FN_AFFINITY_INFO:
1207 		kvm_psci_narrow_to_32bit(vcpu);
1208 		fallthrough;
1209 	case PSCI_0_2_FN64_AFFINITY_INFO:
1210 		return pvm_psci_vcpu_affinity_info(vcpu);
1211 	case PSCI_0_2_FN_PSCI_VERSION:
1212 		return pvm_psci_version(vcpu);
1213 	case PSCI_1_0_FN_PSCI_FEATURES:
1214 		return pvm_psci_features(vcpu);
1215 	case PSCI_0_2_FN_SYSTEM_RESET:
1216 	case PSCI_0_2_FN_CPU_SUSPEND:
1217 	case PSCI_0_2_FN64_CPU_SUSPEND:
1218 	case PSCI_0_2_FN_SYSTEM_OFF:
1219 	case PSCI_1_1_FN_SYSTEM_RESET2:
1220 	case PSCI_1_1_FN64_SYSTEM_RESET2:
1221 		return false; /* Handled by the host. */
1222 	default:
1223 		break;
1224 	}
1225 
1226 	return pvm_psci_not_supported(vcpu);
1227 }
1228 
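/*
 * Fake a stage-2 data abort at the given IPA and rewind the guest's ELR so
 * that, once the host has mapped the page, the guest retries the HVC that
 * triggered the share request.
 */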
1229 static u64 __pkvm_memshare_page_req(struct kvm_vcpu *vcpu, u64 ipa)
1230 {
1231 	u64 elr;
1232 
1233 	/* Fake up a data abort (Level 3 translation fault on write) */
1234 	vcpu->arch.fault.esr_el2 = (u32)ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT |
1235 				   ESR_ELx_WNR | ESR_ELx_FSC_FAULT |
1236 				   FIELD_PREP(ESR_ELx_FSC_LEVEL, 3);
1237 
1238 	/* Shuffle the IPA around into the HPFAR */
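	/* HPFAR_EL2.FIPA is IPA[47:12] stored at bit 4, hence the (ipa >> 8). */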
1239 	vcpu->arch.fault.hpfar_el2 = (ipa >> 8) & HPFAR_MASK;
1240 
1241 	/* This is a virtual address. 0's good. Let's go with 0. */
1242 	vcpu->arch.fault.far_el2 = 0;
1243 
1244 	/* Rewind the ELR so we return to the HVC once the IPA is mapped */
1245 	elr = read_sysreg(elr_el2);
1246 	elr -= 4;
1247 	write_sysreg(elr, elr_el2);
1248 
1249 	return ARM_EXCEPTION_TRAP;
1250 }
1251 
1252 static bool pkvm_memshare_call(struct kvm_vcpu *vcpu, u64 *exit_code)
1253 {
1254 	u64 ipa = smccc_get_arg1(vcpu);
1255 	u64 arg2 = smccc_get_arg2(vcpu);
1256 	u64 arg3 = smccc_get_arg3(vcpu);
1257 	int err;
1258 
1259 	if (arg2 || arg3)
1260 		goto out_guest_err;
1261 
1262 	err = __pkvm_guest_share_host(vcpu, ipa);
1263 	switch (err) {
1264 	case 0:
1265 		/* Success! Now tell the host. */
1266 		goto out_host;
1267 	case -EFAULT:
1268 		/*
1269 		 * Convert the exception into a data abort so that the page
1270 		 * being shared is mapped into the guest next time.
1271 		 */
1272 		*exit_code = __pkvm_memshare_page_req(vcpu, ipa);
1273 		goto out_host;
1274 	}
1275 
1276 out_guest_err:
1277 	smccc_set_retval(vcpu, SMCCC_RET_INVALID_PARAMETER, 0, 0, 0);
1278 	return true;
1279 
1280 out_host:
1281 	return false;
1282 }
1283 
1284 static bool pkvm_memunshare_call(struct kvm_vcpu *vcpu)
1285 {
1286 	u64 ipa = smccc_get_arg1(vcpu);
1287 	u64 arg2 = smccc_get_arg2(vcpu);
1288 	u64 arg3 = smccc_get_arg3(vcpu);
1289 	int err;
1290 
1291 	if (arg2 || arg3)
1292 		goto out_guest_err;
1293 
1294 	err = __pkvm_guest_unshare_host(vcpu, ipa);
1295 	if (err)
1296 		goto out_guest_err;
1297 
1298 	return false;
1299 
1300 out_guest_err:
1301 	smccc_set_retval(vcpu, SMCCC_RET_INVALID_PARAMETER, 0, 0, 0);
1302 	return true;
1303 }
1304 
1305 static bool pkvm_install_ioguard_page(struct kvm_vcpu *vcpu, u64 *exit_code)
1306 {
1307 	u64 retval = SMCCC_RET_SUCCESS;
1308 	u64 ipa = smccc_get_arg1(vcpu);
1309 	int ret;
1310 
1311 	ret = __pkvm_install_ioguard_page(vcpu, ipa);
1312 	if (ret == -ENOMEM) {
1313 		/*
1314 		 * We ran out of memcache, let's ask for more. Cancel
1315 		 * the effects of the HVC that took us here, and
1316 		 * forward the hypercall to the host for page donation
1317 		 * purposes.
1318 		 */
1319 		write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR);
1320 		return false;
1321 	}
1322 
1323 	if (ret)
1324 		retval = SMCCC_RET_INVALID_PARAMETER;
1325 
1326 	smccc_set_retval(vcpu, retval, 0, 0, 0);
1327 	return true;
1328 }
1329 
1330 bool smccc_trng_available;
1331 
1332 static bool pkvm_forward_trng(struct kvm_vcpu *vcpu)
1333 {
1334 	u32 fn = smccc_get_function(vcpu);
1335 	struct arm_smccc_res res;
1336 	unsigned long arg1 = 0;
1337 
1338 	/*
1339 	 * Forward TRNG calls to EL3, as we can't trust the host to handle
1340 	 * these for us.
1341 	 */
1342 	switch (fn) {
1343 	case ARM_SMCCC_TRNG_FEATURES:
1344 	case ARM_SMCCC_TRNG_RND32:
1345 	case ARM_SMCCC_TRNG_RND64:
1346 		arg1 = smccc_get_arg1(vcpu);
1347 		fallthrough;
1348 	case ARM_SMCCC_TRNG_VERSION:
1349 	case ARM_SMCCC_TRNG_GET_UUID:
1350 		arm_smccc_1_1_smc(fn, arg1, &res);
1351 		smccc_set_retval(vcpu, res.a0, res.a1, res.a2, res.a3);
1352 		memzero_explicit(&res, sizeof(res));
1353 		break;
1354 	}
1355 
1356 	return true;
1357 }
1358 
1359 /*
1360  * Handler for protected VM HVC calls.
1361  *
1362  * Returns true if the hypervisor has handled the exit, and control should go
1363  * back to the guest, or false if it hasn't.
1364  */
1365 bool kvm_handle_pvm_hvc64(struct kvm_vcpu *vcpu, u64 *exit_code)
1366 {
1367 	u32 fn = smccc_get_function(vcpu);
1368 	u64 val[4] = { SMCCC_RET_NOT_SUPPORTED };
1369 
1370 	switch (fn) {
1371 	case ARM_SMCCC_VERSION_FUNC_ID:
1372 		/* Nothing to be handled by the host. Go back to the guest. */
1373 		val[0] = ARM_SMCCC_VERSION_1_1;
1374 		break;
1375 	case ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID:
1376 		val[0] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0;
1377 		val[1] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1;
1378 		val[2] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2;
1379 		val[3] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3;
1380 		break;
1381 	case ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID:
1382 		val[0] = BIT(ARM_SMCCC_KVM_FUNC_FEATURES);
1383 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_HYP_MEMINFO);
1384 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MEM_SHARE);
1385 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MEM_UNSHARE);
1386 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_INFO);
1387 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_ENROLL);
1388 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_MAP);
1389 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_UNMAP);
1390 		break;
1391 	case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_ENROLL_FUNC_ID:
1392 		set_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vcpu->arch.pkvm.shadow_vm->arch.flags);
1393 		val[0] = SMCCC_RET_SUCCESS;
1394 		break;
1395 	case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_MAP_FUNC_ID:
1396 		return pkvm_install_ioguard_page(vcpu, exit_code);
1397 	case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_UNMAP_FUNC_ID:
1398 		if (__pkvm_remove_ioguard_page(vcpu, vcpu_get_reg(vcpu, 1)))
1399 			val[0] = SMCCC_RET_INVALID_PARAMETER;
1400 		else
1401 			val[0] = SMCCC_RET_SUCCESS;
1402 		break;
1403 	case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_INFO_FUNC_ID:
1404 	case ARM_SMCCC_VENDOR_HYP_KVM_HYP_MEMINFO_FUNC_ID:
1405 		if (smccc_get_arg1(vcpu) ||
1406 		    smccc_get_arg2(vcpu) ||
1407 		    smccc_get_arg3(vcpu)) {
1408 			val[0] = SMCCC_RET_INVALID_PARAMETER;
1409 		} else {
1410 			val[0] = PAGE_SIZE;
1411 		}
1412 		break;
1413 	case ARM_SMCCC_VENDOR_HYP_KVM_MEM_SHARE_FUNC_ID:
1414 		return pkvm_memshare_call(vcpu, exit_code);
1415 	case ARM_SMCCC_VENDOR_HYP_KVM_MEM_UNSHARE_FUNC_ID:
1416 		return pkvm_memunshare_call(vcpu);
1417 	case ARM_SMCCC_TRNG_VERSION ... ARM_SMCCC_TRNG_RND32:
1418 	case ARM_SMCCC_TRNG_RND64:
1419 		if (smccc_trng_available)
1420 			return pkvm_forward_trng(vcpu);
1421 		break;
1422 	default:
1423 		return pkvm_handle_psci(vcpu);
1424 	}
1425 
1426 	smccc_set_retval(vcpu, val[0], val[1], val[2], val[3]);
1427 	return true;
1428 }
1429