1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2021 Google LLC
4  * Author: Fuad Tabba <tabba@google.com>
5  */
6 
7 #include <asm/kvm_arm.h>
8 #include <asm/kvm_asm.h>
9 #include <asm/kvm_host.h>
10 #include <asm/kvm_mmu.h>
11 #include <asm/memory.h>
12 
13 #include <linux/kvm_host.h>
14 #include <linux/mm.h>
15 
16 #include <kvm/arm_hypercalls.h>
17 #include <kvm/arm_psci.h>
18 
19 #include <nvhe/mem_protect.h>
20 #include <nvhe/mm.h>
21 #include <nvhe/pkvm.h>
22 #include <nvhe/trap_handler.h>
23 
24 /* Used by icache_is_vpipt(). */
25 unsigned long __icache_flags;
26 
27 /*
28  * Set trap register values based on features in ID_AA64PFR0.
29  */
30 static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
31 {
32 	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR0_EL1);
33 	u64 hcr_set = HCR_RW;
34 	u64 hcr_clear = 0;
35 	u64 cptr_set = 0;
36 
37 	/* Protected KVM does not support AArch32 guests. */
38 	BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0),
39 		PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY);
40 	BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1),
41 		PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY);
42 
43 	/*
44 	 * Linux guests assume support for floating-point and Advanced SIMD. Do
45 	 * not change the trapping behavior for these from the KVM default.
46 	 */
47 	BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP),
48 				PVM_ID_AA64PFR0_ALLOW));
49 	BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD),
50 				PVM_ID_AA64PFR0_ALLOW));
51 
52 	/* Trap RAS unless all current versions are supported */
53 	if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), feature_ids) <
54 	    ID_AA64PFR0_RAS_V1P1) {
55 		hcr_set |= HCR_TERR | HCR_TEA;
56 		hcr_clear |= HCR_FIEN;
57 	}
58 
59 	/* Trap AMU */
60 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_AMU), feature_ids)) {
61 		hcr_clear |= HCR_AMVOFFEN;
62 		cptr_set |= CPTR_EL2_TAM;
63 	}
64 
65 	/* Trap SVE */
66 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_SVE), feature_ids))
67 		cptr_set |= CPTR_EL2_TZ;
68 
69 	vcpu->arch.hcr_el2 |= hcr_set;
70 	vcpu->arch.hcr_el2 &= ~hcr_clear;
71 	vcpu->arch.cptr_el2 |= cptr_set;
72 }
73 
74 /*
75  * Set trap register values based on features in ID_AA64PFR1.
76  */
77 static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu)
78 {
79 	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR1_EL1);
80 	u64 hcr_set = 0;
81 	u64 hcr_clear = 0;
82 
83 	/* Memory Tagging: Trap and Treat as Untagged if not supported. */
84 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), feature_ids)) {
85 		hcr_set |= HCR_TID5;
86 		hcr_clear |= HCR_DCT | HCR_ATA;
87 	}
88 
89 	vcpu->arch.hcr_el2 |= hcr_set;
90 	vcpu->arch.hcr_el2 &= ~hcr_clear;
91 }
92 
93 /*
94  * Set trap register values based on features in ID_AA64DFR0.
95  */
96 static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
97 {
98 	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64DFR0_EL1);
99 	u64 mdcr_set = 0;
100 	u64 mdcr_clear = 0;
101 	u64 cptr_set = 0;
102 
103 	/* Trap/constrain PMU */
104 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMUVER), feature_ids)) {
105 		mdcr_set |= MDCR_EL2_TPM | MDCR_EL2_TPMCR;
106 		mdcr_clear |= MDCR_EL2_HPME | MDCR_EL2_MTPME |
107 			      MDCR_EL2_HPMN_MASK;
108 	}
109 
110 	/* Trap Debug */
111 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), feature_ids))
112 		mdcr_set |= MDCR_EL2_TDRA | MDCR_EL2_TDA;
113 
114 	/* Trap OS Double Lock */
115 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DOUBLELOCK), feature_ids))
116 		mdcr_set |= MDCR_EL2_TDOSA;
117 
118 	/* Trap SPE */
119 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVER), feature_ids)) {
120 		mdcr_set |= MDCR_EL2_TPMS;
121 		mdcr_clear |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
122 	}
123 
124 	/* Trap Trace Filter */
125 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACE_FILT), feature_ids))
126 		mdcr_set |= MDCR_EL2_TTRF;
127 
128 	/* Trap Trace */
129 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACEVER), feature_ids))
130 		cptr_set |= CPTR_EL2_TTA;
131 
132 	vcpu->arch.mdcr_el2 |= mdcr_set;
133 	vcpu->arch.mdcr_el2 &= ~mdcr_clear;
134 	vcpu->arch.cptr_el2 |= cptr_set;
135 }
136 
137 /*
138  * Set trap register values based on features in ID_AA64MMFR0.
139  */
140 static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu)
141 {
142 	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR0_EL1);
143 	u64 mdcr_set = 0;
144 
145 	/* Trap Debug Communications Channel registers */
146 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_FGT), feature_ids))
147 		mdcr_set |= MDCR_EL2_TDCC;
148 
149 	vcpu->arch.mdcr_el2 |= mdcr_set;
150 }
151 
152 /*
153  * Set trap register values based on features in ID_AA64MMFR1.
154  */
155 static void pvm_init_traps_aa64mmfr1(struct kvm_vcpu *vcpu)
156 {
157 	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR1_EL1);
158 	u64 hcr_set = 0;
159 
160 	/* Trap LOR */
161 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_LOR), feature_ids))
162 		hcr_set |= HCR_TLOR;
163 
164 	vcpu->arch.hcr_el2 |= hcr_set;
165 }
166 
167 /*
168  * Set baseline trap register values.
169  */
170 static void pvm_init_trap_regs(struct kvm_vcpu *vcpu)
171 {
172 	vcpu->arch.cptr_el2 = CPTR_EL2_DEFAULT;
173 	vcpu->arch.mdcr_el2 = 0;
174 
175 	/*
176 	 * Always trap:
177 	 * - Feature id registers: to control features exposed to guests
178 	 * - Implementation-defined features
179 	 */
180 	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS |
181 			     HCR_TID3 | HCR_TACR | HCR_TIDCP | HCR_TID1;
182 
183 	if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) {
184 		/* route synchronous external abort exceptions to EL2 */
185 		vcpu->arch.hcr_el2 |= HCR_TEA;
186 		/* trap error record accesses */
187 		vcpu->arch.hcr_el2 |= HCR_TERR;
188 	}
189 
190 	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
191 		vcpu->arch.hcr_el2 |= HCR_FWB;
192 
193 	if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE))
194 		vcpu->arch.hcr_el2 |= HCR_TID2;
195 }
196 
197 /*
198  * Initialize trap register values for protected VMs.
199  */
200 static void pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu)
201 {
202 	pvm_init_trap_regs(vcpu);
203 	pvm_init_traps_aa64pfr0(vcpu);
204 	pvm_init_traps_aa64pfr1(vcpu);
205 	pvm_init_traps_aa64dfr0(vcpu);
206 	pvm_init_traps_aa64mmfr0(vcpu);
207 	pvm_init_traps_aa64mmfr1(vcpu);
208 }
209 
210 /*
211  * Start the shadow table handle at the offset defined instead of at 0.
212  * Mainly for sanity checking and debugging.
213  */
214 #define HANDLE_OFFSET 0x1000
215 
216 static int shadow_handle_to_index(int shadow_handle)
217 {
218 	return shadow_handle - HANDLE_OFFSET;
219 }
220 
221 static int index_to_shadow_handle(int index)
222 {
223 	return index + HANDLE_OFFSET;
224 }
225 
226 extern unsigned long hyp_nr_cpus;
227 
228 /*
229  * Track the vcpu most recently loaded on each physical CPU.
230  */
231 static DEFINE_PER_CPU(struct kvm_vcpu *, last_loaded_vcpu);
232 
233 /*
234  * Spinlock for protecting the shadow table related state.
235  * Protects writes to shadow_table, num_shadow_entries, and next_shadow_alloc,
236  * as well as reads and writes to last_loaded_vcpu.
237  */
238 static DEFINE_HYP_SPINLOCK(shadow_lock);
239 
240 /*
241  * The table of shadow entries for protected VMs in hyp.
242  * Allocated at hyp initialization and setup.
243  */
244 static struct kvm_shadow_vm **shadow_table;
245 
246 /* Current number of vms in the shadow table. */
247 static int num_shadow_entries;
248 
249 /* The next entry index to try to allocate from. */
250 static int next_shadow_alloc;
251 
252 void hyp_shadow_table_init(void *tbl)
253 {
254 	WARN_ON(shadow_table);
255 	shadow_table = tbl;
256 }
257 
258 /*
259  * Return the shadow vm corresponding to the handle.
260  */
261 static struct kvm_shadow_vm *find_shadow_by_handle(int shadow_handle)
262 {
263 	int shadow_index = shadow_handle_to_index(shadow_handle);
264 
265 	if (unlikely(shadow_index < 0 || shadow_index >= KVM_MAX_PVMS))
266 		return NULL;
267 
268 	return shadow_table[shadow_index];
269 }
270 
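/*
 * Look up and load the shadow vcpu at index @vcpu_idx of the shadow vm
 * identified by @shadow_handle.
 *
 * A vcpu that is already loaded on another physical cpu is refused. The TLBs
 * and I-cache are flushed if a different vcpu was last loaded on this cpu,
 * and a reference is taken on the shadow vm, to be dropped by
 * put_shadow_vcpu().
 *
 * Return the shadow vcpu on success, NULL on failure.
 */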
271 struct kvm_vcpu *get_shadow_vcpu(int shadow_handle, unsigned int vcpu_idx)
272 {
273 	struct kvm_vcpu *vcpu = NULL;
274 	struct kvm_shadow_vm *vm;
275 	bool flush_context = false;
276 
277 	hyp_spin_lock(&shadow_lock);
278 	vm = find_shadow_by_handle(shadow_handle);
279 	if (!vm || vm->nr_vcpus <= vcpu_idx)
280 		goto unlock;
281 	vcpu = &vm->shadow_vcpus[vcpu_idx]->vcpu;
282 
283 	/* Ensure vcpu isn't loaded on more than one cpu simultaneously. */
284 	if (unlikely(vcpu->arch.pkvm.loaded_on_cpu)) {
285 		vcpu = NULL;
286 		goto unlock;
287 	}
288 
289 	/*
290 	 * Guarantee that both TLBs and I-cache are private to each vcpu.
291 	 * The check below is conservative and could lead to over-invalidation,
292 	 * because there is no need to nuke the contexts if the vcpu belongs to
293 	 * a different vm.
294 	 */
295 	if (vcpu != __this_cpu_read(last_loaded_vcpu)) {
296 		flush_context = true;
297 		__this_cpu_write(last_loaded_vcpu, vcpu);
298 	}
299 
300 	vcpu->arch.pkvm.loaded_on_cpu = true;
301 
302 	hyp_page_ref_inc(hyp_virt_to_page(vm));
303 unlock:
304 	hyp_spin_unlock(&shadow_lock);
305 
306 	/* No need for the lock while flushing the context. */
307 	if (flush_context)
308 		__kvm_flush_cpu_context(vcpu->arch.hw_mmu);
309 
310 	return vcpu;
311 }
312 
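/*
 * Mark the shadow vcpu as no longer loaded and drop the shadow vm reference
 * taken by get_shadow_vcpu().
 */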
313 void put_shadow_vcpu(struct kvm_vcpu *vcpu)
314 {
315 	struct kvm_shadow_vm *vm = vcpu->arch.pkvm.shadow_vm;
316 
317 	hyp_spin_lock(&shadow_lock);
318 	vcpu->arch.pkvm.loaded_on_cpu = false;
319 	hyp_page_ref_dec(hyp_virt_to_page(vm));
320 	hyp_spin_unlock(&shadow_lock);
321 }
322 
323 /* Check and copy the supported features for the vcpu from the host. */
324 static int copy_features(struct kvm_vcpu *shadow_vcpu, struct kvm_vcpu *host_vcpu)
325 {
326 	DECLARE_BITMAP(allowed_features, KVM_VCPU_MAX_FEATURES);
327 
328 	bitmap_zero(allowed_features, KVM_VCPU_MAX_FEATURES);
329 
330 	/*
331 	 * Always allowed:
332 	 * - CPU starting in poweroff state
333 	 * - PSCI v0.2
334 	 */
335 	set_bit(KVM_ARM_VCPU_POWER_OFF, allowed_features);
336 	set_bit(KVM_ARM_VCPU_PSCI_0_2, allowed_features);
337 
338 	/*
339 	 * Check if remaining features are allowed:
340 	 * - Performance Monitoring
341 	 * - Scalable Vectors
342 	 * - Pointer Authentication
343 	 */
344 	if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMUVER), PVM_ID_AA64DFR0_ALLOW))
345 		set_bit(KVM_ARM_VCPU_PMU_V3, allowed_features);
346 
347 	if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_SVE), PVM_ID_AA64PFR0_ALLOW))
348 		set_bit(KVM_ARM_VCPU_SVE, allowed_features);
349 
350 	if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_API), PVM_ID_AA64ISAR1_ALLOW) &&
351 	    FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA), PVM_ID_AA64ISAR1_ALLOW))
352 		set_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, allowed_features);
353 
354 	if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI), PVM_ID_AA64ISAR1_ALLOW) &&
355 	    FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA), PVM_ID_AA64ISAR1_ALLOW))
356 		set_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, allowed_features);
357 
358 	bitmap_and(shadow_vcpu->arch.features, host_vcpu->arch.features,
359 		allowed_features, KVM_VCPU_MAX_FEATURES);
360 
361 	/*
362 	 * Check for system support for address/generic pointer authentication
363 	 * features if either are enabled.
364 	 */
365 	if ((test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, shadow_vcpu->arch.features) ||
366 	     test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, shadow_vcpu->arch.features)) &&
367 	    !system_has_full_ptr_auth())
368 		return -EINVAL;
369 
370 	return 0;
371 }
372 
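/* Unpin the host vcpu, and its SVE state if there is any, from hyp. */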
373 static void unpin_host_vcpu(struct shadow_vcpu_state *shadow_vcpu)
374 {
375 	struct kvm_vcpu *host_vcpu = shadow_vcpu->vcpu.arch.pkvm.host_vcpu;
376 	size_t sve_state_size;
377 	void *sve_state = shadow_vcpu->vcpu.arch.sve_state;
378 
379 	hyp_unpin_shared_mem(host_vcpu, host_vcpu + 1);
380 
381 	if (!sve_state)
382 		return;
383 
384 	sve_state = kern_hyp_va(sve_state);
385 	sve_state_size = vcpu_sve_state_size(&shadow_vcpu->vcpu);
386 	hyp_unpin_shared_mem(sve_state, sve_state + sve_state_size);
387 }
388 
389 static void unpin_host_vcpus(struct shadow_vcpu_state *shadow_vcpus[], int nr_vcpus)
390 {
391 	int i;
392 
393 	for (i = 0; i < nr_vcpus; i++)
394 		unpin_host_vcpu(shadow_vcpus[i]);
395 }
396 
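/* Enable pointer authentication for the vcpu if either ptrauth feature is set. */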
397 static int init_ptrauth(struct kvm_vcpu *shadow_vcpu)
398 {
399 	int ret = 0;
400 	if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, shadow_vcpu->arch.features) ||
401 	    test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, shadow_vcpu->arch.features))
402 		ret = kvm_vcpu_enable_ptrauth(shadow_vcpu);
403 	return ret;
404 }
405 
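/* Initialize the shadow vm state based on the state of its host counterpart. */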
406 static void init_shadow_vm(struct kvm *kvm, struct kvm_shadow_vm *vm,
407 			   int nr_vcpus)
408 {
409 	vm->host_kvm = kvm;
410 	vm->created_vcpus = nr_vcpus;
411 	vm->arch.pkvm.pvmfw_load_addr = kvm->arch.pkvm.pvmfw_load_addr;
412 	vm->arch.pkvm.enabled = READ_ONCE(kvm->arch.pkvm.enabled);
413 }
414 
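/*
 * Initialize the shadow vcpu state based on its (pinned) host counterpart:
 * copy the allowed features, set up ptrauth and SVE state where enabled,
 * configure the trap registers if the vm is protected, and record the
 * initial reset and power state of the vcpu.
 *
 * Return 0 on success, negative error code on failure.
 */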
415 static int init_shadow_vcpu(struct shadow_vcpu_state *shadow_state,
416 			    struct kvm_vcpu *host_vcpu,
417 			    struct kvm_shadow_vm *vm, int vcpu_idx)
418 {
419 	struct kvm_vcpu *shadow_vcpu = &shadow_state->vcpu;
420 	int ret;
421 
422 	host_vcpu = kern_hyp_va(host_vcpu);
423 	if (hyp_pin_shared_mem(host_vcpu, host_vcpu + 1))
424 		return -EBUSY;
425 
426 	if (host_vcpu->vcpu_idx != vcpu_idx) {
427 		ret = -EINVAL;
428 		goto done;
429 	}
430 
431 	shadow_vcpu->arch.pkvm.host_vcpu = host_vcpu;
432 	shadow_vcpu->kvm = vm->host_kvm;
433 	shadow_vcpu->vcpu_id = host_vcpu->vcpu_id;
434 	shadow_vcpu->vcpu_idx = vcpu_idx;
435 
436 	ret = copy_features(shadow_vcpu, host_vcpu);
437 	if (ret)
438 		goto done;
439 
440 	ret = init_ptrauth(shadow_vcpu);
441 	if (ret)
442 		goto done;
443 
444 	if (test_bit(KVM_ARM_VCPU_SVE, shadow_vcpu->arch.features)) {
445 		size_t sve_state_size;
446 		void *sve_state;
447 
448 		shadow_vcpu->arch.sve_state =
449 			READ_ONCE(host_vcpu->arch.sve_state);
450 		shadow_vcpu->arch.sve_max_vl =
451 			READ_ONCE(host_vcpu->arch.sve_max_vl);
452 
453 		sve_state = kern_hyp_va(shadow_vcpu->arch.sve_state);
454 		sve_state_size = vcpu_sve_state_size(shadow_vcpu);
455 
456 		if (!shadow_vcpu->arch.sve_state || !sve_state_size ||
457 		    hyp_pin_shared_mem(sve_state, sve_state + sve_state_size)) {
458 			clear_bit(KVM_ARM_VCPU_SVE, shadow_vcpu->arch.features);
459 			shadow_vcpu->arch.sve_state = NULL;
460 			shadow_vcpu->arch.sve_max_vl = 0;
461 			ret = -EINVAL;
462 			goto done;
463 		}
464 	}
465 
466 	if (vm->arch.pkvm.enabled)
467 		pkvm_vcpu_init_traps(shadow_vcpu);
468 	kvm_reset_pvm_sys_regs(shadow_vcpu);
469 
470 	vm->vcpus[vcpu_idx] = shadow_vcpu;
471 	shadow_state->vm = vm;
472 
473 	shadow_vcpu->arch.hw_mmu = &vm->arch.mmu;
474 	shadow_vcpu->arch.pkvm.shadow_vm = vm;
475 	shadow_vcpu->arch.power_off = true;
476 
477 	if (test_bit(KVM_ARM_VCPU_POWER_OFF, shadow_vcpu->arch.features)) {
478 		shadow_vcpu->arch.pkvm.power_state =
479 			PSCI_0_2_AFFINITY_LEVEL_OFF;
480 	} else if (pvm_has_pvmfw(vm)) {
481 		if (vm->pvmfw_entry_vcpu) {
482 			ret = -EINVAL;
483 			goto done;
484 		}
485 
486 		vm->pvmfw_entry_vcpu = shadow_vcpu;
487 		shadow_vcpu->arch.reset_state.reset = true;
488 		shadow_vcpu->arch.pkvm.power_state =
489 			PSCI_0_2_AFFINITY_LEVEL_ON_PENDING;
490 	} else {
491 		struct vcpu_reset_state *reset_state =
492 			&shadow_vcpu->arch.reset_state;
493 
494 		reset_state->pc = *vcpu_pc(host_vcpu);
495 		reset_state->r0 = vcpu_get_reg(host_vcpu, 0);
496 		reset_state->reset = true;
497 		shadow_vcpu->arch.pkvm.power_state =
498 			PSCI_0_2_AFFINITY_LEVEL_ON_PENDING;
499 	}
500 
501 done:
502 	if (ret)
503 		unpin_host_vcpu(shadow_state);
504 
505 	return ret;
506 }
507 
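/* Return true if a shadow table entry already exists for this host kvm. */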
508 static bool __exists_shadow(struct kvm *host_kvm)
509 {
510 	int i;
511 	int num_checked = 0;
512 
513 	for (i = 0; i < KVM_MAX_PVMS && num_checked < num_shadow_entries; i++) {
514 		if (!shadow_table[i])
515 			continue;
516 
517 		if (unlikely(shadow_table[i]->host_kvm == host_kvm))
518 			return true;
519 
520 		num_checked++;
521 	}
522 
523 	return false;
524 }
525 
526 /*
527  * Allocate a shadow table entry and insert a pointer to the shadow vm.
528  *
529  * Return a unique handle to the protected VM on success,
530  * negative error code on failure.
531  */
532 static int insert_shadow_table(struct kvm *kvm, struct kvm_shadow_vm *vm,
533 			       size_t shadow_size)
534 {
535 	struct kvm_s2_mmu *mmu = &vm->arch.mmu;
536 	int shadow_handle;
537 	int vmid;
538 
539 	hyp_assert_lock_held(&shadow_lock);
540 
541 	if (unlikely(num_shadow_entries >= KVM_MAX_PVMS))
542 		return -ENOMEM;
543 
544 	/*
545 	 * Initializing protected state might have failed, yet a malicious host
546 	 * could trigger this function. Thus, ensure that shadow_table exists.
547 	 */
548 	if (unlikely(!shadow_table))
549 		return -EINVAL;
550 
551 	/* Check that a shadow hasn't been created before for this host KVM. */
552 	if (unlikely(__exists_shadow(kvm)))
553 		return -EEXIST;
554 
555 	/* Find the next free entry in the shadow table. */
556 	while (shadow_table[next_shadow_alloc])
557 		next_shadow_alloc = (next_shadow_alloc + 1) % KVM_MAX_PVMS;
558 	shadow_handle = index_to_shadow_handle(next_shadow_alloc);
559 
560 	vm->shadow_handle = shadow_handle;
561 	vm->shadow_area_size = shadow_size;
562 
563 	/* VMID 0 is reserved for the host */
564 	vmid = next_shadow_alloc + 1;
565 	if (vmid > 0xff)
566 		return -ENOMEM;
567 
568 	mmu->vmid.vmid = vmid;
569 	mmu->vmid.vmid_gen = 0;
570 	mmu->arch = &vm->arch;
571 	mmu->pgt = &vm->pgt;
572 
573 	shadow_table[next_shadow_alloc] = vm;
574 	next_shadow_alloc = (next_shadow_alloc + 1) % KVM_MAX_PVMS;
575 	num_shadow_entries++;
576 
577 	return shadow_handle;
578 }
579 
580 /*
581  * Deallocate and remove the shadow table entry corresponding to the handle.
582  */
583 static void remove_shadow_table(int shadow_handle)
584 {
585 	hyp_assert_lock_held(&shadow_lock);
586 	shadow_table[shadow_handle_to_index(shadow_handle)] = NULL;
587 	num_shadow_entries--;
588 }
589 
590 static size_t pkvm_get_shadow_size(int num_vcpus)
591 {
592 	/* Shadow space for the vm struct and the array of vcpu state pointers. */
593 	return sizeof(struct kvm_shadow_vm) +
594 	       sizeof(struct shadow_vcpu_state *) * num_vcpus;
595 }
596 
597 /*
598  * Check whether the size of the area donated by the host is sufficient for
599  * the shadow structures required for nr_vcpus as well as the shadow vm.
600  */
601 static int check_shadow_size(int nr_vcpus, size_t shadow_size)
602 {
603 	if (nr_vcpus < 1 || nr_vcpus > KVM_MAX_VCPUS)
604 		return -EINVAL;
605 
606 	/*
607 	 * Shadow size is rounded up when allocated and donated by the host,
608 	 * so it's likely to be larger than the sum of the struct sizes.
609 	 */
610 	if (shadow_size < pkvm_get_shadow_size(nr_vcpus))
611 		return -EINVAL;
612 
613 	return 0;
614 }
615 
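/*
 * Move all pages in the shadow vcpus' memcaches to the host's teardown
 * memcache and donate them back to the host.
 */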
616 static void drain_shadow_vcpus(struct shadow_vcpu_state *shadow_vcpus[],
617 			       unsigned int nr_vcpus,
618 			       struct kvm_hyp_memcache *mc)
619 {
620 	int i;
621 
622 	for (i = 0; i < nr_vcpus; i++) {
623 		struct kvm_vcpu *shadow_vcpu = &shadow_vcpus[i]->vcpu;
624 		struct kvm_hyp_memcache *vcpu_mc = &shadow_vcpu->arch.pkvm_memcache;
625 		void *addr;
626 
627 		while (vcpu_mc->nr_pages) {
628 			addr = pop_hyp_memcache(vcpu_mc, hyp_phys_to_virt);
629 			push_hyp_memcache(mc, addr, hyp_virt_to_phys);
630 			WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
631 		}
632 	}
633 }
634 
635 /*
636  * Initialize the shadow copy of the protected VM state using the memory
637  * donated by the host.
638  *
639  * Unmaps the donated memory from the host at stage 2.
640  *
641  * kvm: A pointer to the host's struct kvm (host va).
642  * shadow_va: The host va of the area being donated for the shadow state.
643  * 	      Must be page aligned.
644  * shadow_size: The size of the area being donated for the shadow state.
645  * 		Must be a multiple of the page size.
646  * pgd: The host va of the area being donated for the stage-2 PGD for the VM.
647  * 	Must be page aligned. Its size is implied by the VM's VTCR.
648  *
649  * Return a unique handle to the protected VM on success,
650  * negative error code on failure.
651  */
652 int __pkvm_init_shadow(struct kvm *kvm,
653 		       void *shadow_va,
654 		       size_t shadow_size,
655 		       void *pgd)
656 {
657 	struct kvm_shadow_vm *vm = kern_hyp_va(shadow_va);
658 	phys_addr_t shadow_pa = hyp_virt_to_phys(vm);
659 	u64 pfn = hyp_phys_to_pfn(shadow_pa);
660 	u64 nr_shadow_pages = shadow_size >> PAGE_SHIFT;
661 	u64 nr_pgd_pages;
662 	size_t pgd_size;
663 	int nr_vcpus = 0;
664 	int ret = 0;
665 
666 	/* Check that the donated memory is aligned to page boundaries. */
667 	if (!PAGE_ALIGNED(shadow_va) ||
668 	    !PAGE_ALIGNED(shadow_size) ||
669 	    !PAGE_ALIGNED(pgd))
670 		return -EINVAL;
671 
672 	kvm = kern_hyp_va(kvm);
673 	pgd = kern_hyp_va(pgd);
674 
675 	ret = hyp_pin_shared_mem(kvm, kvm + 1);
676 	if (ret)
677 		return ret;
678 
679 	/* Ensure the host has donated enough memory for the shadow structs. */
680 	nr_vcpus = kvm->created_vcpus;
681 	ret = check_shadow_size(nr_vcpus, shadow_size);
682 	if (ret)
683 		goto err;
684 
685 	ret = __pkvm_host_donate_hyp(pfn, nr_shadow_pages);
686 	if (ret)
687 		goto err;
688 
689 	/* Ensure we're working with a clean slate. */
690 	memset(vm, 0, shadow_size);
691 
692 	vm->arch.vtcr = host_kvm.arch.vtcr;
693 	pgd_size = kvm_pgtable_stage2_pgd_size(host_kvm.arch.vtcr);
694 	nr_pgd_pages = pgd_size >> PAGE_SHIFT;
695 	ret = __pkvm_host_donate_hyp(hyp_virt_to_pfn(pgd), nr_pgd_pages);
696 	if (ret)
697 		goto err_remove_mappings;
698 
699 	init_shadow_vm(kvm, vm, nr_vcpus);
700 
701 	/* Add the entry to the shadow table. */
702 	hyp_spin_lock(&shadow_lock);
703 	ret = insert_shadow_table(kvm, vm, shadow_size);
704 	if (ret < 0)
705 		goto err_unlock;
706 
707 	ret = kvm_guest_prepare_stage2(vm, pgd);
708 	if (ret)
709 		goto err_remove_shadow_table;
710 
711 	hyp_spin_unlock(&shadow_lock);
712 	return vm->shadow_handle;
713 
714 err_remove_shadow_table:
715 	remove_shadow_table(vm->shadow_handle);
716 err_unlock:
717 	hyp_spin_unlock(&shadow_lock);
718 	WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(pgd), nr_pgd_pages));
719 err_remove_mappings:
720 	/* Clear the donated shadow memory on failure to avoid data leaks. */
721 	memset(vm, 0, shadow_size);
722 	WARN_ON(__pkvm_hyp_donate_host(hyp_phys_to_pfn(shadow_pa),
723 				       shadow_size >> PAGE_SHIFT));
724 err:
725 	hyp_unpin_shared_mem(kvm, kvm + 1);
726 	return ret;
727 }
728 
729 /*
730  * Initialize the protected vcpu state shadow copy in host-donated memory.
731  *
732  * shadow_handle: The handle for the protected vm.
733  * host_vcpu: A pointer to the corresponding host vcpu (host va).
734  * shadow_vcpu_hva: The host va of the area being donated for the vcpu state.
735  *                 Must be page aligned. The size of the area must be equal to
736  *                 the paged-aligned size of kvm_shadow_vcpu_state.
737  *                 the page-aligned size of struct shadow_vcpu_state.
738  * Return 0 on success, negative error code on failure.
739  */
740 int __pkvm_init_shadow_vcpu(unsigned int shadow_handle,
741 			    struct kvm_vcpu *host_vcpu,
742 			    void *shadow_vcpu_hva)
743 {
744 	struct kvm_shadow_vm *vm;
745 	struct shadow_vcpu_state *shadow_state = kern_hyp_va(shadow_vcpu_hva);
746 	size_t vcpu_state_sz = sizeof(*shadow_state);
747 	u64 nr_pages = PAGE_ALIGN(vcpu_state_sz) >> PAGE_SHIFT;
748 	unsigned int idx;
749 	int ret;
750 
751 	if (!PAGE_ALIGNED(shadow_vcpu_hva))
752 		return -EINVAL;
753 
754 	ret = __pkvm_host_donate_hyp(hyp_virt_to_pfn(shadow_state),
755 				     nr_pages);
756 	if (ret)
757 		return ret;
758 
759 	memset(shadow_state, 0, vcpu_state_sz);
760 
761 	hyp_spin_lock(&shadow_lock);
762 
763 	vm = find_shadow_by_handle(shadow_handle);
764 	if (!vm) {
765 		ret = -ENOENT;
766 		goto unlock;
767 	}
768 
769 	idx = vm->nr_vcpus;
770 	if (idx >= vm->created_vcpus) {
771 		ret = -EINVAL;
772 		goto unlock;
773 	}
774 
775 	ret = init_shadow_vcpu(shadow_state, host_vcpu, vm, idx);
776 	if (ret)
777 		goto unlock;
778 
779 	vm->shadow_vcpus[idx] = shadow_state;
780 	vm->nr_vcpus++;
781 unlock:
782 	hyp_spin_unlock(&shadow_lock);
783 
784 	if (ret) {
785 		memset(shadow_state, 0, vcpu_state_sz);
786 		WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(shadow_state),
787 					       nr_pages));
788 	}
789 
790 	return ret;
791 }
792 
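/*
 * Scrub the donated hyp memory, queue its pages on the teardown memcache and
 * donate them back to the host.
 */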
793 static void teardown_donated_memory(struct kvm_hyp_memcache *mc, void *addr,
794 				    size_t size)
795 {
796 	u64 pfn = hyp_phys_to_pfn(__hyp_pa(addr));
797 	u64 nr_pages = size >> PAGE_SHIFT;
798 	void *start;
799 
800 	memset(addr, 0, size);
801 	kvm_flush_dcache_to_poc(addr, size);
802 
803 	for (start = addr; start < addr + size; start += PAGE_SIZE)
804 		push_hyp_memcache(mc, start, hyp_virt_to_phys);
805 
806 	WARN_ON(__pkvm_hyp_donate_host(pfn, nr_pages));
807 }
808 
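/*
 * Tear down the shadow state of the protected vm corresponding to the handle
 * and return all of its donated memory to the host.
 *
 * Return 0 on success, negative error code on failure.
 */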
809 int __pkvm_teardown_shadow(int shadow_handle)
810 {
811 	struct kvm_hyp_memcache *mc;
812 	struct kvm_shadow_vm *vm;
813 	struct kvm *host_kvm;
814 	unsigned int nr_vcpus;
815 	int err;
816 	int i;
817 
818 	/* Lookup then remove entry from the shadow table. */
819 	hyp_spin_lock(&shadow_lock);
820 	vm = find_shadow_by_handle(shadow_handle);
821 	if (!vm) {
822 		err = -ENOENT;
823 		goto err_unlock;
824 	}
825 
826 	if (WARN_ON(hyp_page_count(vm))) {
827 		err = -EBUSY;
828 		goto err_unlock;
829 	}
830 
831 	host_kvm = vm->host_kvm;
832 	nr_vcpus = vm->nr_vcpus;
833 
834 	/*
835 	 * Clear the tracking for last_loaded_vcpu for all cpus for this vm in
836 	 * case the same addresses for those vcpus are reused for future vms.
837 	 */
838 	for (i = 0; i < hyp_nr_cpus; i++) {
839 		struct kvm_vcpu **last_loaded_vcpu_ptr =
840 			per_cpu_ptr(&last_loaded_vcpu, i);
841 		struct kvm_vcpu *vcpu = *last_loaded_vcpu_ptr;
842 
843 		if (vcpu && vcpu->arch.pkvm.shadow_vm == vm)
844 			*last_loaded_vcpu_ptr = NULL;
845 	}
846 
847 	/* Ensure the VMID is clean before it can be reallocated */
848 	__kvm_tlb_flush_vmid(&vm->arch.mmu);
849 	remove_shadow_table(shadow_handle);
850 	hyp_spin_unlock(&shadow_lock);
851 
852 	/* Reclaim guest pages, and page-table pages */
853 	mc = &host_kvm->arch.pkvm.teardown_mc;
854 	reclaim_guest_pages(vm, mc);
855 	drain_shadow_vcpus(vm->shadow_vcpus, nr_vcpus, mc);
856 	unpin_host_vcpus(vm->shadow_vcpus, nr_vcpus);
857 
858 	for (i = 0; i < nr_vcpus; i++)
859 		teardown_donated_memory(mc, vm->shadow_vcpus[i],
860 					PAGE_ALIGN(sizeof(*vm->shadow_vcpus[i])));
861 	teardown_donated_memory(mc, vm, vm->shadow_area_size);
862 
863 	hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
864 	return 0;
865 
866 err_unlock:
867 	hyp_spin_unlock(&shadow_lock);
868 	return err;
869 }
870 
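/*
 * Copy the pvmfw image into the guest pages starting at physical address
 * @phys, for the region of at most @size bytes beginning at IPA @ipa.
 *
 * Return 0 on success, negative error code on failure.
 */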
871 int pkvm_load_pvmfw_pages(struct kvm_shadow_vm *vm, u64 ipa, phys_addr_t phys,
872 			  u64 size)
873 {
874 	struct kvm_protected_vm *pkvm = &vm->arch.pkvm;
875 	u64 npages, offset = ipa - pkvm->pvmfw_load_addr;
876 	void *src = hyp_phys_to_virt(pvmfw_base) + offset;
877 
878 	if (offset >= pvmfw_size)
879 		return -EINVAL;
880 
881 	size = min(size, pvmfw_size - offset);
882 	if (!PAGE_ALIGNED(size) || !PAGE_ALIGNED(src))
883 		return -EINVAL;
884 
885 	npages = size >> PAGE_SHIFT;
886 	while (npages--) {
887 		void *dst;
888 
889 		dst = hyp_fixmap_map(phys);
890 		if (!dst)
891 			return -EINVAL;
892 
893 		/*
894 		 * No need for cache maintenance here, as the pgtable code will
895 		 * take care of this when installing the pte in the guest's
896 		 * stage-2 page table.
897 		 */
898 		memcpy(dst, src, PAGE_SIZE);
899 
900 		hyp_fixmap_unmap();
901 		src += PAGE_SIZE;
902 		phys += PAGE_SIZE;
903 	}
904 
905 	return 0;
906 }
907 
908 void pkvm_clear_pvmfw_pages(void)
909 {
910 	void *addr = hyp_phys_to_virt(pvmfw_base);
911 
912 	memset(addr, 0, pvmfw_size);
913 	kvm_flush_dcache_to_poc(addr, pvmfw_size);
914 }
915 
916 /*
917  * This function sets the registers on the vcpu to their architecturally defined
918  * reset values.
919  *
920  * Note: Can only be called by the vcpu on itself, after it has been turned on.
921  */
922 void pkvm_reset_vcpu(struct kvm_vcpu *vcpu)
923 {
924 	struct vcpu_reset_state *reset_state = &vcpu->arch.reset_state;
925 	struct kvm_shadow_vm *vm = vcpu->arch.pkvm.shadow_vm;
926 
927 	WARN_ON(!reset_state->reset);
928 
929 	init_ptrauth(vcpu);
930 
931 	/* Reset core registers */
932 	memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu)));
933 	memset(&vcpu->arch.ctxt.fp_regs, 0, sizeof(vcpu->arch.ctxt.fp_regs));
934 	vcpu_gp_regs(vcpu)->pstate = VCPU_RESET_PSTATE_EL1;
935 
936 	/* Reset system registers */
937 	kvm_reset_pvm_sys_regs(vcpu);
938 
939 	/* Propagate initiator's endianness, after kvm_reset_pvm_sys_regs. */
940 	if (reset_state->be)
941 		kvm_vcpu_set_be(vcpu);
942 
943 	if (vm->pvmfw_entry_vcpu == vcpu) {
944 		struct kvm_vcpu *host_vcpu = vcpu->arch.pkvm.host_vcpu;
945 		u64 entry = vm->arch.pkvm.pvmfw_load_addr;
946 		int i;
947 
948 		/* X0 - X14 provided by the VMM (preserved) */
949 		for (i = 0; i <= 14; ++i)
950 			vcpu_set_reg(vcpu, i, vcpu_get_reg(host_vcpu, i));
951 
952 		/* X15: Boot protocol version */
953 		vcpu_set_reg(vcpu, 15, 0);
954 
955 		/* PC: IPA of pvmfw base */
956 		*vcpu_pc(vcpu) = entry;
957 
958 		vm->pvmfw_entry_vcpu = NULL;
959 
960 		/* Auto enroll MMIO guard */
961 		set_bit(KVM_ARCH_FLAG_MMIO_GUARD,
962 			&vcpu->arch.pkvm.shadow_vm->arch.flags);
963 	} else {
964 		*vcpu_pc(vcpu) = reset_state->pc;
965 		vcpu_set_reg(vcpu, 0, reset_state->r0);
966 	}
967 
968 	reset_state->reset = false;
969 
970 	vcpu->arch.pkvm.exit_code = 0;
971 
972 	WARN_ON(vcpu->arch.pkvm.power_state != PSCI_0_2_AFFINITY_LEVEL_ON_PENDING);
973 	WRITE_ONCE(vcpu->arch.power_off, false);
974 	WRITE_ONCE(vcpu->arch.pkvm.power_state, PSCI_0_2_AFFINITY_LEVEL_ON);
975 }
976 
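/* Return the vcpu of the vm whose MPIDR matches @mpidr, or NULL if none does. */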
977 struct kvm_vcpu *pvm_mpidr_to_vcpu(struct kvm_shadow_vm *vm, unsigned long mpidr)
978 {
979 	struct kvm_vcpu *vcpu;
980 	int i;
981 
982 	mpidr &= MPIDR_HWID_BITMASK;
983 
984 	for (i = 0; i < READ_ONCE(vm->nr_vcpus); i++) {
985 		vcpu = vm->vcpus[i];
986 
987 		if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
988 			return vcpu;
989 	}
990 
991 	return NULL;
992 }
993 
994 /*
995  * Returns true if the hypervisor handled the PSCI call, and control should go back
996  * to the guest, or false if the host needs to do some additional work (i.e.,
997  * wake up the vcpu).
998  */
999 static bool pvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
1000 {
1001 	struct kvm_shadow_vm *vm = source_vcpu->arch.pkvm.shadow_vm;
1002 	struct kvm_vcpu *vcpu;
1003 	struct vcpu_reset_state *reset_state;
1004 	unsigned long cpu_id;
1005 	unsigned long hvc_ret_val;
1006 	int power_state;
1007 
1008 	cpu_id = smccc_get_arg1(source_vcpu);
1009 	if (!kvm_psci_valid_affinity(source_vcpu, cpu_id)) {
1010 		hvc_ret_val = PSCI_RET_INVALID_PARAMS;
1011 		goto error;
1012 	}
1013 
1014 	vcpu = pvm_mpidr_to_vcpu(vm, cpu_id);
1015 
1016 	/* Make sure the caller requested a valid vcpu. */
1017 	if (!vcpu) {
1018 		hvc_ret_val = PSCI_RET_INVALID_PARAMS;
1019 		goto error;
1020 	}
1021 
1022 	/*
1023 	 * Make sure the requested vcpu is not on to begin with.
1024 	 * Atomic to avoid a race between vcpus trying to power on the same vcpu.
1025 	 */
1026 	power_state = cmpxchg(&vcpu->arch.pkvm.power_state,
1027 		PSCI_0_2_AFFINITY_LEVEL_OFF,
1028 		PSCI_0_2_AFFINITY_LEVEL_ON_PENDING);
1029 	switch (power_state) {
1030 	case PSCI_0_2_AFFINITY_LEVEL_ON_PENDING:
1031 		hvc_ret_val = PSCI_RET_ON_PENDING;
1032 		goto error;
1033 	case PSCI_0_2_AFFINITY_LEVEL_ON:
1034 		hvc_ret_val = PSCI_RET_ALREADY_ON;
1035 		goto error;
1036 	case PSCI_0_2_AFFINITY_LEVEL_OFF:
1037 		break;
1038 	default:
1039 		hvc_ret_val = PSCI_RET_INTERNAL_FAILURE;
1040 		goto error;
1041 	}
1042 
1043 	reset_state = &vcpu->arch.reset_state;
1044 
1045 	reset_state->pc = smccc_get_arg2(source_vcpu);
1046 	reset_state->r0 = smccc_get_arg3(source_vcpu);
1047 
1048 	/* Propagate caller endianness */
1049 	reset_state->be = kvm_vcpu_is_be(source_vcpu);
1050 
1051 	reset_state->reset = true;
1052 
1053 	/*
1054 	 * Return to the host, which should make the KVM_REQ_VCPU_RESET request
1055 	 * as well as kvm_vcpu_wake_up() to schedule the vcpu.
1056 	 */
1057 	return false;
1058 
1059 error:
1060 	/* If there's an error go back straight to the guest. */
1061 	smccc_set_retval(source_vcpu, hvc_ret_val, 0, 0, 0);
1062 	return true;
1063 }
1064 
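/*
 * Handle PSCI AFFINITY_INFO entirely in hyp, based on the power state tracked
 * for each shadow vcpu. Control always goes back to the guest.
 */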
1065 static bool pvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
1066 {
1067 	int i, matching_cpus = 0;
1068 	unsigned long mpidr;
1069 	unsigned long target_affinity;
1070 	unsigned long target_affinity_mask;
1071 	unsigned long lowest_affinity_level;
1072 	struct kvm_shadow_vm *vm = vcpu->arch.pkvm.shadow_vm;
1073 	struct kvm_vcpu *tmp;
1074 	unsigned long hvc_ret_val;
1075 
1076 	target_affinity = smccc_get_arg1(vcpu);
1077 	lowest_affinity_level = smccc_get_arg2(vcpu);
1078 
1079 	if (!kvm_psci_valid_affinity(vcpu, target_affinity)) {
1080 		hvc_ret_val = PSCI_RET_INVALID_PARAMS;
1081 		goto done;
1082 	}
1083 
1084 	/* Determine target affinity mask */
1085 	target_affinity_mask = psci_affinity_mask(lowest_affinity_level);
1086 	if (!target_affinity_mask) {
1087 		hvc_ret_val = PSCI_RET_INVALID_PARAMS;
1088 		goto done;
1089 	}
1090 
1091 	/* Ignore other bits of target affinity */
1092 	target_affinity &= target_affinity_mask;
1093 
1094 	hvc_ret_val = PSCI_0_2_AFFINITY_LEVEL_OFF;
1095 
1096 	/*
1097 	 * If at least one vcpu matching target affinity is ON then return ON,
1098 	 * otherwise if at least one is PENDING_ON then return PENDING_ON.
1099 	 * Otherwise, return OFF.
1100 	 */
1101 	for (i = 0; i < READ_ONCE(vm->nr_vcpus); i++) {
1102 		tmp = vm->vcpus[i];
1103 		mpidr = kvm_vcpu_get_mpidr_aff(tmp);
1104 
1105 		if ((mpidr & target_affinity_mask) == target_affinity) {
1106 			int power_state;
1107 
1108 			matching_cpus++;
1109 			power_state = READ_ONCE(tmp->arch.pkvm.power_state);
1110 			switch (power_state) {
1111 			case PSCI_0_2_AFFINITY_LEVEL_ON_PENDING:
1112 				hvc_ret_val = PSCI_0_2_AFFINITY_LEVEL_ON_PENDING;
1113 				break;
1114 			case PSCI_0_2_AFFINITY_LEVEL_ON:
1115 				hvc_ret_val = PSCI_0_2_AFFINITY_LEVEL_ON;
1116 				goto done;
1117 			case PSCI_0_2_AFFINITY_LEVEL_OFF:
1118 				break;
1119 			default:
1120 				hvc_ret_val = PSCI_RET_INTERNAL_FAILURE;
1121 				goto done;
1122 			}
1123 		}
1124 	}
1125 
1126 	if (!matching_cpus)
1127 		hvc_ret_val = PSCI_RET_INVALID_PARAMS;
1128 
1129 done:
1130 	/* Nothing to be handled by the host. Go back to the guest. */
1131 	smccc_set_retval(vcpu, hvc_ret_val, 0, 0, 0);
1132 	return true;
1133 }
1134 
1135 /*
1136  * Returns true if the hypervisor has handled the PSCI call, and control should
1137  * go back to the guest, or false if the host needs to do some additional work
1138  * (e.g., turn off and update vcpu scheduling status).
1139  */
1140 static bool pvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
1141 {
1142 	WARN_ON(vcpu->arch.power_off);
1143 	WARN_ON(vcpu->arch.pkvm.power_state != PSCI_0_2_AFFINITY_LEVEL_ON);
1144 
1145 	WRITE_ONCE(vcpu->arch.power_off, true);
1146 	WRITE_ONCE(vcpu->arch.pkvm.power_state, PSCI_0_2_AFFINITY_LEVEL_OFF);
1147 
1148 	/* Return to the host so that it can finish powering off the vcpu. */
1149 	return false;
1150 }
1151 
1152 static bool pvm_psci_version(struct kvm_vcpu *vcpu)
1153 {
1154 	/* Nothing to be handled by the host. Go back to the guest. */
1155 	smccc_set_retval(vcpu, KVM_ARM_PSCI_1_1, 0, 0, 0);
1156 	return true;
1157 }
1158 
1159 static bool pvm_psci_not_supported(struct kvm_vcpu *vcpu)
1160 {
1161 	/* Nothing to be handled by the host. Go back to the guest. */
1162 	smccc_set_retval(vcpu, PSCI_RET_NOT_SUPPORTED, 0, 0, 0);
1163 	return true;
1164 }
1165 
1166 static bool pvm_psci_features(struct kvm_vcpu *vcpu)
1167 {
1168 	u32 feature = smccc_get_arg1(vcpu);
1169 	unsigned long val;
1170 
1171 	switch (feature) {
1172 	case PSCI_0_2_FN_PSCI_VERSION:
1173 	case PSCI_0_2_FN_CPU_SUSPEND:
1174 	case PSCI_0_2_FN64_CPU_SUSPEND:
1175 	case PSCI_0_2_FN_CPU_OFF:
1176 	case PSCI_0_2_FN_CPU_ON:
1177 	case PSCI_0_2_FN64_CPU_ON:
1178 	case PSCI_0_2_FN_AFFINITY_INFO:
1179 	case PSCI_0_2_FN64_AFFINITY_INFO:
1180 	case PSCI_0_2_FN_SYSTEM_OFF:
1181 	case PSCI_0_2_FN_SYSTEM_RESET:
1182 	case PSCI_1_0_FN_PSCI_FEATURES:
1183 	case PSCI_1_1_FN_SYSTEM_RESET2:
1184 	case PSCI_1_1_FN64_SYSTEM_RESET2:
1185 	case ARM_SMCCC_VERSION_FUNC_ID:
1186 		val = PSCI_RET_SUCCESS;
1187 		break;
1188 	default:
1189 		val = PSCI_RET_NOT_SUPPORTED;
1190 		break;
1191 	}
1192 
1193 	/* Nothing to be handled by the host. Go back to the guest. */
1194 	smccc_set_retval(vcpu, val, 0, 0, 0);
1195 	return true;
1196 }
1197 
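/*
 * Dispatch PSCI calls made by a protected guest. Calls that require the
 * host's involvement (e.g. CPU_SUSPEND, SYSTEM_OFF) return false so that the
 * host can complete them.
 */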
1198 static bool pkvm_handle_psci(struct kvm_vcpu *vcpu)
1199 {
1200 	u32 psci_fn = smccc_get_function(vcpu);
1201 
1202 	switch (psci_fn) {
1203 	case PSCI_0_2_FN_CPU_ON:
1204 		kvm_psci_narrow_to_32bit(vcpu);
1205 		fallthrough;
1206 	case PSCI_0_2_FN64_CPU_ON:
1207 		return pvm_psci_vcpu_on(vcpu);
1208 	case PSCI_0_2_FN_CPU_OFF:
1209 		return pvm_psci_vcpu_off(vcpu);
1210 	case PSCI_0_2_FN_AFFINITY_INFO:
1211 		kvm_psci_narrow_to_32bit(vcpu);
1212 		fallthrough;
1213 	case PSCI_0_2_FN64_AFFINITY_INFO:
1214 		return pvm_psci_vcpu_affinity_info(vcpu);
1215 	case PSCI_0_2_FN_PSCI_VERSION:
1216 		return pvm_psci_version(vcpu);
1217 	case PSCI_1_0_FN_PSCI_FEATURES:
1218 		return pvm_psci_features(vcpu);
1219 	case PSCI_0_2_FN_SYSTEM_RESET:
1220 	case PSCI_0_2_FN_CPU_SUSPEND:
1221 	case PSCI_0_2_FN64_CPU_SUSPEND:
1222 	case PSCI_0_2_FN_SYSTEM_OFF:
1223 	case PSCI_1_1_FN_SYSTEM_RESET2:
1224 	case PSCI_1_1_FN64_SYSTEM_RESET2:
1225 		return false; /* Handled by the host. */
1226 	default:
1227 		break;
1228 	}
1229 
1230 	return pvm_psci_not_supported(vcpu);
1231 }
1232 
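/*
 * Turn a MEM_SHARE hypercall for an unmapped IPA into a synthetic stage-2
 * data abort so that the host maps the page, and rewind the ELR so that the
 * guest retries the hypercall once the page has been mapped.
 */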
1233 static u64 __pkvm_memshare_page_req(struct kvm_vcpu *vcpu, u64 ipa)
1234 {
1235 	u64 elr;
1236 
1237 	/* Fake up a data abort (Level 3 translation fault on write) */
1238 	vcpu->arch.fault.esr_el2 = (u32)ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT |
1239 				   ESR_ELx_WNR | ESR_ELx_FSC_FAULT |
1240 				   FIELD_PREP(ESR_ELx_FSC_LEVEL, 3);
1241 
1242 	/* Shuffle the IPA around into the HPFAR */
1243 	vcpu->arch.fault.hpfar_el2 = (ipa >> 8) & HPFAR_MASK;
1244 
1245 	/* This is a virtual address. 0's good. Let's go with 0. */
1246 	vcpu->arch.fault.far_el2 = 0;
1247 
1248 	/* Rewind the ELR so we return to the HVC once the IPA is mapped */
1249 	elr = read_sysreg(elr_el2);
1250 	elr -= 4;
1251 	write_sysreg(elr, elr_el2);
1252 
1253 	return ARM_EXCEPTION_TRAP;
1254 }
1255 
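/*
 * Handle a guest request to share a page with the host. Return false if the
 * host needs to complete the share (or first map the page), true if the call
 * failed and control goes straight back to the guest.
 */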
1256 static bool pkvm_memshare_call(struct kvm_vcpu *vcpu, u64 *exit_code)
1257 {
1258 	u64 ipa = smccc_get_arg1(vcpu);
1259 	u64 arg2 = smccc_get_arg2(vcpu);
1260 	u64 arg3 = smccc_get_arg3(vcpu);
1261 	int err;
1262 
1263 	if (arg2 || arg3)
1264 		goto out_guest_err;
1265 
1266 	err = __pkvm_guest_share_host(vcpu, ipa);
1267 	switch (err) {
1268 	case 0:
1269 		/* Success! Now tell the host. */
1270 		goto out_host;
1271 	case -EFAULT:
1272 		/*
1273 		 * Convert the exception into a data abort so that the page
1274 		 * being shared is mapped into the guest next time.
1275 		 */
1276 		*exit_code = __pkvm_memshare_page_req(vcpu, ipa);
1277 		goto out_host;
1278 	}
1279 
1280 out_guest_err:
1281 	smccc_set_retval(vcpu, SMCCC_RET_INVALID_PARAMETER, 0, 0, 0);
1282 	return true;
1283 
1284 out_host:
1285 	return false;
1286 }
1287 
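/*
 * Handle a guest request to unshare a page from the host. Return false so
 * that the host is notified on success, true on failure.
 */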
1288 static bool pkvm_memunshare_call(struct kvm_vcpu *vcpu)
1289 {
1290 	u64 ipa = smccc_get_arg1(vcpu);
1291 	u64 arg2 = smccc_get_arg2(vcpu);
1292 	u64 arg3 = smccc_get_arg3(vcpu);
1293 	int err;
1294 
1295 	if (arg2 || arg3)
1296 		goto out_guest_err;
1297 
1298 	err = __pkvm_guest_unshare_host(vcpu, ipa);
1299 	if (err)
1300 		goto out_guest_err;
1301 
1302 	return false;
1303 
1304 out_guest_err:
1305 	smccc_set_retval(vcpu, SMCCC_RET_INVALID_PARAMETER, 0, 0, 0);
1306 	return true;
1307 }
1308 
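/*
 * Handle a guest request to map an MMIO guard page. If hyp has run out of
 * memcache pages, rewind the ELR and forward the hypercall to the host so
 * that it can donate more memory.
 */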
1309 static bool pkvm_install_ioguard_page(struct kvm_vcpu *vcpu, u64 *exit_code)
1310 {
1311 	u64 retval = SMCCC_RET_SUCCESS;
1312 	u64 ipa = smccc_get_arg1(vcpu);
1313 	int ret;
1314 
1315 	ret = __pkvm_install_ioguard_page(vcpu, ipa);
1316 	if (ret == -ENOMEM) {
1317 		/*
1318 		 * We ran out of memcache, let's ask for more. Cancel
1319 		 * the effects of the HVC that took us here, and
1320 		 * forward the hypercall to the host for page donation
1321 		 * purposes.
1322 		 */
1323 		write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR);
1324 		return false;
1325 	}
1326 
1327 	if (ret)
1328 		retval = SMCCC_RET_INVALID_PARAMETER;
1329 
1330 	smccc_set_retval(vcpu, retval, 0, 0, 0);
1331 	return true;
1332 }
1333 
1334 bool smccc_trng_available;
1335 
1336 static bool pkvm_forward_trng(struct kvm_vcpu *vcpu)
1337 {
1338 	u32 fn = smccc_get_function(vcpu);
1339 	struct arm_smccc_res res;
1340 	unsigned long arg1 = 0;
1341 
1342 	/*
1343 	 * Forward TRNG calls to EL3, as we can't trust the host to handle
1344 	 * these for us.
1345 	 */
1346 	switch (fn) {
1347 	case ARM_SMCCC_TRNG_FEATURES:
1348 	case ARM_SMCCC_TRNG_RND32:
1349 	case ARM_SMCCC_TRNG_RND64:
1350 		arg1 = smccc_get_arg1(vcpu);
1351 		fallthrough;
1352 	case ARM_SMCCC_TRNG_VERSION:
1353 	case ARM_SMCCC_TRNG_GET_UUID:
1354 		arm_smccc_1_1_smc(fn, arg1, &res);
1355 		smccc_set_retval(vcpu, res.a0, res.a1, res.a2, res.a3);
1356 		memzero_explicit(&res, sizeof(res));
1357 		break;
1358 	}
1359 
1360 	return true;
1361 }
1362 
1363 /*
1364  * Handler for protected VM HVC calls.
1365  *
1366  * Returns true if the hypervisor has handled the exit, and control should go
1367  * back to the guest, or false if it hasn't.
1368  */
1369 bool kvm_handle_pvm_hvc64(struct kvm_vcpu *vcpu, u64 *exit_code)
1370 {
1371 	u32 fn = smccc_get_function(vcpu);
1372 	u64 val[4] = { SMCCC_RET_NOT_SUPPORTED };
1373 
1374 	switch (fn) {
1375 	case ARM_SMCCC_VERSION_FUNC_ID:
1376 		/* Nothing to be handled by the host. Go back to the guest. */
1377 		val[0] = ARM_SMCCC_VERSION_1_1;
1378 		break;
1379 	case ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID:
1380 		val[0] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0;
1381 		val[1] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1;
1382 		val[2] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2;
1383 		val[3] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3;
1384 		break;
1385 	case ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID:
1386 		val[0] = BIT(ARM_SMCCC_KVM_FUNC_FEATURES);
1387 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_HYP_MEMINFO);
1388 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MEM_SHARE);
1389 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MEM_UNSHARE);
1390 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_INFO);
1391 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_ENROLL);
1392 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_MAP);
1393 		val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_UNMAP);
1394 		break;
1395 	case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_ENROLL_FUNC_ID:
1396 		set_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vcpu->arch.pkvm.shadow_vm->arch.flags);
1397 		val[0] = SMCCC_RET_SUCCESS;
1398 		break;
1399 	case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_MAP_FUNC_ID:
1400 		return pkvm_install_ioguard_page(vcpu, exit_code);
1401 	case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_UNMAP_FUNC_ID:
1402 		if (__pkvm_remove_ioguard_page(vcpu, vcpu_get_reg(vcpu, 1)))
1403 			val[0] = SMCCC_RET_INVALID_PARAMETER;
1404 		else
1405 			val[0] = SMCCC_RET_SUCCESS;
1406 		break;
1407 	case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_INFO_FUNC_ID:
1408 	case ARM_SMCCC_VENDOR_HYP_KVM_HYP_MEMINFO_FUNC_ID:
1409 		if (smccc_get_arg1(vcpu) ||
1410 		    smccc_get_arg2(vcpu) ||
1411 		    smccc_get_arg3(vcpu)) {
1412 			val[0] = SMCCC_RET_INVALID_PARAMETER;
1413 		} else {
1414 			val[0] = PAGE_SIZE;
1415 		}
1416 		break;
1417 	case ARM_SMCCC_VENDOR_HYP_KVM_MEM_SHARE_FUNC_ID:
1418 		return pkvm_memshare_call(vcpu, exit_code);
1419 	case ARM_SMCCC_VENDOR_HYP_KVM_MEM_UNSHARE_FUNC_ID:
1420 		return pkvm_memunshare_call(vcpu);
1421 	case ARM_SMCCC_TRNG_VERSION ... ARM_SMCCC_TRNG_RND32:
1422 	case ARM_SMCCC_TRNG_RND64:
1423 		if (smccc_trng_available)
1424 			return pkvm_forward_trng(vcpu);
1425 		break;
1426 	default:
1427 		return pkvm_handle_psci(vcpu);
1428 	}
1429 
1430 	smccc_set_retval(vcpu, val[0], val[1], val[2], val[3]);
1431 	return true;
1432 }
1433