1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2020 Google LLC
4  * Author: Quentin Perret <qperret@google.com>
5  */
6 
7 #include <linux/kvm_host.h>
8 #include <asm/kvm_emulate.h>
9 #include <asm/kvm_hyp.h>
10 #include <asm/kvm_hypevents.h>
11 #include <asm/kvm_mmu.h>
12 #include <asm/kvm_pgtable.h>
13 #include <asm/kvm_pkvm.h>
14 #include <asm/stage2_pgtable.h>
15 
16 #include <hyp/fault.h>
17 
18 #include <nvhe/gfp.h>
19 #include <nvhe/iommu.h>
20 #include <nvhe/memory.h>
21 #include <nvhe/mem_protect.h>
22 #include <nvhe/mm.h>
23 #include <nvhe/modules.h>
24 
25 #define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | \
26 			   KVM_PGTABLE_S2_IDMAP | \
27 			   KVM_PGTABLE_S2_PREFAULT_BLOCK)
28 
29 struct host_mmu host_mmu;
30 
31 struct pkvm_moveable_reg pkvm_moveable_regs[PKVM_NR_MOVEABLE_REGS];
32 unsigned int pkvm_moveable_regs_nr;
33 
34 static struct hyp_pool host_s2_pool;
35 
36 static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm);
37 #define current_vm (*this_cpu_ptr(&__current_vm))
38 
39 static struct kvm_pgtable_pte_ops host_s2_pte_ops;
40 static bool host_stage2_force_pte(u64 addr, u64 end, enum kvm_pgtable_prot prot);
41 static bool host_stage2_pte_is_counted(kvm_pte_t pte, u32 level);
42 static bool guest_stage2_force_pte_cb(u64 addr, u64 end,
43 				      enum kvm_pgtable_prot prot);
44 static bool guest_stage2_pte_is_counted(kvm_pte_t pte, u32 level);
45 
46 static struct kvm_pgtable_pte_ops guest_s2_pte_ops = {
47 	.force_pte_cb = guest_stage2_force_pte_cb,
48 	.pte_is_counted_cb = guest_stage2_pte_is_counted
49 };
50 
51 static void guest_lock_component(struct pkvm_hyp_vm *vm)
52 {
53 	hyp_spin_lock(&vm->pgtable_lock);
54 	current_vm = vm;
55 }
56 
57 static void guest_unlock_component(struct pkvm_hyp_vm *vm)
58 {
59 	current_vm = NULL;
60 	hyp_spin_unlock(&vm->pgtable_lock);
61 }
62 
63 static void host_lock_component(void)
64 {
65 	hyp_spin_lock(&host_mmu.lock);
66 }
67 
68 static void host_unlock_component(void)
69 {
70 	hyp_spin_unlock(&host_mmu.lock);
71 }
72 
73 static void hyp_lock_component(void)
74 {
75 	hyp_spin_lock(&pkvm_pgd_lock);
76 }
77 
78 static void hyp_unlock_component(void)
79 {
80 	hyp_spin_unlock(&pkvm_pgd_lock);
81 }
82 
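/*
 * The helpers above serialize the host, hyp and guest stage-2 components
 * independently. Callers in this file that need the host lock together with
 * the hyp or guest lock always take the host lock first (see for instance
 * __pkvm_host_share_hyp() and __pkvm_guest_share_host() below). A minimal
 * sketch of that ordering; do_transition() is a placeholder for the real
 * page-state checks and page-table updates, not a function from this file:
 *
 *	host_lock_component();		// outermost lock
 *	guest_lock_component(vm);	// innermost lock
 *
 *	ret = do_transition();		// hypothetical: checks + updates
 *
 *	guest_unlock_component(vm);	// release in reverse order
 *	host_unlock_component();
 */
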
83 static void *host_s2_zalloc_pages_exact(size_t size)
84 {
85 	void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));
86 
87 	hyp_split_page(hyp_virt_to_page(addr));
88 
89 	/*
90 	 * The size of concatenated PGDs is always a power of two of PAGE_SIZE,
91 	 * so there should be no need to free any of the tail pages to make the
92 	 * allocation exact.
93 	 */
94 	WARN_ON(size != (PAGE_SIZE << get_order(size)));
95 
96 	return addr;
97 }
98 
99 static void *host_s2_zalloc_page(void *pool)
100 {
101 	return hyp_alloc_pages(pool, 0);
102 }
103 
104 static void host_s2_get_page(void *addr)
105 {
106 	hyp_get_page(&host_s2_pool, addr);
107 }
108 
109 static void host_s2_put_page(void *addr)
110 {
111 	hyp_put_page(&host_s2_pool, addr);
112 }
113 
114 static void host_s2_free_unlinked_table(void *addr, s8 level)
115 {
116 	kvm_pgtable_stage2_free_unlinked(&host_mmu.mm_ops, host_mmu.pgt.pte_ops,
117 					 addr, level);
118 }
119 
120 static int prepare_s2_pool(void *pgt_pool_base)
121 {
122 	unsigned long nr_pages, pfn;
123 	int ret;
124 
125 	pfn = hyp_virt_to_pfn(pgt_pool_base);
126 	nr_pages = host_s2_pgtable_pages();
127 	ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0);
128 	if (ret)
129 		return ret;
130 
131 	host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) {
132 		.zalloc_pages_exact = host_s2_zalloc_pages_exact,
133 		.zalloc_page = host_s2_zalloc_page,
134 		.free_unlinked_table = host_s2_free_unlinked_table,
135 		.phys_to_virt = hyp_phys_to_virt,
136 		.virt_to_phys = hyp_virt_to_phys,
137 		.page_count = hyp_page_count,
138 		.get_page = host_s2_get_page,
139 		.put_page = host_s2_put_page,
140 	};
141 
142 	return 0;
143 }
144 
145 static void prepare_host_vtcr(void)
146 {
147 	u32 parange, phys_shift;
148 
149 	/* The host stage 2 is id-mapped, so use parange for T0SZ */
150 	parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val);
151 	phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);
152 
153 	host_mmu.arch.mmu.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
154 					      id_aa64mmfr1_el1_sys_val, phys_shift);
155 }
156 
157 static int prepopulate_host_stage2(void)
158 {
159 	struct memblock_region *reg;
160 	int i, ret = 0;
161 
162 	for (i = 0; i < hyp_memblock_nr; i++) {
163 		reg = &hyp_memory[i];
164 		ret = host_stage2_idmap_locked(reg->base, reg->size, PKVM_HOST_MEM_PROT, false);
165 		if (ret)
166 			return ret;
167 	}
168 
169 	return ret;
170 }
171 
172 int kvm_host_prepare_stage2(void *pgt_pool_base)
173 {
174 	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
175 	int ret;
176 
177 	prepare_host_vtcr();
178 	hyp_spin_lock_init(&host_mmu.lock);
179 	mmu->arch = &host_mmu.arch;
180 
181 	ret = prepare_s2_pool(pgt_pool_base);
182 	if (ret)
183 		return ret;
184 
185 	host_s2_pte_ops.force_pte_cb = host_stage2_force_pte;
186 	host_s2_pte_ops.pte_is_counted_cb = host_stage2_pte_is_counted;
187 
188 	ret = __kvm_pgtable_stage2_init(&host_mmu.pgt, mmu,
189 					&host_mmu.mm_ops, KVM_HOST_S2_FLAGS,
190 					&host_s2_pte_ops);
191 	if (ret)
192 		return ret;
193 
194 	mmu->pgd_phys = __hyp_pa(host_mmu.pgt.pgd);
195 	mmu->pgt = &host_mmu.pgt;
196 	atomic64_set(&mmu->vmid.id, 0);
197 
198 	return prepopulate_host_stage2();
199 }
200 
201 static bool guest_stage2_force_pte_cb(u64 addr, u64 end,
202 				      enum kvm_pgtable_prot prot)
203 {
204 	return false;
205 }
206 
207 static bool guest_stage2_pte_is_counted(kvm_pte_t pte, u32 level)
208 {
209 	/*
210 	 * The refcount tracks valid entries as well as invalid entries if they
211 	 * encode ownership of a page to another entity than the page-table
212 	 * owner, whose id is 0.
213 	 */
214 	return !!pte;
215 }
216 
217 static void *guest_s2_zalloc_pages_exact(size_t size)
218 {
219 	void *addr = hyp_alloc_pages(&current_vm->pool, get_order(size));
220 
221 	WARN_ON(!addr || size != (PAGE_SIZE << get_order(size)));
222 	hyp_split_page(hyp_virt_to_page(addr));
223 
224 	return addr;
225 }
226 
227 static void guest_s2_free_pages_exact(void *addr, unsigned long size)
228 {
229 	u8 order = get_order(size);
230 	unsigned int i;
231 
232 	for (i = 0; i < (1 << order); i++)
233 		hyp_put_page(&current_vm->pool, addr + (i * PAGE_SIZE));
234 }
235 
236 static void *guest_s2_zalloc_page(void *mc)
237 {
238 	struct hyp_page *p;
239 	void *addr;
240 	unsigned long order;
241 
242 	addr = hyp_alloc_pages(&current_vm->pool, 0);
243 	if (addr)
244 		return addr;
245 
246 	addr = pop_hyp_memcache(mc, hyp_phys_to_virt, &order);
247 	if (!addr)
248 		return addr;
249 
250 	WARN_ON(order);
251 	memset(addr, 0, PAGE_SIZE);
252 	p = hyp_virt_to_page(addr);
253 	hyp_set_page_refcounted(p);
254 	p->order = 0;
255 
256 	return addr;
257 }
258 
259 static void guest_s2_get_page(void *addr)
260 {
261 	hyp_get_page(&current_vm->pool, addr);
262 }
263 
264 static void guest_s2_put_page(void *addr)
265 {
266 	hyp_put_page(&current_vm->pool, addr);
267 }
268 
269 static void *__fixmap_guest_page(void *va, size_t *size)
270 {
271 	void *addr;
272 
273 	if (WARN_ON(!IS_ALIGNED(*size, *size)))
274 		return NULL;
275 
276 	if (IS_ALIGNED(*size, PMD_SIZE)) {
277 		addr = hyp_fixblock_map(__hyp_pa(va));
278 		if (addr)
279 			return addr;
280 
281 		*size = PAGE_SIZE;
282 	}
283 
284 	if (IS_ALIGNED(*size, PAGE_SIZE))
285 		return hyp_fixmap_map(__hyp_pa(va));
286 
287 	WARN_ON(1);
288 
289 	return NULL;
290 }
291 
292 static void __fixunmap_guest_page(size_t size)
293 {
294 	switch (size) {
295 	case PAGE_SIZE:
296 		hyp_fixmap_unmap();
297 		break;
298 	case PMD_SIZE:
299 		hyp_fixblock_unmap();
300 		break;
301 	default:
302 		BUG();
303 	}
304 }
305 
306 static void clean_dcache_guest_page(void *va, size_t size)
307 {
308 	while (size) {
309 		size_t __size = size == PMD_SIZE ? size : PAGE_SIZE;
310 		void *addr = __fixmap_guest_page(va, &__size);
311 
312 		__clean_dcache_guest_page(addr, __size);
313 		__fixunmap_guest_page(__size);
314 
315 		size -= __size;
316 		va += __size;
317 	}
318 }
319 
320 static void invalidate_icache_guest_page(void *va, size_t size)
321 {
322 	while (size) {
323 		size_t __size = size == PMD_SIZE ? size : PAGE_SIZE;
324 		void *addr = __fixmap_guest_page(va, &__size);
325 
326 		__invalidate_icache_guest_page(addr, __size);
327 		__fixunmap_guest_page(__size);
328 
329 		size -= __size;
330 		va += __size;
331 	}
332 }
333 
334 int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
335 {
336 	struct kvm_s2_mmu *mmu = &vm->kvm.arch.mmu;
337 	unsigned long nr_pages;
338 	int ret;
339 
340 	nr_pages = kvm_pgtable_stage2_pgd_size(mmu->vtcr) >> PAGE_SHIFT;
341 	ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0);
342 	if (ret)
343 		return ret;
344 
345 	hyp_spin_lock_init(&vm->pgtable_lock);
346 	vm->mm_ops = (struct kvm_pgtable_mm_ops) {
347 		.zalloc_pages_exact	= guest_s2_zalloc_pages_exact,
348 		.free_pages_exact	= guest_s2_free_pages_exact,
349 		.zalloc_page		= guest_s2_zalloc_page,
350 		.phys_to_virt		= hyp_phys_to_virt,
351 		.virt_to_phys		= hyp_virt_to_phys,
352 		.page_count		= hyp_page_count,
353 		.get_page		= guest_s2_get_page,
354 		.put_page		= guest_s2_put_page,
355 		.dcache_clean_inval_poc	= clean_dcache_guest_page,
356 		.icache_inval_pou	= invalidate_icache_guest_page,
357 	};
358 
359 	guest_lock_component(vm);
360 	ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops,
361 					KVM_PGTABLE_S2_PREFAULT_BLOCK,
362 					&guest_s2_pte_ops);
363 	guest_unlock_component(vm);
364 	if (ret)
365 		return ret;
366 
367 	vm->kvm.arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd);
368 
369 	return 0;
370 }
371 
372 static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr)
373 {
374 	enum pkvm_page_state state = 0;
375 	enum kvm_pgtable_prot prot;
376 
377 	if (!kvm_pte_valid(pte)) {
378 		state = PKVM_NOPAGE;
379 
380 		if (pte == KVM_INVALID_PTE_MMIO_NOTE)
381 			state |= PKVM_MMIO;
382 
383 		return state;
384 	}
385 
386 	prot = kvm_pgtable_stage2_pte_prot(pte);
387 	if (kvm_pte_valid(pte) && ((prot & KVM_PGTABLE_PROT_RWX) != KVM_PGTABLE_PROT_RWX))
388 		state = PKVM_PAGE_RESTRICTED_PROT;
389 
390 	return state | pkvm_getstate(prot);
391 }
392 
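/*
 * guest_get_page_state() recovers the software state bits that were folded
 * into the prot when the mapping was installed; the encoding side is
 * pkvm_mkstate(), used by __guest_initiate_page_transition() and the sharing
 * paths below. The exact bit layout lives in the pKVM headers and is only
 * assumed here; a sketch of the round trip:
 *
 *	enum kvm_pgtable_prot prot;
 *
 *	// Tag an RWX mapping as shared with a borrower.
 *	prot = pkvm_mkstate(KVM_PGTABLE_PROT_RWX, PKVM_PAGE_SHARED_OWNED);
 *
 *	// ... kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys, prot, mc, 0) ...
 *
 *	// Reading the prot back from the PTE recovers the same state.
 *	WARN_ON(pkvm_getstate(prot) != PKVM_PAGE_SHARED_OWNED);
 */
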
393 int __pkvm_guest_relinquish_to_host(struct pkvm_hyp_vcpu *vcpu,
394 				    u64 ipa, u64 *ppa)
395 {
396 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
397 	enum pkvm_page_state state;
398 	u64 phys = 0, addr;
399 	kvm_pte_t pte;
400 	s8 level;
401 	int ret;
402 
403 	if (!pkvm_hyp_vcpu_is_protected(vcpu))
404 		return 0;
405 
406 	host_lock_component();
407 	guest_lock_component(vm);
408 
409 	ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
410 	if (ret || !kvm_pte_valid(pte))
411 		goto end;
412 
413 	/* We don't support splitting non-leaf mappings */
414 	if (level != KVM_PGTABLE_LAST_LEVEL) {
415 		ret = -E2BIG;
416 		goto end;
417 	}
418 
419 	state = guest_get_page_state(pte, ipa);
420 	if (state != PKVM_PAGE_OWNED) {
421 		ret = -EPERM;
422 		goto end;
423 	}
424 
425 	addr = ALIGN_DOWN(ipa, kvm_granule_size(level));
426 	phys = kvm_pte_to_phys(pte);
427 	phys += ipa - addr;
428 	/* page might be used for DMA! */
429 	if (hyp_page_count(hyp_phys_to_virt(phys))) {
430 		ret = -EBUSY;
431 		goto end;
432 	}
433 
434 	/* Zap the guest stage2 pte and return ownership to the host */
435 	ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE);
436 	if (ret)
437 		goto end;
438 
439 	hyp_poison_page(phys, PAGE_SIZE);
440 	psci_mem_protect_dec(1);
441 
442 	WARN_ON(host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HOST));
443 
444 	if (pkvm_ipa_range_has_pvmfw(vm, ipa, ipa + PAGE_SIZE))
445 		vm->kvm.arch.pkvm.pvmfw_load_addr = PVMFW_INVALID_LOAD_ADDR;
446 
447 end:
448 	guest_unlock_component(vm);
449 	host_unlock_component();
450 
451 	*ppa = phys;
452 
453 	return ret;
454 }
455 
456 int __pkvm_prot_finalize(void)
457 {
458 	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
459 	struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
460 
461 	if (params->hcr_el2 & HCR_VM)
462 		return -EPERM;
463 
464 	params->vttbr = kvm_get_vttbr(mmu);
465 	params->vtcr = mmu->vtcr;
466 	params->hcr_el2 |= HCR_VM;
467 
468 	/*
469 	 * The CMO below not only cleans the updated params to the
470 	 * PoC, but also provides the DSB that ensures ongoing
471 	 * page-table walks that have started before we trapped to EL2
472 	 * have completed.
473 	 */
474 	kvm_flush_dcache_to_poc(params, sizeof(*params));
475 
476 	write_sysreg(params->hcr_el2, hcr_el2);
477 	__load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);
478 
479 	/*
480 	 * Make sure to have an ISB before the TLB maintenance below but only
481 	 * when __load_stage2() doesn't include one already.
482 	 */
483 	asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));
484 
485 	/* Invalidate stale HCR bits that may be cached in TLBs */
486 	__tlbi(vmalls12e1);
487 	dsb(nsh);
488 	isb();
489 
490 	__pkvm_close_module_registration();
491 
492 	return 0;
493 }
494 
495 int host_stage2_unmap_reg_locked(phys_addr_t start, u64 size)
496 {
497 	hyp_assert_lock_held(&host_mmu.lock);
498 
499 	return kvm_pgtable_stage2_reclaim_leaves(&host_mmu.pgt, start, size);
500 }
501 
502 static int host_stage2_unmap_unmoveable_regs(void)
503 {
504 	struct kvm_pgtable *pgt = &host_mmu.pgt;
505 	struct pkvm_moveable_reg *reg;
506 	u64 addr = 0;
507 	int i, ret;
508 
509 	/* Unmap all unmoveable regions to recycle the pages */
510 	for (i = 0; i < pkvm_moveable_regs_nr; i++) {
511 		reg = &pkvm_moveable_regs[i];
512 		if (reg->start > addr) {
513 			ret = host_stage2_unmap_reg_locked(addr, reg->start - addr);
514 			if (ret)
515 				return ret;
516 		}
517 		addr = max(addr, reg->start + reg->size);
518 	}
519 	return host_stage2_unmap_reg_locked(addr, BIT(pgt->ia_bits) - addr);
520 }
521 
522 struct kvm_mem_range {
523 	u64 start;
524 	u64 end;
525 };
526 
527 static struct memblock_region *find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
528 {
529 	int cur, left = 0, right = hyp_memblock_nr;
530 	struct memblock_region *reg;
531 	phys_addr_t end;
532 
533 	range->start = 0;
534 	range->end = ULONG_MAX;
535 
536 	/* The list of memblock regions is sorted, binary search it */
537 	while (left < right) {
538 		cur = (left + right) >> 1;
539 		reg = &hyp_memory[cur];
540 		end = reg->base + reg->size;
541 		if (addr < reg->base) {
542 			right = cur;
543 			range->end = reg->base;
544 		} else if (addr >= end) {
545 			left = cur + 1;
546 			range->start = end;
547 		} else {
548 			range->start = reg->base;
549 			range->end = end;
550 			return reg;
551 		}
552 	}
553 
554 	return NULL;
555 }
556 
557 static enum kvm_pgtable_prot default_host_prot(bool is_memory)
558 {
559 	return is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;
560 }
561 
562 static enum kvm_pgtable_prot default_hyp_prot(phys_addr_t phys)
563 {
564 	return addr_is_memory(phys) ? PAGE_HYP : PAGE_HYP_DEVICE;
565 }
566 
567 /*
568  * Use NORMAL_NC for guest MMIO, when a guest has:
569  * No FWB: it will be combined with the stage-1 attrs, where device has precedence over normal.
570  * FWB: with the MT_S2_FWB_NORMAL_NC encoding, this results in device if stage-1 used a device
571  *      attr, otherwise NC.
572  */
573 static enum kvm_pgtable_prot default_guest_prot(bool is_memory)
574 {
575 	return is_memory ? KVM_PGTABLE_PROT_RWX :
576 		KVM_PGTABLE_PROT_RW | KVM_PGTABLE_PROT_NORMAL_NC;
577 }
578 
579 bool addr_is_memory(phys_addr_t phys)
580 {
581 	struct kvm_mem_range range;
582 
583 	return !!find_mem_range(phys, &range);
584 }
585 
586 static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
587 {
588 	return range->start <= addr && addr < range->end;
589 }
590 
591 static int check_range_allowed_memory(u64 start, u64 end)
592 {
593 	struct memblock_region *reg;
594 	struct kvm_mem_range range;
595 
596 	/*
597 	 * Callers can't check the state of a range that overlaps memory and
598 	 * MMIO regions, so ensure [start, end[ is in the same kvm_mem_range.
599 	 */
600 	reg = find_mem_range(start, &range);
601 	if (!is_in_mem_range(end - 1, &range))
602 		return -EINVAL;
603 
604 	if (!reg || reg->flags & MEMBLOCK_NOMAP)
605 		return -EPERM;
606 
607 	return 0;
608 }
609 
610 static bool range_is_memory(u64 start, u64 end)
611 {
612 	struct kvm_mem_range r;
613 
614 	if (!find_mem_range(start, &r))
615 		return false;
616 
617 	return is_in_mem_range(end - 1, &r);
618 }
619 
620 static bool range_is_allowed_memory(u64 start, u64 end)
621 {
622 	struct memblock_region *reg;
623 	struct kvm_mem_range range;
624 
625 	reg = find_mem_range(start, &range);
626 	if (!reg)
627 		return false;
628 
629 	if (!is_in_mem_range(end - 1, &range))
630 		return false;
631 
632 	return !(reg->flags & MEMBLOCK_NOMAP);
633 }
634 
635 static inline int __host_stage2_idmap(u64 start, u64 end,
636 				      enum kvm_pgtable_prot prot,
637 				      bool update_iommu)
638 {
639 	int ret;
640 
641 	ret = kvm_pgtable_stage2_map(&host_mmu.pgt, start, end - start, start,
642 				     prot, &host_s2_pool, 0);
643 	if (ret)
644 		return ret;
645 
646 	if (update_iommu) {
647 		kvm_iommu_host_stage2_idmap(start, end, prot);
648 		kvm_iommu_host_stage2_idmap_complete(!!prot);
649 	}
650 
651 	return 0;
652 }
653 
654 /*
655  * The pool has been provided with enough pages to cover all of the moveable regions
656  * with page granularity, but it is difficult to know how much of the
657  * non-moveable regions we will need to cover upfront, so we may need to
658  * 'recycle' the pages if we run out.
659  */
660 #define host_stage2_try(fn, ...)					\
661 	({								\
662 		int __ret;						\
663 		hyp_assert_lock_held(&host_mmu.lock);			\
664 		__ret = fn(__VA_ARGS__);				\
665 		if (__ret == -ENOMEM) {					\
666 			__ret = host_stage2_unmap_unmoveable_regs();		\
667 			if (!__ret)					\
668 				__ret = fn(__VA_ARGS__);		\
669 		}							\
670 		__ret;							\
671 	 })
672 
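/*
 * For instance, host_stage2_idmap_locked() below expands to roughly the
 * following retry sequence (hand-expanded here for illustration):
 *
 *	hyp_assert_lock_held(&host_mmu.lock);
 *	ret = __host_stage2_idmap(addr, addr + size, prot, update_iommu);
 *	if (ret == -ENOMEM) {
 *		// Reclaim page-table pages mapping the unmoveable regions ...
 *		ret = host_stage2_unmap_unmoveable_regs();
 *		if (!ret)	// ... and retry the original operation once.
 *			ret = __host_stage2_idmap(addr, addr + size, prot, update_iommu);
 *	}
 */
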
673 static inline bool range_included(struct kvm_mem_range *child,
674 				  struct kvm_mem_range *parent)
675 {
676 	return parent->start <= child->start && child->end <= parent->end;
677 }
678 
679 static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
680 {
681 	struct kvm_mem_range cur;
682 	kvm_pte_t pte;
683 	s8 level;
684 	int ret;
685 
686 	hyp_assert_lock_held(&host_mmu.lock);
687 	ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level);
688 	if (ret)
689 		return ret;
690 
691 	if (kvm_pte_valid(pte))
692 		return -EAGAIN;
693 
694 	if (pte) {
695 		WARN_ON(addr_is_memory(addr) &&
696 			!(hyp_phys_to_page(addr)->host_state & PKVM_NOPAGE));
697 		return -EPERM;
698 	}
699 
700 	do {
701 		u64 granule = kvm_granule_size(level);
702 		cur.start = ALIGN_DOWN(addr, granule);
703 		cur.end = cur.start + granule;
704 		level++;
705 	} while ((level <= KVM_PGTABLE_LAST_LEVEL) &&
706 			!(kvm_level_supports_block_mapping(level) &&
707 			  range_included(&cur, range)));
708 
709 	*range = cur;
710 
711 	return 0;
712 }
713 
714 int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
715 			     enum kvm_pgtable_prot prot,
716 			     bool update_iommu)
717 {
718 	return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot, update_iommu);
719 }
720 
721 #define KVM_MAX_OWNER_ID               FIELD_MAX(KVM_INVALID_PTE_OWNER_MASK)
722 
723 static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id)
724 {
725 	return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id);
726 }
727 
728 static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state)
729 {
730 	phys_addr_t end = addr + size;
731 
732 	for (; addr < end; addr += PAGE_SIZE)
733 		hyp_phys_to_page(addr)->host_state = state;
734 }
735 
736 static int __host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id, bool is_memory,
737 					  enum pkvm_page_state nopage_state, bool update_iommu)
738 {
739 	kvm_pte_t annotation;
740 	enum kvm_pgtable_prot prot;
741 	int ret;
742 
743 	if (owner_id > KVM_MAX_OWNER_ID)
744 		return -EINVAL;
745 
746 	if (owner_id == PKVM_ID_HOST) {
747 		prot = default_host_prot(addr_is_memory(addr));
748 		ret = host_stage2_idmap_locked(addr, size, prot, false);
749 	} else {
750 		annotation = kvm_init_invalid_leaf_owner(owner_id);
751 		ret = host_stage2_try(kvm_pgtable_stage2_annotate,
752 				      &host_mmu.pgt,
753 				      addr, size, &host_s2_pool, annotation);
754 	}
755 	if (ret)
756 		return ret;
757 
758 	if (update_iommu) {
759 		prot = owner_id == PKVM_ID_HOST ? PKVM_HOST_MEM_PROT : 0;
760 		kvm_iommu_host_stage2_idmap(addr, addr + size, prot);
761 		kvm_iommu_host_stage2_idmap_complete(!!prot);
762 	}
763 
764 	if (!is_memory)
765 		return 0;
766 
767 	/* Don't forget to update the vmemmap tracking for the host */
768 	if (owner_id == PKVM_ID_HOST)
769 		__host_update_page_state(addr, size, PKVM_PAGE_OWNED);
770 	else
771 		__host_update_page_state(addr, size, PKVM_NOPAGE | nopage_state);
772 
773 	return 0;
774 }
775 
776 int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
777 {
778 	return __host_stage2_set_owner_locked(addr, size, owner_id, addr_is_memory(addr), 0, true);
779 }
780 
781 static bool host_stage2_force_pte(u64 addr, u64 end, enum kvm_pgtable_prot prot)
782 {
783 	/*
784 	 * Block mappings must be used with care in the host stage-2 as a
785 	 * kvm_pgtable_stage2_map() operation targeting a page in the range of
786 	 * an existing block will delete the block under the assumption that
787 	 * mappings in the rest of the block range can always be rebuilt lazily.
788 	 * That assumption is correct for the host stage-2 with RWX mappings
789 	 * targeting memory or RW mappings targeting MMIO ranges (see
790 	 * host_stage2_idmap() below which implements some of the host memory
791 	 * abort logic). However, this is not safe for any other mappings where
792 	 * the host stage-2 page-table is in fact the only place where this
793 	 * state is stored. In all those cases, it is safer to use page-level
794 	 * mappings, hence avoiding to lose the state because of side-effects in
795 	 * kvm_pgtable_stage2_map().
796 	 */
797 	return prot != default_host_prot(range_is_memory(addr, end));
798 }
799 
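/*
 * In other words, only the default mappings (RWX for memory, the MMIO default
 * otherwise) may use block mappings; any prot that encodes extra state is
 * installed at page granularity so that state cannot be lost when a block is
 * broken up. A sketch, assuming addr/end cover normal memory:
 *
 *	// Default RWX idmap of memory: block mappings are allowed.
 *	WARN_ON(host_stage2_force_pte(addr, end, PKVM_HOST_MEM_PROT));
 *
 *	// A read-only mapping carries state: page-level mappings are forced.
 *	WARN_ON(!host_stage2_force_pte(addr, end, KVM_PGTABLE_PROT_R));
 */
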
800 static bool host_stage2_pte_is_counted(kvm_pte_t pte, u32 level)
801 {
802 	u64 phys;
803 
804 	if (!kvm_pte_valid(pte))
805 		return !!pte;
806 
807 	if (kvm_pte_table(pte, level))
808 		return true;
809 
810 	phys = kvm_pte_to_phys(pte);
811 	if (addr_is_memory(phys))
812 		return (pte & KVM_HOST_S2_DEFAULT_MASK) !=
813 			KVM_HOST_S2_DEFAULT_MEM_PTE;
814 
815 	return (pte & KVM_HOST_S2_DEFAULT_MASK) != KVM_HOST_S2_DEFAULT_MMIO_PTE;
816 }
817 
818 static int host_stage2_idmap(u64 addr)
819 {
820 	struct kvm_mem_range range;
821 	bool is_memory = !!find_mem_range(addr, &range);
822 	enum kvm_pgtable_prot prot = default_host_prot(is_memory);
823 	int ret;
824 	bool update_iommu = !is_memory;
825 
826 	host_lock_component();
827 	ret = host_stage2_adjust_range(addr, &range);
828 	if (ret)
829 		goto unlock;
830 
831 	ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot, update_iommu);
832 unlock:
833 	host_unlock_component();
834 
835 	return ret;
836 }
837 
838 static void (*illegal_abt_notifier)(struct user_pt_regs *regs);
839 
840 int __pkvm_register_illegal_abt_notifier(void (*cb)(struct user_pt_regs *))
841 {
842 	return cmpxchg(&illegal_abt_notifier, NULL, cb) ? -EBUSY : 0;
843 }
844 
845 static void host_inject_abort(struct kvm_cpu_context *host_ctxt)
846 {
847 	u64 spsr = read_sysreg_el2(SYS_SPSR);
848 	u64 esr = read_sysreg_el2(SYS_ESR);
849 	u64 ventry, ec;
850 
851 	if (READ_ONCE(illegal_abt_notifier))
852 		illegal_abt_notifier(&host_ctxt->regs);
853 
854 	/* Repaint the ESR to report a same-level fault if taken from EL1 */
855 	if ((spsr & PSR_MODE_MASK) != PSR_MODE_EL0t) {
856 		ec = ESR_ELx_EC(esr);
857 		if (ec == ESR_ELx_EC_DABT_LOW)
858 			ec = ESR_ELx_EC_DABT_CUR;
859 		else if (ec == ESR_ELx_EC_IABT_LOW)
860 			ec = ESR_ELx_EC_IABT_CUR;
861 		else
862 			WARN_ON(1);
863 		esr &= ~ESR_ELx_EC_MASK;
864 		esr |= ec << ESR_ELx_EC_SHIFT;
865 	}
866 
867 	/*
868 	 * Since S1PTW should only ever be set for stage-2 faults, we're pretty
869 	 * much guaranteed that it won't be set in ESR_EL1 by the hardware. So,
870 	 * let's use that bit to allow the host abort handler to differentiate
871 	 * this abort from normal userspace faults.
872 	 *
873 	 * Note: although S1PTW is RES0 at EL1, it is guaranteed by the
874 	 * architecture to be backed by flops, so it should be safe to use.
875 	 */
876 	esr |= ESR_ELx_S1PTW;
877 
878 	write_sysreg_el1(esr, SYS_ESR);
879 	write_sysreg_el1(spsr, SYS_SPSR);
880 	write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR);
881 	write_sysreg_el1(read_sysreg_el2(SYS_FAR), SYS_FAR);
882 
883 	ventry = read_sysreg_el1(SYS_VBAR);
884 	ventry += get_except64_offset(spsr, PSR_MODE_EL1h, except_type_sync);
885 	write_sysreg_el2(ventry, SYS_ELR);
886 
887 	spsr = get_except64_cpsr(spsr, system_supports_mte(),
888 				 read_sysreg_el1(SYS_SCTLR), PSR_MODE_EL1h);
889 	write_sysreg_el2(spsr, SYS_SPSR);
890 }
891 
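/*
 * On the EL1 side, the host fault-handling code can therefore tell a
 * hypervisor-injected abort apart from an ordinary fault by testing that bit.
 * A hedged sketch (the helper name is illustrative, not the actual arm64
 * fault-path code):
 *
 *	static bool example_abort_was_injected_by_hyp(unsigned long esr)
 *	{
 *		// S1PTW is never set by hardware for a stage-1-only fault.
 *		return !!(esr & ESR_ELx_S1PTW);
 *	}
 */
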
892 static bool is_dabt(u64 esr)
893 {
894 	return ESR_ELx_EC(esr) == ESR_ELx_EC_DABT_LOW;
895 }
896 
897 void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
898 {
899 	struct kvm_vcpu_fault_info fault;
900 	u64 esr, addr;
901 	int ret;
902 
903 	esr = read_sysreg_el2(SYS_ESR);
904 	if (!__get_fault_info(esr, &fault)) {
905 		/* Setting the address to an invalid value for use in tracing. */
906 		addr = (u64)-1;
907 		/*
908 		 * We've presumably raced with a page-table change which caused
909 		 * AT to fail, try again.
910 		 */
911 		return;
912 	}
913 
914 	addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
915 	addr |= fault.far_el2 & FAR_MASK;
916 
917 	if (is_dabt(esr) && !addr_is_memory(addr) &&
918 	    kvm_iommu_host_dabt_handler(host_ctxt, esr, addr))
919 		goto return_to_host;
920 
921 	switch (esr & ESR_ELx_FSC_TYPE) {
922 	case ESR_ELx_FSC_FAULT:
923 		ret = host_stage2_idmap(addr);
924 		break;
925 	case ESR_ELx_FSC_PERM:
926 		ret = module_handle_host_perm_fault(&host_ctxt->regs, esr, addr);
927 		ret = ret ? 0 /* handled */ : -EPERM;
928 		break;
929 	default:
930 		ret = -EPERM;
931 		break;
932 	}
933 
934 	if (ret == -EPERM)
935 		host_inject_abort(host_ctxt);
936 	else
937 		BUG_ON(ret && ret != -EAGAIN);
938 
939 return_to_host:
940 	trace_host_mem_abort(esr, addr);
941 }
942 
943 struct check_walk_data {
944 	enum pkvm_page_state	desired;
945 	enum pkvm_page_state	(*get_page_state)(kvm_pte_t pte, u64 addr);
946 };
947 
948 static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
949 				      enum kvm_pgtable_walk_flags visit)
950 {
951 	struct check_walk_data *d = ctx->arg;
952 
953 	return d->get_page_state(ctx->old, ctx->addr) == d->desired ? 0 : -EPERM;
954 }
955 
956 static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
957 				  struct check_walk_data *data)
958 {
959 	struct kvm_pgtable_walker walker = {
960 		.cb	= __check_page_state_visitor,
961 		.arg	= data,
962 		.flags	= KVM_PGTABLE_WALK_LEAF,
963 	};
964 
965 	return kvm_pgtable_walk(pgt, addr, size, &walker);
966 }
967 
968 static enum pkvm_page_state host_get_mmio_page_state(kvm_pte_t pte, u64 addr)
969 {
970 	enum pkvm_page_state state = 0;
971 	enum kvm_pgtable_prot prot;
972 
973 	WARN_ON(addr_is_memory(addr));
974 
975 	if (!kvm_pte_valid(pte) && pte)
976 		return PKVM_NOPAGE;
977 
978 	prot = kvm_pgtable_stage2_pte_prot(pte);
979 	if (kvm_pte_valid(pte)) {
980 		if ((prot & KVM_PGTABLE_PROT_RWX) != PKVM_HOST_MMIO_PROT)
981 			state = PKVM_PAGE_RESTRICTED_PROT;
982 	}
983 
984 	return state | pkvm_getstate(prot);
985 }
986 
987 enum host_check_page_state_flags {
988 	HOST_CHECK_NULL_REFCNT		= BIT(0),
989 	HOST_CHECK_IS_MEMORY		= BIT(1),
990 };
991 
992 static int ___host_check_page_state_range(u64 addr, u64 size,
993 					  enum pkvm_page_state state,
994 					  enum host_check_page_state_flags flags)
995 {
996 	struct check_walk_data d = {
997 		.desired	= state,
998 		.get_page_state	= host_get_mmio_page_state,
999 	};
1000 	struct hyp_page *p;
1001 	struct memblock_region *reg;
1002 	struct kvm_mem_range range;
1003 	u64 end;
1004 
1005 	if (check_add_overflow(addr, size, &end))
1006 		return -EINVAL;
1007 
1008 	/* Can't check the state of both MMIO and memory regions at once */
1009 	reg = find_mem_range(addr, &range);
1010 	if (!reg && (flags & HOST_CHECK_IS_MEMORY))
1011 		return -EINVAL;
1012 
1013 	if (!is_in_mem_range(end - 1, &range))
1014 		return -EINVAL;
1015 
1016 	hyp_assert_lock_held(&host_mmu.lock);
1017 
1018 	/* MMIO state is still in the page-table */
1019 	if (!reg)
1020 		return check_page_state_range(&host_mmu.pgt, addr, size, &d);
1021 
1022 	if (reg->flags & MEMBLOCK_NOMAP)
1023 		return -EPERM;
1024 
1025 	for (; addr < end; addr += PAGE_SIZE) {
1026 		p = hyp_phys_to_page(addr);
1027 		if (p->host_state != state)
1028 			return -EPERM;
1029 		if ((flags & HOST_CHECK_NULL_REFCNT) && hyp_refcount_get(p->refcount))
1030 			return -EINVAL;
1031 	}
1032 
1033 	/*
1034 	 * All memory pages with restricted permissions will already be covered
1035 	 * by other states (e.g. PKVM_MODULE_OWNED_PAGE), so no need to retrieve
1036 	 * the PKVM_PAGE_RESTRICTED_PROT state from the PTE.
1037 	 */
1038 
1039 	return 0;
1040 }
1041 
1042 static int __host_check_page_state_range(u64 addr, u64 size,
1043 					 enum pkvm_page_state state)
1044 {
1045 	enum host_check_page_state_flags flags = HOST_CHECK_IS_MEMORY;
1046 
1047 	if (state == PKVM_PAGE_OWNED)
1048 		flags |= HOST_CHECK_NULL_REFCNT;
1049 
1050 	/* Check the refcount of PAGE_OWNED pages as those may be used for DMA. */
1051 	return ___host_check_page_state_range(addr, size, state, flags);
1052 }
1053 
1054 static int __host_set_page_state_range(u64 addr, u64 size,
1055 				       enum pkvm_page_state state)
1056 {
1057 	if (hyp_phys_to_page(addr)->host_state & PKVM_NOPAGE) {
1058 		int ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT, true);
1059 
1060 		if (ret)
1061 			return ret;
1062 	}
1063 
1064 	__host_update_page_state(addr, size, state);
1065 
1066 	return 0;
1067 }
1068 
1069 static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr)
1070 {
1071 	enum pkvm_page_state state = 0;
1072 	enum kvm_pgtable_prot prot;
1073 
1074 	if (!kvm_pte_valid(pte))
1075 		return PKVM_NOPAGE;
1076 
1077 	prot = kvm_pgtable_hyp_pte_prot(pte);
1078 	if (kvm_pte_valid(pte) && ((prot & KVM_PGTABLE_PROT_RWX) != PAGE_HYP))
1079 		state = PKVM_PAGE_RESTRICTED_PROT;
1080 
1081 	return state | pkvm_getstate(prot);
1082 }
1083 
1084 static int __hyp_check_page_state_range(u64 addr, u64 size,
1085 					enum pkvm_page_state state)
1086 {
1087 	struct check_walk_data d = {
1088 		.desired	= state,
1089 		.get_page_state	= hyp_get_page_state,
1090 	};
1091 
1092 	hyp_assert_lock_held(&pkvm_pgd_lock);
1093 	return check_page_state_range(&pkvm_pgtable, addr, size, &d);
1094 }
1095 
1096 int hyp_check_range_owned(u64 phys_addr, u64 size)
1097 {
1098 	int ret;
1099 
1100 	hyp_lock_component();
1101 	ret = __hyp_check_page_state_range((u64)hyp_phys_to_virt(phys_addr),
1102 					   size, PKVM_PAGE_OWNED);
1103 	hyp_unlock_component();
1104 
1105 	return ret;
1106 }
1107 
1108 static int __guest_check_page_state_range(struct pkvm_hyp_vcpu *vcpu, u64 addr,
1109 					  u64 size, enum pkvm_page_state state)
1110 {
1111 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
1112 	struct check_walk_data d = {
1113 		.desired	= state,
1114 		.get_page_state	= guest_get_page_state,
1115 	};
1116 	u64 end;
1117 
1118 	if (check_add_overflow(addr, size, &end))
1119 		return -EINVAL;
1120 
1121 	hyp_assert_lock_held(&vm->pgtable_lock);
1122 	return check_page_state_range(&vm->pgt, addr, size, &d);
1123 }
1124 
1125 struct guest_request_walker_data {
1126 	unsigned long		ipa_start;
1127 	kvm_pte_t		pte_start;
1128 	u64			size;
1129 	enum pkvm_page_state	desired_state;
1130 	enum pkvm_page_state	desired_mask;
1131 	int			max_ptes;
1132 };
1133 
1134 #define GUEST_WALKER_DATA_INIT(__state)			\
1135 {							\
1136 	.size		= 0,				\
1137 	.desired_state	= __state,			\
1138 	.desired_mask	= ~0,				\
1139 	/*						\
1140 	 * Arbitrary limit of walked PTEs to restrict	\
1141 	 * the time spent at EL2			\
1142 	 */						\
1143 	.max_ptes	= 512,				\
1144 }
1145 
1146 static int guest_request_walker(const struct kvm_pgtable_visit_ctx *ctx,
1147 				enum kvm_pgtable_walk_flags visit)
1148 {
1149 	struct guest_request_walker_data *data = (struct guest_request_walker_data *)ctx->arg;
1150 	enum pkvm_page_state state;
1151 	kvm_pte_t pte = *ctx->ptep;
1152 	phys_addr_t phys = kvm_pte_to_phys(pte);
1153 	u32 level = ctx->level;
1154 
1155 	state = guest_get_page_state(pte, 0);
1156 	if (data->desired_state != (state & data->desired_mask))
1157 		return (state & PKVM_NOPAGE) ? -EFAULT : -EPERM;
1158 
1159 	data->max_ptes--;
1160 
1161 	if (!data->size) {
1162 		data->pte_start = pte;
1163 		data->size = kvm_granule_size(level);
1164 		data->ipa_start = ctx->addr & ~(kvm_granule_size(level) - 1);
1165 
1166 		goto end;
1167 	}
1168 
1169 	if (kvm_pgtable_stage2_pte_prot(pte) !=
1170 	    kvm_pgtable_stage2_pte_prot(data->pte_start))
1171 		return -EINVAL;
1172 
1173 	/* Can only describe physically contiguous mappings */
1174 	if (kvm_pte_valid(data->pte_start) &&
1175 	    (phys != kvm_pte_to_phys(data->pte_start) + data->size))
1176 			return -E2BIG;
1177 
1178 	data->size += kvm_granule_size(level);
1179 
1180 end:
1181 	return --data->max_ptes > 0 ? 0 : -E2BIG;
1182 }
1183 
1184 static int __guest_request_page_transition(u64 ipa, kvm_pte_t *__pte, u64 *__nr_pages,
1185 					   struct pkvm_hyp_vcpu *vcpu,
1186 					   enum pkvm_page_state desired)
1187 {
1188 	struct guest_request_walker_data data = GUEST_WALKER_DATA_INIT(desired);
1189 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
1190 	struct kvm_pgtable_walker walker = {
1191 		.cb     = guest_request_walker,
1192 		.flags  = KVM_PGTABLE_WALK_LEAF,
1193 		.arg    = (void *)&data,
1194 	};
1195 	phys_addr_t phys, phys_offset;
1196 	kvm_pte_t pte;
1197 	int ret = kvm_pgtable_walk(&vm->pgt, ipa, *__nr_pages * PAGE_SIZE, &walker);
1198 
1199 	/* Walker reached data.max_ptes or a non-physically-contiguous block */
1200 	if (ret == -E2BIG)
1201 		ret = 0;
1202 	else if (ret)
1203 		return ret;
1204 
1205 	if (WARN_ON(!kvm_pte_valid(data.pte_start)))
1206 		return -EINVAL;
1207 
1208 	phys = kvm_pte_to_phys(data.pte_start);
1209 	if (!range_is_allowed_memory(phys, phys + data.size))
1210 		return -EINVAL;
1211 
1212 	if (data.ipa_start > ipa)
1213 		return -EINVAL;
1214 
1215 	/*
1216 	 * The transition is not aligned with the block memory mapping. The block
1217 	 * will be broken down, and memory donation will be needed.
1218 	 */
1219 	phys_offset = ipa - data.ipa_start;
1220 	if (phys_offset || (*__nr_pages * PAGE_SIZE < data.size)) {
1221 		struct pkvm_hyp_vcpu *hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
1222 		int min_pages;
1223 
1224 		if (WARN_ON(!hyp_vcpu))
1225 			return -EINVAL;
1226 
1227 		min_pages = kvm_mmu_cache_min_pages(&hyp_vcpu->vcpu.kvm->arch.mmu);
1228 		if (hyp_vcpu->vcpu.arch.stage2_mc.nr_pages < min_pages)
1229 			return -ENOMEM;
1230 	}
1231 
1232 	phys = kvm_pte_to_phys(data.pte_start) + phys_offset;
1233 	pte = data.pte_start & ~kvm_phys_to_pte(KVM_PHYS_INVALID);
1234 	pte |= kvm_phys_to_pte(phys);
1235 
1236 	if (WARN_ON(phys_offset >= data.size))
1237 		return -EINVAL;
1238 
1239 	*__pte = pte;
1240 	*__nr_pages = min_t(u64, (data.size - phys_offset) >> PAGE_SHIFT,
1241 			    *__nr_pages);
1242 
1243 	return 0;
1244 }
1245 
1246 static int __guest_initiate_page_transition(u64 ipa, kvm_pte_t pte, u64 nr_pages,
1247 					    struct pkvm_hyp_vcpu *vcpu,
1248 					    enum pkvm_page_state state)
1249 {
1250 	struct kvm_hyp_memcache *mc = &vcpu->vcpu.arch.stage2_mc;
1251 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
1252 	u64 size = nr_pages * PAGE_SIZE;
1253 	enum kvm_pgtable_prot prot;
1254 	u64 phys;
1255 	int ret;
1256 
1257 	phys = kvm_pte_to_phys(pte);
1258 	prot = pkvm_mkstate(kvm_pgtable_stage2_pte_prot(pte), state);
1259 	ret = kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys, prot, mc, 0);
1260 	if (ret)
1261 		return ret;
1262 
1263 	return 0;
1264 }
1265 
1266 int __pkvm_host_share_hyp(u64 pfn)
1267 {
1268 	u64 phys = hyp_pfn_to_phys(pfn);
1269 	void *virt = __hyp_va(phys);
1270 	enum kvm_pgtable_prot prot;
1271 	u64 size = PAGE_SIZE;
1272 	int ret;
1273 
1274 	host_lock_component();
1275 	hyp_lock_component();
1276 
1277 	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
1278 	if (ret)
1279 		goto unlock;
1280 	if (IS_ENABLED(CONFIG_PKVM_STRICT_CHECKS)) {
1281 		ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE);
1282 		if (ret)
1283 			goto unlock;
1284 	}
1285 
1286 	prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
1287 	ret = pkvm_create_mappings_locked(virt, virt + size, prot);
1288 	if (ret) {
1289 		WARN_ON(ret != -ENOMEM);
1290 		/* We might have failed halfway through, so remove anything we've installed */
1291 		pkvm_remove_mappings_locked(virt, virt + size);
1292 		goto unlock;
1293 	}
1294 
1295 	WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED));
1296 
1297 unlock:
1298 	hyp_unlock_component();
1299 	host_unlock_component();
1300 	return ret;
1301 }
1302 
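/*
 * This function is reached from the host kernel via the __pkvm_host_share_hyp
 * hypercall. A hedged sketch of the EL1 call site, assuming the usual
 * kvm_call_hyp_nvhe() plumbing (the helper name is illustrative):
 *
 *	static int example_share_page_with_hyp(phys_addr_t phys)
 *	{
 *		return kvm_call_hyp_nvhe(__pkvm_host_share_hyp, __phys_to_pfn(phys));
 *	}
 */
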
1303 int __pkvm_host_unshare_hyp(u64 pfn)
1304 {
1305 	u64 phys = hyp_pfn_to_phys(pfn);
1306 	u64 virt = (u64)__hyp_va(phys);
1307 	u64 size = PAGE_SIZE;
1308 	int ret;
1309 
1310 	host_lock_component();
1311 	hyp_lock_component();
1312 
1313 	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
1314 	if (ret)
1315 		goto unlock;
1316 	ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_SHARED_BORROWED);
1317 	if (ret)
1318 		goto unlock;
1319 	if (hyp_page_count((void *)virt)) {
1320 		ret = -EBUSY;
1321 		goto unlock;
1322 	}
1323 
1324 	WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
1325 	WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_OWNED));
1326 
1327 unlock:
1328 	hyp_unlock_component();
1329 	host_unlock_component();
1330 
1331 	return ret;
1332 }
1333 
1334 int __pkvm_guest_share_host(struct pkvm_hyp_vcpu *vcpu, u64 ipa, u64 nr_pages,
1335 			    u64 *nr_shared)
1336 {
1337 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
1338 	kvm_pte_t pte;
1339 	size_t size;
1340 	u64 phys;
1341 	int ret;
1342 
1343 	host_lock_component();
1344 	guest_lock_component(vm);
1345 
1346 	ret = __guest_request_page_transition(ipa, &pte, &nr_pages, vcpu, PKVM_PAGE_OWNED);
1347 	if (ret)
1348 		goto unlock;
1349 
1350 	phys = kvm_pte_to_phys(pte);
1351 	if (check_shl_overflow(nr_pages, PAGE_SHIFT, &size)) {
1352 		ret = -EINVAL;
1353 		goto unlock;
1354 	}
1355 
1356 	ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE);
1357 	if (ret)
1358 		goto unlock;
1359 
1360 	WARN_ON(__guest_initiate_page_transition(ipa, pte, nr_pages, vcpu, PKVM_PAGE_SHARED_OWNED));
1361 	WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED));
1362 	psci_mem_protect_dec(nr_pages);
1363 	*nr_shared = nr_pages;
1364 
1365 unlock:
1366 	guest_unlock_component(vm);
1367 	host_unlock_component();
1368 
1369 	return ret;
1370 }
1371 
1372 int __pkvm_guest_share_hyp_page(struct pkvm_hyp_vcpu *vcpu, u64 ipa, u64 *hyp_va)
1373 {
1374 	int ret;
1375 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
1376 	kvm_pte_t pte;
1377 	u64 phys;
1378 	enum kvm_pgtable_prot prot;
1379 	void *virt;
1380 	u64 nr_pages = 1;
1381 
1382 	hyp_lock_component();
1383 	guest_lock_component(vm);
1384 
1385 	ret = __guest_request_page_transition(ipa, &pte, &nr_pages, vcpu, PKVM_PAGE_OWNED);
1386 	if (ret)
1387 		goto unlock;
1388 
1389 	phys = kvm_pte_to_phys(pte);
1390 
1391 	virt = __hyp_va(phys);
1392 	if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
1393 		ret = __hyp_check_page_state_range((u64)virt, PAGE_SIZE, PKVM_NOPAGE);
1394 		if (ret)
1395 			goto unlock;
1396 	}
1397 
1398 	prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
1399 	ret = pkvm_create_mappings_locked(virt, virt + PAGE_SIZE, prot);
1400 	if (ret) {
1401 		/*
1402 		 * Repaint the return code: we need to distinguish between running
1403 		 * out of guest-provided memory, which is recoverable, and running
1404 		 * out of hypervisor memory.
1405 		 */
1406 		if (ret == -ENOMEM)
1407 			ret = -EBUSY;
1408 		goto unlock;
1409 	}
1410 
1411 	WARN_ON(__guest_initiate_page_transition(ipa, pte, nr_pages, vcpu, PKVM_PAGE_SHARED_OWNED));
1412 	*hyp_va = (u64)virt;
1413 unlock:
1414 	guest_unlock_component(vm);
1415 	hyp_unlock_component();
1416 
1417 	return ret;
1418 }
1419 
1420 int __pkvm_guest_unshare_hyp_page(struct pkvm_hyp_vcpu *vcpu, u64 ipa)
1421 {
1422 	int ret;
1423 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
1424 	kvm_pte_t pte;
1425 	u64 phys, virt, nr_pages = 1;
1426 
1427 	hyp_lock_component();
1428 	guest_lock_component(vm);
1429 
1430 	ret = __guest_request_page_transition(ipa, &pte, &nr_pages, vcpu, PKVM_PAGE_SHARED_OWNED);
1431 	if (ret)
1432 		goto unlock;
1433 
1434 	phys = kvm_pte_to_phys(pte);
1435 
1436 	virt = (u64)__hyp_va(phys);
1437 	ret = __hyp_check_page_state_range(virt, PAGE_SIZE, PKVM_PAGE_SHARED_BORROWED);
1438 	if (ret)
1439 		goto unlock;
1440 
1441 	WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, PAGE_SIZE) != PAGE_SIZE);
1442 	WARN_ON(__guest_initiate_page_transition(ipa, pte, nr_pages, vcpu, PKVM_PAGE_OWNED));
1443 unlock:
1444 	guest_unlock_component(vm);
1445 	hyp_unlock_component();
1446 
1447 	return ret;
1448 }
1449 
1450 int __pkvm_guest_unshare_host(struct pkvm_hyp_vcpu *vcpu, u64 ipa, u64 nr_pages,
1451 			      u64 *nr_unshared)
1452 {
1453 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
1454 	kvm_pte_t pte;
1455 	size_t size;
1456 	u64 phys;
1457 	int ret;
1458 
1459 	host_lock_component();
1460 	guest_lock_component(vm);
1461 
1462 	ret = __guest_request_page_transition(ipa, &pte, &nr_pages, vcpu, PKVM_PAGE_SHARED_OWNED);
1463 	if (ret)
1464 		goto unlock;
1465 
1466 	phys = kvm_pte_to_phys(pte);
1467 	if (check_shl_overflow(nr_pages, PAGE_SHIFT, &size)) {
1468 		ret = -EINVAL;
1469 		goto unlock;
1470 	}
1471 
1472 	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
1473 	if (ret)
1474 		goto unlock;
1475 
1476 	WARN_ON(__guest_initiate_page_transition(ipa, pte, nr_pages, vcpu, PKVM_PAGE_OWNED));
1477 	psci_mem_protect_inc(nr_pages);
1478 	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_GUEST));
1479 	*nr_unshared = nr_pages;
1480 
1481 unlock:
1482 	guest_unlock_component(vm);
1483 	host_unlock_component();
1484 
1485 	return ret;
1486 }
1487 
1488 int __pkvm_guest_share_ffa_page(struct pkvm_hyp_vcpu *vcpu, u64 ipa, phys_addr_t *phys)
1489 {
1490 	int ret;
1491 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
1492 	kvm_pte_t pte;
1493 	u64 nr_pages = 1;
1494 
1495 	guest_lock_component(vm);
1496 	ret = __guest_request_page_transition(ipa, &pte, &nr_pages, vcpu, PKVM_PAGE_OWNED);
1497 	if (ret)
1498 		goto unlock;
1499 
1500 	ret = __guest_initiate_page_transition(ipa, pte, nr_pages, vcpu, PKVM_PAGE_SHARED_OWNED);
1501 	if (!ret)
1502 		*phys = kvm_pte_to_phys(pte);
1503 unlock:
1504 	guest_unlock_component(vm);
1505 
1506 	return ret;
1507 }
1508 
1509 /*
1510  * The caller is responsible for tracking the FFA state and this function
1511  * should only be called for IPAs that have previously been shared with FFA.
1512  */
1513 int __pkvm_guest_unshare_ffa_page(struct pkvm_hyp_vcpu *vcpu, u64 ipa)
1514 {
1515 	int ret;
1516 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
1517 	kvm_pte_t pte;
1518 	u64 nr_pages = 1;
1519 
1520 	guest_lock_component(vm);
1521 	ret = __guest_request_page_transition(ipa, &pte, &nr_pages, vcpu, PKVM_PAGE_SHARED_OWNED);
1522 	if (ret)
1523 		goto unlock;
1524 
1525 	ret = __guest_initiate_page_transition(ipa, pte, nr_pages, vcpu, PKVM_PAGE_OWNED);
1526 unlock:
1527 	guest_unlock_component(vm);
1528 
1529 	return ret;
1530 }
1531 
1532 int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
1533 {
1534 	return ___pkvm_host_donate_hyp(pfn, nr_pages, false);
1535 }
1536 
1537 /* The Swiss Army knife of memory donation. */
1538 int ___pkvm_host_donate_hyp_prot(u64 pfn, u64 nr_pages,
1539 				 bool accept_mmio, enum kvm_pgtable_prot prot)
1540 {
1541 	phys_addr_t start = hyp_pfn_to_phys(pfn);
1542 	phys_addr_t end = start + (nr_pages << PAGE_SHIFT);
1543 	int ret;
1544 
1545 	if (!accept_mmio && !range_is_memory(start, end))
1546 		return -EPERM;
1547 
1548 	host_lock_component();
1549 	ret = __pkvm_host_donate_hyp_locked(pfn, nr_pages, prot);
1550 	host_unlock_component();
1551 
1552 	return ret;
1553 }
1554 
1555 int ___pkvm_host_donate_hyp(u64 pfn, u64 nr_pages, bool accept_mmio)
1556 {
1557 	return ___pkvm_host_donate_hyp_prot(pfn, nr_pages, accept_mmio,
1558 					    default_hyp_prot(hyp_pfn_to_phys(pfn)));
1559 }
1560 
1561 static int pkvm_hyp_donate_guest(struct pkvm_hyp_vcpu *vcpu, u64 pfn, u64 gfn)
1562 {
1563 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
1564 	u64 phys = hyp_pfn_to_phys(pfn);
1565 	u64 ipa = hyp_pfn_to_phys(gfn);
1566 	u64 hyp_addr = (u64)__hyp_va(phys);
1567 	size_t size = PAGE_SIZE;
1568 	enum kvm_pgtable_prot prot;
1569 	int ret;
1570 
1571 	hyp_assert_lock_held(&pkvm_pgd_lock);
1572 	hyp_assert_lock_held(&vm->pgtable_lock);
1573 
1574 	ret = __hyp_check_page_state_range(hyp_addr, size, PKVM_PAGE_OWNED);
1575 	if (ret)
1576 		return ret;
1577 	ret = __guest_check_page_state_range(vcpu, ipa, size, PKVM_NOPAGE);
1578 	if (ret)
1579 		return ret;
1580 
1581 	WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, hyp_addr, size) != size);
1582 	prot = pkvm_mkstate(default_guest_prot(addr_is_memory(phys)), PKVM_PAGE_OWNED);
1583 	return WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys, prot,
1584 					      &vcpu->vcpu.arch.stage2_mc, 0));
1585 }
1586 
1587 int __pkvm_host_donate_hyp_locked(u64 pfn, u64 nr_pages, enum kvm_pgtable_prot prot)
1588 {
1589 	u64 size, phys = hyp_pfn_to_phys(pfn);
1590 	void *virt = __hyp_va(phys);
1591 	int ret;
1592 
1593 	if (check_shl_overflow(nr_pages, PAGE_SHIFT, &size))
1594 		return -EINVAL;
1595 
1596 	hyp_lock_component();
1597 
1598 	ret = ___host_check_page_state_range(phys, size, PKVM_PAGE_OWNED, HOST_CHECK_NULL_REFCNT);
1599 	if (ret)
1600 		goto unlock;
1601 	if (IS_ENABLED(CONFIG_PKVM_STRICT_CHECKS)) {
1602 		ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE);
1603 		if (ret)
1604 			goto unlock;
1605 	}
1606 
1607 	prot = pkvm_mkstate(prot, PKVM_PAGE_OWNED);
1608 	ret = pkvm_create_mappings_locked(virt, virt + size, prot);
1609 	if (ret) {
1610 		WARN_ON(ret != -ENOMEM);
1611 		/* We might have failed halfway through, so remove anything we've installed */
1612 		pkvm_remove_mappings_locked(virt, virt + size);
1613 		goto unlock;
1614 	}
1615 	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HYP));
1616 
1617 unlock:
1618 	hyp_unlock_component();
1619 
1620 	return ret;
1621 }
1622 
1623 int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
1624 {
1625 	u64 size, phys = hyp_pfn_to_phys(pfn);
1626 	u64 virt = (u64)__hyp_va(phys);
1627 	int ret;
1628 
1629 	if (check_shl_overflow(nr_pages, PAGE_SHIFT, &size))
1630 		return -EINVAL;
1631 
1632 	host_lock_component();
1633 	hyp_lock_component();
1634 
1635 	ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_OWNED);
1636 	if (ret)
1637 		goto unlock;
1638 	if (IS_ENABLED(CONFIG_PKVM_STRICT_CHECKS)) {
1639 		ret = ___host_check_page_state_range(phys, size, PKVM_NOPAGE, 0);
1640 		if (ret)
1641 			goto unlock;
1642 	}
1643 
1644 	WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
1645 	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST));
1646 
1647 unlock:
1648 	hyp_unlock_component();
1649 	host_unlock_component();
1650 	return ret;
1651 }
1652 
1653 int __pkvm_host_donate_ffa(u64 pfn, u64 nr_pages)
1654 {
1655 	u64 size, phys = hyp_pfn_to_phys(pfn), end;
1656 	int ret;
1657 
1658 	if (check_shl_overflow(nr_pages, PAGE_SHIFT, &size) ||
1659 	    check_add_overflow(phys, size, &end))
1660 		return -EINVAL;
1661 
1662 	host_lock_component();
1663 
1664 	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
1665 	if (ret)
1666 		goto unlock;
1667 
1668 	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_FFA));
1669 unlock:
1670 	host_unlock_component();
1671 	return ret;
1672 }
1673 
1674 int __pkvm_host_reclaim_ffa(u64 pfn, u64 nr_pages)
1675 {
1676 	u64 size, phys = hyp_pfn_to_phys(pfn), end;
1677 	int ret;
1678 
1679 	if (check_shl_overflow(nr_pages, PAGE_SHIFT, &size) ||
1680 	    check_add_overflow(phys, size, &end))
1681 		return -EINVAL;
1682 
1683 	host_lock_component();
1684 
1685 	ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE);
1686 	if (ret)
1687 		goto unlock;
1688 
1689 	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST));
1690 unlock:
1691 	host_unlock_component();
1692 	return ret;
1693 }
1694 
1695 #define MODULE_PROT_ALLOWLIST (KVM_PGTABLE_PROT_RWX |		\
1696 			       KVM_PGTABLE_PROT_DEVICE |	\
1697 			       KVM_PGTABLE_PROT_NORMAL_NC |	\
1698 			       KVM_PGTABLE_PROT_PXN |		\
1699 			       KVM_PGTABLE_PROT_UXN)
1700 
1701 int module_change_host_page_prot(u64 pfn, enum kvm_pgtable_prot prot, u64 nr_pages,
1702 				 bool update_iommu)
1703 {
1704 	u64 i, end, addr = hyp_pfn_to_phys(pfn);
1705 	struct hyp_page *page = NULL;
1706 	struct kvm_mem_range range;
1707 	struct memblock_region *reg;
1708 	int ret;
1709 
1710 	if ((prot & MODULE_PROT_ALLOWLIST) != prot)
1711 		return -EINVAL;
1712 
1713 	if (check_shl_overflow(nr_pages, PAGE_SHIFT, &end) ||
1714 	    check_add_overflow(addr, end, &end))
1715 		return -EINVAL;
1716 
1717 	reg = find_mem_range(addr, &range);
1718 	if (end > range.end) {
1719 		/* Specified range not in a single mmio or memory block. */
1720 		return -EPERM;
1721 	}
1722 
1723 	host_lock_component();
1724 
1725 	/*
1726 	 * There is no hyp_vmemmap covering MMIO regions, which makes tracking
1727 	 * of module-owned MMIO regions hard, so we trust the modules not to
1728 	 * mess things up.
1729 	 */
1730 	if (!reg)
1731 		goto update;
1732 
1733 	/* Range is memory: we can track module ownership. */
1734 	page = hyp_phys_to_page(addr);
1735 
1736 	/*
1737 	 * Modules can only modify pages they already own, and pristine host
1738 	 * pages. The entire range must be consistently one or the other.
1739 	 */
1740 	if (page->host_state & PKVM_MODULE_OWNED_PAGE) {
1741 		/* The entire range must be module-owned. */
1742 		ret = -EPERM;
1743 		for (i = 1; i < nr_pages; i++) {
1744 			if (!(page[i].host_state & PKVM_MODULE_OWNED_PAGE))
1745 				goto unlock;
1746 		}
1747 	} else {
1748 		/* The entire range must be pristine. */
1749 		ret = ___host_check_page_state_range(addr, nr_pages << PAGE_SHIFT,
1750 						     PKVM_PAGE_OWNED, HOST_CHECK_NULL_REFCNT);
1751 		if (ret)
1752 			goto unlock;
1753 	}
1754 
1755 update:
1756 	if (!prot) {
1757 		ret = __host_stage2_set_owner_locked(addr, nr_pages << PAGE_SHIFT,
1758 						     PKVM_ID_PROTECTED, !!reg,
1759 						     PKVM_MODULE_OWNED_PAGE, update_iommu);
1760 	} else {
1761 		ret = host_stage2_idmap_locked(
1762 			addr, nr_pages << PAGE_SHIFT, prot, update_iommu);
1763 	}
1764 
1765 	if (WARN_ON(ret) || !page || !prot)
1766 		goto unlock;
1767 
1768 	for (i = 0; i < nr_pages; i++) {
1769 		if (prot != KVM_PGTABLE_PROT_RWX) {
1770 			page[i].host_state = PKVM_MODULE_OWNED_PAGE;
1771 		} else {
1772 			page[i].host_state = PKVM_PAGE_OWNED;
1773 		}
1774 	}
1775 
1776 unlock:
1777 	host_unlock_component();
1778 
1779 	return ret;
1780 }
1781 
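/*
 * A module can thus hide a page of pristine host memory by dropping the prot
 * to zero (ownership moves to PKVM_ID_PROTECTED and the page is tagged
 * PKVM_MODULE_OWNED_PAGE), and later hand it back by restoring the default
 * RWX prot. A sketch for a single page:
 *
 *	// Take the page away from the host (also updates the IOMMU idmap).
 *	ret = module_change_host_page_prot(pfn, 0, 1, true);
 *
 *	// ... the page is no longer accessible to the host ...
 *
 *	// Return it with the default host permissions.
 *	ret = module_change_host_page_prot(pfn, KVM_PGTABLE_PROT_RWX, 1, true);
 */
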
1782 int hyp_pin_shared_mem(void *from, void *to)
1783 {
1784 	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
1785 	u64 end = PAGE_ALIGN((u64)to);
1786 	u64 size = end - start;
1787 	int ret;
1788 
1789 	host_lock_component();
1790 	hyp_lock_component();
1791 
1792 	ret = __host_check_page_state_range(__hyp_pa(start), size,
1793 					    PKVM_PAGE_SHARED_OWNED);
1794 	if (ret)
1795 		goto unlock;
1796 
1797 	ret = __hyp_check_page_state_range(start, size,
1798 					   PKVM_PAGE_SHARED_BORROWED);
1799 	if (ret)
1800 		goto unlock;
1801 
1802 	for (cur = start; cur < end; cur += PAGE_SIZE)
1803 		hyp_page_ref_inc(hyp_virt_to_page(cur));
1804 
1805 unlock:
1806 	hyp_unlock_component();
1807 	host_unlock_component();
1808 
1809 	return ret;
1810 }
1811 
1812 void hyp_unpin_shared_mem(void *from, void *to)
1813 {
1814 	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
1815 	u64 end = PAGE_ALIGN((u64)to);
1816 
1817 	host_lock_component();
1818 	hyp_lock_component();
1819 
1820 	for (cur = start; cur < end; cur += PAGE_SIZE)
1821 		hyp_page_ref_dec(hyp_virt_to_page(cur));
1822 
1823 	hyp_unlock_component();
1824 	host_unlock_component();
1825 }
1826 
1827 int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages)
1828 {
1829 
1830 	u64 size, phys = hyp_pfn_to_phys(pfn);
1831 	int ret;
1832 
1833 	if (check_shl_overflow(nr_pages, PAGE_SHIFT, &size))
1834 		return -EINVAL;
1835 
1836 	host_lock_component();
1837 
1838 	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
1839 	if (!ret)
1840 		ret = __host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
1841 
1842 	host_unlock_component();
1843 
1844 	return ret;
1845 }
1846 
1847 int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages)
1848 {
1849 	u64 size, phys = hyp_pfn_to_phys(pfn);
1850 	int ret;
1851 
1852 	if (check_shl_overflow(nr_pages, PAGE_SHIFT, &size))
1853 		return -EINVAL;
1854 
1855 	host_lock_component();
1856 
1857 	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
1858 	if (!ret)
1859 		ret = __host_set_page_state_range(phys, size, PKVM_PAGE_OWNED);
1860 
1861 	host_unlock_component();
1862 
1863 	return ret;
1864 }
1865 
1866 static void __pkvm_use_dma_page(phys_addr_t phys_addr)
1867 {
1868 	struct hyp_page *p = hyp_phys_to_page(phys_addr);
1869 
1870 	hyp_page_ref_inc(p);
1871 }
1872 
1873 static void __pkvm_unuse_dma_page(phys_addr_t phys_addr)
1874 {
1875 	struct hyp_page *p = hyp_phys_to_page(phys_addr);
1876 
1877 	hyp_page_ref_dec(p);
1878 }
1879 
1880 static int __pkvm_use_dma_locked(phys_addr_t phys_addr, size_t size,
1881 				 struct pkvm_hyp_vcpu *hyp_vcpu)
1882 {
1883 	int i;
1884 	int ret = 0;
1885 	struct kvm_mem_range r;
1886 	size_t nr_pages = size >> PAGE_SHIFT;
1887 	struct memblock_region *reg = find_mem_range(phys_addr, &r);
1888 
1889 	if (WARN_ON(!PAGE_ALIGNED(phys_addr | size)) || !is_in_mem_range(phys_addr + size - 1, &r))
1890 		return -EINVAL;
1891 
1892 	/*
1893 	 * RAM and device memory are handled differently here:
1894 	 * - The hyp vmemmap does not cover device memory, so MMIO pages have
1895 	 *   no struct hyp_page and cannot be refcounted.
1896 	 * - Modules can nevertheless donate MMIO, so instead of refcounting we
1897 	 *   taint such pages by marking them as shared PKVM_PAGE_TAINTED, which
1898 	 *   prevents any future ownership transition.
1899 	 */
1900 	if (!reg) {
1901 		enum kvm_pgtable_prot prot;
1902 
1903 		if (hyp_vcpu)
1904 			return -EINVAL;
1905 
1906 		for (i = 0; i < nr_pages; i++) {
1907 			u64 addr = phys_addr + i * PAGE_SIZE;
1908 
1909 			ret = ___host_check_page_state_range(addr, PAGE_SIZE,
1910 							     PKVM_PAGE_TAINTED,
1911 							     0);
1912 			/* Page already tainted */
1913 			if (!ret)
1914 				continue;
1915 			ret = ___host_check_page_state_range(addr, PAGE_SIZE,
1916 							     PKVM_PAGE_OWNED,
1917 							     0);
1918 			if (ret)
1919 				return ret;
1920 		}
1921 		prot = pkvm_mkstate(PKVM_HOST_MMIO_PROT, PKVM_PAGE_TAINTED);
1922 		WARN_ON(host_stage2_idmap_locked(phys_addr, size, prot, false));
1923 	} else {
1924 		/* For VMs, reaching this point means the VM already has access to the page. */
1925 		if (!hyp_vcpu) {
1926 			for (i = 0; i < nr_pages; i++) {
1927 				enum pkvm_page_state state;
1928 				phys_addr_t this_addr = phys_addr + i * PAGE_SIZE;
1929 
1930 				state = hyp_phys_to_page(this_addr)->host_state;
1931 				if (state != PKVM_PAGE_OWNED) {
1932 					ret = -EPERM;
1933 					break;
1934 				}
1935 			}
1936 			if (ret)
1937 				return ret;
1938 		}
1939 
1940 		for (i = 0; i < nr_pages; i++)
1941 			__pkvm_use_dma_page(phys_addr + i * PAGE_SIZE);
1942 	}
1943 
1944 	return ret;
1945 }
1946 
1947 /*
1948  * __pkvm_use_dma - Mark memory as used for DMA
1949  * @phys_addr:	physical address of the DMA region
1950  * @size:	size of the DMA region
1951  * When a page is mapped in an IOMMU page table for DMA, it must not
1952  * be donated to a guest or to the hypervisor. We ensure this with:
1953  * - The host can only map pages that are OWNED.
1954  * - Any page that is mapped is refcounted.
1955  * - Donation/sharing is prevented by the refcount check in
1956  *   ___host_check_page_state_range().
1957  * - No MMIO transition is allowed beyond the IOMMU MMIO, which happens
1958  *   during de-privilege.
1959  * Should shared pages be allowed to be mapped in the future, similar
1960  * checks will be needed in host_request_unshare() and
1961  * host_ack_unshare().
1962  */
1963 int __pkvm_use_dma(phys_addr_t phys_addr, size_t size, struct pkvm_hyp_vcpu *hyp_vcpu)
1964 {
1965 	int ret;
1966 
1967 	host_lock_component();
1968 	ret = __pkvm_use_dma_locked(phys_addr, size, hyp_vcpu);
1969 	host_unlock_component();
1970 	return ret;
1971 }
1972 
1973 int __pkvm_unuse_dma(phys_addr_t phys_addr, size_t size, struct pkvm_hyp_vcpu *hyp_vcpu)
1974 {
1975 	int i;
1976 	size_t nr_pages = size >> PAGE_SHIFT;
1977 
1978 	if (WARN_ON(!PAGE_ALIGNED(phys_addr | size)))
1979 		return -EINVAL;
1980 	if (!range_is_memory(phys_addr, phys_addr + size)) {
1981 		WARN_ON(hyp_vcpu);
1982 		return 0;
1983 	}
1984 
1985 	host_lock_component();
1986 	/*
1987 	 * We end up here after the caller successfully unmapped the page from
1988 	 * the IOMMU table, which means a reference is held and the page is
1989 	 * shared in the host s2, so this cannot fail.
1990 	 */
1991 	for (i = 0; i < nr_pages; i++)
1992 		__pkvm_unuse_dma_page(phys_addr + i * PAGE_SIZE);
1993 
1994 	host_unlock_component();
1995 	return 0;
1996 }
1997 
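/*
 * Share host pages with a guest: the target IPA range must be unmapped in
 * the guest, and every host page must either be exclusively owned with a
 * zero refcount or already shared with guests. The range is then mapped
 * SHARED_BORROWED in the guest stage-2 and the per-page
 * host_share_guest_count is incremented.
 */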
1998 int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu,
1999 			    enum kvm_pgtable_prot prot, u64 nr_pages)
2000 {
2001 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
2002 	u64 phys = hyp_pfn_to_phys(pfn);
2003 	u64 ipa = hyp_pfn_to_phys(gfn);
2004 	struct hyp_page *page;
2005 	size_t size;
2006 	u64 end;
2007 	int ret;
2008 
2009 	if (prot & ~KVM_PGTABLE_PROT_RWX)
2010 		return -EINVAL;
2011 
2012 	if (check_shl_overflow(nr_pages, PAGE_SHIFT, &size) ||
2013 	    check_add_overflow(phys, size, &end))
2014 		return -EINVAL;
2015 
2016 	ret = check_range_allowed_memory(phys, end);
2017 	if (ret)
2018 		return ret;
2019 
2020 	host_lock_component();
2021 	guest_lock_component(vm);
2022 
2023 	ret = __guest_check_page_state_range(vcpu, ipa, size, PKVM_NOPAGE);
2024 	if (ret)
2025 		goto unlock;
2026 
2027 	for (; phys < end; phys += PAGE_SIZE) {
2028 		page = hyp_phys_to_page(phys);
2029 		if (page->host_state == PKVM_PAGE_OWNED && !hyp_refcount_get(page->refcount))
2030 			continue;
2031 		else if (page->host_state == PKVM_PAGE_SHARED_OWNED && page->host_share_guest_count)
2032 			continue;
2033 		ret = -EPERM;
2034 		goto unlock;
2035 	}
2036 
2037 	phys = hyp_pfn_to_phys(pfn);
2038 	WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys,
2039 				       pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED),
2040 				       &vcpu->vcpu.arch.stage2_mc, 0));
2041 	for (; phys < end; phys += PAGE_SIZE) {
2042 		page = hyp_phys_to_page(phys);
2043 		page->host_state = PKVM_PAGE_SHARED_OWNED;
2044 		page->host_share_guest_count++;
2045 	}
2046 
2047 unlock:
2048 	guest_unlock_component(vm);
2049 	host_unlock_component();
2050 
2051 	return ret;
2052 }
2053 
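/*
 * Check that the guest mapping at @ipa is a single page or block of exactly
 * @size in the SHARED_BORROWED state, backed by host memory that is
 * SHARED_OWNED with a non-zero host_share_guest_count. On success the
 * backing physical address is returned in @__phys.
 */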
2054 static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ipa, size_t size)
2055 {
2056 	enum pkvm_page_state state;
2057 	struct hyp_page *page;
2058 	kvm_pte_t pte;
2059 	u64 phys, end;
2060 	s8 level;
2061 	int ret;
2062 
2063 	if (size != PAGE_SIZE && size != PMD_SIZE)
2064 		return -EINVAL;
2065 	ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
2066 	if (ret)
2067 		return ret;
2068 	if (!kvm_pte_valid(pte))
2069 		return -ENOENT;
2070 	if (kvm_granule_size(level) != size)
2071 		return -E2BIG;
2072 
2073 	state = guest_get_page_state(pte, ipa) & ~PKVM_PAGE_RESTRICTED_PROT;
2074 	if (state != PKVM_PAGE_SHARED_BORROWED)
2075 		return -EPERM;
2076 
2077 	phys = kvm_pte_to_phys(pte);
2078 	if (check_add_overflow(phys, size, &end))
2079 		return -EINVAL;
2080 
2081 	ret = check_range_allowed_memory(phys, end);
2082 	if (WARN_ON(ret))
2083 		return ret;
2084 
2085 	for (; phys < end; phys += PAGE_SIZE) {
2086 		page = hyp_phys_to_page(phys);
2087 		if (page->host_state != PKVM_PAGE_SHARED_OWNED)
2088 			return -EPERM;
2089 		if (WARN_ON(!page->host_share_guest_count))
2090 			return -EINVAL;
2091 	}
2092 
2093 	*__phys = kvm_pte_to_phys(pte);
2094 
2095 	return 0;
2096 }
2097 
2098 int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *vm, u64 nr_pages)
2099 {
2100 	size_t size = PAGE_SIZE * nr_pages;
2101 	u64 ipa = hyp_pfn_to_phys(gfn);
2102 	struct hyp_page *page;
2103 	u64 phys, end;
2104 	int ret;
2105 
2106 	host_lock_component();
2107 	guest_lock_component(vm);
2108 
2109 	ret = __check_host_shared_guest(vm, &phys, ipa, size);
2110 	if (ret)
2111 		goto unlock;
2112 
2113 	ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, size);
2114 	if (ret)
2115 		goto unlock;
2116 
2117 	end = phys + size;
2118 	for (; phys < end; phys += PAGE_SIZE) {
2119 		page = hyp_phys_to_page(phys);
2120 		page->host_share_guest_count--;
2121 		if (!page->host_share_guest_count)
2122 			page->host_state = PKVM_PAGE_OWNED;
2123 	}
2124 
2125 unlock:
2126 	guest_unlock_component(vm);
2127 	host_unlock_component();
2128 
2129 	return ret;
2130 }
2131 
2132 static int guest_get_valid_pte(struct pkvm_hyp_vm *vm, u64 *phys, u64 ipa, u8 order, kvm_pte_t *pte)
2133 {
2134 	size_t size = PAGE_SIZE << order;
2135 	s8 level;
2136 
2137 	if (order && size != PMD_SIZE)
2138 		return -EINVAL;
2139 
2140 	WARN_ON(kvm_pgtable_get_leaf(&vm->pgt, ipa, pte, &level));
2141 
2142 	if (kvm_granule_size(level) != size)
2143 		return -E2BIG;
2144 
2145 	if (!kvm_pte_valid(*pte))
2146 		return -ENOENT;
2147 
2148 	*phys = kvm_pte_to_phys(*pte);
2149 
2150 	return 0;
2151 }
2152 
2153 int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot)
2154 {
2155 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
2156 	u64 ipa = hyp_pfn_to_phys(gfn);
2157 	int ret;
2158 
2159 	if (WARN_ON(kvm_vm_is_protected(&vm->kvm)))
2160 		return -EPERM;
2161 
2162 	if (prot & ~KVM_PGTABLE_PROT_RWX)
2163 		return -EINVAL;
2164 
2165 	guest_lock_component(vm);
2166 	ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0);
2167 	guest_unlock_component(vm);
2168 
2169 	return ret;
2170 }
2171 
2172 int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *vm, u64 size)
2173 {
2174 	u64 ipa = hyp_pfn_to_phys(gfn);
2175 	int ret;
2176 
2177 	if (WARN_ON(kvm_vm_is_protected(&vm->kvm)))
2178 		return -EPERM;
2179 
2180 	guest_lock_component(vm);
2181 	ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, size);
2182 	guest_unlock_component(vm);
2183 
2184 	return ret;
2185 }
2186 
2187 int __pkvm_host_test_clear_young_guest(u64 gfn, u64 size, bool mkold, struct pkvm_hyp_vm *vm)
2188 {
2189 	u64 ipa = hyp_pfn_to_phys(gfn);
2190 	int ret;
2191 
2192 	if (WARN_ON(kvm_vm_is_protected(&vm->kvm)))
2193 		return -EPERM;
2194 
2195 	guest_lock_component(vm);
2196 	ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, size, mkold);
2197 	guest_unlock_component(vm);
2198 
2199 	return ret;
2200 }
2201 
2202 kvm_pte_t __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu)
2203 {
2204 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
2205 	u64 ipa = hyp_pfn_to_phys(gfn);
2206 	kvm_pte_t pte;
2207 
2208 	if (WARN_ON(kvm_vm_is_protected(&vm->kvm)))
2209 		return 0;
2210 
2211 	guest_lock_component(vm);
2212 	pte = kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0);
2213 	guest_unlock_component(vm);
2214 
2215 	return pte;
2216 }
2217 
2218 int __pkvm_host_split_guest(u64 gfn, u64 size, struct pkvm_hyp_vcpu *vcpu)
2219 {
2220 	struct kvm_hyp_memcache *mc = &vcpu->vcpu.arch.stage2_mc;
2221 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
2222 	u64 ipa = hyp_pfn_to_phys(gfn);
2223 	int ret;
2224 
2225 	if (size != PMD_SIZE)
2226 		return -EINVAL;
2227 
2228 	guest_lock_component(vm);
2229 
2230 	/*
2231 	 * stage2_split() already checks the existing mapping is valid and PMD-level.
2232 	 * No other check is necessary.
2233 	 */
2234 
2235 	ret = kvm_pgtable_stage2_split(&vm->pgt, ipa, size, mc);
2236 
2237 	guest_unlock_component(vm);
2238 
2239 	return ret;
2240 }
2241 
2242 static int __host_set_owner_guest(struct pkvm_hyp_vcpu *vcpu, u64 phys, u64 ipa,
2243 				  size_t size, bool is_memory)
2244 {
2245 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
2246 	u64 nr_pages = size >> PAGE_SHIFT;
2247 	int ret;
2248 
2249 	/*
2250 	 * update_iommu is false: the caller must update the IOMMU mappings _before_
2251 	 * calling this function. This is intended to protect pvmfw loading.
2252 	 */
2253 	WARN_ON(__host_stage2_set_owner_locked(phys, size, PKVM_ID_GUEST,
2254 					       is_memory, 0, false));
2255 	psci_mem_protect_inc(nr_pages);
2256 	if (pkvm_ipa_range_has_pvmfw(vm, ipa, ipa + size)) {
2257 		ret = pkvm_load_pvmfw_pages(vm, ipa, phys, size);
2258 		if (WARN_ON(ret)) {
2259 			psci_mem_protect_dec(nr_pages);
2260 			return ret;
2261 		}
2262 	}
2263 
2264 	return 0;
2265 }
2266 
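/*
 * Donate host pages to a guest: the pages must be exclusively host-owned
 * with a zero refcount and the IPA range unmapped in the guest. Memory is
 * unmapped from the IOMMUs first, then ownership moves to PKVM_ID_GUEST and
 * the range is mapped RWX as PKVM_PAGE_OWNED in the guest stage-2 (loading
 * pvmfw into it if the IPA range overlaps the pvmfw region).
 */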
2267 int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu, u64 nr_pages)
2268 {
2269 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
2270 	u64 phys = hyp_pfn_to_phys(pfn);
2271 	u64 ipa = hyp_pfn_to_phys(gfn);
2272 	enum kvm_pgtable_prot prot;
2273 	bool is_memory;
2274 	size_t size;
2275 	int ret;
2276 
2277 	if (check_mul_overflow(nr_pages, PAGE_SIZE, &size))
2278 		return -EINVAL;
2279 
2280 	host_lock_component();
2281 	guest_lock_component(vm);
2282 
2283 	ret = ___host_check_page_state_range(phys, size, PKVM_PAGE_OWNED, HOST_CHECK_NULL_REFCNT);
2284 	if (ret)
2285 		goto unlock;
2286 	ret = __guest_check_page_state_range(vcpu, ipa, size, PKVM_NOPAGE);
2287 	if (ret)
2288 		goto unlock;
2289 
2290 	is_memory = addr_is_memory(phys);
2291 	if (is_memory) {
2292 		kvm_iommu_host_stage2_idmap(phys, phys + size, 0);
2293 		kvm_iommu_host_stage2_idmap_complete(false);
2294 	}
2295 	WARN_ON(__host_set_owner_guest(vcpu, phys, ipa, size, is_memory));
2296 
2297 	prot = pkvm_mkstate(KVM_PGTABLE_PROT_RWX, PKVM_PAGE_OWNED);
2298 	WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys, prot,
2299 				       &vcpu->vcpu.arch.stage2_mc, 0));
2300 
2301 unlock:
2302 	guest_unlock_component(vm);
2303 	host_unlock_component();
2304 
2305 	return ret;
2306 }
2307 
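/*
 * Scratch buffer describing the scatter-gather list currently being donated:
 * entries are copied from the vCPU's hyp requests and the array is
 * terminated by a sentinel entry with order == 0xFF.
 */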
2308 struct kvm_hyp_pinned_page *hyp_ppages;
2309 
2310 static int __copy_hyp_ppages(struct pkvm_hyp_vcpu *vcpu)
2311 {
2312 	struct kvm_hyp_pinned_page *ppage, *hyp_ppage;
2313 
2314 	WARN_ON(!hyp_ppages);
2315 
2316 	ppage = next_kvm_hyp_pinned_page(vcpu->vcpu.arch.hyp_reqs, NULL, true);
2317 	if (!ppage)
2318 		return -EINVAL;
2319 
2320 	hyp_ppage = hyp_ppages;
2321 
2322 	do {
2323 		memcpy(hyp_ppage, ppage, sizeof(*ppage));
2324 		ppage = next_kvm_hyp_pinned_page(vcpu->vcpu.arch.hyp_reqs, ppage, true);
2325 		hyp_ppage++; /* No risk of overflowing hyp_ppages */
2326 	} while (ppage);
2327 
2328 	hyp_ppage->order = 0xFF;
2329 
2330 	return 0;
2331 }
2332 
2333 #define for_each_hyp_ppage(hyp_ppage)						\
2334 	for (hyp_ppage = hyp_ppages; (hyp_ppage)->order != 0xFF; (hyp_ppage)++)
2335 
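/*
 * Donate a scatter-gather list of host pages to a guest in one transaction:
 * all entries must be of the same kind (all memory or all MMIO), exclusively
 * host-owned with a zero refcount, and unmapped in the guest. For memory,
 * the whole list is unmapped from the IOMMUs before ownership is transferred
 * and the guest stage-2 mappings are installed.
 */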
2336 int __pkvm_host_donate_sglist_guest(struct pkvm_hyp_vcpu *vcpu)
2337 {
2338 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
2339 	struct kvm_hyp_pinned_page *ppage = hyp_ppages;
2340 	bool is_memory;
2341 	int ret;
2342 
2343 	host_lock_component();
2344 	guest_lock_component(vm);
2345 
2346 	ret = __copy_hyp_ppages(vcpu);
2347 	if (ret)
2348 		goto unlock;
2349 
2350 	is_memory = addr_is_memory(hyp_pfn_to_phys(ppage->pfn));
2351 
2352 	for_each_hyp_ppage(ppage) {
2353 		u64 phys = hyp_pfn_to_phys(ppage->pfn);
2354 		u64 ipa = hyp_pfn_to_phys(ppage->gfn);
2355 		size_t size;
2356 
2357 		if (check_shl_overflow(PAGE_SIZE, ppage->order, &size)) {
2358 			ret = -EINVAL;
2359 			goto unlock;
2360 		}
2361 
2362 		if (addr_is_memory(phys) != is_memory) {
2363 			ret = -EINVAL;
2364 			goto unlock;
2365 		}
2366 
2367 		ret = ___host_check_page_state_range(phys, size, PKVM_PAGE_OWNED,
2368 						     HOST_CHECK_NULL_REFCNT);
2369 		if (ret)
2370 			goto unlock;
2371 
2372 		ret = __guest_check_page_state_range(vcpu, ipa, size, PKVM_NOPAGE);
2373 		if (ret)
2374 			goto unlock;
2375 	}
2376 
2377 	if (is_memory) {
2378 		for_each_hyp_ppage(ppage) {
2379 			size_t size = PAGE_SIZE << ppage->order;
2380 			u64 phys = hyp_pfn_to_phys(ppage->pfn);
2381 
2382 			kvm_iommu_host_stage2_idmap(phys, phys + size, 0);
2383 		}
2384 
2385 		kvm_iommu_host_stage2_idmap_complete(false);
2386 	}
2387 
2388 	for_each_hyp_ppage(ppage) {
2389 		size_t size = PAGE_SIZE << ppage->order;
2390 		u64 phys = hyp_pfn_to_phys(ppage->pfn);
2391 		u64 ipa = hyp_pfn_to_phys(ppage->gfn);
2392 		enum kvm_pgtable_prot prot;
2393 
2394 		/* Now that the sglist is unmapped from the IOMMUs, we can load pvmfw */
2395 		WARN_ON(__host_set_owner_guest(vcpu, phys, ipa, size, is_memory));
2396 
2397 		prot = pkvm_mkstate(KVM_PGTABLE_PROT_RWX, PKVM_PAGE_OWNED);
2398 		WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys, prot,
2399 				       &vcpu->vcpu.arch.stage2_mc, 0));
2400 	}
2401 
2402 unlock:
2403 	guest_unlock_component(vm);
2404 	host_unlock_component();
2405 
2406 	return ret;
2407 }
2408 
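/*
 * Donate a scatter-gather list of host memory pages to the hypervisor: each
 * entry is mapped into the hyp VA space and its ownership transferred to
 * PKVM_ID_HYP. If creating a hyp mapping fails, the entries donated so far
 * are rolled back to host ownership and their IOMMU identity mappings are
 * restored.
 */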
2409 int __pkvm_host_donate_sglist_hyp(struct pkvm_sglist_page *sglist, size_t nr_pages)
2410 {
2411 	int p, ret;
2412 
2413 	host_lock_component();
2414 	hyp_lock_component();
2415 
2416 	/* Check that we are reading hyp private memory */
2417 	if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG))
2418 		WARN_ON(__hyp_check_page_state_range((u64)sglist, nr_pages * sizeof(*sglist),
2419 						     PKVM_PAGE_OWNED));
2420 
2421 	for (p = 0; p < nr_pages; p++) {
2422 		u64 phys = hyp_pfn_to_phys(sglist[p].pfn);
2423 		size_t size;
2424 
2425 		if (check_shl_overflow(PAGE_SIZE, sglist[p].order, &size)) {
2426 			ret = -EINVAL;
2427 			goto unlock;
2428 		}
2429 
2430 		if (!addr_is_memory(phys)) {
2431 			ret = -EINVAL;
2432 			goto unlock;
2433 		}
2434 
2435 		ret = ___host_check_page_state_range(phys, size, PKVM_PAGE_OWNED,
2436 						     HOST_CHECK_NULL_REFCNT);
2437 		if (ret)
2438 			goto unlock;
2439 
2440 		if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
2441 			ret = __hyp_check_page_state_range((u64)__hyp_va(phys), size, PKVM_NOPAGE);
2442 			if (ret)
2443 				goto unlock;
2444 		}
2445 	}
2446 
2447 	for (p = 0; p < nr_pages; p++) {
2448 		size_t size = PAGE_SIZE << sglist[p].order;
2449 		u64 phys = hyp_pfn_to_phys(sglist[p].pfn);
2450 		enum kvm_pgtable_prot prot;
2451 
2452 		prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED);
2453 		ret = pkvm_create_mappings_locked(__hyp_va(phys), __hyp_va(phys) + size, prot);
2454 		if (ret) {
2455 			WARN_ON(ret != -ENOMEM);
2456 
2457 			kvm_iommu_host_stage2_idmap_complete(false);
2458 
2459 			/* Rollback */
2460 			for (; p >= 0; p--) {
2461 				phys = hyp_pfn_to_phys(sglist[p].pfn);
2462 				size = PAGE_SIZE << sglist[p].order;
2463 
2464 				WARN_ON(host_stage2_idmap_locked(phys, size,
2465 								 PKVM_HOST_MEM_PROT, false));
2466 				kvm_iommu_host_stage2_idmap(phys, phys + size, PKVM_HOST_MEM_PROT);
2467 				pkvm_remove_mappings_locked(__hyp_va(phys), __hyp_va(phys) + size);
2468 			}
2469 			kvm_iommu_host_stage2_idmap_complete(true);
2470 
2471 			break;
2472 		}
2473 
2474 		WARN_ON(__host_stage2_set_owner_locked(phys, size, PKVM_ID_HYP, true, 0, false));
2475 		kvm_iommu_host_stage2_idmap(phys, phys + size, 0);
2476 	}
2477 
2478 	kvm_iommu_host_stage2_idmap_complete(false);
2479 
2480 unlock:
2481 	hyp_unlock_component();
2482 	host_unlock_component();
2483 
2484 	return ret;
2485 }
2486 
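/*
 * Zero a range of guest memory (the size must be page-aligned) through the
 * fixmap and clean it to the PoC, so the host cannot read back stale guest
 * data once it regains access to the pages.
 */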
2487 void hyp_poison_page(phys_addr_t phys, size_t size)
2488 {
2489 	WARN_ON(!PAGE_ALIGNED(size));
2490 
2491 	while (size) {
2492 		size_t __size = size == PMD_SIZE ? size : PAGE_SIZE;
2493 		void *addr = __fixmap_guest_page(__hyp_va(phys), &__size);
2494 
2495 		memset(addr, 0, __size);
2496 
2497 		/*
2498 		 * Prefer kvm_flush_dcache_to_poc() over __clean_dcache_guest_page()
2499 		 * here as the latter may elide the CMO under the assumption that FWB
2500 		 * will be enabled on CPUs that support it. This is incorrect for the
2501 		 * host stage-2 and would otherwise lead to a malicious host potentially
2502 		 * being able to read the contents of newly reclaimed guest pages.
2503 		 */
2504 		kvm_flush_dcache_to_poc(addr, __size);
2505 		__fixunmap_guest_page(__size);
2506 
2507 		size -= __size;
2508 		phys += __size;
2509 	}
2510 }
2511 
2512 void destroy_hyp_vm_pgt(struct pkvm_hyp_vm *vm)
2513 {
2514 	guest_lock_component(vm);
2515 	kvm_pgtable_stage2_destroy(&vm->pgt);
2516 	guest_unlock_component(vm);
2517 }
2518 
2519 void drain_hyp_pool(struct hyp_pool *pool, struct kvm_hyp_memcache *mc)
2520 {
2521 	WARN_ON(reclaim_hyp_pool(pool, mc, INT_MAX) != -ENOMEM);
2522 }
2523 
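/*
 * Reclaim a page or PMD-sized block from a guest back to the host. The guest
 * mapping at @ipa must point to @pfn; pages the guest owned exclusively are
 * poisoned before being handed back, whereas shared pages simply have the
 * share undone. Ranges presumably shared via FF-A are refused with -EBUSY
 * and handled separately.
 */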
2524 int __pkvm_host_reclaim_page(struct pkvm_hyp_vm *vm, u64 pfn, u64 ipa, u8 order)
2525 {
2526 	phys_addr_t __phys, phys = hyp_pfn_to_phys(pfn);
2527 	size_t page_size;
2528 	kvm_pte_t pte;
2529 	int ret = 0;
2530 
2531 	if (check_shl_overflow(PAGE_SIZE, order, &page_size))
2532 		return -EINVAL;
2533 
2534 	host_lock_component();
2535 	guest_lock_component(vm);
2536 
2537 	ret = guest_get_valid_pte(vm, &__phys, ipa, order, &pte);
2538 	if (ret)
2539 		goto unlock;
2540 
2541 	if (phys != __phys) {
2542 		ret = -EINVAL;
2543 		goto unlock;
2544 	}
2545 
2546 	switch ((int)guest_get_page_state(pte, ipa)) {
2547 	case PKVM_PAGE_OWNED:
2548 		WARN_ON(___host_check_page_state_range(phys, page_size, PKVM_NOPAGE,
2549 						       HOST_CHECK_IS_MEMORY));
2550 		/* No vCPUs of the guest can run, so doing this prior to the stage-2 unmap is OK */
2551 		hyp_poison_page(phys, page_size);
2552 		psci_mem_protect_dec(1 << order);
2553 		break;
2554 	case PKVM_PAGE_SHARED_BORROWED:
2555 	case PKVM_PAGE_SHARED_BORROWED | PKVM_PAGE_RESTRICTED_PROT:
2556 		WARN_ON(__host_check_page_state_range(phys, page_size, PKVM_PAGE_SHARED_OWNED));
2557 		break;
2558 	case PKVM_PAGE_SHARED_OWNED:
2559 		if (__host_check_page_state_range(phys, page_size, PKVM_PAGE_SHARED_BORROWED)) {
2560 			/* Presumably a page shared via FF-A, will be handled separately */
2561 			ret = -EBUSY;
2562 			goto unlock;
2563 		}
2564 		break;
2565 	default:
2566 		BUG_ON(1);
2567 	}
2568 
2569 	/* We could avoid the TLB invalidation here, as it is done per VMID on the finalize path */
2570 	WARN_ON(kvm_pgtable_stage2_unmap(&vm->pgt, ipa, page_size));
2571 	WARN_ON(host_stage2_set_owner_locked(phys, page_size, PKVM_ID_HOST));
2572 
2573 unlock:
2574 	guest_unlock_component(vm);
2575 	host_unlock_component();
2576 
2577 	return ret;
2578 }
2579 
2580 static bool __check_ioguard_page(struct pkvm_hyp_vcpu *hyp_vcpu, u64 ipa)
2581 {
2582 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
2583 	kvm_pte_t pte;
2584 	s8 level;
2585 	int ret;
2586 
2587 	ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
2588 	if (ret)
2589 		return false;
2590 
2591 	/* Must be a PAGE_SIZE mapping with our annotation */
2592 	return (BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level)) == PAGE_SIZE &&
2593 		pte == KVM_INVALID_PTE_MMIO_NOTE);
2594 }
2595 
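/*
 * Annotate up to @nr_pages of guest IPA space, starting at @ipa, with the
 * MMIO-guard marker. Only IPAs that are currently not mapped (PKVM_NOPAGE,
 * possibly already tagged PKVM_MMIO) are eligible; @nr_guarded returns the
 * number of pages of the request that were actually annotated.
 */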
2596 int __pkvm_install_ioguard_page(struct pkvm_hyp_vcpu *hyp_vcpu, u64 ipa,
2597 				u64 nr_pages, u64 *nr_guarded)
2598 {
2599 	struct guest_request_walker_data data = GUEST_WALKER_DATA_INIT(PKVM_NOPAGE);
2600 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
2601 	struct kvm_pgtable_walker walker = {
2602 		.cb     = guest_request_walker,
2603 		.flags  = KVM_PGTABLE_WALK_LEAF,
2604 		.arg    = (void *)&data,
2605 	};
2606 	int ret;
2607 
2608 	if (!test_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vm->kvm.arch.flags))
2609 		return -EINVAL;
2610 
2611 	if (!PAGE_ALIGNED(ipa))
2612 		return -EINVAL;
2613 
2614 	guest_lock_component(vm);
2615 
2616 	/* Check we either have NOPAGE or NOPAGE|MMIO in this range */
2617 	data.desired_mask = ~PKVM_MMIO;
2618 
2619 	ret = kvm_pgtable_walk(&vm->pgt, ipa, nr_pages << PAGE_SHIFT, &walker);
2620 	/* Walker reached data.max_ptes */
2621 	if (ret == -E2BIG)
2622 		ret = 0;
2623 	else if (ret)
2624 		goto unlock;
2625 
2626 	/*
2627 	 * Intersection between the requested region and what has been verified
2628 	 */
2629 	*nr_guarded = nr_pages = min_t(u64, data.size >> PAGE_SHIFT, nr_pages);
2630 	ret = kvm_pgtable_stage2_annotate(&vm->pgt, ipa, nr_pages << PAGE_SHIFT,
2631 					  &hyp_vcpu->vcpu.arch.stage2_mc,
2632 					  KVM_INVALID_PTE_MMIO_NOTE);
2633 
2634 unlock:
2635 	guest_unlock_component(vm);
2636 	return ret;
2637 }
2638 
2639 bool __pkvm_check_ioguard_page(struct pkvm_hyp_vcpu *hyp_vcpu)
2640 {
2641 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
2642 	u64 ipa, end;
2643 	bool ret;
2644 
2645 	if (!kvm_vcpu_dabt_isvalid(&hyp_vcpu->vcpu))
2646 		return false;
2647 
2648 	if (!test_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vm->kvm.arch.flags))
2649 		return true;
2650 
2651 	ipa  = kvm_vcpu_get_fault_ipa(&hyp_vcpu->vcpu);
2652 	ipa |= kvm_vcpu_get_hfar(&hyp_vcpu->vcpu) & FAR_MASK;
2653 	end = ipa + kvm_vcpu_dabt_get_as(&hyp_vcpu->vcpu) - 1;
2654 
2655 	guest_lock_component(vm);
2656 	ret = __check_ioguard_page(hyp_vcpu, ipa);
2657 	if ((end & PAGE_MASK) != (ipa & PAGE_MASK))
2658 		ret &= __check_ioguard_page(hyp_vcpu, end);
2659 	guest_unlock_component(vm);
2660 
2661 	return ret;
2662 }
2663 
2664 static int __pkvm_remove_ioguard_page(struct pkvm_hyp_vm *vm, u64 ipa)
2665 {
2666 	int ret;
2667 	kvm_pte_t pte;
2668 	s8 level;
2669 
2670 	hyp_assert_lock_held(&vm->pgtable_lock);
2671 
2672 	if (!test_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vm->kvm.arch.flags))
2673 		return -EINVAL;
2674 
2675 	if (!PAGE_ALIGNED(ipa))
2676 		return -EINVAL;
2677 
2678 	ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
2679 	if (ret)
2680 		return ret;
2681 
2682 	if (BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level)) == PAGE_SIZE &&
2683 	    pte == KVM_INVALID_PTE_MMIO_NOTE)
2684 		return kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE);
2685 
2686 	return kvm_pte_valid(pte) ? -EEXIST : -EINVAL;
2687 }
2688 
2689 int __pkvm_install_guest_mmio(struct pkvm_hyp_vcpu *hyp_vcpu, u64 pfn, u64 gfn)
2690 {
2691 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
2692 	u64 ipa = gfn << PAGE_SHIFT;
2693 	int ret;
2694 
2695 	hyp_lock_component();
2696 	guest_lock_component(vm);
2697 	ret = __pkvm_remove_ioguard_page(vm, ipa);
2698 	if (ret)
2699 		goto out_unlock;
2700 	ret = pkvm_hyp_donate_guest(hyp_vcpu, pfn, gfn);
2701 out_unlock:
2702 	guest_unlock_component(vm);
2703 	hyp_unlock_component();
2704 	return ret;
2705 }
2706 
2707 int host_stage2_get_leaf(phys_addr_t phys, kvm_pte_t *ptep, s8 *level)
2708 {
2709 	int ret;
2710 
2711 	host_lock_component();
2712 	ret = kvm_pgtable_get_leaf(&host_mmu.pgt, phys, ptep, level);
2713 	host_unlock_component();
2714 
2715 	return ret;
2716 }
2717 
2718 static u64 __pkvm_ptdump_get_host_config(enum pkvm_ptdump_ops op)
2719 {
2720 	u64 ret = 0;
2721 
2722 	host_lock_component();
2723 	if (op == PKVM_PTDUMP_GET_LEVEL)
2724 		ret = host_mmu.pgt.start_level;
2725 	else
2726 		ret = host_mmu.pgt.ia_bits;
2727 	host_unlock_component();
2728 
2729 	return ret;
2730 }
2731 
2732 static u64 __pkvm_ptdump_get_guest_config(pkvm_handle_t handle, enum pkvm_ptdump_ops op)
2733 {
2734 	struct pkvm_hyp_vm *vm;
2735 	u64 ret = 0;
2736 
2737 	vm = get_pkvm_hyp_vm(handle);
2738 	if (!vm)
2739 		return -EINVAL;
2740 
2741 	if (op == PKVM_PTDUMP_GET_LEVEL)
2742 		ret = vm->pgt.start_level;
2743 	else
2744 		ret = vm->pgt.ia_bits;
2745 
2746 	put_pkvm_hyp_vm(vm);
2747 	return ret;
2748 }
2749 
2750 u64 __pkvm_ptdump_get_config(pkvm_handle_t handle, enum pkvm_ptdump_ops op)
2751 {
2752 	if (!handle)
2753 		return __pkvm_ptdump_get_host_config(op);
2754 
2755 	return __pkvm_ptdump_get_guest_config(handle, op);
2756 }
2757 
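/*
 * Page-table walker callback for the page-table dumper: append one log
 * record per visited leaf to the current log page, following pfn_next to
 * the next page of the chain when the current one is full, and failing
 * with -ENOMEM once the chain is exhausted.
 */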
2758 static int pkvm_ptdump_walker(const struct kvm_pgtable_visit_ctx *ctx,
2759 			      enum kvm_pgtable_walk_flags visit)
2760 {
2761 	struct pkvm_ptdump_log_hdr **log_hdr = ctx->arg;
2762 	ssize_t avail_space = PAGE_SIZE - (*log_hdr)->w_index - sizeof(struct pkvm_ptdump_log_hdr);
2763 	struct pkvm_ptdump_log *log;
2764 
2765 	if (avail_space < sizeof(struct pkvm_ptdump_log)) {
2766 		if ((*log_hdr)->pfn_next == INVALID_PTDUMP_PFN)
2767 			return -ENOMEM;
2768 
2769 		*log_hdr = hyp_phys_to_virt(hyp_pfn_to_phys((*log_hdr)->pfn_next));
2770 		WARN_ON((*log_hdr)->w_index);
2771 	}
2772 
2773 	log = (struct pkvm_ptdump_log *)((void *)*log_hdr + (*log_hdr)->w_index +
2774 					 sizeof(struct pkvm_ptdump_log_hdr));
2775 	log->pfn = ctx->addr >> PAGE_SHIFT;
2776 	log->valid = ctx->old & PTE_VALID;
2777 	log->r = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R, ctx->old);
2778 	log->w = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, ctx->old);
2779 	log->xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, ctx->old);
2780 	log->table = FIELD_GET(KVM_PTE_TYPE, ctx->old);
2781 	log->level = ctx->level;
2782 	log->page_state = FIELD_GET(PKVM_PAGE_STATE_PROT_MASK, ctx->old);
2783 
2784 	(*log_hdr)->w_index += sizeof(struct pkvm_ptdump_log);
2785 	return 0;
2786 }
2787 
2788 static void pkvm_ptdump_teardown_log(struct pkvm_ptdump_log_hdr *log_hva,
2789 				     struct pkvm_ptdump_log_hdr *cur)
2790 {
2791 	struct pkvm_ptdump_log_hdr *tmp, *log = (void *)kern_hyp_va(log_hva);
2792 	bool next_log_invalid = false;
2793 
2794 	while (log != cur && !next_log_invalid) {
2795 		next_log_invalid = log->pfn_next == INVALID_PTDUMP_PFN;
2796 		tmp = hyp_phys_to_virt(hyp_pfn_to_phys(log->pfn_next));
2797 		WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(log), 1));
2798 		log = tmp;
2799 	}
2800 }
2801 
2802 static int pkvm_ptdump_setup_log(struct pkvm_ptdump_log_hdr *log_hva)
2803 {
2804 	int ret;
2805 	struct pkvm_ptdump_log_hdr *log = (void *)kern_hyp_va(log_hva);
2806 
2807 	if (!PAGE_ALIGNED(log))
2808 		return -EINVAL;
2809 
2810 	for (;;) {
2811 		ret = __pkvm_host_donate_hyp(hyp_virt_to_pfn(log), 1);
2812 		if (ret) {
2813 			pkvm_ptdump_teardown_log(log_hva, log);
2814 			return ret;
2815 		}
2816 
2817 		log->w_index = 0;
2818 		if (log->pfn_next == INVALID_PTDUMP_PFN)
2819 			break;
2820 
2821 		log = hyp_phys_to_virt(hyp_pfn_to_phys(log->pfn_next));
2822 	}
2823 
2824 	return 0;
2825 }
2826 
2827 static int pkvm_ptdump_walk_host(struct kvm_pgtable_walker *walker)
2828 {
2829 	int ret;
2830 
2831 	host_lock_component();
2832 	ret = kvm_pgtable_walk(&host_mmu.pgt, 0, BIT(host_mmu.pgt.ia_bits), walker);
2833 	host_unlock_component();
2834 
2835 	return ret;
2836 }
2837 
2838 static int pkvm_ptdump_walk_guest(struct pkvm_hyp_vm *vm, struct kvm_pgtable_walker *walker)
2839 {
2840 	int ret;
2841 
2842 	guest_lock_component(vm);
2843 
2844 	ret = kvm_pgtable_walk(&vm->pgt, 0, BIT(vm->pgt.ia_bits), walker);
2845 
2846 	guest_unlock_component(vm);
2847 
2848 	return ret;
2849 }
2850 
2851 u64 __pkvm_ptdump_walk_range(pkvm_handle_t handle, struct pkvm_ptdump_log_hdr *log)
2852 {
2853 	struct pkvm_hyp_vm *vm;
2854 	int ret;
2855 	struct pkvm_ptdump_log_hdr *log_hyp = kern_hyp_va(log);
2856 	struct kvm_pgtable_walker walker = {
2857 		.cb     = pkvm_ptdump_walker,
2858 		.flags  = KVM_PGTABLE_WALK_LEAF,
2859 		.arg    = &log_hyp,
2860 	};
2861 
2862 	ret = pkvm_ptdump_setup_log(log);
2863 	if (ret)
2864 		return ret;
2865 
2866 	if (!handle)
2867 		ret = pkvm_ptdump_walk_host(&walker);
2868 	else {
2869 		vm = get_pkvm_hyp_vm(handle);
2870 		if (!vm) {
2871 			ret = -EINVAL;
2872 			goto teardown;
2873 		}
2874 
2875 		ret = pkvm_ptdump_walk_guest(vm, &walker);
2876 		put_pkvm_hyp_vm(vm);
2877 	}
2878 teardown:
2879 	pkvm_ptdump_teardown_log(log, NULL);
2880 	return ret;
2881 }
2882 
2883 /* Return the PA for an owned guest IPA, or create a map request and have the guest HVC repeated */
2884 int pkvm_get_guest_pa_request(struct pkvm_hyp_vcpu *hyp_vcpu, u64 ipa,
2885 			      size_t ipa_size_request, u64 *out_pa, s8 *out_level)
2886 {
2887 	struct kvm_hyp_req *req;
2888 	kvm_pte_t pte;
2889 	enum pkvm_page_state state;
2890 	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
2891 
2892 	guest_lock_component(vm);
2893 	WARN_ON(kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, out_level));
2894 	guest_unlock_component(vm);
2895 	if (!kvm_pte_valid(pte)) {
2896 		/* Page not mapped, create a request */
2897 		req = pkvm_hyp_req_reserve(hyp_vcpu, KVM_HYP_REQ_TYPE_MAP);
2898 		if (!req)
2899 			return -ENOMEM;
2900 
2901 		req->map.guest_ipa = ipa;
2902 		req->map.size = ipa_size_request;
2903 		return -ENOENT;
2904 	}
2905 
2906 	state = pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
2907 	if (state != PKVM_PAGE_OWNED)
2908 		return -EPERM;
2909 
2910 	*out_pa = kvm_pte_to_phys(pte);
2911 	*out_pa |= ipa & (kvm_granule_size(*out_level) - 1) & PAGE_MASK;
2912 	return 0;
2913 }
2914 
2915 /* Get a PA and use the page for DMA */
2916 int pkvm_get_guest_pa_request_use_dma(struct pkvm_hyp_vcpu *hyp_vcpu, u64 ipa,
2917 				      size_t ipa_size_request, u64 *out_pa, s8 *level)
2918 {
2919 	int ret;
2920 
2921 	host_lock_component();
2922 	ret = pkvm_get_guest_pa_request(hyp_vcpu, ipa, ipa_size_request,
2923 					out_pa, level);
2924 	if (ret)
2925 		goto out_ret;
2926 	WARN_ON(__pkvm_use_dma_locked(*out_pa, kvm_granule_size(*level), hyp_vcpu));
2927 out_ret:
2928 	host_unlock_component();
2929 	return ret;
2930 }
2931 
2932 #ifdef CONFIG_PKVM_SELFTESTS
2933 struct pkvm_expected_state {
2934 	enum pkvm_page_state host;
2935 	enum pkvm_page_state hyp;
2936 	enum pkvm_page_state guest[2]; /* [ gfn, gfn + 1 ] */
2937 };
2938 
2939 static struct pkvm_expected_state selftest_state;
2940 static struct hyp_page *selftest_page;
2941 
2942 static struct pkvm_hyp_vm selftest_vm = {
2943 	.kvm = {
2944 		.arch = {
2945 			.mmu = {
2946 				.arch = &selftest_vm.kvm.arch,
2947 				.pgt = &selftest_vm.pgt,
2948 			},
2949 		},
2950 	},
2951 };
2952 
2953 static struct pkvm_hyp_vcpu selftest_vcpu = {
2954 	.vcpu = {
2955 		.arch = {
2956 			.hw_mmu = &selftest_vm.kvm.arch.mmu,
2957 		},
2958 		.kvm = &selftest_vm.kvm,
2959 	},
2960 };
2961 
2962 static void init_selftest_vm(void *virt)
2963 {
2964 	struct hyp_page *p = hyp_virt_to_page(virt);
2965 	int i;
2966 
2967 	selftest_vm.kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
2968 	WARN_ON(kvm_guest_prepare_stage2(&selftest_vm, virt));
2969 
2970 	for (i = 0; i < pkvm_selftest_pages(); i++) {
2971 		if (p[i].refcount)
2972 			continue;
2973 		p[i].refcount = 1;
2974 		hyp_put_page(&selftest_vm.pool, hyp_page_to_virt(&p[i]));
2975 	}
2976 }
2977 
2978 static void teardown_selftest_vm(void)
2979 {
2980 	destroy_hyp_vm_pgt(&selftest_vm);
2981 }
2982 
2983 static u64 selftest_ipa(void)
2984 {
2985 	return BIT(selftest_vm.pgt.ia_bits - 1);
2986 }
2987 
2988 static void assert_page_state(void)
2989 {
2990 	void *virt = hyp_page_to_virt(selftest_page);
2991 	u64 size = PAGE_SIZE << selftest_page->order;
2992 	struct pkvm_hyp_vcpu *vcpu = &selftest_vcpu;
2993 	u64 phys = hyp_virt_to_phys(virt);
2994 	u64 ipa[2] = { selftest_ipa(), selftest_ipa() + PAGE_SIZE };
2995 
2996 	host_lock_component();
2997 	WARN_ON(__host_check_page_state_range(phys, size, selftest_state.host));
2998 	host_unlock_component();
2999 
3000 	hyp_lock_component();
3001 	WARN_ON(__hyp_check_page_state_range((u64)virt, size, selftest_state.hyp));
3002 	hyp_unlock_component();
3003 
3004 	guest_lock_component(&selftest_vm);
3005 	WARN_ON(__guest_check_page_state_range(vcpu, ipa[0], size, selftest_state.guest[0]));
3006 	WARN_ON(__guest_check_page_state_range(vcpu, ipa[1], size, selftest_state.guest[1]));
3007 	guest_unlock_component(&selftest_vm);
3008 }
3009 
3010 #define assert_transition_res(res, fn, ...)		\
3011 	do {						\
3012 		WARN_ON(fn(__VA_ARGS__) != res);	\
3013 		assert_page_state();			\
3014 	} while (0)
3015 
3016 void pkvm_ownership_selftest(void *base)
3017 {
3018 	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_RWX;
3019 	void *virt = hyp_alloc_pages(&host_s2_pool, 0);
3020 	struct pkvm_hyp_vcpu *vcpu = &selftest_vcpu;
3021 	struct pkvm_hyp_vm *vm = &selftest_vm;
3022 	u64 phys, size, pfn, gfn, pa;
3023 
3024 	WARN_ON(!virt);
3025 	selftest_page = hyp_virt_to_page(virt);
3026 	selftest_page->refcount = 0;
3027 	init_selftest_vm(base);
3028 
3029 	size = PAGE_SIZE << selftest_page->order;
3030 	phys = hyp_virt_to_phys(virt);
3031 	pfn = hyp_phys_to_pfn(phys);
3032 	gfn = hyp_phys_to_pfn(selftest_ipa());
3033 
3034 	selftest_state.host = PKVM_NOPAGE;
3035 	selftest_state.hyp = PKVM_PAGE_OWNED;
3036 	selftest_state.guest[0] = selftest_state.guest[1] = PKVM_NOPAGE;
3037 	assert_page_state();
3038 	assert_transition_res(-EPERM,	__pkvm_host_donate_hyp, pfn, 1);
3039 	assert_transition_res(-EPERM,	__pkvm_host_share_hyp, pfn);
3040 	assert_transition_res(-EPERM,	__pkvm_host_unshare_hyp, pfn);
3041 	assert_transition_res(-EPERM,	__pkvm_host_share_ffa, pfn, 1);
3042 	assert_transition_res(-EPERM,	__pkvm_host_unshare_ffa, pfn, 1);
3043 	assert_transition_res(-EPERM,	hyp_pin_shared_mem, virt, virt + size);
3044 	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn, vcpu, prot, 1);
3045 	assert_transition_res(-ENOENT,	__pkvm_host_unshare_guest, gfn, vm, 1);
3046 	assert_transition_res(-EPERM,	__pkvm_host_donate_guest, pfn, gfn, vcpu, 1);
3047 
3048 	selftest_state.host = PKVM_PAGE_OWNED;
3049 	selftest_state.hyp = PKVM_NOPAGE;
3050 	assert_transition_res(0,	__pkvm_hyp_donate_host, pfn, 1);
3051 	assert_transition_res(-EPERM,	__pkvm_hyp_donate_host, pfn, 1);
3052 	assert_transition_res(-EPERM,	__pkvm_host_unshare_hyp, pfn);
3053 	assert_transition_res(-EPERM,	__pkvm_host_unshare_ffa, pfn, 1);
3054 	assert_transition_res(-ENOENT,	__pkvm_host_unshare_guest, gfn, vm, 1);
3055 	assert_transition_res(-EPERM,	hyp_pin_shared_mem, virt, virt + size);
3056 
3057 	selftest_state.host = PKVM_PAGE_SHARED_OWNED;
3058 	selftest_state.hyp = PKVM_PAGE_SHARED_BORROWED;
3059 	assert_transition_res(0,	__pkvm_host_share_hyp, pfn);
3060 	assert_transition_res(-EPERM,	__pkvm_host_share_hyp, pfn);
3061 	assert_transition_res(-EPERM,	__pkvm_host_donate_hyp, pfn, 1);
3062 	assert_transition_res(-EPERM,	__pkvm_host_share_ffa, pfn, 1);
3063 	assert_transition_res(-EPERM,	__pkvm_hyp_donate_host, pfn, 1);
3064 	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn, vcpu, prot, 1);
3065 	assert_transition_res(-ENOENT,	__pkvm_host_unshare_guest, gfn, vm, 1);
3066 	assert_transition_res(-EPERM,	__pkvm_host_donate_guest, pfn, gfn, vcpu, 1);
3067 
3068 	assert_transition_res(0,	hyp_pin_shared_mem, virt, virt + size);
3069 	assert_transition_res(0,	hyp_pin_shared_mem, virt, virt + size);
3070 	hyp_unpin_shared_mem(virt, virt + size);
3071 	WARN_ON(hyp_page_count(virt) != 1);
3072 	assert_transition_res(-EBUSY,	__pkvm_host_unshare_hyp, pfn);
3073 	assert_transition_res(-EPERM,	__pkvm_host_share_hyp, pfn);
3074 	assert_transition_res(-EPERM,	__pkvm_host_donate_hyp, pfn, 1);
3075 	assert_transition_res(-EPERM,	__pkvm_host_share_ffa, pfn, 1);
3076 	assert_transition_res(-EPERM,	__pkvm_hyp_donate_host, pfn, 1);
3077 	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn, vcpu, prot, 1);
3078 	assert_transition_res(-ENOENT,	__pkvm_host_unshare_guest, gfn, vm, 1);
3079 	assert_transition_res(-EPERM,	__pkvm_host_donate_guest, pfn, gfn, vcpu, 1);
3080 
3081 	hyp_unpin_shared_mem(virt, virt + size);
3082 	assert_page_state();
3083 	WARN_ON(hyp_page_count(virt));
3084 
3085 	selftest_state.host = PKVM_PAGE_OWNED;
3086 	selftest_state.hyp = PKVM_NOPAGE;
3087 	assert_transition_res(0,	__pkvm_host_unshare_hyp, pfn);
3088 
3089 	selftest_state.host = PKVM_PAGE_SHARED_OWNED;
3090 	selftest_state.hyp = PKVM_NOPAGE;
3091 	assert_transition_res(0,	__pkvm_host_share_ffa, pfn, 1);
3092 	assert_transition_res(-EPERM,	__pkvm_host_share_ffa, pfn, 1);
3093 	assert_transition_res(-EPERM,	__pkvm_host_donate_hyp, pfn, 1);
3094 	assert_transition_res(-EPERM,	__pkvm_host_share_hyp, pfn);
3095 	assert_transition_res(-EPERM,	__pkvm_host_unshare_hyp, pfn);
3096 	assert_transition_res(-EPERM,	__pkvm_hyp_donate_host, pfn, 1);
3097 	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn, vcpu, prot, 1);
3098 	assert_transition_res(-ENOENT,	__pkvm_host_unshare_guest, gfn, vm, 1);
3099 	assert_transition_res(-EPERM,	__pkvm_host_donate_guest, pfn, gfn, vcpu, 1);
3100 	assert_transition_res(-EPERM,	hyp_pin_shared_mem, virt, virt + size);
3101 
3102 	selftest_state.host = PKVM_PAGE_OWNED;
3103 	selftest_state.hyp = PKVM_NOPAGE;
3104 	assert_transition_res(0,	__pkvm_host_unshare_ffa, pfn, 1);
3105 	assert_transition_res(-EPERM,	__pkvm_host_unshare_ffa, pfn, 1);
3106 
3107 	selftest_state.host = PKVM_PAGE_SHARED_OWNED;
3108 	selftest_state.guest[0] = PKVM_PAGE_SHARED_BORROWED;
3109 	assert_transition_res(0,	__pkvm_host_share_guest, pfn, gfn, vcpu, prot, 1);
3110 	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn, vcpu, prot, 1);
3111 	assert_transition_res(-EPERM,	__pkvm_host_share_ffa, pfn, 1);
3112 	assert_transition_res(-EPERM,	__pkvm_host_donate_hyp, pfn, 1);
3113 	assert_transition_res(-EPERM,	__pkvm_host_share_hyp, pfn);
3114 	assert_transition_res(-EPERM,	__pkvm_host_unshare_hyp, pfn);
3115 	assert_transition_res(-EPERM,	__pkvm_hyp_donate_host, pfn, 1);
3116 	assert_transition_res(-EPERM,	__pkvm_host_donate_guest, pfn, gfn, vcpu, 1);
3117 	assert_transition_res(-EPERM,	hyp_pin_shared_mem, virt, virt + size);
3118 
3119 	selftest_state.guest[1] = PKVM_PAGE_SHARED_BORROWED;
3120 	assert_transition_res(0,	__pkvm_host_share_guest, pfn, gfn + 1, vcpu, prot, 1);
3121 	WARN_ON(hyp_virt_to_page(virt)->host_share_guest_count != 2);
3122 
3123 	selftest_state.guest[0] = PKVM_NOPAGE;
3124 	assert_transition_res(0,	__pkvm_host_unshare_guest, gfn, vm, 1);
3125 
3126 	selftest_state.guest[1] = PKVM_NOPAGE;
3127 	selftest_state.host = PKVM_PAGE_OWNED;
3128 	assert_transition_res(0,	__pkvm_host_unshare_guest, gfn + 1, vm, 1);
3129 
3130 	selftest_vm.kvm.arch.pkvm.enabled = true;
3131 	selftest_state.host = PKVM_NOPAGE;
3132 	selftest_state.guest[0] = PKVM_PAGE_OWNED;
3133 	assert_transition_res(0,	__pkvm_host_donate_guest, pfn, gfn, vcpu, 1);
3134 	assert_transition_res(-EPERM,	__pkvm_host_donate_guest, pfn, gfn, vcpu, 1);
3135 	assert_transition_res(-EPERM,	__pkvm_host_donate_guest, pfn, gfn + 1, vcpu, 1);
3136 	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn, vcpu, prot, 1);
3137 	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn + 1, vcpu, prot, 1);
3138 	assert_transition_res(-EPERM,	__pkvm_host_share_ffa, pfn, 1);
3139 	assert_transition_res(-EPERM,	__pkvm_host_donate_hyp, pfn, 1);
3140 	assert_transition_res(-EPERM,	__pkvm_host_share_hyp, pfn);
3141 	assert_transition_res(-EPERM,	__pkvm_host_unshare_hyp, pfn);
3142 	assert_transition_res(-EPERM,	__pkvm_hyp_donate_host, pfn, 1);
3143 
3144 	selftest_state.host = PKVM_PAGE_OWNED;
3145 	selftest_state.guest[0] = PKVM_NOPAGE;
3146 	assert_transition_res(0,	__pkvm_guest_relinquish_to_host, vcpu, gfn * PAGE_SIZE, &pa);
3147 	WARN_ON(pa != phys);
3148 
3149 	selftest_state.host = PKVM_NOPAGE;
3150 	selftest_state.hyp = PKVM_PAGE_OWNED;
3151 	assert_transition_res(0,	__pkvm_host_donate_hyp, pfn, 1);
3152 
3153 	teardown_selftest_vm();
3154 	selftest_page->refcount = 1;
3155 	hyp_put_page(&host_s2_pool, virt);
3156 }
3157 #endif
3158