1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (C) 2020 Google LLC
4 * Author: Quentin Perret <qperret@google.com>
5 */
6
7 #include <linux/kvm_host.h>
8 #include <asm/kvm_emulate.h>
9 #include <asm/kvm_hyp.h>
10 #include <asm/kvm_hypevents.h>
11 #include <asm/kvm_mmu.h>
12 #include <asm/kvm_pgtable.h>
13 #include <asm/kvm_pkvm.h>
14 #include <asm/stage2_pgtable.h>
15
16 #include <hyp/fault.h>
17
18 #include <nvhe/gfp.h>
19 #include <nvhe/iommu.h>
20 #include <nvhe/memory.h>
21 #include <nvhe/mem_protect.h>
22 #include <nvhe/mm.h>
23 #include <nvhe/modules.h>
24
25 #define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | \
26 KVM_PGTABLE_S2_IDMAP | \
27 KVM_PGTABLE_S2_PREFAULT_BLOCK)
28
29 struct host_mmu host_mmu;
30
31 struct pkvm_moveable_reg pkvm_moveable_regs[PKVM_NR_MOVEABLE_REGS];
32 unsigned int pkvm_moveable_regs_nr;
33
34 static struct hyp_pool host_s2_pool;
35
36 static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm);
37 #define current_vm (*this_cpu_ptr(&__current_vm))
38
39 static struct kvm_pgtable_pte_ops host_s2_pte_ops;
40 static bool host_stage2_force_pte(u64 addr, u64 end, enum kvm_pgtable_prot prot);
41 static bool host_stage2_pte_is_counted(kvm_pte_t pte, u32 level);
42 static bool guest_stage2_force_pte_cb(u64 addr, u64 end,
43 enum kvm_pgtable_prot prot);
44 static bool guest_stage2_pte_is_counted(kvm_pte_t pte, u32 level);
45
46 static struct kvm_pgtable_pte_ops guest_s2_pte_ops = {
47 .force_pte_cb = guest_stage2_force_pte_cb,
48 .pte_is_counted_cb = guest_stage2_pte_is_counted
49 };
50
guest_lock_component(struct pkvm_hyp_vm * vm)51 static void guest_lock_component(struct pkvm_hyp_vm *vm)
52 {
53 hyp_spin_lock(&vm->pgtable_lock);
54 current_vm = vm;
55 }
56
guest_unlock_component(struct pkvm_hyp_vm * vm)57 static void guest_unlock_component(struct pkvm_hyp_vm *vm)
58 {
59 current_vm = NULL;
60 hyp_spin_unlock(&vm->pgtable_lock);
61 }
62
host_lock_component(void)63 static void host_lock_component(void)
64 {
65 hyp_spin_lock(&host_mmu.lock);
66 }
67
host_unlock_component(void)68 static void host_unlock_component(void)
69 {
70 hyp_spin_unlock(&host_mmu.lock);
71 }
72
hyp_lock_component(void)73 static void hyp_lock_component(void)
74 {
75 hyp_spin_lock(&pkvm_pgd_lock);
76 }
77
hyp_unlock_component(void)78 static void hyp_unlock_component(void)
79 {
80 hyp_spin_unlock(&pkvm_pgd_lock);
81 }
82
host_s2_zalloc_pages_exact(size_t size)83 static void *host_s2_zalloc_pages_exact(size_t size)
84 {
85 void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));
86
87 hyp_split_page(hyp_virt_to_page(addr));
88
89 /*
90 * The size of concatenated PGDs is always a power of two of PAGE_SIZE,
91 * so there should be no need to free any of the tail pages to make the
92 * allocation exact.
93 */
94 WARN_ON(size != (PAGE_SIZE << get_order(size)));
95
96 return addr;
97 }
98
host_s2_zalloc_page(void * pool)99 static void *host_s2_zalloc_page(void *pool)
100 {
101 return hyp_alloc_pages(pool, 0);
102 }
103
host_s2_get_page(void * addr)104 static void host_s2_get_page(void *addr)
105 {
106 hyp_get_page(&host_s2_pool, addr);
107 }
108
host_s2_put_page(void * addr)109 static void host_s2_put_page(void *addr)
110 {
111 hyp_put_page(&host_s2_pool, addr);
112 }
113
host_s2_free_unlinked_table(void * addr,s8 level)114 static void host_s2_free_unlinked_table(void *addr, s8 level)
115 {
116 kvm_pgtable_stage2_free_unlinked(&host_mmu.mm_ops, host_mmu.pgt.pte_ops,
117 addr, level);
118 }
119
prepare_s2_pool(void * pgt_pool_base)120 static int prepare_s2_pool(void *pgt_pool_base)
121 {
122 unsigned long nr_pages, pfn;
123 int ret;
124
125 pfn = hyp_virt_to_pfn(pgt_pool_base);
126 nr_pages = host_s2_pgtable_pages();
127 ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0);
128 if (ret)
129 return ret;
130
131 host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) {
132 .zalloc_pages_exact = host_s2_zalloc_pages_exact,
133 .zalloc_page = host_s2_zalloc_page,
134 .free_unlinked_table = host_s2_free_unlinked_table,
135 .phys_to_virt = hyp_phys_to_virt,
136 .virt_to_phys = hyp_virt_to_phys,
137 .page_count = hyp_page_count,
138 .get_page = host_s2_get_page,
139 .put_page = host_s2_put_page,
140 };
141
142 return 0;
143 }
144
prepare_host_vtcr(void)145 static void prepare_host_vtcr(void)
146 {
147 u32 parange, phys_shift;
148
149 /* The host stage 2 is id-mapped, so use parange for T0SZ */
150 parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val);
151 phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);
152
153 host_mmu.arch.mmu.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
154 id_aa64mmfr1_el1_sys_val, phys_shift);
155 }
156
prepopulate_host_stage2(void)157 static int prepopulate_host_stage2(void)
158 {
159 struct memblock_region *reg;
160 int i, ret = 0;
161
162 for (i = 0; i < hyp_memblock_nr; i++) {
163 reg = &hyp_memory[i];
164 ret = host_stage2_idmap_locked(reg->base, reg->size, PKVM_HOST_MEM_PROT, false);
165 if (ret)
166 return ret;
167 }
168
169 return ret;
170 }
171
kvm_host_prepare_stage2(void * pgt_pool_base)172 int kvm_host_prepare_stage2(void *pgt_pool_base)
173 {
174 struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
175 int ret;
176
177 prepare_host_vtcr();
178 hyp_spin_lock_init(&host_mmu.lock);
179 mmu->arch = &host_mmu.arch;
180
181 ret = prepare_s2_pool(pgt_pool_base);
182 if (ret)
183 return ret;
184
185 host_s2_pte_ops.force_pte_cb = host_stage2_force_pte;
186 host_s2_pte_ops.pte_is_counted_cb = host_stage2_pte_is_counted;
187
188 ret = __kvm_pgtable_stage2_init(&host_mmu.pgt, mmu,
189 &host_mmu.mm_ops, KVM_HOST_S2_FLAGS,
190 &host_s2_pte_ops);
191 if (ret)
192 return ret;
193
194 mmu->pgd_phys = __hyp_pa(host_mmu.pgt.pgd);
195 mmu->pgt = &host_mmu.pgt;
196 atomic64_set(&mmu->vmid.id, 0);
197
198 return prepopulate_host_stage2();
199 }
200
guest_stage2_force_pte_cb(u64 addr,u64 end,enum kvm_pgtable_prot prot)201 static bool guest_stage2_force_pte_cb(u64 addr, u64 end,
202 enum kvm_pgtable_prot prot)
203 {
204 return false;
205 }
206
guest_stage2_pte_is_counted(kvm_pte_t pte,u32 level)207 static bool guest_stage2_pte_is_counted(kvm_pte_t pte, u32 level)
208 {
209 /*
210 * The refcount tracks valid entries as well as invalid entries if they
211 * encode ownership of a page to another entity than the page-table
212 * owner, whose id is 0.
213 */
214 return !!pte;
215 }
216
guest_s2_zalloc_pages_exact(size_t size)217 static void *guest_s2_zalloc_pages_exact(size_t size)
218 {
219 void *addr = hyp_alloc_pages(¤t_vm->pool, get_order(size));
220
221 WARN_ON(!addr || size != (PAGE_SIZE << get_order(size)));
222 hyp_split_page(hyp_virt_to_page(addr));
223
224 return addr;
225 }
226
guest_s2_free_pages_exact(void * addr,unsigned long size)227 static void guest_s2_free_pages_exact(void *addr, unsigned long size)
228 {
229 u8 order = get_order(size);
230 unsigned int i;
231
232 for (i = 0; i < (1 << order); i++)
233 hyp_put_page(¤t_vm->pool, addr + (i * PAGE_SIZE));
234 }
235
guest_s2_zalloc_page(void * mc)236 static void *guest_s2_zalloc_page(void *mc)
237 {
238 struct hyp_page *p;
239 void *addr;
240 unsigned long order;
241
242 addr = hyp_alloc_pages(¤t_vm->pool, 0);
243 if (addr)
244 return addr;
245
246 addr = pop_hyp_memcache(mc, hyp_phys_to_virt, &order);
247 if (!addr)
248 return addr;
249
250 WARN_ON(order);
251 memset(addr, 0, PAGE_SIZE);
252 p = hyp_virt_to_page(addr);
253 hyp_set_page_refcounted(p);
254 p->order = 0;
255
256 return addr;
257 }
258
guest_s2_get_page(void * addr)259 static void guest_s2_get_page(void *addr)
260 {
261 hyp_get_page(¤t_vm->pool, addr);
262 }
263
guest_s2_put_page(void * addr)264 static void guest_s2_put_page(void *addr)
265 {
266 hyp_put_page(¤t_vm->pool, addr);
267 }
268
__fixmap_guest_page(void * va,size_t * size)269 static void *__fixmap_guest_page(void *va, size_t *size)
270 {
271 void *addr;
272
273 if (WARN_ON(!IS_ALIGNED(*size, *size)))
274 return NULL;
275
276 if (IS_ALIGNED(*size, PMD_SIZE)) {
277 addr = hyp_fixblock_map(__hyp_pa(va));
278 if (addr)
279 return addr;
280
281 *size = PAGE_SIZE;
282 }
283
284 if (IS_ALIGNED(*size, PAGE_SIZE))
285 return hyp_fixmap_map(__hyp_pa(va));
286
287 WARN_ON(1);
288
289 return NULL;
290 }
291
__fixunmap_guest_page(size_t size)292 static void __fixunmap_guest_page(size_t size)
293 {
294 switch (size) {
295 case PAGE_SIZE:
296 hyp_fixmap_unmap();
297 break;
298 case PMD_SIZE:
299 hyp_fixblock_unmap();
300 break;
301 default:
302 BUG();
303 }
304 }
305
clean_dcache_guest_page(void * va,size_t size)306 static void clean_dcache_guest_page(void *va, size_t size)
307 {
308 while (size) {
309 size_t __size = size == PMD_SIZE ? size : PAGE_SIZE;
310 void *addr = __fixmap_guest_page(va, &__size);
311
312 __clean_dcache_guest_page(addr, __size);
313 __fixunmap_guest_page(__size);
314
315 size -= __size;
316 va += __size;
317 }
318 }
319
invalidate_icache_guest_page(void * va,size_t size)320 static void invalidate_icache_guest_page(void *va, size_t size)
321 {
322 while (size) {
323 size_t __size = size == PMD_SIZE ? size : PAGE_SIZE;
324 void *addr = __fixmap_guest_page(va, &__size);
325
326 __invalidate_icache_guest_page(addr, __size);
327 __fixunmap_guest_page(__size);
328
329 size -= __size;
330 va += __size;
331 }
332 }
333
kvm_guest_prepare_stage2(struct pkvm_hyp_vm * vm,void * pgd)334 int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
335 {
336 struct kvm_s2_mmu *mmu = &vm->kvm.arch.mmu;
337 unsigned long nr_pages;
338 int ret;
339
340 nr_pages = kvm_pgtable_stage2_pgd_size(mmu->vtcr) >> PAGE_SHIFT;
341 ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0);
342 if (ret)
343 return ret;
344
345 hyp_spin_lock_init(&vm->pgtable_lock);
346 vm->mm_ops = (struct kvm_pgtable_mm_ops) {
347 .zalloc_pages_exact = guest_s2_zalloc_pages_exact,
348 .free_pages_exact = guest_s2_free_pages_exact,
349 .zalloc_page = guest_s2_zalloc_page,
350 .phys_to_virt = hyp_phys_to_virt,
351 .virt_to_phys = hyp_virt_to_phys,
352 .page_count = hyp_page_count,
353 .get_page = guest_s2_get_page,
354 .put_page = guest_s2_put_page,
355 .dcache_clean_inval_poc = clean_dcache_guest_page,
356 .icache_inval_pou = invalidate_icache_guest_page,
357 };
358
359 guest_lock_component(vm);
360 ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops,
361 KVM_PGTABLE_S2_PREFAULT_BLOCK,
362 &guest_s2_pte_ops);
363 guest_unlock_component(vm);
364 if (ret)
365 return ret;
366
367 vm->kvm.arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd);
368
369 return 0;
370 }
371
guest_get_page_state(kvm_pte_t pte,u64 addr)372 static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr)
373 {
374 enum pkvm_page_state state = 0;
375 enum kvm_pgtable_prot prot;
376
377 if (!kvm_pte_valid(pte)) {
378 state = PKVM_NOPAGE;
379
380 if (pte == KVM_INVALID_PTE_MMIO_NOTE)
381 state |= PKVM_MMIO;
382
383 return state;
384 }
385
386 prot = kvm_pgtable_stage2_pte_prot(pte);
387 if (kvm_pte_valid(pte) && ((prot & KVM_PGTABLE_PROT_RWX) != KVM_PGTABLE_PROT_RWX))
388 state = PKVM_PAGE_RESTRICTED_PROT;
389
390 return state | pkvm_getstate(prot);
391 }
392
__pkvm_guest_relinquish_to_host(struct pkvm_hyp_vcpu * vcpu,u64 ipa,u64 * ppa)393 int __pkvm_guest_relinquish_to_host(struct pkvm_hyp_vcpu *vcpu,
394 u64 ipa, u64 *ppa)
395 {
396 struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
397 enum pkvm_page_state state;
398 u64 phys = 0, addr;
399 kvm_pte_t pte;
400 s8 level;
401 int ret;
402
403 if (!pkvm_hyp_vcpu_is_protected(vcpu))
404 return 0;
405
406 host_lock_component();
407 guest_lock_component(vm);
408
409 ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
410 if (ret || !kvm_pte_valid(pte))
411 goto end;
412
413 /* We don't support splitting non-leaf mappings */
414 if (level != KVM_PGTABLE_LAST_LEVEL) {
415 ret = -E2BIG;
416 goto end;
417 }
418
419 state = guest_get_page_state(pte, ipa);
420 if (state != PKVM_PAGE_OWNED) {
421 ret = -EPERM;
422 goto end;
423 }
424
425 addr = ALIGN_DOWN(ipa, kvm_granule_size(level));
426 phys = kvm_pte_to_phys(pte);
427 phys += ipa - addr;
428 /* page might be used for DMA! */
429 if (hyp_page_count(hyp_phys_to_virt(phys))) {
430 ret = -EBUSY;
431 goto end;
432 }
433
434 /* Zap the guest stage2 pte and return ownership to the host */
435 ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE);
436 if (ret)
437 goto end;
438
439 hyp_poison_page(phys, PAGE_SIZE);
440 psci_mem_protect_dec(1);
441
442 WARN_ON(host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HOST));
443
444 if (pkvm_ipa_range_has_pvmfw(vm, ipa, ipa + PAGE_SIZE))
445 vm->kvm.arch.pkvm.pvmfw_load_addr = PVMFW_INVALID_LOAD_ADDR;
446
447 end:
448 guest_unlock_component(vm);
449 host_unlock_component();
450
451 *ppa = phys;
452
453 return ret;
454 }
455
__pkvm_prot_finalize(void)456 int __pkvm_prot_finalize(void)
457 {
458 struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
459 struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
460
461 if (params->hcr_el2 & HCR_VM)
462 return -EPERM;
463
464 params->vttbr = kvm_get_vttbr(mmu);
465 params->vtcr = mmu->vtcr;
466 params->hcr_el2 |= HCR_VM;
467
468 /*
469 * The CMO below not only cleans the updated params to the
470 * PoC, but also provides the DSB that ensures ongoing
471 * page-table walks that have started before we trapped to EL2
472 * have completed.
473 */
474 kvm_flush_dcache_to_poc(params, sizeof(*params));
475
476 write_sysreg(params->hcr_el2, hcr_el2);
477 __load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);
478
479 /*
480 * Make sure to have an ISB before the TLB maintenance below but only
481 * when __load_stage2() doesn't include one already.
482 */
483 asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));
484
485 /* Invalidate stale HCR bits that may be cached in TLBs */
486 __tlbi(vmalls12e1);
487 dsb(nsh);
488 isb();
489
490 __pkvm_close_module_registration();
491
492 return 0;
493 }
494
host_stage2_unmap_reg_locked(phys_addr_t start,u64 size)495 int host_stage2_unmap_reg_locked(phys_addr_t start, u64 size)
496 {
497 hyp_assert_lock_held(&host_mmu.lock);
498
499 return kvm_pgtable_stage2_reclaim_leaves(&host_mmu.pgt, start, size);
500 }
501
host_stage2_unmap_unmoveable_regs(void)502 static int host_stage2_unmap_unmoveable_regs(void)
503 {
504 struct kvm_pgtable *pgt = &host_mmu.pgt;
505 struct pkvm_moveable_reg *reg;
506 u64 addr = 0;
507 int i, ret;
508
509 /* Unmap all unmoveable regions to recycle the pages */
510 for (i = 0; i < pkvm_moveable_regs_nr; i++) {
511 reg = &pkvm_moveable_regs[i];
512 if (reg->start > addr) {
513 ret = host_stage2_unmap_reg_locked(addr, reg->start - addr);
514 if (ret)
515 return ret;
516 }
517 addr = max(addr, reg->start + reg->size);
518 }
519 return host_stage2_unmap_reg_locked(addr, BIT(pgt->ia_bits) - addr);
520 }
521
522 struct kvm_mem_range {
523 u64 start;
524 u64 end;
525 };
526
find_mem_range(phys_addr_t addr,struct kvm_mem_range * range)527 static struct memblock_region *find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
528 {
529 int cur, left = 0, right = hyp_memblock_nr;
530 struct memblock_region *reg;
531 phys_addr_t end;
532
533 range->start = 0;
534 range->end = ULONG_MAX;
535
536 /* The list of memblock regions is sorted, binary search it */
537 while (left < right) {
538 cur = (left + right) >> 1;
539 reg = &hyp_memory[cur];
540 end = reg->base + reg->size;
541 if (addr < reg->base) {
542 right = cur;
543 range->end = reg->base;
544 } else if (addr >= end) {
545 left = cur + 1;
546 range->start = end;
547 } else {
548 range->start = reg->base;
549 range->end = end;
550 return reg;
551 }
552 }
553
554 return NULL;
555 }
556
default_host_prot(bool is_memory)557 static enum kvm_pgtable_prot default_host_prot(bool is_memory)
558 {
559 return is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;
560 }
561
default_hyp_prot(phys_addr_t phys)562 static enum kvm_pgtable_prot default_hyp_prot(phys_addr_t phys)
563 {
564 return addr_is_memory(phys) ? PAGE_HYP : PAGE_HYP_DEVICE;
565 }
566
567 /*
568 * Use NORMAL_NC for guest MMIO, when a guest has:
569 * No FWB: It will combined with stage-1 attrs where device has precedence over normal.
570 * FWB: With MT_S2_FWB_NORMAL_NC encoding, results in device if stage-1 used device attr.
571 * otherwise NC.
572 */
default_guest_prot(bool is_memory)573 static enum kvm_pgtable_prot default_guest_prot(bool is_memory)
574 {
575 return is_memory ? KVM_PGTABLE_PROT_RWX :
576 KVM_PGTABLE_PROT_RW | KVM_PGTABLE_PROT_NORMAL_NC;
577 }
578
addr_is_memory(phys_addr_t phys)579 bool addr_is_memory(phys_addr_t phys)
580 {
581 struct kvm_mem_range range;
582
583 return !!find_mem_range(phys, &range);
584 }
585
is_in_mem_range(u64 addr,struct kvm_mem_range * range)586 static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
587 {
588 return range->start <= addr && addr < range->end;
589 }
590
check_range_allowed_memory(u64 start,u64 end)591 static int check_range_allowed_memory(u64 start, u64 end)
592 {
593 struct memblock_region *reg;
594 struct kvm_mem_range range;
595
596 /*
597 * Callers can't check the state of a range that overlaps memory and
598 * MMIO regions, so ensure [start, end[ is in the same kvm_mem_range.
599 */
600 reg = find_mem_range(start, &range);
601 if (!is_in_mem_range(end - 1, &range))
602 return -EINVAL;
603
604 if (!reg || reg->flags & MEMBLOCK_NOMAP)
605 return -EPERM;
606
607 return 0;
608 }
609
range_is_memory(u64 start,u64 end)610 static bool range_is_memory(u64 start, u64 end)
611 {
612 struct kvm_mem_range r;
613
614 if (!find_mem_range(start, &r))
615 return false;
616
617 return is_in_mem_range(end - 1, &r);
618 }
619
range_is_allowed_memory(u64 start,u64 end)620 static bool range_is_allowed_memory(u64 start, u64 end)
621 {
622 struct memblock_region *reg;
623 struct kvm_mem_range range;
624
625 reg = find_mem_range(start, &range);
626 if (!reg)
627 return false;
628
629 if (!is_in_mem_range(end - 1, &range))
630 return false;
631
632 return !(reg->flags & MEMBLOCK_NOMAP);
633 }
634
__host_stage2_idmap(u64 start,u64 end,enum kvm_pgtable_prot prot,bool update_iommu)635 static inline int __host_stage2_idmap(u64 start, u64 end,
636 enum kvm_pgtable_prot prot,
637 bool update_iommu)
638 {
639 int ret;
640
641 ret = kvm_pgtable_stage2_map(&host_mmu.pgt, start, end - start, start,
642 prot, &host_s2_pool, 0);
643 if (ret)
644 return ret;
645
646 if (update_iommu) {
647 kvm_iommu_host_stage2_idmap(start, end, prot);
648 kvm_iommu_host_stage2_idmap_complete(!!prot);
649 }
650
651 return 0;
652 }
653
654 /*
655 * The pool has been provided with enough pages to cover all of moveable regions
656 * with page granularity, but it is difficult to know how much of the
657 * non-moveable regions we will need to cover upfront, so we may need to
658 * 'recycle' the pages if we run out.
659 */
660 #define host_stage2_try(fn, ...) \
661 ({ \
662 int __ret; \
663 hyp_assert_lock_held(&host_mmu.lock); \
664 __ret = fn(__VA_ARGS__); \
665 if (__ret == -ENOMEM) { \
666 __ret = host_stage2_unmap_unmoveable_regs(); \
667 if (!__ret) \
668 __ret = fn(__VA_ARGS__); \
669 } \
670 __ret; \
671 })
672
range_included(struct kvm_mem_range * child,struct kvm_mem_range * parent)673 static inline bool range_included(struct kvm_mem_range *child,
674 struct kvm_mem_range *parent)
675 {
676 return parent->start <= child->start && child->end <= parent->end;
677 }
678
host_stage2_adjust_range(u64 addr,struct kvm_mem_range * range)679 static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
680 {
681 struct kvm_mem_range cur;
682 kvm_pte_t pte;
683 s8 level;
684 int ret;
685
686 hyp_assert_lock_held(&host_mmu.lock);
687 ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level);
688 if (ret)
689 return ret;
690
691 if (kvm_pte_valid(pte))
692 return -EAGAIN;
693
694 if (pte) {
695 WARN_ON(addr_is_memory(addr) &&
696 !(hyp_phys_to_page(addr)->host_state & PKVM_NOPAGE));
697 return -EPERM;
698 }
699
700 do {
701 u64 granule = kvm_granule_size(level);
702 cur.start = ALIGN_DOWN(addr, granule);
703 cur.end = cur.start + granule;
704 level++;
705 } while ((level <= KVM_PGTABLE_LAST_LEVEL) &&
706 !(kvm_level_supports_block_mapping(level) &&
707 range_included(&cur, range)));
708
709 *range = cur;
710
711 return 0;
712 }
713
host_stage2_idmap_locked(phys_addr_t addr,u64 size,enum kvm_pgtable_prot prot,bool update_iommu)714 int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
715 enum kvm_pgtable_prot prot,
716 bool update_iommu)
717 {
718 return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot, update_iommu);
719 }
720
721 #define KVM_MAX_OWNER_ID FIELD_MAX(KVM_INVALID_PTE_OWNER_MASK)
722
kvm_init_invalid_leaf_owner(u8 owner_id)723 static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id)
724 {
725 return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id);
726 }
727
__host_update_page_state(phys_addr_t addr,u64 size,enum pkvm_page_state state)728 static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state)
729 {
730 phys_addr_t end = addr + size;
731
732 for (; addr < end; addr += PAGE_SIZE)
733 hyp_phys_to_page(addr)->host_state = state;
734 }
735
__host_stage2_set_owner_locked(phys_addr_t addr,u64 size,u8 owner_id,bool is_memory,enum pkvm_page_state nopage_state,bool update_iommu)736 static int __host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id, bool is_memory,
737 enum pkvm_page_state nopage_state, bool update_iommu)
738 {
739 kvm_pte_t annotation;
740 enum kvm_pgtable_prot prot;
741 int ret;
742
743 if (owner_id > KVM_MAX_OWNER_ID)
744 return -EINVAL;
745
746 if (owner_id == PKVM_ID_HOST) {
747 prot = default_host_prot(addr_is_memory(addr));
748 ret = host_stage2_idmap_locked(addr, size, prot, false);
749 } else {
750 annotation = kvm_init_invalid_leaf_owner(owner_id);
751 ret = host_stage2_try(kvm_pgtable_stage2_annotate,
752 &host_mmu.pgt,
753 addr, size, &host_s2_pool, annotation);
754 }
755 if (ret)
756 return ret;
757
758 if (update_iommu) {
759 prot = owner_id == PKVM_ID_HOST ? PKVM_HOST_MEM_PROT : 0;
760 kvm_iommu_host_stage2_idmap(addr, addr + size, prot);
761 kvm_iommu_host_stage2_idmap_complete(!!prot);
762 }
763
764 if (!is_memory)
765 return 0;
766
767 /* Don't forget to update the vmemmap tracking for the host */
768 if (owner_id == PKVM_ID_HOST)
769 __host_update_page_state(addr, size, PKVM_PAGE_OWNED);
770 else
771 __host_update_page_state(addr, size, PKVM_NOPAGE | nopage_state);
772
773 return 0;
774 }
775
host_stage2_set_owner_locked(phys_addr_t addr,u64 size,u8 owner_id)776 int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
777 {
778 return __host_stage2_set_owner_locked(addr, size, owner_id, addr_is_memory(addr), 0, true);
779 }
780
host_stage2_force_pte(u64 addr,u64 end,enum kvm_pgtable_prot prot)781 static bool host_stage2_force_pte(u64 addr, u64 end, enum kvm_pgtable_prot prot)
782 {
783 /*
784 * Block mappings must be used with care in the host stage-2 as a
785 * kvm_pgtable_stage2_map() operation targeting a page in the range of
786 * an existing block will delete the block under the assumption that
787 * mappings in the rest of the block range can always be rebuilt lazily.
788 * That assumption is correct for the host stage-2 with RWX mappings
789 * targeting memory or RW mappings targeting MMIO ranges (see
790 * host_stage2_idmap() below which implements some of the host memory
791 * abort logic). However, this is not safe for any other mappings where
792 * the host stage-2 page-table is in fact the only place where this
793 * state is stored. In all those cases, it is safer to use page-level
794 * mappings, hence avoiding to lose the state because of side-effects in
795 * kvm_pgtable_stage2_map().
796 */
797 return prot != default_host_prot(range_is_memory(addr, end));
798 }
799
host_stage2_pte_is_counted(kvm_pte_t pte,u32 level)800 static bool host_stage2_pte_is_counted(kvm_pte_t pte, u32 level)
801 {
802 u64 phys;
803
804 if (!kvm_pte_valid(pte))
805 return !!pte;
806
807 if (kvm_pte_table(pte, level))
808 return true;
809
810 phys = kvm_pte_to_phys(pte);
811 if (addr_is_memory(phys))
812 return (pte & KVM_HOST_S2_DEFAULT_MASK) !=
813 KVM_HOST_S2_DEFAULT_MEM_PTE;
814
815 return (pte & KVM_HOST_S2_DEFAULT_MASK) != KVM_HOST_S2_DEFAULT_MMIO_PTE;
816 }
817
host_stage2_idmap(u64 addr)818 static int host_stage2_idmap(u64 addr)
819 {
820 struct kvm_mem_range range;
821 bool is_memory = !!find_mem_range(addr, &range);
822 enum kvm_pgtable_prot prot = default_host_prot(is_memory);
823 int ret;
824 bool update_iommu = !is_memory;
825
826 host_lock_component();
827 ret = host_stage2_adjust_range(addr, &range);
828 if (ret)
829 goto unlock;
830
831 ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot, update_iommu);
832 unlock:
833 host_unlock_component();
834
835 return ret;
836 }
837
838 static void (*illegal_abt_notifier)(struct user_pt_regs *regs);
839
__pkvm_register_illegal_abt_notifier(void (* cb)(struct user_pt_regs *))840 int __pkvm_register_illegal_abt_notifier(void (*cb)(struct user_pt_regs *))
841 {
842 return cmpxchg(&illegal_abt_notifier, NULL, cb) ? -EBUSY : 0;
843 }
844
host_inject_abort(struct kvm_cpu_context * host_ctxt)845 static void host_inject_abort(struct kvm_cpu_context *host_ctxt)
846 {
847 u64 spsr = read_sysreg_el2(SYS_SPSR);
848 u64 esr = read_sysreg_el2(SYS_ESR);
849 u64 ventry, ec;
850
851 if (READ_ONCE(illegal_abt_notifier))
852 illegal_abt_notifier(&host_ctxt->regs);
853
854 /* Repaint the ESR to report a same-level fault if taken from EL1 */
855 if ((spsr & PSR_MODE_MASK) != PSR_MODE_EL0t) {
856 ec = ESR_ELx_EC(esr);
857 if (ec == ESR_ELx_EC_DABT_LOW)
858 ec = ESR_ELx_EC_DABT_CUR;
859 else if (ec == ESR_ELx_EC_IABT_LOW)
860 ec = ESR_ELx_EC_IABT_CUR;
861 else
862 WARN_ON(1);
863 esr &= ~ESR_ELx_EC_MASK;
864 esr |= ec << ESR_ELx_EC_SHIFT;
865 }
866
867 /*
868 * Since S1PTW should only ever be set for stage-2 faults, we're pretty
869 * much guaranteed that it won't be set in ESR_EL1 by the hardware. So,
870 * let's use that bit to allow the host abort handler to differentiate
871 * this abort from normal userspace faults.
872 *
873 * Note: although S1PTW is RES0 at EL1, it is guaranteed by the
874 * architecture to be backed by flops, so it should be safe to use.
875 */
876 esr |= ESR_ELx_S1PTW;
877
878 write_sysreg_el1(esr, SYS_ESR);
879 write_sysreg_el1(spsr, SYS_SPSR);
880 write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR);
881 write_sysreg_el1(read_sysreg_el2(SYS_FAR), SYS_FAR);
882
883 ventry = read_sysreg_el1(SYS_VBAR);
884 ventry += get_except64_offset(spsr, PSR_MODE_EL1h, except_type_sync);
885 write_sysreg_el2(ventry, SYS_ELR);
886
887 spsr = get_except64_cpsr(spsr, system_supports_mte(),
888 read_sysreg_el1(SYS_SCTLR), PSR_MODE_EL1h);
889 write_sysreg_el2(spsr, SYS_SPSR);
890 }
891
is_dabt(u64 esr)892 static bool is_dabt(u64 esr)
893 {
894 return ESR_ELx_EC(esr) == ESR_ELx_EC_DABT_LOW;
895 }
896
handle_host_mem_abort(struct kvm_cpu_context * host_ctxt)897 void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
898 {
899 struct kvm_vcpu_fault_info fault;
900 u64 esr, addr;
901 int ret;
902
903 esr = read_sysreg_el2(SYS_ESR);
904 if (!__get_fault_info(esr, &fault)) {
905 /* Setting the address to an invalid value for use in tracing. */
906 addr = (u64)-1;
907 /*
908 * We've presumably raced with a page-table change which caused
909 * AT to fail, try again.
910 */
911 return;
912 }
913
914 addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
915 addr |= fault.far_el2 & FAR_MASK;
916
917 if (is_dabt(esr) && !addr_is_memory(addr) &&
918 kvm_iommu_host_dabt_handler(host_ctxt, esr, addr))
919 goto return_to_host;
920
921 switch (esr & ESR_ELx_FSC_TYPE) {
922 case ESR_ELx_FSC_FAULT:
923 ret = host_stage2_idmap(addr);
924 break;
925 case ESR_ELx_FSC_PERM:
926 ret = module_handle_host_perm_fault(&host_ctxt->regs, esr, addr);
927 ret = ret ? 0 /* handled */ : -EPERM;
928 break;
929 default:
930 ret = -EPERM;
931 break;
932 }
933
934 if (ret == -EPERM)
935 host_inject_abort(host_ctxt);
936 else
937 BUG_ON(ret && ret != -EAGAIN);
938
939 return_to_host:
940 trace_host_mem_abort(esr, addr);
941 }
942
943 struct check_walk_data {
944 enum pkvm_page_state desired;
945 enum pkvm_page_state (*get_page_state)(kvm_pte_t pte, u64 addr);
946 };
947
__check_page_state_visitor(const struct kvm_pgtable_visit_ctx * ctx,enum kvm_pgtable_walk_flags visit)948 static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
949 enum kvm_pgtable_walk_flags visit)
950 {
951 struct check_walk_data *d = ctx->arg;
952
953 return d->get_page_state(ctx->old, ctx->addr) == d->desired ? 0 : -EPERM;
954 }
955
check_page_state_range(struct kvm_pgtable * pgt,u64 addr,u64 size,struct check_walk_data * data)956 static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
957 struct check_walk_data *data)
958 {
959 struct kvm_pgtable_walker walker = {
960 .cb = __check_page_state_visitor,
961 .arg = data,
962 .flags = KVM_PGTABLE_WALK_LEAF,
963 };
964
965 return kvm_pgtable_walk(pgt, addr, size, &walker);
966 }
967
host_get_mmio_page_state(kvm_pte_t pte,u64 addr)968 static enum pkvm_page_state host_get_mmio_page_state(kvm_pte_t pte, u64 addr)
969 {
970 enum pkvm_page_state state = 0;
971 enum kvm_pgtable_prot prot;
972
973 WARN_ON(addr_is_memory(addr));
974
975 if (!kvm_pte_valid(pte) && pte)
976 return PKVM_NOPAGE;
977
978 prot = kvm_pgtable_stage2_pte_prot(pte);
979 if (kvm_pte_valid(pte)) {
980 if ((prot & KVM_PGTABLE_PROT_RWX) != PKVM_HOST_MMIO_PROT)
981 state = PKVM_PAGE_RESTRICTED_PROT;
982 }
983
984 return state | pkvm_getstate(prot);
985 }
986
987 enum host_check_page_state_flags {
988 HOST_CHECK_NULL_REFCNT = BIT(0),
989 HOST_CHECK_IS_MEMORY = BIT(1),
990 };
991
___host_check_page_state_range(u64 addr,u64 size,enum pkvm_page_state state,enum host_check_page_state_flags flags)992 static int ___host_check_page_state_range(u64 addr, u64 size,
993 enum pkvm_page_state state,
994 enum host_check_page_state_flags flags)
995 {
996 struct check_walk_data d = {
997 .desired = state,
998 .get_page_state = host_get_mmio_page_state,
999 };
1000 struct hyp_page *p;
1001 struct memblock_region *reg;
1002 struct kvm_mem_range range;
1003 u64 end;
1004
1005 if (check_add_overflow(addr, size, &end))
1006 return -EINVAL;
1007
1008 /* Can't check the state of both MMIO and memory regions at once */
1009 reg = find_mem_range(addr, &range);
1010 if (!reg && (flags & HOST_CHECK_IS_MEMORY))
1011 return -EINVAL;
1012
1013 if (!is_in_mem_range(end - 1, &range))
1014 return -EINVAL;
1015
1016 hyp_assert_lock_held(&host_mmu.lock);
1017
1018 /* MMIO state is still in the page-table */
1019 if (!reg)
1020 return check_page_state_range(&host_mmu.pgt, addr, size, &d);
1021
1022 if (reg->flags & MEMBLOCK_NOMAP)
1023 return -EPERM;
1024
1025 for (; addr < end; addr += PAGE_SIZE) {
1026 p = hyp_phys_to_page(addr);
1027 if (p->host_state != state)
1028 return -EPERM;
1029 if ((flags & HOST_CHECK_NULL_REFCNT) && hyp_refcount_get(p->refcount))
1030 return -EINVAL;
1031 }
1032
1033 /*
1034 * All memory pages with restricted permissions will already be covered
1035 * by other states (e.g. PKVM_MODULE_OWNED_PAGE), so no need to retrieve
1036 * the PKVM_PAGE_RESTRICTED_PROT state from the PTE.
1037 */
1038
1039 return 0;
1040 }
1041
__host_check_page_state_range(u64 addr,u64 size,enum pkvm_page_state state)1042 static int __host_check_page_state_range(u64 addr, u64 size,
1043 enum pkvm_page_state state)
1044 {
1045 enum host_check_page_state_flags flags = HOST_CHECK_IS_MEMORY;
1046
1047 if (state == PKVM_PAGE_OWNED)
1048 flags |= HOST_CHECK_NULL_REFCNT;
1049
1050 /* Check the refcount of PAGE_OWNED pages as those may be used for DMA. */
1051 return ___host_check_page_state_range(addr, size, state, flags);
1052 }
1053
__host_set_page_state_range(u64 addr,u64 size,enum pkvm_page_state state)1054 static int __host_set_page_state_range(u64 addr, u64 size,
1055 enum pkvm_page_state state)
1056 {
1057 if (hyp_phys_to_page(addr)->host_state & PKVM_NOPAGE) {
1058 int ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT, true);
1059
1060 if (ret)
1061 return ret;
1062 }
1063
1064 __host_update_page_state(addr, size, state);
1065
1066 return 0;
1067 }
1068
hyp_get_page_state(kvm_pte_t pte,u64 addr)1069 static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr)
1070 {
1071 enum pkvm_page_state state = 0;
1072 enum kvm_pgtable_prot prot;
1073
1074 if (!kvm_pte_valid(pte))
1075 return PKVM_NOPAGE;
1076
1077 prot = kvm_pgtable_hyp_pte_prot(pte);
1078 if (kvm_pte_valid(pte) && ((prot & KVM_PGTABLE_PROT_RWX) != PAGE_HYP))
1079 state = PKVM_PAGE_RESTRICTED_PROT;
1080
1081 return state | pkvm_getstate(prot);
1082 }
1083
__hyp_check_page_state_range(u64 addr,u64 size,enum pkvm_page_state state)1084 static int __hyp_check_page_state_range(u64 addr, u64 size,
1085 enum pkvm_page_state state)
1086 {
1087 struct check_walk_data d = {
1088 .desired = state,
1089 .get_page_state = hyp_get_page_state,
1090 };
1091
1092 hyp_assert_lock_held(&pkvm_pgd_lock);
1093 return check_page_state_range(&pkvm_pgtable, addr, size, &d);
1094 }
1095
hyp_check_range_owned(u64 phys_addr,u64 size)1096 int hyp_check_range_owned(u64 phys_addr, u64 size)
1097 {
1098 int ret;
1099
1100 hyp_lock_component();
1101 ret = __hyp_check_page_state_range((u64)hyp_phys_to_virt(phys_addr),
1102 size, PKVM_PAGE_OWNED);
1103 hyp_unlock_component();
1104
1105 return ret;
1106 }
1107
__guest_check_page_state_range(struct pkvm_hyp_vcpu * vcpu,u64 addr,u64 size,enum pkvm_page_state state)1108 static int __guest_check_page_state_range(struct pkvm_hyp_vcpu *vcpu, u64 addr,
1109 u64 size, enum pkvm_page_state state)
1110 {
1111 struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
1112 struct check_walk_data d = {
1113 .desired = state,
1114 .get_page_state = guest_get_page_state,
1115 };
1116 u64 end;
1117
1118 if (check_add_overflow(addr, size, &end))
1119 return -EINVAL;
1120
1121 hyp_assert_lock_held(&vm->pgtable_lock);
1122 return check_page_state_range(&vm->pgt, addr, size, &d);
1123 }
1124
1125 struct guest_request_walker_data {
1126 unsigned long ipa_start;
1127 kvm_pte_t pte_start;
1128 u64 size;
1129 enum pkvm_page_state desired_state;
1130 enum pkvm_page_state desired_mask;
1131 int max_ptes;
1132 };
1133
1134 #define GUEST_WALKER_DATA_INIT(__state) \
1135 { \
1136 .size = 0, \
1137 .desired_state = __state, \
1138 .desired_mask = ~0, \
1139 /* \
1140 * Arbitrary limit of walked PTEs to restrict \
1141 * the time spent at EL2 \
1142 */ \
1143 .max_ptes = 512, \
1144 }
1145
guest_request_walker(const struct kvm_pgtable_visit_ctx * ctx,enum kvm_pgtable_walk_flags visit)1146 static int guest_request_walker(const struct kvm_pgtable_visit_ctx *ctx,
1147 enum kvm_pgtable_walk_flags visit)
1148 {
1149 struct guest_request_walker_data *data = (struct guest_request_walker_data *)ctx->arg;
1150 enum pkvm_page_state state;
1151 kvm_pte_t pte = *ctx->ptep;
1152 phys_addr_t phys = kvm_pte_to_phys(pte);
1153 u32 level = ctx->level;
1154
1155 state = guest_get_page_state(pte, 0);
1156 if (data->desired_state != (state & data->desired_mask))
1157 return (state & PKVM_NOPAGE) ? -EFAULT : -EPERM;
1158
1159 data->max_ptes--;
1160
1161 if (!data->size) {
1162 data->pte_start = pte;
1163 data->size = kvm_granule_size(level);
1164 data->ipa_start = ctx->addr & ~(kvm_granule_size(level) - 1);
1165
1166 goto end;
1167 }
1168
1169 if (kvm_pgtable_stage2_pte_prot(pte) !=
1170 kvm_pgtable_stage2_pte_prot(data->pte_start))
1171 return -EINVAL;
1172
1173 /* Can only describe physically contiguous mappings */
1174 if (kvm_pte_valid(data->pte_start) &&
1175 (phys != kvm_pte_to_phys(data->pte_start) + data->size))
1176 return -E2BIG;
1177
1178 data->size += kvm_granule_size(level);
1179
1180 end:
1181 return --data->max_ptes > 0 ? 0 : -E2BIG;
1182 }
1183
__guest_request_page_transition(u64 ipa,kvm_pte_t * __pte,u64 * __nr_pages,struct pkvm_hyp_vcpu * vcpu,enum pkvm_page_state desired)1184 static int __guest_request_page_transition(u64 ipa, kvm_pte_t *__pte, u64 *__nr_pages,
1185 struct pkvm_hyp_vcpu *vcpu,
1186 enum pkvm_page_state desired)
1187 {
1188 struct guest_request_walker_data data = GUEST_WALKER_DATA_INIT(desired);
1189 struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
1190 struct kvm_pgtable_walker walker = {
1191 .cb = guest_request_walker,
1192 .flags = KVM_PGTABLE_WALK_LEAF,
1193 .arg = (void *)&data,
1194 };
1195 phys_addr_t phys, phys_offset;
1196 kvm_pte_t pte;
1197 int ret = kvm_pgtable_walk(&vm->pgt, ipa, *__nr_pages * PAGE_SIZE, &walker);
1198
1199 /* Walker reached data.max_ptes or a non physically contiguous block */
1200 if (ret == -E2BIG)
1201 ret = 0;
1202 else if (ret)
1203 return ret;
1204
1205 if (WARN_ON(!kvm_pte_valid(data.pte_start)))
1206 return -EINVAL;
1207
1208 phys = kvm_pte_to_phys(data.pte_start);
1209 if (!range_is_allowed_memory(phys, phys + data.size))
1210 return -EINVAL;
1211
1212 if (data.ipa_start > ipa)
1213 return -EINVAL;
1214
1215 /*
1216 * transition not aligned with block memory mapping. They'll be broken
1217 * down and memory donation will be needed.
1218 */
1219 phys_offset = ipa - data.ipa_start;
1220 if (phys_offset || (*__nr_pages * PAGE_SIZE < data.size)) {
1221 struct pkvm_hyp_vcpu *hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
1222 int min_pages;
1223
1224 if (WARN_ON(!hyp_vcpu))
1225 return -EINVAL;
1226
1227 min_pages = kvm_mmu_cache_min_pages(&hyp_vcpu->vcpu.kvm->arch.mmu);
1228 if (hyp_vcpu->vcpu.arch.stage2_mc.nr_pages < min_pages)
1229 return -ENOMEM;
1230 }
1231
1232 phys = kvm_pte_to_phys(data.pte_start) + phys_offset;
1233 pte = data.pte_start & ~kvm_phys_to_pte(KVM_PHYS_INVALID);
1234 pte |= kvm_phys_to_pte(phys);
1235
1236 if (WARN_ON(phys_offset >= data.size))
1237 return -EINVAL;
1238
1239 *__pte = pte;
1240 *__nr_pages = min_t(u64, (data.size - phys_offset) >> PAGE_SHIFT,
1241 *__nr_pages);
1242
1243 return 0;
1244 }
1245
__guest_initiate_page_transition(u64 ipa,kvm_pte_t pte,u64 nr_pages,struct pkvm_hyp_vcpu * vcpu,enum pkvm_page_state state)1246 static int __guest_initiate_page_transition(u64 ipa, kvm_pte_t pte, u64 nr_pages,
1247 struct pkvm_hyp_vcpu *vcpu,
1248 enum pkvm_page_state state)
1249 {
1250 struct kvm_hyp_memcache *mc = &vcpu->vcpu.arch.stage2_mc;
1251 struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
1252 u64 size = nr_pages * PAGE_SIZE;
1253 enum kvm_pgtable_prot prot;
1254 u64 phys;
1255 int ret;
1256
1257 phys = kvm_pte_to_phys(pte);
1258 prot = pkvm_mkstate(kvm_pgtable_stage2_pte_prot(pte), state);
1259 ret = kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys, prot, mc, 0);
1260 if (ret)
1261 return ret;
1262
1263 return 0;
1264 }
1265
__pkvm_host_share_hyp(u64 pfn)1266 int __pkvm_host_share_hyp(u64 pfn)
1267 {
1268 u64 phys = hyp_pfn_to_phys(pfn);
1269 void *virt = __hyp_va(phys);
1270 enum kvm_pgtable_prot prot;
1271 u64 size = PAGE_SIZE;
1272 int ret;
1273
1274 host_lock_component();
1275 hyp_lock_component();
1276
1277 ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
1278 if (ret)
1279 goto unlock;
1280 if (IS_ENABLED(CONFIG_PKVM_STRICT_CHECKS)) {
1281 ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE);
1282 if (ret)
1283 goto unlock;
1284 }
1285
1286 prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
1287 ret = pkvm_create_mappings_locked(virt, virt + size, prot);
1288 if (ret) {
1289 WARN_ON(ret != -ENOMEM);
1290 /* We might have failed halfway through, so remove anything we've installed */
1291 pkvm_remove_mappings_locked(virt, virt + size);
1292 goto unlock;
1293 }
1294
1295 WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED));
1296
1297 unlock:
1298 hyp_unlock_component();
1299 host_unlock_component();
1300 return ret;
1301 }
1302
__pkvm_host_unshare_hyp(u64 pfn)1303 int __pkvm_host_unshare_hyp(u64 pfn)
1304 {
1305 u64 phys = hyp_pfn_to_phys(pfn);
1306 u64 virt = (u64)__hyp_va(phys);
1307 u64 size = PAGE_SIZE;
1308 int ret;
1309
1310 host_lock_component();
1311 hyp_lock_component();
1312
1313 ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
1314 if (ret)
1315 goto unlock;
1316 ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_SHARED_BORROWED);
1317 if (ret)
1318 goto unlock;
1319 if (hyp_page_count((void *)virt)) {
1320 ret = -EBUSY;
1321 goto unlock;
1322 }
1323
1324 WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
1325 WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_OWNED));
1326
1327 unlock:
1328 hyp_unlock_component();
1329 host_unlock_component();
1330
1331 return ret;
1332 }
1333
__pkvm_guest_share_host(struct pkvm_hyp_vcpu * vcpu,u64 ipa,u64 nr_pages,u64 * nr_shared)1334 int __pkvm_guest_share_host(struct pkvm_hyp_vcpu *vcpu, u64 ipa, u64 nr_pages,
1335 u64 *nr_shared)
1336 {
1337 struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
1338 kvm_pte_t pte;
1339 size_t size;
1340 u64 phys;
1341 int ret;
1342
1343 host_lock_component();
1344 guest_lock_component(vm);
1345
1346 ret = __guest_request_page_transition(ipa, &pte, &nr_pages, vcpu, PKVM_PAGE_OWNED);
1347 if (ret)
1348 goto unlock;
1349
1350 phys = kvm_pte_to_phys(pte);
1351 if (check_shl_overflow(nr_pages, PAGE_SHIFT, &size)) {
1352 ret = -EINVAL;
1353 goto unlock;
1354 }
1355
1356 ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE);
1357 if (ret)
1358 goto unlock;
1359
1360 WARN_ON(__guest_initiate_page_transition(ipa, pte, nr_pages, vcpu, PKVM_PAGE_SHARED_OWNED));
1361 WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED));
1362 psci_mem_protect_dec(nr_pages);
1363 *nr_shared = nr_pages;
1364
1365 unlock:
1366 guest_unlock_component(vm);
1367 host_unlock_component();
1368
1369 return ret;
1370 }
1371
__pkvm_guest_share_hyp_page(struct pkvm_hyp_vcpu * vcpu,u64 ipa,u64 * hyp_va)1372 int __pkvm_guest_share_hyp_page(struct pkvm_hyp_vcpu *vcpu, u64 ipa, u64 *hyp_va)
1373 {
1374 int ret;
1375 struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
1376 kvm_pte_t pte;
1377 u64 phys;
1378 enum kvm_pgtable_prot prot;
1379 void *virt;
1380 u64 nr_pages = 1;
1381
1382 hyp_lock_component();
1383 guest_lock_component(vm);
1384
1385 ret = __guest_request_page_transition(ipa, &pte, &nr_pages, vcpu, PKVM_PAGE_OWNED);
1386 if (ret)
1387 goto unlock;
1388
1389 phys = kvm_pte_to_phys(pte);
1390
1391 virt = __hyp_va(phys);
1392 if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
1393 ret = __hyp_check_page_state_range((u64)virt, PAGE_SIZE, PKVM_NOPAGE);
1394 if (ret)
1395 goto unlock;
1396 }
1397
1398 prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
1399 ret = pkvm_create_mappings_locked(virt, virt + PAGE_SIZE, prot);
1400 if (ret) {
1401 /*
1402 * Repaint the return code as we need to distinguish between the
1403 * no memory from the guest which is recoverable and no memory
1404 * from the hypervisor.
1405 */
1406 if (ret == -ENOMEM)
1407 ret = -EBUSY;
1408 goto unlock;
1409 }
1410
1411 WARN_ON(__guest_initiate_page_transition(ipa, pte, nr_pages, vcpu, PKVM_PAGE_SHARED_OWNED));
1412 *hyp_va = (u64)virt;
1413 unlock:
1414 guest_unlock_component(vm);
1415 hyp_unlock_component();
1416
1417 return ret;
1418 }
1419
__pkvm_guest_unshare_hyp_page(struct pkvm_hyp_vcpu * vcpu,u64 ipa)1420 int __pkvm_guest_unshare_hyp_page(struct pkvm_hyp_vcpu *vcpu, u64 ipa)
1421 {
1422 int ret;
1423 struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
1424 kvm_pte_t pte;
1425 u64 phys, virt, nr_pages = 1;
1426
1427 hyp_lock_component();
1428 guest_lock_component(vm);
1429
1430 ret = __guest_request_page_transition(ipa, &pte, &nr_pages, vcpu, PKVM_PAGE_SHARED_OWNED);
1431 if (ret)
1432 goto unlock;
1433
1434 phys = kvm_pte_to_phys(pte);
1435
1436 virt = (u64)__hyp_va(phys);
1437 ret = __hyp_check_page_state_range(virt, PAGE_SIZE, PKVM_PAGE_SHARED_BORROWED);
1438 if (ret)
1439 goto unlock;
1440
1441 WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, PAGE_SIZE) != PAGE_SIZE);
1442 WARN_ON(__guest_initiate_page_transition(ipa, pte, nr_pages, vcpu, PKVM_PAGE_OWNED));
1443 unlock:
1444 guest_unlock_component(vm);
1445 hyp_unlock_component();
1446
1447 return ret;
1448 }
1449
__pkvm_guest_unshare_host(struct pkvm_hyp_vcpu * vcpu,u64 ipa,u64 nr_pages,u64 * nr_unshared)1450 int __pkvm_guest_unshare_host(struct pkvm_hyp_vcpu *vcpu, u64 ipa, u64 nr_pages,
1451 u64 *nr_unshared)
1452 {
1453 struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
1454 kvm_pte_t pte;
1455 size_t size;
1456 u64 phys;
1457 int ret;
1458
1459 host_lock_component();
1460 guest_lock_component(vm);
1461
1462 ret = __guest_request_page_transition(ipa, &pte, &nr_pages, vcpu, PKVM_PAGE_SHARED_OWNED);
1463 if (ret)
1464 goto unlock;
1465
1466 phys = kvm_pte_to_phys(pte);
1467 if (check_shl_overflow(nr_pages, PAGE_SHIFT, &size)) {
1468 ret = -EINVAL;
1469 goto unlock;
1470 }
1471
1472 ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
1473 if (ret)
1474 goto unlock;
1475
1476 WARN_ON(__guest_initiate_page_transition(ipa, pte, nr_pages, vcpu, PKVM_PAGE_OWNED));
1477 psci_mem_protect_inc(nr_pages);
1478 WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_GUEST));
1479 *nr_unshared = nr_pages;
1480
1481 unlock:
1482 guest_unlock_component(vm);
1483 host_unlock_component();
1484
1485 return ret;
1486 }
1487
__pkvm_guest_share_ffa_page(struct pkvm_hyp_vcpu * vcpu,u64 ipa,phys_addr_t * phys)1488 int __pkvm_guest_share_ffa_page(struct pkvm_hyp_vcpu *vcpu, u64 ipa, phys_addr_t *phys)
1489 {
1490 int ret;
1491 struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
1492 kvm_pte_t pte;
1493 u64 nr_pages = 1;
1494
1495 guest_lock_component(vm);
1496 ret = __guest_request_page_transition(ipa, &pte, &nr_pages, vcpu, PKVM_PAGE_OWNED);
1497 if (ret)
1498 goto unlock;
1499
1500 ret = __guest_initiate_page_transition(ipa, pte, nr_pages, vcpu, PKVM_PAGE_SHARED_OWNED);
1501 if (!ret)
1502 *phys = kvm_pte_to_phys(pte);
1503 unlock:
1504 guest_unlock_component(vm);
1505
1506 return ret;
1507 }
1508
1509 /*
1510 * The caller is responsible for tracking the FFA state and this function
1511 * should only be called for IPAs that have previously been shared with FFA.
1512 */
__pkvm_guest_unshare_ffa_page(struct pkvm_hyp_vcpu * vcpu,u64 ipa)1513 int __pkvm_guest_unshare_ffa_page(struct pkvm_hyp_vcpu *vcpu, u64 ipa)
1514 {
1515 int ret;
1516 struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
1517 kvm_pte_t pte;
1518 u64 nr_pages = 1;
1519
1520 guest_lock_component(vm);
1521 ret = __guest_request_page_transition(ipa, &pte, &nr_pages, vcpu, PKVM_PAGE_SHARED_OWNED);
1522 if (ret)
1523 goto unlock;
1524
1525 ret = __guest_initiate_page_transition(ipa, pte, nr_pages, vcpu, PKVM_PAGE_OWNED);
1526 unlock:
1527 guest_unlock_component(vm);
1528
1529 return ret;
1530 }
1531
__pkvm_host_donate_hyp(u64 pfn,u64 nr_pages)1532 int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
1533 {
1534 return ___pkvm_host_donate_hyp(pfn, nr_pages, false);
1535 }
1536
1537 /* The swiss knife of memory donation. */
___pkvm_host_donate_hyp_prot(u64 pfn,u64 nr_pages,bool accept_mmio,enum kvm_pgtable_prot prot)1538 int ___pkvm_host_donate_hyp_prot(u64 pfn, u64 nr_pages,
1539 bool accept_mmio, enum kvm_pgtable_prot prot)
1540 {
1541 phys_addr_t start = hyp_pfn_to_phys(pfn);
1542 phys_addr_t end = start + (nr_pages << PAGE_SHIFT);
1543 int ret;
1544
1545 if (!accept_mmio && !range_is_memory(start, end))
1546 return -EPERM;
1547
1548 host_lock_component();
1549 ret = __pkvm_host_donate_hyp_locked(pfn, nr_pages, prot);
1550 host_unlock_component();
1551
1552 return ret;
1553 }
1554
___pkvm_host_donate_hyp(u64 pfn,u64 nr_pages,bool accept_mmio)1555 int ___pkvm_host_donate_hyp(u64 pfn, u64 nr_pages, bool accept_mmio)
1556 {
1557 return ___pkvm_host_donate_hyp_prot(pfn, nr_pages, accept_mmio,
1558 default_hyp_prot(hyp_pfn_to_phys(pfn)));
1559 }
1560
pkvm_hyp_donate_guest(struct pkvm_hyp_vcpu * vcpu,u64 pfn,u64 gfn)1561 static int pkvm_hyp_donate_guest(struct pkvm_hyp_vcpu *vcpu, u64 pfn, u64 gfn)
1562 {
1563 struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
1564 u64 phys = hyp_pfn_to_phys(pfn);
1565 u64 ipa = hyp_pfn_to_phys(gfn);
1566 u64 hyp_addr = (u64)__hyp_va(phys);
1567 size_t size = PAGE_SIZE;
1568 enum kvm_pgtable_prot prot;
1569 int ret;
1570
1571 hyp_assert_lock_held(&pkvm_pgd_lock);
1572 hyp_assert_lock_held(&vm->pgtable_lock);
1573
1574 ret = __hyp_check_page_state_range(hyp_addr, size, PKVM_PAGE_OWNED);
1575 if (ret)
1576 return ret;;
1577 ret = __guest_check_page_state_range(vcpu, ipa, size, PKVM_NOPAGE);
1578 if (ret)
1579 return ret;
1580
1581 WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, hyp_addr, size) != size);
1582 prot = pkvm_mkstate(default_guest_prot(addr_is_memory(phys)), PKVM_PAGE_OWNED);
1583 return WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys, prot,
1584 &vcpu->vcpu.arch.stage2_mc, 0));
1585 }
1586
__pkvm_host_donate_hyp_locked(u64 pfn,u64 nr_pages,enum kvm_pgtable_prot prot)1587 int __pkvm_host_donate_hyp_locked(u64 pfn, u64 nr_pages, enum kvm_pgtable_prot prot)
1588 {
1589 u64 size, phys = hyp_pfn_to_phys(pfn);
1590 void *virt = __hyp_va(phys);
1591 int ret;
1592
1593 if (check_shl_overflow(nr_pages, PAGE_SHIFT, &size))
1594 return -EINVAL;
1595
1596 hyp_lock_component();
1597
1598 ret = ___host_check_page_state_range(phys, size, PKVM_PAGE_OWNED, HOST_CHECK_NULL_REFCNT);
1599 if (ret)
1600 goto unlock;
1601 if (IS_ENABLED(CONFIG_PKVM_STRICT_CHECKS)) {
1602 ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE);
1603 if (ret)
1604 goto unlock;
1605 }
1606
1607 prot = pkvm_mkstate(prot, PKVM_PAGE_OWNED);
1608 ret = pkvm_create_mappings_locked(virt, virt + size, prot);
1609 if (ret) {
1610 WARN_ON(ret != -ENOMEM);
1611 /* We might have failed halfway through, so remove anything we've installed */
1612 pkvm_remove_mappings_locked(virt, virt + size);
1613 goto unlock;
1614 }
1615 WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HYP));
1616
1617 unlock:
1618 hyp_unlock_component();
1619
1620 return ret;
1621 }
1622
__pkvm_hyp_donate_host(u64 pfn,u64 nr_pages)1623 int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
1624 {
1625 u64 size, phys = hyp_pfn_to_phys(pfn);
1626 u64 virt = (u64)__hyp_va(phys);
1627 int ret;
1628
1629 if (check_shl_overflow(nr_pages, PAGE_SHIFT, &size))
1630 return -EINVAL;
1631
1632 host_lock_component();
1633 hyp_lock_component();
1634
1635 ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_OWNED);
1636 if (ret)
1637 goto unlock;
1638 if (IS_ENABLED(CONFIG_PKVM_STRICT_CHECKS)) {
1639 ret = ___host_check_page_state_range(phys, size, PKVM_NOPAGE, 0);
1640 if (ret)
1641 goto unlock;
1642 }
1643
1644 WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
1645 WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST));
1646
1647 unlock:
1648 hyp_unlock_component();
1649 host_unlock_component();
1650 return ret;
1651 }
1652
__pkvm_host_donate_ffa(u64 pfn,u64 nr_pages)1653 int __pkvm_host_donate_ffa(u64 pfn, u64 nr_pages)
1654 {
1655 u64 size, phys = hyp_pfn_to_phys(pfn), end;
1656 int ret;
1657
1658 if (check_shl_overflow(nr_pages, PAGE_SHIFT, &size) ||
1659 check_add_overflow(phys, size, &end))
1660 return -EINVAL;
1661
1662 host_lock_component();
1663
1664 ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
1665 if (ret)
1666 goto unlock;
1667
1668 WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_FFA));
1669 unlock:
1670 host_unlock_component();
1671 return ret;
1672 }
1673
__pkvm_host_reclaim_ffa(u64 pfn,u64 nr_pages)1674 int __pkvm_host_reclaim_ffa(u64 pfn, u64 nr_pages)
1675 {
1676 u64 size, phys = hyp_pfn_to_phys(pfn), end;
1677 int ret;
1678
1679 if (check_shl_overflow(nr_pages, PAGE_SHIFT, &size) ||
1680 check_add_overflow(phys, size, &end))
1681 return -EINVAL;
1682
1683 host_lock_component();
1684
1685 ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE);
1686 if (ret)
1687 goto unlock;
1688
1689 WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST));
1690 unlock:
1691 host_unlock_component();
1692 return ret;
1693 }
1694
1695 #define MODULE_PROT_ALLOWLIST (KVM_PGTABLE_PROT_RWX | \
1696 KVM_PGTABLE_PROT_DEVICE | \
1697 KVM_PGTABLE_PROT_NORMAL_NC | \
1698 KVM_PGTABLE_PROT_PXN | \
1699 KVM_PGTABLE_PROT_UXN)
1700
module_change_host_page_prot(u64 pfn,enum kvm_pgtable_prot prot,u64 nr_pages,bool update_iommu)1701 int module_change_host_page_prot(u64 pfn, enum kvm_pgtable_prot prot, u64 nr_pages,
1702 bool update_iommu)
1703 {
1704 u64 i, end, addr = hyp_pfn_to_phys(pfn);
1705 struct hyp_page *page = NULL;
1706 struct kvm_mem_range range;
1707 struct memblock_region *reg;
1708 int ret;
1709
1710 if ((prot & MODULE_PROT_ALLOWLIST) != prot)
1711 return -EINVAL;
1712
1713 if (check_shl_overflow(nr_pages, PAGE_SHIFT, &end) ||
1714 check_add_overflow(addr, end, &end))
1715 return -EINVAL;
1716
1717 reg = find_mem_range(addr, &range);
1718 if (end > range.end) {
1719 /* Specified range not in a single mmio or memory block. */
1720 return -EPERM;
1721 }
1722
1723 host_lock_component();
1724
1725 /*
1726 * There is no hyp_vmemmap covering MMIO regions, which makes tracking
1727 * of module-owned MMIO regions hard, so we trust the modules not to
1728 * mess things up.
1729 */
1730 if (!reg)
1731 goto update;
1732
1733 /* Range is memory: we can track module ownership. */
1734 page = hyp_phys_to_page(addr);
1735
1736 /*
1737 * Modules can only modify pages they already own, and pristine host
1738 * pages. The entire range must be consistently one or the other.
1739 */
1740 if (page->host_state & PKVM_MODULE_OWNED_PAGE) {
1741 /* The entire range must be module-owned. */
1742 ret = -EPERM;
1743 for (i = 1; i < nr_pages; i++) {
1744 if (!(page[i].host_state & PKVM_MODULE_OWNED_PAGE))
1745 goto unlock;
1746 }
1747 } else {
1748 /* The entire range must be pristine. */
1749 ret = ___host_check_page_state_range(addr, nr_pages << PAGE_SHIFT,
1750 PKVM_PAGE_OWNED, HOST_CHECK_NULL_REFCNT);
1751 if (ret)
1752 goto unlock;
1753 }
1754
1755 update:
1756 if (!prot) {
1757 ret = __host_stage2_set_owner_locked(addr, nr_pages << PAGE_SHIFT,
1758 PKVM_ID_PROTECTED, !!reg,
1759 PKVM_MODULE_OWNED_PAGE, update_iommu);
1760 } else {
1761 ret = host_stage2_idmap_locked(
1762 addr, nr_pages << PAGE_SHIFT, prot, update_iommu);
1763 }
1764
1765 if (WARN_ON(ret) || !page || !prot)
1766 goto unlock;
1767
1768 for (i = 0; i < nr_pages; i++) {
1769 if (prot != KVM_PGTABLE_PROT_RWX) {
1770 page[i].host_state = PKVM_MODULE_OWNED_PAGE;
1771 } else {
1772 page[i].host_state = PKVM_PAGE_OWNED;
1773 }
1774 }
1775
1776 unlock:
1777 host_unlock_component();
1778
1779 return ret;
1780 }
1781
hyp_pin_shared_mem(void * from,void * to)1782 int hyp_pin_shared_mem(void *from, void *to)
1783 {
1784 u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
1785 u64 end = PAGE_ALIGN((u64)to);
1786 u64 size = end - start;
1787 int ret;
1788
1789 host_lock_component();
1790 hyp_lock_component();
1791
1792 ret = __host_check_page_state_range(__hyp_pa(start), size,
1793 PKVM_PAGE_SHARED_OWNED);
1794 if (ret)
1795 goto unlock;
1796
1797 ret = __hyp_check_page_state_range(start, size,
1798 PKVM_PAGE_SHARED_BORROWED);
1799 if (ret)
1800 goto unlock;
1801
1802 for (cur = start; cur < end; cur += PAGE_SIZE)
1803 hyp_page_ref_inc(hyp_virt_to_page(cur));
1804
1805 unlock:
1806 hyp_unlock_component();
1807 host_unlock_component();
1808
1809 return ret;
1810 }
1811
hyp_unpin_shared_mem(void * from,void * to)1812 void hyp_unpin_shared_mem(void *from, void *to)
1813 {
1814 u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
1815 u64 end = PAGE_ALIGN((u64)to);
1816
1817 host_lock_component();
1818 hyp_lock_component();
1819
1820 for (cur = start; cur < end; cur += PAGE_SIZE)
1821 hyp_page_ref_dec(hyp_virt_to_page(cur));
1822
1823 hyp_unlock_component();
1824 host_unlock_component();
1825 }
1826
__pkvm_host_share_ffa(u64 pfn,u64 nr_pages)1827 int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages)
1828 {
1829
1830 u64 size, phys = hyp_pfn_to_phys(pfn);
1831 int ret;
1832
1833 if (check_shl_overflow(nr_pages, PAGE_SHIFT, &size))
1834 return -EINVAL;
1835
1836 host_lock_component();
1837
1838 ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
1839 if (!ret)
1840 ret = __host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
1841
1842 host_unlock_component();
1843
1844 return ret;
1845 }
1846
__pkvm_host_unshare_ffa(u64 pfn,u64 nr_pages)1847 int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages)
1848 {
1849 u64 size, phys = hyp_pfn_to_phys(pfn);
1850 int ret;
1851
1852 if (check_shl_overflow(nr_pages, PAGE_SHIFT, &size))
1853 return -EINVAL;
1854
1855 host_lock_component();
1856
1857 ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
1858 if (!ret)
1859 ret = __host_set_page_state_range(phys, size, PKVM_PAGE_OWNED);
1860
1861 host_unlock_component();
1862
1863 return ret;
1864 }
1865
__pkvm_use_dma_page(phys_addr_t phys_addr)1866 static void __pkvm_use_dma_page(phys_addr_t phys_addr)
1867 {
1868 struct hyp_page *p = hyp_phys_to_page(phys_addr);
1869
1870 hyp_page_ref_inc(p);
1871 }
1872
__pkvm_unuse_dma_page(phys_addr_t phys_addr)1873 static void __pkvm_unuse_dma_page(phys_addr_t phys_addr)
1874 {
1875 struct hyp_page *p = hyp_phys_to_page(phys_addr);
1876
1877 hyp_page_ref_dec(p);
1878 }
1879
1880 static int __pkvm_use_dma_locked(phys_addr_t phys_addr, size_t size,
1881 struct pkvm_hyp_vcpu *hyp_vcpu)
1882 {
1883 int i;
1884 int ret = 0;
1885 struct kvm_mem_range r;
1886 size_t nr_pages = size >> PAGE_SHIFT;
1887 struct memblock_region *reg = find_mem_range(phys_addr, &r);
1888
1889 if (WARN_ON(!PAGE_ALIGNED(phys_addr | size)) || !is_in_mem_range(phys_addr + size - 1, &r))
1890 return -EINVAL;
1891
1892 /*
1893 * Some differences between handling of RAM and device memory:
1894 * - The hyp vmemmap area for device memory is not backed by physical
1895 * pages in the hyp page tables.
1896 * - However, in some cases modules can donate MMIO. As those pages can't be
1897 * refcounted, taint them by marking them PKVM_PAGE_TAINTED, which will
1898 * prevent any future ownership transition.
1899 */
1900 if (!reg) {
1901 enum kvm_pgtable_prot prot;
1902
1903 if (hyp_vcpu)
1904 return -EINVAL;
1905
1906 for (i = 0; i < nr_pages; i++) {
1907 u64 addr = phys_addr + i * PAGE_SIZE;
1908
1909 ret = ___host_check_page_state_range(addr, PAGE_SIZE,
1910 PKVM_PAGE_TAINTED,
1911 0);
1912 /* Page already tainted */
1913 if (!ret)
1914 continue;
1915 ret = ___host_check_page_state_range(addr, PAGE_SIZE,
1916 PKVM_PAGE_OWNED,
1917 0);
1918 if (ret)
1919 return ret;
1920 }
1921 prot = pkvm_mkstate(PKVM_HOST_MMIO_PROT, PKVM_PAGE_TAINTED);
1922 WARN_ON(host_stage2_idmap_locked(phys_addr, size, prot, false));
1923 } else {
1924 /* For VMs, reaching this point means the VM already has access to the page. */
1925 if (!hyp_vcpu) {
1926 for (i = 0; i < nr_pages; i++) {
1927 enum pkvm_page_state state;
1928 phys_addr_t this_addr = phys_addr + i * PAGE_SIZE;
1929
1930 state = hyp_phys_to_page(this_addr)->host_state;
1931 if (state != PKVM_PAGE_OWNED) {
1932 ret = -EPERM;
1933 break;
1934 }
1935 }
1936 if (ret)
1937 return ret;
1938 }
1939
1940 for (i = 0; i < nr_pages; i++)
1941 __pkvm_use_dma_page(phys_addr + i * PAGE_SIZE);
1942 }
1943
1944 return ret;
1945 }
1946
1947 /*
1948 * __pkvm_use_dma - Mark memory as used for DMA
1949 * @phys_addr: physical address of the DMA region
1950 * @size: size of the DMA region
1951 * When a page is mapped in an IOMMU page table for DMA, it must
1952 * not be donated to a guest or the hypervisor. We ensure this with:
1953 * - Host can only map pages that are OWNED
1954 * - Any page that is mapped is refcounted
1955 * - Donation/Sharing is prevented by the refcount check in
1956 * ___host_check_page_state_range()
1957 * - No MMIO transition is allowed beyond IOMMU MMIO, which
1958 * happens during de-privilege.
1959 * If shared pages are ever allowed to be mapped, similar checks
1960 * will be needed in host_request_unshare() and
1961 * host_ack_unshare()
1962 */
1963 int __pkvm_use_dma(phys_addr_t phys_addr, size_t size, struct pkvm_hyp_vcpu *hyp_vcpu)
1964 {
1965 int ret;
1966
1967 host_lock_component();
1968 ret = __pkvm_use_dma_locked(phys_addr, size, hyp_vcpu);
1969 host_unlock_component();
1970 return ret;
1971 }
1972
1973 int __pkvm_unuse_dma(phys_addr_t phys_addr, size_t size, struct pkvm_hyp_vcpu *hyp_vcpu)
1974 {
1975 int i;
1976 size_t nr_pages = size >> PAGE_SHIFT;
1977
1978 if (WARN_ON(!PAGE_ALIGNED(phys_addr | size)))
1979 return -EINVAL;
1980 if (!range_is_memory(phys_addr, phys_addr + size)) {
1981 WARN_ON(hyp_vcpu);
1982 return 0;
1983 }
1984
1985 host_lock_component();
1986 /*
1987 * We end up here after the caller successfully unmapped the page from
1988 * the IOMMU table, which means that a ref is held and the page is shared
1989 * in the host stage-2, so this cannot fail.
1990 */
1991 for (i = 0; i < nr_pages; i++)
1992 __pkvm_unuse_dma_page(phys_addr + i * PAGE_SIZE);
1993
1994 host_unlock_component();
1995 return 0;
1996 }
1997
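/*
 * Share a range of host-owned pages with a guest: the pages are mapped into
 * the guest stage-2 as SHARED_BORROWED while the host keeps ownership
 * (SHARED_OWNED), and host_share_guest_count is bumped for each page.
 */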
1998 int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu,
1999 enum kvm_pgtable_prot prot, u64 nr_pages)
2000 {
2001 struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
2002 u64 phys = hyp_pfn_to_phys(pfn);
2003 u64 ipa = hyp_pfn_to_phys(gfn);
2004 struct hyp_page *page;
2005 size_t size;
2006 u64 end;
2007 int ret;
2008
2009 if (prot & ~KVM_PGTABLE_PROT_RWX)
2010 return -EINVAL;
2011
2012 if (check_shl_overflow(nr_pages, PAGE_SHIFT, &size) ||
2013 check_add_overflow(phys, size, &end))
2014 return -EINVAL;
2015
2016 ret = check_range_allowed_memory(phys, end);
2017 if (ret)
2018 return ret;
2019
2020 host_lock_component();
2021 guest_lock_component(vm);
2022
2023 ret = __guest_check_page_state_range(vcpu, ipa, size, PKVM_NOPAGE);
2024 if (ret)
2025 goto unlock;
2026
2027 for (; phys < end; phys += PAGE_SIZE) {
2028 page = hyp_phys_to_page(phys);
2029 if (page->host_state == PKVM_PAGE_OWNED && !hyp_refcount_get(page->refcount))
2030 continue;
2031 else if (page->host_state == PKVM_PAGE_SHARED_OWNED && page->host_share_guest_count)
2032 continue;
2033 ret = -EPERM;
2034 goto unlock;
2035 }
2036
2037 phys = hyp_pfn_to_phys(pfn);
2038 WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys,
2039 pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED),
2040 &vcpu->vcpu.arch.stage2_mc, 0));
2041 for (; phys < end; phys += PAGE_SIZE) {
2042 page = hyp_phys_to_page(phys);
2043 page->host_state = PKVM_PAGE_SHARED_OWNED;
2044 page->host_share_guest_count++;
2045 }
2046
2047 unlock:
2048 guest_unlock_component(vm);
2049 host_unlock_component();
2050
2051 return ret;
2052 }
2053
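/*
 * Check that @ipa is a valid guest mapping of exactly @size bytes borrowed
 * from the host, and that the host still tracks every backing page as shared
 * with a non-zero host_share_guest_count. The backing PA is returned through
 * @__phys.
 */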
2054 static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ipa, size_t size)
2055 {
2056 enum pkvm_page_state state;
2057 struct hyp_page *page;
2058 kvm_pte_t pte;
2059 u64 phys, end;
2060 s8 level;
2061 int ret;
2062
2063 if (size != PAGE_SIZE && size != PMD_SIZE)
2064 return -EINVAL;
2065 ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
2066 if (ret)
2067 return ret;
2068 if (!kvm_pte_valid(pte))
2069 return -ENOENT;
2070 if (kvm_granule_size(level) != size)
2071 return -E2BIG;
2072
2073 state = guest_get_page_state(pte, ipa) & ~PKVM_PAGE_RESTRICTED_PROT;
2074 if (state != PKVM_PAGE_SHARED_BORROWED)
2075 return -EPERM;
2076
2077 phys = kvm_pte_to_phys(pte);
2078 if (check_add_overflow(phys, size, &end))
2079 return -EINVAL;
2080
2081 ret = check_range_allowed_memory(phys, end);
2082 if (WARN_ON(ret))
2083 return ret;
2084
2085 for (; phys < end; phys += PAGE_SIZE) {
2086 page = hyp_phys_to_page(phys);
2087 if (page->host_state != PKVM_PAGE_SHARED_OWNED)
2088 return -EPERM;
2089 if (WARN_ON(!page->host_share_guest_count))
2090 return -EINVAL;
2091 }
2092
2093 *__phys = kvm_pte_to_phys(pte);
2094
2095 return 0;
2096 }
2097
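/*
 * Tear down a host->guest share: unmap the range from the guest stage-2 and
 * return each backing page to PKVM_PAGE_OWNED once its share count drops to
 * zero.
 */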
2098 int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *vm, u64 nr_pages)
2099 {
2100 size_t size = PAGE_SIZE * nr_pages;
2101 u64 ipa = hyp_pfn_to_phys(gfn);
2102 struct hyp_page *page;
2103 u64 phys, end;
2104 int ret;
2105
2106 host_lock_component();
2107 guest_lock_component(vm);
2108
2109 ret = __check_host_shared_guest(vm, &phys, ipa, size);
2110 if (ret)
2111 goto unlock;
2112
2113 ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, size);
2114 if (ret)
2115 goto unlock;
2116
2117 end = phys + size;
2118 for (; phys < end; phys += PAGE_SIZE) {
2119 page = hyp_phys_to_page(phys);
2120 page->host_share_guest_count--;
2121 if (!page->host_share_guest_count)
2122 page->host_state = PKVM_PAGE_OWNED;
2123 }
2124
2125 unlock:
2126 guest_unlock_component(vm);
2127 host_unlock_component();
2128
2129 return ret;
2130 }
2131
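/*
 * Look up the leaf PTE covering @ipa and return its physical address. Fails
 * if the mapping granule does not match the requested @order or if the PTE
 * is not valid.
 */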
2132 static int guest_get_valid_pte(struct pkvm_hyp_vm *vm, u64 *phys, u64 ipa, u8 order, kvm_pte_t *pte)
2133 {
2134 size_t size = PAGE_SIZE << order;
2135 s8 level;
2136
2137 if (order && size != PMD_SIZE)
2138 return -EINVAL;
2139
2140 WARN_ON(kvm_pgtable_get_leaf(&vm->pgt, ipa, pte, &level));
2141
2142 if (kvm_granule_size(level) != size)
2143 return -E2BIG;
2144
2145 if (!kvm_pte_valid(*pte))
2146 return -ENOENT;
2147
2148 *phys = kvm_pte_to_phys(*pte);
2149
2150 return 0;
2151 }
2152
2153 int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot)
2154 {
2155 struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
2156 u64 ipa = hyp_pfn_to_phys(gfn);
2157 int ret;
2158
2159 if (WARN_ON(kvm_vm_is_protected(&vm->kvm)))
2160 return -EPERM;
2161
2162 if (prot & ~KVM_PGTABLE_PROT_RWX)
2163 return -EINVAL;
2164
2165 guest_lock_component(vm);
2166 ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0);
2167 guest_unlock_component(vm);
2168
2169 return ret;
2170 }
2171
2172 int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *vm, u64 size)
2173 {
2174 u64 ipa = hyp_pfn_to_phys(gfn);
2175 int ret;
2176
2177 if (WARN_ON(kvm_vm_is_protected(&vm->kvm)))
2178 return -EPERM;
2179
2180 guest_lock_component(vm);
2181 ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, size);
2182 guest_unlock_component(vm);
2183
2184 return ret;
2185 }
2186
2187 int __pkvm_host_test_clear_young_guest(u64 gfn, u64 size, bool mkold, struct pkvm_hyp_vm *vm)
2188 {
2189 u64 ipa = hyp_pfn_to_phys(gfn);
2190 int ret;
2191
2192 if (WARN_ON(kvm_vm_is_protected(&vm->kvm)))
2193 return -EPERM;
2194
2195 guest_lock_component(vm);
2196 ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, size, mkold);
2197 guest_unlock_component(vm);
2198
2199 return ret;
2200 }
2201
2202 kvm_pte_t __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu)
2203 {
2204 struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
2205 u64 ipa = hyp_pfn_to_phys(gfn);
2206 kvm_pte_t pte;
2207
2208 if (WARN_ON(kvm_vm_is_protected(&vm->kvm)))
2209 return 0;
2210
2211 guest_lock_component(vm);
2212 pte = kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0);
2213 guest_unlock_component(vm);
2214
2215 return pte;
2216 }
2217
2218 int __pkvm_host_split_guest(u64 gfn, u64 size, struct pkvm_hyp_vcpu *vcpu)
2219 {
2220 struct kvm_hyp_memcache *mc = &vcpu->vcpu.arch.stage2_mc;
2221 struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
2222 u64 ipa = hyp_pfn_to_phys(gfn);
2223 int ret;
2224
2225 if (size != PMD_SIZE)
2226 return -EINVAL;
2227
2228 guest_lock_component(vm);
2229
2230 /*
2231 * stage2_split() already checks the existing mapping is valid and PMD-level.
2232 * No other check is necessary.
2233 */
2234
2235 ret = kvm_pgtable_stage2_split(&vm->pgt, ipa, size, mc);
2236
2237 guest_unlock_component(vm);
2238
2239 return ret;
2240 }
2241
2242 static int __host_set_owner_guest(struct pkvm_hyp_vcpu *vcpu, u64 phys, u64 ipa,
2243 size_t size, bool is_memory)
2244 {
2245 struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
2246 u64 nr_pages = size >> PAGE_SHIFT;
2247 int ret;
2248
2249 /*
2250 * update_iommu is false: the caller must do the IOMMU update _before_ this
2251 * function is called. This is intended to protect pvmfw loading.
2252 */
2253 WARN_ON(__host_stage2_set_owner_locked(phys, size, PKVM_ID_GUEST,
2254 is_memory, 0, false));
2255 psci_mem_protect_inc(nr_pages);
2256 if (pkvm_ipa_range_has_pvmfw(vm, ipa, ipa + size)) {
2257 ret = pkvm_load_pvmfw_pages(vm, ipa, phys, size);
2258 if (WARN_ON(ret)) {
2259 psci_mem_protect_dec(nr_pages);
2260 return ret;
2261 }
2262 }
2263
2264 return 0;
2265 }
2266
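/*
 * Donate host-owned pages to a guest: for memory, access is revoked in the
 * IOMMU idmap first, then ownership is transferred to the guest (loading
 * pvmfw if the IPA range contains it) and the pages are mapped RWX as
 * PKVM_PAGE_OWNED in the guest stage-2.
 */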
2267 int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu, u64 nr_pages)
2268 {
2269 struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
2270 u64 phys = hyp_pfn_to_phys(pfn);
2271 u64 ipa = hyp_pfn_to_phys(gfn);
2272 enum kvm_pgtable_prot prot;
2273 bool is_memory;
2274 size_t size;
2275 int ret;
2276
2277 if (check_mul_overflow(nr_pages, PAGE_SIZE, &size))
2278 return -EINVAL;
2279
2280 host_lock_component();
2281 guest_lock_component(vm);
2282
2283 ret = ___host_check_page_state_range(phys, size, PKVM_PAGE_OWNED, HOST_CHECK_NULL_REFCNT);
2284 if (ret)
2285 goto unlock;
2286 ret = __guest_check_page_state_range(vcpu, ipa, size, PKVM_NOPAGE);
2287 if (ret)
2288 goto unlock;
2289
2290 is_memory = addr_is_memory(phys);
2291 if (is_memory) {
2292 kvm_iommu_host_stage2_idmap(phys, phys + size, 0);
2293 kvm_iommu_host_stage2_idmap_complete(false);
2294 }
2295 WARN_ON(__host_set_owner_guest(vcpu, phys, ipa, size, is_memory));
2296
2297 prot = pkvm_mkstate(KVM_PGTABLE_PROT_RWX, PKVM_PAGE_OWNED);
2298 WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys, prot,
2299 &vcpu->vcpu.arch.stage2_mc, 0));
2300
2301 unlock:
2302 guest_unlock_component(vm);
2303 host_unlock_component();
2304
2305 return ret;
2306 }
2307
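/*
 * Hyp-side snapshot of the pinned-page list provided through the vCPU's
 * hyp_reqs, terminated by a sentinel entry with order == 0xFF (see
 * __copy_hyp_ppages()).
 */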
2308 struct kvm_hyp_pinned_page *hyp_ppages;
2309
2310 static int __copy_hyp_ppages(struct pkvm_hyp_vcpu *vcpu)
2311 {
2312 struct kvm_hyp_pinned_page *ppage, *hyp_ppage;
2313
2314 WARN_ON(!hyp_ppages);
2315
2316 ppage = next_kvm_hyp_pinned_page(vcpu->vcpu.arch.hyp_reqs, NULL, true);
2317 if (!ppage)
2318 return -EINVAL;
2319
2320 hyp_ppage = hyp_ppages;
2321
2322 do {
2323 memcpy(hyp_ppage, ppage, sizeof(*ppage));
2324 ppage = next_kvm_hyp_pinned_page(vcpu->vcpu.arch.hyp_reqs, ppage, true);
2325 hyp_ppage++; /* No risk of overflowing hyp_ppages */
2326 } while (ppage);
2327
2328 hyp_ppage->order = 0xFF;
2329
2330 return 0;
2331 }
2332
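/* Iterate over the hyp_ppages snapshot until the order == 0xFF sentinel. */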
2333 #define for_each_hyp_ppage(hyp_ppage) \
2334 for (hyp_ppage = hyp_ppages; (hyp_ppage)->order != 0xFF; (hyp_ppage)++)
2335
2336 int __pkvm_host_donate_sglist_guest(struct pkvm_hyp_vcpu *vcpu)
2337 {
2338 struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
2339 struct kvm_hyp_pinned_page *ppage = hyp_ppages;
2340 bool is_memory;
2341 int ret;
2342
2343 host_lock_component();
2344 guest_lock_component(vm);
2345
2346 ret = __copy_hyp_ppages(vcpu);
2347 if (ret)
2348 goto unlock;
2349
2350 is_memory = addr_is_memory(hyp_pfn_to_phys(ppage->pfn));
2351
2352 for_each_hyp_ppage(ppage) {
2353 u64 phys = hyp_pfn_to_phys(ppage->pfn);
2354 u64 ipa = hyp_pfn_to_phys(ppage->gfn);
2355 size_t size;
2356
2357 if (check_shl_overflow(PAGE_SIZE, ppage->order, &size)) {
2358 ret = -EINVAL;
2359 goto unlock;
2360 }
2361
2362 if (addr_is_memory(phys) != is_memory) {
2363 ret = -EINVAL;
2364 goto unlock;
2365 }
2366
2367 ret = ___host_check_page_state_range(phys, size, PKVM_PAGE_OWNED,
2368 HOST_CHECK_NULL_REFCNT);
2369 if (ret)
2370 goto unlock;
2371
2372 ret = __guest_check_page_state_range(vcpu, ipa, size, PKVM_NOPAGE);
2373 if (ret)
2374 goto unlock;
2375 }
2376
2377 if (is_memory) {
2378 for_each_hyp_ppage(ppage) {
2379 size_t size = PAGE_SIZE << ppage->order;
2380 u64 phys = hyp_pfn_to_phys(ppage->pfn);
2381
2382 kvm_iommu_host_stage2_idmap(phys, phys + size, 0);
2383 }
2384
2385 kvm_iommu_host_stage2_idmap_complete(false);
2386 }
2387
2388 for_each_hyp_ppage(ppage) {
2389 size_t size = PAGE_SIZE << ppage->order;
2390 u64 phys = hyp_pfn_to_phys(ppage->pfn);
2391 u64 ipa = hyp_pfn_to_phys(ppage->gfn);
2392 enum kvm_pgtable_prot prot;
2393
2394 /* Now that the sglist is unmapped from the IOMMUs, we can load pvmfw */
2395 WARN_ON(__host_set_owner_guest(vcpu, phys, ipa, size, is_memory));
2396
2397 prot = pkvm_mkstate(KVM_PGTABLE_PROT_RWX, PKVM_PAGE_OWNED);
2398 WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys, prot,
2399 &vcpu->vcpu.arch.stage2_mc, 0));
2400 }
2401
2402 unlock:
2403 guest_unlock_component(vm);
2404 host_unlock_component();
2405
2406 return ret;
2407 }
2408
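/*
 * Donate a scatter-gather list of host memory pages to the hypervisor: each
 * entry is checked, mapped into the hyp page table and removed from the host
 * stage-2 and IOMMU idmap. On allocation failure, all entries processed so
 * far are rolled back to the host.
 */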
2409 int __pkvm_host_donate_sglist_hyp(struct pkvm_sglist_page *sglist, size_t nr_pages)
2410 {
2411 int p, ret;
2412
2413 host_lock_component();
2414 hyp_lock_component();
2415
2416 /* Check that we are reading hyp private memory */
2417 if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG))
2418 WARN_ON(__hyp_check_page_state_range((u64)sglist, nr_pages * sizeof(*sglist),
2419 PKVM_PAGE_OWNED));
2420
2421 for (p = 0; p < nr_pages; p++) {
2422 u64 phys = hyp_pfn_to_phys(sglist[p].pfn);
2423 size_t size;
2424
2425 if (check_shl_overflow(PAGE_SIZE, sglist[p].order, &size)) {
2426 ret = -EINVAL;
2427 goto unlock;
2428 }
2429
2430 if (!addr_is_memory(phys)) {
2431 ret = -EINVAL;
2432 goto unlock;
2433 }
2434
2435 ret = ___host_check_page_state_range(phys, size, PKVM_PAGE_OWNED,
2436 HOST_CHECK_NULL_REFCNT);
2437 if (ret)
2438 goto unlock;
2439
2440 if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
2441 ret = __hyp_check_page_state_range((u64)__hyp_va(phys), size, PKVM_NOPAGE);
2442 if (ret)
2443 goto unlock;
2444 }
2445 }
2446
2447 for (p = 0; p < nr_pages; p++) {
2448 size_t size = PAGE_SIZE << sglist[p].order;
2449 u64 phys = hyp_pfn_to_phys(sglist[p].pfn);
2450 enum kvm_pgtable_prot prot;
2451
2452 prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED);
2453 ret = pkvm_create_mappings_locked(__hyp_va(phys), __hyp_va(phys) + size, prot);
2454 if (ret) {
2455 WARN_ON(ret != -ENOMEM);
2456
2457 kvm_iommu_host_stage2_idmap_complete(false);
2458
2459 /* Rollback */
2460 for (; p >= 0; p--) {
2461 phys = hyp_pfn_to_phys(sglist[p].pfn);
2462 size = PAGE_SIZE << sglist[p].order;
2463
2464 WARN_ON(host_stage2_idmap_locked(phys, size,
2465 PKVM_HOST_MEM_PROT, false));
2466 kvm_iommu_host_stage2_idmap(phys, phys + size, PKVM_HOST_MEM_PROT);
2467 pkvm_remove_mappings_locked(__hyp_va(phys), __hyp_va(phys) + size);
2468 }
2469 kvm_iommu_host_stage2_idmap_complete(true);
2470
2471 break;
2472 }
2473
2474 WARN_ON(__host_stage2_set_owner_locked(phys, size, PKVM_ID_HYP, true, 0, false));
2475 kvm_iommu_host_stage2_idmap(phys, phys + size, 0);
2476 }
2477
2478 kvm_iommu_host_stage2_idmap_complete(false);
2479
2480 unlock:
2481 hyp_unlock_component();
2482 host_unlock_component();
2483
2484 return ret;
2485 }
2486
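/*
 * Zero a physical range via the guest fixmap and clean it to the PoC so that
 * reclaimed guest memory cannot be read back by the host.
 */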
2487 void hyp_poison_page(phys_addr_t phys, size_t size)
2488 {
2489 WARN_ON(!PAGE_ALIGNED(size));
2490
2491 while (size) {
2492 size_t __size = size == PMD_SIZE ? size : PAGE_SIZE;
2493 void *addr = __fixmap_guest_page(__hyp_va(phys), &__size);
2494
2495 memset(addr, 0, __size);
2496
2497 /*
2498 * Prefer kvm_flush_dcache_to_poc() over __clean_dcache_guest_page()
2499 * here as the latter may elide the CMO under the assumption that FWB
2500 * will be enabled on CPUs that support it. This is incorrect for the
2501 * host stage-2 and would otherwise lead to a malicious host potentially
2502 * being able to read the contents of newly reclaimed guest pages.
2503 */
2504 kvm_flush_dcache_to_poc(addr, __size);
2505 __fixunmap_guest_page(__size);
2506
2507 size -= __size;
2508 phys += __size;
2509 }
2510 }
2511
2512 void destroy_hyp_vm_pgt(struct pkvm_hyp_vm *vm)
2513 {
2514 guest_lock_component(vm);
2515 kvm_pgtable_stage2_destroy(&vm->pgt);
2516 guest_unlock_component(vm);
2517 }
2518
2519 void drain_hyp_pool(struct hyp_pool *pool, struct kvm_hyp_memcache *mc)
2520 {
2521 WARN_ON(reclaim_hyp_pool(pool, mc, INT_MAX) != -ENOMEM);
2522 }
2523
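/*
 * Reclaim a page (or a 2^@order block) from a guest back to the host,
 * poisoning guest-owned memory before the host regains ownership.
 */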
2524 int __pkvm_host_reclaim_page(struct pkvm_hyp_vm *vm, u64 pfn, u64 ipa, u8 order)
2525 {
2526 phys_addr_t __phys, phys = hyp_pfn_to_phys(pfn);
2527 size_t page_size;
2528 kvm_pte_t pte;
2529 int ret = 0;
2530
2531 if (check_shl_overflow(PAGE_SIZE, order, &page_size))
2532 return -EINVAL;
2533
2534 host_lock_component();
2535 guest_lock_component(vm);
2536
2537 ret = guest_get_valid_pte(vm, &__phys, ipa, order, &pte);
2538 if (ret)
2539 goto unlock;
2540
2541 if (phys != __phys) {
2542 ret = -EINVAL;
2543 goto unlock;
2544 }
2545
2546 switch ((int)guest_get_page_state(pte, ipa)) {
2547 case PKVM_PAGE_OWNED:
2548 WARN_ON(___host_check_page_state_range(phys, page_size, PKVM_NOPAGE,
2549 HOST_CHECK_IS_MEMORY));
2550 /* No vCPUs of the guest can run, so doing this prior to the stage-2 unmap is OK */
2551 hyp_poison_page(phys, page_size);
2552 psci_mem_protect_dec(1 << order);
2553 break;
2554 case PKVM_PAGE_SHARED_BORROWED:
2555 case PKVM_PAGE_SHARED_BORROWED | PKVM_PAGE_RESTRICTED_PROT:
2556 WARN_ON(__host_check_page_state_range(phys, page_size, PKVM_PAGE_SHARED_OWNED));
2557 break;
2558 case PKVM_PAGE_SHARED_OWNED:
2559 if (__host_check_page_state_range(phys, page_size, PKVM_PAGE_SHARED_BORROWED)) {
2560 /* Presumably a page shared via FF-A; it will be handled separately */
2561 ret = -EBUSY;
2562 goto unlock;
2563 }
2564 break;
2565 default:
2566 BUG_ON(1);
2567 }
2568
2569 /* We could avoid the TLB invalidation here, as it is done per VMID on the finalize path */
2570 WARN_ON(kvm_pgtable_stage2_unmap(&vm->pgt, ipa, page_size));
2571 WARN_ON(host_stage2_set_owner_locked(phys, page_size, PKVM_ID_HOST));
2572
2573 unlock:
2574 guest_unlock_component(vm);
2575 host_unlock_component();
2576
2577 return ret;
2578 }
2579
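/*
 * Return true if @ipa is covered by a PAGE_SIZE mapping carrying the
 * MMIO-guard annotation (KVM_INVALID_PTE_MMIO_NOTE).
 */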
2580 static bool __check_ioguard_page(struct pkvm_hyp_vcpu *hyp_vcpu, u64 ipa)
2581 {
2582 struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
2583 kvm_pte_t pte;
2584 s8 level;
2585 int ret;
2586
2587 ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
2588 if (ret)
2589 return false;
2590
2591 /* Must be a PAGE_SIZE mapping with our annotation */
2592 return (BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level)) == PAGE_SIZE &&
2593 pte == KVM_INVALID_PTE_MMIO_NOTE);
2594 }
2595
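/*
 * Annotate up to @nr_pages of unmapped guest IPA space with the MMIO-guard
 * marker, reporting how much was actually covered through @nr_guarded.
 */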
2596 int __pkvm_install_ioguard_page(struct pkvm_hyp_vcpu *hyp_vcpu, u64 ipa,
2597 u64 nr_pages, u64 *nr_guarded)
2598 {
2599 struct guest_request_walker_data data = GUEST_WALKER_DATA_INIT(PKVM_NOPAGE);
2600 struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
2601 struct kvm_pgtable_walker walker = {
2602 .cb = guest_request_walker,
2603 .flags = KVM_PGTABLE_WALK_LEAF,
2604 .arg = (void *)&data,
2605 };
2606 int ret;
2607
2608 if (!test_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vm->kvm.arch.flags))
2609 return -EINVAL;
2610
2611 if (!PAGE_ALIGNED(ipa))
2612 return -EINVAL;
2613
2614 guest_lock_component(vm);
2615
2616 /* Check we either have NOMAP or NOMAP|MMIO in this range */
2617 data.desired_mask = ~PKVM_MMIO;
2618
2619 ret = kvm_pgtable_walk(&vm->pgt, ipa, nr_pages << PAGE_SHIFT, &walker);
2620 /* Walker reached data.max_ptes */
2621 if (ret == -E2BIG)
2622 ret = 0;
2623 else if (ret)
2624 goto unlock;
2625
2626 /*
2627 * Intersection between the requested region and what has been verified
2628 */
2629 *nr_guarded = nr_pages = min_t(u64, data.size >> PAGE_SHIFT, nr_pages);
2630 ret = kvm_pgtable_stage2_annotate(&vm->pgt, ipa, nr_pages << PAGE_SHIFT,
2631 &hyp_vcpu->vcpu.arch.stage2_mc,
2632 KVM_INVALID_PTE_MMIO_NOTE);
2633
2634 unlock:
2635 guest_unlock_component(vm);
2636 return ret;
2637 }
2638
2639 bool __pkvm_check_ioguard_page(struct pkvm_hyp_vcpu *hyp_vcpu)
2640 {
2641 struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
2642 u64 ipa, end;
2643 bool ret;
2644
2645 if (!kvm_vcpu_dabt_isvalid(&hyp_vcpu->vcpu))
2646 return false;
2647
2648 if (!test_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vm->kvm.arch.flags))
2649 return true;
2650
2651 ipa = kvm_vcpu_get_fault_ipa(&hyp_vcpu->vcpu);
2652 ipa |= kvm_vcpu_get_hfar(&hyp_vcpu->vcpu) & FAR_MASK;
2653 end = ipa + kvm_vcpu_dabt_get_as(&hyp_vcpu->vcpu) - 1;
2654
2655 guest_lock_component(vm);
2656 ret = __check_ioguard_page(hyp_vcpu, ipa);
2657 if ((end & PAGE_MASK) != (ipa & PAGE_MASK))
2658 ret &= __check_ioguard_page(hyp_vcpu, end);
2659 guest_unlock_component(vm);
2660
2661 return ret;
2662 }
2663
2664 static int __pkvm_remove_ioguard_page(struct pkvm_hyp_vm *vm, u64 ipa)
2665 {
2666 int ret;
2667 kvm_pte_t pte;
2668 s8 level;
2669
2670 hyp_assert_lock_held(&vm->pgtable_lock);
2671
2672 if (!test_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vm->kvm.arch.flags))
2673 return -EINVAL;
2674
2675 if (!PAGE_ALIGNED(ipa))
2676 return -EINVAL;
2677
2678 ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
2679 if (ret)
2680 return ret;
2681
2682 if (BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level)) == PAGE_SIZE &&
2683 pte == KVM_INVALID_PTE_MMIO_NOTE)
2684 return kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE);
2685
2686 return kvm_pte_valid(pte) ? -EEXIST : -EINVAL;
2687 }
2688
2689 int __pkvm_install_guest_mmio(struct pkvm_hyp_vcpu *hyp_vcpu, u64 pfn, u64 gfn)
2690 {
2691 struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
2692 u64 ipa = gfn << PAGE_SHIFT;
2693 int ret;
2694
2695 hyp_lock_component();
2696 guest_lock_component(vm);
2697 ret = __pkvm_remove_ioguard_page(vm, ipa);
2698 if (ret)
2699 goto out_unlock;
2700 ret = pkvm_hyp_donate_guest(hyp_vcpu, pfn, gfn);
2701 out_unlock:
2702 guest_unlock_component(vm);
2703 hyp_unlock_component();
2704 return ret;
2705 }
2706
2707 int host_stage2_get_leaf(phys_addr_t phys, kvm_pte_t *ptep, s8 *level)
2708 {
2709 int ret;
2710
2711 host_lock_component();
2712 ret = kvm_pgtable_get_leaf(&host_mmu.pgt, phys, ptep, level);
2713 host_unlock_component();
2714
2715 return ret;
2716 }
2717
2718 static u64 __pkvm_ptdump_get_host_config(enum pkvm_ptdump_ops op)
2719 {
2720 u64 ret = 0;
2721
2722 host_lock_component();
2723 if (op == PKVM_PTDUMP_GET_LEVEL)
2724 ret = host_mmu.pgt.start_level;
2725 else
2726 ret = host_mmu.pgt.ia_bits;
2727 host_unlock_component();
2728
2729 return ret;
2730 }
2731
2732 static u64 __pkvm_ptdump_get_guest_config(pkvm_handle_t handle, enum pkvm_ptdump_ops op)
2733 {
2734 struct pkvm_hyp_vm *vm;
2735 u64 ret = 0;
2736
2737 vm = get_pkvm_hyp_vm(handle);
2738 if (!vm)
2739 return -EINVAL;
2740
2741 if (op == PKVM_PTDUMP_GET_LEVEL)
2742 ret = vm->pgt.start_level;
2743 else
2744 ret = vm->pgt.ia_bits;
2745
2746 put_pkvm_hyp_vm(vm);
2747 return ret;
2748 }
2749
2750 u64 __pkvm_ptdump_get_config(pkvm_handle_t handle, enum pkvm_ptdump_ops op)
2751 {
2752 if (!handle)
2753 return __pkvm_ptdump_get_host_config(op);
2754
2755 return __pkvm_ptdump_get_guest_config(handle, op);
2756 }
2757
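/*
 * Page-table walk callback: append one log entry per visited PTE to the
 * current log page, moving to the next page via pfn_next when the current
 * one is full.
 */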
2758 static int pkvm_ptdump_walker(const struct kvm_pgtable_visit_ctx *ctx,
2759 enum kvm_pgtable_walk_flags visit)
2760 {
2761 struct pkvm_ptdump_log_hdr **log_hdr = ctx->arg;
2762 ssize_t avail_space = PAGE_SIZE - (*log_hdr)->w_index - sizeof(struct pkvm_ptdump_log_hdr);
2763 struct pkvm_ptdump_log *log;
2764
2765 if (avail_space < sizeof(struct pkvm_ptdump_log)) {
2766 if ((*log_hdr)->pfn_next == INVALID_PTDUMP_PFN)
2767 return -ENOMEM;
2768
2769 *log_hdr = hyp_phys_to_virt(hyp_pfn_to_phys((*log_hdr)->pfn_next));
2770 WARN_ON((*log_hdr)->w_index);
2771 }
2772
2773 log = (struct pkvm_ptdump_log *)((void *)*log_hdr + (*log_hdr)->w_index +
2774 sizeof(struct pkvm_ptdump_log_hdr));
2775 log->pfn = ctx->addr >> PAGE_SHIFT;
2776 log->valid = ctx->old & PTE_VALID;
2777 log->r = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R, ctx->old);
2778 log->w = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, ctx->old);
2779 log->xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, ctx->old);
2780 log->table = FIELD_GET(KVM_PTE_TYPE, ctx->old);
2781 log->level = ctx->level;
2782 log->page_state = FIELD_GET(PKVM_PAGE_STATE_PROT_MASK, ctx->old);
2783
2784 (*log_hdr)->w_index += sizeof(struct pkvm_ptdump_log);
2785 return 0;
2786 }
2787
2788 static void pkvm_ptdump_teardown_log(struct pkvm_ptdump_log_hdr *log_hva,
2789 struct pkvm_ptdump_log_hdr *cur)
2790 {
2791 struct pkvm_ptdump_log_hdr *tmp, *log = (void *)kern_hyp_va(log_hva);
2792 bool next_log_invalid = false;
2793
2794 while (log != cur && !next_log_invalid) {
2795 next_log_invalid = log->pfn_next == INVALID_PTDUMP_PFN;
2796 tmp = hyp_phys_to_virt(hyp_pfn_to_phys(log->pfn_next));
2797 WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(log), 1));
2798 log = tmp;
2799 }
2800 }
2801
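/*
 * Donate the host-provided chain of log pages to the hyp and reset their
 * write indices; on failure, the pages already donated are handed back to
 * the host.
 */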
2802 static int pkvm_ptdump_setup_log(struct pkvm_ptdump_log_hdr *log_hva)
2803 {
2804 int ret;
2805 struct pkvm_ptdump_log_hdr *log = (void *)kern_hyp_va(log_hva);
2806
2807 if (!PAGE_ALIGNED(log))
2808 return -EINVAL;
2809
2810 for (;;) {
2811 ret = __pkvm_host_donate_hyp(hyp_virt_to_pfn(log), 1);
2812 if (ret) {
2813 pkvm_ptdump_teardown_log(log_hva, log);
2814 return ret;
2815 }
2816
2817 log->w_index = 0;
2818 if (log->pfn_next == INVALID_PTDUMP_PFN)
2819 break;
2820
2821 log = hyp_phys_to_virt(hyp_pfn_to_phys(log->pfn_next));
2822 }
2823
2824 return 0;
2825 }
2826
2827 static int pkvm_ptdump_walk_host(struct kvm_pgtable_walker *walker)
2828 {
2829 int ret;
2830
2831 host_lock_component();
2832 ret = kvm_pgtable_walk(&host_mmu.pgt, 0, BIT(host_mmu.pgt.ia_bits), walker);
2833 host_unlock_component();
2834
2835 return ret;
2836 }
2837
2838 static int pkvm_ptdump_walk_guest(struct pkvm_hyp_vm *vm, struct kvm_pgtable_walker *walker)
2839 {
2840 int ret;
2841
2842 guest_lock_component(vm);
2843
2844 ret = kvm_pgtable_walk(&vm->pgt, 0, BIT(vm->pgt.ia_bits), walker);
2845
2846 guest_unlock_component(vm);
2847
2848 return ret;
2849 }
2850
2851 u64 __pkvm_ptdump_walk_range(pkvm_handle_t handle, struct pkvm_ptdump_log_hdr *log)
2852 {
2853 struct pkvm_hyp_vm *vm;
2854 int ret;
2855 struct pkvm_ptdump_log_hdr *log_hyp = kern_hyp_va(log);
2856 struct kvm_pgtable_walker walker = {
2857 .cb = pkvm_ptdump_walker,
2858 .flags = KVM_PGTABLE_WALK_LEAF,
2859 .arg = &log_hyp,
2860 };
2861
2862 ret = pkvm_ptdump_setup_log(log);
2863 if (ret)
2864 return ret;
2865
2866 if (!handle)
2867 ret = pkvm_ptdump_walk_host(&walker);
2868 else {
2869 vm = get_pkvm_hyp_vm(handle);
2870 if (!vm) {
2871 ret = -EINVAL;
2872 goto teardown;
2873 }
2874
2875 ret = pkvm_ptdump_walk_guest(vm, &walker);
2876 put_pkvm_hyp_vm(vm);
2877 }
2878 teardown:
2879 pkvm_ptdump_teardown_log(log, NULL);
2880 return ret;
2881 }
2882
2883 /* Return PA for an owned guest IPA or request it, and repeat the guest HVC */
2884 int pkvm_get_guest_pa_request(struct pkvm_hyp_vcpu *hyp_vcpu, u64 ipa,
2885 size_t ipa_size_request, u64 *out_pa, s8 *out_level)
2886 {
2887 struct kvm_hyp_req *req;
2888 kvm_pte_t pte;
2889 enum pkvm_page_state state;
2890 struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
2891
2892 guest_lock_component(vm);
2893 WARN_ON(kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, out_level));
2894 guest_unlock_component(vm);
2895 if (!kvm_pte_valid(pte)) {
2896 /* Page not mapped; create a request */
2897 req = pkvm_hyp_req_reserve(hyp_vcpu, KVM_HYP_REQ_TYPE_MAP);
2898 if (!req)
2899 return -ENOMEM;
2900
2901 req->map.guest_ipa = ipa;
2902 req->map.size = ipa_size_request;
2903 return -ENOENT;
2904 }
2905
2906 state = pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
2907 if (state != PKVM_PAGE_OWNED)
2908 return -EPERM;
2909
2910 *out_pa = kvm_pte_to_phys(pte);
2911 *out_pa |= ipa & (kvm_granule_size(*out_level) - 1) & PAGE_MASK;
2912 return 0;
2913 }
2914
2915 /* Get a PA and use the page for DMA */
2916 int pkvm_get_guest_pa_request_use_dma(struct pkvm_hyp_vcpu *hyp_vcpu, u64 ipa,
2917 size_t ipa_size_request, u64 *out_pa, s8 *level)
2918 {
2919 int ret;
2920
2921 host_lock_component();
2922 ret = pkvm_get_guest_pa_request(hyp_vcpu, ipa, ipa_size_request,
2923 out_pa, level);
2924 if (ret)
2925 goto out_ret;
2926 WARN_ON(__pkvm_use_dma_locked(*out_pa, kvm_granule_size(*level), hyp_vcpu));
2927 out_ret:
2928 host_unlock_component();
2929 return ret;
2930 }
2931
2932 #ifdef CONFIG_PKVM_SELFTESTS
2933 struct pkvm_expected_state {
2934 enum pkvm_page_state host;
2935 enum pkvm_page_state hyp;
2936 enum pkvm_page_state guest[2]; /* [ gfn, gfn + 1 ] */
2937 };
2938
2939 static struct pkvm_expected_state selftest_state;
2940 static struct hyp_page *selftest_page;
2941
2942 static struct pkvm_hyp_vm selftest_vm = {
2943 .kvm = {
2944 .arch = {
2945 .mmu = {
2946 .arch = &selftest_vm.kvm.arch,
2947 .pgt = &selftest_vm.pgt,
2948 },
2949 },
2950 },
2951 };
2952
2953 static struct pkvm_hyp_vcpu selftest_vcpu = {
2954 .vcpu = {
2955 .arch = {
2956 .hw_mmu = &selftest_vm.kvm.arch.mmu,
2957 },
2958 .kvm = &selftest_vm.kvm,
2959 },
2960 };
2961
2962 static void init_selftest_vm(void *virt)
2963 {
2964 struct hyp_page *p = hyp_virt_to_page(virt);
2965 int i;
2966
2967 selftest_vm.kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
2968 WARN_ON(kvm_guest_prepare_stage2(&selftest_vm, virt));
2969
2970 for (i = 0; i < pkvm_selftest_pages(); i++) {
2971 if (p[i].refcount)
2972 continue;
2973 p[i].refcount = 1;
2974 hyp_put_page(&selftest_vm.pool, hyp_page_to_virt(&p[i]));
2975 }
2976 }
2977
2978 static void teardown_selftest_vm(void)
2979 {
2980 destroy_hyp_vm_pgt(&selftest_vm);
2981 }
2982
2983 static u64 selftest_ipa(void)
2984 {
2985 return BIT(selftest_vm.pgt.ia_bits - 1);
2986 }
2987
2988 static void assert_page_state(void)
2989 {
2990 void *virt = hyp_page_to_virt(selftest_page);
2991 u64 size = PAGE_SIZE << selftest_page->order;
2992 struct pkvm_hyp_vcpu *vcpu = &selftest_vcpu;
2993 u64 phys = hyp_virt_to_phys(virt);
2994 u64 ipa[2] = { selftest_ipa(), selftest_ipa() + PAGE_SIZE };
2995
2996 host_lock_component();
2997 WARN_ON(__host_check_page_state_range(phys, size, selftest_state.host));
2998 host_unlock_component();
2999
3000 hyp_lock_component();
3001 WARN_ON(__hyp_check_page_state_range((u64)virt, size, selftest_state.hyp));
3002 hyp_unlock_component();
3003
3004 guest_lock_component(&selftest_vm);
3005 WARN_ON(__guest_check_page_state_range(vcpu, ipa[0], size, selftest_state.guest[0]));
3006 WARN_ON(__guest_check_page_state_range(vcpu, ipa[1], size, selftest_state.guest[1]));
3007 guest_unlock_component(&selftest_vm);
3008 }
3009
3010 #define assert_transition_res(res, fn, ...) \
3011 do { \
3012 WARN_ON(fn(__VA_ARGS__) != res); \
3013 assert_page_state(); \
3014 } while (0)
3015
3016 void pkvm_ownership_selftest(void *base)
3017 {
3018 enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_RWX;
3019 void *virt = hyp_alloc_pages(&host_s2_pool, 0);
3020 struct pkvm_hyp_vcpu *vcpu = &selftest_vcpu;
3021 struct pkvm_hyp_vm *vm = &selftest_vm;
3022 u64 phys, size, pfn, gfn, pa;
3023
3024 WARN_ON(!virt);
3025 selftest_page = hyp_virt_to_page(virt);
3026 selftest_page->refcount = 0;
3027 init_selftest_vm(base);
3028
3029 size = PAGE_SIZE << selftest_page->order;
3030 phys = hyp_virt_to_phys(virt);
3031 pfn = hyp_phys_to_pfn(phys);
3032 gfn = hyp_phys_to_pfn(selftest_ipa());
3033
3034 selftest_state.host = PKVM_NOPAGE;
3035 selftest_state.hyp = PKVM_PAGE_OWNED;
3036 selftest_state.guest[0] = selftest_state.guest[1] = PKVM_NOPAGE;
3037 assert_page_state();
3038 assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
3039 assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
3040 assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
3041 assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
3042 assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1);
3043 assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);
3044 assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, vcpu, prot, 1);
3045 assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, vm, 1);
3046 assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu, 1);
3047
3048 selftest_state.host = PKVM_PAGE_OWNED;
3049 selftest_state.hyp = PKVM_NOPAGE;
3050 assert_transition_res(0, __pkvm_hyp_donate_host, pfn, 1);
3051 assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
3052 assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
3053 assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1);
3054 assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, vm, 1);
3055 assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);
3056
3057 selftest_state.host = PKVM_PAGE_SHARED_OWNED;
3058 selftest_state.hyp = PKVM_PAGE_SHARED_BORROWED;
3059 assert_transition_res(0, __pkvm_host_share_hyp, pfn);
3060 assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
3061 assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
3062 assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
3063 assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
3064 assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, vcpu, prot, 1);
3065 assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, vm, 1);
3066 assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu, 1);
3067
3068 assert_transition_res(0, hyp_pin_shared_mem, virt, virt + size);
3069 assert_transition_res(0, hyp_pin_shared_mem, virt, virt + size);
3070 hyp_unpin_shared_mem(virt, virt + size);
3071 WARN_ON(hyp_page_count(virt) != 1);
3072 assert_transition_res(-EBUSY, __pkvm_host_unshare_hyp, pfn);
3073 assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
3074 assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
3075 assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
3076 assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
3077 assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, vcpu, prot, 1);
3078 assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, vm, 1);
3079 assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu, 1);
3080
3081 hyp_unpin_shared_mem(virt, virt + size);
3082 assert_page_state();
3083 WARN_ON(hyp_page_count(virt));
3084
3085 selftest_state.host = PKVM_PAGE_OWNED;
3086 selftest_state.hyp = PKVM_NOPAGE;
3087 assert_transition_res(0, __pkvm_host_unshare_hyp, pfn);
3088
3089 selftest_state.host = PKVM_PAGE_SHARED_OWNED;
3090 selftest_state.hyp = PKVM_NOPAGE;
3091 assert_transition_res(0, __pkvm_host_share_ffa, pfn, 1);
3092 assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
3093 assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
3094 assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
3095 assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
3096 assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
3097 assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, vcpu, prot, 1);
3098 assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, vm, 1);
3099 assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu, 1);
3100 assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);
3101
3102 selftest_state.host = PKVM_PAGE_OWNED;
3103 selftest_state.hyp = PKVM_NOPAGE;
3104 assert_transition_res(0, __pkvm_host_unshare_ffa, pfn, 1);
3105 assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1);
3106
3107 selftest_state.host = PKVM_PAGE_SHARED_OWNED;
3108 selftest_state.guest[0] = PKVM_PAGE_SHARED_BORROWED;
3109 assert_transition_res(0, __pkvm_host_share_guest, pfn, gfn, vcpu, prot, 1);
3110 assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, vcpu, prot, 1);
3111 assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
3112 assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
3113 assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
3114 assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
3115 assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
3116 assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu, 1);
3117 assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);
3118
3119 selftest_state.guest[1] = PKVM_PAGE_SHARED_BORROWED;
3120 assert_transition_res(0, __pkvm_host_share_guest, pfn, gfn + 1, vcpu, prot, 1);
3121 WARN_ON(hyp_virt_to_page(virt)->host_share_guest_count != 2);
3122
3123 selftest_state.guest[0] = PKVM_NOPAGE;
3124 assert_transition_res(0, __pkvm_host_unshare_guest, gfn, vm, 1);
3125
3126 selftest_state.guest[1] = PKVM_NOPAGE;
3127 selftest_state.host = PKVM_PAGE_OWNED;
3128 assert_transition_res(0, __pkvm_host_unshare_guest, gfn + 1, vm, 1);
3129
3130 selftest_vm.kvm.arch.pkvm.enabled = true;
3131 selftest_state.host = PKVM_NOPAGE;
3132 selftest_state.guest[0] = PKVM_PAGE_OWNED;
3133 assert_transition_res(0, __pkvm_host_donate_guest, pfn, gfn, vcpu, 1);
3134 assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu, 1);
3135 assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn + 1, vcpu, 1);
3136 assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, vcpu, prot, 1);
3137 assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn + 1, vcpu, prot, 1);
3138 assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
3139 assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
3140 assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
3141 assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
3142 assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
3143
3144 selftest_state.host = PKVM_PAGE_OWNED;
3145 selftest_state.guest[0] = PKVM_NOPAGE;
3146 assert_transition_res(0, __pkvm_guest_relinquish_to_host, vcpu, gfn * PAGE_SIZE, &pa);
3147 WARN_ON(pa != phys);
3148
3149 selftest_state.host = PKVM_NOPAGE;
3150 selftest_state.hyp = PKVM_PAGE_OWNED;
3151 assert_transition_res(0, __pkvm_host_donate_hyp, pfn, 1);
3152
3153 teardown_selftest_vm();
3154 selftest_page->refcount = 1;
3155 hyp_put_page(&host_s2_pool, virt);
3156 }
3157 #endif
3158