1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2020 - Google LLC
4 * Author: Quentin Perret <qperret@google.com>
5 */
6
7 #include <linux/arm_ffa.h>
8 #include <linux/delay.h>
9 #include <linux/init.h>
10 #include <linux/initrd.h>
11 #include <linux/interval_tree_generic.h>
12 #include <linux/io.h>
13 #include <linux/iommu.h>
14 #include <linux/kmemleak.h>
15 #include <linux/kvm_host.h>
17 #include <linux/memblock.h>
18 #include <linux/mm.h>
19 #include <linux/mutex.h>
20 #include <linux/of_address.h>
21 #include <linux/of_fdt.h>
22 #include <linux/of_reserved_mem.h>
23 #include <linux/platform_device.h>
24 #include <linux/sort.h>
25
26 #include <asm/kvm_host.h>
27 #include <asm/kvm_hyp.h>
28 #include <asm/kvm_mmu.h>
29 #include <asm/kvm_pkvm.h>
30 #include <asm/kvm_pkvm_module.h>
31 #include <asm/patching.h>
32 #include <asm/setup.h>
33
34 #include <kvm/device.h>
35
36 #include <linux/init_syscalls.h>
37 #include <uapi/linux/mount.h>
38
39 #include "hyp_constants.h"
40 #include "hyp_trace.h"
41
42 #define PKVM_DEVICE_ASSIGN_COMPAT "pkvm,device-assignment"
43
44 /*
 * Retry the VM availability message a maximum total number of times,
 * with sleeps in between. For the first few attempts, do a faster
 * reschedule instead of a full sleep.
48 */
49 #define VM_AVAILABILITY_FAST_RETRIES 5
50 #define VM_AVAILABILITY_TOTAL_RETRIES 500
51 #define VM_AVAILABILITY_RETRY_SLEEP_MS 10
52
53 DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
54
55 static phys_addr_t pvmfw_base;
56 static phys_addr_t pvmfw_size;
57
58 static struct pkvm_moveable_reg *moveable_regs = kvm_nvhe_sym(pkvm_moveable_regs);
59 static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
60 static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);
61
62 phys_addr_t hyp_mem_base;
63 phys_addr_t hyp_mem_size;
64
65 extern struct pkvm_device *kvm_nvhe_sym(registered_devices);
66 extern u32 kvm_nvhe_sym(registered_devices_nr);
67
static int cmp_hyp_memblock(const void *p1, const void *p2)
69 {
70 const struct memblock_region *r1 = p1;
71 const struct memblock_region *r2 = p2;
72
73 return r1->base < r2->base ? -1 : (r1->base > r2->base);
74 }
75
static void __init sort_memblock_regions(void)
77 {
78 sort(hyp_memory,
79 *hyp_memblock_nr_ptr,
80 sizeof(struct memblock_region),
81 cmp_hyp_memblock,
82 NULL);
83 }
84
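/*
 * Copy the kernel memblocks into the hypervisor's private copy. If the pvmfw
 * region does not overlap any memblock, append it as a NOMAP region so the
 * hypervisor still knows about it; if it does overlap, it must be fully
 * contained within that memblock.
 */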
static int __init register_memblock_regions(void)
86 {
87 struct memblock_region *reg;
88 bool pvmfw_in_mem = false;
89
90 for_each_mem_region(reg) {
91 if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
92 return -ENOMEM;
93
94 hyp_memory[*hyp_memblock_nr_ptr] = *reg;
95 (*hyp_memblock_nr_ptr)++;
96
97 if (!pvmfw_size || pvmfw_in_mem ||
98 !memblock_addrs_overlap(reg->base, reg->size, pvmfw_base, pvmfw_size))
99 continue;
100 /* If the pvmfw region overlaps a memblock, it must be a subset */
101 if (pvmfw_base < reg->base || (pvmfw_base + pvmfw_size) > (reg->base + reg->size))
102 return -EINVAL;
103 pvmfw_in_mem = true;
104 }
105
106 if (pvmfw_size && !pvmfw_in_mem) {
107 if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
108 return -ENOMEM;
109
110 hyp_memory[*hyp_memblock_nr_ptr] = (struct memblock_region) {
111 .base = pvmfw_base,
112 .size = pvmfw_size,
113 .flags = MEMBLOCK_NOMAP,
114 };
115 (*hyp_memblock_nr_ptr)++;
116 }
117 sort_memblock_regions();
118
119 return 0;
120 }
121
static int cmp_moveable_reg(const void *p1, const void *p2)
123 {
124 const struct pkvm_moveable_reg *r1 = p1;
125 const struct pkvm_moveable_reg *r2 = p2;
126
127 /*
128 * Moveable regions may overlap, so put the largest one first when start
129 * addresses are equal to allow a simpler walk from e.g.
130 * host_stage2_unmap_unmoveable_regs().
131 */
132 if (r1->start < r2->start)
133 return -1;
134 else if (r1->start > r2->start)
135 return 1;
136 else if (r1->size > r2->size)
137 return -1;
138 else if (r1->size < r2->size)
139 return 1;
140 return 0;
141 }
142
static void __init sort_moveable_regs(void)
144 {
145 sort(moveable_regs,
146 kvm_nvhe_sym(pkvm_moveable_regs_nr),
147 sizeof(struct pkvm_moveable_reg),
148 cmp_moveable_reg,
149 NULL);
150 }
151
static int __init register_moveable_fdt_resource(struct device_node *np,
						 enum pkvm_moveable_reg_type type)
154 {
155 struct resource res;
156 u64 start, size;
157 unsigned int j = 0;
158 unsigned int i = kvm_nvhe_sym(pkvm_moveable_regs_nr);
159
while (!of_address_to_resource(np, j, &res)) {
161 if (i >= PKVM_NR_MOVEABLE_REGS)
162 return -ENOMEM;
163
164 start = res.start;
165 size = resource_size(&res);
166 if (!PAGE_ALIGNED(start) || !PAGE_ALIGNED(size))
167 return -EINVAL;
168
169 moveable_regs[i].start = start;
170 moveable_regs[i].size = size;
171 moveable_regs[i].type = type;
172 i++;
173 j++;
174 }
175
176 kvm_nvhe_sym(pkvm_moveable_regs_nr) = i;
177 return 0;
178 }
179
static int __init register_moveable_regions(void)
181 {
182 struct memblock_region *reg;
183 struct device_node *np;
184 int i = 0, ret = 0, idx = 0;
185
186 for_each_mem_region(reg) {
187 if (i >= PKVM_NR_MOVEABLE_REGS)
188 return -ENOMEM;
189 moveable_regs[i].start = reg->base;
190 moveable_regs[i].size = reg->size;
191 moveable_regs[i].type = PKVM_MREG_MEMORY;
192 i++;
193 }
194 kvm_nvhe_sym(pkvm_moveable_regs_nr) = i;
195
196 for_each_compatible_node(np, NULL, "pkvm,protected-region") {
197 ret = register_moveable_fdt_resource(np, PKVM_MREG_PROTECTED_RANGE);
198 if (ret)
199 goto out_fail;
200 }
201
202 for_each_compatible_node(np, NULL, PKVM_DEVICE_ASSIGN_COMPAT) {
203 struct of_phandle_args args;
204
205 while (!of_parse_phandle_with_fixed_args(np, "devices", 1, idx, &args)) {
206 idx++;
207 ret = register_moveable_fdt_resource(args.np, PKVM_MREG_ASSIGN_MMIO);
208 of_node_put(args.np);
209 if (ret)
210 goto out_fail;
211 }
212 }
213
214 sort_moveable_regs();
215
216 return ret;
217 out_fail:
218 of_node_put(np);
219 kvm_nvhe_sym(pkvm_moveable_regs_nr) = 0;
220 return ret;
221 }
222
static int __init early_hyp_lm_size_mb_cfg(char *arg)
224 {
225 return kstrtoull(arg, 10, &kvm_nvhe_sym(hyp_lm_size_mb));
226 }
227 early_param("kvm-arm.hyp_lm_size_mb", early_hyp_lm_size_mb_cfg);
228
229 DEFINE_STATIC_KEY_FALSE(kvm_ffa_unmap_on_lend);
230
void __init kvm_hyp_reserve(void)
232 {
233 u64 hyp_mem_pages = 0;
234 int ret;
235
236 if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
237 return;
238
239 if (kvm_get_mode() != KVM_MODE_PROTECTED)
240 return;
241
242 ret = register_memblock_regions();
243 if (ret) {
244 *hyp_memblock_nr_ptr = 0;
245 kvm_err("Failed to register hyp memblocks: %d\n", ret);
246 return;
247 }
248
249 ret = register_moveable_regions();
250 if (ret) {
251 *hyp_memblock_nr_ptr = 0;
252 kvm_err("Failed to register pkvm moveable regions: %d\n", ret);
253 return;
254 }
255
256 hyp_mem_pages += hyp_s1_pgtable_pages();
257 hyp_mem_pages += host_s2_pgtable_pages();
258 hyp_mem_pages += hyp_vm_table_pages();
259 hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
260 hyp_mem_pages += pkvm_selftest_pages();
261 hyp_mem_pages += hyp_ffa_proxy_pages();
262
263 if (static_branch_unlikely(&kvm_ffa_unmap_on_lend))
264 hyp_mem_pages += KVM_FFA_SPM_HANDLE_NR_PAGES;
265
266 hyp_mem_pages++; /* hyp_ppages */
267
268 /*
269 * Try to allocate a PMD-aligned region to reduce TLB pressure once
 * this is unmapped from the host stage-2, and fall back to PAGE_SIZE.
271 */
272 hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
273 hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
274 PMD_SIZE);
275 if (!hyp_mem_base)
276 hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
277 else
278 hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);
279
280 if (!hyp_mem_base) {
281 kvm_err("Failed to reserve hyp memory\n");
282 return;
283 }
284
285 kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
286 hyp_mem_base);
287 }
288
289
static void __pkvm_vcpu_hyp_created(struct kvm_vcpu *vcpu)
291 {
292 if (kvm_vm_is_protected(vcpu->kvm))
293 vcpu->arch.sve_state = NULL;
294
295 vcpu_set_flag(vcpu, VCPU_PKVM_FINALIZED);
296 }
297
static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *host_vcpu)
299 {
300 pkvm_handle_t handle = host_vcpu->kvm->arch.pkvm.handle;
301 struct kvm_hyp_req *hyp_reqs;
302 int ret;
303
304 init_hyp_stage2_memcache(&host_vcpu->arch.stage2_mc);
305
306 hyp_reqs = (struct kvm_hyp_req *)__get_free_page(GFP_KERNEL_ACCOUNT);
307 if (!hyp_reqs)
308 return -ENOMEM;
309
310 ret = kvm_share_hyp(hyp_reqs, hyp_reqs + 1);
311 if (ret)
312 goto err_free_reqs;
313 host_vcpu->arch.hyp_reqs = hyp_reqs;
314
315 ret = kvm_call_refill_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu);
316 if (!ret) {
317 __pkvm_vcpu_hyp_created(host_vcpu);
318 return 0;
319 }
320
321 kvm_unshare_hyp(hyp_reqs, hyp_reqs + 1);
322 err_free_reqs:
323 free_page((unsigned long)hyp_reqs);
324 host_vcpu->arch.hyp_reqs = NULL;
325
326 return ret;
327 }
328
329 /*
330 * Handle split huge pages which have not been reported to the kvm_pinned_page tree.
331 */
static int pkvm_call_hyp_nvhe_ppage(struct kvm_pinned_page *ppage,
				    int (*call_hyp_nvhe)(u64 pfn, u64 gfn, u8 order, void *args),
				    void *args)
335 {
336 size_t page_size, size = PAGE_SIZE << ppage->order;
337 u64 pfn = page_to_pfn(ppage->page);
338 u8 order = ppage->order;
339 u64 gfn = ppage->ipa >> PAGE_SHIFT;
340
341 while (size) {
342 int err = call_hyp_nvhe(pfn, gfn, order, args);
343
344 switch (err) {
345 case -E2BIG:
346 if (order)
347 order = 0;
348 else
349 /* Something is really wrong ... */
350 return -EINVAL;
351 break;
352 case 0:
353 page_size = PAGE_SIZE << order;
354 gfn += 1 << order;
355 pfn += 1 << order;
356
357 if (page_size > size)
358 return -EINVAL;
359
360 size -= page_size;
361 break;
362 default:
363 return err;
364 }
365 }
366
367 return 0;
368 }
369
static int __reclaim_dying_guest_page_call(u64 pfn, u64 gfn, u8 order, void *args)
371 {
372 struct kvm *host_kvm = args;
373
374 return kvm_call_hyp_nvhe(__pkvm_reclaim_dying_guest_page,
375 host_kvm->arch.pkvm.handle,
376 pfn, gfn, order);
377 }
378
/*
 * __pkvm_notify_guest_vm_avail_retry - notify the secure world of a VM
 * availability state change
 * @host_kvm: the kvm structure
 * @availability_msg: the VM state that will be notified
 *
 * Returns: 0 when the notification is sent successfully, -EINTR or -EAGAIN
 * when the notification keeps being interrupted and the retries are
 * exhausted (or, for the creation message, when the retry sleep is
 * interrupted, e.g. because the process was killed), or any other error
 * returned by the hypervisor call.
 */
static int __pkvm_notify_guest_vm_avail_retry(struct kvm *host_kvm, u32 availability_msg)
389 {
390 int ret, retries;
391 long timeout;
392
393 if (!host_kvm->arch.pkvm.ffa_support)
394 return 0;
395
396 for (retries = 0; retries < VM_AVAILABILITY_TOTAL_RETRIES; retries++) {
397 ret = kvm_call_hyp_nvhe(__pkvm_notify_guest_vm_avail,
398 host_kvm->arch.pkvm.handle);
399 if (!ret)
400 return 0;
401 else if (ret != -EINTR && ret != -EAGAIN)
402 return ret;
403
404 if (retries < VM_AVAILABILITY_FAST_RETRIES) {
405 cond_resched();
406 } else if (availability_msg == FFA_VM_DESTRUCTION_MSG) {
407 msleep(VM_AVAILABILITY_RETRY_SLEEP_MS);
408 } else {
409 timeout = msecs_to_jiffies(VM_AVAILABILITY_RETRY_SLEEP_MS);
410 timeout = schedule_timeout_killable(timeout);
411 if (timeout) {
412 /*
413 * The timer did not expire,
414 * most likely because the
415 * process was killed.
416 */
417 return ret;
418 }
419 }
420 }
421
422 return ret;
423 }
424
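/*
 * Tear down the hypervisor side of a protected VM: start the teardown,
 * reclaim every pinned guest page (pages that are still busy, e.g. lent over
 * FF-A, return -EBUSY and are retried after reclaiming the dying guest's
 * FF-A resources), notify the secure world of the destruction, finalize the
 * teardown, and release the teardown memcaches and per-vCPU hyp request
 * pages.
 */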
static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
426 {
427 struct mm_struct *mm = current->mm;
428 struct kvm_pinned_page *ppage;
429 struct kvm_vcpu *host_vcpu;
430 unsigned long nr_busy;
431 unsigned long pages;
432 unsigned long idx;
433 int ret, notify_status;
434
435 if (!pkvm_is_hyp_created(host_kvm))
436 goto out_free;
437
438 WARN_ON(kvm_call_hyp_nvhe(__pkvm_start_teardown_vm, host_kvm->arch.pkvm.handle));
439
440 retry:
441 pages = 0;
442 nr_busy = 0;
443 ppage = kvm_pinned_pages_iter_first(&host_kvm->arch.pkvm.pinned_pages, 0, ~(0UL));
444 while (ppage) {
445 struct kvm_pinned_page *next;
446
447 ret = pkvm_call_hyp_nvhe_ppage(ppage, __reclaim_dying_guest_page_call,
448 host_kvm);
449 cond_resched();
450 if (ret == -EBUSY) {
451 nr_busy++;
452 next = kvm_pinned_pages_iter_next(ppage, 0, ~(0UL));
453 ppage = next;
454 continue;
455 }
456 WARN_ON(ret);
457
458 unpin_user_pages_dirty_lock(&ppage->page, 1, true);
459 next = kvm_pinned_pages_iter_next(ppage, 0, ~(0UL));
460 kvm_pinned_pages_remove(ppage, &host_kvm->arch.pkvm.pinned_pages);
461 pages += 1 << ppage->order;
462 kfree(ppage);
463 ppage = next;
464 }
465
466 account_locked_vm(mm, pages, false);
467
468 notify_status = __pkvm_notify_guest_vm_avail_retry(host_kvm, FFA_VM_DESTRUCTION_MSG);
469 if (nr_busy) {
470 do {
471 ret = kvm_call_hyp_nvhe(__pkvm_reclaim_dying_guest_ffa_resources,
472 host_kvm->arch.pkvm.handle);
473 WARN_ON(ret && ret != -EAGAIN);
474
475 if (notify_status == -EINTR || notify_status == -EAGAIN)
476 notify_status = __pkvm_notify_guest_vm_avail_retry(
477 host_kvm, FFA_VM_DESTRUCTION_MSG);
478 cond_resched();
479 } while (ret == -EAGAIN);
480 goto retry;
481 }
482
483 WARN_ON(kvm_call_hyp_nvhe(__pkvm_finalize_teardown_vm, host_kvm->arch.pkvm.handle));
484
485 out_free:
486 host_kvm->arch.pkvm.handle = 0;
487
488 atomic64_sub(host_kvm->arch.pkvm.stage2_teardown_mc.nr_pages << PAGE_SHIFT,
489 &host_kvm->stat.protected_hyp_mem);
490 free_hyp_memcache(&host_kvm->arch.pkvm.stage2_teardown_mc);
491
492 kvm_iommu_guest_free_mc(&host_kvm->arch.pkvm.teardown_iommu_mc);
493
494 kvm_for_each_vcpu(idx, host_vcpu, host_kvm) {
495 struct kvm_hyp_req *hyp_reqs = host_vcpu->arch.hyp_reqs;
496
497 if (!hyp_reqs)
498 continue;
499
500 kvm_unshare_hyp(hyp_reqs, hyp_reqs + 1);
501 host_vcpu->arch.hyp_reqs = NULL;
502 free_page((unsigned long)hyp_reqs);
503
504 kvm_iommu_guest_free_mc(&host_vcpu->arch.iommu_mc);
505 }
506 }
507
508 /*
509 * Allocates and donates memory for hypervisor VM structs at EL2.
510 *
511 * Allocates space for the VM state, which includes the hyp vm as well as
512 * the hyp vcpus.
513 *
 * Stores an opaque handle in the kvm struct for future reference.
515 *
516 * Return 0 on success, negative error code on failure.
517 */
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
519 {
520 size_t pgd_sz;
521 void *pgd;
522 int ret;
523
524 if (host_kvm->created_vcpus < 1)
525 return -EINVAL;
526
527 pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr);
528
529 /*
530 * The PGD pages will be reclaimed using a hyp_memcache which implies
531 * page granularity. So, use alloc_pages_exact() to get individual
532 * refcounts.
533 */
534 pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
535 if (!pgd)
536 return -ENOMEM;
537 atomic64_add(pgd_sz, &host_kvm->stat.protected_hyp_mem);
538
539 init_hyp_stage2_memcache(&host_kvm->arch.pkvm.stage2_teardown_mc);
540
541 /* Donate the VM memory to hyp and let hyp initialize it. */
542 ret = kvm_call_refill_hyp_nvhe(__pkvm_init_vm, host_kvm, pgd);
543 if (ret < 0)
544 goto free_pgd;
545
546 WRITE_ONCE(host_kvm->arch.pkvm.handle, ret);
547
548 kvm_account_pgtable_pages(pgd, pgd_sz >> PAGE_SHIFT);
549
550 return __pkvm_notify_guest_vm_avail_retry(host_kvm, FFA_VM_CREATION_MSG);
551 free_pgd:
552 free_pages_exact(pgd, pgd_sz);
553 atomic64_sub(pgd_sz, &host_kvm->stat.protected_hyp_mem);
554
555 return ret;
556 }
557
bool pkvm_is_hyp_created(struct kvm *host_kvm)
559 {
560 return READ_ONCE(host_kvm->arch.pkvm.handle);
561 }
562
int pkvm_create_hyp_vm(struct kvm *host_kvm)
564 {
565 int ret = 0;
566
567 mutex_lock(&host_kvm->arch.config_lock);
568 if (!pkvm_is_hyp_created(host_kvm))
569 ret = __pkvm_create_hyp_vm(host_kvm);
570 mutex_unlock(&host_kvm->arch.config_lock);
571
572 return ret;
573 }
574
int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
576 {
577 int ret = 0;
578
579 mutex_lock(&vcpu->kvm->arch.config_lock);
580 if (!vcpu_get_flag(vcpu, VCPU_PKVM_FINALIZED))
581 ret = __pkvm_create_hyp_vcpu(vcpu);
582 mutex_unlock(&vcpu->kvm->arch.config_lock);
583
584 return ret;
585 }
586
void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
588 {
589 mutex_lock(&host_kvm->arch.config_lock);
590 __pkvm_destroy_hyp_vm(host_kvm);
591 mutex_unlock(&host_kvm->arch.config_lock);
592 }
593
int pkvm_init_host_vm(struct kvm *host_kvm, unsigned long type)
595 {
596 if (!(type & KVM_VM_TYPE_ARM_PROTECTED))
597 return 0;
598
599 if (!is_protected_kvm_enabled())
600 return -EINVAL;
601
602 host_kvm->arch.pkvm.pvmfw_load_addr = PVMFW_INVALID_LOAD_ADDR;
603 host_kvm->arch.pkvm.enabled = true;
604 return 0;
605 }
606
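/*
 * Fill a struct pkvm_device from an assignable device node: the MMIO ranges
 * come from its "reg" property, the IOMMU IDs and endpoint IDs from its
 * "iommus" phandles, and the group ID from the argument of the "devices"
 * phandle entry.
 */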
static int pkvm_register_device(struct of_phandle_args *args,
				struct pkvm_device *dev)
609 {
610 struct device_node *np = args->np;
611 struct of_phandle_args iommu_spec;
612 u32 group_id = args->args[0];
613 struct resource res;
614 u64 base, size, iommu_id;
615 unsigned int j = 0;
616
617 /* Parse regs */
618 while (!of_address_to_resource(np, j, &res)) {
619 if (j >= PKVM_DEVICE_MAX_RESOURCE)
620 return -E2BIG;
621
622 base = res.start;
623 size = resource_size(&res);
624 if (!PAGE_ALIGNED(base) || !PAGE_ALIGNED(size))
625 return -EINVAL;
626
627 dev->resources[j].base = base;
628 dev->resources[j].size = size;
629 j++;
630 }
631 dev->nr_resources = j;
632
633 /* Parse iommus */
634 j = 0;
635 while (!of_parse_phandle_with_args(np, "iommus",
636 "#iommu-cells",
637 j, &iommu_spec)) {
638 if (iommu_spec.args_count != 1) {
kvm_err("[Devices] Unsupported binding for %s, expected <&iommu id>\n",
640 np->full_name);
641 return -EINVAL;
642 }
643
644 if (j >= PKVM_DEVICE_MAX_RESOURCE) {
645 of_node_put(iommu_spec.np);
646 return -E2BIG;
647 }
648
649 iommu_id = kvm_get_iommu_id_by_of(iommu_spec.np);
650
651 dev->iommus[j].id = iommu_id;
652 dev->iommus[j].endpoint = iommu_spec.args[0];
653 of_node_put(iommu_spec.np);
654 j++;
655 }
656
657 dev->nr_iommus = j;
658 dev->ctxt = NULL;
659 dev->group_id = group_id;
660
661 return 0;
662 }
663
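/*
 * Walk all "pkvm,device-assignment" nodes to count the assignable devices,
 * allocate a page-aligned array describing them, and publish the result to
 * the hypervisor via registered_devices / registered_devices_nr.
 */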
static int pkvm_init_devices(void)
665 {
666 struct device_node *np;
667 int idx = 0, ret = 0, dev_cnt = 0;
668 size_t dev_sz;
669 struct pkvm_device *dev_base;
670
for_each_compatible_node(np, NULL, PKVM_DEVICE_ASSIGN_COMPAT) {
672 struct of_phandle_args args;
673
674 while (!of_parse_phandle_with_fixed_args(np, "devices", 1, dev_cnt, &args)) {
675 dev_cnt++;
676 of_node_put(args.np);
677 }
678 }
kvm_info("Found %d assignable devices\n", dev_cnt);
680
681 if (!dev_cnt)
682 return 0;
683
684 dev_sz = PAGE_ALIGN(size_mul(sizeof(struct pkvm_device), dev_cnt));
685
686 dev_base = alloc_pages_exact(dev_sz, GFP_KERNEL_ACCOUNT);
687
688 if (!dev_base)
689 return -ENOMEM;
690
691 for_each_compatible_node(np, NULL, PKVM_DEVICE_ASSIGN_COMPAT) {
692 struct of_phandle_args args;
693
694 while (!of_parse_phandle_with_fixed_args(np, "devices", 1, idx, &args)) {
695 ret = pkvm_register_device(&args, &dev_base[idx]);
696 of_node_put(args.np);
697 if (ret) {
698 of_node_put(np);
699 goto out_free;
700 }
701 idx++;
702 }
703 }
704
705 kvm_nvhe_sym(registered_devices_nr) = dev_cnt;
706 kvm_nvhe_sym(registered_devices) = dev_base;
707 return ret;
708
709 out_free:
710 free_pages_exact(dev_base, dev_sz);
711 return ret;
712 }
713
static void __init _kvm_host_prot_finalize(void *arg)
715 {
716 int *err = arg;
717
718 if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
719 WRITE_ONCE(*err, -EINVAL);
720 }
721
static int __init pkvm_drop_host_privileges(void)
723 {
724 int ret = 0;
725
726 /*
727 * Flip the static key upfront as that may no longer be possible
728 * once the host stage 2 is installed.
729 */
730 static_branch_enable(&kvm_protected_mode_initialized);
731 on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
732 return ret;
733 }
734
735 static int __init pkvm_firmware_rmem_clear(void);
736
static int __init finalize_pkvm(void)
738 {
739 int ret;
740
741 if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised()) {
742 pkvm_firmware_rmem_clear();
743 return 0;
744 }
745
746 /*
747 * Modules can play an essential part in the pKVM protection. All of
748 * them must properly load to enable protected VMs.
749 */
750 if (pkvm_load_early_modules())
751 pkvm_firmware_rmem_clear();
752
753 ret = kvm_iommu_init_driver();
754 if (ret) {
755 pr_err("Failed to init KVM IOMMU driver: %d\n", ret);
756 pkvm_firmware_rmem_clear();
757 }
758
759 ret = pkvm_init_devices();
760 if (ret) {
761 pr_err("Failed to init kvm devices %d\n", ret);
762 pkvm_firmware_rmem_clear();
763 }
764
765 ret = kvm_call_hyp_nvhe(__pkvm_devices_init);
766 if (ret)
pr_warn("Assignable devices failed to initialize in the hypervisor: %d\n", ret);
768
769 /*
770 * Exclude HYP sections from kmemleak so that they don't get peeked
771 * at, which would end badly once inaccessible.
772 */
773 kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
774 kmemleak_free_part(__hyp_data_start, __hyp_data_end - __hyp_data_start);
775 kmemleak_free_part(__hyp_rodata_start, __hyp_rodata_end - __hyp_rodata_start);
776 kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);
777
778 kvm_s2_ptdump_host_create_debugfs();
779
780 ret = pkvm_drop_host_privileges();
781 if (ret) {
782 pr_err("Failed to finalize Hyp protection: %d\n", ret);
783 kvm_iommu_remove_driver();
784 }
785
786 return 0;
787 }
788 device_initcall_sync(finalize_pkvm);
789
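/*
 * Drop the pinned page backing @ipa from the VM's pinned-page tree, unpin it
 * and update the locked-memory accounting. Only order-0 pages are expected
 * here.
 */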
void pkvm_host_reclaim_page(struct kvm *host_kvm, phys_addr_t ipa)
791 {
792 struct mm_struct *mm = current->mm;
793 struct kvm_pinned_page *ppage;
794 u8 order;
795
796 write_lock(&host_kvm->mmu_lock);
797 ppage = kvm_pinned_pages_iter_first(&host_kvm->arch.pkvm.pinned_pages,
798 ipa, ipa + PAGE_SIZE - 1);
799 if (ppage) {
800 order = ppage->order;
801 if (!order)
802 kvm_pinned_pages_remove(ppage, &host_kvm->arch.pkvm.pinned_pages);
803 }
804 write_unlock(&host_kvm->mmu_lock);
805
806 if (WARN_ON(!ppage || order))
807 return;
808
809 account_locked_vm(mm, 1 << ppage->order, false);
810 unpin_user_pages_dirty_lock(&ppage->page, 1, true);
811 kfree(ppage);
812 }
813
int pkvm_enable_smc_forwarding(struct file *kvm_file)
815 {
816 struct kvm *host_kvm;
817
818 if (!file_is_kvm(kvm_file))
819 return -EINVAL;
820
821 if (!kvm_get_kvm_safe(kvm_file->private_data))
822 return -EINVAL;
823
824 host_kvm = kvm_file->private_data;
825 if (!host_kvm)
826 return -EINVAL;
827
828 host_kvm->arch.pkvm.smc_forwarded = true;
829
830 return 0;
831 }
832
static int __init pkvm_firmware_rmem_err(struct reserved_mem *rmem,
					 const char *reason)
835 {
836 phys_addr_t end = rmem->base + rmem->size;
837
838 kvm_err("Ignoring pkvm guest firmware memory reservation [%pa - %pa]: %s\n",
839 &rmem->base, &end, reason);
840 return -EINVAL;
841 }
842
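/*
 * Validate the "linux,pkvm-guest-firmware-memory" reservation (single,
 * no-map, not reusable, page-aligned) and record its base and size for both
 * the kernel and the hypervisor.
 */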
static int __init pkvm_firmware_rmem_init(struct reserved_mem *rmem)
844 {
845 unsigned long node = rmem->fdt_node;
846
847 if (pvmfw_size)
848 return pkvm_firmware_rmem_err(rmem, "duplicate reservation");
849
850 if (!of_get_flat_dt_prop(node, "no-map", NULL))
851 return pkvm_firmware_rmem_err(rmem, "missing \"no-map\" property");
852
853 if (of_get_flat_dt_prop(node, "reusable", NULL))
854 return pkvm_firmware_rmem_err(rmem, "\"reusable\" property unsupported");
855
856 if (!PAGE_ALIGNED(rmem->base))
857 return pkvm_firmware_rmem_err(rmem, "base is not page-aligned");
858
859 if (!PAGE_ALIGNED(rmem->size))
860 return pkvm_firmware_rmem_err(rmem, "size is not page-aligned");
861
862 pvmfw_size = kvm_nvhe_sym(pvmfw_size) = rmem->size;
863 pvmfw_base = kvm_nvhe_sym(pvmfw_base) = rmem->base;
864 return 0;
865 }
866 RESERVEDMEM_OF_DECLARE(pkvm_firmware, "linux,pkvm-guest-firmware-memory",
867 pkvm_firmware_rmem_init);
868
static int __init pkvm_firmware_rmem_clear(void)
870 {
871 void *addr;
872 phys_addr_t size;
873
874 if (likely(!pvmfw_size))
875 return 0;
876
877 kvm_info("Clearing pKVM firmware memory\n");
878 size = pvmfw_size;
879 addr = memremap(pvmfw_base, size, MEMREMAP_WB);
880
881 pvmfw_size = kvm_nvhe_sym(pvmfw_size) = 0;
882 pvmfw_base = kvm_nvhe_sym(pvmfw_base) = 0;
883
884 if (!addr)
885 return -EINVAL;
886
887 memset(addr, 0, size);
888 dcache_clean_poc((unsigned long)addr, (unsigned long)addr + size);
889 memunmap(addr);
890 return 0;
891 }
892
static int pkvm_vm_ioctl_set_fw_ipa(struct kvm *kvm, u64 ipa)
894 {
895 int ret = 0;
896
897 if (!pvmfw_size)
898 return -EINVAL;
899
900 mutex_lock(&kvm->lock);
901 if (kvm->arch.pkvm.handle) {
902 ret = -EBUSY;
903 goto out_unlock;
904 }
905
906 kvm->arch.pkvm.pvmfw_load_addr = ipa;
907 out_unlock:
908 mutex_unlock(&kvm->lock);
909 return ret;
910 }
911
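/*
 * Cache the FF-A version negotiated by the host with the hypervisor.
 * A return value of 0 means no version has been negotiated yet.
 */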
static u32 pkvm_get_ffa_version(void)
913 {
914 static u32 ffa_version;
915 u32 ret;
916
917 ret = READ_ONCE(ffa_version);
918 if (ret)
919 return ret;
920
921 ret = kvm_call_hyp_nvhe(__pkvm_host_get_ffa_version);
922 WRITE_ONCE(ffa_version, ret);
923 return ret;
924
925 }
926
static int pkvm_vm_ioctl_info(struct kvm *kvm,
			      struct kvm_protected_vm_info __user *info)
929 {
930 struct kvm_protected_vm_info kinfo = {
931 .firmware_size = pvmfw_size,
932 .ffa_version = pkvm_get_ffa_version(),
933 };
934
935 return copy_to_user(info, &kinfo, sizeof(kinfo)) ? -EFAULT : 0;
936 }
937
static int pkvm_vm_ioctl_ffa_support(struct kvm *kvm, u32 enable)
939 {
940 int ret = 0;
941 u32 ffa_version;
942
943 /* Restrict userspace from having an IPC channel over FF-A with secure */
944 if (!capable(CAP_IPC_OWNER))
945 return -EPERM;
946
947 /*
948 * If the host hasn't negotiated a version don't enable the
949 * FF-A capability.
950 */
951 ffa_version = pkvm_get_ffa_version();
952 if (!ffa_version)
953 return -EINVAL;
954
955 mutex_lock(&kvm->arch.config_lock);
956 if (kvm->arch.pkvm.handle) {
957 ret = -EBUSY;
958 goto out_unlock;
959 }
960
961 kvm->arch.pkvm.ffa_support = enable;
962 out_unlock:
963 mutex_unlock(&kvm->arch.config_lock);
964 return ret;
965 }
966
int pkvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
968 {
969 if (!kvm_vm_is_protected(kvm))
970 return -EINVAL;
971
972 if (cap->args[1] || cap->args[2] || cap->args[3])
973 return -EINVAL;
974
975 switch (cap->flags) {
976 case KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FW_IPA:
977 return pkvm_vm_ioctl_set_fw_ipa(kvm, cap->args[0]);
978 case KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO:
979 return pkvm_vm_ioctl_info(kvm, (void __force __user *)cap->args[0]);
980 case KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FFA:
981 return pkvm_vm_ioctl_ffa_support(kvm, cap->args[0]);
982 default:
983 return -EINVAL;
984 }
985
986 return 0;
987 }
988
989 #ifdef CONFIG_MODULES
990 static char early_pkvm_modules[COMMAND_LINE_SIZE] __initdata;
991
static int __init early_pkvm_modules_cfg(char *arg)
993 {
994 /*
 * Loading pKVM modules with an empty kvm-arm.protected_modules is
 * deprecated. Use kvm-arm.protected_modules=<module1>,<module2> instead.
997 */
998 if (!arg)
999 return -EINVAL;
1000
1001 strscpy(early_pkvm_modules, arg, COMMAND_LINE_SIZE);
1002
1003 return 0;
1004 }
1005 early_param("kvm-arm.protected_modules", early_pkvm_modules_cfg);
1006
static void __init free_modprobe_argv(struct subprocess_info *info)
1008 {
1009 kfree(info->argv);
1010 }
1011
static int __init init_modprobe(struct subprocess_info *info, struct cred *new)
1013 {
1014 struct file *file = filp_open("/dev/kmsg", O_RDWR, 0);
1015
1016 if (IS_ERR(file)) {
1017 pr_warn("Warning: unable to open /dev/kmsg, modprobe will be silent.\n");
1018 return 0;
1019 }
1020
1021 init_dup(file);
1022 init_dup(file);
1023 init_dup(file);
1024 fput(file);
1025
1026 return 0;
1027 }
1028
1029 /*
 * Heavily inspired by request_module(). The latter couldn't be reused though,
 * as the feature can be disabled depending on the umh configuration. Some
 * security is enforced here by making sure this can only be called while pKVM
 * is enabled but not yet completely initialized.
1034 */
static int __init __pkvm_request_early_module(char *module_name,
					      char *module_path)
1037 {
1038 char *modprobe_path = CONFIG_MODPROBE_PATH;
1039 struct subprocess_info *info;
1040 static char *envp[] = {
1041 "HOME=/",
1042 "TERM=linux",
1043 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
1044 NULL
1045 };
1046 static bool proc;
1047 char **argv;
1048 int idx = 0;
1049
1050 if (!is_protected_kvm_enabled())
1051 return -EACCES;
1052
1053 if (static_branch_likely(&kvm_protected_mode_initialized))
1054 return -EACCES;
1055
1056 argv = kmalloc(sizeof(char *) * 7, GFP_KERNEL);
1057 if (!argv)
1058 return -ENOMEM;
1059
1060 argv[idx++] = modprobe_path;
1061 argv[idx++] = "-q";
1062 if (*module_path != '\0') {
1063 argv[idx++] = "-d";
1064 argv[idx++] = module_path;
1065 }
1066 argv[idx++] = "--";
1067 argv[idx++] = module_name;
1068 argv[idx++] = NULL;
1069
1070 info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL,
1071 init_modprobe, free_modprobe_argv, NULL);
1072 if (!info)
1073 goto err;
1074
1075 /* Even with CONFIG_STATIC_USERMODEHELPER we really want this path */
1076 info->path = modprobe_path;
1077
1078 if (!proc) {
1079 wait_for_initramfs();
1080 if (init_mount("proc", "/proc", "proc",
1081 MS_SILENT | MS_NOEXEC | MS_NOSUID, NULL))
1082 pr_warn("Couldn't mount /proc, pKVM module parameters will be ignored\n");
1083
1084 proc = true;
1085 }
1086
1087 return call_usermodehelper_exec(info, UMH_WAIT_PROC | UMH_KILLABLE);
1088 err:
1089 kfree(argv);
1090
1091 return -ENOMEM;
1092 }
1093
static int __init pkvm_request_early_module(char *module_name, char *module_path)
1095 {
1096 int err = __pkvm_request_early_module(module_name, module_path);
1097
1098 if (!err)
1099 return 0;
1100
1101 /* Already tried the default path */
1102 if (*module_path == '\0')
1103 return err;
1104
1105 pr_info("loading %s from %s failed, fallback to the default path\n",
1106 module_name, module_path);
1107
1108 return __pkvm_request_early_module(module_name, "");
1109 }
1110
1111 static void pkvm_el2_mod_free(void);
1112
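/*
 * Load every module listed in kvm-arm.protected_modules, restoring the ','
 * separators consumed by strsep() so the original string is left intact.
 */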
int __init pkvm_load_early_modules(void)
1114 {
1115 char *token, *buf = early_pkvm_modules;
1116 char *module_path = CONFIG_PKVM_MODULE_PATH;
1117 int err = 0;
1118
1119 while (true) {
1120 token = strsep(&buf, ",");
1121
1122 if (!token)
1123 break;
1124
1125 if (*token) {
1126 err = pkvm_request_early_module(token, module_path);
1127 if (err) {
1128 pr_err("Failed to load pkvm module %s: %d\n",
1129 token, err);
1130 goto out;
1131 }
1132 }
1133
1134 if (buf)
1135 *(buf - 1) = ',';
1136 }
1137
1138 out:
1139 pkvm_el2_mod_free();
1140
1141 return err;
1142 }
1143
1144 static LIST_HEAD(pkvm_modules);
1145
static void pkvm_el2_mod_add(struct pkvm_el2_module *mod)
1147 {
1148 INIT_LIST_HEAD(&mod->node);
1149 list_add(&mod->node, &pkvm_modules);
1150 }
1151
static void pkvm_el2_mod_free(void)
1153 {
1154 struct pkvm_el2_sym *sym, *tmp;
1155 struct pkvm_el2_module *mod;
1156
1157 list_for_each_entry(mod, &pkvm_modules, node) {
1158 list_for_each_entry_safe(sym, tmp, &mod->ext_symbols, node) {
1159 list_del(&sym->node);
1160 kfree(sym->name);
1161 kfree(sym);
1162 }
1163 }
1164 }
1165
static struct module *pkvm_el2_mod_to_module(struct pkvm_el2_module *hyp_mod)
1167 {
1168 struct mod_arch_specific *arch;
1169
1170 arch = container_of(hyp_mod, struct mod_arch_specific, hyp);
1171 return container_of(arch, struct module, arch);
1172 }
1173
1174 #ifdef CONFIG_PKVM_STACKTRACE
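/*
 * Translate a hypervisor VA belonging to a loaded EL2 module back to the
 * corresponding kernel VA. Returns 0 if the address does not fall within
 * any module's sections.
 */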
unsigned long pkvm_el2_mod_kern_va(unsigned long addr)
1176 {
1177 struct pkvm_el2_module *mod;
1178
1179 list_for_each_entry(mod, &pkvm_modules, node) {
1180 unsigned long hyp_va = (unsigned long)mod->hyp_va;
1181 size_t len = (unsigned long)mod->sections.end -
1182 (unsigned long)mod->sections.start;
1183
1184 if (addr >= hyp_va && addr < (hyp_va + len))
1185 return (unsigned long)mod->sections.start +
1186 (addr - hyp_va);
1187 }
1188
1189 return 0;
1190 }
1191 #else
unsigned long pkvm_el2_mod_kern_va(unsigned long addr) { return 0; }
1193 #endif
1194
static struct pkvm_el2_module *pkvm_el2_mod_lookup_symbol(const char *name,
							  unsigned long *addr)
1197 {
1198 struct pkvm_el2_module *hyp_mod;
1199 unsigned long __addr;
1200
1201 list_for_each_entry(hyp_mod, &pkvm_modules, node) {
1202 struct module *mod = pkvm_el2_mod_to_module(hyp_mod);
1203
1204 __addr = find_kallsyms_symbol_value(mod, name);
1205 if (!__addr)
1206 continue;
1207
1208 *addr = __addr;
1209 return hyp_mod;
1210 }
1211
1212 return NULL;
1213 }
1214
static bool within_pkvm_module_section(struct pkvm_module_section *section,
				       unsigned long addr)
1217 {
1218 return (addr > (unsigned long)section->start) &&
1219 (addr < (unsigned long)section->end);
1220 }
1221
static int pkvm_reloc_imported_symbol(struct pkvm_el2_module *importer,
				      struct pkvm_el2_sym *sym,
				      unsigned long hyp_dst)
1225 {
1226 s64 val, val_max = (s64)(~(BIT(25) - 1)) << 2;
1227 u32 insn = le32_to_cpu(*sym->rela_pos);
1228 unsigned long hyp_src;
1229 u64 imm;
1230
1231 if (!within_pkvm_module_section(&importer->text,
1232 (unsigned long)sym->rela_pos))
1233 return -EINVAL;
1234
1235 hyp_src = (unsigned long)importer->hyp_va +
1236 ((void *)sym->rela_pos - importer->text.start);
1237
1238 /*
 * Module hyp VAs are allocated going upward, so the relocation source (in
 * the importer) MUST NOT have a lower address than the destination (the
 * exported symbol).
1241 */
1242 if (WARN_ON(hyp_src < hyp_dst))
1243 return -EINVAL;
1244
1245 val = hyp_dst - hyp_src;
1246 if (val < val_max) {
1247 pr_warn("Exported symbol %s is too far for the relocation in module %s\n",
1248 sym->name, pkvm_el2_mod_to_module(importer)->name);
1249 return -ERANGE;
1250 }
1251
1252 /* offset encoded as imm26 * 4 */
1253 imm = (val >> 2) & (BIT(26) - 1);
1254
1255 insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_26, insn, imm);
1256
1257 return aarch64_insn_patch_text_nosync((void *)sym->rela_pos, insn);
1258 }
1259
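/*
 * Resolve every symbol imported by @importer against the .text sections of
 * the already-loaded EL2 modules and patch the corresponding call sites with
 * the exporter's hypervisor address.
 */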
static int pkvm_reloc_imported_symbols(struct pkvm_el2_module *importer)
1261 {
1262 unsigned long addr, offset, hyp_addr;
1263 struct pkvm_el2_module *exporter;
1264 struct pkvm_el2_sym *sym;
1265
1266 list_for_each_entry(sym, &importer->ext_symbols, node) {
1267 exporter = pkvm_el2_mod_lookup_symbol(sym->name, &addr);
1268 if (!exporter) {
1269 pr_warn("pKVM symbol %s not exported by any module\n",
1270 sym->name);
1271 return -EINVAL;
1272 }
1273
1274 if (!within_pkvm_module_section(&exporter->text, addr)) {
1275 pr_warn("pKVM symbol %s not part of %s .text section\n",
1276 sym->name,
1277 pkvm_el2_mod_to_module(exporter)->name);
1278 return -EINVAL;
1279 }
1280
1281 /* hyp addr in the exporter */
1282 offset = addr - (unsigned long)exporter->text.start;
1283 hyp_addr = (unsigned long)exporter->hyp_va + offset;
1284
1285 pkvm_reloc_imported_symbol(importer, sym, hyp_addr);
1286 }
1287
1288 return 0;
1289 }
1290
1291 struct pkvm_mod_sec_mapping {
1292 struct pkvm_module_section *sec;
1293 enum kvm_pgtable_prot prot;
1294 };
1295
static void pkvm_unmap_module_pages(void *kern_va, void *hyp_va, size_t size)
1297 {
1298 size_t offset;
1299 u64 pfn;
1300
1301 for (offset = 0; offset < size; offset += PAGE_SIZE) {
1302 pfn = vmalloc_to_pfn(kern_va + offset);
1303 kvm_call_hyp_nvhe(__pkvm_unmap_module_page, pfn,
1304 hyp_va + offset);
1305 }
1306 }
1307
static void pkvm_unmap_module_sections(struct pkvm_mod_sec_mapping *secs_map, void *hyp_va_base, int nr_secs)
1309 {
1310 size_t offset, size;
1311 void *start;
1312 int i;
1313
1314 for (i = 0; i < nr_secs; i++) {
1315 start = secs_map[i].sec->start;
1316 size = secs_map[i].sec->end - start;
1317 offset = start - secs_map[0].sec->start;
1318 pkvm_unmap_module_pages(start, hyp_va_base + offset, size);
1319 }
1320 }
1321
static int pkvm_map_module_section(struct pkvm_mod_sec_mapping *sec_map, void *hyp_va)
1323 {
1324 size_t offset, size = sec_map->sec->end - sec_map->sec->start;
1325 int ret;
1326 u64 pfn;
1327
1328 for (offset = 0; offset < size; offset += PAGE_SIZE) {
1329 pfn = vmalloc_to_pfn(sec_map->sec->start + offset);
1330 ret = kvm_call_hyp_nvhe(__pkvm_map_module_page, pfn,
1331 hyp_va + offset, sec_map->prot);
1332 if (ret) {
1333 pkvm_unmap_module_pages(sec_map->sec->start, hyp_va, offset);
1334 return ret;
1335 }
1336 }
1337
1338 return 0;
1339 }
1340
static int pkvm_map_module_sections(struct pkvm_mod_sec_mapping *secs_map,
				    void *hyp_va_base, int nr_secs)
1343 {
1344 size_t offset;
1345 int i, ret;
1346
1347 for (i = 0; i < nr_secs; i++) {
1348 offset = secs_map[i].sec->start - secs_map[0].sec->start;
1349 ret = pkvm_map_module_section(&secs_map[i], hyp_va_base + offset);
1350 if (ret) {
1351 pkvm_unmap_module_sections(secs_map, hyp_va_base, i);
1352 return ret;
1353 }
1354 }
1355
1356 return 0;
1357 }
1358
static int __pkvm_cmp_mod_sec(const void *p1, const void *p2)
1360 {
1361 struct pkvm_mod_sec_mapping const *s1 = p1;
1362 struct pkvm_mod_sec_mapping const *s2 = p2;
1363
1364 return s1->sec->start < s2->sec->start ? -1 : s1->sec->start > s2->sec->start;
1365 }
1366
static void *pkvm_map_module_struct(struct pkvm_el2_module *mod)
1368 {
1369 void *addr = (void *)__get_free_page(GFP_KERNEL);
1370
1371 if (!addr)
1372 return NULL;
1373
1374 if (kvm_share_hyp(addr, addr + PAGE_SIZE)) {
1375 free_page((unsigned long)addr);
1376 return NULL;
1377 }
1378
1379 /*
 * As the pkvm_el2_module is stored in vmalloc'd memory, we can't guarantee
 * a linear mapping for the hypervisor to rely on. Copy the struct instead.
1382 */
1383 memcpy(addr, mod, sizeof(*mod));
1384
1385 return addr;
1386 }
1387
static void pkvm_unmap_module_struct(void *addr)
1389 {
1390 kvm_unshare_hyp(addr, addr + PAGE_SIZE);
1391 free_page((unsigned long)addr);
1392 }
1393
static void pkvm_module_kmemleak(struct module *this,
				 struct pkvm_mod_sec_mapping *sec_map,
				 int nr_sections)
1397 {
1398 void *start, *end;
1399 int i;
1400
1401 if (!this)
1402 return;
1403
1404 /*
1405 * The module loader already removes read-only sections from kmemleak
 * scanned objects. However, a few hyp sections are installed into
1407 * MOD_DATA. Skip those sections before they are made inaccessible from
1408 * the host.
1409 */
1410
1411 start = this->mem[MOD_DATA].base;
1412 end = start + this->mem[MOD_DATA].size;
1413
1414 for (i = 0; i < nr_sections; i++, sec_map++) {
1415 if (sec_map->sec->start < start || sec_map->sec->start >= end)
1416 continue;
1417
1418 kmemleak_scan_area(start, sec_map->sec->start - start, GFP_KERNEL);
1419 start = sec_map->sec->end;
1420 }
1421
1422 kmemleak_scan_area(start, end - start, GFP_KERNEL);
1423 }
1424
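/*
 * Map the module's EL2 sections into the hypervisor, apply the hypervisor
 * relocations (including symbols imported from other EL2 modules) and ask
 * the hypervisor to run the module's init code. On success, an optional
 * token identifying the module at EL2 is returned through @token.
 */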
int __pkvm_load_el2_module(struct module *this, unsigned long *token)
1426 {
1427 struct pkvm_el2_module *mod = &this->arch.hyp;
1428 struct pkvm_mod_sec_mapping secs_map[] = {
1429 { &mod->text, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X },
1430 { &mod->bss, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W },
1431 { &mod->rodata, KVM_PGTABLE_PROT_R },
1432 { &mod->event_ids, KVM_PGTABLE_PROT_R },
1433 { &mod->patchable_function_entries, KVM_PGTABLE_PROT_R },
1434 { &mod->data, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W },
1435 };
1436 void *start, *end, *hyp_va, *mod_remap;
1437 struct arm_smccc_res res;
1438 kvm_nvhe_reloc_t *endrel;
1439 int ret, i, secs_first;
1440 size_t size;
1441
1442 /* The pKVM hyp only allows loading before it is fully initialized */
1443 if (!is_protected_kvm_enabled() || is_pkvm_initialized())
1444 return -EOPNOTSUPP;
1445
1446 for (i = 0; i < ARRAY_SIZE(secs_map); i++) {
1447 if (!PAGE_ALIGNED(secs_map[i].sec->start)) {
1448 kvm_err("EL2 sections are not page-aligned\n");
1449 return -EINVAL;
1450 }
1451 }
1452
1453 if (!try_module_get(this)) {
1454 kvm_err("Kernel module has been unloaded\n");
1455 return -ENODEV;
1456 }
1457
1458 /* Missing or empty module sections are placed first */
1459 sort(secs_map, ARRAY_SIZE(secs_map), sizeof(secs_map[0]), __pkvm_cmp_mod_sec, NULL);
1460 for (secs_first = 0; secs_first < ARRAY_SIZE(secs_map); secs_first++) {
1461 start = secs_map[secs_first].sec->start;
1462 if (start)
1463 break;
1464 }
1465 end = secs_map[ARRAY_SIZE(secs_map) - 1].sec->end;
1466 size = end - start;
1467
1468 mod->sections.start = start;
1469 mod->sections.end = end;
1470
1471 arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__pkvm_alloc_module_va),
1472 size >> PAGE_SHIFT, &res);
1473 if (res.a0 != SMCCC_RET_SUCCESS || !res.a1) {
1474 kvm_err("Failed to allocate hypervisor VA space for EL2 module\n");
1475 module_put(this);
1476 return res.a0 == SMCCC_RET_SUCCESS ? -ENOMEM : -EPERM;
1477 }
1478 hyp_va = (void *)res.a1;
1479 mod->hyp_va = hyp_va;
1480
1481 /*
1482 * The token can be used for other calls related to this module.
 * Conveniently, the only information needed is this address, so let's use it
1484 * as an identifier.
1485 */
1486 if (token)
1487 *token = (unsigned long)hyp_va;
1488
1489 mod->sections.start = start;
1490 mod->sections.end = end;
1491
1492 endrel = (void *)mod->relocs + mod->nr_relocs * sizeof(*endrel);
1493 kvm_apply_hyp_module_relocations(mod, mod->relocs, endrel);
1494
1495 ret = pkvm_reloc_imported_symbols(mod);
1496 if (ret)
1497 return ret;
1498
1499 pkvm_module_kmemleak(this, secs_map, ARRAY_SIZE(secs_map));
1500
1501 ret = hyp_trace_init_mod_events(mod);
1502 if (ret)
1503 kvm_err("Failed to init module events: %d\n", ret);
1504
1505 mod_remap = pkvm_map_module_struct(mod);
1506 if (!mod_remap) {
1507 module_put(this);
1508 return -ENOMEM;
1509 }
1510
1511 ret = pkvm_map_module_sections(secs_map + secs_first, hyp_va,
1512 ARRAY_SIZE(secs_map) - secs_first);
1513 if (ret) {
1514 kvm_err("Failed to map EL2 module page: %d\n", ret);
1515 pkvm_unmap_module_struct(mod_remap);
1516 module_put(this);
1517 return ret;
1518 }
1519
1520 pkvm_el2_mod_add(mod);
1521
1522 ret = kvm_call_hyp_nvhe(__pkvm_init_module, mod_remap);
1523 pkvm_unmap_module_struct(mod_remap);
1524 if (ret) {
1525 kvm_err("Failed to init EL2 module: %d\n", ret);
1526 list_del(&mod->node);
1527 pkvm_unmap_module_sections(secs_map, hyp_va, ARRAY_SIZE(secs_map));
1528 module_put(this);
1529 return ret;
1530 }
1531
1532 hyp_trace_enable_event_early();
1533
1534 return 0;
1535 }
1536 EXPORT_SYMBOL(__pkvm_load_el2_module);
1537
int __pkvm_register_el2_call(unsigned long hfn_hyp_va)
1539 {
1540 return kvm_call_hyp_nvhe(__pkvm_register_hcall, hfn_hyp_va);
1541 }
1542 EXPORT_SYMBOL(__pkvm_register_el2_call);
1543
void pkvm_el2_mod_frob_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings)
1545 {
1546 #ifdef CONFIG_PKVM_FTRACE
1547 int i;
1548
1549 for (i = 0; i < ehdr->e_shnum; i++) {
1550 if (!strcmp(secstrings + sechdrs[i].sh_name, ".hyp.text")) {
1551 Elf_Shdr *hyp_text = sechdrs + i;
1552
1553 /* .hyp.text.ftrace_tramp pollutes .hyp.text flags */
1554 hyp_text->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
1555 break;
1556 }
1557 }
1558 #endif
1559 }
1560 #endif /* CONFIG_MODULES */
1561
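/*
 * Refill the hypervisor-managed allocator identified by @id with the pages
 * of @mc. mc->head and mc->nr_pages are updated from the hypervisor's return
 * values; callers free the memcache on failure.
 */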
int __pkvm_topup_hyp_alloc_mgt_mc(unsigned long id, struct kvm_hyp_memcache *mc)
1563 {
1564 struct arm_smccc_res res;
1565
1566 res = kvm_call_hyp_nvhe_smccc(__pkvm_hyp_alloc_mgt_refill,
1567 id, mc->head, mc->nr_pages);
1568 mc->head = res.a2;
1569 mc->nr_pages = res.a3;
1570 return res.a1;
1571 }
1572 EXPORT_SYMBOL(__pkvm_topup_hyp_alloc_mgt_mc);
1573
int __pkvm_topup_hyp_alloc(unsigned long nr_pages)
1575 {
1576 struct kvm_hyp_memcache mc;
1577 int ret;
1578
1579 init_hyp_memcache(&mc);
1580
1581 ret = topup_hyp_memcache(&mc, nr_pages, 0);
1582 if (ret)
1583 return ret;
1584
1585 ret = __pkvm_topup_hyp_alloc_mgt_mc(HYP_ALLOC_MGT_HEAP_ID, &mc);
1586 if (ret)
1587 free_hyp_memcache(&mc);
1588
1589 return ret;
1590 }
1591 EXPORT_SYMBOL(__pkvm_topup_hyp_alloc);
1592
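/*
 * Reclaim up to @nr_pages pages from the hypervisor allocators, in batches
 * of at most 16 pages to bound the time spent at EL2, and return the number
 * of pages actually reclaimed.
 */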
unsigned long __pkvm_reclaim_hyp_alloc_mgt(unsigned long nr_pages)
1594 {
1595 unsigned long ratelimit, last_reclaim, reclaimed = 0;
1596 struct kvm_hyp_memcache mc;
1597 struct arm_smccc_res res;
1598
1599 init_hyp_memcache(&mc);
1600
1601 do {
1602 /* Arbitrary upper bound to limit the time spent at EL2 */
1603 ratelimit = min(nr_pages, 16UL);
1604
1605 arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__pkvm_hyp_alloc_mgt_reclaim),
1606 ratelimit, &res);
1607 if (WARN_ON(res.a0 != SMCCC_RET_SUCCESS))
1608 break;
1609
1610 mc.head = res.a1;
1611 last_reclaim = mc.nr_pages = res.a2;
1612
1613 free_hyp_memcache(&mc);
1614 reclaimed += last_reclaim;
1615
1616 } while (last_reclaim && (reclaimed < nr_pages));
1617
1618 return reclaimed;
1619 }
1620
int __pkvm_topup_hyp_alloc_mgt_gfp(unsigned long id, unsigned long nr_pages,
				   unsigned long sz_alloc, gfp_t gfp)
1623 {
1624 struct kvm_hyp_memcache mc;
1625 int ret;
1626
1627 init_hyp_memcache(&mc);
1628
1629 ret = topup_hyp_memcache_gfp(&mc, nr_pages, get_order(sz_alloc), gfp);
1630 if (ret)
1631 return ret;
1632
1633 ret = __pkvm_topup_hyp_alloc_mgt_mc(id, &mc);
1634 if (ret) {
kvm_err("Failed to top up hyp allocator %lu: nr_pages = %lu, size = %lu, err = %d, freeing %lu pages\n",
	id, nr_pages, sz_alloc, ret, mc.nr_pages);
1637 free_hyp_memcache(&mc);
1638 }
1639
1640 return ret;
1641 }
1642 EXPORT_SYMBOL(__pkvm_topup_hyp_alloc_mgt_gfp);
1643
static int __pkvm_donate_resource(struct resource *r)
1645 {
1646 if (!PAGE_ALIGNED(resource_size(r)) || !PAGE_ALIGNED(r->start))
1647 return -EINVAL;
1648
1649 return kvm_call_hyp_nvhe(__pkvm_host_donate_hyp_mmio,
1650 __phys_to_pfn(r->start),
1651 resource_size(r) >> PAGE_SHIFT);
1652
1653 }
1654
static int __pkvm_reclaim_resource(struct resource *r)
1656 {
1657 if (!PAGE_ALIGNED(resource_size(r)) || !PAGE_ALIGNED(r->start))
1658 return -EINVAL;
1659
1660 return kvm_call_hyp_nvhe(__pkvm_host_reclaim_hyp_mmio,
1661 __phys_to_pfn(r->start),
1662 resource_size(r) >> PAGE_SHIFT);
1663 }
1664
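/*
 * Donate all the MMIO resources of a platform device to the hypervisor,
 * reclaiming whatever was already donated if one of them fails.
 */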
static int __pkvm_arch_assign_device(struct device *dev, void *data)
1666 {
1667 struct platform_device *pdev;
1668 struct resource *r;
1669 int index = 0;
1670 int ret = 0;
1671
1672 if (!dev_is_platform(dev))
1673 return -EOPNOTSUPP;
1674
1675 pdev = to_platform_device(dev);
1676
1677 while ((r = platform_get_resource(pdev, IORESOURCE_MEM, index++))) {
1678 ret = __pkvm_donate_resource(r);
1679 if (ret)
1680 break;
1681 }
1682
1683 if (ret) {
1684 while (index--) {
1685 r = platform_get_resource(pdev, IORESOURCE_MEM, index);
1686 __pkvm_reclaim_resource(r);
1687 }
1688 }
1689 return ret;
1690 }
1691
static int __pkvm_arch_reclaim_device(struct device *dev, void *data)
1693 {
1694 struct platform_device *pdev;
1695 struct resource *r;
1696 int index = 0;
1697
1698 pdev = to_platform_device(dev);
1699
1700 while ((r = platform_get_resource(pdev, IORESOURCE_MEM, index++)))
1701 __pkvm_reclaim_resource(r);
1702
1703 return 0;
1704 }
1705
int kvm_arch_assign_device(struct device *dev)
1707 {
1708 if (!is_protected_kvm_enabled())
1709 return 0;
1710
1711 return __pkvm_arch_assign_device(dev, NULL);
1712 }
1713
int kvm_arch_assign_group(struct iommu_group *group)
1715 {
1716 int ret;
1717
1718 if (!is_protected_kvm_enabled())
1719 return 0;
1720
1721 ret = iommu_group_for_each_dev(group, NULL, __pkvm_arch_assign_device);
1722
1723 if (ret)
1724 iommu_group_for_each_dev(group, NULL, __pkvm_arch_reclaim_device);
1725
1726 return ret;
1727 }
1728
void kvm_arch_reclaim_device(struct device *dev)
1730 {
1731 if (!is_protected_kvm_enabled())
1732 return;
1733
1734 __pkvm_arch_reclaim_device(dev, NULL);
1735 }
1736
void kvm_arch_reclaim_group(struct iommu_group *group)
1738 {
1739 if (!is_protected_kvm_enabled())
1740 return;
1741
1742 iommu_group_for_each_dev(group, NULL, __pkvm_arch_reclaim_device);
1743 }
1744
static u64 __pkvm_mapping_start(struct pkvm_mapping *m)
1746 {
1747 return m->gfn * PAGE_SIZE;
1748 }
1749
static u64 __pkvm_mapping_end(struct pkvm_mapping *m)
1751 {
1752 return (m->gfn + m->nr_pages) * PAGE_SIZE - 1;
1753 }
1754
1755 INTERVAL_TREE_DEFINE(struct pkvm_mapping, node, u64, __subtree_last,
1756 __pkvm_mapping_start, __pkvm_mapping_end, static,
1757 pkvm_mapping);
1758
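/*
 * Iterate over the pkvm_mapping interval tree for [__start, __end), in a way
 * that allows the current mapping to be removed while walking.
 */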
1759 #define for_each_mapping_in_range_safe(__pgt, __start, __end, __map) \
1760 for (struct pkvm_mapping *__tmp = pkvm_mapping_iter_first(&(__pgt)->pkvm_mappings, \
1761 __start, __end - 1); \
1762 __tmp && ({ \
1763 __map = __tmp; \
1764 __tmp = pkvm_mapping_iter_next(__map, __start, __end - 1); \
1765 true; \
1766 }); \
1767 )
1768
int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
			     struct kvm_pgtable_mm_ops *mm_ops, struct kvm_pgtable_pte_ops *pte_ops)
1771 {
1772 pgt->pkvm_mappings = RB_ROOT_CACHED;
1773 pgt->mmu = mmu;
1774
1775 return 0;
1776 }
1777
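/*
 * Unshare from the guest every mapping intersecting [start, end) and drop
 * the corresponding pkvm_mapping nodes from the interval tree.
 */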
static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 end)
1779 {
1780 struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
1781 pkvm_handle_t handle = kvm->arch.pkvm.handle;
1782 struct pkvm_mapping *mapping;
1783 int ret;
1784
1785 if (!handle)
1786 return 0;
1787
1788 for_each_mapping_in_range_safe(pgt, start, end, mapping) {
1789 ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn,
1790 mapping->nr_pages);
1791 if (WARN_ON(ret))
1792 return ret;
1793 pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
1794 kfree(mapping);
1795 }
1796
1797 return 0;
1798 }
1799
void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
1801 {
1802 __pkvm_pgtable_stage2_unmap(pgt, 0, ~(0ULL));
1803 }
1804
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			    u64 phys, enum kvm_pgtable_prot prot,
			    void *mc, enum kvm_pgtable_walk_flags flags)
1808 {
1809 struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
1810 struct pkvm_mapping *mapping = NULL;
1811 struct kvm_hyp_memcache *cache = mc;
1812 u64 gfn = addr >> PAGE_SHIFT;
1813 u64 pfn = phys >> PAGE_SHIFT;
1814 int ret;
1815
1816 if (size != PAGE_SIZE && size != PMD_SIZE)
1817 return -EINVAL;
1818
1819 lockdep_assert_held_write(&kvm->mmu_lock);
1820
1821 /*
1822 * Calling stage2_map() on top of existing mappings is either happening because of a race
1823 * with another vCPU, or because we're changing between page and block mappings. As per
1824 * user_mem_abort(), same-size permission faults are handled in the relax_perms() path.
1825 */
1826 mapping = pkvm_mapping_iter_first(&pgt->pkvm_mappings, addr, addr + size - 1);
1827 if (mapping) {
1828 if (size == (mapping->nr_pages * PAGE_SIZE))
1829 return -EAGAIN;
1830
1831 /* Remove _any_ pkvm_mapping overlapping with the range, bigger or smaller. */
1832 ret = __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
1833 if (ret)
1834 return ret;
1835 mapping = NULL;
1836 }
1837
1838 ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, prot, size / PAGE_SIZE);
1839 if (ret) {
1840 WARN_ON(ret != -ENOMEM);
1841 return ret;
1842 }
1843
1844 swap(mapping, cache->mapping);
1845 mapping->gfn = gfn;
1846 mapping->pfn = pfn;
1847 mapping->nr_pages = size / PAGE_SIZE;
1848 pkvm_mapping_insert(mapping, &pgt->pkvm_mappings);
1849
1850 return ret;
1851 }
1852
int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
1854 {
1855 lockdep_assert_held_write(&kvm_s2_mmu_to_kvm(pgt->mmu)->mmu_lock);
1856
1857 return __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
1858 }
1859
int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
1861 {
1862 struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
1863 pkvm_handle_t handle = kvm->arch.pkvm.handle;
1864
1865 return kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, addr >> PAGE_SHIFT, size);
1866 }
1867
int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
1869 {
1870 struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
1871 struct pkvm_mapping *mapping;
1872
1873 lockdep_assert_held(&kvm->mmu_lock);
1874 for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
1875 __clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn), PAGE_SIZE * mapping->nr_pages);
1876
1877 return 0;
1878 }
1879
bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold)
1881 {
1882 struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
1883 pkvm_handle_t handle = kvm->arch.pkvm.handle;
1884
1885 return kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, addr >> PAGE_SHIFT,
1886 size, mkold);
1887 }
1888
int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
				    enum kvm_pgtable_walk_flags flags)
1891 {
1892 return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot);
1893 }
1894
kvm_pte_t pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
				      enum kvm_pgtable_walk_flags flags)
1897 {
1898 return kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT);
1899 }
1900
void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops,
				       struct kvm_pgtable_pte_ops *pte_ops,
				       void *pgtable, s8 level)
1904 {
1905 WARN_ON_ONCE(1);
1906 }
1907
kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
					       enum kvm_pgtable_prot prot, void *mc, bool force_pte)
1910 {
1911 WARN_ON_ONCE(1);
1912 return NULL;
1913 }
1914
int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size, void *mc)
1916 {
1917 WARN_ON_ONCE(1);
1918 return -EINVAL;
1919 }
1920
static int early_ffa_unmap_on_lend_cfg(char *arg)
1922 {
1923 static_branch_enable(&kvm_ffa_unmap_on_lend);
1924 return 0;
1925 }
1926
1927 early_param("kvm-arm.ffa-unmap-on-lend", early_ffa_unmap_on_lend_cfg);
1928