// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 - Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/io.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/of_address.h>
#include <linux/of_fdt.h>
#include <linux/of_reserved_mem.h>
#include <linux/sort.h>
#include <linux/stat.h>

#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pkvm.h>
#include <asm/kvm_pkvm_module.h>
#include <asm/setup.h>

#include <uapi/linux/mount.h>
#include <linux/init_syscalls.h>

#include "hyp_constants.h"

DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);

static struct reserved_mem *pkvm_firmware_mem;
static phys_addr_t *pvmfw_base = &kvm_nvhe_sym(pvmfw_base);
static phys_addr_t *pvmfw_size = &kvm_nvhe_sym(pvmfw_size);

static struct pkvm_moveable_reg *moveable_regs = kvm_nvhe_sym(pkvm_moveable_regs);
static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);

phys_addr_t hyp_mem_base;
phys_addr_t hyp_mem_size;

static int cmp_hyp_memblock(const void *p1, const void *p2)
{
	const struct memblock_region *r1 = p1;
	const struct memblock_region *r2 = p2;

	return r1->base < r2->base ? -1 : (r1->base > r2->base);
}

static void __init sort_memblock_regions(void)
{
	sort(hyp_memory,
	     *hyp_memblock_nr_ptr,
	     sizeof(struct memblock_region),
	     cmp_hyp_memblock,
	     NULL);
}

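/*
 * Snapshot the kernel's memblock regions into the hyp copy, sorted by base
 * address, so the hypervisor can build its own view of memory.
 */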
static int __init register_memblock_regions(void)
{
	struct memblock_region *reg;

	for_each_mem_region(reg) {
		if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
			return -ENOMEM;

		hyp_memory[*hyp_memblock_nr_ptr] = *reg;
		(*hyp_memblock_nr_ptr)++;
	}
	sort_memblock_regions();

	return 0;
}

static int cmp_moveable_reg(const void *p1, const void *p2)
{
	const struct pkvm_moveable_reg *r1 = p1;
	const struct pkvm_moveable_reg *r2 = p2;

	/*
	 * Moveable regions may overlap, so put the largest one first when start
	 * addresses are equal to allow a simpler walk from e.g.
	 * host_stage2_unmap_unmoveable_regs().
	 */
	if (r1->start < r2->start)
		return -1;
	else if (r1->start > r2->start)
		return 1;
	else if (r1->size > r2->size)
		return -1;
	else if (r1->size < r2->size)
		return 1;
	return 0;
}

static void __init sort_moveable_regs(void)
{
	sort(moveable_regs,
	     kvm_nvhe_sym(pkvm_moveable_regs_nr),
	     sizeof(struct pkvm_moveable_reg),
	     cmp_moveable_reg,
	     NULL);
}

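/*
 * Record every memblock region plus any "pkvm,protected-region" DT node as a
 * moveable region for the hypervisor. DT-described regions must be
 * page-aligned.
 */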
static int __init register_moveable_regions(void)
{
	struct memblock_region *reg;
	struct device_node *np;
	int i = 0;

	for_each_mem_region(reg) {
		if (i >= PKVM_NR_MOVEABLE_REGS)
			return -ENOMEM;
		moveable_regs[i].start = reg->base;
		moveable_regs[i].size = reg->size;
		moveable_regs[i].type = PKVM_MREG_MEMORY;
		i++;
	}

	for_each_compatible_node(np, NULL, "pkvm,protected-region") {
		struct resource res;
		u64 start, size;
		int ret;

		if (i >= PKVM_NR_MOVEABLE_REGS)
			return -ENOMEM;

		ret = of_address_to_resource(np, 0, &res);
		if (ret)
			return ret;

		start = res.start;
		size = resource_size(&res);
		if (!PAGE_ALIGNED(start) || !PAGE_ALIGNED(size))
			return -EINVAL;

		moveable_regs[i].start = start;
		moveable_regs[i].size = size;
		moveable_regs[i].type = PKVM_MREG_PROTECTED_RANGE;
		i++;
	}

	kvm_nvhe_sym(pkvm_moveable_regs_nr) = i;
	sort_moveable_regs();

	return 0;
}

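/*
 * Reserve the memory the hypervisor will need for its own stage-1 page-table,
 * the host stage-2, the hyp vmemmap, the VM table and the FF-A proxy. This
 * runs at boot, before the page allocator is up, so the carveout is taken
 * straight from memblock.
 */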
void __init kvm_hyp_reserve(void)
{
	u64 hyp_mem_pages = 0;
	int ret;

	if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
		return;

	if (kvm_get_mode() != KVM_MODE_PROTECTED)
		return;

	ret = register_memblock_regions();
	if (ret) {
		*hyp_memblock_nr_ptr = 0;
		kvm_err("Failed to register hyp memblocks: %d\n", ret);
		return;
	}

	ret = register_moveable_regions();
	if (ret) {
		*hyp_memblock_nr_ptr = 0;
		kvm_err("Failed to register pkvm moveable regions: %d\n", ret);
		return;
	}

	hyp_mem_pages += hyp_s1_pgtable_pages();
	hyp_mem_pages += host_s2_pgtable_pages();
	hyp_mem_pages += hyp_vm_table_pages();
	hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
	hyp_mem_pages += hyp_ffa_proxy_pages();

	/*
	 * Try to allocate a PMD-aligned region to reduce TLB pressure once
	 * this is unmapped from the host stage-2, and fall back to PAGE_SIZE.
	 */
	hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
	hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
					   PMD_SIZE);
	if (!hyp_mem_base)
		hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
	else
		hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);

	if (!hyp_mem_base) {
		kvm_err("Failed to reserve hyp memory\n");
		return;
	}

	kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
		 hyp_mem_base);
}

/*
 * Allocates and donates memory for hypervisor VM structs at EL2.
 *
 * Allocates space for the VM state, which includes the hyp vm as well as
 * the hyp vcpus.
 *
 * Stores an opaque handle in the kvm struct for future reference.
 *
 * Return 0 on success, negative error code on failure.
 */
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	size_t pgd_sz, hyp_vm_sz, hyp_vcpu_sz, last_ran_sz, total_sz;
	struct kvm_vcpu *host_vcpu;
	pkvm_handle_t handle;
	void *pgd, *hyp_vm, *last_ran;
	unsigned long idx;
	int ret;

	if (host_kvm->created_vcpus < 1)
		return -EINVAL;

	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr);

	/*
	 * The PGD pages will be reclaimed using a hyp_memcache which implies
	 * page granularity. So, use alloc_pages_exact() to get individual
	 * refcounts.
	 */
	pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
	if (!pgd)
		return -ENOMEM;

	/* Allocate memory to donate to hyp for vm and vcpu pointers. */
	hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
					size_mul(sizeof(void *),
						 host_kvm->created_vcpus)));
	hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vm) {
		ret = -ENOMEM;
		goto free_pgd;
	}

	/* Allocate memory to donate to hyp for tracking mmu->last_vcpu_ran. */
	last_ran_sz = PAGE_ALIGN(array_size(num_possible_cpus(), sizeof(int)));
	last_ran = alloc_pages_exact(last_ran_sz, GFP_KERNEL_ACCOUNT);
	if (!last_ran) {
		ret = -ENOMEM;
		goto free_vm;
	}

	/* Donate the VM memory to hyp and let hyp initialize it. */
	ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd, last_ran);
	if (ret < 0)
		goto free_last_ran;

	handle = ret;

	host_kvm->arch.pkvm.handle = handle;

	total_sz = hyp_vm_sz + last_ran_sz + pgd_sz;

	/* Donate memory for the vcpus at hyp and initialize it. */
	hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
	kvm_for_each_vcpu(idx, host_vcpu, host_kvm) {
		void *hyp_vcpu;

		/* Indexing of the vcpus to be sequential starting at 0. */
		if (WARN_ON(host_vcpu->vcpu_idx != idx)) {
			ret = -EINVAL;
			goto destroy_vm;
		}

		hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
		if (!hyp_vcpu) {
			ret = -ENOMEM;
			goto destroy_vm;
		}

		total_sz += hyp_vcpu_sz;

		ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu,
					hyp_vcpu);
		if (ret) {
			free_pages_exact(hyp_vcpu, hyp_vcpu_sz);
			goto destroy_vm;
		}
	}

	atomic64_set(&host_kvm->stat.protected_hyp_mem, total_sz);
	kvm_account_pgtable_pages(pgd, pgd_sz >> PAGE_SHIFT);

	return 0;

destroy_vm:
	pkvm_destroy_hyp_vm(host_kvm);
	return ret;
free_last_ran:
	free_pages_exact(last_ran, last_ran_sz);
free_vm:
	free_pages_exact(hyp_vm, hyp_vm_sz);
free_pgd:
	free_pages_exact(pgd, pgd_sz);
	return ret;
}

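/*
 * Create the hyp view of this VM, serialized with the kvm lock. Does nothing
 * if a hyp VM handle has already been installed.
 */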
int pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	int ret = 0;

	mutex_lock(&host_kvm->lock);
	if (!host_kvm->arch.pkvm.handle)
		ret = __pkvm_create_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->lock);

	return ret;
}

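/*
 * Tear down the hyp VM: start the teardown at EL2, reclaim and unpin every
 * pinned guest page, then finalize the teardown and drain the teardown
 * memcaches.
 */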
void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	struct kvm_pinned_page *ppage;
	struct mm_struct *mm = current->mm;
	struct rb_node *node;

	if (!host_kvm->arch.pkvm.handle)
		goto out_free;

	WARN_ON(kvm_call_hyp_nvhe(__pkvm_start_teardown_vm, host_kvm->arch.pkvm.handle));

	node = rb_first(&host_kvm->arch.pkvm.pinned_pages);
	while (node) {
		ppage = rb_entry(node, struct kvm_pinned_page, node);
		WARN_ON(kvm_call_hyp_nvhe(__pkvm_reclaim_dying_guest_page,
					  host_kvm->arch.pkvm.handle,
					  page_to_pfn(ppage->page),
					  ppage->ipa));
		cond_resched();

		account_locked_vm(mm, 1, false);
		unpin_user_pages_dirty_lock(&ppage->page, 1, true);
		node = rb_next(node);
		rb_erase(&ppage->node, &host_kvm->arch.pkvm.pinned_pages);
		kfree(ppage);
	}

	WARN_ON(kvm_call_hyp_nvhe(__pkvm_finalize_teardown_vm, host_kvm->arch.pkvm.handle));

out_free:
	host_kvm->arch.pkvm.handle = 0;
	free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc, host_kvm);
	free_hyp_stage2_memcache(&host_kvm->arch.pkvm.teardown_stage2_mc,
				 host_kvm);
}

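/*
 * VM-creation hook: initialize the kvm lock and, when the VM type requests a
 * protected VM, mark protection as enabled with no firmware load address set
 * yet.
 */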
int pkvm_init_host_vm(struct kvm *host_kvm, unsigned long type)
{
	mutex_init(&host_kvm->lock);

	if (!(type & KVM_VM_TYPE_ARM_PROTECTED))
		return 0;

	if (!is_protected_kvm_enabled())
		return -EINVAL;

	host_kvm->arch.pkvm.pvmfw_load_addr = PVMFW_INVALID_LOAD_ADDR;
	host_kvm->arch.pkvm.enabled = true;
	return 0;
}

static int rb_ppage_cmp(const void *key, const struct rb_node *node)
{
	struct kvm_pinned_page *p = container_of(node, struct kvm_pinned_page, node);
	phys_addr_t ipa = (phys_addr_t)key;

	return (ipa < p->ipa) ? -1 : (ipa > p->ipa);
}

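/*
 * Drop the pinned-page tracking entry for @ipa (if any), unpin the page and
 * update the locked-VM accounting.
 */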
void pkvm_host_reclaim_page(struct kvm *host_kvm, phys_addr_t ipa)
{
	struct kvm_pinned_page *ppage;
	struct mm_struct *mm = current->mm;
	struct rb_node *node;

	write_lock(&host_kvm->mmu_lock);
	node = rb_find((void *)ipa, &host_kvm->arch.pkvm.pinned_pages,
		       rb_ppage_cmp);
	if (node)
		rb_erase(node, &host_kvm->arch.pkvm.pinned_pages);
	write_unlock(&host_kvm->mmu_lock);

	WARN_ON(!node);
	if (!node)
		return;

	ppage = container_of(node, struct kvm_pinned_page, node);
	account_locked_vm(mm, 1, false);
	unpin_user_pages_dirty_lock(&ppage->page, 1, true);
	kfree(ppage);
}

static int __init pkvm_firmware_rmem_err(struct reserved_mem *rmem,
					 const char *reason)
{
	phys_addr_t end = rmem->base + rmem->size;

	kvm_err("Ignoring pkvm guest firmware memory reservation [%pa - %pa]: %s\n",
		&rmem->base, &end, reason);
	return -EINVAL;
}

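/*
 * Parse the "linux,pkvm-guest-firmware-memory" reserved-memory node. The
 * reservation must be unique, marked "no-map", not "reusable", and
 * page-aligned; its base and size are then handed to the hypervisor.
 */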
static int __init pkvm_firmware_rmem_init(struct reserved_mem *rmem)
{
	unsigned long node = rmem->fdt_node;

	if (pkvm_firmware_mem)
		return pkvm_firmware_rmem_err(rmem, "duplicate reservation");

	if (!of_get_flat_dt_prop(node, "no-map", NULL))
		return pkvm_firmware_rmem_err(rmem, "missing \"no-map\" property");

	if (of_get_flat_dt_prop(node, "reusable", NULL))
		return pkvm_firmware_rmem_err(rmem, "\"reusable\" property unsupported");

	if (!PAGE_ALIGNED(rmem->base))
		return pkvm_firmware_rmem_err(rmem, "base is not page-aligned");

	if (!PAGE_ALIGNED(rmem->size))
		return pkvm_firmware_rmem_err(rmem, "size is not page-aligned");

	*pvmfw_size = rmem->size;
	*pvmfw_base = rmem->base;
	pkvm_firmware_mem = rmem;
	return 0;
}
RESERVEDMEM_OF_DECLARE(pkvm_firmware, "linux,pkvm-guest-firmware-memory",
		       pkvm_firmware_rmem_init);

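/*
 * Wipe the pvmfw carveout when it is not going to be used, so that no stale
 * firmware data is left behind and user space no longer sees a reservation.
 */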
static int __init pkvm_firmware_rmem_clear(void)
{
	void *addr;
	phys_addr_t size;

	if (likely(!pkvm_firmware_mem))
		return 0;

	kvm_info("Clearing unused pKVM firmware memory\n");
	size = pkvm_firmware_mem->size;
	addr = memremap(pkvm_firmware_mem->base, size, MEMREMAP_WB);
	if (!addr)
		return -EINVAL;

	memset(addr, 0, size);
	/* Clear so user space doesn't get stale info via IOCTL. */
	pkvm_firmware_mem = NULL;

	dcache_clean_poc((unsigned long)addr, (unsigned long)addr + size);
	memunmap(addr);
	return 0;
}

static void _kvm_host_prot_finalize(void *arg)
{
	int *err = arg;

	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
		WRITE_ONCE(*err, -EINVAL);
}

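/*
 * De-privilege the host: mark protected mode as initialized, pin the recorded
 * boot mode to EL1 so resumed CPUs don't bounce through EL2, then have every
 * CPU finalize the host stage-2 protection at hyp.
 */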
static int pkvm_drop_host_privileges(void)
{
	int ret = 0;

	/*
	 * Flip the static key upfront as that may no longer be possible
	 * once the host stage 2 is installed.
	 */
	static_branch_enable(&kvm_protected_mode_initialized);

	/*
	 * Fixup the boot mode so that we don't take spurious round
	 * trips via EL2 on cpu_resume. Flush to the PoC for a good
	 * measure, so that it can be observed by a CPU coming out of
	 * suspend with the MMU off.
	 */
	__boot_cpu_mode[0] = __boot_cpu_mode[1] = BOOT_CPU_MODE_EL1;
	dcache_clean_poc((unsigned long)__boot_cpu_mode,
			 (unsigned long)(__boot_cpu_mode + 2));

	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
	return ret;
}

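/*
 * Late initcall completing pKVM bring-up: load early EL2 modules, hide the
 * hyp sections from kmemleak, and finally drop the host's privileges. The
 * pvmfw carveout is cleared whenever one of these steps rules out protected
 * guests.
 */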
static int __init finalize_pkvm(void)
{
	int ret;

	if (!is_protected_kvm_enabled()) {
		pkvm_firmware_rmem_clear();
		return 0;
	}

	/*
	 * Modules can play an essential part in the pKVM protection. All of
	 * them must properly load to enable protected VMs.
	 */
	if (pkvm_load_early_modules())
		pkvm_firmware_rmem_clear();

	/*
	 * Exclude HYP sections from kmemleak so that they don't get peeked
	 * at, which would end badly once inaccessible.
	 */
	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
	kmemleak_free_part(__hyp_data_start, __hyp_data_end - __hyp_data_start);
	kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);

	flush_deferred_probe_now();

	/* If no DMA protection. */
	if (!pkvm_iommu_finalized())
		pkvm_firmware_rmem_clear();

	ret = pkvm_drop_host_privileges();
	if (ret) {
		pr_err("Failed to de-privilege the host kernel: %d\n", ret);
		pkvm_firmware_rmem_clear();
	}

#ifdef CONFIG_ANDROID_ARM64_WORKAROUND_DMA_BEYOND_POC
	if (!ret)
		ret = pkvm_register_early_nc_mappings();
#endif

	return ret;
}
device_initcall_sync(finalize_pkvm);

static int pkvm_vm_ioctl_set_fw_ipa(struct kvm *kvm, u64 ipa)
{
	int ret = 0;

	if (!pkvm_firmware_mem)
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->arch.pkvm.handle) {
		ret = -EBUSY;
		goto out_unlock;
	}

	kvm->arch.pkvm.pvmfw_load_addr = ipa;
out_unlock:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int pkvm_vm_ioctl_info(struct kvm *kvm,
			      struct kvm_protected_vm_info __user *info)
{
	struct kvm_protected_vm_info kinfo = {
		.firmware_size = pkvm_firmware_mem ?
				 pkvm_firmware_mem->size :
				 0,
	};

	return copy_to_user(info, &kinfo, sizeof(kinfo)) ? -EFAULT : 0;
}

int pkvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	if (!kvm_vm_is_protected(kvm))
		return -EINVAL;

	if (cap->args[1] || cap->args[2] || cap->args[3])
		return -EINVAL;

	switch (cap->flags) {
	case KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FW_IPA:
		return pkvm_vm_ioctl_set_fw_ipa(kvm, cap->args[0]);
	case KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO:
		return pkvm_vm_ioctl_info(kvm, (void __force __user *)cap->args[0]);
	default:
		return -EINVAL;
	}

	return 0;
}

#ifdef CONFIG_MODULES
static char early_pkvm_modules[COMMAND_LINE_SIZE] __initdata;

static int __init early_pkvm_modules_cfg(char *arg)
{
	/*
	 * Loading pKVM modules with kvm-arm.protected_modules is deprecated.
	 * Use kvm-arm.protected_modules=<module1>,<module2>.
	 */
	if (!arg)
		return -EINVAL;

	strscpy(early_pkvm_modules, arg, COMMAND_LINE_SIZE);

	return 0;
}
early_param("kvm-arm.protected_modules", early_pkvm_modules_cfg);

static void free_modprobe_argv(struct subprocess_info *info)
{
	kfree(info->argv);
}

/*
 * Heavily inspired by request_module(). The latter couldn't be reused though,
 * as the feature can be disabled depending on umh configuration. Here some
 * security is enforced by making sure this can be called only while pKVM is
 * enabled but not yet completely initialized.
 */
static int __init __pkvm_request_early_module(char *module_name,
					      char *module_path)
{
	char *modprobe_path = CONFIG_MODPROBE_PATH;
	struct subprocess_info *info;
	static char *envp[] = {
		"HOME=/",
		"TERM=linux",
		"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
		NULL
	};
	char **argv;
	int idx = 0;

	if (!is_protected_kvm_enabled())
		return -EACCES;

	if (static_branch_likely(&kvm_protected_mode_initialized))
		return -EACCES;

	argv = kmalloc(sizeof(char *) * 7, GFP_KERNEL);
	if (!argv)
		return -ENOMEM;

	argv[idx++] = modprobe_path;
	argv[idx++] = "-q";
	if (*module_path != '\0') {
		argv[idx++] = "-d";
		argv[idx++] = module_path;
	}
	argv[idx++] = "--";
	argv[idx++] = module_name;
	argv[idx++] = NULL;

	info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL,
					 NULL, free_modprobe_argv, NULL);
	if (!info)
		goto err;

	/* Even with CONFIG_STATIC_USERMODEHELPER we really want this path */
	info->path = modprobe_path;

	return call_usermodehelper_exec(info, UMH_WAIT_PROC | UMH_KILLABLE);
err:
	kfree(argv);

	return -ENOMEM;
}

static int __init pkvm_request_early_module(char *module_name, char *module_path)
{
	int err = __pkvm_request_early_module(module_name, module_path);

	if (!err)
		return 0;

	/* Already tried the default path */
	if (*module_path == '\0')
		return err;

	pr_info("loading %s from %s failed, falling back to the default path\n",
		module_name, module_path);

	return __pkvm_request_early_module(module_name, "");
}

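/*
 * Load every module listed in kvm-arm.protected_modules before the host is
 * de-privileged. /proc is mounted before invoking modprobe, and the ','
 * separators are put back after each strsep() so the list is left intact.
 */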
int __init pkvm_load_early_modules(void)
{
	char *token, *buf = early_pkvm_modules;
	char *module_path = CONFIG_PKVM_MODULE_PATH;
	int err = init_mount("proc", "/proc", "proc",
			     MS_SILENT | MS_NOEXEC | MS_NOSUID, NULL);

	if (err)
		return err;

	while (true) {
		token = strsep(&buf, ",");

		if (!token)
			break;

		if (*token) {
			err = pkvm_request_early_module(token, module_path);
			if (err) {
				pr_err("Failed to load pkvm module %s: %d\n",
				       token, err);
				return err;
			}
		}

		if (buf)
			*(buf - 1) = ',';
	}

	return 0;
}

struct pkvm_mod_sec_mapping {
	struct pkvm_module_section *sec;
	enum kvm_pgtable_prot prot;
};

static void pkvm_unmap_module_pages(void *kern_va, void *hyp_va, size_t size)
{
	size_t offset;
	u64 pfn;

	for (offset = 0; offset < size; offset += PAGE_SIZE) {
		pfn = vmalloc_to_pfn(kern_va + offset);
		kvm_call_hyp_nvhe(__pkvm_unmap_module_page, pfn,
				  hyp_va + offset);
	}
}

static void pkvm_unmap_module_sections(struct pkvm_mod_sec_mapping *secs_map, void *hyp_va_base, int nr_secs)
{
	size_t offset, size;
	void *start;
	int i;

	for (i = 0; i < nr_secs; i++) {
		start = secs_map[i].sec->start;
		size = secs_map[i].sec->end - start;
		offset = start - secs_map[0].sec->start;
		pkvm_unmap_module_pages(start, hyp_va_base + offset, size);
	}
}

static int pkvm_map_module_section(struct pkvm_mod_sec_mapping *sec_map, void *hyp_va)
{
	size_t offset, size = sec_map->sec->end - sec_map->sec->start;
	int ret;
	u64 pfn;

	for (offset = 0; offset < size; offset += PAGE_SIZE) {
		pfn = vmalloc_to_pfn(sec_map->sec->start + offset);
		ret = kvm_call_hyp_nvhe(__pkvm_map_module_page, pfn,
					hyp_va + offset, sec_map->prot);
		if (ret) {
			pkvm_unmap_module_pages(sec_map->sec->start, hyp_va, offset);
			return ret;
		}
	}

	return 0;
}

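/*
 * Map all module sections into the hypervisor, page by page, at their offset
 * from the first section and with the requested protections. On failure, any
 * sections already mapped are unmapped again.
 */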
static int pkvm_map_module_sections(struct pkvm_mod_sec_mapping *secs_map, void *hyp_va_base, int nr_secs)
{
	size_t offset;
	int i, ret;

	for (i = 0; i < nr_secs; i++) {
		offset = secs_map[i].sec->start - secs_map[0].sec->start;
		ret = pkvm_map_module_section(&secs_map[i], hyp_va_base + offset);
		if (ret) {
			pkvm_unmap_module_sections(secs_map, hyp_va_base, i);
			return ret;
		}
	}

	return 0;
}

static int __pkvm_cmp_mod_sec(const void *p1, const void *p2)
{
	struct pkvm_mod_sec_mapping const *s1 = p1;
	struct pkvm_mod_sec_mapping const *s2 = p2;

	return s1->sec->start < s2->sec->start ? -1 : s1->sec->start > s2->sec->start;
}

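/*
 * Load the EL2 part of a module into the hypervisor: allocate hyp VA space,
 * apply the hyp relocations, map the text/bss/rodata/data sections with the
 * appropriate protections and run the module's hyp init function. Only
 * allowed while pKVM is enabled but not yet finalized.
 */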
int __pkvm_load_el2_module(struct module *this, unsigned long *token)
{
	struct pkvm_el2_module *mod = &this->arch.hyp;
	struct pkvm_mod_sec_mapping secs_map[] = {
		{ &mod->text, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X },
		{ &mod->bss, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W },
		{ &mod->rodata, KVM_PGTABLE_PROT_R },
		{ &mod->data, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W },
	};
	void *start, *end, *hyp_va;
	struct arm_smccc_res res;
	kvm_nvhe_reloc_t *endrel;
	int ret, i, secs_first;
	size_t offset, size;

	/* The pKVM hyp only allows loading before it is fully initialized */
	if (!is_protected_kvm_enabled() || is_pkvm_initialized())
		return -EOPNOTSUPP;

	for (i = 0; i < ARRAY_SIZE(secs_map); i++) {
		if (!PAGE_ALIGNED(secs_map[i].sec->start)) {
			kvm_err("EL2 sections are not page-aligned\n");
			return -EINVAL;
		}
	}

	if (!try_module_get(this)) {
		kvm_err("Kernel module has been unloaded\n");
		return -ENODEV;
	}

	/* Missing or empty module sections are placed first */
	sort(secs_map, ARRAY_SIZE(secs_map), sizeof(secs_map[0]), __pkvm_cmp_mod_sec, NULL);
	for (secs_first = 0; secs_first < ARRAY_SIZE(secs_map); secs_first++) {
		start = secs_map[secs_first].sec->start;
		if (start)
			break;
	}
	end = secs_map[ARRAY_SIZE(secs_map) - 1].sec->end;
	size = end - start;

	arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__pkvm_alloc_module_va),
			  size >> PAGE_SHIFT, &res);
	if (res.a0 != SMCCC_RET_SUCCESS || !res.a1) {
		kvm_err("Failed to allocate hypervisor VA space for EL2 module\n");
		module_put(this);
		return res.a0 == SMCCC_RET_SUCCESS ? -ENOMEM : -EPERM;
	}
	hyp_va = (void *)res.a1;

	/*
	 * The token can be used for other calls related to this module.
	 * Conveniently the only information needed is this addr so let's use it
	 * as an identifier.
	 */
	if (token)
		*token = (unsigned long)hyp_va;

	endrel = (void *)mod->relocs + mod->nr_relocs * sizeof(*endrel);
	kvm_apply_hyp_module_relocations(start, hyp_va, mod->relocs, endrel);

	/*
	 * Exclude EL2 module sections from kmemleak before making them
	 * inaccessible.
	 */
	kmemleak_free_part(start, size);

	ret = pkvm_map_module_sections(secs_map + secs_first, hyp_va,
				       ARRAY_SIZE(secs_map) - secs_first);
	if (ret) {
		kvm_err("Failed to map EL2 module page: %d\n", ret);
		module_put(this);
		return ret;
	}

	offset = (size_t)((void *)mod->init - start);
	ret = kvm_call_hyp_nvhe(__pkvm_init_module, hyp_va + offset);
	if (ret) {
		kvm_err("Failed to init EL2 module: %d\n", ret);
		pkvm_unmap_module_sections(secs_map, hyp_va, ARRAY_SIZE(secs_map));
		module_put(this);
		return ret;
	}

	return 0;
}
EXPORT_SYMBOL(__pkvm_load_el2_module);

int __pkvm_register_el2_call(unsigned long hfn_hyp_va)
{
	return kvm_call_hyp_nvhe(__pkvm_register_hcall, hfn_hyp_va);
}
EXPORT_SYMBOL(__pkvm_register_el2_call);
#endif /* CONFIG_MODULES */