// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 - Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/io.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/of_address.h>
#include <linux/of_fdt.h>
#include <linux/of_reserved_mem.h>
#include <linux/sort.h>
#include <linux/stat.h>

#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pkvm.h>
#include <asm/kvm_pkvm_module.h>
#include <asm/setup.h>

#include <uapi/linux/mount.h>
#include <linux/init_syscalls.h>

#include "hyp_constants.h"

DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);

static struct reserved_mem *pkvm_firmware_mem;
static phys_addr_t *pvmfw_base = &kvm_nvhe_sym(pvmfw_base);
static phys_addr_t *pvmfw_size = &kvm_nvhe_sym(pvmfw_size);

static struct pkvm_moveable_reg *moveable_regs = kvm_nvhe_sym(pkvm_moveable_regs);
static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);

phys_addr_t hyp_mem_base;
phys_addr_t hyp_mem_size;

static int cmp_hyp_memblock(const void *p1, const void *p2)
{
	const struct memblock_region *r1 = p1;
	const struct memblock_region *r2 = p2;

	return r1->base < r2->base ? -1 : (r1->base > r2->base);
}

static void __init sort_memblock_regions(void)
{
	sort(hyp_memory,
	     *hyp_memblock_nr_ptr,
	     sizeof(struct memblock_region),
	     cmp_hyp_memblock,
	     NULL);
}

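/*
 * Snapshot the kernel's memblock regions into the hypervisor's private copy
 * (hyp_memory), sorted by base address. Fails with -ENOMEM if there are more
 * regions than HYP_MEMBLOCK_REGIONS.
 */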
static int __init register_memblock_regions(void)
{
	struct memblock_region *reg;

	for_each_mem_region(reg) {
		if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
			return -ENOMEM;

		hyp_memory[*hyp_memblock_nr_ptr] = *reg;
		(*hyp_memblock_nr_ptr)++;
	}
	sort_memblock_regions();

	return 0;
}

static int cmp_moveable_reg(const void *p1, const void *p2)
{
	const struct pkvm_moveable_reg *r1 = p1;
	const struct pkvm_moveable_reg *r2 = p2;

	/*
	 * Moveable regions may overlap, so put the largest one first when start
	 * addresses are equal to allow a simpler walk from e.g.
	 * host_stage2_unmap_unmoveable_regs().
	 */
	if (r1->start < r2->start)
		return -1;
	else if (r1->start > r2->start)
		return 1;
	else if (r1->size > r2->size)
		return -1;
	else if (r1->size < r2->size)
		return 1;
	return 0;
}

static void __init sort_moveable_regs(void)
{
	sort(moveable_regs,
	     kvm_nvhe_sym(pkvm_moveable_regs_nr),
	     sizeof(struct pkvm_moveable_reg),
	     cmp_moveable_reg,
	     NULL);
}

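/*
 * Build the array of "moveable" regions handed to the hypervisor: every
 * memblock memory region plus any "pkvm,protected-region" node found in the
 * device tree. The array is capped at PKVM_NR_MOVEABLE_REGS and sorted
 * before use.
 */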
static int __init register_moveable_regions(void)
{
	struct memblock_region *reg;
	struct device_node *np;
	int i = 0;

	for_each_mem_region(reg) {
		if (i >= PKVM_NR_MOVEABLE_REGS)
			return -ENOMEM;
		moveable_regs[i].start = reg->base;
		moveable_regs[i].size = reg->size;
		moveable_regs[i].type = PKVM_MREG_MEMORY;
		i++;
	}

	for_each_compatible_node(np, NULL, "pkvm,protected-region") {
		struct resource res;
		u64 start, size;
		int ret;

		if (i >= PKVM_NR_MOVEABLE_REGS)
			return -ENOMEM;

		ret = of_address_to_resource(np, 0, &res);
		if (ret)
			return ret;

		start = res.start;
		size = resource_size(&res);
		if (!PAGE_ALIGNED(start) || !PAGE_ALIGNED(size))
			return -EINVAL;

		moveable_regs[i].start = start;
		moveable_regs[i].size = size;
		moveable_regs[i].type = PKVM_MREG_PROTECTED_RANGE;
		i++;
	}

	kvm_nvhe_sym(pkvm_moveable_regs_nr) = i;
	sort_moveable_regs();

	return 0;
}

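/*
 * Called from the early arm64 boot code: compute how many pages the
 * hypervisor needs for its stage-1 page-table, the host stage-2, the VM
 * table, the vmemmap and the FF-A proxy, then reserve that memory from
 * memblock. Only runs when booting in protected (nVHE) mode.
 */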
void __init kvm_hyp_reserve(void)
{
	u64 hyp_mem_pages = 0;
	int ret;

	if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
		return;

	if (kvm_get_mode() != KVM_MODE_PROTECTED)
		return;

	ret = register_memblock_regions();
	if (ret) {
		*hyp_memblock_nr_ptr = 0;
		kvm_err("Failed to register hyp memblocks: %d\n", ret);
		return;
	}

	ret = register_moveable_regions();
	if (ret) {
		*hyp_memblock_nr_ptr = 0;
		kvm_err("Failed to register pkvm moveable regions: %d\n", ret);
		return;
	}

	hyp_mem_pages += hyp_s1_pgtable_pages();
	hyp_mem_pages += host_s2_pgtable_pages();
	hyp_mem_pages += hyp_vm_table_pages();
	hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
	hyp_mem_pages += hyp_ffa_proxy_pages();

	/*
	 * Try to allocate a PMD-aligned region to reduce TLB pressure once
	 * this is unmapped from the host stage-2, and fall back to PAGE_SIZE.
	 */
	hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
	hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
					   PMD_SIZE);
	if (!hyp_mem_base)
		hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
	else
		hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);

	if (!hyp_mem_base) {
		kvm_err("Failed to reserve hyp memory\n");
		return;
	}

	kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
		 hyp_mem_base);
}

/*
 * Allocates and donates memory for hypervisor VM structs at EL2.
 *
 * Allocates space for the VM state, which includes the hyp vm as well as
 * the hyp vcpus.
 *
 * Stores an opaque handle in the kvm struct for future reference.
 *
 * Return 0 on success, negative error code on failure.
 */
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	size_t pgd_sz, hyp_vm_sz, hyp_vcpu_sz, last_ran_sz, total_sz;
	struct kvm_vcpu *host_vcpu;
	pkvm_handle_t handle;
	void *pgd, *hyp_vm, *last_ran;
	unsigned long idx;
	int ret;

	if (host_kvm->created_vcpus < 1)
		return -EINVAL;

	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr);

	/*
	 * The PGD pages will be reclaimed using a hyp_memcache which implies
	 * page granularity. So, use alloc_pages_exact() to get individual
	 * refcounts.
	 */
	pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
	if (!pgd)
		return -ENOMEM;

	/* Allocate memory to donate to hyp for vm and vcpu pointers. */
	hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
					size_mul(sizeof(void *),
						 host_kvm->created_vcpus)));
	hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vm) {
		ret = -ENOMEM;
		goto free_pgd;
	}

	/* Allocate memory to donate to hyp for tracking mmu->last_vcpu_ran. */
	last_ran_sz = PAGE_ALIGN(array_size(num_possible_cpus(), sizeof(int)));
	last_ran = alloc_pages_exact(last_ran_sz, GFP_KERNEL_ACCOUNT);
	if (!last_ran) {
		ret = -ENOMEM;
		goto free_vm;
	}

	/* Donate the VM memory to hyp and let hyp initialize it. */
	ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd, last_ran);
	if (ret < 0)
		goto free_last_ran;

	handle = ret;

	host_kvm->arch.pkvm.handle = handle;

	total_sz = hyp_vm_sz + last_ran_sz + pgd_sz;

	/* Donate memory for the vcpus at hyp and initialize them. */
	hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
	kvm_for_each_vcpu(idx, host_vcpu, host_kvm) {
		void *hyp_vcpu;

		/* Indexing of the vcpus must be sequential, starting at 0. */
		if (WARN_ON(host_vcpu->vcpu_idx != idx)) {
			ret = -EINVAL;
			goto destroy_vm;
		}

		hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
		if (!hyp_vcpu) {
			ret = -ENOMEM;
			goto destroy_vm;
		}

		total_sz += hyp_vcpu_sz;

		ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu,
					hyp_vcpu);
		if (ret) {
			free_pages_exact(hyp_vcpu, hyp_vcpu_sz);
			goto destroy_vm;
		}
	}

	atomic64_set(&host_kvm->stat.protected_hyp_mem, total_sz);
	kvm_account_pgtable_pages(pgd, pgd_sz >> PAGE_SHIFT);

	return 0;

destroy_vm:
	pkvm_destroy_hyp_vm(host_kvm);
	return ret;
free_last_ran:
	free_pages_exact(last_ran, last_ran_sz);
free_vm:
	free_pages_exact(hyp_vm, hyp_vm_sz);
free_pgd:
	free_pages_exact(pgd, pgd_sz);
	return ret;
}

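/*
 * Create the hypervisor view of this VM if it doesn't exist yet. Serialized
 * against concurrent callers by the kvm lock.
 */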
int pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	int ret = 0;

	mutex_lock(&host_kvm->lock);
	if (!host_kvm->arch.pkvm.handle)
		ret = __pkvm_create_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->lock);

	return ret;
}

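/*
 * Tear down the hypervisor view of the VM: start the teardown at EL2,
 * reclaim and unpin every guest page still tracked in pinned_pages, then
 * finalize the teardown and free the teardown memcaches.
 */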
void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	struct kvm_pinned_page *ppage;
	struct mm_struct *mm = current->mm;
	struct rb_node *node;

	if (!host_kvm->arch.pkvm.handle)
		goto out_free;

	WARN_ON(kvm_call_hyp_nvhe(__pkvm_start_teardown_vm, host_kvm->arch.pkvm.handle));

	node = rb_first(&host_kvm->arch.pkvm.pinned_pages);
	while (node) {
		ppage = rb_entry(node, struct kvm_pinned_page, node);
		WARN_ON(kvm_call_hyp_nvhe(__pkvm_reclaim_dying_guest_page,
					  host_kvm->arch.pkvm.handle,
					  page_to_pfn(ppage->page),
					  ppage->ipa));
		cond_resched();

		account_locked_vm(mm, 1, false);
		unpin_user_pages_dirty_lock(&ppage->page, 1, true);
		node = rb_next(node);
		rb_erase(&ppage->node, &host_kvm->arch.pkvm.pinned_pages);
		kfree(ppage);
	}

	WARN_ON(kvm_call_hyp_nvhe(__pkvm_finalize_teardown_vm, host_kvm->arch.pkvm.handle));

out_free:
	host_kvm->arch.pkvm.handle = 0;
	free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc, host_kvm);
	free_hyp_stage2_memcache(&host_kvm->arch.pkvm.teardown_stage2_mc,
				 host_kvm);
}

int pkvm_init_host_vm(struct kvm *host_kvm, unsigned long type)
{
	mutex_init(&host_kvm->lock);

	if (!(type & KVM_VM_TYPE_ARM_PROTECTED))
		return 0;

	if (!is_protected_kvm_enabled())
		return -EINVAL;

	host_kvm->arch.pkvm.pvmfw_load_addr = PVMFW_INVALID_LOAD_ADDR;
	host_kvm->arch.pkvm.enabled = true;
	return 0;
}

static int rb_ppage_cmp(const void *key, const struct rb_node *node)
{
	struct kvm_pinned_page *p = container_of(node, struct kvm_pinned_page, node);
	phys_addr_t ipa = (phys_addr_t)key;

	return (ipa < p->ipa) ? -1 : (ipa > p->ipa);
}

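/*
 * Reclaim a single page that was pinned on behalf of a protected guest:
 * remove it from the pinned_pages tree under the mmu_lock, then unpin it
 * and drop the locked-vm accounting.
 */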
void pkvm_host_reclaim_page(struct kvm *host_kvm, phys_addr_t ipa)
{
	struct kvm_pinned_page *ppage;
	struct mm_struct *mm = current->mm;
	struct rb_node *node;

	write_lock(&host_kvm->mmu_lock);
	node = rb_find((void *)ipa, &host_kvm->arch.pkvm.pinned_pages,
		       rb_ppage_cmp);
	if (node)
		rb_erase(node, &host_kvm->arch.pkvm.pinned_pages);
	write_unlock(&host_kvm->mmu_lock);

	WARN_ON(!node);
	if (!node)
		return;

	ppage = container_of(node, struct kvm_pinned_page, node);
	account_locked_vm(mm, 1, false);
	unpin_user_pages_dirty_lock(&ppage->page, 1, true);
	kfree(ppage);
}

static int __init pkvm_firmware_rmem_err(struct reserved_mem *rmem,
					 const char *reason)
{
	phys_addr_t end = rmem->base + rmem->size;

	kvm_err("Ignoring pkvm guest firmware memory reservation [%pa - %pa]: %s\n",
		&rmem->base, &end, reason);
	return -EINVAL;
}

static int __init pkvm_firmware_rmem_init(struct reserved_mem *rmem)
{
	unsigned long node = rmem->fdt_node;

	if (pkvm_firmware_mem)
		return pkvm_firmware_rmem_err(rmem, "duplicate reservation");

	if (!of_get_flat_dt_prop(node, "no-map", NULL))
		return pkvm_firmware_rmem_err(rmem, "missing \"no-map\" property");

	if (of_get_flat_dt_prop(node, "reusable", NULL))
		return pkvm_firmware_rmem_err(rmem, "\"reusable\" property unsupported");

	if (!PAGE_ALIGNED(rmem->base))
		return pkvm_firmware_rmem_err(rmem, "base is not page-aligned");

	if (!PAGE_ALIGNED(rmem->size))
		return pkvm_firmware_rmem_err(rmem, "size is not page-aligned");

	*pvmfw_size = rmem->size;
	*pvmfw_base = rmem->base;
	pkvm_firmware_mem = rmem;
	return 0;
}
RESERVEDMEM_OF_DECLARE(pkvm_firmware, "linux,pkvm-guest-firmware-memory",
		       pkvm_firmware_rmem_init);

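/*
 * Zero and clean the pvmfw carveout when it ends up unused, and forget the
 * reservation so user space no longer sees it via the INFO ioctl.
 */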
static int __init pkvm_firmware_rmem_clear(void)
{
	void *addr;
	phys_addr_t size;

	if (likely(!pkvm_firmware_mem))
		return 0;

	kvm_info("Clearing unused pKVM firmware memory\n");
	size = pkvm_firmware_mem->size;
	addr = memremap(pkvm_firmware_mem->base, size, MEMREMAP_WB);
	if (!addr)
		return -EINVAL;

	memset(addr, 0, size);
	/* Clear so user space doesn't get stale info via IOCTL. */
	pkvm_firmware_mem = NULL;

	dcache_clean_poc((unsigned long)addr, (unsigned long)addr + size);
	memunmap(addr);
	return 0;
}

static void _kvm_host_prot_finalize(void *arg)
{
	int *err = arg;

	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
		WRITE_ONCE(*err, -EINVAL);
}

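/*
 * De-privilege the host: finalize the hypervisor protections on every CPU so
 * the host runs under its own stage-2 from now on. A failure on any CPU
 * makes the whole operation return -EINVAL.
 */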
static int pkvm_drop_host_privileges(void)
{
	int ret = 0;

	/*
	 * Flip the static key upfront as that may no longer be possible
	 * once the host stage 2 is installed.
	 */
	static_branch_enable(&kvm_protected_mode_initialized);

	/*
	 * Fixup the boot mode so that we don't take spurious round
	 * trips via EL2 on cpu_resume. Flush to the PoC for good
	 * measure, so that it can be observed by a CPU coming out of
	 * suspend with the MMU off.
	 */
	__boot_cpu_mode[0] = __boot_cpu_mode[1] = BOOT_CPU_MODE_EL1;
	dcache_clean_poc((unsigned long)__boot_cpu_mode,
			 (unsigned long)(__boot_cpu_mode + 2));

	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
	return ret;
}

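/*
 * Runs as a device_initcall_sync() to complete pKVM initialization: load the
 * early EL2 modules, hide the hypervisor sections from kmemleak, check that
 * DMA protection has been finalized, and finally drop the host's privileges.
 * If a prerequisite fails, the unused firmware carveout is cleared.
 */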
static int __init finalize_pkvm(void)
{
	int ret;

	if (!is_protected_kvm_enabled()) {
		pkvm_firmware_rmem_clear();
		return 0;
	}

	/*
	 * Modules can play an essential part in the pKVM protection. All of
	 * them must load properly to enable protected VMs.
	 */
	if (pkvm_load_early_modules())
		pkvm_firmware_rmem_clear();

	/*
	 * Exclude HYP sections from kmemleak so that they don't get peeked
	 * at, which would end badly once inaccessible.
	 */
	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
	kmemleak_free_part(__hyp_data_start, __hyp_data_end - __hyp_data_start);
	kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);

	flush_deferred_probe_now();

	/* If there is no DMA protection, clear the unused firmware memory. */
	if (!pkvm_iommu_finalized())
		pkvm_firmware_rmem_clear();

	ret = pkvm_drop_host_privileges();
	if (ret) {
		pr_err("Failed to de-privilege the host kernel: %d\n", ret);
		pkvm_firmware_rmem_clear();
	}

#ifdef CONFIG_ANDROID_ARM64_WORKAROUND_DMA_BEYOND_POC
	if (!ret)
		ret = pkvm_register_early_nc_mappings();
#endif

	return ret;
}
device_initcall_sync(finalize_pkvm);

static int pkvm_vm_ioctl_set_fw_ipa(struct kvm *kvm, u64 ipa)
{
	int ret = 0;

	if (!pkvm_firmware_mem)
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->arch.pkvm.handle) {
		ret = -EBUSY;
		goto out_unlock;
	}

	kvm->arch.pkvm.pvmfw_load_addr = ipa;
out_unlock:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int pkvm_vm_ioctl_info(struct kvm *kvm,
			      struct kvm_protected_vm_info __user *info)
{
	struct kvm_protected_vm_info kinfo = {
		.firmware_size = pkvm_firmware_mem ?
				 pkvm_firmware_mem->size :
				 0,
	};

	return copy_to_user(info, &kinfo, sizeof(kinfo)) ? -EFAULT : 0;
}

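/*
 * Entry point for the protected-VM enable-cap operations: cap->flags selects
 * either setting the firmware load IPA or querying protected-VM info;
 * cap->args[0] carries the operation's argument and the remaining args must
 * be zero.
 */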
int pkvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	if (!kvm_vm_is_protected(kvm))
		return -EINVAL;

	if (cap->args[1] || cap->args[2] || cap->args[3])
		return -EINVAL;

	switch (cap->flags) {
	case KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FW_IPA:
		return pkvm_vm_ioctl_set_fw_ipa(kvm, cap->args[0]);
	case KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO:
		return pkvm_vm_ioctl_info(kvm, (void __force __user *)cap->args[0]);
	default:
		return -EINVAL;
	}

	return 0;
}

#ifdef CONFIG_MODULES
static char early_pkvm_modules[COMMAND_LINE_SIZE] __initdata;

static int __init early_pkvm_modules_cfg(char *arg)
{
	/*
	 * Loading pKVM modules with kvm-arm.protected_modules is deprecated.
	 * Use kvm-arm.protected_modules=<module1>,<module2>.
	 */
	if (!arg)
		return -EINVAL;

	strscpy(early_pkvm_modules, arg, COMMAND_LINE_SIZE);

	return 0;
}
early_param("kvm-arm.protected_modules", early_pkvm_modules_cfg);

static void free_modprobe_argv(struct subprocess_info *info)
{
	kfree(info->argv);
}

/*
 * Heavily inspired by request_module(). The latter couldn't be reused though,
 * as the feature can be disabled depending on umh configuration. Here some
 * security is enforced by making sure this can only be called when pKVM is
 * enabled but not yet completely initialized.
 */
static int __init __pkvm_request_early_module(char *module_name,
					      char *module_path)
{
	char *modprobe_path = CONFIG_MODPROBE_PATH;
	struct subprocess_info *info;
	static char *envp[] = {
		"HOME=/",
		"TERM=linux",
		"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
		NULL
	};
	char **argv;
	int idx = 0;

	if (!is_protected_kvm_enabled())
		return -EACCES;

	if (static_branch_likely(&kvm_protected_mode_initialized))
		return -EACCES;

	argv = kmalloc(sizeof(char *) * 7, GFP_KERNEL);
	if (!argv)
		return -ENOMEM;

	argv[idx++] = modprobe_path;
	argv[idx++] = "-q";
	if (*module_path != '\0') {
		argv[idx++] = "-d";
		argv[idx++] = module_path;
	}
	argv[idx++] = "--";
	argv[idx++] = module_name;
	argv[idx++] = NULL;

	info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL,
					 NULL, free_modprobe_argv, NULL);
	if (!info)
		goto err;

	/* Even with CONFIG_STATIC_USERMODEHELPER we really want this path */
	info->path = modprobe_path;

	return call_usermodehelper_exec(info, UMH_WAIT_PROC | UMH_KILLABLE);
err:
	kfree(argv);

	return -ENOMEM;
}

static int __init pkvm_request_early_module(char *module_name, char *module_path)
{
	int err = __pkvm_request_early_module(module_name, module_path);

	if (!err)
		return 0;

	/* Already tried the default path */
	if (*module_path == '\0')
		return err;

	pr_info("loading %s from %s failed, falling back to the default path\n",
		module_name, module_path);

	return __pkvm_request_early_module(module_name, "");
}

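/*
 * Mount /proc, then parse the comma-separated kvm-arm.protected_modules list
 * and modprobe each entry, trying CONFIG_PKVM_MODULE_PATH first with the
 * default path as a fallback. Any failure aborts the whole sequence.
 */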
int __init pkvm_load_early_modules(void)
{
	char *token, *buf = early_pkvm_modules;
	char *module_path = CONFIG_PKVM_MODULE_PATH;
	int err = init_mount("proc", "/proc", "proc",
			     MS_SILENT | MS_NOEXEC | MS_NOSUID, NULL);

	if (err)
		return err;

	while (true) {
		token = strsep(&buf, ",");

		if (!token)
			break;

		if (*token) {
			err = pkvm_request_early_module(token, module_path);
			if (err) {
				pr_err("Failed to load pkvm module %s: %d\n",
				       token, err);
				return err;
			}
		}

		if (buf)
			*(buf - 1) = ',';
	}

	return 0;
}

struct pkvm_mod_sec_mapping {
	struct pkvm_module_section *sec;
	enum kvm_pgtable_prot prot;
};

static void pkvm_unmap_module_pages(void *kern_va, void *hyp_va, size_t size)
{
	size_t offset;
	u64 pfn;

	for (offset = 0; offset < size; offset += PAGE_SIZE) {
		pfn = vmalloc_to_pfn(kern_va + offset);
		kvm_call_hyp_nvhe(__pkvm_unmap_module_page, pfn,
				  hyp_va + offset);
	}
}

static void pkvm_unmap_module_sections(struct pkvm_mod_sec_mapping *secs_map, void *hyp_va_base, int nr_secs)
{
	size_t offset, size;
	void *start;
	int i;

	for (i = 0; i < nr_secs; i++) {
		start = secs_map[i].sec->start;
		size = secs_map[i].sec->end - start;
		offset = start - secs_map[0].sec->start;
		pkvm_unmap_module_pages(start, hyp_va_base + offset, size);
	}
}

static int pkvm_map_module_section(struct pkvm_mod_sec_mapping *sec_map, void *hyp_va)
{
	size_t offset, size = sec_map->sec->end - sec_map->sec->start;
	int ret;
	u64 pfn;

	for (offset = 0; offset < size; offset += PAGE_SIZE) {
		pfn = vmalloc_to_pfn(sec_map->sec->start + offset);
		ret = kvm_call_hyp_nvhe(__pkvm_map_module_page, pfn,
					hyp_va + offset, sec_map->prot);
		if (ret) {
			pkvm_unmap_module_pages(sec_map->sec->start, hyp_va, offset);
			return ret;
		}
	}

	return 0;
}

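/*
 * Map all module sections at EL2, preserving their offsets relative to the
 * first (lowest) section. Sections already mapped are unmapped again on
 * failure.
 */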
static int pkvm_map_module_sections(struct pkvm_mod_sec_mapping *secs_map, void *hyp_va_base, int nr_secs)
{
	size_t offset;
	int i, ret;

	for (i = 0; i < nr_secs; i++) {
		offset = secs_map[i].sec->start - secs_map[0].sec->start;
		ret = pkvm_map_module_section(&secs_map[i], hyp_va_base + offset);
		if (ret) {
			pkvm_unmap_module_sections(secs_map, hyp_va_base, i);
			return ret;
		}
	}

	return 0;
}

static int __pkvm_cmp_mod_sec(const void *p1, const void *p2)
{
	struct pkvm_mod_sec_mapping const *s1 = p1;
	struct pkvm_mod_sec_mapping const *s2 = p2;

	return s1->sec->start < s2->sec->start ? -1 : s1->sec->start > s2->sec->start;
}

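/*
 * Load the EL2 part of a module: sort its sections by kernel VA, ask the
 * hypervisor for a VA range, apply the hyp relocations, map the sections
 * with their respective permissions and finally run the module's hyp init
 * function. Only possible while pKVM is enabled but not yet finalized.
 */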
int __pkvm_load_el2_module(struct module *this, unsigned long *token)
{
	struct pkvm_el2_module *mod = &this->arch.hyp;
	struct pkvm_mod_sec_mapping secs_map[] = {
		{ &mod->text, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X },
		{ &mod->bss, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W },
		{ &mod->rodata, KVM_PGTABLE_PROT_R },
		{ &mod->data, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W },
	};
	void *start, *end, *hyp_va;
	struct arm_smccc_res res;
	kvm_nvhe_reloc_t *endrel;
	int ret, i, secs_first;
	size_t offset, size;

	/* The pKVM hyp only allows loading before it is fully initialized */
	if (!is_protected_kvm_enabled() || is_pkvm_initialized())
		return -EOPNOTSUPP;

	for (i = 0; i < ARRAY_SIZE(secs_map); i++) {
		if (!PAGE_ALIGNED(secs_map[i].sec->start)) {
			kvm_err("EL2 sections are not page-aligned\n");
			return -EINVAL;
		}
	}

	if (!try_module_get(this)) {
		kvm_err("Kernel module has been unloaded\n");
		return -ENODEV;
	}

	/* Missing or empty module sections are placed first */
	sort(secs_map, ARRAY_SIZE(secs_map), sizeof(secs_map[0]), __pkvm_cmp_mod_sec, NULL);
	for (secs_first = 0; secs_first < ARRAY_SIZE(secs_map); secs_first++) {
		start = secs_map[secs_first].sec->start;
		if (start)
			break;
	}
	end = secs_map[ARRAY_SIZE(secs_map) - 1].sec->end;
	size = end - start;

	arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__pkvm_alloc_module_va),
			  size >> PAGE_SHIFT, &res);
	if (res.a0 != SMCCC_RET_SUCCESS || !res.a1) {
		kvm_err("Failed to allocate hypervisor VA space for EL2 module\n");
		module_put(this);
		return res.a0 == SMCCC_RET_SUCCESS ? -ENOMEM : -EPERM;
	}
	hyp_va = (void *)res.a1;

	/*
	 * The token can be used for other calls related to this module.
	 * Conveniently, the only information needed is this address, so let's
	 * use it as an identifier.
	 */
	if (token)
		*token = (unsigned long)hyp_va;

	endrel = (void *)mod->relocs + mod->nr_relocs * sizeof(*endrel);
	kvm_apply_hyp_module_relocations(start, hyp_va, mod->relocs, endrel);

	/*
	 * Exclude EL2 module sections from kmemleak before making them
	 * inaccessible.
	 */
	kmemleak_free_part(start, size);

	ret = pkvm_map_module_sections(secs_map + secs_first, hyp_va,
				       ARRAY_SIZE(secs_map) - secs_first);
	if (ret) {
		kvm_err("Failed to map EL2 module page: %d\n", ret);
		module_put(this);
		return ret;
	}

	offset = (size_t)((void *)mod->init - start);
	ret = kvm_call_hyp_nvhe(__pkvm_init_module, hyp_va + offset);
	if (ret) {
		kvm_err("Failed to init EL2 module: %d\n", ret);
		pkvm_unmap_module_sections(secs_map, hyp_va, ARRAY_SIZE(secs_map));
		module_put(this);
		return ret;
	}

	return 0;
}
EXPORT_SYMBOL(__pkvm_load_el2_module);

int __pkvm_register_el2_call(unsigned long hfn_hyp_va)
{
	return kvm_call_hyp_nvhe(__pkvm_register_hcall, hfn_hyp_va);
}
EXPORT_SYMBOL(__pkvm_register_el2_call);
#endif /* CONFIG_MODULES */