// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 - Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/io.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/of_address.h>
#include <linux/of_fdt.h>
#include <linux/of_reserved_mem.h>
#include <linux/sort.h>
#include <linux/stat.h>

#include <asm/kvm_host.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pkvm.h>
#include <asm/kvm_pkvm_module.h>
#include <asm/setup.h>

#include <uapi/linux/mount.h>
#include <linux/init_syscalls.h>

#include "hyp_constants.h"
#include "kvm_ptdump.h"
#include "hyp_trace.h"

DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);

static struct reserved_mem *pkvm_firmware_mem;
static phys_addr_t *pvmfw_base = &kvm_nvhe_sym(pvmfw_base);
static phys_addr_t *pvmfw_size = &kvm_nvhe_sym(pvmfw_size);

static struct pkvm_moveable_reg *moveable_regs = kvm_nvhe_sym(pkvm_moveable_regs);
static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);

phys_addr_t hyp_mem_base;
phys_addr_t hyp_mem_size;
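
/* sort() comparator: order hyp memblock regions by ascending base address. */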
static int cmp_hyp_memblock(const void *p1, const void *p2)
{
	const struct memblock_region *r1 = p1;
	const struct memblock_region *r2 = p2;

	return r1->base < r2->base ? -1 : (r1->base > r2->base);
}

static void __init sort_memblock_regions(void)
{
	sort(hyp_memory,
	     *hyp_memblock_nr_ptr,
	     sizeof(struct memblock_region),
	     cmp_hyp_memblock,
	     NULL);
}

static int __init register_memblock_regions(void)
{
	struct memblock_region *reg;

	for_each_mem_region(reg) {
		if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
			return -ENOMEM;

		hyp_memory[*hyp_memblock_nr_ptr] = *reg;
		(*hyp_memblock_nr_ptr)++;
	}
	sort_memblock_regions();

	return 0;
}

static int cmp_moveable_reg(const void *p1, const void *p2)
{
	const struct pkvm_moveable_reg *r1 = p1;
	const struct pkvm_moveable_reg *r2 = p2;

	/*
	 * Moveable regions may overlap, so put the largest one first when start
	 * addresses are equal to allow a simpler walk from e.g.
	 * host_stage2_unmap_unmoveable_regs().
	 */
	if (r1->start < r2->start)
		return -1;
	else if (r1->start > r2->start)
		return 1;
	else if (r1->size > r2->size)
		return -1;
	else if (r1->size < r2->size)
		return 1;
	return 0;
}

static void __init sort_moveable_regs(void)
{
	sort(moveable_regs,
	     kvm_nvhe_sym(pkvm_moveable_regs_nr),
	     sizeof(struct pkvm_moveable_reg),
	     cmp_moveable_reg,
	     NULL);
}

static int __init register_moveable_regions(void)
{
	struct memblock_region *reg;
	struct device_node *np;
	int i = 0;

	for_each_mem_region(reg) {
		if (i >= PKVM_NR_MOVEABLE_REGS)
			return -ENOMEM;
		moveable_regs[i].start = reg->base;
		moveable_regs[i].size = reg->size;
		moveable_regs[i].type = PKVM_MREG_MEMORY;
		i++;
	}

	for_each_compatible_node(np, NULL, "pkvm,protected-region") {
		struct resource res;
		u64 start, size;
		int ret;

		if (i >= PKVM_NR_MOVEABLE_REGS)
			return -ENOMEM;

		ret = of_address_to_resource(np, 0, &res);
		if (ret)
			return ret;

		start = res.start;
		size = resource_size(&res);
		if (!PAGE_ALIGNED(start) || !PAGE_ALIGNED(size))
			return -EINVAL;

		moveable_regs[i].start = start;
		moveable_regs[i].size = size;
		moveable_regs[i].type = PKVM_MREG_PROTECTED_RANGE;
		i++;
	}

	kvm_nvhe_sym(pkvm_moveable_regs_nr) = i;
	sort_moveable_regs();

	return 0;
}
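
/*
 * Reserve, early during boot, a contiguous chunk of memory for the
 * hypervisor's own stage-1 page-table, the host stage-2 page-table, the
 * hyp vmemmap, the VM table and the FF-A proxy.
 */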
void __init kvm_hyp_reserve(void)
{
	u64 hyp_mem_pages = 0;
	int ret;

	if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
		return;

	if (kvm_get_mode() != KVM_MODE_PROTECTED)
		return;

	ret = register_memblock_regions();
	if (ret) {
		*hyp_memblock_nr_ptr = 0;
		kvm_err("Failed to register hyp memblocks: %d\n", ret);
		return;
	}

	ret = register_moveable_regions();
	if (ret) {
		*hyp_memblock_nr_ptr = 0;
		kvm_err("Failed to register pkvm moveable regions: %d\n", ret);
		return;
	}

	hyp_mem_pages += hyp_s1_pgtable_pages();
	hyp_mem_pages += host_s2_pgtable_pages();
	hyp_mem_pages += hyp_vm_table_pages();
	hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
	hyp_mem_pages += hyp_ffa_proxy_pages();

	/*
	 * Try to allocate a PMD-aligned region to reduce TLB pressure once
	 * this is unmapped from the host stage-2, and fall back to PAGE_SIZE.
	 */
	hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
	hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
					   PMD_SIZE);
	if (!hyp_mem_base)
		hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
	else
		hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);

	if (!hyp_mem_base) {
		kvm_err("Failed to reserve hyp memory\n");
		return;
	}

	kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
		 hyp_mem_base);
}
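
/*
 * Create the EL2 view of a vCPU: allocate the vcpu's hyp_reqs page, share
 * it with the hypervisor, and donate/initialize the vCPU at EL2 via
 * __pkvm_init_vcpu.
 */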
static int __pkvm_create_hyp_vcpu(struct kvm *host_kvm, struct kvm_vcpu *host_vcpu, unsigned long idx)
{
	pkvm_handle_t handle = host_kvm->arch.pkvm.handle;
	struct kvm_hyp_req *hyp_reqs;
	int ret;

	init_hyp_stage2_memcache(&host_vcpu->arch.stage2_mc);

	/* vcpu indexing is expected to be sequential, starting at 0. */
	if (WARN_ON(host_vcpu->vcpu_idx != idx))
		return -EINVAL;

	hyp_reqs = (struct kvm_hyp_req *)__get_free_page(GFP_KERNEL_ACCOUNT);
	if (!hyp_reqs)
		return -ENOMEM;

	ret = kvm_share_hyp(hyp_reqs, hyp_reqs + 1);
	if (ret)
		goto err_free_reqs;
	host_vcpu->arch.hyp_reqs = hyp_reqs;

	ret = kvm_call_refill_hyp_nvhe(__pkvm_init_vcpu,
				       handle, host_vcpu);
	if (!ret)
		return 0;

	kvm_unshare_hyp(hyp_reqs, hyp_reqs + 1);
err_free_reqs:
	free_page((unsigned long)hyp_reqs);
	host_vcpu->arch.hyp_reqs = NULL;

	return ret;
}

static void __pkvm_vcpu_hyp_created(struct kvm_vcpu *vcpu)
{
	if (kvm_vm_is_protected(vcpu->kvm))
		vcpu->arch.sve_state = NULL;
}

/*
 * Handle broken-down huge pages which have not yet been reported to the
 * kvm_pinned_page.
 */
int pkvm_call_hyp_nvhe_ppage(struct kvm_pinned_page *ppage,
			     int (*call_hyp_nvhe)(u64 pfn, u64 gfn, u8 order, void *args),
			     void *args, bool unmap)
{
	size_t page_size, size = PAGE_SIZE << ppage->order;
	u64 pfn = page_to_pfn(ppage->page);
	u8 order = ppage->order;
	u64 gfn = ppage->ipa >> PAGE_SHIFT;

	/* We already know this huge page has been broken down in the stage-2 */
	if (ppage->pins < (1 << order))
		order = 0;

	while (size) {
		int err = call_hyp_nvhe(pfn, gfn, order, args);

		switch (err) {
		/* The stage-2 huge page has been broken down */
		case -E2BIG:
			if (order)
				order = 0;
			else
				/* Something is really wrong ... */
				return -EINVAL;
			break;
		/* This has been unmapped already */
		case -ENOENT:
			/*
			 * We are not supposed to lose track of a PAGE_SIZE
			 * pinned page.
			 */
			if (!ppage->order)
				return -EINVAL;

			fallthrough;
		case 0:
			page_size = PAGE_SIZE << order;
			gfn += 1 << order;
			pfn += 1 << order;

			if (page_size > size)
				return -EINVAL;

			/* If -ENOENT, pins were already dropped. */
			if (unmap && !err)
				ppage->pins -= 1 << order;

			if (!ppage->pins)
				return 0;

			size -= page_size;
			break;
		default:
			return err;
		}
	}

	return 0;
}

static int __reclaim_dying_guest_page_call(u64 pfn, u64 gfn, u8 order, void *args)
{
	struct kvm *host_kvm = args;

	return kvm_call_hyp_nvhe(__pkvm_reclaim_dying_guest_page,
				 host_kvm->arch.pkvm.handle,
				 pfn, gfn, order);
}
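
/*
 * Undo __pkvm_create_hyp_vm(): reclaim every page pinned for the dying
 * guest, finalize the teardown at EL2 and release the per-vCPU hyp_reqs
 * pages.
 */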
static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	struct mm_struct *mm = current->mm;
	struct kvm_pinned_page *ppage;
	struct kvm_vcpu *host_vcpu;
	unsigned long idx, ipa = 0;

	if (!host_kvm->arch.pkvm.handle)
		goto out_free;

	WARN_ON(kvm_call_hyp_nvhe(__pkvm_start_teardown_vm, host_kvm->arch.pkvm.handle));

	mt_for_each(&host_kvm->arch.pkvm.pinned_pages, ppage, ipa, ULONG_MAX) {
		WARN_ON(pkvm_call_hyp_nvhe_ppage(ppage,
						 __reclaim_dying_guest_page_call,
						 host_kvm, true));
		cond_resched();

		account_locked_vm(mm, 1, false);
		unpin_user_pages_dirty_lock(&ppage->page, 1, host_kvm->arch.pkvm.enabled);
		kfree(ppage);
	}
	mtree_destroy(&host_kvm->arch.pkvm.pinned_pages);

	WARN_ON(kvm_call_hyp_nvhe(__pkvm_finalize_teardown_vm, host_kvm->arch.pkvm.handle));

out_free:
	host_kvm->arch.pkvm.handle = 0;

	atomic64_sub(host_kvm->arch.pkvm.stage2_teardown_mc.nr_pages << PAGE_SHIFT,
		     &host_kvm->stat.protected_hyp_mem);
	atomic64_sub(host_kvm->arch.pkvm.stage2_teardown_mc.nr_pages << PAGE_SHIFT,
		     &host_kvm->stat.protected_pgtable_mem);
	free_hyp_memcache(&host_kvm->arch.pkvm.stage2_teardown_mc);

	kvm_for_each_vcpu(idx, host_vcpu, host_kvm) {
		struct kvm_hyp_req *hyp_reqs = host_vcpu->arch.hyp_reqs;

		if (!hyp_reqs)
			continue;

		kvm_unshare_hyp(hyp_reqs, hyp_reqs + 1);
		host_vcpu->arch.hyp_reqs = NULL;
		free_page((unsigned long)hyp_reqs);
	}
}

/*
 * Allocates and donates memory for hypervisor VM structs at EL2.
 *
 * Allocates space for the VM state, which includes the hyp vm as well as
 * the hyp vcpus.
 *
 * Stores an opaque handle in the kvm struct for future reference.
 *
 * Return 0 on success, negative error code on failure.
 */
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	struct kvm_vcpu *host_vcpu;
	pkvm_handle_t handle;
	unsigned long idx;
	size_t pgd_sz;
	void *pgd;
	int ret;

	if (host_kvm->created_vcpus < 1)
		return -EINVAL;

	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr);

	/*
	 * The PGD pages will be reclaimed using a hyp_memcache which implies
	 * page granularity. So, use alloc_pages_exact() to get individual
	 * refcounts.
	 */
	pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
	if (!pgd)
		return -ENOMEM;
	atomic64_add(pgd_sz, &host_kvm->stat.protected_hyp_mem);

	init_hyp_stage2_memcache(&host_kvm->arch.pkvm.stage2_teardown_mc);

	/* Donate the VM memory to hyp and let hyp initialize it. */
	ret = kvm_call_refill_hyp_nvhe(__pkvm_init_vm, host_kvm, pgd);
	if (ret < 0)
		goto free_pgd;

	handle = ret;

	host_kvm->arch.pkvm.handle = handle;

	/* Donate memory for the vcpus at hyp and initialize them. */
	kvm_for_each_vcpu(idx, host_vcpu, host_kvm) {
		ret = __pkvm_create_hyp_vcpu(host_kvm, host_vcpu, idx);
		if (ret)
			goto destroy_vm;
		__pkvm_vcpu_hyp_created(host_vcpu);
	}

	atomic64_set(&host_kvm->stat.protected_pgtable_mem, pgd_sz);
	kvm_account_pgtable_pages(pgd, pgd_sz >> PAGE_SHIFT);

	return 0;

destroy_vm:
	__pkvm_destroy_hyp_vm(host_kvm);
	return ret;
free_pgd:
	free_pages_exact(pgd, pgd_sz);
	atomic64_sub(pgd_sz, &host_kvm->stat.protected_hyp_mem);

	return ret;
}

int pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	int ret = 0;

	mutex_lock(&host_kvm->arch.config_lock);
	if (!host_kvm->arch.pkvm.handle)
		ret = __pkvm_create_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->arch.config_lock);

	return ret;
}

void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	mutex_lock(&host_kvm->arch.config_lock);
	__pkvm_destroy_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->arch.config_lock);
}

int pkvm_init_host_vm(struct kvm *host_kvm, unsigned long type)
{
	if (!(type & KVM_VM_TYPE_ARM_PROTECTED))
		return 0;

	if (!is_protected_kvm_enabled())
		return -EINVAL;

	host_kvm->arch.pkvm.pvmfw_load_addr = PVMFW_INVALID_LOAD_ADDR;
	host_kvm->arch.pkvm.enabled = true;
	return 0;
}

static void __init _kvm_host_prot_finalize(void *arg)
{
	int *err = arg;

	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
		WRITE_ONCE(*err, -EINVAL);
}

static int __init pkvm_drop_host_privileges(void)
{
	int ret = 0;

	/*
	 * Flip the static key upfront as that may no longer be possible
	 * once the host stage 2 is installed.
	 */
	static_branch_enable(&kvm_protected_mode_initialized);
	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
	return ret;
}

static int __init pkvm_firmware_rmem_clear(void);

static int __init finalize_pkvm(void)
{
	int ret;

	if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised()) {
		pkvm_firmware_rmem_clear();
		return 0;
	}

	/*
	 * Modules can play an essential part in the pKVM protection. All of
	 * them must load properly to enable protected VMs.
	 */
	if (pkvm_load_early_modules())
		pkvm_firmware_rmem_clear();

	ret = kvm_iommu_init_driver();
	if (ret) {
		pr_err("Failed to init KVM IOMMU driver: %d\n", ret);
		pkvm_firmware_rmem_clear();
	}

	/*
	 * Exclude HYP sections from kmemleak so that they don't get peeked
	 * at, which would end badly once they become inaccessible.
	 */
	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
	kmemleak_free_part(__hyp_data_start, __hyp_data_end - __hyp_data_start);
	kmemleak_free_part(__hyp_rodata_start, __hyp_rodata_end - __hyp_rodata_start);
	kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);

	kvm_ptdump_host_register();

	ret = pkvm_drop_host_privileges();
	if (ret) {
		pr_err("Failed to finalize Hyp protection: %d\n", ret);
		BUG();
	}

	return 0;
}
device_initcall_sync(finalize_pkvm);
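
/*
 * Drop one pin on the page backing @ipa and, once the last pin is gone,
 * remove it from the pinned_pages tree and unpin the underlying user page.
 */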
void pkvm_host_reclaim_page(struct kvm *host_kvm, phys_addr_t ipa)
{
	struct mm_struct *mm = current->mm;
	struct kvm_pinned_page *ppage;
	unsigned long index = ipa;

	write_lock(&host_kvm->mmu_lock);
	ppage = mt_find(&host_kvm->arch.pkvm.pinned_pages, &index,
			index + PAGE_SIZE - 1);
	if (ppage) {
		if (ppage->pins)
			ppage->pins--;
		else
			WARN_ON(1);

		if (!ppage->pins)
			mtree_erase(&host_kvm->arch.pkvm.pinned_pages, ipa);
	}
	write_unlock(&host_kvm->mmu_lock);

	WARN_ON(!ppage);
	if (!ppage || ppage->pins)
		return;

	account_locked_vm(mm, 1, false);
	unpin_user_pages_dirty_lock(&ppage->page, 1, host_kvm->arch.pkvm.enabled);
	kfree(ppage);
}

static int __init pkvm_firmware_rmem_err(struct reserved_mem *rmem,
					 const char *reason)
{
	phys_addr_t end = rmem->base + rmem->size;

	kvm_err("Ignoring pkvm guest firmware memory reservation [%pa - %pa]: %s\n",
		&rmem->base, &end, reason);
	return -EINVAL;
}

static int __init pkvm_firmware_rmem_init(struct reserved_mem *rmem)
{
	unsigned long node = rmem->fdt_node;

	if (pkvm_firmware_mem)
		return pkvm_firmware_rmem_err(rmem, "duplicate reservation");

	if (!of_get_flat_dt_prop(node, "no-map", NULL))
		return pkvm_firmware_rmem_err(rmem, "missing \"no-map\" property");

	if (of_get_flat_dt_prop(node, "reusable", NULL))
		return pkvm_firmware_rmem_err(rmem, "\"reusable\" property unsupported");

	if (!PAGE_ALIGNED(rmem->base))
		return pkvm_firmware_rmem_err(rmem, "base is not page-aligned");

	if (!PAGE_ALIGNED(rmem->size))
		return pkvm_firmware_rmem_err(rmem, "size is not page-aligned");

	*pvmfw_size = rmem->size;
	*pvmfw_base = rmem->base;
	pkvm_firmware_mem = rmem;
	return 0;
}
RESERVEDMEM_OF_DECLARE(pkvm_firmware, "linux,pkvm-guest-firmware-memory",
		       pkvm_firmware_rmem_init);
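
/*
 * Wipe the pvmfw carveout (and clean the cache to the PoC) when protected
 * mode ends up unused, so no stale firmware data is left behind.
 */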
static int __init pkvm_firmware_rmem_clear(void)
{
	void *addr;
	phys_addr_t size;

	if (likely(!pkvm_firmware_mem))
		return 0;

	kvm_info("Clearing pKVM firmware memory\n");
	size = pkvm_firmware_mem->size;
	addr = memremap(pkvm_firmware_mem->base, size, MEMREMAP_WB);
	if (!addr)
		return -EINVAL;

	memset(addr, 0, size);
	dcache_clean_poc((unsigned long)addr, (unsigned long)addr + size);
	memunmap(addr);
	return 0;
}

static int pkvm_vm_ioctl_set_fw_ipa(struct kvm *kvm, u64 ipa)
{
	int ret = 0;

	if (!pkvm_firmware_mem)
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->arch.pkvm.handle) {
		ret = -EBUSY;
		goto out_unlock;
	}

	kvm->arch.pkvm.pvmfw_load_addr = ipa;
out_unlock:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int pkvm_vm_ioctl_info(struct kvm *kvm,
			      struct kvm_protected_vm_info __user *info)
{
	struct kvm_protected_vm_info kinfo = {
		.firmware_size = pkvm_firmware_mem ?
				 pkvm_firmware_mem->size :
				 0,
	};

	return copy_to_user(info, &kinfo, sizeof(kinfo)) ? -EFAULT : 0;
}

int pkvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	if (!kvm_vm_is_protected(kvm))
		return -EINVAL;

	if (cap->args[1] || cap->args[2] || cap->args[3])
		return -EINVAL;

	switch (cap->flags) {
	case KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FW_IPA:
		return pkvm_vm_ioctl_set_fw_ipa(kvm, cap->args[0]);
	case KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO:
		return pkvm_vm_ioctl_info(kvm, (void __force __user *)cap->args[0]);
	default:
		return -EINVAL;
	}

	return 0;
}

#ifdef CONFIG_MODULES
static char early_pkvm_modules[COMMAND_LINE_SIZE] __initdata;

static int __init early_pkvm_modules_cfg(char *arg)
{
	/*
	 * Loading pKVM modules with kvm-arm.protected_modules is deprecated.
	 * Use kvm-arm.protected_modules=<module1>,<module2>.
	 */
	if (!arg)
		return -EINVAL;

	strscpy(early_pkvm_modules, arg, COMMAND_LINE_SIZE);

	return 0;
}
early_param("kvm-arm.protected_modules", early_pkvm_modules_cfg);

static void free_modprobe_argv(struct subprocess_info *info)
{
	kfree(info->argv);
}

/*
 * Heavily inspired by request_module(). The latter couldn't be reused
 * though, as the feature can be disabled depending on the umh
 * configuration. Here some security is enforced by making sure this can be
 * called only when pKVM is enabled but not yet completely initialized.
 */
static int __init __pkvm_request_early_module(char *module_name,
					      char *module_path)
{
	char *modprobe_path = CONFIG_MODPROBE_PATH;
	struct subprocess_info *info;
	static char *envp[] = {
		"HOME=/",
		"TERM=linux",
		"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
		NULL
	};
	static bool proc;
	char **argv;
	int idx = 0;

	if (!is_protected_kvm_enabled())
		return -EACCES;

	if (static_branch_likely(&kvm_protected_mode_initialized))
		return -EACCES;

	argv = kmalloc(sizeof(char *) * 7, GFP_KERNEL);
	if (!argv)
		return -ENOMEM;

	argv[idx++] = modprobe_path;
	argv[idx++] = "-q";
	if (*module_path != '\0') {
		argv[idx++] = "-d";
		argv[idx++] = module_path;
	}
	argv[idx++] = "--";
	argv[idx++] = module_name;
	argv[idx++] = NULL;

	info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL,
					 NULL, free_modprobe_argv, NULL);
	if (!info)
		goto err;

	/* Even with CONFIG_STATIC_USERMODEHELPER we really want this path */
	info->path = modprobe_path;

	if (!proc) {
		wait_for_initramfs();
		if (init_mount("proc", "/proc", "proc",
			       MS_SILENT | MS_NOEXEC | MS_NOSUID, NULL))
			pr_warn("Couldn't mount /proc, pKVM module parameters will be ignored\n");

		proc = true;
	}

	return call_usermodehelper_exec(info, UMH_WAIT_PROC | UMH_KILLABLE);
err:
	kfree(argv);

	return -ENOMEM;
}

static int __init pkvm_request_early_module(char *module_name, char *module_path)
{
	int err = __pkvm_request_early_module(module_name, module_path);

	if (!err)
		return 0;

	/* Already tried the default path */
	if (*module_path == '\0')
		return err;

	pr_info("loading %s from %s failed, falling back to the default path\n",
		module_name, module_path);

	return __pkvm_request_early_module(module_name, "");
}
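
/*
 * Load each module listed in kvm-arm.protected_modules. The separators
 * eaten by strsep() are restored so the list is left intact behind us.
 */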
int __init pkvm_load_early_modules(void)
{
	char *token, *buf = early_pkvm_modules;
	char *module_path = CONFIG_PKVM_MODULE_PATH;
	int err;

	while (true) {
		token = strsep(&buf, ",");

		if (!token)
			break;

		if (*token) {
			err = pkvm_request_early_module(token, module_path);
			if (err) {
				pr_err("Failed to load pkvm module %s: %d\n",
				       token, err);
				return err;
			}
			/* Do it every iteration to iron out the dependencies. */
			flush_deferred_probe_now();
		}

		if (buf)
			*(buf - 1) = ',';
	}

	return 0;
}

#ifdef CONFIG_PROTECTED_NVHE_STACKTRACE
static LIST_HEAD(pkvm_modules);

static void pkvm_el2_mod_add(struct pkvm_el2_module *mod)
{
	INIT_LIST_HEAD(&mod->node);
	list_add(&mod->node, &pkvm_modules);
}
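
/*
 * Convert an EL2 module address (an offset from the module token) back to
 * its kernel VA, for the protected nVHE stacktrace code.
 */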
unsigned long pkvm_el2_mod_kern_va(unsigned long addr)
{
	struct pkvm_el2_module *mod;

	list_for_each_entry(mod, &pkvm_modules, node) {
		size_t len = (unsigned long)mod->sections.end -
			     (unsigned long)mod->sections.start;

		if (addr >= (unsigned long)mod->token &&
		    addr < (unsigned long)mod->token + len)
			return (unsigned long)mod->sections.start +
				(addr - mod->token);
	}

	return 0;
}
#else
static void pkvm_el2_mod_add(struct pkvm_el2_module *mod) { }
unsigned long pkvm_el2_mod_kern_va(unsigned long addr) { return 0; }
#endif

struct pkvm_mod_sec_mapping {
	struct pkvm_module_section *sec;
	enum kvm_pgtable_prot prot;
};

static void pkvm_unmap_module_pages(void *kern_va, void *hyp_va, size_t size)
{
	size_t offset;
	u64 pfn;

	for (offset = 0; offset < size; offset += PAGE_SIZE) {
		pfn = vmalloc_to_pfn(kern_va + offset);
		kvm_call_hyp_nvhe(__pkvm_unmap_module_page, pfn,
				  hyp_va + offset);
	}
}

static void pkvm_unmap_module_sections(struct pkvm_mod_sec_mapping *secs_map, void *hyp_va_base, int nr_secs)
{
	size_t offset, size;
	void *start;
	int i;

	for (i = 0; i < nr_secs; i++) {
		start = secs_map[i].sec->start;
		size = secs_map[i].sec->end - start;
		offset = start - secs_map[0].sec->start;
		pkvm_unmap_module_pages(start, hyp_va_base + offset, size);
	}
}

static int pkvm_map_module_section(struct pkvm_mod_sec_mapping *sec_map, void *hyp_va)
{
	size_t offset, size = sec_map->sec->end - sec_map->sec->start;
	int ret;
	u64 pfn;

	for (offset = 0; offset < size; offset += PAGE_SIZE) {
		pfn = vmalloc_to_pfn(sec_map->sec->start + offset);
		ret = kvm_call_hyp_nvhe(__pkvm_map_module_page, pfn,
					hyp_va + offset, sec_map->prot);
		if (ret) {
			pkvm_unmap_module_pages(sec_map->sec->start, hyp_va, offset);
			return ret;
		}
	}

	return 0;
}

static int pkvm_map_module_sections(struct pkvm_mod_sec_mapping *secs_map, void *hyp_va_base, int nr_secs)
{
	size_t offset;
	int i, ret;

	for (i = 0; i < nr_secs; i++) {
		offset = secs_map[i].sec->start - secs_map[0].sec->start;
		ret = pkvm_map_module_section(&secs_map[i], hyp_va_base + offset);
		if (ret) {
			pkvm_unmap_module_sections(secs_map, hyp_va_base, i);
			return ret;
		}
	}

	return 0;
}

static int __pkvm_cmp_mod_sec(const void *p1, const void *p2)
{
	struct pkvm_mod_sec_mapping const *s1 = p1;
	struct pkvm_mod_sec_mapping const *s2 = p2;

	return s1->sec->start < s2->sec->start ? -1 : s1->sec->start > s2->sec->start;
}
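
/*
 * Map a kernel module's EL2 sections into the hypervisor: allocate hyp VA
 * space, apply relocations, map each section with the appropriate
 * protections and run the module's EL2 init hook. @token, when non-NULL,
 * receives the hyp VA base, which identifies the module in later calls.
 *
 * A minimal usage sketch from a module init function (my_mod_init is
 * hypothetical; it assumes the module was built with an EL2 payload):
 *
 *	static int __init my_mod_init(void)
 *	{
 *		unsigned long token;
 *
 *		return __pkvm_load_el2_module(THIS_MODULE, &token);
 *	}
 */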
int __pkvm_load_el2_module(struct module *this, unsigned long *token)
{
	struct pkvm_el2_module *mod = &this->arch.hyp;
	struct pkvm_mod_sec_mapping secs_map[] = {
		{ &mod->text, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X },
		{ &mod->bss, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W },
		{ &mod->rodata, KVM_PGTABLE_PROT_R },
		{ &mod->event_ids, KVM_PGTABLE_PROT_R },
		{ &mod->data, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W },
	};
	void *start, *end, *hyp_va;
	struct arm_smccc_res res;
	kvm_nvhe_reloc_t *endrel;
	int ret, i, secs_first;
	size_t offset, size;

	/* The pKVM hyp only allows loading before it is fully initialized */
	if (!is_protected_kvm_enabled() || is_pkvm_initialized())
		return -EOPNOTSUPP;

	for (i = 0; i < ARRAY_SIZE(secs_map); i++) {
		if (!PAGE_ALIGNED(secs_map[i].sec->start)) {
			kvm_err("EL2 sections are not page-aligned\n");
			return -EINVAL;
		}
	}

	if (!try_module_get(this)) {
		kvm_err("Kernel module has been unloaded\n");
		return -ENODEV;
	}

	/* Missing or empty module sections are placed first */
	sort(secs_map, ARRAY_SIZE(secs_map), sizeof(secs_map[0]), __pkvm_cmp_mod_sec, NULL);
	for (secs_first = 0; secs_first < ARRAY_SIZE(secs_map); secs_first++) {
		start = secs_map[secs_first].sec->start;
		if (start)
			break;
	}
	end = secs_map[ARRAY_SIZE(secs_map) - 1].sec->end;
	size = end - start;

	arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__pkvm_alloc_module_va),
			  size >> PAGE_SHIFT, &res);
	if (res.a0 != SMCCC_RET_SUCCESS || !res.a1) {
		kvm_err("Failed to allocate hypervisor VA space for EL2 module\n");
		module_put(this);
		return res.a0 == SMCCC_RET_SUCCESS ? -ENOMEM : -EPERM;
	}
	hyp_va = (void *)res.a1;

	/*
	 * The token can be used for other calls related to this module.
	 * Conveniently, the only information needed is this address, so let's
	 * use it as an identifier.
	 */
	if (token)
		*token = (unsigned long)hyp_va;

	mod->token = (unsigned long)hyp_va;
	mod->sections.start = start;
	mod->sections.end = end;

	endrel = (void *)mod->relocs + mod->nr_relocs * sizeof(*endrel);
	kvm_apply_hyp_module_relocations(start, hyp_va, mod->relocs, endrel);

	/*
	 * Exclude EL2 module sections from kmemleak before making them
	 * inaccessible.
	 */
	kmemleak_free_part(start, size);

	ret = hyp_trace_init_mod_events(mod->hyp_events,
					mod->event_ids.start,
					mod->nr_hyp_events);
	if (ret)
		kvm_err("Failed to init module events: %d\n", ret);

	ret = pkvm_map_module_sections(secs_map + secs_first, hyp_va,
				       ARRAY_SIZE(secs_map) - secs_first);
	if (ret) {
		kvm_err("Failed to map EL2 module page: %d\n", ret);
		module_put(this);
		return ret;
	}

	offset = (size_t)((void *)mod->init - start);
	ret = kvm_call_hyp_nvhe(__pkvm_init_module, hyp_va + offset);
	if (ret) {
		kvm_err("Failed to init EL2 module: %d\n", ret);
		pkvm_unmap_module_sections(secs_map, hyp_va, ARRAY_SIZE(secs_map));
		module_put(this);
		return ret;
	}

	pkvm_el2_mod_add(mod);

	return 0;
}
EXPORT_SYMBOL(__pkvm_load_el2_module);

int __pkvm_register_el2_call(unsigned long hfn_hyp_va)
{
	return kvm_call_hyp_nvhe(__pkvm_register_hcall, hfn_hyp_va);
}
EXPORT_SYMBOL(__pkvm_register_el2_call);
#endif /* CONFIG_MODULES */
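
/*
 * Refill the EL2 allocator identified by @id with @nr_pages buffers of
 * @sz_alloc bytes each, handed over through a hyp memcache.
 *
 * E.g. topping up the hyp heap by four pages (hypothetical caller):
 *
 *	if (__pkvm_topup_hyp_alloc(4))
 *		pr_warn("hyp heap refill failed\n");
 */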
int __pkvm_topup_hyp_alloc_mgt(unsigned long id, unsigned long nr_pages, unsigned long sz_alloc)
{
	struct kvm_hyp_memcache mc;
	int ret;

	init_hyp_memcache(&mc);

	ret = topup_hyp_memcache(&mc, nr_pages, get_order(sz_alloc));
	if (ret)
		return ret;

	ret = kvm_call_hyp_nvhe(__pkvm_hyp_alloc_mgt_refill, id,
				mc.head, mc.nr_pages);
	if (ret)
		free_hyp_memcache(&mc);

	return ret;
}
EXPORT_SYMBOL(__pkvm_topup_hyp_alloc_mgt);

int __pkvm_topup_hyp_alloc(unsigned long nr_pages)
{
	return __pkvm_topup_hyp_alloc_mgt(HYP_ALLOC_MGT_HEAP_ID, nr_pages, PAGE_SIZE);
}
EXPORT_SYMBOL(__pkvm_topup_hyp_alloc);
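
/*
 * Ask EL2 to release up to @nr_pages pages from its allocators, in small
 * batches, and free them host-side. Returns the number of pages reclaimed.
 */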
unsigned long __pkvm_reclaim_hyp_alloc_mgt(unsigned long nr_pages)
{
	unsigned long ratelimit, last_reclaim, reclaimed = 0;
	struct kvm_hyp_memcache mc;
	struct arm_smccc_res res;

	init_hyp_memcache(&mc);

	do {
		/* Arbitrary upper bound to limit the time spent at EL2 */
		ratelimit = min(nr_pages, 16UL);

		arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__pkvm_hyp_alloc_mgt_reclaim),
				  ratelimit, &res);
		if (WARN_ON(res.a0 != SMCCC_RET_SUCCESS))
			break;

		mc.head = res.a1;
		last_reclaim = mc.nr_pages = res.a2;

		free_hyp_memcache(&mc);
		reclaimed += last_reclaim;

	} while (last_reclaim && (reclaimed < nr_pages));

	return reclaimed;
}