1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2020 - Google LLC
4  * Author: Quentin Perret <qperret@google.com>
5  */
6 
7 #include <linux/arm_ffa.h>
8 #include <linux/delay.h>
9 #include <linux/init.h>
10 #include <linux/initrd.h>
11 #include <linux/interval_tree_generic.h>
12 #include <linux/io.h>
13 #include <linux/iommu.h>
14 #include <linux/kmemleak.h>
15 #include <linux/kvm_host.h>
17 #include <linux/memblock.h>
18 #include <linux/mm.h>
19 #include <linux/mutex.h>
20 #include <linux/of_address.h>
21 #include <linux/of_fdt.h>
22 #include <linux/of_reserved_mem.h>
23 #include <linux/platform_device.h>
24 #include <linux/sort.h>
25 
26 #include <asm/kvm_host.h>
27 #include <asm/kvm_hyp.h>
28 #include <asm/kvm_mmu.h>
29 #include <asm/kvm_pkvm.h>
30 #include <asm/kvm_pkvm_module.h>
31 #include <asm/patching.h>
32 #include <asm/setup.h>
33 
34 #include <kvm/device.h>
35 
36 #include <linux/init_syscalls.h>
37 #include <uapi/linux/mount.h>
38 
39 #include "hyp_constants.h"
40 #include "hyp_trace.h"
41 
42 #define PKVM_DEVICE_ASSIGN_COMPAT	"pkvm,device-assignment"
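/*
 * Illustrative sketch (not taken from this file) of the device-tree layout
 * that the "pkvm,device-assignment" parsing below assumes: a node whose
 * "devices" property lists <&device group-id> pairs, with each referenced
 * device providing page-aligned "reg" entries and single-cell "iommus"
 * specifiers. Node and label names are made up for the example.
 *
 *	pkvm-assign {
 *		compatible = "pkvm,device-assignment";
 *		devices = <&foo_dev 1>, <&bar_dev 2>;
 *	};
 *
 *	foo_dev: foo@a0000000 {
 *		reg = <0x0 0xa0000000 0x0 0x10000>;
 *		iommus = <&smmu 0x20>;
 *	};
 */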
43 
44 /*
45  * Retry the VM availability message for the host a maximum total
46  * number of times, with sleeps in between. For the first few attempts,
47  * do a faster reschedule instead of a full sleep.
48  */
49 #define VM_AVAILABILITY_FAST_RETRIES	5
50 #define VM_AVAILABILITY_TOTAL_RETRIES	500
51 #define VM_AVAILABILITY_RETRY_SLEEP_MS	10
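/*
 * Worst-case illustration of the retry budget above: 5 fast retries that
 * only reschedule, followed by up to 495 slow retries sleeping 10 ms each,
 * i.e. roughly 495 * 10 ms ~= 5 s before giving up.
 */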
52 
53 DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
54 
55 static phys_addr_t pvmfw_base;
56 static phys_addr_t pvmfw_size;
57 
58 static struct pkvm_moveable_reg *moveable_regs = kvm_nvhe_sym(pkvm_moveable_regs);
59 static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
60 static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);
61 
62 phys_addr_t hyp_mem_base;
63 phys_addr_t hyp_mem_size;
64 
65 extern struct pkvm_device *kvm_nvhe_sym(registered_devices);
66 extern u32 kvm_nvhe_sym(registered_devices_nr);
67 
68 static int cmp_hyp_memblock(const void *p1, const void *p2)
69 {
70 	const struct memblock_region *r1 = p1;
71 	const struct memblock_region *r2 = p2;
72 
73 	return r1->base < r2->base ? -1 : (r1->base > r2->base);
74 }
75 
76 static void __init sort_memblock_regions(void)
77 {
78 	sort(hyp_memory,
79 	     *hyp_memblock_nr_ptr,
80 	     sizeof(struct memblock_region),
81 	     cmp_hyp_memblock,
82 	     NULL);
83 }
84 
85 static int __init register_memblock_regions(void)
86 {
87 	struct memblock_region *reg;
88 	bool pvmfw_in_mem = false;
89 
90 	for_each_mem_region(reg) {
91 		if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
92 			return -ENOMEM;
93 
94 		hyp_memory[*hyp_memblock_nr_ptr] = *reg;
95 		(*hyp_memblock_nr_ptr)++;
96 
97 		if (!pvmfw_size || pvmfw_in_mem ||
98 			!memblock_addrs_overlap(reg->base, reg->size, pvmfw_base, pvmfw_size))
99 			continue;
100 		/* If the pvmfw region overlaps a memblock, it must be a subset */
101 		if (pvmfw_base < reg->base || (pvmfw_base + pvmfw_size) > (reg->base + reg->size))
102 			return -EINVAL;
103 		pvmfw_in_mem = true;
104 	}
105 
106 	if (pvmfw_size && !pvmfw_in_mem) {
107 		if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
108 			return -ENOMEM;
109 
110 		hyp_memory[*hyp_memblock_nr_ptr] = (struct memblock_region) {
111 			.base   = pvmfw_base,
112 			.size   = pvmfw_size,
113 			.flags  = MEMBLOCK_NOMAP,
114 		};
115 		(*hyp_memblock_nr_ptr)++;
116 	}
117 	sort_memblock_regions();
118 
119 	return 0;
120 }
121 
122 static int cmp_moveable_reg(const void *p1, const void *p2)
123 {
124 	const struct pkvm_moveable_reg *r1 = p1;
125 	const struct pkvm_moveable_reg *r2 = p2;
126 
127 	/*
128 	 * Moveable regions may overlap, so put the largest one first when start
129 	 * addresses are equal to allow a simpler walk from e.g.
130 	 * host_stage2_unmap_unmoveable_regs().
131 	 */
132 	if (r1->start < r2->start)
133 		return -1;
134 	else if (r1->start > r2->start)
135 		return 1;
136 	else if (r1->size > r2->size)
137 		return -1;
138 	else if (r1->size < r2->size)
139 		return 1;
140 	return 0;
141 }
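/*
 * Illustration of the ordering above: two regions both starting at
 * 0x80000000 with sizes 2M and 1M sort as (0x80000000, 2M) then
 * (0x80000000, 1M), so a walker tracking the furthest end seen so far can
 * simply skip entries already covered by an earlier, larger region.
 */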
142 
143 static void __init sort_moveable_regs(void)
144 {
145 	sort(moveable_regs,
146 	     kvm_nvhe_sym(pkvm_moveable_regs_nr),
147 	     sizeof(struct pkvm_moveable_reg),
148 	     cmp_moveable_reg,
149 	     NULL);
150 }
151 
152 static int __init register_moveable_fdt_resource(struct device_node *np,
153 						 enum pkvm_moveable_reg_type type)
154 {
155 	struct resource res;
156 	u64 start, size;
157 	unsigned int j = 0;
158 	unsigned int i = kvm_nvhe_sym(pkvm_moveable_regs_nr);
159 
160 	while (!of_address_to_resource(np, j, &res)) {
161 		if (i >= PKVM_NR_MOVEABLE_REGS)
162 			return -ENOMEM;
163 
164 		start = res.start;
165 		size = resource_size(&res);
166 		if (!PAGE_ALIGNED(start) || !PAGE_ALIGNED(size))
167 			return -EINVAL;
168 
169 		moveable_regs[i].start = start;
170 		moveable_regs[i].size = size;
171 		moveable_regs[i].type = type;
172 		i++;
173 		j++;
174 	}
175 
176 	kvm_nvhe_sym(pkvm_moveable_regs_nr) = i;
177 	return 0;
178 }
179 
180 static int __init register_moveable_regions(void)
181 {
182 	struct memblock_region *reg;
183 	struct device_node *np;
184 	int i = 0, ret = 0, idx = 0;
185 
186 	for_each_mem_region(reg) {
187 		if (i >= PKVM_NR_MOVEABLE_REGS)
188 			return -ENOMEM;
189 		moveable_regs[i].start = reg->base;
190 		moveable_regs[i].size = reg->size;
191 		moveable_regs[i].type = PKVM_MREG_MEMORY;
192 		i++;
193 	}
194 	kvm_nvhe_sym(pkvm_moveable_regs_nr) = i;
195 
196 	for_each_compatible_node(np, NULL, "pkvm,protected-region") {
197 		ret = register_moveable_fdt_resource(np, PKVM_MREG_PROTECTED_RANGE);
198 		if (ret)
199 			goto out_fail;
200 	}
201 
202 	for_each_compatible_node(np, NULL, PKVM_DEVICE_ASSIGN_COMPAT) {
203 		struct of_phandle_args args;
204 
205 		while (!of_parse_phandle_with_fixed_args(np, "devices", 1, idx, &args)) {
206 			idx++;
207 			ret = register_moveable_fdt_resource(args.np, PKVM_MREG_ASSIGN_MMIO);
208 			of_node_put(args.np);
209 			if (ret)
210 				goto out_fail;
211 		}
212 	}
213 
214 	sort_moveable_regs();
215 
216 	return ret;
217 out_fail:
218 	of_node_put(np);
219 	kvm_nvhe_sym(pkvm_moveable_regs_nr) = 0;
220 	return ret;
221 }
222 
223 static int __init early_hyp_lm_size_mb_cfg(char *arg)
224 {
225 	return kstrtoull(arg, 10, &kvm_nvhe_sym(hyp_lm_size_mb));
226 }
227 early_param("kvm-arm.hyp_lm_size_mb", early_hyp_lm_size_mb_cfg);
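/*
 * Example (illustrative value): booting with "kvm-arm.hyp_lm_size_mb=64"
 * on the kernel command line stores 64 in hyp_lm_size_mb, parsed as a
 * base-10 u64 by the helper above.
 */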
228 
229 DEFINE_STATIC_KEY_FALSE(kvm_ffa_unmap_on_lend);
230 
231 void __init kvm_hyp_reserve(void)
232 {
233 	u64 hyp_mem_pages = 0;
234 	int ret;
235 
236 	if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
237 		return;
238 
239 	if (kvm_get_mode() != KVM_MODE_PROTECTED)
240 		return;
241 
242 	ret = register_memblock_regions();
243 	if (ret) {
244 		*hyp_memblock_nr_ptr = 0;
245 		kvm_err("Failed to register hyp memblocks: %d\n", ret);
246 		return;
247 	}
248 
249 	ret = register_moveable_regions();
250 	if (ret) {
251 		*hyp_memblock_nr_ptr = 0;
252 		kvm_err("Failed to register pkvm moveable regions: %d\n", ret);
253 		return;
254 	}
255 
256 	hyp_mem_pages += hyp_s1_pgtable_pages();
257 	hyp_mem_pages += host_s2_pgtable_pages();
258 	hyp_mem_pages += hyp_vm_table_pages();
259 	hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
260 	hyp_mem_pages += pkvm_selftest_pages();
261 	hyp_mem_pages += hyp_ffa_proxy_pages();
262 
263 	if (static_branch_unlikely(&kvm_ffa_unmap_on_lend))
264 		hyp_mem_pages += KVM_FFA_SPM_HANDLE_NR_PAGES;
265 
266 	hyp_mem_pages++; /* hyp_ppages */
267 
268 	/*
269 	 * Try to allocate a PMD-aligned region to reduce TLB pressure once
270 	 * this is unmapped from the host stage-2, and fall back to PAGE_SIZE.
271 	 */
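	/*
	 * Worked example, assuming 4K pages and a 2 MiB PMD_SIZE: a 5 MiB
	 * requirement is first attempted as a 6 MiB, 2 MiB-aligned block,
	 * and only if that allocation fails do we fall back to a 5 MiB,
	 * page-aligned one.
	 */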
272 	hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
273 	hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
274 					   PMD_SIZE);
275 	if (!hyp_mem_base)
276 		hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
277 	else
278 		hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);
279 
280 	if (!hyp_mem_base) {
281 		kvm_err("Failed to reserve hyp memory\n");
282 		return;
283 	}
284 
285 	kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
286 		 hyp_mem_base);
287 }
288 
290 static void __pkvm_vcpu_hyp_created(struct kvm_vcpu *vcpu)
291 {
292 	if (kvm_vm_is_protected(vcpu->kvm))
293 		vcpu->arch.sve_state = NULL;
294 
295 	vcpu_set_flag(vcpu, VCPU_PKVM_FINALIZED);
296 }
297 
298 static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *host_vcpu)
299 {
300 	pkvm_handle_t handle = host_vcpu->kvm->arch.pkvm.handle;
301 	struct kvm_hyp_req *hyp_reqs;
302 	int ret;
303 
304 	init_hyp_stage2_memcache(&host_vcpu->arch.stage2_mc);
305 
306 	hyp_reqs = (struct kvm_hyp_req *)__get_free_page(GFP_KERNEL_ACCOUNT);
307 	if (!hyp_reqs)
308 		return -ENOMEM;
309 
310 	ret = kvm_share_hyp(hyp_reqs, hyp_reqs + 1);
311 	if (ret)
312 		goto err_free_reqs;
313 	host_vcpu->arch.hyp_reqs = hyp_reqs;
314 
315 	ret = kvm_call_refill_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu);
316 	if (!ret) {
317 		__pkvm_vcpu_hyp_created(host_vcpu);
318 		return 0;
319 	}
320 
321 	kvm_unshare_hyp(hyp_reqs, hyp_reqs + 1);
322 err_free_reqs:
323 	free_page((unsigned long)hyp_reqs);
324 	host_vcpu->arch.hyp_reqs = NULL;
325 
326 	return ret;
327 }
328 
329 /*
330  * Handle split huge pages which have not been reported to the kvm_pinned_page tree.
331  */
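/*
 * Example of the downgrade handled below: for a 2 MiB pinned page (order 9
 * with 4K pages) that the hypervisor has since split, the first order-9
 * call returns -E2BIG, so the walk drops to order 0 and covers the same
 * range as 512 individual page-sized calls.
 */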
332 static int pkvm_call_hyp_nvhe_ppage(struct kvm_pinned_page *ppage,
333 				    int (*call_hyp_nvhe)(u64 pfn, u64 gfn, u8 order, void *args),
334 				    void *args)
335 {
336 	size_t page_size, size = PAGE_SIZE << ppage->order;
337 	u64 pfn = page_to_pfn(ppage->page);
338 	u8 order = ppage->order;
339 	u64 gfn = ppage->ipa >> PAGE_SHIFT;
340 
341 	while (size) {
342 		int err = call_hyp_nvhe(pfn, gfn, order, args);
343 
344 		switch (err) {
345 		case -E2BIG:
346 			if (order)
347 				order = 0;
348 			else
349 				/* Something is really wrong ... */
350 				return -EINVAL;
351 			break;
352 		case 0:
353 			page_size = PAGE_SIZE << order;
354 			gfn += 1 << order;
355 			pfn += 1 << order;
356 
357 			if (page_size > size)
358 				return -EINVAL;
359 
360 			size -= page_size;
361 			break;
362 		default:
363 			return err;
364 		}
365 	}
366 
367 	return 0;
368 }
369 
370 static int __reclaim_dying_guest_page_call(u64 pfn, u64 gfn, u8 order, void *args)
371 {
372 	struct kvm *host_kvm = args;
373 
374 	return kvm_call_hyp_nvhe(__pkvm_reclaim_dying_guest_page,
375 				 host_kvm->arch.pkvm.handle,
376 				 pfn, gfn, order);
377 }
378 
379 /* __pkvm_notify_guest_vm_avail_retry - notify secure of the VM state change
380  * @host_kvm: the kvm structure
381  * @availability_msg: the VM state that will be notified
382  *
383  * Returns: 0 when the notification is sent successfully; -EINTR or -EAGAIN if
384  * the destruction notification is interrupted and the retries are exhausted;
385  * and a positive value indicating the remaining jiffies when the creation
386  * notification is sent but interrupted.
387  */
388 static int __pkvm_notify_guest_vm_avail_retry(struct kvm *host_kvm, u32 availability_msg)
389 {
390 	int ret, retries;
391 	long timeout;
392 
393 	if (!host_kvm->arch.pkvm.ffa_support)
394 		return 0;
395 
396 	for (retries = 0; retries < VM_AVAILABILITY_TOTAL_RETRIES; retries++) {
397 		ret = kvm_call_hyp_nvhe(__pkvm_notify_guest_vm_avail,
398 					host_kvm->arch.pkvm.handle);
399 		if (!ret)
400 			return 0;
401 		else if (ret != -EINTR && ret != -EAGAIN)
402 			return ret;
403 
404 		if (retries < VM_AVAILABILITY_FAST_RETRIES) {
405 			cond_resched();
406 		} else if (availability_msg == FFA_VM_DESTRUCTION_MSG) {
407 			msleep(VM_AVAILABILITY_RETRY_SLEEP_MS);
408 		} else {
409 			timeout = msecs_to_jiffies(VM_AVAILABILITY_RETRY_SLEEP_MS);
410 			timeout = schedule_timeout_killable(timeout);
411 			if (timeout) {
412 				/*
413 				 * The timer did not expire,
414 				 * most likely because the
415 				 * process was killed.
416 				 */
417 				return ret;
418 			}
419 		}
420 	}
421 
422 	return ret;
423 }
424 
425 static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
426 {
427 	struct mm_struct *mm = current->mm;
428 	struct kvm_pinned_page *ppage;
429 	struct kvm_vcpu *host_vcpu;
430 	unsigned long nr_busy;
431 	unsigned long pages;
432 	unsigned long idx;
433 	int ret, notify_status;
434 
435 	if (!pkvm_is_hyp_created(host_kvm))
436 		goto out_free;
437 
438 	WARN_ON(kvm_call_hyp_nvhe(__pkvm_start_teardown_vm, host_kvm->arch.pkvm.handle));
439 
440 retry:
441 	pages = 0;
442 	nr_busy = 0;
443 	ppage = kvm_pinned_pages_iter_first(&host_kvm->arch.pkvm.pinned_pages, 0, ~(0UL));
444 	while (ppage) {
445 		struct kvm_pinned_page *next;
446 
447 		ret = pkvm_call_hyp_nvhe_ppage(ppage, __reclaim_dying_guest_page_call,
448 					       host_kvm);
449 		cond_resched();
450 		if (ret == -EBUSY) {
451 			nr_busy++;
452 			next = kvm_pinned_pages_iter_next(ppage, 0, ~(0UL));
453 			ppage = next;
454 			continue;
455 		}
456 		WARN_ON(ret);
457 
458 		unpin_user_pages_dirty_lock(&ppage->page, 1, true);
459 		next = kvm_pinned_pages_iter_next(ppage, 0, ~(0UL));
460 		kvm_pinned_pages_remove(ppage, &host_kvm->arch.pkvm.pinned_pages);
461 		pages += 1 << ppage->order;
462 		kfree(ppage);
463 		ppage = next;
464 	}
465 
466 	account_locked_vm(mm, pages, false);
467 
468 	notify_status = __pkvm_notify_guest_vm_avail_retry(host_kvm, FFA_VM_DESTRUCTION_MSG);
469 	if (nr_busy) {
470 		do {
471 			ret = kvm_call_hyp_nvhe(__pkvm_reclaim_dying_guest_ffa_resources,
472 						host_kvm->arch.pkvm.handle);
473 			WARN_ON(ret && ret != -EAGAIN);
474 
475 			if (notify_status == -EINTR || notify_status == -EAGAIN)
476 				notify_status = __pkvm_notify_guest_vm_avail_retry(
477 						host_kvm, FFA_VM_DESTRUCTION_MSG);
478 			cond_resched();
479 		} while (ret == -EAGAIN);
480 		goto retry;
481 	}
482 
483 	WARN_ON(kvm_call_hyp_nvhe(__pkvm_finalize_teardown_vm, host_kvm->arch.pkvm.handle));
484 
485 out_free:
486 	host_kvm->arch.pkvm.handle = 0;
487 
488 	atomic64_sub(host_kvm->arch.pkvm.stage2_teardown_mc.nr_pages << PAGE_SHIFT,
489 		     &host_kvm->stat.protected_hyp_mem);
490 	free_hyp_memcache(&host_kvm->arch.pkvm.stage2_teardown_mc);
491 
492 	kvm_iommu_guest_free_mc(&host_kvm->arch.pkvm.teardown_iommu_mc);
493 
494 	kvm_for_each_vcpu(idx, host_vcpu, host_kvm) {
495 		struct kvm_hyp_req *hyp_reqs = host_vcpu->arch.hyp_reqs;
496 
497 		if (!hyp_reqs)
498 			continue;
499 
500 		kvm_unshare_hyp(hyp_reqs, hyp_reqs + 1);
501 		host_vcpu->arch.hyp_reqs = NULL;
502 		free_page((unsigned long)hyp_reqs);
503 
504 		kvm_iommu_guest_free_mc(&host_vcpu->arch.iommu_mc);
505 	}
506 }
507 
508 /*
509  * Allocates and donates memory for hypervisor VM structs at EL2.
510  *
511  * Allocates space for the VM state, which includes the hyp vm as well as
512  * the hyp vcpus.
513  *
514  * Stores an opaque handle in the kvm struct for future reference.
515  *
516  * Return 0 on success, negative error code on failure.
517  */
518 static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
519 {
520 	size_t pgd_sz;
521 	void *pgd;
522 	int ret;
523 
524 	if (host_kvm->created_vcpus < 1)
525 		return -EINVAL;
526 
527 	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr);
528 
529 	/*
530 	 * The PGD pages will be reclaimed using a hyp_memcache which implies
531 	 * page granularity. So, use alloc_pages_exact() to get individual
532 	 * refcounts.
533 	 */
534 	pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
535 	if (!pgd)
536 		return -ENOMEM;
537 	atomic64_add(pgd_sz, &host_kvm->stat.protected_hyp_mem);
538 
539 	init_hyp_stage2_memcache(&host_kvm->arch.pkvm.stage2_teardown_mc);
540 
541 	/* Donate the VM memory to hyp and let hyp initialize it. */
542 	ret = kvm_call_refill_hyp_nvhe(__pkvm_init_vm, host_kvm, pgd);
543 	if (ret < 0)
544 		goto free_pgd;
545 
546 	WRITE_ONCE(host_kvm->arch.pkvm.handle, ret);
547 
548 	kvm_account_pgtable_pages(pgd, pgd_sz >> PAGE_SHIFT);
549 
550 	return __pkvm_notify_guest_vm_avail_retry(host_kvm, FFA_VM_CREATION_MSG);
551 free_pgd:
552 	free_pages_exact(pgd, pgd_sz);
553 	atomic64_sub(pgd_sz, &host_kvm->stat.protected_hyp_mem);
554 
555 	return ret;
556 }
557 
558 bool pkvm_is_hyp_created(struct kvm *host_kvm)
559 {
560 	return READ_ONCE(host_kvm->arch.pkvm.handle);
561 }
562 
563 int pkvm_create_hyp_vm(struct kvm *host_kvm)
564 {
565 	int ret = 0;
566 
567 	mutex_lock(&host_kvm->arch.config_lock);
568 	if (!pkvm_is_hyp_created(host_kvm))
569 		ret = __pkvm_create_hyp_vm(host_kvm);
570 	mutex_unlock(&host_kvm->arch.config_lock);
571 
572 	return ret;
573 }
574 
575 int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
576 {
577 	int ret = 0;
578 
579 	mutex_lock(&vcpu->kvm->arch.config_lock);
580 	if (!vcpu_get_flag(vcpu, VCPU_PKVM_FINALIZED))
581 		ret = __pkvm_create_hyp_vcpu(vcpu);
582 	mutex_unlock(&vcpu->kvm->arch.config_lock);
583 
584 	return ret;
585 }
586 
587 void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
588 {
589 	mutex_lock(&host_kvm->arch.config_lock);
590 	__pkvm_destroy_hyp_vm(host_kvm);
591 	mutex_unlock(&host_kvm->arch.config_lock);
592 }
593 
594 int pkvm_init_host_vm(struct kvm *host_kvm, unsigned long type)
595 {
596 	if (!(type & KVM_VM_TYPE_ARM_PROTECTED))
597 		return 0;
598 
599 	if (!is_protected_kvm_enabled())
600 		return -EINVAL;
601 
602 	host_kvm->arch.pkvm.pvmfw_load_addr = PVMFW_INVALID_LOAD_ADDR;
603 	host_kvm->arch.pkvm.enabled = true;
604 	return 0;
605 }
606 
607 static int pkvm_register_device(struct of_phandle_args *args,
608 				struct pkvm_device *dev)
609 {
610 	struct device_node *np = args->np;
611 	struct of_phandle_args iommu_spec;
612 	u32 group_id = args->args[0];
613 	struct resource res;
614 	u64 base, size, iommu_id;
615 	unsigned int j = 0;
616 
617 	/* Parse regs */
618 	while (!of_address_to_resource(np, j, &res)) {
619 		if (j >= PKVM_DEVICE_MAX_RESOURCE)
620 			return -E2BIG;
621 
622 		base = res.start;
623 		size = resource_size(&res);
624 		if (!PAGE_ALIGNED(base) || !PAGE_ALIGNED(size))
625 			return -EINVAL;
626 
627 		dev->resources[j].base = base;
628 		dev->resources[j].size = size;
629 		j++;
630 	}
631 	dev->nr_resources = j;
632 
633 	/* Parse iommus */
634 	j = 0;
635 	while (!of_parse_phandle_with_args(np, "iommus",
636 					   "#iommu-cells",
637 					   j, &iommu_spec)) {
638 		if (iommu_spec.args_count != 1) {
639 			kvm_err("[Devices] Unsupported binding for %s, expected <&iommu id>\n",
640 				np->full_name);
			of_node_put(iommu_spec.np);
641 			return -EINVAL;
642 		}
643 
644 		if (j >= PKVM_DEVICE_MAX_RESOURCE) {
645 			of_node_put(iommu_spec.np);
646 			return -E2BIG;
647 		}
648 
649 		iommu_id = kvm_get_iommu_id_by_of(iommu_spec.np);
650 
651 		dev->iommus[j].id = iommu_id;
652 		dev->iommus[j].endpoint = iommu_spec.args[0];
653 		of_node_put(iommu_spec.np);
654 		j++;
655 	}
656 
657 	dev->nr_iommus = j;
658 	dev->ctxt = NULL;
659 	dev->group_id = group_id;
660 
661 	return 0;
662 }
663 
664 static int pkvm_init_devices(void)
665 {
666 	struct device_node *np;
667 	int idx = 0, ret = 0, dev_cnt = 0;
668 	size_t dev_sz;
669 	struct pkvm_device *dev_base;
670 
671 	for_each_compatible_node(np, NULL, PKVM_DEVICE_ASSIGN_COMPAT) {
672 		struct of_phandle_args args;
673 
674 		while (!of_parse_phandle_with_fixed_args(np, "devices", 1, dev_cnt, &args)) {
675 			dev_cnt++;
676 			of_node_put(args.np);
677 		}
678 	}
679 	kvm_info("Found %d assignable devices", dev_cnt);
680 
681 	if (!dev_cnt)
682 		return 0;
683 
684 	dev_sz = PAGE_ALIGN(size_mul(sizeof(struct pkvm_device), dev_cnt));
685 
686 	dev_base = alloc_pages_exact(dev_sz, GFP_KERNEL_ACCOUNT);
687 
688 	if (!dev_base)
689 		return -ENOMEM;
690 
691 	for_each_compatible_node(np, NULL, PKVM_DEVICE_ASSIGN_COMPAT) {
692 		struct of_phandle_args args;
693 
694 		while (!of_parse_phandle_with_fixed_args(np, "devices", 1, idx, &args)) {
695 			ret = pkvm_register_device(&args, &dev_base[idx]);
696 			of_node_put(args.np);
697 			if (ret) {
698 				of_node_put(np);
699 				goto out_free;
700 			}
701 			idx++;
702 		}
703 	}
704 
705 	kvm_nvhe_sym(registered_devices_nr) = dev_cnt;
706 	kvm_nvhe_sym(registered_devices) = dev_base;
707 	return ret;
708 
709 out_free:
710 	free_pages_exact(dev_base, dev_sz);
711 	return ret;
712 }
713 
714 static void __init _kvm_host_prot_finalize(void *arg)
715 {
716 	int *err = arg;
717 
718 	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
719 		WRITE_ONCE(*err, -EINVAL);
720 }
721 
722 static int __init pkvm_drop_host_privileges(void)
723 {
724 	int ret = 0;
725 
726 	/*
727 	 * Flip the static key upfront as that may no longer be possible
728 	 * once the host stage 2 is installed.
729 	 */
730 	static_branch_enable(&kvm_protected_mode_initialized);
731 	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
732 	return ret;
733 }
734 
735 static int __init pkvm_firmware_rmem_clear(void);
736 
737 static int __init finalize_pkvm(void)
738 {
739 	int ret;
740 
741 	if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised()) {
742 		pkvm_firmware_rmem_clear();
743 		return 0;
744 	}
745 
746 	/*
747 	 * Modules can play an essential part in the pKVM protection. All of
748 	 * them must properly load to enable protected VMs.
749 	 */
750 	if (pkvm_load_early_modules())
751 		pkvm_firmware_rmem_clear();
752 
753 	ret = kvm_iommu_init_driver();
754 	if (ret) {
755 		pr_err("Failed to init KVM IOMMU driver: %d\n", ret);
756 		pkvm_firmware_rmem_clear();
757 	}
758 
759 	ret = pkvm_init_devices();
760 	if (ret) {
761 		pr_err("Failed to init kvm devices %d\n", ret);
762 		pkvm_firmware_rmem_clear();
763 	}
764 
765 	ret = kvm_call_hyp_nvhe(__pkvm_devices_init);
766 	if (ret)
767 		pr_warn("Assignable devices failed to initialize in the hypervisor %d", ret);
768 
769 	/*
770 	 * Exclude HYP sections from kmemleak so that they don't get peeked
771 	 * at, which would end badly once inaccessible.
772 	 */
773 	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
774 	kmemleak_free_part(__hyp_data_start, __hyp_data_end - __hyp_data_start);
775 	kmemleak_free_part(__hyp_rodata_start, __hyp_rodata_end - __hyp_rodata_start);
776 	kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);
777 
778 	kvm_s2_ptdump_host_create_debugfs();
779 
780 	ret = pkvm_drop_host_privileges();
781 	if (ret) {
782 		pr_err("Failed to finalize Hyp protection: %d\n", ret);
783 		kvm_iommu_remove_driver();
784 	}
785 
786 	return 0;
787 }
788 device_initcall_sync(finalize_pkvm);
789 
790 void pkvm_host_reclaim_page(struct kvm *host_kvm, phys_addr_t ipa)
791 {
792 	struct mm_struct *mm = current->mm;
793 	struct kvm_pinned_page *ppage;
794 	u8 order;
795 
796 	write_lock(&host_kvm->mmu_lock);
797 	ppage = kvm_pinned_pages_iter_first(&host_kvm->arch.pkvm.pinned_pages,
798 					   ipa, ipa + PAGE_SIZE - 1);
799 	if (ppage) {
800 		order = ppage->order;
801 		if (!order)
802 			kvm_pinned_pages_remove(ppage, &host_kvm->arch.pkvm.pinned_pages);
803 	}
804 	write_unlock(&host_kvm->mmu_lock);
805 
806 	if (WARN_ON(!ppage || order))
807 		return;
808 
809 	account_locked_vm(mm, 1 << ppage->order, false);
810 	unpin_user_pages_dirty_lock(&ppage->page, 1, true);
811 	kfree(ppage);
812 }
813 
814 int pkvm_enable_smc_forwarding(struct file *kvm_file)
815 {
816 	struct kvm *host_kvm;
817 
818 	if (!file_is_kvm(kvm_file))
819 		return -EINVAL;
820 
821 	if (!kvm_get_kvm_safe(kvm_file->private_data))
822 		return -EINVAL;
823 
824 	host_kvm = kvm_file->private_data;
825 	if (!host_kvm)
826 		return -EINVAL;
827 
828 	host_kvm->arch.pkvm.smc_forwarded = true;
829 
830 	return 0;
831 }
832 
833 static int __init pkvm_firmware_rmem_err(struct reserved_mem *rmem,
834 					 const char *reason)
835 {
836 	phys_addr_t end = rmem->base + rmem->size;
837 
838 	kvm_err("Ignoring pkvm guest firmware memory reservation [%pa - %pa]: %s\n",
839 		&rmem->base, &end, reason);
840 	return -EINVAL;
841 }
842 
843 static int __init pkvm_firmware_rmem_init(struct reserved_mem *rmem)
844 {
845 	unsigned long node = rmem->fdt_node;
846 
847 	if (pvmfw_size)
848 		return pkvm_firmware_rmem_err(rmem, "duplicate reservation");
849 
850 	if (!of_get_flat_dt_prop(node, "no-map", NULL))
851 		return pkvm_firmware_rmem_err(rmem, "missing \"no-map\" property");
852 
853 	if (of_get_flat_dt_prop(node, "reusable", NULL))
854 		return pkvm_firmware_rmem_err(rmem, "\"reusable\" property unsupported");
855 
856 	if (!PAGE_ALIGNED(rmem->base))
857 		return pkvm_firmware_rmem_err(rmem, "base is not page-aligned");
858 
859 	if (!PAGE_ALIGNED(rmem->size))
860 		return pkvm_firmware_rmem_err(rmem, "size is not page-aligned");
861 
862 	pvmfw_size = kvm_nvhe_sym(pvmfw_size) = rmem->size;
863 	pvmfw_base = kvm_nvhe_sym(pvmfw_base) = rmem->base;
864 	return 0;
865 }
866 RESERVEDMEM_OF_DECLARE(pkvm_firmware, "linux,pkvm-guest-firmware-memory",
867 		       pkvm_firmware_rmem_init);
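/*
 * Illustrative device-tree reservation satisfying the checks above (the
 * addresses and node names are made up): a page-aligned, no-map region
 * under /reserved-memory carrying the pvmfw image.
 *
 *	reserved-memory {
 *		#address-cells = <2>;
 *		#size-cells = <2>;
 *		ranges;
 *
 *		pvmfw: pkvm-firmware@80000000 {
 *			compatible = "linux,pkvm-guest-firmware-memory";
 *			reg = <0x0 0x80000000 0x0 0x200000>;
 *			no-map;
 *		};
 *	};
 */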
868 
869 static int __init pkvm_firmware_rmem_clear(void)
870 {
871 	void *addr;
872 	phys_addr_t size;
873 
874 	if (likely(!pvmfw_size))
875 		return 0;
876 
877 	kvm_info("Clearing pKVM firmware memory\n");
878 	size = pvmfw_size;
879 	addr = memremap(pvmfw_base, size, MEMREMAP_WB);
880 
881 	pvmfw_size = kvm_nvhe_sym(pvmfw_size) = 0;
882 	pvmfw_base = kvm_nvhe_sym(pvmfw_base) = 0;
883 
884 	if (!addr)
885 		return -EINVAL;
886 
887 	memset(addr, 0, size);
888 	dcache_clean_poc((unsigned long)addr, (unsigned long)addr + size);
889 	memunmap(addr);
890 	return 0;
891 }
892 
893 static int pkvm_vm_ioctl_set_fw_ipa(struct kvm *kvm, u64 ipa)
894 {
895 	int ret = 0;
896 
897 	if (!pvmfw_size)
898 		return -EINVAL;
899 
900 	mutex_lock(&kvm->lock);
901 	if (kvm->arch.pkvm.handle) {
902 		ret = -EBUSY;
903 		goto out_unlock;
904 	}
905 
906 	kvm->arch.pkvm.pvmfw_load_addr = ipa;
907 out_unlock:
908 	mutex_unlock(&kvm->lock);
909 	return ret;
910 }
911 
912 static u32 pkvm_get_ffa_version(void)
913 {
914 	static u32 ffa_version;
915 	u32 ret;
916 
917 	ret = READ_ONCE(ffa_version);
918 	if (ret)
919 		return ret;
920 
921 	ret = kvm_call_hyp_nvhe(__pkvm_host_get_ffa_version);
922 	WRITE_ONCE(ffa_version, ret);
923 	return ret;
925 }
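/*
 * For reference, FF-A encodes the negotiated version as a single 32-bit
 * word with the major version in bits [30:16] and the minor in bits
 * [15:0], so 0x10001 means FF-A 1.1. A zero return here means the host has
 * not negotiated a version with the hypervisor.
 */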
926 
927 static int pkvm_vm_ioctl_info(struct kvm *kvm,
928 			      struct kvm_protected_vm_info __user *info)
929 {
930 	struct kvm_protected_vm_info kinfo = {
931 		.firmware_size = pvmfw_size,
932 		.ffa_version = pkvm_get_ffa_version(),
933 	};
934 
935 	return copy_to_user(info, &kinfo, sizeof(kinfo)) ? -EFAULT : 0;
936 }
937 
938 static int pkvm_vm_ioctl_ffa_support(struct kvm *kvm, u32 enable)
939 {
940 	int ret = 0;
941 	u32 ffa_version;
942 
943 	/* Restrict userspace from having an IPC channel over FF-A with secure */
944 	if (!capable(CAP_IPC_OWNER))
945 		return -EPERM;
946 
947 	/*
948 	 * If the host hasn't negotiated a version don't enable the
949 	 * FF-A capability.
950 	 */
951 	ffa_version = pkvm_get_ffa_version();
952 	if (!ffa_version)
953 		return -EINVAL;
954 
955 	mutex_lock(&kvm->arch.config_lock);
956 	if (kvm->arch.pkvm.handle) {
957 		ret = -EBUSY;
958 		goto out_unlock;
959 	}
960 
961 	kvm->arch.pkvm.ffa_support = enable;
962 out_unlock:
963 	mutex_unlock(&kvm->arch.config_lock);
964 	return ret;
965 }
966 
967 int pkvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
968 {
969 	if (!kvm_vm_is_protected(kvm))
970 		return -EINVAL;
971 
972 	if (cap->args[1] || cap->args[2] || cap->args[3])
973 		return -EINVAL;
974 
975 	switch (cap->flags) {
976 	case KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FW_IPA:
977 		return pkvm_vm_ioctl_set_fw_ipa(kvm, cap->args[0]);
978 	case KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO:
979 		return pkvm_vm_ioctl_info(kvm, (void __force __user *)cap->args[0]);
980 	case KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FFA:
981 		return pkvm_vm_ioctl_ffa_support(kvm, cap->args[0]);
982 	default:
983 		return -EINVAL;
984 	}
985 
986 	return 0;
987 }
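/*
 * Illustrative userspace sketch of how the flag-multiplexed cap above might
 * be driven. The capability constant that routes to this handler lives
 * outside this file and is assumed here for the example:
 *
 *	struct kvm_enable_cap cap = {
 *		.cap	= KVM_CAP_ARM_PROTECTED_VM,	// assumed cap name
 *		.flags	= KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FW_IPA,
 *		.args	= { fw_ipa_gpa },		// args[1..3] must be 0
 *	};
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */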
988 
989 #ifdef CONFIG_MODULES
990 static char early_pkvm_modules[COMMAND_LINE_SIZE] __initdata;
991 
992 static int __init early_pkvm_modules_cfg(char *arg)
993 {
994 	/*
995 	 * Passing kvm-arm.protected_modules without a module list is deprecated.
996 	 * Use kvm-arm.protected_modules=<module1>,<module2> instead.
997 	 */
998 	if (!arg)
999 		return -EINVAL;
1000 
1001 	strscpy(early_pkvm_modules, arg, COMMAND_LINE_SIZE);
1002 
1003 	return 0;
1004 }
1005 early_param("kvm-arm.protected_modules", early_pkvm_modules_cfg);
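/*
 * Example (illustrative module names): "kvm-arm.protected_modules=foo,bar"
 * is stored verbatim here; pkvm_load_early_modules() later splits it with
 * strsep() and modprobes each entry in order.
 */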
1006 
1007 static void __init free_modprobe_argv(struct subprocess_info *info)
1008 {
1009 	kfree(info->argv);
1010 }
1011 
1012 static int __init init_modprobe(struct subprocess_info *info, struct cred *new)
1013 {
1014 	struct file *file = filp_open("/dev/kmsg", O_RDWR, 0);
1015 
1016 	if (IS_ERR(file)) {
1017 		pr_warn("Warning: unable to open /dev/kmsg, modprobe will be silent.\n");
1018 		return 0;
1019 	}
1020 
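	/* Point the helper's stdin, stdout and stderr at /dev/kmsg. */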
1021 	init_dup(file);
1022 	init_dup(file);
1023 	init_dup(file);
1024 	fput(file);
1025 
1026 	return 0;
1027 }
1028 
1029 /*
1030  * Heavily inspired by request_module(). The latter couldn't be reused though,
1031  * as the feature can be disabled depending on the umh configuration. Some
1032  * security is enforced here by making sure this can be called only while pKVM
1033  * is enabled but not yet completely initialized.
1034  */
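/*
 * For a module "foo" and a non-empty CONFIG_PKVM_MODULE_PATH such as
 * "/lib/pkvm" (illustrative value), the argv built below is equivalent to
 * running:
 *
 *	modprobe -q -d /lib/pkvm -- foo
 *
 * With an empty module path, the "-d <dir>" pair is simply dropped.
 */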
1035 static int __init __pkvm_request_early_module(char *module_name,
1036 					      char *module_path)
1037 {
1038 	char *modprobe_path = CONFIG_MODPROBE_PATH;
1039 	struct subprocess_info *info;
1040 	static char *envp[] = {
1041 		"HOME=/",
1042 		"TERM=linux",
1043 		"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
1044 		NULL
1045 	};
1046 	static bool proc;
1047 	char **argv;
1048 	int idx = 0;
1049 
1050 	if (!is_protected_kvm_enabled())
1051 		return -EACCES;
1052 
1053 	if (static_branch_likely(&kvm_protected_mode_initialized))
1054 		return -EACCES;
1055 
1056 	argv = kmalloc(sizeof(char *) * 7, GFP_KERNEL);
1057 	if (!argv)
1058 		return -ENOMEM;
1059 
1060 	argv[idx++] = modprobe_path;
1061 	argv[idx++] = "-q";
1062 	if (*module_path != '\0') {
1063 		argv[idx++] = "-d";
1064 		argv[idx++] = module_path;
1065 	}
1066 	argv[idx++] = "--";
1067 	argv[idx++] = module_name;
1068 	argv[idx++] = NULL;
1069 
1070 	info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL,
1071 					 init_modprobe, free_modprobe_argv, NULL);
1072 	if (!info)
1073 		goto err;
1074 
1075 	/* Even with CONFIG_STATIC_USERMODEHELPER we really want this path */
1076 	info->path = modprobe_path;
1077 
1078 	if (!proc) {
1079 		wait_for_initramfs();
1080 		if (init_mount("proc", "/proc", "proc",
1081 			       MS_SILENT | MS_NOEXEC | MS_NOSUID, NULL))
1082 			pr_warn("Couldn't mount /proc, pKVM module parameters will be ignored\n");
1083 
1084 		proc = true;
1085 	}
1086 
1087 	return call_usermodehelper_exec(info, UMH_WAIT_PROC | UMH_KILLABLE);
1088 err:
1089 	kfree(argv);
1090 
1091 	return -ENOMEM;
1092 }
1093 
1094 static int __init pkvm_request_early_module(char *module_name, char *module_path)
1095 {
1096 	int err = __pkvm_request_early_module(module_name, module_path);
1097 
1098 	if (!err)
1099 		return 0;
1100 
1101 	/* Already tried the default path */
1102 	if (*module_path == '\0')
1103 		return err;
1104 
1105 	pr_info("loading %s from %s failed, fallback to the default path\n",
1106 		module_name, module_path);
1107 
1108 	return __pkvm_request_early_module(module_name, "");
1109 }
1110 
1111 static void pkvm_el2_mod_free(void);
1112 
1113 int __init pkvm_load_early_modules(void)
1114 {
1115 	char *token, *buf = early_pkvm_modules;
1116 	char *module_path = CONFIG_PKVM_MODULE_PATH;
1117 	int err = 0;
1118 
1119 	while (true) {
1120 		token = strsep(&buf, ",");
1121 
1122 		if (!token)
1123 			break;
1124 
1125 		if (*token) {
1126 			err = pkvm_request_early_module(token, module_path);
1127 			if (err) {
1128 				pr_err("Failed to load pkvm module %s: %d\n",
1129 				       token, err);
1130 				goto out;
1131 			}
1132 		}
1133 
1134 		if (buf)
1135 			*(buf - 1) = ',';
1136 	}
1137 
1138 out:
1139 	pkvm_el2_mod_free();
1140 
1141 	return err;
1142 }
1143 
1144 static LIST_HEAD(pkvm_modules);
1145 
1146 static void pkvm_el2_mod_add(struct pkvm_el2_module *mod)
1147 {
1148 	INIT_LIST_HEAD(&mod->node);
1149 	list_add(&mod->node, &pkvm_modules);
1150 }
1151 
1152 static void pkvm_el2_mod_free(void)
1153 {
1154 	struct pkvm_el2_sym *sym, *tmp;
1155 	struct pkvm_el2_module *mod;
1156 
1157 	list_for_each_entry(mod, &pkvm_modules, node) {
1158 		list_for_each_entry_safe(sym, tmp, &mod->ext_symbols, node) {
1159 			list_del(&sym->node);
1160 			kfree(sym->name);
1161 			kfree(sym);
1162 		}
1163 	}
1164 }
1165 
1166 static struct module *pkvm_el2_mod_to_module(struct pkvm_el2_module *hyp_mod)
1167 {
1168 	struct mod_arch_specific *arch;
1169 
1170 	arch = container_of(hyp_mod, struct mod_arch_specific, hyp);
1171 	return container_of(arch, struct module, arch);
1172 }
1173 
1174 #ifdef CONFIG_PKVM_STACKTRACE
1175 unsigned long pkvm_el2_mod_kern_va(unsigned long addr)
1176 {
1177 	struct pkvm_el2_module *mod;
1178 
1179 	list_for_each_entry(mod, &pkvm_modules, node) {
1180 		unsigned long hyp_va = (unsigned long)mod->hyp_va;
1181 		size_t len = (unsigned long)mod->sections.end -
1182 			     (unsigned long)mod->sections.start;
1183 
1184 		if (addr >= hyp_va && addr < (hyp_va + len))
1185 			return (unsigned long)mod->sections.start +
1186 				(addr - hyp_va);
1187 	}
1188 
1189 	return 0;
1190 }
1191 #else
1192 unsigned long pkvm_el2_mod_kern_va(unsigned long addr) { return 0; }
1193 #endif
1194 
1195 static struct pkvm_el2_module *pkvm_el2_mod_lookup_symbol(const char *name,
1196 							  unsigned long *addr)
1197 {
1198 	struct pkvm_el2_module *hyp_mod;
1199 	unsigned long __addr;
1200 
1201 	list_for_each_entry(hyp_mod, &pkvm_modules, node) {
1202 		struct module *mod = pkvm_el2_mod_to_module(hyp_mod);
1203 
1204 		__addr = find_kallsyms_symbol_value(mod, name);
1205 		if (!__addr)
1206 			continue;
1207 
1208 		*addr = __addr;
1209 		return hyp_mod;
1210 	}
1211 
1212 	return NULL;
1213 }
1214 
1215 static bool within_pkvm_module_section(struct pkvm_module_section *section,
1216 				       unsigned long addr)
1217 {
1218 	return (addr > (unsigned long)section->start) &&
1219 		(addr < (unsigned long)section->end);
1220 }
1221 
1222 static int pkvm_reloc_imported_symbol(struct pkvm_el2_module *importer,
1223 				      struct pkvm_el2_sym *sym,
1224 				      unsigned long hyp_dst)
1225 {
1226 	s64 val, val_max = (s64)(~(BIT(25) - 1)) << 2;
1227 	u32 insn = le32_to_cpu(*sym->rela_pos);
1228 	unsigned long hyp_src;
1229 	u64 imm;
1230 
1231 	if (!within_pkvm_module_section(&importer->text,
1232 					(unsigned long)sym->rela_pos))
1233 		return -EINVAL;
1234 
1235 	hyp_src = (unsigned long)importer->hyp_va +
1236 		((void *)sym->rela_pos - importer->text.start);
1237 
1238 	/*
1239 	 * Module hyp VAs are allocated going upward. The source MUST NOT have
1240 	 * a lower address than the destination.
1241 	 */
1242 	if (WARN_ON(hyp_src < hyp_dst))
1243 		return -EINVAL;
1244 
1245 	val = hyp_dst - hyp_src;
1246 	if (val < val_max) {
1247 		pr_warn("Exported symbol %s is too far for the relocation in module %s\n",
1248 			sym->name, pkvm_el2_mod_to_module(importer)->name);
1249 		return -ERANGE;
1250 	}
1251 
1252 	/* offset encoded as imm26 * 4 */
1253 	imm = (val >> 2) & (BIT(26) - 1);
1254 
1255 	insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_26, insn, imm);
1256 
1257 	return aarch64_insn_patch_text_nosync((void *)sym->rela_pos, insn);
1258 }
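/*
 * The encoding above follows the AArch64 B/BL immediate: a signed 26-bit
 * word offset, i.e. a reach of +/-128 MiB. As a worked example, if the
 * destination sits 0x1000 bytes below the branch instruction, val is
 * -0x1000 and the imm26 field becomes (-0x1000 >> 2) & (BIT(26) - 1).
 */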
1259 
1260 static int pkvm_reloc_imported_symbols(struct pkvm_el2_module *importer)
1261 {
1262 	unsigned long addr, offset, hyp_addr;
1263 	struct pkvm_el2_module *exporter;
1264 	struct pkvm_el2_sym *sym;
1265 
1266 	list_for_each_entry(sym, &importer->ext_symbols, node) {
1267 		exporter = pkvm_el2_mod_lookup_symbol(sym->name, &addr);
1268 		if (!exporter) {
1269 			pr_warn("pKVM symbol %s not exported by any module\n",
1270 				sym->name);
1271 			return -EINVAL;
1272 		}
1273 
1274 		if (!within_pkvm_module_section(&exporter->text, addr)) {
1275 			pr_warn("pKVM symbol %s not part of %s .text section\n",
1276 				sym->name,
1277 				pkvm_el2_mod_to_module(exporter)->name);
1278 			return -EINVAL;
1279 		}
1280 
1281 		/* hyp addr in the exporter */
1282 		offset = addr - (unsigned long)exporter->text.start;
1283 		hyp_addr = (unsigned long)exporter->hyp_va + offset;
1284 
1285 		pkvm_reloc_imported_symbol(importer, sym, hyp_addr);
1286 	}
1287 
1288 	return 0;
1289 }
1290 
1291 struct pkvm_mod_sec_mapping {
1292 	struct pkvm_module_section *sec;
1293 	enum kvm_pgtable_prot prot;
1294 };
1295 
1296 static void pkvm_unmap_module_pages(void *kern_va, void *hyp_va, size_t size)
1297 {
1298 	size_t offset;
1299 	u64 pfn;
1300 
1301 	for (offset = 0; offset < size; offset += PAGE_SIZE) {
1302 		pfn = vmalloc_to_pfn(kern_va + offset);
1303 		kvm_call_hyp_nvhe(__pkvm_unmap_module_page, pfn,
1304 				  hyp_va + offset);
1305 	}
1306 }
1307 
1308 static void pkvm_unmap_module_sections(struct pkvm_mod_sec_mapping *secs_map, void *hyp_va_base, int nr_secs)
1309 {
1310 	size_t offset, size;
1311 	void *start;
1312 	int i;
1313 
1314 	for (i = 0; i < nr_secs; i++) {
1315 		start = secs_map[i].sec->start;
1316 		size = secs_map[i].sec->end - start;
1317 		offset = start - secs_map[0].sec->start;
1318 		pkvm_unmap_module_pages(start, hyp_va_base + offset, size);
1319 	}
1320 }
1321 
1322 static int pkvm_map_module_section(struct pkvm_mod_sec_mapping *sec_map, void *hyp_va)
1323 {
1324 	size_t offset, size = sec_map->sec->end - sec_map->sec->start;
1325 	int ret;
1326 	u64 pfn;
1327 
1328 	for (offset = 0; offset < size; offset += PAGE_SIZE) {
1329 		pfn = vmalloc_to_pfn(sec_map->sec->start + offset);
1330 		ret = kvm_call_hyp_nvhe(__pkvm_map_module_page, pfn,
1331 					hyp_va + offset, sec_map->prot);
1332 		if (ret) {
1333 			pkvm_unmap_module_pages(sec_map->sec->start, hyp_va, offset);
1334 			return ret;
1335 		}
1336 	}
1337 
1338 	return 0;
1339 }
1340 
1341 static int pkvm_map_module_sections(struct pkvm_mod_sec_mapping *secs_map,
1342 				    void *hyp_va_base, int nr_secs)
1343 {
1344 	size_t offset;
1345 	int i, ret;
1346 
1347 	for (i = 0; i < nr_secs; i++) {
1348 		offset = secs_map[i].sec->start - secs_map[0].sec->start;
1349 		ret = pkvm_map_module_section(&secs_map[i], hyp_va_base + offset);
1350 		if (ret) {
1351 			pkvm_unmap_module_sections(secs_map, hyp_va_base, i);
1352 			return ret;
1353 		}
1354 	}
1355 
1356 	return 0;
1357 }
1358 
1359 static int __pkvm_cmp_mod_sec(const void *p1, const void *p2)
1360 {
1361 	struct pkvm_mod_sec_mapping const *s1 = p1;
1362 	struct pkvm_mod_sec_mapping const *s2 = p2;
1363 
1364 	return s1->sec->start < s2->sec->start ? -1 : s1->sec->start > s2->sec->start;
1365 }
1366 
1367 static void *pkvm_map_module_struct(struct pkvm_el2_module *mod)
1368 {
1369 	void *addr = (void *)__get_free_page(GFP_KERNEL);
1370 
1371 	if (!addr)
1372 		return NULL;
1373 
1374 	if (kvm_share_hyp(addr, addr + PAGE_SIZE)) {
1375 		free_page((unsigned long)addr);
1376 		return NULL;
1377 	}
1378 
1379 	/*
1380 	 * The pkvm_el2_module being stored in vmalloc memory, we can't guarantee
1381 	 * a linear mapping for the hypervisor to rely on. Copy the struct instead.
1382 	 */
1383 	memcpy(addr, mod, sizeof(*mod));
1384 
1385 	return addr;
1386 }
1387 
1388 static void pkvm_unmap_module_struct(void *addr)
1389 {
1390 	kvm_unshare_hyp(addr, addr + PAGE_SIZE);
1391 	free_page((unsigned long)addr);
1392 }
1393 
1394 static void pkvm_module_kmemleak(struct module *this,
1395 				 struct pkvm_mod_sec_mapping *sec_map,
1396 				 int nr_sections)
1397 {
1398 	void *start, *end;
1399 	int i;
1400 
1401 	if (!this)
1402 		return;
1403 
1404 	/*
1405 	 * The module loader already removes read-only sections from kmemleak
1406 	 * scanned objects. However, a few hyp sections are installed into
1407 	 * MOD_DATA. Skip those sections before they are made inaccessible from
1408 	 * the host.
1409 	 */
1410 
1411 	start = this->mem[MOD_DATA].base;
1412 	end = start + this->mem[MOD_DATA].size;
1413 
1414 	for (i = 0; i < nr_sections; i++, sec_map++) {
1415 		if (sec_map->sec->start < start || sec_map->sec->start >= end)
1416 			continue;
1417 
1418 		kmemleak_scan_area(start, sec_map->sec->start - start, GFP_KERNEL);
1419 		start = sec_map->sec->end;
1420 	}
1421 
1422 	kmemleak_scan_area(start, end - start, GFP_KERNEL);
1423 }
1424 
1425 int __pkvm_load_el2_module(struct module *this, unsigned long *token)
1426 {
1427 	struct pkvm_el2_module *mod = &this->arch.hyp;
1428 	struct pkvm_mod_sec_mapping secs_map[] = {
1429 		{ &mod->text, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X },
1430 		{ &mod->bss, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W },
1431 		{ &mod->rodata, KVM_PGTABLE_PROT_R },
1432 		{ &mod->event_ids, KVM_PGTABLE_PROT_R },
1433 		{ &mod->patchable_function_entries, KVM_PGTABLE_PROT_R },
1434 		{ &mod->data, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W },
1435 	};
1436 	void *start, *end, *hyp_va, *mod_remap;
1437 	struct arm_smccc_res res;
1438 	kvm_nvhe_reloc_t *endrel;
1439 	int ret, i, secs_first;
1440 	size_t size;
1441 
1442 	/* The pKVM hyp only allows loading before it is fully initialized */
1443 	if (!is_protected_kvm_enabled() || is_pkvm_initialized())
1444 		return -EOPNOTSUPP;
1445 
1446 	for (i = 0; i < ARRAY_SIZE(secs_map); i++) {
1447 		if (!PAGE_ALIGNED(secs_map[i].sec->start)) {
1448 			kvm_err("EL2 sections are not page-aligned\n");
1449 			return -EINVAL;
1450 		}
1451 	}
1452 
1453 	if (!try_module_get(this)) {
1454 		kvm_err("Kernel module has been unloaded\n");
1455 		return -ENODEV;
1456 	}
1457 
1458 	/* Missing or empty module sections are placed first */
1459 	sort(secs_map, ARRAY_SIZE(secs_map), sizeof(secs_map[0]), __pkvm_cmp_mod_sec, NULL);
1460 	for (secs_first = 0; secs_first < ARRAY_SIZE(secs_map); secs_first++) {
1461 		start = secs_map[secs_first].sec->start;
1462 		if (start)
1463 			break;
1464 	}
1465 	end = secs_map[ARRAY_SIZE(secs_map) - 1].sec->end;
1466 	size = end - start;
1467 
1468 	mod->sections.start = start;
1469 	mod->sections.end = end;
1470 
1471 	arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__pkvm_alloc_module_va),
1472 			  size >> PAGE_SHIFT, &res);
1473 	if (res.a0 != SMCCC_RET_SUCCESS || !res.a1) {
1474 		kvm_err("Failed to allocate hypervisor VA space for EL2 module\n");
1475 		module_put(this);
1476 		return res.a0 == SMCCC_RET_SUCCESS ? -ENOMEM : -EPERM;
1477 	}
1478 	hyp_va = (void *)res.a1;
1479 	mod->hyp_va = hyp_va;
1480 
1481 	/*
1482 	 * The token can be used for other calls related to this module.
1483 	 * Conveniently the only information needed is this addr so let's use it
1484 	 * as an identifier.
1485 	 */
1486 	if (token)
1487 		*token = (unsigned long)hyp_va;
1488 
1489 	mod->sections.start = start;
1490 	mod->sections.end = end;
1491 
1492 	endrel = (void *)mod->relocs + mod->nr_relocs * sizeof(*endrel);
1493 	kvm_apply_hyp_module_relocations(mod, mod->relocs, endrel);
1494 
1495 	ret = pkvm_reloc_imported_symbols(mod);
1496 	if (ret) {
		module_put(this);
1497 		return ret;
	}
1498 
1499 	pkvm_module_kmemleak(this, secs_map, ARRAY_SIZE(secs_map));
1500 
1501 	ret = hyp_trace_init_mod_events(mod);
1502 	if (ret)
1503 		kvm_err("Failed to init module events: %d\n", ret);
1504 
1505 	mod_remap = pkvm_map_module_struct(mod);
1506 	if (!mod_remap) {
1507 		module_put(this);
1508 		return -ENOMEM;
1509 	}
1510 
1511 	ret = pkvm_map_module_sections(secs_map + secs_first, hyp_va,
1512 				       ARRAY_SIZE(secs_map) - secs_first);
1513 	if (ret) {
1514 		kvm_err("Failed to map EL2 module page: %d\n", ret);
1515 		pkvm_unmap_module_struct(mod_remap);
1516 		module_put(this);
1517 		return ret;
1518 	}
1519 
1520 	pkvm_el2_mod_add(mod);
1521 
1522 	ret = kvm_call_hyp_nvhe(__pkvm_init_module, mod_remap);
1523 	pkvm_unmap_module_struct(mod_remap);
1524 	if (ret) {
1525 		kvm_err("Failed to init EL2 module: %d\n", ret);
1526 		list_del(&mod->node);
1527 		pkvm_unmap_module_sections(secs_map, hyp_va, ARRAY_SIZE(secs_map));
1528 		module_put(this);
1529 		return ret;
1530 	}
1531 
1532 	hyp_trace_enable_event_early();
1533 
1534 	return 0;
1535 }
1536 EXPORT_SYMBOL(__pkvm_load_el2_module);
1537 
1538 int __pkvm_register_el2_call(unsigned long hfn_hyp_va)
1539 {
1540 	return kvm_call_hyp_nvhe(__pkvm_register_hcall, hfn_hyp_va);
1541 }
1542 EXPORT_SYMBOL(__pkvm_register_el2_call);
1543 
1544 void pkvm_el2_mod_frob_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings)
1545 {
1546 #ifdef CONFIG_PKVM_FTRACE
1547 	int i;
1548 
1549 	for (i = 0; i < ehdr->e_shnum; i++) {
1550 		if (!strcmp(secstrings + sechdrs[i].sh_name, ".hyp.text")) {
1551 			Elf_Shdr *hyp_text = sechdrs + i;
1552 
1553 			/* .hyp.text.ftrace_tramp pollutes .hyp.text flags */
1554 			hyp_text->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
1555 			break;
1556 		}
1557 	}
1558 #endif
1559 }
1560 #endif /* CONFIG_MODULES */
1561 
1562 int __pkvm_topup_hyp_alloc_mgt_mc(unsigned long id, struct kvm_hyp_memcache *mc)
1563 {
1564 	struct arm_smccc_res res;
1565 
1566 	res = kvm_call_hyp_nvhe_smccc(__pkvm_hyp_alloc_mgt_refill,
1567 				      id, mc->head, mc->nr_pages);
1568 	mc->head = res.a2;
1569 	mc->nr_pages = res.a3;
1570 	return res.a1;
1571 }
1572 EXPORT_SYMBOL(__pkvm_topup_hyp_alloc_mgt_mc);
1573 
1574 int __pkvm_topup_hyp_alloc(unsigned long nr_pages)
1575 {
1576 	struct kvm_hyp_memcache mc;
1577 	int ret;
1578 
1579 	init_hyp_memcache(&mc);
1580 
1581 	ret = topup_hyp_memcache(&mc, nr_pages, 0);
1582 	if (ret)
1583 		return ret;
1584 
1585 	ret = __pkvm_topup_hyp_alloc_mgt_mc(HYP_ALLOC_MGT_HEAP_ID, &mc);
1586 	if (ret)
1587 		free_hyp_memcache(&mc);
1588 
1589 	return ret;
1590 }
1591 EXPORT_SYMBOL(__pkvm_topup_hyp_alloc);
1592 
1593 unsigned long __pkvm_reclaim_hyp_alloc_mgt(unsigned long nr_pages)
1594 {
1595 	unsigned long ratelimit, last_reclaim, reclaimed = 0;
1596 	struct kvm_hyp_memcache mc;
1597 	struct arm_smccc_res res;
1598 
1599 	init_hyp_memcache(&mc);
1600 
1601 	do {
1602 		/* Arbitrary upper bound to limit the time spent at EL2 */
1603 		ratelimit = min(nr_pages, 16UL);
1604 
1605 		arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__pkvm_hyp_alloc_mgt_reclaim),
1606 				  ratelimit, &res);
1607 		if (WARN_ON(res.a0 != SMCCC_RET_SUCCESS))
1608 			break;
1609 
1610 		mc.head = res.a1;
1611 		last_reclaim = mc.nr_pages = res.a2;
1612 
1613 		free_hyp_memcache(&mc);
1614 		reclaimed += last_reclaim;
1615 
1616 	} while (last_reclaim && (reclaimed < nr_pages));
1617 
1618 	return reclaimed;
1619 }
1620 
1621 int __pkvm_topup_hyp_alloc_mgt_gfp(unsigned long id, unsigned long nr_pages,
1622 				   unsigned long sz_alloc, gfp_t gfp)
1623 {
1624 	struct kvm_hyp_memcache mc;
1625 	int ret;
1626 
1627 	init_hyp_memcache(&mc);
1628 
1629 	ret = topup_hyp_memcache_gfp(&mc, nr_pages, get_order(sz_alloc), gfp);
1630 	if (ret)
1631 		return ret;
1632 
1633 	ret = __pkvm_topup_hyp_alloc_mgt_mc(id, &mc);
1634 	if (ret) {
1635 		kvm_err("Failed to top up id %ld: pages = %ld, size = %ld, err = %d, freeing %ld pages\n",
1636 			id, nr_pages, sz_alloc, ret, mc.nr_pages);
1637 		free_hyp_memcache(&mc);
1638 	}
1639 
1640 	return ret;
1641 }
1642 EXPORT_SYMBOL(__pkvm_topup_hyp_alloc_mgt_gfp);
1643 
1644 static int __pkvm_donate_resource(struct resource *r)
1645 {
1646 	if (!PAGE_ALIGNED(resource_size(r)) || !PAGE_ALIGNED(r->start))
1647 		return -EINVAL;
1648 
1649 	return kvm_call_hyp_nvhe(__pkvm_host_donate_hyp_mmio,
1650 				 __phys_to_pfn(r->start),
1651 				 resource_size(r) >> PAGE_SHIFT);
1652 
1653 }
1654 
1655 static int __pkvm_reclaim_resource(struct resource *r)
1656 {
1657 	if (!PAGE_ALIGNED(resource_size(r)) || !PAGE_ALIGNED(r->start))
1658 		return -EINVAL;
1659 
1660 	return kvm_call_hyp_nvhe(__pkvm_host_reclaim_hyp_mmio,
1661 				 __phys_to_pfn(r->start),
1662 				 resource_size(r) >> PAGE_SHIFT);
1663 }
1664 
1665 static int __pkvm_arch_assign_device(struct device *dev, void *data)
1666 {
1667 	struct platform_device *pdev;
1668 	struct resource *r;
1669 	int index = 0;
1670 	int ret = 0;
1671 
1672 	if (!dev_is_platform(dev))
1673 		return -EOPNOTSUPP;
1674 
1675 	pdev = to_platform_device(dev);
1676 
1677 	while ((r = platform_get_resource(pdev, IORESOURCE_MEM, index++))) {
1678 		ret = __pkvm_donate_resource(r);
1679 		if (ret)
1680 			break;
1681 	}
1682 
1683 	if (ret) {
1684 		while (index--) {
1685 			r = platform_get_resource(pdev, IORESOURCE_MEM, index);
1686 			__pkvm_reclaim_resource(r);
1687 		}
1688 	}
1689 	return ret;
1690 }
1691 
1692 static int __pkvm_arch_reclaim_device(struct device *dev, void *data)
1693 {
1694 	struct platform_device *pdev;
1695 	struct resource *r;
1696 	int index = 0;
1697 
1698 	pdev = to_platform_device(dev);
1699 
1700 	while ((r = platform_get_resource(pdev, IORESOURCE_MEM, index++)))
1701 		__pkvm_reclaim_resource(r);
1702 
1703 	return 0;
1704 }
1705 
1706 int kvm_arch_assign_device(struct device *dev)
1707 {
1708 	if (!is_protected_kvm_enabled())
1709 		return 0;
1710 
1711 	return __pkvm_arch_assign_device(dev, NULL);
1712 }
1713 
1714 int kvm_arch_assign_group(struct iommu_group *group)
1715 {
1716 	int ret;
1717 
1718 	if (!is_protected_kvm_enabled())
1719 		return 0;
1720 
1721 	ret = iommu_group_for_each_dev(group, NULL, __pkvm_arch_assign_device);
1722 
1723 	if (ret)
1724 		iommu_group_for_each_dev(group, NULL, __pkvm_arch_reclaim_device);
1725 
1726 	return ret;
1727 }
1728 
1729 void kvm_arch_reclaim_device(struct device *dev)
1730 {
1731 	if (!is_protected_kvm_enabled())
1732 		return;
1733 
1734 	__pkvm_arch_reclaim_device(dev, NULL);
1735 }
1736 
1737 void kvm_arch_reclaim_group(struct iommu_group *group)
1738 {
1739 	if (!is_protected_kvm_enabled())
1740 		return;
1741 
1742 	iommu_group_for_each_dev(group, NULL, __pkvm_arch_reclaim_device);
1743 }
1744 
1745 static u64 __pkvm_mapping_start(struct pkvm_mapping *m)
1746 {
1747 	return m->gfn * PAGE_SIZE;
1748 }
1749 
1750 static u64 __pkvm_mapping_end(struct pkvm_mapping *m)
1751 {
1752 	return (m->gfn + m->nr_pages) * PAGE_SIZE - 1;
1753 }
1754 
1755 INTERVAL_TREE_DEFINE(struct pkvm_mapping, node, u64, __subtree_last,
1756 		__pkvm_mapping_start, __pkvm_mapping_end, static,
1757 		pkvm_mapping);
1758 
1759 #define for_each_mapping_in_range_safe(__pgt, __start, __end, __map)				\
1760 	for (struct pkvm_mapping *__tmp = pkvm_mapping_iter_first(&(__pgt)->pkvm_mappings,	\
1761 								  __start, __end - 1);		\
1762 	     __tmp && ({									\
1763 				__map = __tmp;							\
1764 				__tmp = pkvm_mapping_iter_next(__map, __start, __end - 1);	\
1765 				true;								\
1766 		       });									\
1767 	    )
1768 
1769 int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
1770 			     struct kvm_pgtable_mm_ops *mm_ops, struct kvm_pgtable_pte_ops *pte_ops)
1771 {
1772 	pgt->pkvm_mappings	= RB_ROOT_CACHED;
1773 	pgt->mmu		= mmu;
1774 
1775 	return 0;
1776 }
1777 
1778 static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 end)
1779 {
1780 	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
1781 	pkvm_handle_t handle = kvm->arch.pkvm.handle;
1782 	struct pkvm_mapping *mapping;
1783 	int ret;
1784 
1785 	if (!handle)
1786 		return 0;
1787 
1788 	for_each_mapping_in_range_safe(pgt, start, end, mapping) {
1789 		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn,
1790 					mapping->nr_pages);
1791 		if (WARN_ON(ret))
1792 			return ret;
1793 		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
1794 		kfree(mapping);
1795 	}
1796 
1797 	return 0;
1798 }
1799 
1800 void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
1801 {
1802 	__pkvm_pgtable_stage2_unmap(pgt, 0, ~(0ULL));
1803 }
1804 
1805 int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
1806 			   u64 phys, enum kvm_pgtable_prot prot,
1807 			   void *mc, enum kvm_pgtable_walk_flags flags)
1808 {
1809 	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
1810 	struct pkvm_mapping *mapping = NULL;
1811 	struct kvm_hyp_memcache *cache = mc;
1812 	u64 gfn = addr >> PAGE_SHIFT;
1813 	u64 pfn = phys >> PAGE_SHIFT;
1814 	int ret;
1815 
1816 	if (size != PAGE_SIZE && size != PMD_SIZE)
1817 		return -EINVAL;
1818 
1819 	lockdep_assert_held_write(&kvm->mmu_lock);
1820 
1821 	/*
1822 	 * Calling stage2_map() on top of existing mappings is either happening because of a race
1823 	 * with another vCPU, or because we're changing between page and block mappings. As per
1824 	 * user_mem_abort(), same-size permission faults are handled in the relax_perms() path.
1825 	 */
1826 	mapping = pkvm_mapping_iter_first(&pgt->pkvm_mappings, addr, addr + size - 1);
1827 	if (mapping) {
1828 		if (size == (mapping->nr_pages * PAGE_SIZE))
1829 			return -EAGAIN;
1830 
1831 		/* Remove _any_ pkvm_mapping overlapping with the range, bigger or smaller. */
1832 		ret = __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
1833 		if (ret)
1834 			return ret;
1835 		mapping = NULL;
1836 	}
1837 
1838 	ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, prot, size / PAGE_SIZE);
1839 	if (ret) {
1840 		WARN_ON(ret != -ENOMEM);
1841 		return ret;
1842 	}
1843 
1844 	swap(mapping, cache->mapping);
1845 	mapping->gfn = gfn;
1846 	mapping->pfn = pfn;
1847 	mapping->nr_pages = size / PAGE_SIZE;
1848 	pkvm_mapping_insert(mapping, &pgt->pkvm_mappings);
1849 
1850 	return ret;
1851 }
1852 
1853 int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
1854 {
1855 	lockdep_assert_held_write(&kvm_s2_mmu_to_kvm(pgt->mmu)->mmu_lock);
1856 
1857 	return __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
1858 }
1859 
1860 int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
1861 {
1862 	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
1863 	pkvm_handle_t handle = kvm->arch.pkvm.handle;
1864 
1865 	return kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, addr >> PAGE_SHIFT, size);
1866 }
1867 
1868 int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
1869 {
1870 	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
1871 	struct pkvm_mapping *mapping;
1872 
1873 	lockdep_assert_held(&kvm->mmu_lock);
1874 	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
1875 		__clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn), PAGE_SIZE * mapping->nr_pages);
1876 
1877 	return 0;
1878 }
1879 
1880 bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold)
1881 {
1882 	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
1883 	pkvm_handle_t handle = kvm->arch.pkvm.handle;
1884 
1885 	return kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, addr >> PAGE_SHIFT,
1886 				 size, mkold);
1887 }
1888 
1889 int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
1890 				    enum kvm_pgtable_walk_flags flags)
1891 {
1892 	return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot);
1893 }
1894 
1895 kvm_pte_t pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
1896 				 enum kvm_pgtable_walk_flags flags)
1897 {
1898 	return kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT);
1899 }
1900 
1901 void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops,
1902 				       struct kvm_pgtable_pte_ops *pte_ops,
1903 				       void *pgtable, s8 level)
1904 {
1905 	WARN_ON_ONCE(1);
1906 }
1907 
1908 kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
1909 					enum kvm_pgtable_prot prot, void *mc, bool force_pte)
1910 {
1911 	WARN_ON_ONCE(1);
1912 	return NULL;
1913 }
1914 
1915 int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size, void *mc)
1916 {
1917 	WARN_ON_ONCE(1);
1918 	return -EINVAL;
1919 }
1920 
1921 static int early_ffa_unmap_on_lend_cfg(char *arg)
1922 {
1923 	static_branch_enable(&kvm_ffa_unmap_on_lend);
1924 	return 0;
1925 }
1926 
1927 early_param("kvm-arm.ffa-unmap-on-lend", early_ffa_unmap_on_lend_cfg);
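/*
 * Example (illustrative): passing "kvm-arm.ffa-unmap-on-lend" on the kernel
 * command line flips the static key above, which in turn makes
 * kvm_hyp_reserve() budget KVM_FFA_SPM_HANDLE_NR_PAGES extra pages.
 */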
1928