// SPDX-License-Identifier: GPL-2.0
/*
 * IOMMU operations for pKVM
 *
 * Copyright (C) 2022 Linaro Ltd.
 */
#include <asm/kvm_hyp.h>
#include <asm/kvm_hypevents.h>

#include <hyp/adjust_pc.h>

#include <kvm/iommu.h>
#include <kvm/device.h>

#include <nvhe/iommu.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>

/* Only one set of ops supported, similar to the kernel. */
struct kvm_iommu_ops *kvm_iommu_ops;
void **kvm_hyp_iommu_domains;

/* The hypervisor is non-preemptible, so cur_context can be per-CPU. */
DEFINE_PER_CPU(struct pkvm_hyp_vcpu *, __cur_context);
#define cur_context (*this_cpu_ptr(&__cur_context))

phys_addr_t cma_base;
size_t cma_size;

#define MAX_BLOCK_POOLS 16

/*
 * Common pools that IOMMU drivers can use to allocate pages.
 */
static struct hyp_pool iommu_system_pool;
static struct hyp_pool iommu_block_pools[MAX_BLOCK_POOLS];
static struct hyp_pool iommu_atomic_pool;

/*
 * hyp_pool->lock is dropped multiple times during a block pool reclaim, so we
 * need another global lock to serialize that operation with allocations.
 */
static DEFINE_HYP_SPINLOCK(__block_pools_lock);
static bool __block_pools_available;

static const u8 pmd_order = PMD_SHIFT - PAGE_SHIFT;

DECLARE_PER_CPU(struct kvm_hyp_req, host_hyp_reqs);

/* Protects domains in kvm_hyp_iommu_domains */
static DEFINE_HYP_SPINLOCK(kvm_iommu_domain_lock);

static atomic_t kvm_iommu_idmap_initialized;

static inline void kvm_iommu_idmap_init_done(void)
{
	atomic_set_release(&kvm_iommu_idmap_initialized, 1);
}

static inline bool kvm_iommu_is_ready(void)
{
	return atomic_read_acquire(&kvm_iommu_idmap_initialized) == 1;
}

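/*
 * True if the donated range [phys, phys + (1 << order) pages) is a whole,
 * PMD-aligned block that lies inside the CMA region reserved for the IOMMU.
 */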
static bool kvm_iommu_donate_from_cma(phys_addr_t phys, unsigned long order)
{
	phys_addr_t end = phys + PAGE_SIZE * (1 << order);

	if (end <= phys)
		return false;

	if (order != pmd_order)
		return false;

	if (!IS_ALIGNED(phys, PMD_SIZE))
		return false;

	if (phys < cma_base || end > cma_base + cma_size)
		return false;

	return true;
}

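/*
 * Claim an unused entry in iommu_block_pools and initialize it to cover the
 * PMD-sized block starting at @phys. Callers hold __block_pools_lock.
 */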
static struct hyp_pool *__get_empty_block_pool(phys_addr_t phys)
{
	int p;

	for (p = 0; p < MAX_BLOCK_POOLS; p++) {
		struct hyp_pool *pool = &iommu_block_pools[p];

		if (pool->max_order)
			continue;

		if (hyp_pool_init(pool, hyp_phys_to_pfn(phys), 1 << pmd_order, 0))
			return NULL;

		WRITE_ONCE(__block_pools_available, 1);

		return pool;
	}

	return NULL;
}

static void __repudiate_host_page(void *addr, unsigned long order,
				  struct kvm_hyp_memcache *host_mc)
{
	push_hyp_memcache(host_mc, addr, hyp_virt_to_phys, order);
	WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1 << order));
}

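/*
 * Refill the IOMMU allocator with pages donated by the host: CMA-backed
 * PMD-sized blocks each get a dedicated block pool, anything else is fed
 * into the system pool.
 */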
int kvm_iommu_refill(struct kvm_hyp_memcache *host_mc)
{
	struct kvm_hyp_memcache tmp_mc = *host_mc;

	if (!kvm_iommu_ops)
		return -EINVAL;

	while (tmp_mc.nr_pages) {
		unsigned long order = FIELD_GET(~PAGE_MASK, tmp_mc.head);
		phys_addr_t phys = tmp_mc.head & PAGE_MASK;
		struct hyp_pool *pool = &iommu_system_pool;
		u64 nr_pages;
		void *addr;

		if (check_shl_overflow(1UL, order, &nr_pages) ||
		    !IS_ALIGNED(phys, PAGE_SIZE << order))
			return -EINVAL;

		addr = admit_host_page(&tmp_mc, order);
		if (!addr)
			return -EINVAL;
		*host_mc = tmp_mc;

		if (kvm_iommu_donate_from_cma(phys, order)) {
			hyp_spin_lock(&__block_pools_lock);
			pool = __get_empty_block_pool(phys);
			hyp_spin_unlock(&__block_pools_lock);
			if (!pool) {
				__repudiate_host_page(addr, order, &tmp_mc);
				*host_mc = tmp_mc;
				return -EBUSY;
			}
		} else {
			hyp_virt_to_page(addr)->order = order;
			hyp_set_page_refcounted(hyp_virt_to_page(addr));
			hyp_put_page(pool, addr);
		}
	}

	return 0;
}

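/*
 * Hand back up to @target free pages to the host, draining the system pool
 * first and then any block pools that are entirely free.
 */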
void kvm_iommu_reclaim(struct kvm_hyp_memcache *host_mc, int target)
{
	unsigned long prev_nr_pages = host_mc->nr_pages;
	unsigned long block_pages = 1 << pmd_order;
	int p = 0;

	if (!kvm_iommu_ops)
		return;

	reclaim_hyp_pool(&iommu_system_pool, host_mc, target);

	target -= host_mc->nr_pages - prev_nr_pages;

	while (target > block_pages && p < MAX_BLOCK_POOLS) {
		struct hyp_pool *pool = &iommu_block_pools[p];

		hyp_spin_lock(&__block_pools_lock);

		if (hyp_pool_free_pages(pool) == block_pages) {
			reclaim_hyp_pool(pool, host_mc, block_pages);
			hyp_pool_init_empty(pool, 1);
			target -= block_pages;
		}

		hyp_spin_unlock(&__block_pools_lock);
		p++;
	}
}

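/* Number of pages the host could currently reclaim from the IOMMU pools. */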
int kvm_iommu_reclaimable(void)
{
	unsigned long reclaimable = 0;
	int p;

	if (!kvm_iommu_ops)
		return 0;

	reclaimable += hyp_pool_free_pages(&iommu_system_pool);

	/*
	 * This also accounts for blocks allocated from the CMA region. This is
	 * not exactly what the shrinker wants, but we need a way to report
	 * this memory to the host.
	 */

	for (p = 0; p < MAX_BLOCK_POOLS; p++) {
		unsigned long __free_pages = hyp_pool_free_pages(&iommu_block_pools[p]);

		if (__free_pages == 1 << pmd_order)
			reclaimable += __free_pages;
	}

	return reclaimable;
}

struct hyp_mgt_allocator_ops kvm_iommu_allocator_ops = {
	.refill = kvm_iommu_refill,
	.reclaim = kvm_iommu_reclaim,
	.reclaimable = kvm_iommu_reclaimable,
};

/* Return the current vCPU, or NULL for the host. */
struct pkvm_hyp_vcpu *__get_vcpu(void)
{
	struct kvm_vcpu *vcpu = this_cpu_ptr(&kvm_host_data)->host_ctxt.__hyp_running_vcpu;

	if (vcpu)
		return container_of(vcpu, struct pkvm_hyp_vcpu, vcpu);
	/* The guest may not be loaded, but we may be in a teardown context. */
	return cur_context;
}

int iommu_pkvm_unuse_dma(u64 phys_addr, size_t size)
{
	return __pkvm_unuse_dma(phys_addr, size, __get_vcpu());
}

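/*
 * Allocate for the host context: try the block pools round-robin first and
 * fall back to the system pool. On success, @pool is set to the pool the
 * pages came from.
 */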
static void *__kvm_iommu_alloc_pages(u8 order, struct hyp_pool **pool)
{
	static int last_block_pool;
	void *p;
	int i;

	if (!READ_ONCE(__block_pools_available))
		goto from_system_pool;

	hyp_spin_lock(&__block_pools_lock);

	i = last_block_pool;
	do {
		*pool = &iommu_block_pools[i];
		p = hyp_alloc_pages(*pool, order);
		if (p) {
			last_block_pool = i;
			hyp_spin_unlock(&__block_pools_lock);
			return p;
		}

		if (++i >= MAX_BLOCK_POOLS)
			i = 0;
	} while (i != last_block_pool);

	WRITE_ONCE(__block_pools_available, 0);

	hyp_spin_unlock(&__block_pools_lock);

from_system_pool:
	*pool = &iommu_system_pool;
	return hyp_alloc_pages(*pool, order);
}

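/*
 * Allocate 2^order pages for the IOMMU driver on behalf of the current
 * context (host or guest VM). On failure, post a KVM_HYP_REQ_TYPE_MEM
 * request so the caller can go back for more memory and retry.
 */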
void *kvm_iommu_donate_pages(u8 order, int flags)
{
	struct kvm_hyp_req *req = this_cpu_ptr(&host_hyp_reqs);
	struct pkvm_hyp_vcpu *hyp_vcpu = __get_vcpu();
	size_t size = (1 << order) * PAGE_SIZE;
	struct hyp_pool *pool;
	void *p;

	if (hyp_vcpu) {
		pool = &pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu)->iommu_pool;
		p = hyp_alloc_pages(pool, order);
	} else {
		p = __kvm_iommu_alloc_pages(order, &pool);
	}

	if (p) {
		/*
		 * If the page request is non-cacheable, remap it as such, since
		 * all pages in the pool are mapped beforehand and assumed to be
		 * cacheable.
		 */
		if (flags & IOMMU_PAGE_NOCACHE) {
			int ret;

			/* Make sure all data is written back before converting to non-cacheable. */
			kvm_flush_dcache_to_poc(p, size);

			ret = pkvm_remap_range(p, 1 << order, true);
			if (ret) {
				hyp_put_page(pool, p);
				return NULL;
			}
		}
		return p;
	}

	if (hyp_vcpu) {
		req = pkvm_hyp_req_reserve(hyp_vcpu, KVM_HYP_REQ_TYPE_MEM);
		if (WARN_ON(!req))
			return NULL;
	}

	req->type = KVM_HYP_REQ_TYPE_MEM;
	req->mem.dest = REQ_MEM_DEST_HYP_IOMMU;
	req->mem.sz_alloc = size;
	req->mem.nr_pages = 1;
	return NULL;
}

static void __kvm_iommu_reclaim_pages(struct hyp_pool *pool, void *p, u8 order)
{
	/*
	 * Remap all pages to cacheable, as we don't track the cacheability
	 * here. Alternatives would be a flag in the vmemmap, or trusting the
	 * driver to pass the same cacheability on free as on allocation.
	 */
	pkvm_remap_range(p, 1 << order, false);
	hyp_put_page(pool, p);
}

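/*
 * Free pages previously obtained from kvm_iommu_donate_pages(), returning
 * them to the pool they were allocated from.
 */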
void kvm_iommu_reclaim_pages(void *p, u8 order)
{
	struct pkvm_hyp_vcpu *hyp_vcpu = __get_vcpu();
	phys_addr_t phys = hyp_virt_to_phys(p);
	int i;

	if (hyp_vcpu) {
		__kvm_iommu_reclaim_pages(&pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu)->iommu_pool, p, order);
		return;
	}

	if (phys < cma_base || phys >= (cma_base + cma_size)) {
		__kvm_iommu_reclaim_pages(&iommu_system_pool, p, order);
		return;
	}

	hyp_spin_lock(&__block_pools_lock);

	for (i = 0; i < MAX_BLOCK_POOLS; i++) {
		struct hyp_pool *pool = &iommu_block_pools[i];

		if (!pool->max_order)
			continue;

		if (phys >= pool->range_start && phys < pool->range_end) {
			__kvm_iommu_reclaim_pages(pool, p, order);
			hyp_spin_unlock(&__block_pools_lock);
			return;
		}
	}

	hyp_spin_unlock(&__block_pools_lock);

	WARN_ON(1);
}

void *kvm_iommu_donate_pages_atomic(u8 order)
{
	return hyp_alloc_pages(&iommu_atomic_pool, order);
}

void kvm_iommu_reclaim_pages_atomic(void *p, u8 order)
{
	hyp_put_page(&iommu_atomic_pool, p);
}

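/*
 * Return the domain descriptor for @domain_id, optionally allocating the
 * leaf page of kvm_hyp_iommu_domains that holds it.
 */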
static struct kvm_hyp_iommu_domain *
__handle_to_domain(pkvm_handle_t domain_id, bool alloc)
{
	int idx;
	struct kvm_hyp_iommu_domain *domains;

	if (domain_id >= KVM_IOMMU_MAX_DOMAINS)
		return NULL;
	domain_id = array_index_nospec(domain_id, KVM_IOMMU_MAX_DOMAINS);

	idx = domain_id / KVM_IOMMU_DOMAINS_PER_PAGE;
	domains = (struct kvm_hyp_iommu_domain *)READ_ONCE(kvm_hyp_iommu_domains[idx]);
	if (!domains) {
		if (!alloc)
			return NULL;
		domains = kvm_iommu_donate_page();
		if (!domains)
			return NULL;
		/*
		 * handle_to_domain() does not have to be called under a lock,
		 * but even though we allocate a leaf in all cases, it's only
		 * really a valid thing to do under alloc_domain(), which uses a
		 * lock. Races are therefore a host bug and we don't need to be
		 * delicate about it.
		 */
		if (WARN_ON(cmpxchg64_relaxed(&kvm_hyp_iommu_domains[idx], 0,
					      (void *)domains) != 0)) {
			kvm_iommu_reclaim_page(domains);
			return NULL;
		}
	}
	return &domains[domain_id % KVM_IOMMU_DOMAINS_PER_PAGE];
}

static struct kvm_hyp_iommu_domain *
handle_to_domain(pkvm_handle_t domain_id)
{
	return __handle_to_domain(domain_id, true);
}

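/*
 * Get/put a reference on an allocated domain, checking that it belongs to
 * the calling context (host or guest VM).
 */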
static int domain_get(struct kvm_hyp_iommu_domain *domain)
{
	int old = atomic_fetch_inc_acquire(&domain->refs);
	struct pkvm_hyp_vcpu *hyp_vcpu = __get_vcpu();
	int ret = 0;

	BUG_ON(!old || (old + 1 < 0));

	/* Check done after the refcount is elevated to avoid racing with alloc_domain(). */
	if (!hyp_vcpu && domain->vm)
		ret = -EPERM;
	if (hyp_vcpu && (domain->vm != pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu)))
		ret = -EPERM;

	if (ret)
		atomic_dec_return_release(&domain->refs);
	return ret;
}

static void domain_put(struct kvm_hyp_iommu_domain *domain)
{
	struct pkvm_hyp_vcpu *hyp_vcpu = __get_vcpu();

	BUG_ON(!atomic_dec_return_release(&domain->refs));
	WARN_ON(!hyp_vcpu && domain->vm);
	WARN_ON(hyp_vcpu && (domain->vm != pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu)));
}

static int kvm_iommu_init_atomic_pool(struct kvm_hyp_memcache *atomic_mc)
{
	int ret;

	/* atomic_mc is optional. */
	if (!atomic_mc->head)
		return 0;
	ret = hyp_pool_init_empty(&iommu_atomic_pool, 1024 /* order = 10 */);
	if (ret)
		return ret;

	return refill_hyp_pool(&iommu_atomic_pool, atomic_mc);
}

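/*
 * One-time hypervisor-side IOMMU initialization: set up the allocator pools,
 * take ownership of the domain root table and call into the driver.
 * @atomic_mc optionally seeds the atomic pool.
 */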
int kvm_iommu_init(struct kvm_iommu_ops *ops,
		   struct kvm_hyp_memcache *atomic_mc)
{
	int i, ret;
	u64 domain_root_pfn = __hyp_pa(kvm_hyp_iommu_domains) >> PAGE_SHIFT;

	if (!ops ||
	    !ops->init ||
	    !ops->alloc_domain ||
	    !ops->free_domain ||
	    !ops->get_iommu_by_id)
		return -ENODEV;

	ret = hyp_pool_init_empty(&iommu_system_pool, 64);
	if (ret)
		return ret;

	ret = __pkvm_host_donate_hyp(domain_root_pfn,
				     KVM_IOMMU_DOMAINS_ROOT_ORDER_NR);
	if (ret)
		return ret;

	kvm_iommu_ops = ops;

	ret = kvm_iommu_init_atomic_pool(atomic_mc);
	if (ret)
		return ret;

	for (i = 0; i < MAX_BLOCK_POOLS; i++) {
		ret = hyp_pool_init_empty(&iommu_block_pools[i], 1);
		if (ret)
			return ret;
	}

	ret = ops->init();
	if (ret)
		goto out_reclaim_domain;

	return ret;

out_reclaim_domain:
	__pkvm_hyp_donate_host(domain_root_pfn, KVM_IOMMU_DOMAINS_ROOT_ORDER_NR);
	return ret;
}

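/* Allocate and initialize the domain identified by @domain_id. */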
int kvm_iommu_alloc_domain(pkvm_handle_t domain_id, int type)
{
	int ret = -EINVAL;
	struct kvm_hyp_iommu_domain *domain;
	struct pkvm_hyp_vcpu *hyp_vcpu = __get_vcpu();
	struct pkvm_hyp_vm *vm;

	/*
	 * Host only has access to the lower half of the domain IDs.
	 * Guest ID space is managed by the hypervisor, so it is trusted.
	 */
	if (!hyp_vcpu && (domain_id >= (KVM_IOMMU_MAX_DOMAINS >> 1)))
		return -EINVAL;

	domain = handle_to_domain(domain_id);
	if (!domain)
		return -ENOMEM;

	hyp_spin_lock(&kvm_iommu_domain_lock);
	if (atomic_read(&domain->refs))
		goto out_unlock;

	domain->domain_id = domain_id;
	ret = kvm_iommu_ops->alloc_domain(domain, type);
	if (ret)
		goto out_unlock;

	if (hyp_vcpu) {
		vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
		domain->vm = vm;
	}
	atomic_set_release(&domain->refs, 1);
out_unlock:
	hyp_spin_unlock(&kvm_iommu_domain_lock);
	return ret;
}

int kvm_iommu_free_domain(pkvm_handle_t domain_id)
{
	int ret = 0;
	struct kvm_hyp_iommu_domain *domain;
	struct pkvm_hyp_vcpu *hyp_vcpu = __get_vcpu();
	struct pkvm_hyp_vm *vm = NULL;

	domain = handle_to_domain(domain_id);
	if (!domain)
		return -EINVAL;

	hyp_spin_lock(&kvm_iommu_domain_lock);
	if (hyp_vcpu)
		vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);

	if (WARN_ON(atomic_cmpxchg_acquire(&domain->refs, 1, 0) != 1) || domain->vm != vm) {
		ret = -EINVAL;
		goto out_unlock;
	}

	kvm_iommu_ops->free_domain(domain);

	memset(domain, 0, sizeof(*domain));

out_unlock:
	hyp_spin_unlock(&kvm_iommu_domain_lock);

	return ret;
}

int kvm_iommu_force_free_domain(pkvm_handle_t domain_id, struct pkvm_hyp_vm *vm)
{
	struct kvm_hyp_iommu_domain *domain = handle_to_domain(domain_id);

	BUG_ON(!domain);
	cur_context = vm->vcpus[0];

	hyp_spin_lock(&kvm_iommu_domain_lock);
	atomic_set(&domain->refs, 0);
	kvm_iommu_ops->free_domain(domain);
	memset(domain, 0, sizeof(*domain));
	hyp_spin_unlock(&kvm_iommu_domain_lock);
	cur_context = NULL;

	return 0;
}

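/*
 * Attach endpoint @endpoint_id (and @pasid) behind IOMMU @iommu_id to the
 * domain identified by @domain_id, holding a domain reference for as long
 * as the device stays attached.
 */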
int kvm_iommu_attach_dev(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
			 u32 endpoint_id, u32 pasid, u32 pasid_bits,
			 unsigned long flags)
{
	int ret;
	struct kvm_hyp_iommu *iommu;
	struct kvm_hyp_iommu_domain *domain;
	struct pkvm_hyp_vcpu *hyp_vcpu = __get_vcpu();
	struct pkvm_hyp_vm *vm = NULL;

	if (!kvm_iommu_ops || !kvm_iommu_ops->attach_dev)
		return -ENODEV;

	iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id);
	if (!iommu)
		return -EINVAL;

	if (hyp_vcpu)
		vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
	/*
	 * Make sure the device can't transition to or from a VM while in the
	 * middle of an attach.
	 */
	ret = pkvm_devices_get_context(iommu_id, endpoint_id, vm);
	if (ret)
		return ret;

	domain = handle_to_domain(domain_id);
	if (!domain || domain_get(domain)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	ret = kvm_iommu_ops->attach_dev(iommu, domain, endpoint_id, pasid, pasid_bits, flags);
	if (ret)
		domain_put(domain);

out_unlock:
	pkvm_devices_put_context(iommu_id, endpoint_id);
	return ret;
}

int kvm_iommu_detach_dev(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
			 u32 endpoint_id, u32 pasid)
{
	int ret;
	struct kvm_hyp_iommu *iommu;
	struct kvm_hyp_iommu_domain *domain;
	struct pkvm_hyp_vcpu *hyp_vcpu = __get_vcpu();
	struct pkvm_hyp_vm *vm = NULL;

	if (!kvm_iommu_ops || !kvm_iommu_ops->detach_dev)
		return -ENODEV;

	iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id);
	if (!iommu)
		return -EINVAL;

	if (hyp_vcpu)
		vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
	/* See kvm_iommu_attach_dev(). */
	ret = pkvm_devices_get_context(iommu_id, endpoint_id, vm);
	if (ret)
		return ret;

	domain = handle_to_domain(domain_id);
	if (!domain || atomic_read(&domain->refs) <= 1) {
		ret = -EINVAL;
		goto out_unlock;
	}

	ret = kvm_iommu_ops->detach_dev(iommu, domain, endpoint_id, pasid);
	if (ret)
		goto out_unlock;

	domain_put(domain);

out_unlock:
	pkvm_devices_put_context(iommu_id, endpoint_id);
	return ret;
}

#define IOMMU_PROT_MASK (IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE |\
			 IOMMU_NOEXEC | IOMMU_MMIO | IOMMU_PRIV)

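/*
 * Map @pgcount pages of size @pgsize from @paddr at @iova in the domain.
 * The physical range is marked as in use for DMA before mapping, and any
 * pages that couldn't be mapped are released again.
 */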
size_t kvm_iommu_map_pages(pkvm_handle_t domain_id,
			   unsigned long iova, phys_addr_t paddr, size_t pgsize,
			   size_t pgcount, int prot, unsigned long *mapped)
{
	size_t size;
	int ret;
	size_t total_mapped = 0;
	struct kvm_hyp_iommu_domain *domain;

	if (!kvm_iommu_ops || !kvm_iommu_ops->map_pages)
		return -ENODEV;

	*mapped = 0;

	if (prot & ~IOMMU_PROT_MASK)
		return -EOPNOTSUPP;

	if (__builtin_mul_overflow(pgsize, pgcount, &size) ||
	    iova + size < iova || paddr + size < paddr)
		return -E2BIG;

	if (domain_id == KVM_IOMMU_DOMAIN_IDMAP_ID)
		return -EINVAL;

	domain = handle_to_domain(domain_id);
	if (!domain || domain_get(domain))
		return -ENOENT;

	ret = __pkvm_use_dma(paddr, size, __get_vcpu());
	if (ret)
		goto out_put_domain;

	ret = kvm_iommu_ops->map_pages(domain, iova, paddr, pgsize, pgcount,
				       prot, &total_mapped);

	pgcount -= total_mapped / pgsize;
	/*
	 * Unuse the pages that haven't been mapped yet. The host calls back
	 * either to continue mapping, or to unmap and unuse what's been done
	 * so far.
	 */
	if (pgcount)
		__pkvm_unuse_dma(paddr + total_mapped, pgcount * pgsize, __get_vcpu());

	*mapped = total_mapped;

out_put_domain:
	domain_put(domain);
	/* Mask -ENOMEM, as it's passed back to the host as a memory request. */
	return ret == -ENOMEM ? 0 : ret;
}

static inline void kvm_iommu_iotlb_sync(struct kvm_hyp_iommu_domain *domain,
					struct iommu_iotlb_gather *iotlb_gather)
{
	if (kvm_iommu_ops->iotlb_sync)
		kvm_iommu_ops->iotlb_sync(domain, iotlb_gather);

	iommu_iotlb_gather_init(iotlb_gather);
}

void kvm_iommu_iotlb_gather_add_page(struct kvm_hyp_iommu_domain *domain,
				     struct iommu_iotlb_gather *gather,
				     unsigned long iova,
				     size_t size)
{
	_iommu_iotlb_add_page(domain, gather, iova, size, kvm_iommu_iotlb_sync);
}

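/*
 * Unmap @pgcount pages of size @pgsize at @iova from the domain and return
 * the amount actually unmapped.
 */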
size_t kvm_iommu_unmap_pages(pkvm_handle_t domain_id, unsigned long iova,
			     size_t pgsize, size_t pgcount)
{
	size_t size;
	size_t unmapped;
	struct kvm_hyp_iommu_domain *domain;
	struct iommu_iotlb_gather iotlb_gather;

	if (!kvm_iommu_ops || !kvm_iommu_ops->unmap_pages)
		return -ENODEV;

	if (!pgsize || !pgcount)
		return 0;

	if (__builtin_mul_overflow(pgsize, pgcount, &size) ||
	    iova + size < iova)
		return 0;

	if (domain_id == KVM_IOMMU_DOMAIN_IDMAP_ID)
		return 0;

	domain = handle_to_domain(domain_id);
	if (!domain || domain_get(domain))
		return 0;

	iommu_iotlb_gather_init(&iotlb_gather);
	/*
	 * Unlike map, the common code doesn't call __pkvm_host_unuse_dma here.
	 * That would require either walking the table with iova_to_phys (as
	 * VFIO does), then unmapping and calling it, or unmapping one leaf
	 * (page or block) at a time; both might be suboptimal.
	 * For some IOMMUs we can do two walks instead, where one only
	 * invalidates the pages and the other decrements the refcounts.
	 * As the semantics might differ between IOMMUs and are hard to
	 * standardize, we leave that to the driver.
	 */
	unmapped = kvm_iommu_ops->unmap_pages(domain, iova, pgsize,
						pgcount, &iotlb_gather);
	kvm_iommu_iotlb_sync(domain, &iotlb_gather);

	domain_put(domain);
	return unmapped;
}

phys_addr_t kvm_iommu_iova_to_phys(pkvm_handle_t domain_id, unsigned long iova)
{
	phys_addr_t phys = 0;
	struct kvm_hyp_iommu_domain *domain;

	if (!kvm_iommu_ops || !kvm_iommu_ops->iova_to_phys)
		return -ENODEV;

	if (domain_id == KVM_IOMMU_DOMAIN_IDMAP_ID)
		return iova;

	domain = handle_to_domain(domain_id);

	if (!domain || domain_get(domain))
		return 0;

	phys = kvm_iommu_ops->iova_to_phys(domain, iova);
	domain_put(domain);
	return phys;
}

bool kvm_iommu_host_dabt_handler(struct kvm_cpu_context *host_ctxt, u64 esr, u64 addr)
{
	bool ret = false;

	if (kvm_iommu_ops && kvm_iommu_ops->dabt_handler)
		ret = kvm_iommu_ops->dabt_handler(&host_ctxt->regs, esr, addr);

	if (ret)
		kvm_skip_host_instr();

	return ret;
}

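/*
 * Map a pinned scatter-gather list into the domain, stopping at the first
 * entry that fails or is only partially mapped. Returns the total size mapped.
 */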
size_t kvm_iommu_map_sg(pkvm_handle_t domain_id, unsigned long iova, struct kvm_iommu_sg *sg,
			unsigned int nent, unsigned int prot)
{
	int ret;
	size_t total_mapped = 0, mapped;
	struct kvm_hyp_iommu_domain *domain;
	phys_addr_t phys;
	size_t size, pgsize, pgcount;
	unsigned int orig_nent = nent;
	struct kvm_iommu_sg *orig_sg = sg;

	if (!kvm_iommu_ops || !kvm_iommu_ops->map_pages)
		return 0;

	if (prot & ~IOMMU_PROT_MASK)
		return 0;

	domain = handle_to_domain(domain_id);
	if (!domain || domain_get(domain))
		return 0;

	ret = hyp_pin_shared_mem(sg, sg + nent);
	if (ret)
		goto out_put_domain;

	while (nent--) {
		phys = sg->phys;
		pgsize = sg->pgsize;
		pgcount = sg->pgcount;

		if (__builtin_mul_overflow(pgsize, pgcount, &size) ||
		    iova + size < iova)
			goto out_unpin_sg;

		ret = __pkvm_use_dma(phys, size, __get_vcpu());
		if (ret)
			goto out_unpin_sg;

		mapped = 0;
		kvm_iommu_ops->map_pages(domain, iova, phys, pgsize, pgcount, prot, &mapped);
		total_mapped += mapped;
		phys += mapped;
		iova += mapped;
		/* Might need memory */
		if (mapped != size) {
			__pkvm_unuse_dma(phys, size - mapped, __get_vcpu());
			break;
		}
		sg++;
	}

out_unpin_sg:
	hyp_unpin_shared_mem(orig_sg, orig_sg + orig_nent);
out_put_domain:
	domain_put(domain);
	return total_mapped;
}

int kvm_iommu_dev_block_dma(pkvm_handle_t iommu_id, u32 endpoint_id, bool host_to_guest)
{
	struct kvm_hyp_iommu *iommu;

	if (!kvm_iommu_ops || !kvm_iommu_ops->dev_block_dma)
		return -ENODEV;

	iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id);
	if (!iommu)
		return -ENOENT;

	return kvm_iommu_ops->dev_block_dma(iommu, endpoint_id, host_to_guest);
}

static int iommu_power_on(struct kvm_power_domain *pd)
{
	struct kvm_hyp_iommu *iommu = container_of(pd, struct kvm_hyp_iommu,
						   power_domain);
	int ret;

	kvm_iommu_lock(iommu);
	ret = kvm_iommu_ops->resume ? kvm_iommu_ops->resume(iommu) : 0;
	if (!ret)
		iommu->power_is_off = false;
	kvm_iommu_unlock(iommu);
	return ret;
}

static int iommu_power_off(struct kvm_power_domain *pd)
{
	struct kvm_hyp_iommu *iommu = container_of(pd, struct kvm_hyp_iommu,
						   power_domain);
	int ret;

	kvm_iommu_lock(iommu);
	ret = kvm_iommu_ops->suspend ? kvm_iommu_ops->suspend(iommu) : 0;
	if (!ret)
		iommu->power_is_off = true;
	kvm_iommu_unlock(iommu);
	return ret;
}

static const struct kvm_power_domain_ops iommu_power_ops = {
	.power_on	= iommu_power_on,
	.power_off	= iommu_power_off,
};

/* Must be called once per IOMMU by the IOMMU driver. */
int kvm_iommu_init_device(struct kvm_hyp_iommu *iommu)
{
	kvm_iommu_lock_init(iommu);

	return pkvm_init_power_domain(&iommu->power_domain, &iommu_power_ops);
}

static inline int pkvm_to_iommu_prot(int prot)
{
	switch (prot) {
	case PKVM_HOST_MEM_PROT:
		return IOMMU_READ | IOMMU_WRITE;
	case PKVM_HOST_MMIO_PROT:
		return IOMMU_READ | IOMMU_WRITE | IOMMU_MMIO;
	case 0:
		return 0;
	default:
		/* We don't understand this protection; it might cause corruption, so panic. */
		BUG();
	}

	return 0;
}

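/*
 * Mirror a host stage-2 permission change into the identity-mapped IOMMU
 * domain, once the initial snapshot has completed.
 */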
void kvm_iommu_host_stage2_idmap(phys_addr_t start, phys_addr_t end,
				 enum kvm_pgtable_prot prot)
{
	struct kvm_hyp_iommu_domain *domain;

	if (!kvm_iommu_is_ready())
		return;

	trace_iommu_idmap(start, end, prot);

	domain = __handle_to_domain(KVM_IOMMU_DOMAIN_IDMAP_ID, false);

	kvm_iommu_ops->host_stage2_idmap(domain, start, end, pkvm_to_iommu_prot(prot));
}

void kvm_iommu_host_stage2_idmap_complete(bool map)
{
	if (!kvm_iommu_is_ready() ||
	    !kvm_iommu_ops->host_stage2_idmap_complete)
		return;

	trace_iommu_idmap_complete(map);
	kvm_iommu_ops->host_stage2_idmap_complete(map);
}

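/*
 * Page-table walker callback that replays the current host stage-2 mappings
 * into the identity-mapped domain.
 */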
static int __snapshot_host_stage2(const struct kvm_pgtable_visit_ctx *ctx,
				  enum kvm_pgtable_walk_flags visit)
{
	u64 start = ctx->addr;
	kvm_pte_t pte = *ctx->ptep;
	u32 level = ctx->level;
	struct kvm_hyp_iommu_domain *domain = ctx->arg;
	u64 end = start + kvm_granule_size(level);
	int prot = IOMMU_READ | IOMMU_WRITE;

	if (!addr_is_memory(start))
		prot |= IOMMU_MMIO;

	if (!pte || kvm_pte_valid(pte))
		kvm_iommu_ops->host_stage2_idmap(domain, start, end, prot);

	return 0;
}

int kvm_iommu_snapshot_host_stage2(struct kvm_hyp_iommu_domain *domain)
{
	int ret;
	struct kvm_pgtable_walker walker = {
		.cb	= __snapshot_host_stage2,
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.arg = domain,
	};
	struct kvm_pgtable *pgt = &host_mmu.pgt;

	hyp_spin_lock(&host_mmu.lock);
	ret = kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker);
	/* Start receiving calls to host_stage2_idmap. */
	if (!ret)
		kvm_iommu_idmap_init_done();
	hyp_spin_unlock(&host_mmu.lock);

	return ret;
}

int kvm_iommu_id_to_token(pkvm_handle_t id, u64 *out_token)
{
	if (!kvm_iommu_ops || !kvm_iommu_ops->get_iommu_token_by_id)
		return -ENODEV;
	return kvm_iommu_ops->get_iommu_token_by_id(id, out_token);
}