// SPDX-License-Identifier: GPL-2.0
/*
 * IOMMU operations for pKVM
 *
 * Copyright (C) 2022 Linaro Ltd.
 */
#include <asm/kvm_hyp.h>
#include <asm/kvm_hypevents.h>

#include <hyp/adjust_pc.h>

#include <kvm/iommu.h>
#include <kvm/device.h>

#include <nvhe/iommu.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>

/* Only one set of ops supported, similarly to the kernel. */
struct kvm_iommu_ops *kvm_iommu_ops;
void **kvm_hyp_iommu_domains;

/* The hypervisor is non-preemptible, so cur_context can be per-CPU. */
DEFINE_PER_CPU(struct pkvm_hyp_vcpu *, __cur_context);
#define cur_context (*this_cpu_ptr(&__cur_context))

phys_addr_t cma_base;
size_t cma_size;

#define MAX_BLOCK_POOLS 16

/*
 * Common pools that can be used by the IOMMU driver to allocate pages.
 */
static struct hyp_pool iommu_system_pool;
static struct hyp_pool iommu_block_pools[MAX_BLOCK_POOLS];
static struct hyp_pool iommu_atomic_pool;

/*
 * hyp_pool->lock is dropped multiple times during a block pool reclaim, so we
 * need another global lock to serialize that operation with allocations.
 */
static DEFINE_HYP_SPINLOCK(__block_pools_lock);
static bool __block_pools_available;

static const u8 pmd_order = PMD_SHIFT - PAGE_SHIFT;

DECLARE_PER_CPU(struct kvm_hyp_req, host_hyp_reqs);

/* Protects domains in kvm_hyp_iommu_domains */
static DEFINE_HYP_SPINLOCK(kvm_iommu_domain_lock);

static atomic_t kvm_iommu_idmap_initialized;

static inline void kvm_iommu_idmap_init_done(void)
{
        atomic_set_release(&kvm_iommu_idmap_initialized, 1);
}

static inline bool kvm_iommu_is_ready(void)
{
        return atomic_read_acquire(&kvm_iommu_idmap_initialized) == 1;
}

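/*
 * Check whether a donated range is a full, PMD-aligned block that lies
 * entirely within the IOMMU CMA region. Only such blocks are turned into
 * dedicated block pools; anything else goes to the system pool.
 */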
static bool kvm_iommu_donate_from_cma(phys_addr_t phys, unsigned long order)
{
        phys_addr_t end = phys + PAGE_SIZE * (1 << order);

        if (end <= phys)
                return false;

        if (order != pmd_order)
                return false;

        if (!IS_ALIGNED(phys, PMD_SIZE))
                return false;

        if (phys < cma_base || end > cma_base + cma_size)
                return false;

        return true;
}

static struct hyp_pool *__get_empty_block_pool(phys_addr_t phys)
{
        int p;

        for (p = 0; p < MAX_BLOCK_POOLS; p++) {
                struct hyp_pool *pool = &iommu_block_pools[p];

                if (pool->max_order)
                        continue;

                if (hyp_pool_init(pool, hyp_phys_to_pfn(phys), 1 << pmd_order, 0))
                        return NULL;

                WRITE_ONCE(__block_pools_available, 1);

                return pool;
        }

        return NULL;
}

static void __repudiate_host_page(void *addr, unsigned long order,
                                  struct kvm_hyp_memcache *host_mc)
{
        push_hyp_memcache(host_mc, addr, hyp_virt_to_phys, order);
        WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1 << order));
}

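/*
 * Admit pages donated by the host into the IOMMU allocator. PMD-sized blocks
 * from the CMA region each get a dedicated block pool so they can later be
 * reclaimed as a whole; all other pages are added to the system pool.
 */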
int kvm_iommu_refill(struct kvm_hyp_memcache *host_mc)
{
        struct kvm_hyp_memcache tmp_mc = *host_mc;

        if (!kvm_iommu_ops)
                return -EINVAL;

        while (tmp_mc.nr_pages) {
                unsigned long order = FIELD_GET(~PAGE_MASK, tmp_mc.head);
                phys_addr_t phys = tmp_mc.head & PAGE_MASK;
                struct hyp_pool *pool = &iommu_system_pool;
                u64 nr_pages;
                void *addr;

                if (check_shl_overflow(1UL, order, &nr_pages) ||
                    !IS_ALIGNED(phys, PAGE_SIZE << order))
                        return -EINVAL;

                addr = admit_host_page(&tmp_mc, order);
                if (!addr)
                        return -EINVAL;
                *host_mc = tmp_mc;

                if (kvm_iommu_donate_from_cma(phys, order)) {
                        hyp_spin_lock(&__block_pools_lock);
                        pool = __get_empty_block_pool(phys);
                        hyp_spin_unlock(&__block_pools_lock);
                        if (!pool) {
                                __repudiate_host_page(addr, order, &tmp_mc);
                                *host_mc = tmp_mc;
                                return -EBUSY;
                        }
                } else {
                        hyp_virt_to_page(addr)->order = order;
                        hyp_set_page_refcounted(hyp_virt_to_page(addr));
                        hyp_put_page(pool, addr);
                }
        }

        return 0;
}

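/*
 * Give up to @target pages back to the host: first from the system pool,
 * then from any block pool that is entirely free, so that CMA blocks are
 * only ever returned whole.
 */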
void kvm_iommu_reclaim(struct kvm_hyp_memcache *host_mc, int target)
{
        unsigned long prev_nr_pages = host_mc->nr_pages;
        unsigned long block_pages = 1 << pmd_order;
        int p = 0;

        if (!kvm_iommu_ops)
                return;

        reclaim_hyp_pool(&iommu_system_pool, host_mc, target);

        target -= host_mc->nr_pages - prev_nr_pages;

        while (target > block_pages && p < MAX_BLOCK_POOLS) {
                struct hyp_pool *pool = &iommu_block_pools[p];

                hyp_spin_lock(&__block_pools_lock);

                if (hyp_pool_free_pages(pool) == block_pages) {
                        reclaim_hyp_pool(pool, host_mc, block_pages);
                        hyp_pool_init_empty(pool, 1);
                        target -= block_pages;
                }

                hyp_spin_unlock(&__block_pools_lock);
                p++;
        }
}

int kvm_iommu_reclaimable(void)
{
        unsigned long reclaimable = 0;
        int p;

        if (!kvm_iommu_ops)
                return 0;

        reclaimable += hyp_pool_free_pages(&iommu_system_pool);

        /*
         * This also accounts for blocks allocated from the CMA region. That is
         * not exactly what the shrinker wants, but we need a way to report
         * this memory to the host.
         */
        for (p = 0; p < MAX_BLOCK_POOLS; p++) {
                unsigned long __free_pages = hyp_pool_free_pages(&iommu_block_pools[p]);

                if (__free_pages == 1 << pmd_order)
                        reclaimable += __free_pages;
        }

        return reclaimable;
}

struct hyp_mgt_allocator_ops kvm_iommu_allocator_ops = {
        .refill = kvm_iommu_refill,
        .reclaim = kvm_iommu_reclaim,
        .reclaimable = kvm_iommu_reclaimable,
};

/* Return the current vcpu, or NULL for the host. */
struct pkvm_hyp_vcpu *__get_vcpu(void)
{
        struct kvm_vcpu *vcpu = this_cpu_ptr(&kvm_host_data)->host_ctxt.__hyp_running_vcpu;

        if (vcpu)
                return container_of(vcpu, struct pkvm_hyp_vcpu, vcpu);
        /* Maybe the guest is not loaded but we are in a teardown context. */
        return cur_context;
}

int iommu_pkvm_unuse_dma(u64 phys_addr, size_t size)
{
        return __pkvm_unuse_dma(phys_addr, size, __get_vcpu());
}

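/*
 * Allocate from the block pools when possible, round-robin starting at the
 * pool that last satisfied an allocation, and fall back to the system pool
 * otherwise.
 */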
static void *__kvm_iommu_alloc_pages(u8 order, struct hyp_pool **pool)
{
        static int last_block_pool;
        void *p;
        int i;

        if (!READ_ONCE(__block_pools_available))
                goto from_system_pool;

        hyp_spin_lock(&__block_pools_lock);

        i = last_block_pool;
        do {
                *pool = &iommu_block_pools[i];
                p = hyp_alloc_pages(*pool, order);
                if (p) {
                        last_block_pool = i;
                        hyp_spin_unlock(&__block_pools_lock);
                        return p;
                }

                if (++i >= MAX_BLOCK_POOLS)
                        i = 0;
        } while (i != last_block_pool);

        WRITE_ONCE(__block_pools_available, 0);

        hyp_spin_unlock(&__block_pools_lock);

from_system_pool:
        *pool = &iommu_system_pool;
        return hyp_alloc_pages(*pool, order);
}

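/*
 * Allocate pages for the IOMMU driver: from the owning VM's pool when called
 * in a vCPU context, otherwise from the hypervisor block/system pools. On
 * failure, a KVM_HYP_REQ_TYPE_MEM request is filled so that more memory can
 * be donated.
 */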
void *kvm_iommu_donate_pages(u8 order, int flags)
{
        struct kvm_hyp_req *req = this_cpu_ptr(&host_hyp_reqs);
        struct pkvm_hyp_vcpu *hyp_vcpu = __get_vcpu();
        size_t size = (1 << order) * PAGE_SIZE;
        struct hyp_pool *pool;
        void *p;

        if (hyp_vcpu) {
                pool = &pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu)->iommu_pool;
                p = hyp_alloc_pages(pool, order);
        } else {
                p = __kvm_iommu_alloc_pages(order, &pool);
        }

        if (p) {
                /*
                 * If the page request is non-cacheable, remap it as such, as
                 * all pages in the pool are mapped beforehand and assumed to
                 * be cacheable.
                 */
                if (flags & IOMMU_PAGE_NOCACHE) {
                        int ret;

                        /* Make sure all data is written before converting to non-cacheable. */
                        kvm_flush_dcache_to_poc(p, size);

                        ret = pkvm_remap_range(p, 1 << order, true);
                        if (ret) {
                                hyp_put_page(pool, p);
                                return NULL;
                        }
                }
                return p;
        }

        if (hyp_vcpu) {
                req = pkvm_hyp_req_reserve(hyp_vcpu, KVM_HYP_REQ_TYPE_MEM);
                if (WARN_ON(!req))
                        return NULL;
        }

        req->type = KVM_HYP_REQ_TYPE_MEM;
        req->mem.dest = REQ_MEM_DEST_HYP_IOMMU;
        req->mem.sz_alloc = size;
        req->mem.nr_pages = 1;
        return NULL;
}

static void __kvm_iommu_reclaim_pages(struct hyp_pool *pool, void *p, u8 order)
{
        /*
         * Remap all pages to cacheable. We don't track the cacheability here;
         * maybe use a flag in the vmemmap, or trust the driver to pass the
         * same cacheability on free as at allocation?
         */
        pkvm_remap_range(p, 1 << order, false);
        hyp_put_page(pool, p);
}

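/*
 * Return pages allocated with kvm_iommu_donate_pages() to the pool they came
 * from: the VM pool, the system pool, or the block pool owning the physical
 * range.
 */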
void kvm_iommu_reclaim_pages(void *p, u8 order)
{
        struct pkvm_hyp_vcpu *hyp_vcpu = __get_vcpu();
        phys_addr_t phys = hyp_virt_to_phys(p);
        int i;

        if (hyp_vcpu) {
                __kvm_iommu_reclaim_pages(&pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu)->iommu_pool, p, order);
                return;
        }

        if (phys < cma_base || phys >= (cma_base + cma_size)) {
                __kvm_iommu_reclaim_pages(&iommu_system_pool, p, order);
                return;
        }

        hyp_spin_lock(&__block_pools_lock);

        for (i = 0; i < MAX_BLOCK_POOLS; i++) {
                struct hyp_pool *pool = &iommu_block_pools[i];

                if (!pool->max_order)
                        continue;

                if (phys >= pool->range_start && phys < pool->range_end) {
                        __kvm_iommu_reclaim_pages(pool, p, order);
                        hyp_spin_unlock(&__block_pools_lock);
                        return;
                }
        }

        hyp_spin_unlock(&__block_pools_lock);

        WARN_ON(1);
}

void *kvm_iommu_donate_pages_atomic(u8 order)
{
        return hyp_alloc_pages(&iommu_atomic_pool, order);
}

void kvm_iommu_reclaim_pages_atomic(void *p, u8 order)
{
        hyp_put_page(&iommu_atomic_pool, p);
}

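/*
 * Look up a domain by handle in the two-level kvm_hyp_iommu_domains array,
 * allocating the leaf page on demand when @alloc is set.
 */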
static struct kvm_hyp_iommu_domain *
__handle_to_domain(pkvm_handle_t domain_id, bool alloc)
{
        int idx;
        struct kvm_hyp_iommu_domain *domains;

        if (domain_id >= KVM_IOMMU_MAX_DOMAINS)
                return NULL;
        domain_id = array_index_nospec(domain_id, KVM_IOMMU_MAX_DOMAINS);

        idx = domain_id / KVM_IOMMU_DOMAINS_PER_PAGE;
        domains = (struct kvm_hyp_iommu_domain *)READ_ONCE(kvm_hyp_iommu_domains[idx]);
        if (!domains) {
                if (!alloc)
                        return NULL;
                domains = kvm_iommu_donate_page();
                if (!domains)
                        return NULL;
                /*
                 * handle_to_domain() does not have to be called under a lock,
                 * but even though we allocate a leaf in all cases, it's only
                 * really a valid thing to do under alloc_domain(), which uses
                 * a lock. Races are therefore a host bug and we don't need to
                 * be delicate about it.
                 */
                if (WARN_ON(cmpxchg64_relaxed(&kvm_hyp_iommu_domains[idx], 0,
                                              (void *)domains) != 0)) {
                        kvm_iommu_reclaim_page(domains);
                        return NULL;
                }
        }
        return &domains[domain_id % KVM_IOMMU_DOMAINS_PER_PAGE];
}

static struct kvm_hyp_iommu_domain *
handle_to_domain(pkvm_handle_t domain_id)
{
        return __handle_to_domain(domain_id, true);
}

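/*
 * Take and drop references on a domain. The host and each guest may only use
 * their own domains; the ownership check against domain->vm is done after the
 * refcount has been elevated so it can't race with alloc_domain().
 */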
static int domain_get(struct kvm_hyp_iommu_domain *domain)
{
        int old = atomic_fetch_inc_acquire(&domain->refs);
        struct pkvm_hyp_vcpu *hyp_vcpu = __get_vcpu();
        int ret = 0;

        BUG_ON(!old || (old + 1 < 0));

        /* Check done after the refcount is elevated to avoid a race with alloc_domain(). */
        if (!hyp_vcpu && domain->vm)
                ret = -EPERM;
        if (hyp_vcpu && (domain->vm != pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu)))
                ret = -EPERM;

        if (ret)
                atomic_dec_return_release(&domain->refs);
        return ret;
}

static void domain_put(struct kvm_hyp_iommu_domain *domain)
{
        struct pkvm_hyp_vcpu *hyp_vcpu = __get_vcpu();

        BUG_ON(!atomic_dec_return_release(&domain->refs));
        WARN_ON(!hyp_vcpu && domain->vm);
        WARN_ON(hyp_vcpu && (domain->vm != pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu)));
}

static int kvm_iommu_init_atomic_pool(struct kvm_hyp_memcache *atomic_mc)
{
        int ret;

        /* atomic_mc is optional. */
        if (!atomic_mc->head)
                return 0;
        ret = hyp_pool_init_empty(&iommu_atomic_pool, 1024 /* order = 10 */);
        if (ret)
                return ret;

        return refill_hyp_pool(&iommu_atomic_pool, atomic_mc);
}

int kvm_iommu_init(struct kvm_iommu_ops *ops,
                   struct kvm_hyp_memcache *atomic_mc)
{
        int i, ret;
        u64 domain_root_pfn = __hyp_pa(kvm_hyp_iommu_domains) >> PAGE_SHIFT;

        if (!ops ||
            !ops->init ||
            !ops->alloc_domain ||
            !ops->free_domain ||
            !ops->get_iommu_by_id)
                return -ENODEV;

        ret = hyp_pool_init_empty(&iommu_system_pool, 64);
        if (ret)
                return ret;

        ret = __pkvm_host_donate_hyp(domain_root_pfn,
                                     KVM_IOMMU_DOMAINS_ROOT_ORDER_NR);
        if (ret)
                return ret;

        kvm_iommu_ops = ops;

        ret = kvm_iommu_init_atomic_pool(atomic_mc);
        if (ret)
                return ret;

        for (i = 0; i < MAX_BLOCK_POOLS; i++) {
                ret = hyp_pool_init_empty(&iommu_block_pools[i], 1);
                if (ret)
                        return ret;
        }

        ret = ops->init();
        if (ret)
                goto out_reclaim_domain;

        return ret;

out_reclaim_domain:
        __pkvm_hyp_donate_host(domain_root_pfn, KVM_IOMMU_DOMAINS_ROOT_ORDER_NR);
        return ret;
}

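/*
 * Allocate an IOMMU domain and bind it to the calling context. A domain with
 * refs == 0 is free; a successful allocation leaves it with refs == 1, and
 * each attached device holds an extra reference.
 */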
int kvm_iommu_alloc_domain(pkvm_handle_t domain_id, int type)
{
        int ret = -EINVAL;
        struct kvm_hyp_iommu_domain *domain;
        struct pkvm_hyp_vcpu *hyp_vcpu = __get_vcpu();
        struct pkvm_hyp_vm *vm;

        /*
         * The host only has access to the lower half of the domain IDs.
         * The guest ID space is managed by the hypervisor, so it is trusted.
         */
        if (!hyp_vcpu && (domain_id >= (KVM_IOMMU_MAX_DOMAINS >> 1)))
                return -EINVAL;

        domain = handle_to_domain(domain_id);
        if (!domain)
                return -ENOMEM;

        hyp_spin_lock(&kvm_iommu_domain_lock);
        if (atomic_read(&domain->refs))
                goto out_unlock;

        domain->domain_id = domain_id;
        ret = kvm_iommu_ops->alloc_domain(domain, type);
        if (ret)
                goto out_unlock;

        if (hyp_vcpu) {
                vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
                domain->vm = vm;
        }
        atomic_set_release(&domain->refs, 1);
out_unlock:
        hyp_spin_unlock(&kvm_iommu_domain_lock);
        return ret;
}

int kvm_iommu_free_domain(pkvm_handle_t domain_id)
{
        int ret = 0;
        struct kvm_hyp_iommu_domain *domain;
        struct pkvm_hyp_vcpu *hyp_vcpu = __get_vcpu();
        struct pkvm_hyp_vm *vm = NULL;

        domain = handle_to_domain(domain_id);
        if (!domain)
                return -EINVAL;

        hyp_spin_lock(&kvm_iommu_domain_lock);
        if (hyp_vcpu)
                vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);

        if (WARN_ON(atomic_cmpxchg_acquire(&domain->refs, 1, 0) != 1) || domain->vm != vm) {
                ret = -EINVAL;
                goto out_unlock;
        }

        kvm_iommu_ops->free_domain(domain);

        memset(domain, 0, sizeof(*domain));

out_unlock:
        hyp_spin_unlock(&kvm_iommu_domain_lock);

        return ret;
}

int kvm_iommu_force_free_domain(pkvm_handle_t domain_id, struct pkvm_hyp_vm *vm)
{
        struct kvm_hyp_iommu_domain *domain = handle_to_domain(domain_id);

        BUG_ON(!domain);
        cur_context = vm->vcpus[0];

        hyp_spin_lock(&kvm_iommu_domain_lock);
        atomic_set(&domain->refs, 0);
        kvm_iommu_ops->free_domain(domain);
        memset(domain, 0, sizeof(*domain));
        hyp_spin_unlock(&kvm_iommu_domain_lock);
        cur_context = NULL;

        return 0;
}

int kvm_iommu_attach_dev(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
                         u32 endpoint_id, u32 pasid, u32 pasid_bits,
                         unsigned long flags)
{
        int ret;
        struct kvm_hyp_iommu *iommu;
        struct kvm_hyp_iommu_domain *domain;
        struct pkvm_hyp_vcpu *hyp_vcpu = __get_vcpu();
        struct pkvm_hyp_vm *vm = NULL;

        if (!kvm_iommu_ops || !kvm_iommu_ops->attach_dev)
                return -ENODEV;

        iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id);
        if (!iommu)
                return -EINVAL;

        if (hyp_vcpu)
                vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
        /*
         * Make sure the device can't transition to/from VMs while in the
         * middle of an attach.
         */
        ret = pkvm_devices_get_context(iommu_id, endpoint_id, vm);
        if (ret)
                return ret;

        domain = handle_to_domain(domain_id);
        if (!domain || domain_get(domain)) {
                ret = -EINVAL;
                goto out_unlock;
        }

        ret = kvm_iommu_ops->attach_dev(iommu, domain, endpoint_id, pasid, pasid_bits, flags);
        if (ret)
                domain_put(domain);

out_unlock:
        pkvm_devices_put_context(iommu_id, endpoint_id);
        return ret;
}

int kvm_iommu_detach_dev(pkvm_handle_t iommu_id, pkvm_handle_t domain_id,
                         u32 endpoint_id, u32 pasid)
{
        int ret;
        struct kvm_hyp_iommu *iommu;
        struct kvm_hyp_iommu_domain *domain;
        struct pkvm_hyp_vcpu *hyp_vcpu = __get_vcpu();
        struct pkvm_hyp_vm *vm = NULL;

        if (!kvm_iommu_ops || !kvm_iommu_ops->detach_dev)
                return -ENODEV;

        iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id);
        if (!iommu)
                return -EINVAL;

        if (hyp_vcpu)
                vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
        /* See kvm_iommu_attach_dev(). */
        ret = pkvm_devices_get_context(iommu_id, endpoint_id, vm);
        if (ret)
                return ret;

        domain = handle_to_domain(domain_id);
        if (!domain || atomic_read(&domain->refs) <= 1) {
                ret = -EINVAL;
                goto out_unlock;
        }

        ret = kvm_iommu_ops->detach_dev(iommu, domain, endpoint_id, pasid);
        if (ret)
                goto out_unlock;

        domain_put(domain);

out_unlock:
        pkvm_devices_put_context(iommu_id, endpoint_id);
        return ret;
}

#define IOMMU_PROT_MASK (IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE |\
                         IOMMU_NOEXEC | IOMMU_MMIO | IOMMU_PRIV)

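/*
 * Map a physically contiguous range into an IOMMU domain. The range is first
 * marked as in use for DMA; whatever ends up not being mapped is released
 * again before returning, so the DMA accounting matches the actual mappings.
 */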
size_t kvm_iommu_map_pages(pkvm_handle_t domain_id,
                           unsigned long iova, phys_addr_t paddr, size_t pgsize,
                           size_t pgcount, int prot, unsigned long *mapped)
{
        size_t size;
        int ret;
        size_t total_mapped = 0;
        struct kvm_hyp_iommu_domain *domain;

        if (!kvm_iommu_ops || !kvm_iommu_ops->map_pages)
                return -ENODEV;

        *mapped = 0;

        if (prot & ~IOMMU_PROT_MASK)
                return -EOPNOTSUPP;

        if (__builtin_mul_overflow(pgsize, pgcount, &size) ||
            iova + size < iova || paddr + size < paddr)
                return -E2BIG;

        if (domain_id == KVM_IOMMU_DOMAIN_IDMAP_ID)
                return -EINVAL;

        domain = handle_to_domain(domain_id);
        if (!domain || domain_get(domain))
                return -ENOENT;

        ret = __pkvm_use_dma(paddr, size, __get_vcpu());
        if (ret)
                goto out_put_domain;

        ret = kvm_iommu_ops->map_pages(domain, iova, paddr, pgsize, pgcount,
                                       prot, &total_mapped);

        pgcount -= total_mapped / pgsize;
        /*
         * Unuse the pages that haven't been mapped yet. The host calls back
         * either to continue mapping, or to unmap and unuse what's been done
         * so far.
         */
        if (pgcount)
                __pkvm_unuse_dma(paddr + total_mapped, pgcount * pgsize, __get_vcpu());

        *mapped = total_mapped;

out_put_domain:
        domain_put(domain);
        /* Mask -ENOMEM, as it's passed as a request. */
        return ret == -ENOMEM ? 0 : ret;
}

static inline void kvm_iommu_iotlb_sync(struct kvm_hyp_iommu_domain *domain,
                                        struct iommu_iotlb_gather *iotlb_gather)
{
        if (kvm_iommu_ops->iotlb_sync)
                kvm_iommu_ops->iotlb_sync(domain, iotlb_gather);

        iommu_iotlb_gather_init(iotlb_gather);
}

void kvm_iommu_iotlb_gather_add_page(struct kvm_hyp_iommu_domain *domain,
                                     struct iommu_iotlb_gather *gather,
                                     unsigned long iova,
                                     size_t size)
{
        _iommu_iotlb_add_page(domain, gather, iova, size, kvm_iommu_iotlb_sync);
}

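/*
 * Unmap a range from an IOMMU domain and sync the IOTLB. Returns the amount
 * actually unmapped, or 0 on error; DMA unuse accounting is left to the
 * driver (see the comment below).
 */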
size_t kvm_iommu_unmap_pages(pkvm_handle_t domain_id, unsigned long iova,
                             size_t pgsize, size_t pgcount)
{
        size_t size;
        size_t unmapped;
        struct kvm_hyp_iommu_domain *domain;
        struct iommu_iotlb_gather iotlb_gather;

        if (!kvm_iommu_ops || !kvm_iommu_ops->unmap_pages)
                return -ENODEV;

        if (!pgsize || !pgcount)
                return 0;

        if (__builtin_mul_overflow(pgsize, pgcount, &size) ||
            iova + size < iova)
                return 0;

        if (domain_id == KVM_IOMMU_DOMAIN_IDMAP_ID)
                return 0;

        domain = handle_to_domain(domain_id);
        if (!domain || domain_get(domain))
                return 0;

        iommu_iotlb_gather_init(&iotlb_gather);
        /*
         * Unlike map, the common code doesn't call __pkvm_host_unuse_dma here.
         * Doing so would require either walking the table with iova_to_phys
         * (as VFIO does) before unmapping and then calling it, or unmapping
         * one leaf (page or block) at a time, both of which may be suboptimal.
         * Some IOMMUs can do two walks, one that only invalidates the pages
         * and another that decrements the refcounts. As the semantics may
         * differ between IOMMUs and are hard to standardize, we leave that to
         * the driver.
         */
        unmapped = kvm_iommu_ops->unmap_pages(domain, iova, pgsize,
                                              pgcount, &iotlb_gather);
        kvm_iommu_iotlb_sync(domain, &iotlb_gather);

        domain_put(domain);
        return unmapped;
}

phys_addr_t kvm_iommu_iova_to_phys(pkvm_handle_t domain_id, unsigned long iova)
{
        phys_addr_t phys = 0;
        struct kvm_hyp_iommu_domain *domain;

        if (!kvm_iommu_ops || !kvm_iommu_ops->iova_to_phys)
                return -ENODEV;

        if (domain_id == KVM_IOMMU_DOMAIN_IDMAP_ID)
                return iova;

        domain = handle_to_domain(domain_id);
        if (!domain || domain_get(domain))
                return 0;

        phys = kvm_iommu_ops->iova_to_phys(domain, iova);
        domain_put(domain);
        return phys;
}

bool kvm_iommu_host_dabt_handler(struct kvm_cpu_context *host_ctxt, u64 esr, u64 addr)
{
        bool ret = false;

        if (kvm_iommu_ops && kvm_iommu_ops->dabt_handler)
                ret = kvm_iommu_ops->dabt_handler(&host_ctxt->regs, esr, addr);

        if (ret)
                kvm_skip_host_instr();

        return ret;
}

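/*
 * Map a pinned scatter-gather list into a domain. Each entry is accounted
 * with __pkvm_use_dma() before mapping, and a partially mapped entry has its
 * unmapped tail released again. Returns the total amount mapped.
 */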
size_t kvm_iommu_map_sg(pkvm_handle_t domain_id, unsigned long iova, struct kvm_iommu_sg *sg,
                        unsigned int nent, unsigned int prot)
{
        int ret;
        size_t total_mapped = 0, mapped;
        struct kvm_hyp_iommu_domain *domain;
        phys_addr_t phys;
        size_t size, pgsize, pgcount;
        unsigned int orig_nent = nent;
        struct kvm_iommu_sg *orig_sg = sg;

        if (!kvm_iommu_ops || !kvm_iommu_ops->map_pages)
                return 0;

        if (prot & ~IOMMU_PROT_MASK)
                return 0;

        domain = handle_to_domain(domain_id);
        if (!domain || domain_get(domain))
                return 0;

        ret = hyp_pin_shared_mem(sg, sg + nent);
        if (ret)
                goto out_put_domain;

        while (nent--) {
                phys = sg->phys;
                pgsize = sg->pgsize;
                pgcount = sg->pgcount;

                if (__builtin_mul_overflow(pgsize, pgcount, &size) ||
                    iova + size < iova)
                        goto out_unpin_sg;

                ret = __pkvm_use_dma(phys, size, __get_vcpu());
                if (ret)
                        goto out_unpin_sg;

                mapped = 0;
                kvm_iommu_ops->map_pages(domain, iova, phys, pgsize, pgcount, prot, &mapped);
                total_mapped += mapped;
                phys += mapped;
                iova += mapped;
                /* Might need memory. */
                if (mapped != size) {
                        __pkvm_unuse_dma(phys, size - mapped, __get_vcpu());
                        break;
                }
                sg++;
        }

out_unpin_sg:
        hyp_unpin_shared_mem(orig_sg, orig_sg + orig_nent);
out_put_domain:
        domain_put(domain);
        return total_mapped;
}

int kvm_iommu_dev_block_dma(pkvm_handle_t iommu_id, u32 endpoint_id, bool host_to_guest)
{
        struct kvm_hyp_iommu *iommu;

        if (!kvm_iommu_ops || !kvm_iommu_ops->dev_block_dma)
                return -ENODEV;

        iommu = kvm_iommu_ops->get_iommu_by_id(iommu_id);
        if (!iommu)
                return -ENOENT;

        return kvm_iommu_ops->dev_block_dma(iommu, endpoint_id, host_to_guest);
}

static int iommu_power_on(struct kvm_power_domain *pd)
{
        struct kvm_hyp_iommu *iommu = container_of(pd, struct kvm_hyp_iommu,
                                                   power_domain);
        int ret;

        kvm_iommu_lock(iommu);
        ret = kvm_iommu_ops->resume ? kvm_iommu_ops->resume(iommu) : 0;
        if (!ret)
                iommu->power_is_off = false;
        kvm_iommu_unlock(iommu);
        return ret;
}

static int iommu_power_off(struct kvm_power_domain *pd)
{
        struct kvm_hyp_iommu *iommu = container_of(pd, struct kvm_hyp_iommu,
                                                   power_domain);
        int ret;

        kvm_iommu_lock(iommu);
        ret = kvm_iommu_ops->suspend ? kvm_iommu_ops->suspend(iommu) : 0;
        if (!ret)
                iommu->power_is_off = true;
        kvm_iommu_unlock(iommu);
        return ret;
}

static const struct kvm_power_domain_ops iommu_power_ops = {
        .power_on = iommu_power_on,
        .power_off = iommu_power_off,
};

/* Must be called from the IOMMU driver, once per IOMMU. */
int kvm_iommu_init_device(struct kvm_hyp_iommu *iommu)
{
        kvm_iommu_lock_init(iommu);

        return pkvm_init_power_domain(&iommu->power_domain, &iommu_power_ops);
}

static inline int pkvm_to_iommu_prot(int prot)
{
        switch (prot) {
        case PKVM_HOST_MEM_PROT:
                return IOMMU_READ | IOMMU_WRITE;
        case PKVM_HOST_MMIO_PROT:
                return IOMMU_READ | IOMMU_WRITE | IOMMU_MMIO;
        case 0:
                return 0;
        default:
                /* We don't understand this, it might cause corruption, so panic. */
                BUG();
        }

        return 0;
}

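/*
 * Mirror a host stage-2 permission change into the identity-mapped IOMMU
 * domain, once the initial snapshot of the host stage-2 has completed.
 */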
void kvm_iommu_host_stage2_idmap(phys_addr_t start, phys_addr_t end,
                                 enum kvm_pgtable_prot prot)
{
        struct kvm_hyp_iommu_domain *domain;

        if (!kvm_iommu_is_ready())
                return;

        trace_iommu_idmap(start, end, prot);

        domain = __handle_to_domain(KVM_IOMMU_DOMAIN_IDMAP_ID, false);

        kvm_iommu_ops->host_stage2_idmap(domain, start, end, pkvm_to_iommu_prot(prot));
}

void kvm_iommu_host_stage2_idmap_complete(bool map)
{
        if (!kvm_iommu_is_ready() ||
            !kvm_iommu_ops->host_stage2_idmap_complete)
                return;

        trace_iommu_idmap_complete(map);
        kvm_iommu_ops->host_stage2_idmap_complete(map);
}

static int __snapshot_host_stage2(const struct kvm_pgtable_visit_ctx *ctx,
                                  enum kvm_pgtable_walk_flags visit)
{
        u64 start = ctx->addr;
        kvm_pte_t pte = *ctx->ptep;
        u32 level = ctx->level;
        struct kvm_hyp_iommu_domain *domain = ctx->arg;
        u64 end = start + kvm_granule_size(level);
        int prot = IOMMU_READ | IOMMU_WRITE;

        if (!addr_is_memory(start))
                prot |= IOMMU_MMIO;

        if (!pte || kvm_pte_valid(pte))
                kvm_iommu_ops->host_stage2_idmap(domain, start, end, prot);

        return 0;
}

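/*
 * Populate the identity-mapped domain from the current host stage-2 page
 * table, then start mirroring further changes via host_stage2_idmap().
 */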
int kvm_iommu_snapshot_host_stage2(struct kvm_hyp_iommu_domain *domain)
{
        int ret;
        struct kvm_pgtable_walker walker = {
                .cb = __snapshot_host_stage2,
                .flags = KVM_PGTABLE_WALK_LEAF,
                .arg = domain,
        };
        struct kvm_pgtable *pgt = &host_mmu.pgt;

        hyp_spin_lock(&host_mmu.lock);
        ret = kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker);
        /* Start receiving calls to host_stage2_idmap. */
        if (!ret)
                kvm_iommu_idmap_init_done();
        hyp_spin_unlock(&host_mmu.lock);

        return ret;
}

int kvm_iommu_id_to_token(pkvm_handle_t id, u64 *out_token)
{
        if (!kvm_iommu_ops || !kvm_iommu_ops->get_iommu_token_by_id)
                return -ENODEV;
        return kvm_iommu_ops->get_iommu_token_by_id(id, out_token);
}