// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2022 Google LLC
 * Author: David Brazdil <dbrazdil@google.com>
 */

#include <linux/kvm_host.h>

#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pkvm.h>

#include <hyp/adjust_pc.h>
#include <nvhe/iommu.h>
#include <nvhe/mm.h>
#include <nvhe/modules.h>
#include <nvhe/pkvm.h>

#define DRV_ID(drv_addr)			((unsigned long)drv_addr)

enum {
	IOMMU_DRIVER_NOT_READY = 0,
	IOMMU_DRIVER_INITIALIZING,
	IOMMU_DRIVER_READY,
};

/* List of registered IOMMU drivers, protected with iommu_drv_lock. */
static LIST_HEAD(iommu_drivers);
/* IOMMU device list. Must only be accessed with host_mmu.lock held. */
static LIST_HEAD(iommu_list);

static bool iommu_finalized;
static DEFINE_HYP_SPINLOCK(iommu_registration_lock);
static DEFINE_HYP_SPINLOCK(iommu_drv_lock);

static void *iommu_mem_pool;
static size_t iommu_mem_remaining;

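/*
 * Wrappers around the host stage-2 lock (host_mmu.lock), which serializes
 * changes to the host page-table and protects iommu_list.
 */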
static void assert_host_component_locked(void)
{
	hyp_assert_lock_held(&host_mmu.lock);
}

static void host_lock_component(void)
{
	hyp_spin_lock(&host_mmu.lock);
}

static void host_unlock_component(void)
{
	hyp_spin_unlock(&host_mmu.lock);
}

/*
 * Find an IOMMU driver by its ID. The input ID is treated as untrusted
 * and is properly validated.
 */
static inline struct pkvm_iommu_driver *get_driver(unsigned long id)
{
	struct pkvm_iommu_driver *drv, *ret = NULL;

	hyp_spin_lock(&iommu_drv_lock);
	list_for_each_entry(drv, &iommu_drivers, list) {
		if (DRV_ID(drv) == id) {
			ret = drv;
			break;
		}
	}
	hyp_spin_unlock(&iommu_drv_lock);
	return ret;
}

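/*
 * Try to move the driver from NOT_READY to INITIALIZING. Returns false if
 * another CPU is already initializing it or it is already ready.
 */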
static inline bool driver_acquire_init(struct pkvm_iommu_driver *drv)
{
	return atomic_cmpxchg_acquire(&drv->state, IOMMU_DRIVER_NOT_READY,
				      IOMMU_DRIVER_INITIALIZING)
			== IOMMU_DRIVER_NOT_READY;
}

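/*
 * Leave the INITIALIZING state: mark the driver READY on success, or return
 * it to NOT_READY so that initialization can be retried.
 */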
static inline void driver_release_init(struct pkvm_iommu_driver *drv,
				       bool success)
{
	atomic_set_release(&drv->state, success ? IOMMU_DRIVER_READY
						: IOMMU_DRIVER_NOT_READY);
}

static inline bool is_driver_ready(struct pkvm_iommu_driver *drv)
{
	return atomic_read(&drv->state) == IOMMU_DRIVER_READY;
}

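/* Size of a device entry: the common struct plus driver-specific data. */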
static size_t __iommu_alloc_size(struct pkvm_iommu_driver *drv)
{
	return ALIGN(sizeof(struct pkvm_iommu) + drv->ops->data_size,
		     sizeof(unsigned long));
}

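/* Check that no driver with the same ID has been registered yet. */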
static bool validate_driver_id_unique(struct pkvm_iommu_driver *drv)
{
	struct pkvm_iommu_driver *cur;

	hyp_assert_lock_held(&iommu_drv_lock);
	list_for_each_entry(cur, &iommu_drivers, list) {
		if (DRV_ID(drv) == DRV_ID(cur))
			return false;
	}
	return true;
}

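/* Add a driver to the global list, failing if its ID is already registered. */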
static int __pkvm_register_iommu_driver(struct pkvm_iommu_driver *drv)
{
	int ret = 0;

	if (!drv)
		return -EINVAL;

	hyp_assert_lock_held(&iommu_registration_lock);
	hyp_spin_lock(&iommu_drv_lock);
	if (validate_driver_id_unique(drv))
		list_add_tail(&drv->list, &iommu_drivers);
	else
		ret = -EEXIST;
	hyp_spin_unlock(&iommu_drv_lock);
	return ret;
}

/* Global memory pool for allocating IOMMU list entry structs. */
static inline struct pkvm_iommu *alloc_iommu(struct pkvm_iommu_driver *drv,
					     void *mem, size_t mem_size)
{
	size_t size = __iommu_alloc_size(drv);
	void *ptr;

	assert_host_component_locked();

	/*
	 * If new memory is being provided, replace the existing pool with it.
	 * Any remaining memory in the pool is discarded.
	 */
	if (mem && mem_size) {
		iommu_mem_pool = mem;
		iommu_mem_remaining = mem_size;
	}

	if (size > iommu_mem_remaining)
		return NULL;

	ptr = iommu_mem_pool;
	iommu_mem_pool += size;
	iommu_mem_remaining -= size;
	return ptr;
}

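/*
 * Return a device entry to the pool. Only the most recent allocation can be
 * undone; anything else is silently ignored.
 */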
static inline void free_iommu(struct pkvm_iommu_driver *drv, struct pkvm_iommu *ptr)
{
	size_t size = __iommu_alloc_size(drv);

	assert_host_component_locked();

	if (!ptr)
		return;

	/* Only allow freeing the last allocated buffer. */
	if ((void *)ptr + size != iommu_mem_pool)
		return;

	iommu_mem_pool -= size;
	iommu_mem_remaining += size;
}

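/* Check whether two physical address ranges overlap. */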
static bool is_overlap(phys_addr_t r1_start, size_t r1_size,
		       phys_addr_t r2_start, size_t r2_size)
{
	phys_addr_t r1_end = r1_start + r1_size;
	phys_addr_t r2_end = r2_start + r2_size;

	return (r1_start < r2_end) && (r2_start < r1_end);
}

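/*
 * A range is considered MMIO if it fits within the host IPA space and does
 * not overlap any of the memblock regions describing system memory.
 */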
static bool is_mmio_range(phys_addr_t base, size_t size)
{
	struct memblock_region *reg;
	phys_addr_t limit = BIT(host_mmu.pgt.ia_bits);
	size_t i;

	/* Check against limits of host IPA space. */
	if ((base >= limit) || !size || (size > limit - base))
		return false;

	for (i = 0; i < hyp_memblock_nr; i++) {
		reg = &hyp_memory[i];
		if (is_overlap(base, size, reg->base, reg->size))
			return false;
	}
	return true;
}

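/* Page-table walker callback invoked for every leaf entry of the host stage-2. */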
static int __snapshot_host_stage2(u64 start, u64 pa_max, u32 level,
				  kvm_pte_t *ptep,
				  enum kvm_pgtable_walk_flags flags,
				  void * const arg)
{
	struct pkvm_iommu_driver * const drv = arg;
	u64 end = start + kvm_granule_size(level);
	kvm_pte_t pte = *ptep;

	/*
	 * Valid stage-2 entries are created lazily, invalid ones eagerly.
	 * Note: In the future we may need to check if [start,end) is MMIO.
	 * Note: Drivers initialize their PTs to all memory owned by the host,
	 * so we only call the driver on regions where that is not the case.
	 */
	if (pte && !kvm_pte_valid(pte))
		drv->ops->host_stage2_idmap_prepare(start, end, /*prot*/ 0);
	return 0;
}

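/*
 * Walk the host stage-2 page-table and pass its current mappings to the
 * driver via host_stage2_idmap_prepare().
 */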
static int snapshot_host_stage2(struct pkvm_iommu_driver * const drv)
{
	struct kvm_pgtable_walker walker = {
		.cb	= __snapshot_host_stage2,
		.arg	= drv,
		.flags	= KVM_PGTABLE_WALK_LEAF,
	};
	struct kvm_pgtable *pgt = &host_mmu.pgt;

	if (!drv->ops->host_stage2_idmap_prepare)
		return 0;

	return kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker);
}

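/* Reject a new device whose ID or MMIO region clashes with a registered one. */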
static bool validate_against_existing_iommus(struct pkvm_iommu *dev)
{
	struct pkvm_iommu *other;

	assert_host_component_locked();

	list_for_each_entry(other, &iommu_list, list) {
		/* Device ID must be unique. */
		if (dev->id == other->id)
			return false;

		/* MMIO regions must not overlap. */
		if (is_overlap(dev->pa, dev->size, other->pa, other->size))
			return false;
	}
	return true;
}

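/* Look up a registered device by its ID; returns NULL if not found. */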
static struct pkvm_iommu *find_iommu_by_id(unsigned long id)
{
	struct pkvm_iommu *dev;

	assert_host_component_locked();

	list_for_each_entry(dev, &iommu_list, list) {
		if (dev->id == id)
			return dev;
	}
	return NULL;
}

/*
 * Initialize an EL2 IOMMU driver.
 *
 * This is a common hypercall for driver initialization. Driver-specific
 * arguments are passed in a shared memory buffer. The driver is expected to
 * initialize its page-table bookkeeping.
 */
int __pkvm_iommu_driver_init(struct pkvm_iommu_driver *drv, void *data, size_t size)
{
	const struct pkvm_iommu_ops *ops;
	int ret = 0;

	/* New driver initialization not allowed after __pkvm_iommu_finalize(). */
	hyp_spin_lock(&iommu_registration_lock);
	if (iommu_finalized) {
		ret = -EPERM;
		goto out_unlock;
	}

	ret = __pkvm_register_iommu_driver(drv);
	if (ret)
		goto out_unlock;

	if (!drv->ops) {
		ret = -EINVAL;
		goto out_unlock;
	}

	if (!driver_acquire_init(drv)) {
		ret = -EBUSY;
		goto out_unlock;
	}

	ops = drv->ops;

	/* This can change stage-2 mappings. */
	if (ops->init) {
		ret = hyp_pin_shared_mem(data, data + size);
		if (!ret) {
			ret = ops->init(data, size);
			hyp_unpin_shared_mem(data, data + size);
		}
		if (ret)
			goto out_release;
	}

	/*
	 * Walk host stage-2 and pass current mappings to the driver. Start
	 * accepting host stage-2 updates as soon as the host lock is released.
	 */
	host_lock_component();
	ret = snapshot_host_stage2(drv);
	if (!ret)
		driver_release_init(drv, /*success=*/true);
	host_unlock_component();

out_release:
	if (ret)
		driver_release_init(drv, /*success=*/false);

out_unlock:
	hyp_spin_unlock(&iommu_registration_lock);
	return ret;
}

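/*
 * Register an IOMMU device with an already-initialized driver.
 *
 * The device's MMIO range is donated to the hypervisor so that the host can
 * no longer map it. An optional page of host memory (kern_mem_va) refills the
 * pool used for device entries.
 */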
int __pkvm_iommu_register(unsigned long dev_id, unsigned long drv_id,
			  phys_addr_t dev_pa, size_t dev_size,
			  unsigned long parent_id, u8 flags,
			  void *kern_mem_va)
{
	struct pkvm_iommu *dev = NULL;
	struct pkvm_iommu_driver *drv;
	void *mem_va = NULL;
	int ret = 0;

	/* New device registration not allowed after __pkvm_iommu_finalize(). */
	hyp_spin_lock(&iommu_registration_lock);
	if (iommu_finalized) {
		ret = -EPERM;
		goto out_unlock;
	}

	drv = get_driver(drv_id);
	if (!drv || !is_driver_ready(drv)) {
		ret = -ENOENT;
		goto out_unlock;
	}

	if (!PAGE_ALIGNED(dev_pa) || !PAGE_ALIGNED(dev_size)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	if (!is_mmio_range(dev_pa, dev_size)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/*
	 * Accept memory donation if the host is providing new memory.
	 * Note: We do not return the memory even if there is an error later.
	 */
	if (kern_mem_va) {
		mem_va = kern_hyp_va(kern_mem_va);

		if (!PAGE_ALIGNED(mem_va)) {
			ret = -EINVAL;
			goto out_unlock;
		}

		ret = __pkvm_host_donate_hyp(hyp_virt_to_pfn(mem_va), 1);
		if (ret)
			goto out_unlock;
	}

	host_lock_component();

	/* Allocate memory for the new device entry. */
	dev = alloc_iommu(drv, mem_va, PAGE_SIZE);
	if (!dev) {
		ret = -ENOMEM;
		goto out_free;
	}

	/* Populate the new device entry. */
	*dev = (struct pkvm_iommu){
		.children = LIST_HEAD_INIT(dev->children),
		.id = dev_id,
		.ops = drv->ops,
		.pa = dev_pa,
		.va = hyp_phys_to_virt(dev_pa),
		.size = dev_size,
		.flags = flags,
	};

	if (!validate_against_existing_iommus(dev)) {
		ret = -EBUSY;
		goto out_free;
	}

	if (parent_id) {
		dev->parent = find_iommu_by_id(parent_id);
		if (!dev->parent) {
			ret = -EINVAL;
			goto out_free;
		}

		if (dev->parent->ops->validate_child) {
			ret = dev->parent->ops->validate_child(dev->parent, dev);
			if (ret)
				goto out_free;
		}
	}

	if (dev->ops->validate) {
		ret = dev->ops->validate(dev);
		if (ret)
			goto out_free;
	}

	ret = __pkvm_host_donate_hyp_locked(hyp_phys_to_pfn(dev_pa),
					    PAGE_ALIGN(dev_size) >> PAGE_SHIFT);
	if (ret)
		goto out_free;

	/* Register device and prevent host from mapping the MMIO range. */
	list_add_tail(&dev->list, &iommu_list);
	if (dev->parent)
		list_add_tail(&dev->siblings, &dev->parent->children);

out_free:
	if (ret)
		free_iommu(drv, dev);
	host_unlock_component();

out_unlock:
	hyp_spin_unlock(&iommu_registration_lock);
	return ret;
}

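/*
 * Close the registration window: no new drivers or devices can be added
 * after this point.
 */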
int __pkvm_iommu_finalize(int err)
{
	int ret = 0;

	/* The error code is not currently used at EL2. */
	WARN_ON(err);

	hyp_spin_lock(&iommu_registration_lock);
	if (!iommu_finalized)
		iommu_finalized = true;
	else
		ret = -EPERM;
	hyp_spin_unlock(&iommu_registration_lock);

	return ret;
}

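/*
 * Forward a host power-management event (suspend/resume) to the device and
 * track its power state.
 */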
int __pkvm_iommu_pm_notify(unsigned long dev_id, enum pkvm_iommu_pm_event event)
{
	struct pkvm_iommu *dev;
	int ret;

	host_lock_component();
	dev = find_iommu_by_id(dev_id);
	if (dev) {
		if (event == PKVM_IOMMU_PM_SUSPEND) {
			ret = dev->ops->suspend ? dev->ops->suspend(dev) : 0;
			if (!ret)
				dev->powered = false;
		} else if (event == PKVM_IOMMU_PM_RESUME) {
			ret = dev->ops->resume ? dev->ops->resume(dev) : 0;
			if (!ret)
				dev->powered = true;
		} else {
			ret = -EINVAL;
		}
	} else {
		ret = -ENODEV;
	}
	host_unlock_component();
	return ret;
}

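/*
 * Handle a host data abort that hit a registered device's MMIO range by
 * passing it to the owning driver's host_dabt_handler(). Returns true if the
 * access was handled and the faulting instruction skipped.
 */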
bool pkvm_iommu_host_dabt_handler(struct kvm_cpu_context *host_ctxt, u32 esr,
				  phys_addr_t pa)
{
	struct pkvm_iommu *dev;

	assert_host_component_locked();

	list_for_each_entry(dev, &iommu_list, list) {
		if (pa < dev->pa || pa >= dev->pa + dev->size)
			continue;

		/* No 'powered' check - the host assumes it is powered. */
		if (!dev->ops->host_dabt_handler ||
		    !dev->ops->host_dabt_handler(dev, host_ctxt, esr, pa - dev->pa))
			return false;

		kvm_skip_host_instr();
		return true;
	}
	return false;
}

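/*
 * Propagate a host stage-2 mapping change: prepare the new mapping in every
 * ready driver, then apply and complete it on every powered device.
 */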
void pkvm_iommu_host_stage2_idmap(phys_addr_t start, phys_addr_t end,
				  enum kvm_pgtable_prot prot)
{
	struct pkvm_iommu_driver *drv;
	struct pkvm_iommu *dev;

	assert_host_component_locked();
	hyp_spin_lock(&iommu_drv_lock);
	list_for_each_entry(drv, &iommu_drivers, list) {
		if (drv && is_driver_ready(drv) && drv->ops->host_stage2_idmap_prepare)
			drv->ops->host_stage2_idmap_prepare(start, end, prot);
	}
	hyp_spin_unlock(&iommu_drv_lock);

	list_for_each_entry(dev, &iommu_list, list) {
		if (dev->powered && dev->ops->host_stage2_idmap_apply)
			dev->ops->host_stage2_idmap_apply(dev, start, end);
	}

	list_for_each_entry(dev, &iommu_list, list) {
		if (dev->powered && dev->ops->host_stage2_idmap_complete)
			dev->ops->host_stage2_idmap_complete(dev);
	}
}