// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2022 Google LLC
 * Author: David Brazdil <dbrazdil@google.com>
 */

#include <linux/kvm_host.h>

#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pkvm.h>

#include <hyp/adjust_pc.h>
#include <nvhe/iommu.h>
#include <nvhe/mm.h>

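/* Initialization states of an IOMMU driver, tracked atomically per driver. */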
enum {
	IOMMU_DRIVER_NOT_READY = 0,
	IOMMU_DRIVER_INITIALIZING,
	IOMMU_DRIVER_READY,
};

struct pkvm_iommu_driver {
	const struct pkvm_iommu_ops *ops;
	atomic_t state;
};

static struct pkvm_iommu_driver iommu_drivers[PKVM_IOMMU_NR_DRIVERS];

/* IOMMU device list. Must only be accessed with host_kvm.lock held. */
static LIST_HEAD(iommu_list);

static bool iommu_finalized;
static DEFINE_HYP_SPINLOCK(iommu_registration_lock);

static void *iommu_mem_pool;
static size_t iommu_mem_remaining;

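/* Helpers for the host_kvm.lock, which protects host stage-2 and iommu_list. */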
static void assert_host_component_locked(void)
{
	hyp_assert_lock_held(&host_kvm.lock);
}

static void host_lock_component(void)
{
	hyp_spin_lock(&host_kvm.lock);
}

static void host_unlock_component(void)
{
	hyp_spin_unlock(&host_kvm.lock);
}

/*
 * Find IOMMU driver by its ID. The input ID is treated as untrusted
 * and is properly validated.
 */
static inline struct pkvm_iommu_driver *get_driver(enum pkvm_iommu_driver_id id)
{
	size_t index = (size_t)id;

	if (index >= ARRAY_SIZE(iommu_drivers))
		return NULL;

	return &iommu_drivers[index];
}

static const struct pkvm_iommu_ops *get_driver_ops(enum pkvm_iommu_driver_id id)
{
	switch (id) {
	case PKVM_IOMMU_DRIVER_S2MPU:
		return IS_ENABLED(CONFIG_KVM_S2MPU) ? &pkvm_s2mpu_ops : NULL;
	case PKVM_IOMMU_DRIVER_SYSMMU_SYNC:
		return IS_ENABLED(CONFIG_KVM_S2MPU) ? &pkvm_sysmmu_sync_ops : NULL;
	default:
		return NULL;
	}
}

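/*
 * Atomically claim a driver slot for initialization. Returns true iff the
 * state was NOT_READY and was moved to INITIALIZING by this caller.
 */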
static inline bool driver_acquire_init(struct pkvm_iommu_driver *drv)
{
	return atomic_cmpxchg_acquire(&drv->state, IOMMU_DRIVER_NOT_READY,
				      IOMMU_DRIVER_INITIALIZING)
			== IOMMU_DRIVER_NOT_READY;
}

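/* End initialization: READY on success, back to NOT_READY on failure. */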
static inline void driver_release_init(struct pkvm_iommu_driver *drv,
				       bool success)
{
	atomic_set_release(&drv->state, success ? IOMMU_DRIVER_READY
						: IOMMU_DRIVER_NOT_READY);
}

static inline bool is_driver_ready(struct pkvm_iommu_driver *drv)
{
	return atomic_read(&drv->state) == IOMMU_DRIVER_READY;
}

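/* Size of a device entry: common header plus driver-specific data, aligned. */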
static size_t __iommu_alloc_size(struct pkvm_iommu_driver *drv)
{
	return ALIGN(sizeof(struct pkvm_iommu) + drv->ops->data_size,
		     sizeof(unsigned long));
}

/* Global memory pool for allocating IOMMU list entry structs. */
static inline struct pkvm_iommu *alloc_iommu(struct pkvm_iommu_driver *drv,
					     void *mem, size_t mem_size)
{
	size_t size = __iommu_alloc_size(drv);
	void *ptr;

	assert_host_component_locked();

	/*
	 * If new memory is being provided, replace the existing pool with it.
	 * Any remaining memory in the pool is discarded.
	 */
	if (mem && mem_size) {
		iommu_mem_pool = mem;
		iommu_mem_remaining = mem_size;
	}

	if (size > iommu_mem_remaining)
		return NULL;

	ptr = iommu_mem_pool;
	iommu_mem_pool += size;
	iommu_mem_remaining -= size;
	return ptr;
}

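/* Return an entry to the pool. Only the most recent allocation can be undone. */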
static inline void free_iommu(struct pkvm_iommu_driver *drv, struct pkvm_iommu *ptr)
{
	size_t size = __iommu_alloc_size(drv);

	assert_host_component_locked();

	if (!ptr)
		return;

	/* Only allow freeing the last allocated buffer. */
	if ((void *)ptr + size != iommu_mem_pool)
		return;

	iommu_mem_pool -= size;
	iommu_mem_remaining += size;
}

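/* Check whether two physical address ranges [start, start + size) overlap. */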
static bool is_overlap(phys_addr_t r1_start, size_t r1_size,
		       phys_addr_t r2_start, size_t r2_size)
{
	phys_addr_t r1_end = r1_start + r1_size;
	phys_addr_t r2_end = r2_start + r2_size;

	return (r1_start < r2_end) && (r2_start < r1_end);
}

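/*
 * Check that a range lies within the host IPA space and does not overlap
 * any memory region known to the hypervisor, i.e. that it can only be MMIO.
 */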
static bool is_mmio_range(phys_addr_t base, size_t size)
{
	struct memblock_region *reg;
	phys_addr_t limit = BIT(host_kvm.pgt.ia_bits);
	size_t i;

	/* Check against limits of host IPA space. */
	if ((base >= limit) || !size || (size > limit - base))
		return false;

	for (i = 0; i < hyp_memblock_nr; i++) {
		reg = &hyp_memory[i];
		if (is_overlap(base, size, reg->base, reg->size))
			return false;
	}
	return true;
}

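/*
 * Page-table walker callback used by snapshot_host_stage2(). Reports ranges
 * not currently owned by the host to the driver being initialized.
 */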
static int __snapshot_host_stage2(u64 start, u64 pa_max, u32 level,
				  kvm_pte_t *ptep,
				  enum kvm_pgtable_walk_flags flags,
				  void * const arg)
{
	struct pkvm_iommu_driver * const drv = arg;
	u64 end = start + kvm_granule_size(level);
	kvm_pte_t pte = *ptep;

	/*
	 * Valid stage-2 entries are created lazily, invalid ones eagerly.
	 * Note: In the future we may need to check if [start,end) is MMIO.
	 * Note: Drivers initialize their PTs to all memory owned by the host,
	 * so we only call the driver on regions where that is not the case.
	 */
	if (pte && !kvm_pte_valid(pte))
		drv->ops->host_stage2_idmap_prepare(start, end, /*prot*/ 0);
	return 0;
}

static int snapshot_host_stage2(struct pkvm_iommu_driver * const drv)
{
	struct kvm_pgtable_walker walker = {
		.cb	= __snapshot_host_stage2,
		.arg	= drv,
		.flags	= KVM_PGTABLE_WALK_LEAF,
	};
	struct kvm_pgtable *pgt = &host_kvm.pgt;

	if (!drv->ops->host_stage2_idmap_prepare)
		return 0;

	return kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker);
}

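/* Check that a new device does not clash with any already-registered device. */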
static bool validate_against_existing_iommus(struct pkvm_iommu *dev)
{
	struct pkvm_iommu *other;

	assert_host_component_locked();

	list_for_each_entry(other, &iommu_list, list) {
		/* Device ID must be unique. */
		if (dev->id == other->id)
			return false;

		/* MMIO regions must not overlap. */
		if (is_overlap(dev->pa, dev->size, other->pa, other->size))
			return false;
	}
	return true;
}

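/* Look up a registered device by its ID, or return NULL if not found. */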
static struct pkvm_iommu *find_iommu_by_id(unsigned long id)
{
	struct pkvm_iommu *dev;

	assert_host_component_locked();

	list_for_each_entry(dev, &iommu_list, list) {
		if (dev->id == id)
			return dev;
	}
	return NULL;
}

/*
 * Initialize EL2 IOMMU driver.
 *
 * This is a common hypercall for driver initialization. Driver-specific
 * arguments are passed in a shared memory buffer. The driver is expected to
 * initialize its page-table bookkeeping.
 */
int __pkvm_iommu_driver_init(enum pkvm_iommu_driver_id id, void *data, size_t size)
{
	struct pkvm_iommu_driver *drv;
	const struct pkvm_iommu_ops *ops;
	int ret = 0;

	data = kern_hyp_va(data);

	/* New driver initialization not allowed after __pkvm_iommu_finalize(). */
	hyp_spin_lock(&iommu_registration_lock);
	if (iommu_finalized) {
		ret = -EPERM;
		goto out_unlock;
	}

	drv = get_driver(id);
	ops = get_driver_ops(id);
	if (!drv || !ops) {
		ret = -EINVAL;
		goto out_unlock;
	}

	if (!driver_acquire_init(drv)) {
		ret = -EBUSY;
		goto out_unlock;
	}

	drv->ops = ops;

	/* This can change stage-2 mappings. */
	if (ops->init) {
		ret = hyp_pin_shared_mem(data, data + size);
		if (!ret) {
			ret = ops->init(data, size);
			hyp_unpin_shared_mem(data, data + size);
		}
		if (ret)
			goto out_release;
	}

	/*
	 * Walk host stage-2 and pass current mappings to the driver. Start
	 * accepting host stage-2 updates as soon as the host lock is released.
	 */
	host_lock_component();
	ret = snapshot_host_stage2(drv);
	if (!ret)
		driver_release_init(drv, /*success=*/true);
	host_unlock_component();

out_release:
	if (ret)
		driver_release_init(drv, /*success=*/false);

out_unlock:
	hyp_spin_unlock(&iommu_registration_lock);
	return ret;
}

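/*
 * Register an IOMMU device with an already-initialized driver.
 *
 * Validates the device's MMIO range and optional parent, accepts an optional
 * memory donation for the device entry, unmaps the MMIO range from host
 * stage-2 and maps it at EL2, then adds the device to iommu_list.
 */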
int __pkvm_iommu_register(unsigned long dev_id,
			  enum pkvm_iommu_driver_id drv_id,
			  phys_addr_t dev_pa, size_t dev_size,
			  unsigned long parent_id,
			  void *kern_mem_va, size_t mem_size)
{
	struct pkvm_iommu *dev = NULL;
	struct pkvm_iommu_driver *drv;
	void *mem_va = NULL;
	int ret = 0;

	/* New device registration not allowed after __pkvm_iommu_finalize(). */
	hyp_spin_lock(&iommu_registration_lock);
	if (iommu_finalized) {
		ret = -EPERM;
		goto out_unlock;
	}

	drv = get_driver(drv_id);
	if (!drv || !is_driver_ready(drv)) {
		ret = -ENOENT;
		goto out_unlock;
	}

	if (!PAGE_ALIGNED(dev_pa) || !PAGE_ALIGNED(dev_size)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	if (!is_mmio_range(dev_pa, dev_size)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/*
	 * Accept memory donation if the host is providing new memory.
	 * Note: We do not return the memory even if there is an error later.
	 */
	if (kern_mem_va && mem_size) {
		mem_va = kern_hyp_va(kern_mem_va);

		if (!PAGE_ALIGNED(mem_va) || !PAGE_ALIGNED(mem_size)) {
			ret = -EINVAL;
			goto out_unlock;
		}

		ret = __pkvm_host_donate_hyp(hyp_virt_to_pfn(mem_va),
					     mem_size >> PAGE_SHIFT);
		if (ret)
			goto out_unlock;
	}

	host_lock_component();

	/* Allocate memory for the new device entry. */
	dev = alloc_iommu(drv, mem_va, mem_size);
	if (!dev) {
		ret = -ENOMEM;
		goto out_free;
	}

	/* Populate the new device entry. */
	*dev = (struct pkvm_iommu){
		.children = LIST_HEAD_INIT(dev->children),
		.id = dev_id,
		.ops = drv->ops,
		.pa = dev_pa,
		.size = dev_size,
	};

	if (!validate_against_existing_iommus(dev)) {
		ret = -EBUSY;
		goto out_free;
	}

	if (parent_id) {
		dev->parent = find_iommu_by_id(parent_id);
		if (!dev->parent) {
			ret = -EINVAL;
			goto out_free;
		}

		if (dev->parent->ops->validate_child) {
			ret = dev->parent->ops->validate_child(dev->parent, dev);
			if (ret)
				goto out_free;
		}
	}

	if (dev->ops->validate) {
		ret = dev->ops->validate(dev);
		if (ret)
			goto out_free;
	}

	/*
	 * Unmap the device's MMIO range from host stage-2. If registration
	 * is successful, future attempts to re-map will be blocked by
	 * pkvm_iommu_host_stage2_adjust_range.
	 */
	ret = host_stage2_unmap_dev_locked(dev_pa, dev_size);
	if (ret)
		goto out_free;

	/* Create EL2 mapping for the device. Do it last as it is irreversible. */
	dev->va = (void *)__pkvm_create_private_mapping(dev_pa, dev_size,
							PAGE_HYP_DEVICE);
	if (IS_ERR(dev->va)) {
		ret = PTR_ERR(dev->va);
		goto out_free;
	}

	/* Register device and prevent host from mapping the MMIO range. */
	list_add_tail(&dev->list, &iommu_list);
	if (dev->parent)
		list_add_tail(&dev->siblings, &dev->parent->children);

out_free:
	if (ret)
		free_iommu(drv, dev);
	host_unlock_component();

out_unlock:
	hyp_spin_unlock(&iommu_registration_lock);
	return ret;
}

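/*
 * Close the registration window. Further driver initialization or device
 * registration attempts return -EPERM.
 */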
int __pkvm_iommu_finalize(void)
{
	int ret = 0;

	hyp_spin_lock(&iommu_registration_lock);
	if (!iommu_finalized)
		iommu_finalized = true;
	else
		ret = -EPERM;
	hyp_spin_unlock(&iommu_registration_lock);
	return ret;
}

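/* Forward a host power-management event (suspend/resume) to the device's driver. */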
int __pkvm_iommu_pm_notify(unsigned long dev_id, enum pkvm_iommu_pm_event event)
{
	struct pkvm_iommu *dev;
	int ret;

	host_lock_component();
	dev = find_iommu_by_id(dev_id);
	if (dev) {
		if (event == PKVM_IOMMU_PM_SUSPEND) {
			ret = dev->ops->suspend ? dev->ops->suspend(dev) : 0;
			if (!ret)
				dev->powered = false;
		} else if (event == PKVM_IOMMU_PM_RESUME) {
			ret = dev->ops->resume ? dev->ops->resume(dev) : 0;
			if (!ret)
				dev->powered = true;
		} else {
			ret = -EINVAL;
		}
	} else {
		ret = -ENODEV;
	}
	host_unlock_component();
	return ret;
}

/*
 * Check host memory access against IOMMUs' MMIO regions.
 * Returns -EPERM if the address is within the bounds of a registered device.
 * Otherwise returns zero and adjusts boundaries of the new mapping to avoid
 * MMIO regions of registered IOMMUs.
 */
int pkvm_iommu_host_stage2_adjust_range(phys_addr_t addr, phys_addr_t *start,
					phys_addr_t *end)
{
	struct pkvm_iommu *dev;
	phys_addr_t new_start = *start;
	phys_addr_t new_end = *end;
	phys_addr_t dev_start, dev_end;

	assert_host_component_locked();

	list_for_each_entry(dev, &iommu_list, list) {
		dev_start = dev->pa;
		dev_end = dev_start + dev->size;

		if (addr < dev_start)
			new_end = min(new_end, dev_start);
		else if (addr >= dev_end)
			new_start = max(new_start, dev_end);
		else
			return -EPERM;
	}

	*start = new_start;
	*end = new_end;
	return 0;
}

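/*
 * Handle a host data abort that targets a registered device's MMIO range.
 * Returns true if a driver handled the access and the instruction was skipped.
 */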
bool pkvm_iommu_host_dabt_handler(struct kvm_cpu_context *host_ctxt, u32 esr,
				  phys_addr_t pa)
{
	struct pkvm_iommu *dev;

	assert_host_component_locked();

	list_for_each_entry(dev, &iommu_list, list) {
		if (pa < dev->pa || pa >= dev->pa + dev->size)
			continue;

		/* No 'powered' check - the host assumes it is powered. */
		if (!dev->ops->host_dabt_handler ||
		    !dev->ops->host_dabt_handler(dev, host_ctxt, esr, pa - dev->pa))
			return false;

		kvm_skip_host_instr();
		return true;
	}
	return false;
}

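/*
 * Mirror a host stage-2 identity-map change in all IOMMUs: every ready
 * driver prepares the update, then it is applied and completed on each
 * powered device.
 */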
void pkvm_iommu_host_stage2_idmap(phys_addr_t start, phys_addr_t end,
				  enum kvm_pgtable_prot prot)
{
	struct pkvm_iommu_driver *drv;
	struct pkvm_iommu *dev;
	size_t i;

	assert_host_component_locked();

	for (i = 0; i < ARRAY_SIZE(iommu_drivers); i++) {
		drv = get_driver(i);
		if (drv && is_driver_ready(drv) && drv->ops->host_stage2_idmap_prepare)
			drv->ops->host_stage2_idmap_prepare(start, end, prot);
	}

	list_for_each_entry(dev, &iommu_list, list) {
		if (dev->powered && dev->ops->host_stage2_idmap_apply)
			dev->ops->host_stage2_idmap_apply(dev, start, end);
	}

	list_for_each_entry(dev, &iommu_list, list) {
		if (dev->powered && dev->ops->host_stage2_idmap_complete)
			dev->ops->host_stage2_idmap_complete(dev);
	}
}