1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
3  */
4 #include <linux/iommu.h>
5 #include <linux/iommufd.h>
6 #include <linux/pci-ats.h>
7 #include <linux/slab.h>
8 #include <uapi/linux/iommufd.h>
9 
10 #include "../iommu-priv.h"
11 #include "io_pagetable.h"
12 #include "iommufd_private.h"
13 
14 static bool allow_unsafe_interrupts;
15 module_param(allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR);
16 MODULE_PARM_DESC(
17 	allow_unsafe_interrupts,
18 	"Allow IOMMUFD to bind to devices even if the platform cannot isolate "
19 	"the MSI interrupt window. Enabling this is a security weakness.");
20 
21 static void iommufd_group_release(struct kref *kref)
22 {
23 	struct iommufd_group *igroup =
24 		container_of(kref, struct iommufd_group, ref);
25 
26 	WARN_ON(igroup->hwpt || !list_empty(&igroup->device_list));
27 
28 	xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup,
29 		   NULL, GFP_KERNEL);
30 	iommu_group_put(igroup->group);
31 	mutex_destroy(&igroup->lock);
32 	kfree(igroup);
33 }
34 
35 static void iommufd_put_group(struct iommufd_group *group)
36 {
37 	kref_put(&group->ref, iommufd_group_release);
38 }
39 
40 static bool iommufd_group_try_get(struct iommufd_group *igroup,
41 				  struct iommu_group *group)
42 {
43 	if (!igroup)
44 		return false;
45 	/*
46 	 * group IDs cannot be re-used until the group is put back, which does
47 	 * not happen while we can still load an igroup pointer under the xa_lock.
48 	 */
49 	if (WARN_ON(igroup->group != group))
50 		return false;
51 	return kref_get_unless_zero(&igroup->ref);
52 }
53 
54 /*
55  * iommufd needs to store some more data for each iommu_group, so we keep a
56  * parallel xarray indexed by iommu_group ID to hold it instead of putting it
57  * in the core structure. To keep things simple, the iommufd_group memory is
58  * unique within the iommufd_ctx. This makes it easy to check there are no
59  * memory leaks.
60  */
61 static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx,
62 					       struct device *dev)
63 {
64 	struct iommufd_group *new_igroup;
65 	struct iommufd_group *cur_igroup;
66 	struct iommufd_group *igroup;
67 	struct iommu_group *group;
68 	unsigned int id;
69 
70 	group = iommu_group_get(dev);
71 	if (!group)
72 		return ERR_PTR(-ENODEV);
73 
74 	id = iommu_group_id(group);
75 
76 	xa_lock(&ictx->groups);
77 	igroup = xa_load(&ictx->groups, id);
78 	if (iommufd_group_try_get(igroup, group)) {
79 		xa_unlock(&ictx->groups);
80 		iommu_group_put(group);
81 		return igroup;
82 	}
83 	xa_unlock(&ictx->groups);
84 
85 	new_igroup = kzalloc(sizeof(*new_igroup), GFP_KERNEL);
86 	if (!new_igroup) {
87 		iommu_group_put(group);
88 		return ERR_PTR(-ENOMEM);
89 	}
90 
91 	kref_init(&new_igroup->ref);
92 	mutex_init(&new_igroup->lock);
93 	INIT_LIST_HEAD(&new_igroup->device_list);
94 	new_igroup->sw_msi_start = PHYS_ADDR_MAX;
95 	/* group reference moves into new_igroup */
96 	new_igroup->group = group;
97 
98 	/*
99 	 * The ictx is not additionally refcounted here because all objects using
100 	 * an igroup must put it before their destroy completes.
101 	 */
102 	new_igroup->ictx = ictx;
103 
104 	/*
105 	 * We dropped the lock so igroup is invalid. NULL is a safe and likely
106 	 * value to assume for the xa_cmpxchg algorithm.
107 	 */
108 	cur_igroup = NULL;
109 	xa_lock(&ictx->groups);
110 	while (true) {
111 		igroup = __xa_cmpxchg(&ictx->groups, id, cur_igroup, new_igroup,
112 				      GFP_KERNEL);
113 		if (xa_is_err(igroup)) {
114 			xa_unlock(&ictx->groups);
115 			iommufd_put_group(new_igroup);
116 			return ERR_PTR(xa_err(igroup));
117 		}
118 
119 		/* new_igroup was successfully installed */
120 		if (cur_igroup == igroup) {
121 			xa_unlock(&ictx->groups);
122 			return new_igroup;
123 		}
124 
125 		/* Check again if the current group is any good */
126 		if (iommufd_group_try_get(igroup, group)) {
127 			xa_unlock(&ictx->groups);
128 			iommufd_put_group(new_igroup);
129 			return igroup;
130 		}
131 		cur_igroup = igroup;
132 	}
133 }
134 
135 void iommufd_device_destroy(struct iommufd_object *obj)
136 {
137 	struct iommufd_device *idev =
138 		container_of(obj, struct iommufd_device, obj);
139 
140 	iommu_device_release_dma_owner(idev->dev);
141 	iommufd_put_group(idev->igroup);
142 	if (!iommufd_selftest_is_mock_dev(idev->dev))
143 		iommufd_ctx_put(idev->ictx);
144 }
145 
146 /**
147  * iommufd_device_bind - Bind a physical device to an iommu fd
148  * @ictx: iommufd file descriptor
149  * @dev: Pointer to a physical device struct
150  * @id: Output ID number to return to userspace for this device
151  *
152  * A successful bind establishes ownership over the device and returns a
153  * struct iommufd_device pointer, otherwise it returns an error pointer.
154  *
155  * A driver using this API must set driver_managed_dma and must not touch
156  * the device until this routine succeeds and establishes ownership.
157  *
158  * Binding a PCI device places the entire RID under iommufd control.
159  *
160  * The caller must undo this with iommufd_device_unbind()
161  */
162 struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
163 					   struct device *dev, u32 *id)
164 {
165 	struct iommufd_device *idev;
166 	struct iommufd_group *igroup;
167 	int rc;
168 
169 	/*
170 	 * iommufd always sets IOMMU_CACHE because we offer no way for userspace
171 	 * to restore cache coherency.
172 	 */
173 	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
174 		return ERR_PTR(-EINVAL);
175 
176 	igroup = iommufd_get_group(ictx, dev);
177 	if (IS_ERR(igroup))
178 		return ERR_CAST(igroup);
179 
180 	/*
181 	 * For historical compat with VFIO the insecure interrupt path is
182 	 * allowed if the module parameter is set. Secure/Isolated means that a
183 	 * MemWr operation from the device (eg a simple DMA) cannot trigger an
184 	 * interrupt outside this iommufd context.
185 	 */
186 	if (!iommufd_selftest_is_mock_dev(dev) &&
187 	    !iommu_group_has_isolated_msi(igroup->group)) {
188 		if (!allow_unsafe_interrupts) {
189 			rc = -EPERM;
190 			goto out_group_put;
191 		}
192 
193 		dev_warn(
194 			dev,
195 			"MSI interrupts are not secure, they cannot be isolated by the platform. "
196 			"Check that platform features like interrupt remapping are enabled. "
197 			"Use the \"allow_unsafe_interrupts\" module parameter to override\n");
198 	}
199 
200 	rc = iommu_device_claim_dma_owner(dev, ictx);
201 	if (rc)
202 		goto out_group_put;
203 
204 	idev = iommufd_object_alloc(ictx, idev, IOMMUFD_OBJ_DEVICE);
205 	if (IS_ERR(idev)) {
206 		rc = PTR_ERR(idev);
207 		goto out_release_owner;
208 	}
209 	idev->ictx = ictx;
210 	if (!iommufd_selftest_is_mock_dev(dev))
211 		iommufd_ctx_get(ictx);
212 	idev->dev = dev;
213 	idev->enforce_cache_coherency =
214 		device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY);
215 	/* The calling driver is a user until iommufd_device_unbind() */
216 	refcount_inc(&idev->obj.users);
217 	/* igroup refcount moves into iommufd_device */
218 	idev->igroup = igroup;
219 	mutex_init(&idev->iopf_lock);
220 
221 	/*
222 	 * If the caller fails after this success it must call
223 	 * iommufd_device_unbind() which is safe since we hold this refcount.
224 	 * This also means the device is a leaf in the graph and no other object
225 	 * can take a reference on it.
226 	 */
227 	iommufd_object_finalize(ictx, &idev->obj);
228 	*id = idev->obj.id;
229 	return idev;
230 
231 out_release_owner:
232 	iommu_device_release_dma_owner(dev);
233 out_group_put:
234 	iommufd_put_group(igroup);
235 	return ERR_PTR(rc);
236 }
237 EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD);
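
/*
 * Illustrative sketch of a bind/unbind consumer (hypothetical "example_*"
 * driver code, not part of this file). Only the iommufd_device_bind() and
 * iommufd_device_unbind() calls are real; the driver must have set
 * driver_managed_dma before calling this, as the kernel-doc above requires.
 */
struct example_bound_dev {
	struct iommufd_device *idev;
	u32 dev_id;		/* reported to userspace */
};

static int example_bind(struct example_bound_dev *ebd,
			struct iommufd_ctx *ictx, struct device *dev)
{
	struct iommufd_device *idev;

	idev = iommufd_device_bind(ictx, dev, &ebd->dev_id);
	if (IS_ERR(idev))
		return PTR_ERR(idev);
	ebd->idev = idev;
	return 0;
}

static void example_unbind(struct example_bound_dev *ebd)
{
	/* The device must already be detached from any hwpt/IOAS */
	iommufd_device_unbind(ebd->idev);
	ebd->idev = NULL;
}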
238 
239 /**
240  * iommufd_ctx_has_group - True if any device within the group is bound
241  *                         to the ictx
242  * @ictx: iommufd file descriptor
243  * @group: Pointer to a physical iommu_group struct
244  *
245  * True if any device within the group has been bound to this ictx, e.g. via
246  * iommufd_device_bind(), therefore implying ictx ownership of the group.
247  */
248 bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group)
249 {
250 	struct iommufd_object *obj;
251 	unsigned long index;
252 
253 	if (!ictx || !group)
254 		return false;
255 
256 	xa_lock(&ictx->objects);
257 	xa_for_each(&ictx->objects, index, obj) {
258 		if (obj->type == IOMMUFD_OBJ_DEVICE &&
259 		    container_of(obj, struct iommufd_device, obj)
260 				    ->igroup->group == group) {
261 			xa_unlock(&ictx->objects);
262 			return true;
263 		}
264 	}
265 	xa_unlock(&ictx->objects);
266 	return false;
267 }
268 EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, IOMMUFD);
269 
270 /**
271  * iommufd_device_unbind - Undo iommufd_device_bind()
272  * @idev: Device returned by iommufd_device_bind()
273  *
274  * Release the device from iommufd control. DMA ownership returns to
275  * unowned, with DMA controlled by the DMA API. This invalidates the
276  * iommufd_device pointer; other APIs that consume it must not be called
277  * concurrently.
278  */
279 void iommufd_device_unbind(struct iommufd_device *idev)
280 {
281 	iommufd_object_destroy_user(idev->ictx, &idev->obj);
282 }
283 EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD);
284 
285 struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev)
286 {
287 	return idev->ictx;
288 }
289 EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, IOMMUFD);
290 
291 u32 iommufd_device_to_id(struct iommufd_device *idev)
292 {
293 	return idev->obj.id;
294 }
295 EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, IOMMUFD);
296 
297 static int iommufd_group_setup_msi(struct iommufd_group *igroup,
298 				   struct iommufd_hwpt_paging *hwpt_paging)
299 {
300 	phys_addr_t sw_msi_start = igroup->sw_msi_start;
301 	int rc;
302 
303 	/*
304 	 * If the IOMMU driver gives an IOMMU_RESV_SW_MSI then it is asking us to
305 	 * call iommu_get_msi_cookie() on its behalf. This is necessary to set up
306 	 * the MSI window so iommu_dma_prepare_msi() can install pages into our
307 	 * domain after request_irq(). If this is not done, interrupts will not
308 	 * work on this domain.
309 	 *
310 	 * FIXME: This is conceptually broken for iommufd since we want to allow
311 	 * userspace to change the domains, eg switch from an identity IOAS to a
312 	 * DMA IOAS. There is currently no way to create an MSI window that
313 	 * matches what the IRQ layer actually expects in a newly created
314 	 * domain.
315 	 */
316 	if (sw_msi_start != PHYS_ADDR_MAX && !hwpt_paging->msi_cookie) {
317 		rc = iommu_get_msi_cookie(hwpt_paging->common.domain,
318 					  sw_msi_start);
319 		if (rc)
320 			return rc;
321 
322 		/*
323 		 * iommu_get_msi_cookie() can only be called once per domain;
324 		 * it returns -EBUSY on later calls.
325 		 */
326 		hwpt_paging->msi_cookie = true;
327 	}
328 	return 0;
329 }
330 
331 static int
332 iommufd_device_attach_reserved_iova(struct iommufd_device *idev,
333 				    struct iommufd_hwpt_paging *hwpt_paging)
334 {
335 	int rc;
336 
337 	lockdep_assert_held(&idev->igroup->lock);
338 
339 	rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt,
340 						 idev->dev,
341 						 &idev->igroup->sw_msi_start);
342 	if (rc)
343 		return rc;
344 
345 	if (list_empty(&idev->igroup->device_list)) {
346 		rc = iommufd_group_setup_msi(idev->igroup, hwpt_paging);
347 		if (rc) {
348 			iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt,
349 						  idev->dev);
350 			return rc;
351 		}
352 	}
353 	return 0;
354 }
355 
356 /* The device attach/detach/replace helpers for attach_handle */
357 
358 /* Check if idev is attached to igroup->hwpt */
359 static bool iommufd_device_is_attached(struct iommufd_device *idev)
360 {
361 	struct iommufd_device *cur;
362 
363 	list_for_each_entry(cur, &idev->igroup->device_list, group_item)
364 		if (cur == idev)
365 			return true;
366 	return false;
367 }
368 
369 static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
370 				      struct iommufd_device *idev)
371 {
372 	struct iommufd_attach_handle *handle;
373 	int rc;
374 
375 	lockdep_assert_held(&idev->igroup->lock);
376 
377 	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
378 	if (!handle)
379 		return -ENOMEM;
380 
381 	if (hwpt->fault) {
382 		rc = iommufd_fault_iopf_enable(idev);
383 		if (rc)
384 			goto out_free_handle;
385 	}
386 
387 	handle->idev = idev;
388 	rc = iommu_attach_group_handle(hwpt->domain, idev->igroup->group,
389 				       &handle->handle);
390 	if (rc)
391 		goto out_disable_iopf;
392 
393 	return 0;
394 
395 out_disable_iopf:
396 	if (hwpt->fault)
397 		iommufd_fault_iopf_disable(idev);
398 out_free_handle:
399 	kfree(handle);
400 	return rc;
401 }
402 
403 static struct iommufd_attach_handle *
404 iommufd_device_get_attach_handle(struct iommufd_device *idev)
405 {
406 	struct iommu_attach_handle *handle;
407 
408 	lockdep_assert_held(&idev->igroup->lock);
409 
410 	handle =
411 		iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0);
412 	if (IS_ERR(handle))
413 		return NULL;
414 	return to_iommufd_handle(handle);
415 }
416 
417 static void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
418 				       struct iommufd_device *idev)
419 {
420 	struct iommufd_attach_handle *handle;
421 
422 	handle = iommufd_device_get_attach_handle(idev);
423 	iommu_detach_group_handle(hwpt->domain, idev->igroup->group);
424 	if (hwpt->fault) {
425 		iommufd_auto_response_faults(hwpt, handle);
426 		iommufd_fault_iopf_disable(idev);
427 	}
428 	kfree(handle);
429 }
430 
431 static int iommufd_hwpt_replace_device(struct iommufd_device *idev,
432 				       struct iommufd_hw_pagetable *hwpt,
433 				       struct iommufd_hw_pagetable *old)
434 {
435 	struct iommufd_attach_handle *handle, *old_handle =
436 		iommufd_device_get_attach_handle(idev);
437 	int rc;
438 
439 	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
440 	if (!handle)
441 		return -ENOMEM;
442 
443 	if (hwpt->fault && !old->fault) {
444 		rc = iommufd_fault_iopf_enable(idev);
445 		if (rc)
446 			goto out_free_handle;
447 	}
448 
449 	handle->idev = idev;
450 	rc = iommu_replace_group_handle(idev->igroup->group, hwpt->domain,
451 					&handle->handle);
452 	if (rc)
453 		goto out_disable_iopf;
454 
455 	if (old->fault) {
456 		iommufd_auto_response_faults(hwpt, old_handle);
457 		if (!hwpt->fault)
458 			iommufd_fault_iopf_disable(idev);
459 	}
460 	kfree(old_handle);
461 
462 	return 0;
463 
464 out_disable_iopf:
465 	if (hwpt->fault && !old->fault)
466 		iommufd_fault_iopf_disable(idev);
467 out_free_handle:
468 	kfree(handle);
469 	return rc;
470 }
471 
472 int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
473 				struct iommufd_device *idev)
474 {
475 	struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
476 	int rc;
477 
478 	mutex_lock(&idev->igroup->lock);
479 
480 	if (idev->igroup->hwpt != NULL && idev->igroup->hwpt != hwpt) {
481 		rc = -EINVAL;
482 		goto err_unlock;
483 	}
484 
485 	if (hwpt_paging) {
486 		rc = iommufd_device_attach_reserved_iova(idev, hwpt_paging);
487 		if (rc)
488 			goto err_unlock;
489 	}
490 
491 	/*
492 	 * Only attach to the group once for the first device that is in the
493 	 * group. All the other devices will follow this attachment. The user
494 	 * should attach every device individually to the hwpt as the per-device
495 	 * reserved regions are only updated during individual device
496 	 * attachment.
497 	 */
498 	if (list_empty(&idev->igroup->device_list)) {
499 		rc = iommufd_hwpt_attach_device(hwpt, idev);
500 		if (rc)
501 			goto err_unresv;
502 		idev->igroup->hwpt = hwpt;
503 	}
504 	refcount_inc(&hwpt->obj.users);
505 	list_add_tail(&idev->group_item, &idev->igroup->device_list);
506 	mutex_unlock(&idev->igroup->lock);
507 	return 0;
508 err_unresv:
509 	if (hwpt_paging)
510 		iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev);
511 err_unlock:
512 	mutex_unlock(&idev->igroup->lock);
513 	return rc;
514 }
515 
516 struct iommufd_hw_pagetable *
517 iommufd_hw_pagetable_detach(struct iommufd_device *idev)
518 {
519 	struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt;
520 	struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
521 
522 	mutex_lock(&idev->igroup->lock);
523 	list_del(&idev->group_item);
524 	if (list_empty(&idev->igroup->device_list)) {
525 		iommufd_hwpt_detach_device(hwpt, idev);
526 		idev->igroup->hwpt = NULL;
527 	}
528 	if (hwpt_paging)
529 		iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev);
530 	mutex_unlock(&idev->igroup->lock);
531 
532 	/* Caller must destroy hwpt */
533 	return hwpt;
534 }
535 
536 static struct iommufd_hw_pagetable *
537 iommufd_device_do_attach(struct iommufd_device *idev,
538 			 struct iommufd_hw_pagetable *hwpt)
539 {
540 	int rc;
541 
542 	rc = iommufd_hw_pagetable_attach(hwpt, idev);
543 	if (rc)
544 		return ERR_PTR(rc);
545 	return NULL;
546 }
547 
548 static void
549 iommufd_group_remove_reserved_iova(struct iommufd_group *igroup,
550 				   struct iommufd_hwpt_paging *hwpt_paging)
551 {
552 	struct iommufd_device *cur;
553 
554 	lockdep_assert_held(&igroup->lock);
555 
556 	list_for_each_entry(cur, &igroup->device_list, group_item)
557 		iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, cur->dev);
558 }
559 
560 static int
561 iommufd_group_do_replace_reserved_iova(struct iommufd_group *igroup,
562 				       struct iommufd_hwpt_paging *hwpt_paging)
563 {
564 	struct iommufd_hwpt_paging *old_hwpt_paging;
565 	struct iommufd_device *cur;
566 	int rc;
567 
568 	lockdep_assert_held(&igroup->lock);
569 
570 	old_hwpt_paging = find_hwpt_paging(igroup->hwpt);
571 	if (!old_hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas) {
572 		list_for_each_entry(cur, &igroup->device_list, group_item) {
573 			rc = iopt_table_enforce_dev_resv_regions(
574 				&hwpt_paging->ioas->iopt, cur->dev, NULL);
575 			if (rc)
576 				goto err_unresv;
577 		}
578 	}
579 
580 	rc = iommufd_group_setup_msi(igroup, hwpt_paging);
581 	if (rc)
582 		goto err_unresv;
583 	return 0;
584 
585 err_unresv:
586 	iommufd_group_remove_reserved_iova(igroup, hwpt_paging);
587 	return rc;
588 }
589 
590 static struct iommufd_hw_pagetable *
591 iommufd_device_do_replace(struct iommufd_device *idev,
592 			  struct iommufd_hw_pagetable *hwpt)
593 {
594 	struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
595 	struct iommufd_hwpt_paging *old_hwpt_paging;
596 	struct iommufd_group *igroup = idev->igroup;
597 	struct iommufd_hw_pagetable *old_hwpt;
598 	unsigned int num_devices;
599 	int rc;
600 
601 	mutex_lock(&idev->igroup->lock);
602 
603 	if (igroup->hwpt == NULL) {
604 		rc = -EINVAL;
605 		goto err_unlock;
606 	}
607 
608 	if (!iommufd_device_is_attached(idev)) {
609 		rc = -EINVAL;
610 		goto err_unlock;
611 	}
612 
613 	if (hwpt == igroup->hwpt) {
614 		mutex_unlock(&idev->igroup->lock);
615 		return NULL;
616 	}
617 
618 	old_hwpt = igroup->hwpt;
619 	if (hwpt_paging) {
620 		rc = iommufd_group_do_replace_reserved_iova(igroup, hwpt_paging);
621 		if (rc)
622 			goto err_unlock;
623 	}
624 
625 	rc = iommufd_hwpt_replace_device(idev, hwpt, old_hwpt);
626 	if (rc)
627 		goto err_unresv;
628 
629 	old_hwpt_paging = find_hwpt_paging(old_hwpt);
630 	if (old_hwpt_paging &&
631 	    (!hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas))
632 		iommufd_group_remove_reserved_iova(igroup, old_hwpt_paging);
633 
634 	igroup->hwpt = hwpt;
635 
636 	num_devices = list_count_nodes(&igroup->device_list);
637 	/*
638 	 * Move the refcounts held by the device_list to the new hwpt. Retain a
639 	 * refcount for this thread as the caller will free it.
640 	 */
641 	refcount_add(num_devices, &hwpt->obj.users);
642 	if (num_devices > 1)
643 		WARN_ON(refcount_sub_and_test(num_devices - 1,
644 					      &old_hwpt->obj.users));
645 	mutex_unlock(&idev->igroup->lock);
646 
647 	/* Caller must destroy old_hwpt */
648 	return old_hwpt;
649 err_unresv:
650 	if (hwpt_paging)
651 		iommufd_group_remove_reserved_iova(igroup, hwpt_paging);
652 err_unlock:
653 	mutex_unlock(&idev->igroup->lock);
654 	return ERR_PTR(rc);
655 }
656 
657 typedef struct iommufd_hw_pagetable *(*attach_fn)(
658 	struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt);
659 
660 /*
661  * When automatically managing the domains we search for a compatible domain
662  * in the iopt and use it if one is found; otherwise we create a new domain.
663  * Automatic domain selection will never pick a manually created domain.
664  */
665 static struct iommufd_hw_pagetable *
666 iommufd_device_auto_get_domain(struct iommufd_device *idev,
667 			       struct iommufd_ioas *ioas, u32 *pt_id,
668 			       attach_fn do_attach)
669 {
670 	/*
671 	 * iommufd_hw_pagetable_attach() is called by
672 	 * iommufd_hw_pagetable_alloc() in immediate attachment mode, same as
673 	 * iommufd_device_do_attach(). So if we are in this mode then we prefer
674 	 * to use the immediate_attach path as it supports drivers that can't
675 	 * directly allocate a domain.
676 	 */
677 	bool immediate_attach = do_attach == iommufd_device_do_attach;
678 	struct iommufd_hw_pagetable *destroy_hwpt;
679 	struct iommufd_hwpt_paging *hwpt_paging;
680 	struct iommufd_hw_pagetable *hwpt;
681 
682 	/*
683 	 * There is no differentiation when domains are allocated, so any domain
684 	 * that is willing to attach to the device is interchangeable with any
685 	 * other.
686 	 */
687 	mutex_lock(&ioas->mutex);
688 	list_for_each_entry(hwpt_paging, &ioas->hwpt_list, hwpt_item) {
689 		if (!hwpt_paging->auto_domain)
690 			continue;
691 
692 		hwpt = &hwpt_paging->common;
693 		if (!iommufd_lock_obj(&hwpt->obj))
694 			continue;
695 		destroy_hwpt = (*do_attach)(idev, hwpt);
696 		if (IS_ERR(destroy_hwpt)) {
697 			iommufd_put_object(idev->ictx, &hwpt->obj);
698 			/*
699 			 * -EINVAL means the domain is incompatible with the
700 			 * device. Other error codes should propagate to
701 			 * userspace as failure. Success means the domain is
702 			 * attached.
703 			 */
704 			if (PTR_ERR(destroy_hwpt) == -EINVAL)
705 				continue;
706 			goto out_unlock;
707 		}
708 		*pt_id = hwpt->obj.id;
709 		iommufd_put_object(idev->ictx, &hwpt->obj);
710 		goto out_unlock;
711 	}
712 
713 	hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, 0,
714 						immediate_attach, NULL);
715 	if (IS_ERR(hwpt_paging)) {
716 		destroy_hwpt = ERR_CAST(hwpt_paging);
717 		goto out_unlock;
718 	}
719 	hwpt = &hwpt_paging->common;
720 
721 	if (!immediate_attach) {
722 		destroy_hwpt = (*do_attach)(idev, hwpt);
723 		if (IS_ERR(destroy_hwpt))
724 			goto out_abort;
725 	} else {
726 		destroy_hwpt = NULL;
727 	}
728 
729 	hwpt_paging->auto_domain = true;
730 	*pt_id = hwpt->obj.id;
731 
732 	iommufd_object_finalize(idev->ictx, &hwpt->obj);
733 	mutex_unlock(&ioas->mutex);
734 	return destroy_hwpt;
735 
736 out_abort:
737 	iommufd_object_abort_and_destroy(idev->ictx, &hwpt->obj);
738 out_unlock:
739 	mutex_unlock(&ioas->mutex);
740 	return destroy_hwpt;
741 }
742 
743 static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
744 				    attach_fn do_attach)
745 {
746 	struct iommufd_hw_pagetable *destroy_hwpt;
747 	struct iommufd_object *pt_obj;
748 
749 	pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY);
750 	if (IS_ERR(pt_obj))
751 		return PTR_ERR(pt_obj);
752 
753 	switch (pt_obj->type) {
754 	case IOMMUFD_OBJ_HWPT_NESTED:
755 	case IOMMUFD_OBJ_HWPT_PAGING: {
756 		struct iommufd_hw_pagetable *hwpt =
757 			container_of(pt_obj, struct iommufd_hw_pagetable, obj);
758 
759 		destroy_hwpt = (*do_attach)(idev, hwpt);
760 		if (IS_ERR(destroy_hwpt))
761 			goto out_put_pt_obj;
762 		break;
763 	}
764 	case IOMMUFD_OBJ_IOAS: {
765 		struct iommufd_ioas *ioas =
766 			container_of(pt_obj, struct iommufd_ioas, obj);
767 
768 		destroy_hwpt = iommufd_device_auto_get_domain(idev, ioas, pt_id,
769 							      do_attach);
770 		if (IS_ERR(destroy_hwpt))
771 			goto out_put_pt_obj;
772 		break;
773 	}
774 	default:
775 		destroy_hwpt = ERR_PTR(-EINVAL);
776 		goto out_put_pt_obj;
777 	}
778 	iommufd_put_object(idev->ictx, pt_obj);
779 
780 	/* This destruction has to be after we unlock everything */
781 	if (destroy_hwpt)
782 		iommufd_hw_pagetable_put(idev->ictx, destroy_hwpt);
783 	return 0;
784 
785 out_put_pt_obj:
786 	iommufd_put_object(idev->ictx, pt_obj);
787 	return PTR_ERR(destroy_hwpt);
788 }
789 
790 /**
791  * iommufd_device_attach - Connect a device to an iommu_domain
792  * @idev: device to attach
793  * @pt_id: Input an IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING
794  *         Output the IOMMUFD_OBJ_HWPT_PAGING ID
795  *
796  * This connects the device to an iommu_domain, either automatically or manually
797  * selected. Once this completes the device can do DMA.
798  *
799  * The caller should return the resulting pt_id back to userspace.
800  * This function is undone by calling iommufd_device_detach().
801  */
802 int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
803 {
804 	int rc;
805 
806 	rc = iommufd_device_change_pt(idev, pt_id, &iommufd_device_do_attach);
807 	if (rc)
808 		return rc;
809 
810 	/*
811 	 * Pairs with iommufd_device_detach() - catches caller bugs attempting
812 	 * to destroy a device with an attachment.
813 	 */
814 	refcount_inc(&idev->obj.users);
815 	return 0;
816 }
817 EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD);
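
/*
 * Illustrative sketch of attaching a bound device to an IOAS and detaching it
 * again (hypothetical "example_*" driver code, not part of this file). Note
 * that pt_id is in/out: an IOMMUFD_OBJ_IOAS ID goes in and the automatically
 * selected IOMMUFD_OBJ_HWPT_PAGING ID comes back out.
 */
static int example_enable_dma(struct iommufd_device *idev, u32 ioas_id,
			      u32 *out_hwpt_id)
{
	u32 pt_id = ioas_id;
	int rc;

	rc = iommufd_device_attach(idev, &pt_id);
	if (rc)
		return rc;
	/* Report the resulting hwpt ID back to userspace */
	*out_hwpt_id = pt_id;
	return 0;
}

static void example_disable_dma(struct iommufd_device *idev)
{
	/* Undoes example_enable_dma(); DMA is blocked afterwards */
	iommufd_device_detach(idev);
}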
818 
819 /**
820  * iommufd_device_replace - Change the device's iommu_domain
821  * @idev: device to change
822  * @pt_id: Input an IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING
823  *         Output the IOMMUFD_OBJ_HWPT_PAGING ID
824  *
825  * This is the same as::
826  *
827  *   iommufd_device_detach();
828  *   iommufd_device_attach();
829  *
830  * If it fails then no change is made to the attachment. The iommu driver may
831  * implement this so there is no disruption in translation. This can only be
832  * called if iommufd_device_attach() has already succeeded.
833  */
834 int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id)
835 {
836 	return iommufd_device_change_pt(idev, pt_id,
837 					&iommufd_device_do_replace);
838 }
839 EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, IOMMUFD);
840 
841 /**
842  * iommufd_device_detach - Disconnect a device from an iommu_domain
843  * @idev: device to detach
844  *
845  * Undo iommufd_device_attach(). This disconnects the idev from the previously
846  * attached pt_id. The device returns to blocked DMA translation.
847  */
848 void iommufd_device_detach(struct iommufd_device *idev)
849 {
850 	struct iommufd_hw_pagetable *hwpt;
851 
852 	hwpt = iommufd_hw_pagetable_detach(idev);
853 	iommufd_hw_pagetable_put(idev->ictx, hwpt);
854 	refcount_dec(&idev->obj.users);
855 }
856 EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD);
857 
858 /*
859  * On success, it will refcount_inc() at a valid new_ioas and refcount_dec() at
860  * a valid cur_ioas (access->ioas). A caller passing in a valid new_ioas should
861  * call iommufd_put_object() if it does an iommufd_get_object() for a new_ioas.
862  */
863 static int iommufd_access_change_ioas(struct iommufd_access *access,
864 				      struct iommufd_ioas *new_ioas)
865 {
866 	u32 iopt_access_list_id = access->iopt_access_list_id;
867 	struct iommufd_ioas *cur_ioas = access->ioas;
868 	int rc;
869 
870 	lockdep_assert_held(&access->ioas_lock);
871 
872 	/* We are racing with a concurrent detach, bail */
873 	if (cur_ioas != access->ioas_unpin)
874 		return -EBUSY;
875 
876 	if (cur_ioas == new_ioas)
877 		return 0;
878 
879 	/*
880 	 * Set ioas to NULL to block any further iommufd_access_pin_pages().
881 	 * iommufd_access_unpin_pages() can continue using access->ioas_unpin.
882 	 */
883 	access->ioas = NULL;
884 
885 	if (new_ioas) {
886 		rc = iopt_add_access(&new_ioas->iopt, access);
887 		if (rc) {
888 			access->ioas = cur_ioas;
889 			return rc;
890 		}
891 		refcount_inc(&new_ioas->obj.users);
892 	}
893 
894 	if (cur_ioas) {
895 		if (access->ops->unmap) {
896 			mutex_unlock(&access->ioas_lock);
897 			access->ops->unmap(access->data, 0, ULONG_MAX);
898 			mutex_lock(&access->ioas_lock);
899 		}
900 		iopt_remove_access(&cur_ioas->iopt, access, iopt_access_list_id);
901 		refcount_dec(&cur_ioas->obj.users);
902 	}
903 
904 	access->ioas = new_ioas;
905 	access->ioas_unpin = new_ioas;
906 
907 	return 0;
908 }
909 
910 static int iommufd_access_change_ioas_id(struct iommufd_access *access, u32 id)
911 {
912 	struct iommufd_ioas *ioas = iommufd_get_ioas(access->ictx, id);
913 	int rc;
914 
915 	if (IS_ERR(ioas))
916 		return PTR_ERR(ioas);
917 	rc = iommufd_access_change_ioas(access, ioas);
918 	iommufd_put_object(access->ictx, &ioas->obj);
919 	return rc;
920 }
921 
922 void iommufd_access_destroy_object(struct iommufd_object *obj)
923 {
924 	struct iommufd_access *access =
925 		container_of(obj, struct iommufd_access, obj);
926 
927 	mutex_lock(&access->ioas_lock);
928 	if (access->ioas)
929 		WARN_ON(iommufd_access_change_ioas(access, NULL));
930 	mutex_unlock(&access->ioas_lock);
931 	iommufd_ctx_put(access->ictx);
932 }
933 
934 /**
935  * iommufd_access_create - Create an iommufd_access
936  * @ictx: iommufd file descriptor
937  * @ops: Driver's ops to associate with the access
938  * @data: Opaque data to pass into ops functions
939  * @id: Output ID number to return to userspace for this access
940  *
941  * An iommufd_access allows a driver to read/write to the IOAS without using
942  * DMA. The underlying CPU memory can be accessed using the
943  * iommufd_access_pin_pages() or iommufd_access_rw() functions.
944  *
945  * The provided ops are required to use iommufd_access_pin_pages().
946  */
947 struct iommufd_access *
948 iommufd_access_create(struct iommufd_ctx *ictx,
949 		      const struct iommufd_access_ops *ops, void *data, u32 *id)
950 {
951 	struct iommufd_access *access;
952 
953 	/*
954 	 * There is no uAPI for the access object, but to keep things symmetric
955 	 * use the object infrastructure anyhow.
956 	 */
957 	access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS);
958 	if (IS_ERR(access))
959 		return access;
960 
961 	access->data = data;
962 	access->ops = ops;
963 
964 	if (ops->needs_pin_pages)
965 		access->iova_alignment = PAGE_SIZE;
966 	else
967 		access->iova_alignment = 1;
968 
969 	/* The calling driver is a user until iommufd_access_destroy() */
970 	refcount_inc(&access->obj.users);
971 	access->ictx = ictx;
972 	iommufd_ctx_get(ictx);
973 	iommufd_object_finalize(ictx, &access->obj);
974 	*id = access->obj.id;
975 	mutex_init(&access->ioas_lock);
976 	return access;
977 }
978 EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD);
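
/*
 * Illustrative sketch of a mediated driver creating an iommufd_access and
 * attaching it to an IOAS (hypothetical "example_*" driver code, not part of
 * this file). The ops fields follow struct iommufd_access_ops as used here;
 * an unmap callback is required for pin_pages users so the core can force
 * pinned ranges to be released (see iommufd_access_notify_unmap() below).
 */
struct example_mdev {
	struct iommufd_access *access;
	u32 access_id;
};

static void example_access_unmap(void *data, unsigned long iova,
				 unsigned long length)
{
	/* Stop using and unpin anything pinned within [iova, iova + length) */
}

static const struct iommufd_access_ops example_access_ops = {
	.needs_pin_pages = 1,
	.unmap = example_access_unmap,
};

static int example_access_setup(struct example_mdev *mdev,
				struct iommufd_ctx *ictx, u32 ioas_id)
{
	int rc;

	mdev->access = iommufd_access_create(ictx, &example_access_ops, mdev,
					     &mdev->access_id);
	if (IS_ERR(mdev->access))
		return PTR_ERR(mdev->access);

	rc = iommufd_access_attach(mdev->access, ioas_id);
	if (rc)
		iommufd_access_destroy(mdev->access);
	return rc;
}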
979 
980 /**
981  * iommufd_access_destroy - Destroy an iommufd_access
982  * @access: The access to destroy
983  *
984  * The caller must stop using the access before destroying it.
985  */
986 void iommufd_access_destroy(struct iommufd_access *access)
987 {
988 	iommufd_object_destroy_user(access->ictx, &access->obj);
989 }
990 EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD);
991 
992 void iommufd_access_detach(struct iommufd_access *access)
993 {
994 	mutex_lock(&access->ioas_lock);
995 	if (WARN_ON(!access->ioas)) {
996 		mutex_unlock(&access->ioas_lock);
997 		return;
998 	}
999 	WARN_ON(iommufd_access_change_ioas(access, NULL));
1000 	mutex_unlock(&access->ioas_lock);
1001 }
1002 EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, IOMMUFD);
1003 
1004 int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
1005 {
1006 	int rc;
1007 
1008 	mutex_lock(&access->ioas_lock);
1009 	if (WARN_ON(access->ioas)) {
1010 		mutex_unlock(&access->ioas_lock);
1011 		return -EINVAL;
1012 	}
1013 
1014 	rc = iommufd_access_change_ioas_id(access, ioas_id);
1015 	mutex_unlock(&access->ioas_lock);
1016 	return rc;
1017 }
1018 EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, IOMMUFD);
1019 
1020 int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id)
1021 {
1022 	int rc;
1023 
1024 	mutex_lock(&access->ioas_lock);
1025 	if (!access->ioas) {
1026 		mutex_unlock(&access->ioas_lock);
1027 		return -ENOENT;
1028 	}
1029 	rc = iommufd_access_change_ioas_id(access, ioas_id);
1030 	mutex_unlock(&access->ioas_lock);
1031 	return rc;
1032 }
1033 EXPORT_SYMBOL_NS_GPL(iommufd_access_replace, IOMMUFD);
1034 
1035 /**
1036  * iommufd_access_notify_unmap - Notify users of an iopt to stop using it
1037  * @iopt: iopt to work on
1038  * @iova: Starting iova in the iopt
1039  * @length: Number of bytes
1040  *
1041  * After this function returns there should be no users attached to the pages
1042  * linked to this iopt that intersect with iova,length. Anyone that has attached
1043  * a user through iopt_access_pages() needs to detach it through
1044  * iommufd_access_unpin_pages() before this function returns.
1045  *
1046  * iommufd_access_destroy() will wait for any outstanding unmap callback to
1047  * complete. Once iommufd_access_destroy() returns, no unmap ops are running or
1048  * will run in the future. Due to this a driver must not create locking that
1049  * prevents unmap from completing while iommufd_access_destroy() is running.
1050  */
1051 void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
1052 				 unsigned long length)
1053 {
1054 	struct iommufd_ioas *ioas =
1055 		container_of(iopt, struct iommufd_ioas, iopt);
1056 	struct iommufd_access *access;
1057 	unsigned long index;
1058 
1059 	xa_lock(&ioas->iopt.access_list);
1060 	xa_for_each(&ioas->iopt.access_list, index, access) {
1061 		if (!iommufd_lock_obj(&access->obj))
1062 			continue;
1063 		xa_unlock(&ioas->iopt.access_list);
1064 
1065 		access->ops->unmap(access->data, iova, length);
1066 
1067 		iommufd_put_object(access->ictx, &access->obj);
1068 		xa_lock(&ioas->iopt.access_list);
1069 	}
1070 	xa_unlock(&ioas->iopt.access_list);
1071 }
1072 
1073 /**
1074  * iommufd_access_unpin_pages() - Undo iommufd_access_pin_pages
1075  * @access: IOAS access to act on
1076  * @iova: Starting IOVA
1077  * @length: Number of bytes to access
1078  *
1079  * Return the pinned pages back to the system. The caller must stop accessing
1080  * them before calling this. The iova/length must exactly match the pin call.
1081  */
1082 void iommufd_access_unpin_pages(struct iommufd_access *access,
1083 				unsigned long iova, unsigned long length)
1084 {
1085 	struct iopt_area_contig_iter iter;
1086 	struct io_pagetable *iopt;
1087 	unsigned long last_iova;
1088 	struct iopt_area *area;
1089 
1090 	if (WARN_ON(!length) ||
1091 	    WARN_ON(check_add_overflow(iova, length - 1, &last_iova)))
1092 		return;
1093 
1094 	mutex_lock(&access->ioas_lock);
1095 	/*
1096 	 * The driver must be doing something wrong if it calls this before an
1097 	 * iommufd_access_attach() or after an iommufd_access_detach().
1098 	 */
1099 	if (WARN_ON(!access->ioas_unpin)) {
1100 		mutex_unlock(&access->ioas_lock);
1101 		return;
1102 	}
1103 	iopt = &access->ioas_unpin->iopt;
1104 
1105 	down_read(&iopt->iova_rwsem);
1106 	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
1107 		iopt_area_remove_access(
1108 			area, iopt_area_iova_to_index(area, iter.cur_iova),
1109 			iopt_area_iova_to_index(
1110 				area,
1111 				min(last_iova, iopt_area_last_iova(area))));
1112 	WARN_ON(!iopt_area_contig_done(&iter));
1113 	up_read(&iopt->iova_rwsem);
1114 	mutex_unlock(&access->ioas_lock);
1115 }
1116 EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, IOMMUFD);
1117 
1118 static bool iopt_area_contig_is_aligned(struct iopt_area_contig_iter *iter)
1119 {
1120 	if (iopt_area_start_byte(iter->area, iter->cur_iova) % PAGE_SIZE)
1121 		return false;
1122 
1123 	if (!iopt_area_contig_done(iter) &&
1124 	    (iopt_area_start_byte(iter->area, iopt_area_last_iova(iter->area)) %
1125 	     PAGE_SIZE) != (PAGE_SIZE - 1))
1126 		return false;
1127 	return true;
1128 }
1129 
1130 static bool check_area_prot(struct iopt_area *area, unsigned int flags)
1131 {
1132 	if (flags & IOMMUFD_ACCESS_RW_WRITE)
1133 		return area->iommu_prot & IOMMU_WRITE;
1134 	return area->iommu_prot & IOMMU_READ;
1135 }
1136 
1137 /**
1138  * iommufd_access_pin_pages() - Return a list of pages under the iova
1139  * @access: IOAS access to act on
1140  * @iova: Starting IOVA
1141  * @length: Number of bytes to access
1142  * @out_pages: Output page list
1143  * @flags: IOMMUFD_ACCESS_RW_* flags
1144  *
1145  * Pins the pages covering @length bytes starting at @iova and returns the
1146  * struct page * pointers. These can be kmap'd by the caller for CPU access.
1147  *
1148  * The caller must perform iommufd_access_unpin_pages() when done to balance
1149  * this.
1150  *
1151  * This API always requires a page aligned iova. This happens naturally if the
1152  * ioas alignment is >= PAGE_SIZE and the iova is PAGE_SIZE aligned. However
1153  * smaller alignments have corner cases where this API can fail on otherwise
1154  * aligned iova.
1155  */
1156 int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
1157 			     unsigned long length, struct page **out_pages,
1158 			     unsigned int flags)
1159 {
1160 	struct iopt_area_contig_iter iter;
1161 	struct io_pagetable *iopt;
1162 	unsigned long last_iova;
1163 	struct iopt_area *area;
1164 	int rc;
1165 
1166 	/* Driver's ops don't support pin_pages */
1167 	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
1168 	    WARN_ON(access->iova_alignment != PAGE_SIZE || !access->ops->unmap))
1169 		return -EINVAL;
1170 
1171 	if (!length)
1172 		return -EINVAL;
1173 	if (check_add_overflow(iova, length - 1, &last_iova))
1174 		return -EOVERFLOW;
1175 
1176 	mutex_lock(&access->ioas_lock);
1177 	if (!access->ioas) {
1178 		mutex_unlock(&access->ioas_lock);
1179 		return -ENOENT;
1180 	}
1181 	iopt = &access->ioas->iopt;
1182 
1183 	down_read(&iopt->iova_rwsem);
1184 	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
1185 		unsigned long last = min(last_iova, iopt_area_last_iova(area));
1186 		unsigned long last_index = iopt_area_iova_to_index(area, last);
1187 		unsigned long index =
1188 			iopt_area_iova_to_index(area, iter.cur_iova);
1189 
1190 		if (area->prevent_access ||
1191 		    !iopt_area_contig_is_aligned(&iter)) {
1192 			rc = -EINVAL;
1193 			goto err_remove;
1194 		}
1195 
1196 		if (!check_area_prot(area, flags)) {
1197 			rc = -EPERM;
1198 			goto err_remove;
1199 		}
1200 
1201 		rc = iopt_area_add_access(area, index, last_index, out_pages,
1202 					  flags);
1203 		if (rc)
1204 			goto err_remove;
1205 		out_pages += last_index - index + 1;
1206 	}
1207 	if (!iopt_area_contig_done(&iter)) {
1208 		rc = -ENOENT;
1209 		goto err_remove;
1210 	}
1211 
1212 	up_read(&iopt->iova_rwsem);
1213 	mutex_unlock(&access->ioas_lock);
1214 	return 0;
1215 
1216 err_remove:
1217 	if (iova < iter.cur_iova) {
1218 		last_iova = iter.cur_iova - 1;
1219 		iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
1220 			iopt_area_remove_access(
1221 				area,
1222 				iopt_area_iova_to_index(area, iter.cur_iova),
1223 				iopt_area_iova_to_index(
1224 					area, min(last_iova,
1225 						  iopt_area_last_iova(area))));
1226 	}
1227 	up_read(&iopt->iova_rwsem);
1228 	mutex_unlock(&access->ioas_lock);
1229 	return rc;
1230 }
1231 EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD);
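
/*
 * Illustrative sketch of pinning a page-aligned IOVA range, reading it
 * through the CPU, and unpinning it with a matching iova/length (hypothetical
 * "example_*" driver code, not part of this file). It assumes iova and length
 * are PAGE_SIZE aligned, as the kernel-doc above requires.
 */
static int example_copy_from_iova(struct iommufd_access *access,
				  unsigned long iova, void *dst,
				  unsigned long length)
{
	unsigned long npages = length / PAGE_SIZE;
	struct page **pages;
	unsigned long i;
	int rc;

	pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	/* flags == 0 requests read access only */
	rc = iommufd_access_pin_pages(access, iova, length, pages, 0);
	if (rc)
		goto out_free;

	for (i = 0; i != npages; i++) {
		void *va = kmap_local_page(pages[i]);

		memcpy(dst + i * PAGE_SIZE, va, PAGE_SIZE);
		kunmap_local(va);
	}

	/* Must exactly match the iova/length passed to pin_pages */
	iommufd_access_unpin_pages(access, iova, length);
out_free:
	kfree(pages);
	return rc;
}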
1232 
1233 /**
1234  * iommufd_access_rw - Read or write data under the iova
1235  * @access: IOAS access to act on
1236  * @iova: Starting IOVA
1237  * @data: Kernel buffer to copy to/from
1238  * @length: Number of bytes to access
1239  * @flags: IOMMUFD_ACCESS_RW_* flags
1240  *
1241  * Copy kernel data to/from the range given by IOVA/length. If flags
1242  * indicates IOMMUFD_ACCESS_RW_KTHREAD then a large copy can be optimized
1243  * by changing it into copy_to/from_user().
1244  */
1245 int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
1246 		      void *data, size_t length, unsigned int flags)
1247 {
1248 	struct iopt_area_contig_iter iter;
1249 	struct io_pagetable *iopt;
1250 	struct iopt_area *area;
1251 	unsigned long last_iova;
1252 	int rc = -EINVAL;
1253 
1254 	if (!length)
1255 		return -EINVAL;
1256 	if (check_add_overflow(iova, length - 1, &last_iova))
1257 		return -EOVERFLOW;
1258 
1259 	mutex_lock(&access->ioas_lock);
1260 	if (!access->ioas) {
1261 		mutex_unlock(&access->ioas_lock);
1262 		return -ENOENT;
1263 	}
1264 	iopt = &access->ioas->iopt;
1265 
1266 	down_read(&iopt->iova_rwsem);
1267 	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
1268 		unsigned long last = min(last_iova, iopt_area_last_iova(area));
1269 		unsigned long bytes = (last - iter.cur_iova) + 1;
1270 
1271 		if (area->prevent_access) {
1272 			rc = -EINVAL;
1273 			goto err_out;
1274 		}
1275 
1276 		if (!check_area_prot(area, flags)) {
1277 			rc = -EPERM;
1278 			goto err_out;
1279 		}
1280 
1281 		rc = iopt_pages_rw_access(
1282 			area->pages, iopt_area_start_byte(area, iter.cur_iova),
1283 			data, bytes, flags);
1284 		if (rc)
1285 			goto err_out;
1286 		data += bytes;
1287 	}
1288 	if (!iopt_area_contig_done(&iter))
1289 		rc = -ENOENT;
1290 err_out:
1291 	up_read(&iopt->iova_rwsem);
1292 	mutex_unlock(&access->ioas_lock);
1293 	return rc;
1294 }
1295 EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD);
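
/*
 * Illustrative sketch of a small copy through iommufd_access_rw() instead of
 * pinning (hypothetical "example_*" driver code, not part of this file).
 * IOMMUFD_ACCESS_RW_WRITE selects the direction (kernel buffer into the IOVA
 * range); omitting it selects a read.
 */
static int example_write_u32(struct iommufd_access *access, unsigned long iova,
			     u32 val)
{
	return iommufd_access_rw(access, iova, &val, sizeof(val),
				 IOMMUFD_ACCESS_RW_WRITE);
}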
1296 
1297 int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
1298 {
1299 	struct iommu_hw_info *cmd = ucmd->cmd;
1300 	void __user *user_ptr = u64_to_user_ptr(cmd->data_uptr);
1301 	const struct iommu_ops *ops;
1302 	struct iommufd_device *idev;
1303 	unsigned int data_len;
1304 	unsigned int copy_len;
1305 	void *data;
1306 	int rc;
1307 
1308 	if (cmd->flags || cmd->__reserved[0] || cmd->__reserved[1] ||
1309 	    cmd->__reserved[2])
1310 		return -EOPNOTSUPP;
1311 
1312 	idev = iommufd_get_device(ucmd, cmd->dev_id);
1313 	if (IS_ERR(idev))
1314 		return PTR_ERR(idev);
1315 
1316 	ops = dev_iommu_ops(idev->dev);
1317 	if (ops->hw_info) {
1318 		data = ops->hw_info(idev->dev, &data_len, &cmd->out_data_type);
1319 		if (IS_ERR(data)) {
1320 			rc = PTR_ERR(data);
1321 			goto out_put;
1322 		}
1323 
1324 		/*
1325 		 * Drivers that have a hw_info callback should have a unique
1326 		 * iommu_hw_info_type.
1327 		 */
1328 		if (WARN_ON_ONCE(cmd->out_data_type ==
1329 				 IOMMU_HW_INFO_TYPE_NONE)) {
1330 			rc = -ENODEV;
1331 			goto out_free;
1332 		}
1333 	} else {
1334 		cmd->out_data_type = IOMMU_HW_INFO_TYPE_NONE;
1335 		data_len = 0;
1336 		data = NULL;
1337 	}
1338 
1339 	copy_len = min(cmd->data_len, data_len);
1340 	if (copy_to_user(user_ptr, data, copy_len)) {
1341 		rc = -EFAULT;
1342 		goto out_free;
1343 	}
1344 
1345 	/*
1346 	 * Zero the trailing bytes if the user buffer is bigger than the
1347 	 * data size the kernel actually has.
1348 	 */
1349 	if (copy_len < cmd->data_len) {
1350 		if (clear_user(user_ptr + copy_len, cmd->data_len - copy_len)) {
1351 			rc = -EFAULT;
1352 			goto out_free;
1353 		}
1354 	}
1355 
1356 	/*
1357 	 * We return the length the kernel supports so userspace may know what
1358 	 * the kernel capability is. It could be larger than the input buffer.
1359 	 */
1360 	cmd->data_len = data_len;
1361 
1362 	cmd->out_capabilities = 0;
1363 	if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING))
1364 		cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING;
1365 
1366 	cmd->out_max_pasid_log2 = 0;
1367 	/*
1368 	 * Currently, all iommu drivers enable PASID in the probe_device()
1369 	 * op if the iommu and device support it. So the max_pasids stored in
1370 	 * dev->iommu indicates both PASID support and enable status. A
1371 	 * non-zero dev->iommu->max_pasids means PASID is supported and
1372 	 * enabled. iommufd only reports the PASID capability to userspace
1373 	 * if it is enabled.
1374 	 */
1375 	if (idev->dev->iommu->max_pasids) {
1376 		cmd->out_max_pasid_log2 = ilog2(idev->dev->iommu->max_pasids);
1377 
1378 		if (dev_is_pci(idev->dev)) {
1379 			struct pci_dev *pdev = to_pci_dev(idev->dev);
1380 			int ctrl;
1381 
1382 			ctrl = pci_pasid_status(pdev);
1383 
1384 			WARN_ON_ONCE(ctrl < 0 ||
1385 				     !(ctrl & PCI_PASID_CTRL_ENABLE));
1386 
1387 			if (ctrl & PCI_PASID_CTRL_EXEC)
1388 				cmd->out_capabilities |=
1389 						IOMMU_HW_CAP_PCI_PASID_EXEC;
1390 			if (ctrl & PCI_PASID_CTRL_PRIV)
1391 				cmd->out_capabilities |=
1392 						IOMMU_HW_CAP_PCI_PASID_PRIV;
1393 		}
1394 	}
1395 
1396 	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
1397 out_free:
1398 	kfree(data);
1399 out_put:
1400 	iommufd_put_object(ucmd->ictx, &idev->obj);
1401 	return rc;
1402 }
1403