// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
 */
#include <linux/iommu.h>
#include <linux/iommufd.h>
#include <linux/pci-ats.h>
#include <linux/slab.h>
#include <uapi/linux/iommufd.h>

#include "../iommu-priv.h"
#include "io_pagetable.h"
#include "iommufd_private.h"

static bool allow_unsafe_interrupts;
module_param(allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(
	allow_unsafe_interrupts,
	"Allow IOMMUFD to bind to devices even if the platform cannot isolate "
	"the MSI interrupt window. Enabling this is a security weakness.");

static void iommufd_group_release(struct kref *kref)
{
	struct iommufd_group *igroup =
		container_of(kref, struct iommufd_group, ref);

	WARN_ON(igroup->hwpt || !list_empty(&igroup->device_list));

	xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup,
		   NULL, GFP_KERNEL);
	iommu_group_put(igroup->group);
	mutex_destroy(&igroup->lock);
	kfree(igroup);
}

static void iommufd_put_group(struct iommufd_group *group)
{
	kref_put(&group->ref, iommufd_group_release);
}

static bool iommufd_group_try_get(struct iommufd_group *igroup,
				  struct iommu_group *group)
{
	if (!igroup)
		return false;
	/*
	 * Group IDs cannot be re-used until the group is put back, which does
	 * not happen while we can still get an igroup pointer under the
	 * xa_lock.
	 */
	if (WARN_ON(igroup->group != group))
		return false;
	return kref_get_unless_zero(&igroup->ref);
}

/*
 * iommufd needs to store some more data for each iommu_group, we keep a
 * parallel xarray indexed by iommu_group id to hold this instead of putting it
 * in the core structure. To keep things simple the iommufd_group memory is
 * unique within the iommufd_ctx. This makes it easy to check there are no
 * memory leaks.
 */
static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx,
					       struct device *dev)
{
	struct iommufd_group *new_igroup;
	struct iommufd_group *cur_igroup;
	struct iommufd_group *igroup;
	struct iommu_group *group;
	unsigned int id;

	group = iommu_group_get(dev);
	if (!group)
		return ERR_PTR(-ENODEV);

	id = iommu_group_id(group);

	xa_lock(&ictx->groups);
	igroup = xa_load(&ictx->groups, id);
	if (iommufd_group_try_get(igroup, group)) {
		xa_unlock(&ictx->groups);
		iommu_group_put(group);
		return igroup;
	}
	xa_unlock(&ictx->groups);

	new_igroup = kzalloc(sizeof(*new_igroup), GFP_KERNEL);
	if (!new_igroup) {
		iommu_group_put(group);
		return ERR_PTR(-ENOMEM);
	}

	kref_init(&new_igroup->ref);
	mutex_init(&new_igroup->lock);
	INIT_LIST_HEAD(&new_igroup->device_list);
	new_igroup->sw_msi_start = PHYS_ADDR_MAX;
	/* group reference moves into new_igroup */
	new_igroup->group = group;

	/*
	 * The ictx is not additionally refcounted here because all objects
	 * using an igroup must put it before their destroy completes.
	 */
	new_igroup->ictx = ictx;

	/*
	 * We dropped the lock so igroup is invalid. NULL is a safe and likely
	 * value to assume for the xa_cmpxchg algorithm.
	 */
	cur_igroup = NULL;
	xa_lock(&ictx->groups);
	while (true) {
		igroup = __xa_cmpxchg(&ictx->groups, id, cur_igroup, new_igroup,
				      GFP_KERNEL);
		if (xa_is_err(igroup)) {
			xa_unlock(&ictx->groups);
			iommufd_put_group(new_igroup);
			return ERR_PTR(xa_err(igroup));
		}

		/* new_igroup was successfully installed */
		if (cur_igroup == igroup) {
			xa_unlock(&ictx->groups);
			return new_igroup;
		}

		/* Check again if the current group is any good */
		if (iommufd_group_try_get(igroup, group)) {
			xa_unlock(&ictx->groups);
			iommufd_put_group(new_igroup);
			return igroup;
		}
		cur_igroup = igroup;
	}
}

void iommufd_device_destroy(struct iommufd_object *obj)
{
	struct iommufd_device *idev =
		container_of(obj, struct iommufd_device, obj);

	iommu_device_release_dma_owner(idev->dev);
	iommufd_put_group(idev->igroup);
	if (!iommufd_selftest_is_mock_dev(idev->dev))
		iommufd_ctx_put(idev->ictx);
}

/**
 * iommufd_device_bind - Bind a physical device to an iommu fd
 * @ictx: iommufd file descriptor
 * @dev: Pointer to a physical device struct
 * @id: Output ID number to return to userspace for this device
 *
 * A successful bind establishes an ownership over the device and returns
 * struct iommufd_device pointer, otherwise returns error pointer.
 *
 * A driver using this API must set driver_managed_dma and must not touch
 * the device until this routine succeeds and establishes ownership.
 *
 * Binding a PCI device places the entire RID under iommufd control.
 *
 * The caller must undo this with iommufd_device_unbind()
 */
struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
					   struct device *dev, u32 *id)
{
	struct iommufd_device *idev;
	struct iommufd_group *igroup;
	int rc;

	/*
	 * iommufd always sets IOMMU_CACHE because we offer no way for userspace
	 * to restore cache coherency.
	 */
	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
		return ERR_PTR(-EINVAL);

	igroup = iommufd_get_group(ictx, dev);
	if (IS_ERR(igroup))
		return ERR_CAST(igroup);

	/*
	 * For historical compat with VFIO the insecure interrupt path is
	 * allowed if the module parameter is set. Secure/Isolated means that a
	 * MemWr operation from the device (eg a simple DMA) cannot trigger an
	 * interrupt outside this iommufd context.
	 */
	if (!iommufd_selftest_is_mock_dev(dev) &&
	    !iommu_group_has_isolated_msi(igroup->group)) {
		if (!allow_unsafe_interrupts) {
			rc = -EPERM;
			goto out_group_put;
		}

		dev_warn(
			dev,
			"MSI interrupts are not secure, they cannot be isolated by the platform. "
			"Check that platform features like interrupt remapping are enabled. "
			"Use the \"allow_unsafe_interrupts\" module parameter to override\n");
	}

	rc = iommu_device_claim_dma_owner(dev, ictx);
	if (rc)
		goto out_group_put;

	idev = iommufd_object_alloc(ictx, idev, IOMMUFD_OBJ_DEVICE);
	if (IS_ERR(idev)) {
		rc = PTR_ERR(idev);
		goto out_release_owner;
	}
	idev->ictx = ictx;
	if (!iommufd_selftest_is_mock_dev(dev))
		iommufd_ctx_get(ictx);
	idev->dev = dev;
	idev->enforce_cache_coherency =
		device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY);
	/* The calling driver is a user until iommufd_device_unbind() */
	refcount_inc(&idev->obj.users);
	/* igroup refcount moves into iommufd_device */
	idev->igroup = igroup;
	mutex_init(&idev->iopf_lock);

	/*
	 * If the caller fails after this success it must call
	 * iommufd_unbind_device() which is safe since we hold this refcount.
	 * This also means the device is a leaf in the graph and no other object
	 * can take a reference on it.
	 */
	iommufd_object_finalize(ictx, &idev->obj);
	*id = idev->obj.id;
	return idev;

out_release_owner:
	iommu_device_release_dma_owner(dev);
out_group_put:
	iommufd_put_group(igroup);
	return ERR_PTR(rc);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD);
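
/*
 * Usage sketch (illustrative only, not taken from an in-tree driver): a kernel
 * driver that sets driver_managed_dma and holds a struct iommufd_ctx (for
 * example one obtained from userspace's iommufd file descriptor) could bind
 * and later unbind a device roughly like this:
 *
 *	struct iommufd_device *idev;
 *	u32 dev_id;
 *
 *	idev = iommufd_device_bind(ictx, dev, &dev_id);
 *	if (IS_ERR(idev))
 *		return PTR_ERR(idev);
 *	... report dev_id back to userspace, attach, use, detach ...
 *	iommufd_device_unbind(idev);
 */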

/**
 * iommufd_ctx_has_group - True if any device within the group is bound
 *                         to the ictx
 * @ictx: iommufd file descriptor
 * @group: Pointer to a physical iommu_group struct
 *
 * True if any device within the group has been bound to this ictx, ex. via
 * iommufd_device_bind(), therefore implying ictx ownership of the group.
 */
bool iommufd_ctx_has_group(struct iommufd_ctx *ictx, struct iommu_group *group)
{
	struct iommufd_object *obj;
	unsigned long index;

	if (!ictx || !group)
		return false;

	xa_lock(&ictx->objects);
	xa_for_each(&ictx->objects, index, obj) {
		if (obj->type == IOMMUFD_OBJ_DEVICE &&
		    container_of(obj, struct iommufd_device, obj)
				    ->igroup->group == group) {
			xa_unlock(&ictx->objects);
			return true;
		}
	}
	xa_unlock(&ictx->objects);
	return false;
}
EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, IOMMUFD);

/**
 * iommufd_device_unbind - Undo iommufd_device_bind()
 * @idev: Device returned by iommufd_device_bind()
 *
 * Release the device from iommufd control. The DMA ownership will return back
 * to unowned with DMA controlled by the DMA API. This invalidates the
 * iommufd_device pointer, other APIs that consume it must not be called
 * concurrently.
 */
void iommufd_device_unbind(struct iommufd_device *idev)
{
	iommufd_object_destroy_user(idev->ictx, &idev->obj);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD);

struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev)
{
	return idev->ictx;
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_to_ictx, IOMMUFD);

u32 iommufd_device_to_id(struct iommufd_device *idev)
{
	return idev->obj.id;
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, IOMMUFD);

static int iommufd_group_setup_msi(struct iommufd_group *igroup,
				   struct iommufd_hwpt_paging *hwpt_paging)
{
	phys_addr_t sw_msi_start = igroup->sw_msi_start;
	int rc;

	/*
	 * If the IOMMU driver gives an IOMMU_RESV_SW_MSI then it is asking us
	 * to call iommu_get_msi_cookie() on its behalf. This is necessary to
	 * set up the MSI window so iommu_dma_prepare_msi() can install pages
	 * into our domain after request_irq(). If this is not done, interrupts
	 * will not work on this domain.
	 *
	 * FIXME: This is conceptually broken for iommufd since we want to allow
	 * userspace to change the domains, eg switch from an identity IOAS to a
	 * DMA IOAS. There is currently no way to create an MSI window that
	 * matches what the IRQ layer actually expects in a newly created
	 * domain.
	 */
	if (sw_msi_start != PHYS_ADDR_MAX && !hwpt_paging->msi_cookie) {
		rc = iommu_get_msi_cookie(hwpt_paging->common.domain,
					  sw_msi_start);
		if (rc)
			return rc;

		/*
		 * iommu_get_msi_cookie() can only be called once per domain,
		 * it returns -EBUSY on later calls.
		 */
		hwpt_paging->msi_cookie = true;
	}
	return 0;
}

static int
iommufd_device_attach_reserved_iova(struct iommufd_device *idev,
				    struct iommufd_hwpt_paging *hwpt_paging)
{
	int rc;

	lockdep_assert_held(&idev->igroup->lock);

	rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt,
						 idev->dev,
						 &idev->igroup->sw_msi_start);
	if (rc)
		return rc;

	if (list_empty(&idev->igroup->device_list)) {
		rc = iommufd_group_setup_msi(idev->igroup, hwpt_paging);
		if (rc) {
			iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt,
						  idev->dev);
			return rc;
		}
	}
	return 0;
}

/* The device attach/detach/replace helpers for attach_handle */

/* Check if idev is attached to igroup->hwpt */
static bool iommufd_device_is_attached(struct iommufd_device *idev)
{
	struct iommufd_device *cur;

	list_for_each_entry(cur, &idev->igroup->device_list, group_item)
		if (cur == idev)
			return true;
	return false;
}

static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
				      struct iommufd_device *idev)
{
	struct iommufd_attach_handle *handle;
	int rc;

	lockdep_assert_held(&idev->igroup->lock);

	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
	if (!handle)
		return -ENOMEM;

	if (hwpt->fault) {
		rc = iommufd_fault_iopf_enable(idev);
		if (rc)
			goto out_free_handle;
	}

	handle->idev = idev;
	rc = iommu_attach_group_handle(hwpt->domain, idev->igroup->group,
				       &handle->handle);
	if (rc)
		goto out_disable_iopf;

	return 0;

out_disable_iopf:
	if (hwpt->fault)
		iommufd_fault_iopf_disable(idev);
out_free_handle:
	kfree(handle);
	return rc;
}

static struct iommufd_attach_handle *
iommufd_device_get_attach_handle(struct iommufd_device *idev)
{
	struct iommu_attach_handle *handle;

	lockdep_assert_held(&idev->igroup->lock);

	handle =
		iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0);
	if (IS_ERR(handle))
		return NULL;
	return to_iommufd_handle(handle);
}

static void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
				       struct iommufd_device *idev)
{
	struct iommufd_attach_handle *handle;

	handle = iommufd_device_get_attach_handle(idev);
	iommu_detach_group_handle(hwpt->domain, idev->igroup->group);
	if (hwpt->fault) {
		iommufd_auto_response_faults(hwpt, handle);
		iommufd_fault_iopf_disable(idev);
	}
	kfree(handle);
}

static int iommufd_hwpt_replace_device(struct iommufd_device *idev,
				       struct iommufd_hw_pagetable *hwpt,
				       struct iommufd_hw_pagetable *old)
{
	struct iommufd_attach_handle *handle, *old_handle =
		iommufd_device_get_attach_handle(idev);
	int rc;

	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
	if (!handle)
		return -ENOMEM;

	if (hwpt->fault && !old->fault) {
		rc = iommufd_fault_iopf_enable(idev);
		if (rc)
			goto out_free_handle;
	}

	handle->idev = idev;
	rc = iommu_replace_group_handle(idev->igroup->group, hwpt->domain,
					&handle->handle);
	if (rc)
		goto out_disable_iopf;

	if (old->fault) {
		iommufd_auto_response_faults(hwpt, old_handle);
		if (!hwpt->fault)
			iommufd_fault_iopf_disable(idev);
	}
	kfree(old_handle);

	return 0;

out_disable_iopf:
	if (hwpt->fault && !old->fault)
		iommufd_fault_iopf_disable(idev);
out_free_handle:
	kfree(handle);
	return rc;
}

int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
				struct iommufd_device *idev)
{
	struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
	int rc;

	mutex_lock(&idev->igroup->lock);

	if (idev->igroup->hwpt != NULL && idev->igroup->hwpt != hwpt) {
		rc = -EINVAL;
		goto err_unlock;
	}

	if (hwpt_paging) {
		rc = iommufd_device_attach_reserved_iova(idev, hwpt_paging);
		if (rc)
			goto err_unlock;
	}

	/*
	 * Only attach to the group once for the first device that is in the
	 * group. All the other devices will follow this attachment. The user
	 * should attach every device individually to the hwpt as the per-device
	 * reserved regions are only updated during individual device
	 * attachment.
	 */
	if (list_empty(&idev->igroup->device_list)) {
		rc = iommufd_hwpt_attach_device(hwpt, idev);
		if (rc)
			goto err_unresv;
		idev->igroup->hwpt = hwpt;
	}
	refcount_inc(&hwpt->obj.users);
	list_add_tail(&idev->group_item, &idev->igroup->device_list);
	mutex_unlock(&idev->igroup->lock);
	return 0;
err_unresv:
	if (hwpt_paging)
		iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev);
err_unlock:
	mutex_unlock(&idev->igroup->lock);
	return rc;
}

struct iommufd_hw_pagetable *
iommufd_hw_pagetable_detach(struct iommufd_device *idev)
{
	struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt;
	struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);

	mutex_lock(&idev->igroup->lock);
	list_del(&idev->group_item);
	if (list_empty(&idev->igroup->device_list)) {
		iommufd_hwpt_detach_device(hwpt, idev);
		idev->igroup->hwpt = NULL;
	}
	if (hwpt_paging)
		iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev);
	mutex_unlock(&idev->igroup->lock);

	/* Caller must destroy hwpt */
	return hwpt;
}

static struct iommufd_hw_pagetable *
iommufd_device_do_attach(struct iommufd_device *idev,
			 struct iommufd_hw_pagetable *hwpt)
{
	int rc;

	rc = iommufd_hw_pagetable_attach(hwpt, idev);
	if (rc)
		return ERR_PTR(rc);
	return NULL;
}

static void
iommufd_group_remove_reserved_iova(struct iommufd_group *igroup,
				   struct iommufd_hwpt_paging *hwpt_paging)
{
	struct iommufd_device *cur;

	lockdep_assert_held(&igroup->lock);

	list_for_each_entry(cur, &igroup->device_list, group_item)
		iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, cur->dev);
}

static int
iommufd_group_do_replace_reserved_iova(struct iommufd_group *igroup,
				       struct iommufd_hwpt_paging *hwpt_paging)
{
	struct iommufd_hwpt_paging *old_hwpt_paging;
	struct iommufd_device *cur;
	int rc;

	lockdep_assert_held(&igroup->lock);

	old_hwpt_paging = find_hwpt_paging(igroup->hwpt);
	if (!old_hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas) {
		list_for_each_entry(cur, &igroup->device_list, group_item) {
			rc = iopt_table_enforce_dev_resv_regions(
				&hwpt_paging->ioas->iopt, cur->dev, NULL);
			if (rc)
				goto err_unresv;
		}
	}

	rc = iommufd_group_setup_msi(igroup, hwpt_paging);
	if (rc)
		goto err_unresv;
	return 0;

err_unresv:
	iommufd_group_remove_reserved_iova(igroup, hwpt_paging);
	return rc;
}

static struct iommufd_hw_pagetable *
iommufd_device_do_replace(struct iommufd_device *idev,
			  struct iommufd_hw_pagetable *hwpt)
{
	struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
	struct iommufd_hwpt_paging *old_hwpt_paging;
	struct iommufd_group *igroup = idev->igroup;
	struct iommufd_hw_pagetable *old_hwpt;
	unsigned int num_devices;
	int rc;

	mutex_lock(&idev->igroup->lock);

	if (igroup->hwpt == NULL) {
		rc = -EINVAL;
		goto err_unlock;
	}

	if (!iommufd_device_is_attached(idev)) {
		rc = -EINVAL;
		goto err_unlock;
	}

	if (hwpt == igroup->hwpt) {
		mutex_unlock(&idev->igroup->lock);
		return NULL;
	}

	old_hwpt = igroup->hwpt;
	if (hwpt_paging) {
		rc = iommufd_group_do_replace_reserved_iova(igroup, hwpt_paging);
		if (rc)
			goto err_unlock;
	}

	rc = iommufd_hwpt_replace_device(idev, hwpt, old_hwpt);
	if (rc)
		goto err_unresv;

	old_hwpt_paging = find_hwpt_paging(old_hwpt);
	if (old_hwpt_paging &&
	    (!hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas))
		iommufd_group_remove_reserved_iova(igroup, old_hwpt_paging);

	igroup->hwpt = hwpt;

	num_devices = list_count_nodes(&igroup->device_list);
	/*
	 * Move the refcounts held by the device_list to the new hwpt. Retain a
	 * refcount for this thread as the caller will free it.
	 */
	refcount_add(num_devices, &hwpt->obj.users);
	if (num_devices > 1)
		WARN_ON(refcount_sub_and_test(num_devices - 1,
					      &old_hwpt->obj.users));
	mutex_unlock(&idev->igroup->lock);

	/* Caller must destroy old_hwpt */
	return old_hwpt;
err_unresv:
	if (hwpt_paging)
		iommufd_group_remove_reserved_iova(igroup, hwpt_paging);
err_unlock:
	mutex_unlock(&idev->igroup->lock);
	return ERR_PTR(rc);
}

typedef struct iommufd_hw_pagetable *(*attach_fn)(
	struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt);

/*
 * When automatically managing the domains we search for a compatible domain in
 * the iopt and if one is found use it, otherwise create a new domain.
 * Automatic domain selection will never pick a manually created domain.
 */
static struct iommufd_hw_pagetable *
iommufd_device_auto_get_domain(struct iommufd_device *idev,
			       struct iommufd_ioas *ioas, u32 *pt_id,
			       attach_fn do_attach)
{
	/*
	 * iommufd_hw_pagetable_attach() is called by
	 * iommufd_hw_pagetable_alloc() in immediate attachment mode, same as
	 * iommufd_device_do_attach(). So if we are in this mode then we prefer
	 * to use the immediate_attach path as it supports drivers that can't
	 * directly allocate a domain.
	 */
	bool immediate_attach = do_attach == iommufd_device_do_attach;
	struct iommufd_hw_pagetable *destroy_hwpt;
	struct iommufd_hwpt_paging *hwpt_paging;
	struct iommufd_hw_pagetable *hwpt;

	/*
	 * There is no differentiation when domains are allocated, so any domain
	 * that is willing to attach to the device is interchangeable with any
	 * other.
	 */
	mutex_lock(&ioas->mutex);
	list_for_each_entry(hwpt_paging, &ioas->hwpt_list, hwpt_item) {
		if (!hwpt_paging->auto_domain)
			continue;

		hwpt = &hwpt_paging->common;
		if (!iommufd_lock_obj(&hwpt->obj))
			continue;
		destroy_hwpt = (*do_attach)(idev, hwpt);
		if (IS_ERR(destroy_hwpt)) {
			iommufd_put_object(idev->ictx, &hwpt->obj);
			/*
			 * -EINVAL means the domain is incompatible with the
			 * device. Other error codes should propagate to
			 * userspace as failure. Success means the domain is
			 * attached.
			 */
			if (PTR_ERR(destroy_hwpt) == -EINVAL)
				continue;
			goto out_unlock;
		}
		*pt_id = hwpt->obj.id;
		iommufd_put_object(idev->ictx, &hwpt->obj);
		goto out_unlock;
	}

	hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, 0,
						immediate_attach, NULL);
	if (IS_ERR(hwpt_paging)) {
		destroy_hwpt = ERR_CAST(hwpt_paging);
		goto out_unlock;
	}
	hwpt = &hwpt_paging->common;

	if (!immediate_attach) {
		destroy_hwpt = (*do_attach)(idev, hwpt);
		if (IS_ERR(destroy_hwpt))
			goto out_abort;
	} else {
		destroy_hwpt = NULL;
	}

	hwpt_paging->auto_domain = true;
	*pt_id = hwpt->obj.id;

	iommufd_object_finalize(idev->ictx, &hwpt->obj);
	mutex_unlock(&ioas->mutex);
	return destroy_hwpt;

out_abort:
	iommufd_object_abort_and_destroy(idev->ictx, &hwpt->obj);
out_unlock:
	mutex_unlock(&ioas->mutex);
	return destroy_hwpt;
}

static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
				    attach_fn do_attach)
{
	struct iommufd_hw_pagetable *destroy_hwpt;
	struct iommufd_object *pt_obj;

	pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY);
	if (IS_ERR(pt_obj))
		return PTR_ERR(pt_obj);

	switch (pt_obj->type) {
	case IOMMUFD_OBJ_HWPT_NESTED:
	case IOMMUFD_OBJ_HWPT_PAGING: {
		struct iommufd_hw_pagetable *hwpt =
			container_of(pt_obj, struct iommufd_hw_pagetable, obj);

		destroy_hwpt = (*do_attach)(idev, hwpt);
		if (IS_ERR(destroy_hwpt))
			goto out_put_pt_obj;
		break;
	}
	case IOMMUFD_OBJ_IOAS: {
		struct iommufd_ioas *ioas =
			container_of(pt_obj, struct iommufd_ioas, obj);

		destroy_hwpt = iommufd_device_auto_get_domain(idev, ioas, pt_id,
							      do_attach);
		if (IS_ERR(destroy_hwpt))
			goto out_put_pt_obj;
		break;
	}
	default:
		destroy_hwpt = ERR_PTR(-EINVAL);
		goto out_put_pt_obj;
	}
	iommufd_put_object(idev->ictx, pt_obj);

	/* This destruction has to be after we unlock everything */
	if (destroy_hwpt)
		iommufd_hw_pagetable_put(idev->ictx, destroy_hwpt);
	return 0;

out_put_pt_obj:
	iommufd_put_object(idev->ictx, pt_obj);
	return PTR_ERR(destroy_hwpt);
}

/**
 * iommufd_device_attach - Connect a device to an iommu_domain
 * @idev: device to attach
 * @pt_id: Input an IOMMUFD_OBJ_IOAS or IOMMUFD_OBJ_HWPT_PAGING ID
 *         Output the IOMMUFD_OBJ_HWPT_PAGING ID
 *
 * This connects the device to an iommu_domain, either automatically or manually
 * selected. Once this completes the device can do DMA.
 *
 * The caller should return the resulting pt_id back to userspace.
 * This function is undone by calling iommufd_device_detach().
 */
int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
{
	int rc;

	rc = iommufd_device_change_pt(idev, pt_id, &iommufd_device_do_attach);
	if (rc)
		return rc;

	/*
	 * Pairs with iommufd_device_detach() - catches caller bugs attempting
	 * to destroy a device with an attachment.
	 */
	refcount_inc(&idev->obj.users);
	return 0;
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD);
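
/*
 * Usage sketch (illustrative; "ioas_id" is a hypothetical object ID obtained
 * from userspace): userspace typically names an IOAS here, and on success
 * pt_id is updated to the HWPT_PAGING ID that was selected or auto-created,
 * which the caller should report back to userspace.
 *
 *	u32 pt_id = ioas_id;
 *	int rc;
 *
 *	rc = iommufd_device_attach(idev, &pt_id);
 *	if (rc)
 *		return rc;
 *	... the device can now DMA through the IOAS mappings ...
 *	iommufd_device_detach(idev);
 */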

/**
 * iommufd_device_replace - Change the device's iommu_domain
 * @idev: device to change
 * @pt_id: Input an IOMMUFD_OBJ_IOAS or IOMMUFD_OBJ_HWPT_PAGING ID
 *         Output the IOMMUFD_OBJ_HWPT_PAGING ID
 *
 * This is the same as::
 *
 *   iommufd_device_detach();
 *   iommufd_device_attach();
 *
 * If it fails then no change is made to the attachment. The iommu driver may
 * implement this so there is no disruption in translation. This can only be
 * called if iommufd_device_attach() has already succeeded.
 */
int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id)
{
	return iommufd_device_change_pt(idev, pt_id,
					&iommufd_device_do_replace);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, IOMMUFD);
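
/*
 * Usage sketch (illustrative; "new_pt_id" is hypothetical): switch an already
 * attached device to another IOAS or HWPT without passing through a blocked
 * DMA window. On failure the original attachment is left untouched.
 *
 *	u32 pt_id = new_pt_id;
 *	int rc;
 *
 *	rc = iommufd_device_replace(idev, &pt_id);
 *	if (rc)
 *		return rc;
 *	... translation now comes from the object named by pt_id ...
 */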

/**
 * iommufd_device_detach - Disconnect a device from an iommu_domain
 * @idev: device to detach
 *
 * Undo iommufd_device_attach(). This disconnects the idev from the previously
 * attached pt_id. The device returns to a blocked DMA translation.
 */
void iommufd_device_detach(struct iommufd_device *idev)
{
	struct iommufd_hw_pagetable *hwpt;

	hwpt = iommufd_hw_pagetable_detach(idev);
	iommufd_hw_pagetable_put(idev->ictx, hwpt);
	refcount_dec(&idev->obj.users);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD);

/*
 * On success, it will refcount_inc() at a valid new_ioas and refcount_dec() at
 * a valid cur_ioas (access->ioas). A caller passing in a valid new_ioas should
 * call iommufd_put_object() if it does an iommufd_get_object() for a new_ioas.
 */
static int iommufd_access_change_ioas(struct iommufd_access *access,
				      struct iommufd_ioas *new_ioas)
{
	u32 iopt_access_list_id = access->iopt_access_list_id;
	struct iommufd_ioas *cur_ioas = access->ioas;
	int rc;

	lockdep_assert_held(&access->ioas_lock);

	/* We are racing with a concurrent detach, bail */
	if (cur_ioas != access->ioas_unpin)
		return -EBUSY;

	if (cur_ioas == new_ioas)
		return 0;

	/*
	 * Set ioas to NULL to block any further iommufd_access_pin_pages().
	 * iommufd_access_unpin_pages() can continue using access->ioas_unpin.
	 */
	access->ioas = NULL;

	if (new_ioas) {
		rc = iopt_add_access(&new_ioas->iopt, access);
		if (rc) {
			access->ioas = cur_ioas;
			return rc;
		}
		refcount_inc(&new_ioas->obj.users);
	}

	if (cur_ioas) {
		if (access->ops->unmap) {
			mutex_unlock(&access->ioas_lock);
			access->ops->unmap(access->data, 0, ULONG_MAX);
			mutex_lock(&access->ioas_lock);
		}
		iopt_remove_access(&cur_ioas->iopt, access, iopt_access_list_id);
		refcount_dec(&cur_ioas->obj.users);
	}

	access->ioas = new_ioas;
	access->ioas_unpin = new_ioas;

	return 0;
}

static int iommufd_access_change_ioas_id(struct iommufd_access *access, u32 id)
{
	struct iommufd_ioas *ioas = iommufd_get_ioas(access->ictx, id);
	int rc;

	if (IS_ERR(ioas))
		return PTR_ERR(ioas);
	rc = iommufd_access_change_ioas(access, ioas);
	iommufd_put_object(access->ictx, &ioas->obj);
	return rc;
}

void iommufd_access_destroy_object(struct iommufd_object *obj)
{
	struct iommufd_access *access =
		container_of(obj, struct iommufd_access, obj);

	mutex_lock(&access->ioas_lock);
	if (access->ioas)
		WARN_ON(iommufd_access_change_ioas(access, NULL));
	mutex_unlock(&access->ioas_lock);
	iommufd_ctx_put(access->ictx);
}

/**
 * iommufd_access_create - Create an iommufd_access
 * @ictx: iommufd file descriptor
 * @ops: Driver's ops to associate with the access
 * @data: Opaque data to pass into ops functions
 * @id: Output ID number to return to userspace for this access
 *
 * An iommufd_access allows a driver to read/write to the IOAS without using
 * DMA. The underlying CPU memory can be accessed using the
 * iommufd_access_pin_pages() or iommufd_access_rw() functions.
 *
 * The provided ops are required to use iommufd_access_pin_pages().
 */
struct iommufd_access *
iommufd_access_create(struct iommufd_ctx *ictx,
		      const struct iommufd_access_ops *ops, void *data, u32 *id)
{
	struct iommufd_access *access;

	/*
	 * There is no uAPI for the access object, but to keep things symmetric
	 * use the object infrastructure anyhow.
	 */
	access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS);
	if (IS_ERR(access))
		return access;

	access->data = data;
	access->ops = ops;

	if (ops->needs_pin_pages)
		access->iova_alignment = PAGE_SIZE;
	else
		access->iova_alignment = 1;

	/* The calling driver is a user until iommufd_access_destroy() */
	refcount_inc(&access->obj.users);
	access->ictx = ictx;
	iommufd_ctx_get(ictx);
	iommufd_object_finalize(ictx, &access->obj);
	*id = access->obj.id;
	mutex_init(&access->ioas_lock);
	return access;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD);
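
/*
 * Usage sketch (illustrative; the "my_*" names are hypothetical, only the
 * iommufd_access_* calls and struct iommufd_access_ops are from this API): an
 * emulated-DMA style consumer provides an unmap callback so pinned pages can
 * be revoked, then attaches the access to an IOAS by ID.
 *
 *	static void my_unmap(void *data, unsigned long iova,
 *			     unsigned long length)
 *	{
 *		... stop using any pages pinned in [iova, iova + length) ...
 *	}
 *
 *	static const struct iommufd_access_ops my_ops = {
 *		.needs_pin_pages = 1,
 *		.unmap = my_unmap,
 *	};
 *
 *	access = iommufd_access_create(ictx, &my_ops, my_data, &access_id);
 *	if (IS_ERR(access))
 *		return PTR_ERR(access);
 *	rc = iommufd_access_attach(access, ioas_id);
 *	...
 *	iommufd_access_detach(access);
 *	iommufd_access_destroy(access);
 */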

/**
 * iommufd_access_destroy - Destroy an iommufd_access
 * @access: The access to destroy
 *
 * The caller must stop using the access before destroying it.
 */
void iommufd_access_destroy(struct iommufd_access *access)
{
	iommufd_object_destroy_user(access->ictx, &access->obj);
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD);

void iommufd_access_detach(struct iommufd_access *access)
{
	mutex_lock(&access->ioas_lock);
	if (WARN_ON(!access->ioas)) {
		mutex_unlock(&access->ioas_lock);
		return;
	}
	WARN_ON(iommufd_access_change_ioas(access, NULL));
	mutex_unlock(&access->ioas_lock);
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_detach, IOMMUFD);

int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
{
	int rc;

	mutex_lock(&access->ioas_lock);
	if (WARN_ON(access->ioas)) {
		mutex_unlock(&access->ioas_lock);
		return -EINVAL;
	}

	rc = iommufd_access_change_ioas_id(access, ioas_id);
	mutex_unlock(&access->ioas_lock);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, IOMMUFD);

int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id)
{
	int rc;

	mutex_lock(&access->ioas_lock);
	if (!access->ioas) {
		mutex_unlock(&access->ioas_lock);
		return -ENOENT;
	}
	rc = iommufd_access_change_ioas_id(access, ioas_id);
	mutex_unlock(&access->ioas_lock);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_replace, IOMMUFD);

/**
 * iommufd_access_notify_unmap - Notify users of an iopt to stop using it
 * @iopt: iopt to work on
 * @iova: Starting iova in the iopt
 * @length: Number of bytes
 *
 * After this function returns there should be no users attached to the pages
 * linked to this iopt that intersect with iova,length. Anyone that has attached
 * a user through iopt_access_pages() needs to detach it through
 * iommufd_access_unpin_pages() before this function returns.
 *
 * iommufd_access_destroy() will wait for any outstanding unmap callback to
 * complete. Once iommufd_access_destroy() returns, no unmap ops are running or
 * will run in the future. Due to this a driver must not create locking that
 * prevents unmap from completing while iommufd_access_destroy() is running.
 */
void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
				 unsigned long length)
{
	struct iommufd_ioas *ioas =
		container_of(iopt, struct iommufd_ioas, iopt);
	struct iommufd_access *access;
	unsigned long index;

	xa_lock(&ioas->iopt.access_list);
	xa_for_each(&ioas->iopt.access_list, index, access) {
		if (!iommufd_lock_obj(&access->obj))
			continue;
		xa_unlock(&ioas->iopt.access_list);

		access->ops->unmap(access->data, iova, length);

		iommufd_put_object(access->ictx, &access->obj);
		xa_lock(&ioas->iopt.access_list);
	}
	xa_unlock(&ioas->iopt.access_list);
}

/**
 * iommufd_access_unpin_pages() - Undo iommufd_access_pin_pages
 * @access: IOAS access to act on
 * @iova: Starting IOVA
 * @length: Number of bytes to access
 *
 * Undo the pin done by iommufd_access_pin_pages(). The caller must stop
 * accessing the pages before calling this. The iova/length must exactly match
 * the ones provided to iommufd_access_pin_pages().
 */
void iommufd_access_unpin_pages(struct iommufd_access *access,
				unsigned long iova, unsigned long length)
{
	struct iopt_area_contig_iter iter;
	struct io_pagetable *iopt;
	unsigned long last_iova;
	struct iopt_area *area;

	if (WARN_ON(!length) ||
	    WARN_ON(check_add_overflow(iova, length - 1, &last_iova)))
		return;

	mutex_lock(&access->ioas_lock);
	/*
	 * The driver must be doing something wrong if it calls this before an
	 * iommufd_access_attach() or after an iommufd_access_detach().
	 */
	if (WARN_ON(!access->ioas_unpin)) {
		mutex_unlock(&access->ioas_lock);
		return;
	}
	iopt = &access->ioas_unpin->iopt;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
		iopt_area_remove_access(
			area, iopt_area_iova_to_index(area, iter.cur_iova),
			iopt_area_iova_to_index(
				area,
				min(last_iova, iopt_area_last_iova(area))));
	WARN_ON(!iopt_area_contig_done(&iter));
	up_read(&iopt->iova_rwsem);
	mutex_unlock(&access->ioas_lock);
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, IOMMUFD);

static bool iopt_area_contig_is_aligned(struct iopt_area_contig_iter *iter)
{
	if (iopt_area_start_byte(iter->area, iter->cur_iova) % PAGE_SIZE)
		return false;

	if (!iopt_area_contig_done(iter) &&
	    (iopt_area_start_byte(iter->area, iopt_area_last_iova(iter->area)) %
	     PAGE_SIZE) != (PAGE_SIZE - 1))
		return false;
	return true;
}

static bool check_area_prot(struct iopt_area *area, unsigned int flags)
{
	if (flags & IOMMUFD_ACCESS_RW_WRITE)
		return area->iommu_prot & IOMMU_WRITE;
	return area->iommu_prot & IOMMU_READ;
}

/**
 * iommufd_access_pin_pages() - Return a list of pages under the iova
 * @access: IOAS access to act on
 * @iova: Starting IOVA
 * @length: Number of bytes to access
 * @out_pages: Output page list
 * @flags: IOMMUFD_ACCESS_RW_* flags
 *
 * Reads @length bytes starting at iova and returns the struct page * pointers.
 * These can be kmap'd by the caller for CPU access.
 *
 * The caller must perform iommufd_access_unpin_pages() when done to balance
 * this.
 *
 * This API always requires a page aligned iova. This happens naturally if the
 * ioas alignment is >= PAGE_SIZE and the iova is PAGE_SIZE aligned. However
 * smaller alignments have corner cases where this API can fail on otherwise
 * aligned iova.
 */
int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
			     unsigned long length, struct page **out_pages,
			     unsigned int flags)
{
	struct iopt_area_contig_iter iter;
	struct io_pagetable *iopt;
	unsigned long last_iova;
	struct iopt_area *area;
	int rc;

	/* Driver's ops don't support pin_pages */
	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
	    WARN_ON(access->iova_alignment != PAGE_SIZE || !access->ops->unmap))
		return -EINVAL;

	if (!length)
		return -EINVAL;
	if (check_add_overflow(iova, length - 1, &last_iova))
		return -EOVERFLOW;

	mutex_lock(&access->ioas_lock);
	if (!access->ioas) {
		mutex_unlock(&access->ioas_lock);
		return -ENOENT;
	}
	iopt = &access->ioas->iopt;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
		unsigned long last = min(last_iova, iopt_area_last_iova(area));
		unsigned long last_index = iopt_area_iova_to_index(area, last);
		unsigned long index =
			iopt_area_iova_to_index(area, iter.cur_iova);

		if (area->prevent_access ||
		    !iopt_area_contig_is_aligned(&iter)) {
			rc = -EINVAL;
			goto err_remove;
		}

		if (!check_area_prot(area, flags)) {
			rc = -EPERM;
			goto err_remove;
		}

		rc = iopt_area_add_access(area, index, last_index, out_pages,
					  flags);
		if (rc)
			goto err_remove;
		out_pages += last_index - index + 1;
	}
	if (!iopt_area_contig_done(&iter)) {
		rc = -ENOENT;
		goto err_remove;
	}

	up_read(&iopt->iova_rwsem);
	mutex_unlock(&access->ioas_lock);
	return 0;

err_remove:
	if (iova < iter.cur_iova) {
		last_iova = iter.cur_iova - 1;
		iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
			iopt_area_remove_access(
				area,
				iopt_area_iova_to_index(area, iter.cur_iova),
				iopt_area_iova_to_index(
					area, min(last_iova,
						  iopt_area_last_iova(area))));
	}
	up_read(&iopt->iova_rwsem);
	mutex_unlock(&access->ioas_lock);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD);
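
/*
 * Usage sketch (illustrative; "iova" names a page-aligned address the caller
 * already knows): pin a single page for CPU access and release it when done.
 * The driver's unmap callback may revoke such pins at any time, so the caller
 * must be prepared to stop using the page from that callback.
 *
 *	struct page *page;
 *	int rc;
 *
 *	rc = iommufd_access_pin_pages(access, iova, PAGE_SIZE, &page,
 *				      IOMMUFD_ACCESS_RW_WRITE);
 *	if (rc)
 *		return rc;
 *	... kmap_local_page(page), use the memory, kunmap_local() ...
 *	iommufd_access_unpin_pages(access, iova, PAGE_SIZE);
 */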

/**
 * iommufd_access_rw - Read or write data under the iova
 * @access: IOAS access to act on
 * @iova: Starting IOVA
 * @data: Kernel buffer to copy to/from
 * @length: Number of bytes to access
 * @flags: IOMMUFD_ACCESS_RW_* flags
 *
 * Copy kernel to/from data into the range given by IOVA/length. If flags
 * indicates IOMMUFD_ACCESS_RW_KTHREAD then a large copy can be optimized
 * by changing it into copy_to/from_user().
 */
int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
		      void *data, size_t length, unsigned int flags)
{
	struct iopt_area_contig_iter iter;
	struct io_pagetable *iopt;
	struct iopt_area *area;
	unsigned long last_iova;
	int rc = -EINVAL;

	if (!length)
		return -EINVAL;
	if (check_add_overflow(iova, length - 1, &last_iova))
		return -EOVERFLOW;

	mutex_lock(&access->ioas_lock);
	if (!access->ioas) {
		mutex_unlock(&access->ioas_lock);
		return -ENOENT;
	}
	iopt = &access->ioas->iopt;

	down_read(&iopt->iova_rwsem);
	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
		unsigned long last = min(last_iova, iopt_area_last_iova(area));
		unsigned long bytes = (last - iter.cur_iova) + 1;

		if (area->prevent_access) {
			rc = -EINVAL;
			goto err_out;
		}

		if (!check_area_prot(area, flags)) {
			rc = -EPERM;
			goto err_out;
		}

		rc = iopt_pages_rw_access(
			area->pages, iopt_area_start_byte(area, iter.cur_iova),
			data, bytes, flags);
		if (rc)
			goto err_out;
		data += bytes;
	}
	if (!iopt_area_contig_done(&iter))
		rc = -ENOENT;
err_out:
	up_read(&iopt->iova_rwsem);
	mutex_unlock(&access->ioas_lock);
	return rc;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD);
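
/*
 * Usage sketch (illustrative; "struct my_desc" and "iova" are hypothetical):
 * copy a small structure out of, and back into, the IOAS without pinning.
 * A zero flags value reads from the IOAS into the kernel buffer; adding
 * IOMMUFD_ACCESS_RW_WRITE writes the kernel buffer back into the IOAS.
 *
 *	struct my_desc desc;
 *	int rc;
 *
 *	rc = iommufd_access_rw(access, iova, &desc, sizeof(desc), 0);
 *	if (rc)
 *		return rc;
 *	... modify desc ...
 *	rc = iommufd_access_rw(access, iova, &desc, sizeof(desc),
 *			       IOMMUFD_ACCESS_RW_WRITE);
 */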

int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
{
	struct iommu_hw_info *cmd = ucmd->cmd;
	void __user *user_ptr = u64_to_user_ptr(cmd->data_uptr);
	const struct iommu_ops *ops;
	struct iommufd_device *idev;
	unsigned int data_len;
	unsigned int copy_len;
	void *data;
	int rc;

	if (cmd->flags || cmd->__reserved[0] || cmd->__reserved[1] ||
	    cmd->__reserved[2])
		return -EOPNOTSUPP;

	idev = iommufd_get_device(ucmd, cmd->dev_id);
	if (IS_ERR(idev))
		return PTR_ERR(idev);

	ops = dev_iommu_ops(idev->dev);
	if (ops->hw_info) {
		data = ops->hw_info(idev->dev, &data_len, &cmd->out_data_type);
		if (IS_ERR(data)) {
			rc = PTR_ERR(data);
			goto out_put;
		}

		/*
		 * Drivers that have a hw_info callback should have a unique
		 * iommu_hw_info_type.
		 */
		if (WARN_ON_ONCE(cmd->out_data_type ==
				 IOMMU_HW_INFO_TYPE_NONE)) {
			rc = -ENODEV;
			goto out_free;
		}
	} else {
		cmd->out_data_type = IOMMU_HW_INFO_TYPE_NONE;
		data_len = 0;
		data = NULL;
	}

	copy_len = min(cmd->data_len, data_len);
	if (copy_to_user(user_ptr, data, copy_len)) {
		rc = -EFAULT;
		goto out_free;
	}

	/*
	 * Zero the trailing bytes if the user buffer is bigger than the
	 * data size kernel actually has.
	 */
	if (copy_len < cmd->data_len) {
		if (clear_user(user_ptr + copy_len, cmd->data_len - copy_len)) {
			rc = -EFAULT;
			goto out_free;
		}
	}

	/*
	 * We return the length the kernel supports so userspace may know what
	 * the kernel capability is. It could be larger than the input buffer.
	 */
	cmd->data_len = data_len;

	cmd->out_capabilities = 0;
	if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING))
		cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING;

	cmd->out_max_pasid_log2 = 0;
	/*
	 * Currently, all iommu drivers enable PASID in the probe_device()
	 * op if iommu and device supports it. So the max_pasids stored in
	 * dev->iommu indicates both PASID support and enable status. A
	 * non-zero dev->iommu->max_pasids means PASID is supported and
	 * enabled. The iommufd only reports PASID capability to userspace
	 * if it's enabled.
	 */
	if (idev->dev->iommu->max_pasids) {
		cmd->out_max_pasid_log2 = ilog2(idev->dev->iommu->max_pasids);

		if (dev_is_pci(idev->dev)) {
			struct pci_dev *pdev = to_pci_dev(idev->dev);
			int ctrl;

			ctrl = pci_pasid_status(pdev);

			WARN_ON_ONCE(ctrl < 0 ||
				     !(ctrl & PCI_PASID_CTRL_ENABLE));

			if (ctrl & PCI_PASID_CTRL_EXEC)
				cmd->out_capabilities |=
					IOMMU_HW_CAP_PCI_PASID_EXEC;
			if (ctrl & PCI_PASID_CTRL_PRIV)
				cmd->out_capabilities |=
					IOMMU_HW_CAP_PCI_PASID_PRIV;
		}
	}

	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
out_free:
	kfree(data);
out_put:
	iommufd_put_object(ucmd->ictx, &idev->obj);
	return rc;
}