1 /*
2 * Copyright © 2015 Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * Authors: David Woodhouse <dwmw2@infradead.org>
14 */
15
16 #include <linux/intel-iommu.h>
17 #include <linux/mmu_notifier.h>
18 #include <linux/sched.h>
19 #include <linux/slab.h>
20 #include <linux/intel-svm.h>
21 #include <linux/rculist.h>
22 #include <linux/pci.h>
23 #include <linux/pci-ats.h>
24 #include <linux/dmar.h>
25 #include <linux/interrupt.h>
26
27 static irqreturn_t prq_event_thread(int irq, void *d);
28
29 struct pasid_entry {
30 u64 val;
31 };
32
33 struct pasid_state_entry {
34 u64 val;
35 };
36
intel_svm_alloc_pasid_tables(struct intel_iommu * iommu)37 int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
38 {
39 struct page *pages;
40 int order;
41
42 /* Start at 2 because it's defined as 2^(1+PSS) */
43 iommu->pasid_max = 2 << ecap_pss(iommu->ecap);
44
45 /* Eventually I'm promised we will get a multi-level PASID table
46 * and it won't have to be physically contiguous. Until then,
47 * limit the size because 8MiB contiguous allocations can be hard
48 * to come by. The limit of 0x20000, which is 1MiB for each of
49 * the PASID and PASID-state tables, is somewhat arbitrary. */
50 if (iommu->pasid_max > 0x20000)
51 iommu->pasid_max = 0x20000;
52
53 order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
54 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
55 if (!pages) {
56 pr_warn("IOMMU: %s: Failed to allocate PASID table\n",
57 iommu->name);
58 return -ENOMEM;
59 }
60 iommu->pasid_table = page_address(pages);
61 pr_info("%s: Allocated order %d PASID table.\n", iommu->name, order);
62
63 if (ecap_dis(iommu->ecap)) {
64 /* Just making it explicit... */
65 BUILD_BUG_ON(sizeof(struct pasid_entry) != sizeof(struct pasid_state_entry));
66 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
67 if (pages)
68 iommu->pasid_state_table = page_address(pages);
69 else
70 pr_warn("IOMMU: %s: Failed to allocate PASID state table\n",
71 iommu->name);
72 }
73
74 idr_init(&iommu->pasid_idr);
75
76 return 0;
77 }
78
intel_svm_free_pasid_tables(struct intel_iommu * iommu)79 int intel_svm_free_pasid_tables(struct intel_iommu *iommu)
80 {
81 int order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
82
83 if (iommu->pasid_table) {
84 free_pages((unsigned long)iommu->pasid_table, order);
85 iommu->pasid_table = NULL;
86 }
87 if (iommu->pasid_state_table) {
88 free_pages((unsigned long)iommu->pasid_state_table, order);
89 iommu->pasid_state_table = NULL;
90 }
91 idr_destroy(&iommu->pasid_idr);
92 return 0;
93 }
94
/* Page order of the page request queue allocation (0 == a single page).
 * The same value is OR-ed into the queue address programmed into
 * DMAR_PQA_REG and determines PRQ_RING_MASK. */
#define PRQ_ORDER	0
96
intel_svm_enable_prq(struct intel_iommu * iommu)97 int intel_svm_enable_prq(struct intel_iommu *iommu)
98 {
99 struct page *pages;
100 int irq, ret;
101
102 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
103 if (!pages) {
104 pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
105 iommu->name);
106 return -ENOMEM;
107 }
108 iommu->prq = page_address(pages);
109
110 irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu);
111 if (irq <= 0) {
112 pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
113 iommu->name);
114 ret = -EINVAL;
115 err:
116 free_pages((unsigned long)iommu->prq, PRQ_ORDER);
117 iommu->prq = NULL;
118 return ret;
119 }
120 iommu->pr_irq = irq;
121
122 snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
123
124 ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
125 iommu->prq_name, iommu);
126 if (ret) {
127 pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
128 iommu->name);
129 dmar_free_hwirq(irq);
130 iommu->pr_irq = 0;
131 goto err;
132 }
133 dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
134 dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
135 dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);
136
137 return 0;
138 }
139
intel_svm_finish_prq(struct intel_iommu * iommu)140 int intel_svm_finish_prq(struct intel_iommu *iommu)
141 {
142 dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
143 dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
144 dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);
145
146 if (iommu->pr_irq) {
147 free_irq(iommu->pr_irq, iommu);
148 dmar_free_hwirq(iommu->pr_irq);
149 iommu->pr_irq = 0;
150 }
151
152 free_pages((unsigned long)iommu->prq, PRQ_ORDER);
153 iommu->prq = NULL;
154
155 return 0;
156 }
157
intel_flush_svm_range_dev(struct intel_svm * svm,struct intel_svm_dev * sdev,unsigned long address,unsigned long pages,int ih,int gl)158 static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev,
159 unsigned long address, unsigned long pages, int ih, int gl)
160 {
161 struct qi_desc desc;
162
163 if (pages == -1) {
164 /* For global kernel pages we have to flush them in *all* PASIDs
165 * because that's the only option the hardware gives us. Despite
166 * the fact that they are actually only accessible through one. */
167 if (gl)
168 desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
169 QI_EIOTLB_GRAN(QI_GRAN_ALL_ALL) | QI_EIOTLB_TYPE;
170 else
171 desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
172 QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
173 desc.high = 0;
174 } else {
175 int mask = ilog2(__roundup_pow_of_two(pages));
176
177 desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
178 QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE;
179 desc.high = QI_EIOTLB_ADDR(address) | QI_EIOTLB_GL(gl) |
180 QI_EIOTLB_IH(ih) | QI_EIOTLB_AM(mask);
181 }
182 qi_submit_sync(&desc, svm->iommu);
183
184 if (sdev->dev_iotlb) {
185 desc.low = QI_DEV_EIOTLB_PASID(svm->pasid) | QI_DEV_EIOTLB_SID(sdev->sid) |
186 QI_DEV_EIOTLB_QDEP(sdev->qdep) | QI_DEIOTLB_TYPE;
187 if (pages == -1) {
188 desc.high = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) | QI_DEV_EIOTLB_SIZE;
189 } else if (pages > 1) {
190 /* The least significant zero bit indicates the size. So,
191 * for example, an "address" value of 0x12345f000 will
192 * flush from 0x123440000 to 0x12347ffff (256KiB). */
193 unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT);
194 unsigned long mask = __rounddown_pow_of_two(address ^ last);;
195
196 desc.high = QI_DEV_EIOTLB_ADDR((address & ~mask) | (mask - 1)) | QI_DEV_EIOTLB_SIZE;
197 } else {
198 desc.high = QI_DEV_EIOTLB_ADDR(address);
199 }
200 qi_submit_sync(&desc, svm->iommu);
201 }
202 }
203
intel_flush_svm_range(struct intel_svm * svm,unsigned long address,unsigned long pages,int ih,int gl)204 static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
205 unsigned long pages, int ih, int gl)
206 {
207 struct intel_svm_dev *sdev;
208
209 /* Try deferred invalidate if available */
210 if (svm->iommu->pasid_state_table &&
211 !cmpxchg64(&svm->iommu->pasid_state_table[svm->pasid].val, 0, 1ULL << 63))
212 return;
213
214 rcu_read_lock();
215 list_for_each_entry_rcu(sdev, &svm->devs, list)
216 intel_flush_svm_range_dev(svm, sdev, address, pages, ih, gl);
217 rcu_read_unlock();
218 }
219
/*
 * mmu notifier ->change_pte callback: flush the single page at @address
 * for the binding that owns @mn.  @mm and @pte are not used.
 */
static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm,
			     unsigned long address, pte_t pte)
{
	/* one page, ih = 1, gl = 0 */
	intel_flush_svm_range(container_of(mn, struct intel_svm, notifier),
			      address, 1, 1, 0);
}
227
intel_invalidate_page(struct mmu_notifier * mn,struct mm_struct * mm,unsigned long address)228 static void intel_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
229 unsigned long address)
230 {
231 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
232
233 intel_flush_svm_range(svm, address, 1, 1, 0);
234 }
235
236 /* Pages have been freed at this point */
intel_invalidate_range(struct mmu_notifier * mn,struct mm_struct * mm,unsigned long start,unsigned long end)237 static void intel_invalidate_range(struct mmu_notifier *mn,
238 struct mm_struct *mm,
239 unsigned long start, unsigned long end)
240 {
241 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
242
243 intel_flush_svm_range(svm, start,
244 (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0, 0);
245 }
246
247
intel_flush_pasid_dev(struct intel_svm * svm,struct intel_svm_dev * sdev,int pasid)248 static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *sdev, int pasid)
249 {
250 struct qi_desc desc;
251
252 desc.high = 0;
253 desc.low = QI_PC_TYPE | QI_PC_DID(sdev->did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid);
254
255 qi_submit_sync(&desc, svm->iommu);
256 }
257
intel_mm_release(struct mmu_notifier * mn,struct mm_struct * mm)258 static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
259 {
260 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
261 struct intel_svm_dev *sdev;
262
263 /* This might end up being called from exit_mmap(), *before* the page
264 * tables are cleared. And __mmu_notifier_release() will delete us from
265 * the list of notifiers so that our invalidate_range() callback doesn't
266 * get called when the page tables are cleared. So we need to protect
267 * against hardware accessing those page tables.
268 *
269 * We do it by clearing the entry in the PASID table and then flushing
270 * the IOTLB and the PASID table caches. This might upset hardware;
271 * perhaps we'll want to point the PASID to a dummy PGD (like the zero
272 * page) so that we end up taking a fault that the hardware really
273 * *has* to handle gracefully without affecting other processes.
274 */
275 svm->iommu->pasid_table[svm->pasid].val = 0;
276 wmb();
277
278 rcu_read_lock();
279 list_for_each_entry_rcu(sdev, &svm->devs, list) {
280 intel_flush_pasid_dev(svm, sdev, svm->pasid);
281 intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
282 }
283 rcu_read_unlock();
284
285 }
286
287 static const struct mmu_notifier_ops intel_mmuops = {
288 .release = intel_mm_release,
289 .change_pte = intel_change_pte,
290 .invalidate_page = intel_invalidate_page,
291 .invalidate_range = intel_invalidate_range,
292 };
293
/* Held by intel_svm_bind_mm() and intel_svm_unbind_mm() around their
 * updates of the PASID idr and of the svm/device lists. */
static DEFINE_MUTEX(pasid_mutex);
295
intel_svm_bind_mm(struct device * dev,int * pasid,int flags,struct svm_dev_ops * ops)296 int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops)
297 {
298 struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
299 struct intel_svm_dev *sdev;
300 struct intel_svm *svm = NULL;
301 struct mm_struct *mm = NULL;
302 int pasid_max;
303 int ret;
304
305 if (WARN_ON(!iommu))
306 return -EINVAL;
307
308 if (dev_is_pci(dev)) {
309 pasid_max = pci_max_pasids(to_pci_dev(dev));
310 if (pasid_max < 0)
311 return -EINVAL;
312 } else
313 pasid_max = 1 << 20;
314
315 if ((flags & SVM_FLAG_SUPERVISOR_MODE)) {
316 if (!ecap_srs(iommu->ecap))
317 return -EINVAL;
318 } else if (pasid) {
319 mm = get_task_mm(current);
320 BUG_ON(!mm);
321 }
322
323 mutex_lock(&pasid_mutex);
324 if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
325 int i;
326
327 idr_for_each_entry(&iommu->pasid_idr, svm, i) {
328 if (svm->mm != mm ||
329 (svm->flags & SVM_FLAG_PRIVATE_PASID))
330 continue;
331
332 if (svm->pasid >= pasid_max) {
333 dev_warn(dev,
334 "Limited PASID width. Cannot use existing PASID %d\n",
335 svm->pasid);
336 ret = -ENOSPC;
337 goto out;
338 }
339
340 list_for_each_entry(sdev, &svm->devs, list) {
341 if (dev == sdev->dev) {
342 if (sdev->ops != ops) {
343 ret = -EBUSY;
344 goto out;
345 }
346 sdev->users++;
347 goto success;
348 }
349 }
350
351 break;
352 }
353 }
354
355 sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
356 if (!sdev) {
357 ret = -ENOMEM;
358 goto out;
359 }
360 sdev->dev = dev;
361
362 ret = intel_iommu_enable_pasid(iommu, sdev);
363 if (ret || !pasid) {
364 /* If they don't actually want to assign a PASID, this is
365 * just an enabling check/preparation. */
366 kfree(sdev);
367 goto out;
368 }
369 /* Finish the setup now we know we're keeping it */
370 sdev->users = 1;
371 sdev->ops = ops;
372 init_rcu_head(&sdev->rcu);
373
374 if (!svm) {
375 svm = kzalloc(sizeof(*svm), GFP_KERNEL);
376 if (!svm) {
377 ret = -ENOMEM;
378 kfree(sdev);
379 goto out;
380 }
381 svm->iommu = iommu;
382
383 if (pasid_max > iommu->pasid_max)
384 pasid_max = iommu->pasid_max;
385
386 /* Do not use PASID 0 in caching mode (virtualised IOMMU) */
387 ret = idr_alloc(&iommu->pasid_idr, svm,
388 !!cap_caching_mode(iommu->cap),
389 pasid_max - 1, GFP_KERNEL);
390 if (ret < 0) {
391 kfree(svm);
392 kfree(sdev);
393 goto out;
394 }
395 svm->pasid = ret;
396 svm->notifier.ops = &intel_mmuops;
397 svm->mm = mm;
398 svm->flags = flags;
399 INIT_LIST_HEAD_RCU(&svm->devs);
400 ret = -ENOMEM;
401 if (mm) {
402 ret = mmu_notifier_register(&svm->notifier, mm);
403 if (ret) {
404 idr_remove(&svm->iommu->pasid_idr, svm->pasid);
405 kfree(svm);
406 kfree(sdev);
407 goto out;
408 }
409 iommu->pasid_table[svm->pasid].val = (u64)__pa(mm->pgd) | 1;
410 } else
411 iommu->pasid_table[svm->pasid].val = (u64)__pa(init_mm.pgd) | 1 | (1ULL << 11);
412 wmb();
413 /* In caching mode, we still have to flush with PASID 0 when
414 * a PASID table entry becomes present. Not entirely clear
415 * *why* that would be the case — surely we could just issue
416 * a flush with the PASID value that we've changed? The PASID
417 * is the index into the table, after all. It's not like domain
418 * IDs in the case of the equivalent context-entry change in
419 * caching mode. And for that matter it's not entirely clear why
420 * a VMM would be in the business of caching the PASID table
421 * anyway. Surely that can be left entirely to the guest? */
422 if (cap_caching_mode(iommu->cap))
423 intel_flush_pasid_dev(svm, sdev, 0);
424 }
425 list_add_rcu(&sdev->list, &svm->devs);
426
427 success:
428 *pasid = svm->pasid;
429 ret = 0;
430 out:
431 mutex_unlock(&pasid_mutex);
432 if (mm)
433 mmput(mm);
434 return ret;
435 }
436 EXPORT_SYMBOL_GPL(intel_svm_bind_mm);
437
intel_svm_unbind_mm(struct device * dev,int pasid)438 int intel_svm_unbind_mm(struct device *dev, int pasid)
439 {
440 struct intel_svm_dev *sdev;
441 struct intel_iommu *iommu;
442 struct intel_svm *svm;
443 int ret = -EINVAL;
444
445 mutex_lock(&pasid_mutex);
446 iommu = intel_svm_device_to_iommu(dev);
447 if (!iommu || !iommu->pasid_table)
448 goto out;
449
450 svm = idr_find(&iommu->pasid_idr, pasid);
451 if (!svm)
452 goto out;
453
454 list_for_each_entry(sdev, &svm->devs, list) {
455 if (dev == sdev->dev) {
456 ret = 0;
457 sdev->users--;
458 if (!sdev->users) {
459 list_del_rcu(&sdev->list);
460 /* Flush the PASID cache and IOTLB for this device.
461 * Note that we do depend on the hardware *not* using
462 * the PASID any more. Just as we depend on other
463 * devices never using PASIDs that they have no right
464 * to use. We have a *shared* PASID table, because it's
465 * large and has to be physically contiguous. So it's
466 * hard to be as defensive as we might like. */
467 intel_flush_pasid_dev(svm, sdev, svm->pasid);
468 intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
469 kfree_rcu(sdev, rcu);
470
471 if (list_empty(&svm->devs)) {
472
473 idr_remove(&svm->iommu->pasid_idr, svm->pasid);
474 if (svm->mm)
475 mmu_notifier_unregister(&svm->notifier, svm->mm);
476
477 /* We mandate that no page faults may be outstanding
478 * for the PASID when intel_svm_unbind_mm() is called.
479 * If that is not obeyed, subtle errors will happen.
480 * Let's make them less subtle... */
481 memset(svm, 0x6b, sizeof(*svm));
482 kfree(svm);
483 }
484 }
485 break;
486 }
487 }
488 out:
489 mutex_unlock(&pasid_mutex);
490
491 return ret;
492 }
493 EXPORT_SYMBOL_GPL(intel_svm_unbind_mm);
494
495 /* Page request queue descriptor */
496 struct page_req_dsc {
497 u64 srr:1;
498 u64 bof:1;
499 u64 pasid_present:1;
500 u64 lpig:1;
501 u64 pasid:20;
502 u64 bus:8;
503 u64 private:23;
504 u64 prg_index:9;
505 u64 rd_req:1;
506 u64 wr_req:1;
507 u64 exe_req:1;
508 u64 priv_req:1;
509 u64 devfn:8;
510 u64 addr:52;
511 };
512
/* Mask applied to the queue head/tail byte offsets: keeps them inside the
 * (0x1000 << PRQ_ORDER)-byte ring, on a 0x10-byte boundary. */
#define PRQ_RING_MASK	((0x1000 << PRQ_ORDER) - 0x10)
514
access_error(struct vm_area_struct * vma,struct page_req_dsc * req)515 static bool access_error(struct vm_area_struct *vma, struct page_req_dsc *req)
516 {
517 unsigned long requested = 0;
518
519 if (req->exe_req)
520 requested |= VM_EXEC;
521
522 if (req->rd_req)
523 requested |= VM_READ;
524
525 if (req->wr_req)
526 requested |= VM_WRITE;
527
528 return (requested & ~vma->vm_flags) != 0;
529 }
530
prq_event_thread(int irq,void * d)531 static irqreturn_t prq_event_thread(int irq, void *d)
532 {
533 struct intel_iommu *iommu = d;
534 struct intel_svm *svm = NULL;
535 int head, tail, handled = 0;
536
537 /* Clear PPR bit before reading head/tail registers, to
538 * ensure that we get a new interrupt if needed. */
539 writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
540
541 tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
542 head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
543 while (head != tail) {
544 struct intel_svm_dev *sdev;
545 struct vm_area_struct *vma;
546 struct page_req_dsc *req;
547 struct qi_desc resp;
548 int ret, result;
549 u64 address;
550
551 handled = 1;
552
553 req = &iommu->prq[head / sizeof(*req)];
554
555 result = QI_RESP_FAILURE;
556 address = (u64)req->addr << VTD_PAGE_SHIFT;
557 if (!req->pasid_present) {
558 pr_err("%s: Page request without PASID: %08llx %08llx\n",
559 iommu->name, ((unsigned long long *)req)[0],
560 ((unsigned long long *)req)[1]);
561 goto bad_req;
562 }
563
564 if (!svm || svm->pasid != req->pasid) {
565 rcu_read_lock();
566 svm = idr_find(&iommu->pasid_idr, req->pasid);
567 /* It *can't* go away, because the driver is not permitted
568 * to unbind the mm while any page faults are outstanding.
569 * So we only need RCU to protect the internal idr code. */
570 rcu_read_unlock();
571
572 if (!svm) {
573 pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n",
574 iommu->name, req->pasid, ((unsigned long long *)req)[0],
575 ((unsigned long long *)req)[1]);
576 goto no_pasid;
577 }
578 }
579
580 result = QI_RESP_INVALID;
581 /* Since we're using init_mm.pgd directly, we should never take
582 * any faults on kernel addresses. */
583 if (!svm->mm)
584 goto bad_req;
585 /* If the mm is already defunct, don't handle faults. */
586 if (!atomic_inc_not_zero(&svm->mm->mm_users))
587 goto bad_req;
588 down_read(&svm->mm->mmap_sem);
589 vma = find_extend_vma(svm->mm, address);
590 if (!vma || address < vma->vm_start)
591 goto invalid;
592
593 if (access_error(vma, req))
594 goto invalid;
595
596 ret = handle_mm_fault(vma, address,
597 req->wr_req ? FAULT_FLAG_WRITE : 0);
598 if (ret & VM_FAULT_ERROR)
599 goto invalid;
600
601 result = QI_RESP_SUCCESS;
602 invalid:
603 up_read(&svm->mm->mmap_sem);
604 mmput(svm->mm);
605 bad_req:
606 /* Accounting for major/minor faults? */
607 rcu_read_lock();
608 list_for_each_entry_rcu(sdev, &svm->devs, list) {
609 if (sdev->sid == PCI_DEVID(req->bus, req->devfn))
610 break;
611 }
612 /* Other devices can go away, but the drivers are not permitted
613 * to unbind while any page faults might be in flight. So it's
614 * OK to drop the 'lock' here now we have it. */
615 rcu_read_unlock();
616
617 if (WARN_ON(&sdev->list == &svm->devs))
618 sdev = NULL;
619
620 if (sdev && sdev->ops && sdev->ops->fault_cb) {
621 int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
622 (req->exe_req << 1) | (req->priv_req);
623 sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, req->private, rwxp, result);
624 }
625 /* We get here in the error case where the PASID lookup failed,
626 and these can be NULL. Do not use them below this point! */
627 sdev = NULL;
628 svm = NULL;
629 no_pasid:
630 if (req->lpig) {
631 /* Page Group Response */
632 resp.low = QI_PGRP_PASID(req->pasid) |
633 QI_PGRP_DID((req->bus << 8) | req->devfn) |
634 QI_PGRP_PASID_P(req->pasid_present) |
635 QI_PGRP_RESP_TYPE;
636 resp.high = QI_PGRP_IDX(req->prg_index) |
637 QI_PGRP_PRIV(req->private) | QI_PGRP_RESP_CODE(result);
638
639 qi_submit_sync(&resp, iommu);
640 } else if (req->srr) {
641 /* Page Stream Response */
642 resp.low = QI_PSTRM_IDX(req->prg_index) |
643 QI_PSTRM_PRIV(req->private) | QI_PSTRM_BUS(req->bus) |
644 QI_PSTRM_PASID(req->pasid) | QI_PSTRM_RESP_TYPE;
645 resp.high = QI_PSTRM_ADDR(address) | QI_PSTRM_DEVFN(req->devfn) |
646 QI_PSTRM_RESP_CODE(result);
647
648 qi_submit_sync(&resp, iommu);
649 }
650
651 head = (head + sizeof(*req)) & PRQ_RING_MASK;
652 }
653
654 dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
655
656 return IRQ_RETVAL(handled);
657 }
658