1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright © 2006-2014 Intel Corporation.
4 *
5 * Authors: David Woodhouse <dwmw2@infradead.org>,
6 * Ashok Raj <ashok.raj@intel.com>,
7 * Shaohua Li <shaohua.li@intel.com>,
8 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
9 * Fenghua Yu <fenghua.yu@intel.com>
10 * Joerg Roedel <jroedel@suse.de>
11 */
12
13 #define pr_fmt(fmt) "DMAR: " fmt
14 #define dev_fmt(fmt) pr_fmt(fmt)
15
16 #include <linux/init.h>
17 #include <linux/bitmap.h>
18 #include <linux/debugfs.h>
19 #include <linux/export.h>
20 #include <linux/slab.h>
21 #include <linux/irq.h>
22 #include <linux/interrupt.h>
23 #include <linux/spinlock.h>
24 #include <linux/pci.h>
25 #include <linux/dmar.h>
26 #include <linux/dma-map-ops.h>
27 #include <linux/mempool.h>
28 #include <linux/memory.h>
29 #include <linux/cpu.h>
30 #include <linux/timer.h>
31 #include <linux/io.h>
32 #include <linux/iova.h>
33 #include <linux/iommu.h>
34 #include <linux/intel-iommu.h>
35 #include <linux/syscore_ops.h>
36 #include <linux/tboot.h>
37 #include <linux/dmi.h>
38 #include <linux/pci-ats.h>
39 #include <linux/memblock.h>
40 #include <linux/dma-map-ops.h>
41 #include <linux/dma-direct.h>
42 #include <linux/crash_dump.h>
43 #include <linux/numa.h>
44 #include <linux/swiotlb.h>
45 #include <asm/irq_remapping.h>
46 #include <asm/cacheflush.h>
47 #include <asm/iommu.h>
48 #include <trace/events/intel_iommu.h>
49
50 #include "../irq_remapping.h"
51 #include "pasid.h"
52
53 #define ROOT_SIZE VTD_PAGE_SIZE
54 #define CONTEXT_SIZE VTD_PAGE_SIZE
55
56 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
57 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
58 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
59 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
60
61 #define IOAPIC_RANGE_START (0xfee00000)
62 #define IOAPIC_RANGE_END (0xfeefffff)
63 #define IOVA_START_ADDR (0x1000)
64
65 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
66
67 #define MAX_AGAW_WIDTH 64
68 #define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
69
70 #define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
71 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)
72
73 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
74 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
75 #define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
76 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
77 #define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
78
79 /* IO virtual address start page frame number */
80 #define IOVA_START_PFN (1)
81
82 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
83
84 /* page table handling */
85 #define LEVEL_STRIDE (9)
86 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
87
88 /*
89 * This bitmap is used to advertise the page sizes our hardware supports
90 * to the IOMMU core, which will then use this information to split
91 * physically contiguous memory regions it is mapping into page sizes
92 * that we support.
93 *
94 * Traditionally the IOMMU core just handed us the mappings directly,
95 * after making sure the size is an order of a 4KiB page and that the
96 * mapping has natural alignment.
97 *
98 * To retain this behavior, we currently advertise that we support
99 * all page sizes that are an order of 4KiB.
100 *
101 * If at some point we'd like to utilize the IOMMU core's new behavior,
102 * we could change this to advertise the real page sizes we support.
103 */
104 #define INTEL_IOMMU_PGSIZES (~0xFFFUL)
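/*
 * A quick illustration of the encoding, using the SZ_* constants from
 * <linux/sizes.h>: bit N set in the bitmap means mappings of size 1 << N
 * are accepted, and ~0xFFFUL sets every bit from 12 upwards, e.g.:
 *
 *	INTEL_IOMMU_PGSIZES & SZ_4K	-> nonzero, bit 12: 4KiB pages
 *	INTEL_IOMMU_PGSIZES & SZ_2M	-> nonzero, bit 21: 2MiB superpages
 *	INTEL_IOMMU_PGSIZES & SZ_1G	-> nonzero, bit 30: 1GiB superpages
 *	INTEL_IOMMU_PGSIZES & SZ_8K	-> also nonzero, although 8KiB is not a
 *					   real hardware leaf size; it is only
 *					   "supported" as an order of 4KiB.
 */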
105
106 static inline int agaw_to_level(int agaw)
107 {
108 return agaw + 2;
109 }
110
111 static inline int agaw_to_width(int agaw)
112 {
113 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
114 }
115
116 static inline int width_to_agaw(int width)
117 {
118 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
119 }
120
121 static inline unsigned int level_to_offset_bits(int level)
122 {
123 return (level - 1) * LEVEL_STRIDE;
124 }
125
126 static inline int pfn_level_offset(u64 pfn, int level)
127 {
128 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
129 }
130
131 static inline u64 level_mask(int level)
132 {
133 return -1ULL << level_to_offset_bits(level);
134 }
135
136 static inline u64 level_size(int level)
137 {
138 return 1ULL << level_to_offset_bits(level);
139 }
140
141 static inline u64 align_to_level(u64 pfn, int level)
142 {
143 return (pfn + level_size(level) - 1) & level_mask(level);
144 }
145
146 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
147 {
148 return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
149 }
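/*
 * Worked example of the AGAW/width/level arithmetic above (illustrative
 * values): each page-table level decodes LEVEL_STRIDE = 9 bits on top of
 * the 30 bits covered by the smallest table, so
 *
 *	width_to_agaw(48) = DIV_ROUND_UP(18, 9) = 2	(4-level table)
 *	agaw_to_width(2)  = 30 + 2 * 9 = 48 bits
 *	agaw_to_level(2)  = 4
 *
 *	width_to_agaw(57) = 3 for DEFAULT_DOMAIN_ADDRESS_WIDTH, giving a
 *	5-level table with agaw_to_width(3) = 57 bits of input address.
 */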
150
151 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
152 are never going to work. */
153 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
154 {
155 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
156 }
157
158 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
159 {
160 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
161 }
162 static inline unsigned long page_to_dma_pfn(struct page *pg)
163 {
164 return mm_to_dma_pfn(page_to_pfn(pg));
165 }
166 static inline unsigned long virt_to_dma_pfn(void *p)
167 {
168 return page_to_dma_pfn(virt_to_page(p));
169 }
170
171 /* global iommu list, set NULL for ignored DMAR units */
172 static struct intel_iommu **g_iommus;
173
174 static void __init check_tylersburg_isoch(void);
175 static int rwbf_quirk;
176
177 /*
178 * set to 1 to panic the kernel if VT-d cannot be enabled successfully
179 * (used when kernel is launched w/ TXT)
180 */
181 static int force_on = 0;
182 static int intel_iommu_tboot_noforce;
183 static int no_platform_optin;
184
185 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
186
187 /*
188 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
189 * if marked present.
190 */
191 static phys_addr_t root_entry_lctp(struct root_entry *re)
192 {
193 if (!(re->lo & 1))
194 return 0;
195
196 return re->lo & VTD_PAGE_MASK;
197 }
198
199 /*
200 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
201 * if marked present.
202 */
203 static phys_addr_t root_entry_uctp(struct root_entry *re)
204 {
205 if (!(re->hi & 1))
206 return 0;
207
208 return re->hi & VTD_PAGE_MASK;
209 }
210
211 static inline void context_clear_pasid_enable(struct context_entry *context)
212 {
213 context->lo &= ~(1ULL << 11);
214 }
215
216 static inline bool context_pasid_enabled(struct context_entry *context)
217 {
218 return !!(context->lo & (1ULL << 11));
219 }
220
221 static inline void context_set_copied(struct context_entry *context)
222 {
223 context->hi |= (1ull << 3);
224 }
225
226 static inline bool context_copied(struct context_entry *context)
227 {
228 return !!(context->hi & (1ULL << 3));
229 }
230
231 static inline bool __context_present(struct context_entry *context)
232 {
233 return (context->lo & 1);
234 }
235
236 bool context_present(struct context_entry *context)
237 {
238 return context_pasid_enabled(context) ?
239 __context_present(context) :
240 __context_present(context) && !context_copied(context);
241 }
242
243 static inline void context_set_present(struct context_entry *context)
244 {
245 context->lo |= 1;
246 }
247
248 static inline void context_set_fault_enable(struct context_entry *context)
249 {
250 context->lo &= (((u64)-1) << 2) | 1;
251 }
252
253 static inline void context_set_translation_type(struct context_entry *context,
254 unsigned long value)
255 {
256 context->lo &= (((u64)-1) << 4) | 3;
257 context->lo |= (value & 3) << 2;
258 }
259
260 static inline void context_set_address_root(struct context_entry *context,
261 unsigned long value)
262 {
263 context->lo &= ~VTD_PAGE_MASK;
264 context->lo |= value & VTD_PAGE_MASK;
265 }
266
267 static inline void context_set_address_width(struct context_entry *context,
268 unsigned long value)
269 {
270 context->hi |= value & 7;
271 }
272
273 static inline void context_set_domain_id(struct context_entry *context,
274 unsigned long value)
275 {
276 context->hi |= (value & ((1 << 16) - 1)) << 8;
277 }
278
279 static inline int context_domain_id(struct context_entry *c)
280 {
281 return((c->hi >> 8) & 0xffff);
282 }
283
284 static inline void context_clear_entry(struct context_entry *context)
285 {
286 context->lo = 0;
287 context->hi = 0;
288 }
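/*
 * Putting the helpers above together, a legacy-mode context entry for a
 * made-up domain id 42, a 4-level second-level table (AW value 2) at
 * physical address 0x7f123000 and translation type 0 would end up as
 *
 *	context->lo = 0x7f123000 | 1;		address root + present bit
 *	context->hi = (42 << 8) | 2;		= 0x2a02: domain id + AW
 *
 * i.e. lo carries present (bit 0), fault-processing disable (bit 1),
 * translation type (bits 3:2) and the page-aligned table pointer, while
 * hi carries the address width (bits 2:0) and the domain id (bits 23:8).
 */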
289
290 /*
291 * This domain is a statically identity mapping domain.
292 * 1. This domain creates a static 1:1 mapping to all usable memory.
293 * 2. It maps to each iommu if successful.
294 * 3. Each iommu maps to this domain if successful.
295 */
296 static struct dmar_domain *si_domain;
297 static int hw_pass_through = 1;
298
299 #define for_each_domain_iommu(idx, domain) \
300 for (idx = 0; idx < g_num_of_iommus; idx++) \
301 if (domain->iommu_refcnt[idx])
302
303 struct dmar_rmrr_unit {
304 struct list_head list; /* list of rmrr units */
305 struct acpi_dmar_header *hdr; /* ACPI header */
306 u64 base_address; /* reserved base address*/
307 u64 end_address; /* reserved end address */
308 struct dmar_dev_scope *devices; /* target devices */
309 int devices_cnt; /* target device count */
310 };
311
312 struct dmar_atsr_unit {
313 struct list_head list; /* list of ATSR units */
314 struct acpi_dmar_header *hdr; /* ACPI header */
315 struct dmar_dev_scope *devices; /* target devices */
316 int devices_cnt; /* target device count */
317 u8 include_all:1; /* include all ports */
318 };
319
320 static LIST_HEAD(dmar_atsr_units);
321 static LIST_HEAD(dmar_rmrr_units);
322
323 #define for_each_rmrr_units(rmrr) \
324 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
325
326 /* number of registered Intel IOMMU units */
327 static int g_num_of_iommus;
328
329 static void domain_exit(struct dmar_domain *domain);
330 static void domain_remove_dev_info(struct dmar_domain *domain);
331 static void dmar_remove_one_dev_info(struct device *dev);
332 static void __dmar_remove_one_dev_info(struct device_domain_info *info);
333 static int intel_iommu_attach_device(struct iommu_domain *domain,
334 struct device *dev);
335 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
336 dma_addr_t iova);
337
338 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
339 int dmar_disabled = 0;
340 #else
341 int dmar_disabled = 1;
342 #endif /* CONFIG_INTEL_IOMMU_DEFAULT_ON */
343
344 #ifdef CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
345 int intel_iommu_sm = 1;
346 #else
347 int intel_iommu_sm;
348 #endif /* CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON */
349
350 int intel_iommu_enabled = 0;
351 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
352
353 static int dmar_map_gfx = 1;
354 static int dmar_forcedac;
355 static int intel_iommu_strict;
356 static int intel_iommu_superpage = 1;
357 static int iommu_identity_mapping;
358 static int intel_no_bounce;
359 static int iommu_skip_te_disable;
360
361 #define IDENTMAP_GFX 2
362 #define IDENTMAP_AZALIA 4
363
364 int intel_iommu_gfx_mapped;
365 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
366
367 #define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
368 struct device_domain_info *get_domain_info(struct device *dev)
369 {
370 struct device_domain_info *info;
371
372 if (!dev)
373 return NULL;
374
375 info = dev_iommu_priv_get(dev);
376 if (unlikely(info == DEFER_DEVICE_DOMAIN_INFO))
377 return NULL;
378
379 return info;
380 }
381
382 DEFINE_SPINLOCK(device_domain_lock);
383 static LIST_HEAD(device_domain_list);
384
385 #define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) && \
386 to_pci_dev(d)->untrusted)
387
388 /*
389 * Iterate over elements in device_domain_list and call the specified
390 * callback @fn against each element.
391 */
392 int for_each_device_domain(int (*fn)(struct device_domain_info *info,
393 void *data), void *data)
394 {
395 int ret = 0;
396 unsigned long flags;
397 struct device_domain_info *info;
398
399 spin_lock_irqsave(&device_domain_lock, flags);
400 list_for_each_entry(info, &device_domain_list, global) {
401 ret = fn(info, data);
402 if (ret) {
403 spin_unlock_irqrestore(&device_domain_lock, flags);
404 return ret;
405 }
406 }
407 spin_unlock_irqrestore(&device_domain_lock, flags);
408
409 return 0;
410 }
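/*
 * A minimal usage sketch (the callback below is hypothetical, not part of
 * this file): the walk stops and returns the first non-zero value the
 * callback produces, otherwise it returns 0.
 *
 *	static int count_devices(struct device_domain_info *info, void *data)
 *	{
 *		(*(int *)data)++;
 *		return 0;
 *	}
 *
 *	int count = 0;
 *	for_each_device_domain(count_devices, &count);
 */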
411
412 const struct iommu_ops intel_iommu_ops;
413
414 static bool translation_pre_enabled(struct intel_iommu *iommu)
415 {
416 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
417 }
418
419 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
420 {
421 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
422 }
423
424 static void init_translation_status(struct intel_iommu *iommu)
425 {
426 u32 gsts;
427
428 gsts = readl(iommu->reg + DMAR_GSTS_REG);
429 if (gsts & DMA_GSTS_TES)
430 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
431 }
432
433 static int __init intel_iommu_setup(char *str)
434 {
435 if (!str)
436 return -EINVAL;
437 while (*str) {
438 if (!strncmp(str, "on", 2)) {
439 dmar_disabled = 0;
440 pr_info("IOMMU enabled\n");
441 } else if (!strncmp(str, "off", 3)) {
442 dmar_disabled = 1;
443 no_platform_optin = 1;
444 pr_info("IOMMU disabled\n");
445 } else if (!strncmp(str, "igfx_off", 8)) {
446 dmar_map_gfx = 0;
447 pr_info("Disable GFX device mapping\n");
448 } else if (!strncmp(str, "forcedac", 8)) {
449 pr_info("Forcing DAC for PCI devices\n");
450 dmar_forcedac = 1;
451 } else if (!strncmp(str, "strict", 6)) {
452 pr_info("Disable batched IOTLB flush\n");
453 intel_iommu_strict = 1;
454 } else if (!strncmp(str, "sp_off", 6)) {
455 pr_info("Disable supported super page\n");
456 intel_iommu_superpage = 0;
457 } else if (!strncmp(str, "sm_on", 5)) {
458 pr_info("Intel-IOMMU: scalable mode supported\n");
459 intel_iommu_sm = 1;
460 } else if (!strncmp(str, "tboot_noforce", 13)) {
461 pr_info("Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
462 intel_iommu_tboot_noforce = 1;
463 } else if (!strncmp(str, "nobounce", 8)) {
464 pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n");
465 intel_no_bounce = 1;
466 }
467
468 str += strcspn(str, ",");
469 while (*str == ',')
470 str++;
471 }
472 return 0;
473 }
474 __setup("intel_iommu=", intel_iommu_setup);
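/*
 * Example boot parameters accepted by the parser above (options are
 * comma-separated, matching the strcspn() loop):
 *
 *	intel_iommu=on,sm_on,strict	enable VT-d, scalable mode and
 *					synchronous (strict) IOTLB flushing
 *	intel_iommu=off			disable VT-d and opt out of the
 *					platform default
 *	intel_iommu=on,igfx_off		enable VT-d but leave the integrated
 *					graphics device untranslated
 */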
475
476 static struct kmem_cache *iommu_domain_cache;
477 static struct kmem_cache *iommu_devinfo_cache;
478
479 static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
480 {
481 struct dmar_domain **domains;
482 int idx = did >> 8;
483
484 domains = iommu->domains[idx];
485 if (!domains)
486 return NULL;
487
488 return domains[did & 0xff];
489 }
490
491 static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
492 struct dmar_domain *domain)
493 {
494 struct dmar_domain **domains;
495 int idx = did >> 8;
496
497 if (!iommu->domains[idx]) {
498 size_t size = 256 * sizeof(struct dmar_domain *);
499 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
500 }
501
502 domains = iommu->domains[idx];
503 if (WARN_ON(!domains))
504 return;
505 else
506 domains[did & 0xff] = domain;
507 }
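/*
 * get_iommu_domain()/set_iommu_domain() treat the 16-bit domain id as a
 * two-level radix index: iommu->domains[did >> 8][did & 0xff]. For a
 * made-up did of 0x1234 that is outer slot 0x12 and inner slot 0x34;
 * the inner 256-pointer array is allocated lazily on first use.
 */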
508
509 void *alloc_pgtable_page(int node)
510 {
511 struct page *page;
512 void *vaddr = NULL;
513
514 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
515 if (page)
516 vaddr = page_address(page);
517 return vaddr;
518 }
519
520 void free_pgtable_page(void *vaddr)
521 {
522 free_page((unsigned long)vaddr);
523 }
524
525 static inline void *alloc_domain_mem(void)
526 {
527 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
528 }
529
530 static void free_domain_mem(void *vaddr)
531 {
532 kmem_cache_free(iommu_domain_cache, vaddr);
533 }
534
535 static inline void * alloc_devinfo_mem(void)
536 {
537 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
538 }
539
540 static inline void free_devinfo_mem(void *vaddr)
541 {
542 kmem_cache_free(iommu_devinfo_cache, vaddr);
543 }
544
545 static inline int domain_type_is_si(struct dmar_domain *domain)
546 {
547 return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
548 }
549
550 static inline bool domain_use_first_level(struct dmar_domain *domain)
551 {
552 return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL;
553 }
554
555 static inline int domain_pfn_supported(struct dmar_domain *domain,
556 unsigned long pfn)
557 {
558 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
559
560 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
561 }
562
563 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
564 {
565 unsigned long sagaw;
566 int agaw = -1;
567
568 sagaw = cap_sagaw(iommu->cap);
569 for (agaw = width_to_agaw(max_gaw);
570 agaw >= 0; agaw--) {
571 if (test_bit(agaw, &sagaw))
572 break;
573 }
574
575 return agaw;
576 }
577
578 /*
579 * Calculate max SAGAW for each iommu.
580 */
581 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
582 {
583 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
584 }
585
586 /*
587 * calculate agaw for each iommu.
588 * "SAGAW" may be different across iommus, so use a default agaw and
589 * fall back to a smaller supported agaw for iommus that don't support the default.
590 */
591 int iommu_calculate_agaw(struct intel_iommu *iommu)
592 {
593 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
594 }
595
596 /* This function only returns a single iommu in a domain */
597 struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
598 {
599 int iommu_id;
600
601 /* si_domain and vm domain should not get here. */
602 if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))
603 return NULL;
604
605 for_each_domain_iommu(iommu_id, domain)
606 break;
607
608 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
609 return NULL;
610
611 return g_iommus[iommu_id];
612 }
613
614 static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
615 {
616 return sm_supported(iommu) ?
617 ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
618 }
619
620 static void domain_update_iommu_coherency(struct dmar_domain *domain)
621 {
622 struct dmar_drhd_unit *drhd;
623 struct intel_iommu *iommu;
624 bool found = false;
625 int i;
626
627 domain->iommu_coherency = 1;
628
629 for_each_domain_iommu(i, domain) {
630 found = true;
631 if (!iommu_paging_structure_coherency(g_iommus[i])) {
632 domain->iommu_coherency = 0;
633 break;
634 }
635 }
636 if (found)
637 return;
638
639 /* No hardware attached; use lowest common denominator */
640 rcu_read_lock();
641 for_each_active_iommu(iommu, drhd) {
642 if (!iommu_paging_structure_coherency(iommu)) {
643 domain->iommu_coherency = 0;
644 break;
645 }
646 }
647 rcu_read_unlock();
648 }
649
650 static int domain_update_iommu_snooping(struct intel_iommu *skip)
651 {
652 struct dmar_drhd_unit *drhd;
653 struct intel_iommu *iommu;
654 int ret = 1;
655
656 rcu_read_lock();
657 for_each_active_iommu(iommu, drhd) {
658 if (iommu != skip) {
659 /*
660 * If the hardware is operating in the scalable mode,
661 * the snooping control is always supported since we
662 * always set PASID-table-entry.PGSNP bit if the domain
663 * is managed outside (UNMANAGED).
664 */
665 if (!sm_supported(iommu) &&
666 !ecap_sc_support(iommu->ecap)) {
667 ret = 0;
668 break;
669 }
670 }
671 }
672 rcu_read_unlock();
673
674 return ret;
675 }
676
677 static int domain_update_iommu_superpage(struct dmar_domain *domain,
678 struct intel_iommu *skip)
679 {
680 struct dmar_drhd_unit *drhd;
681 struct intel_iommu *iommu;
682 int mask = 0x3;
683
684 if (!intel_iommu_superpage) {
685 return 0;
686 }
687
688 /* set iommu_superpage to the smallest common denominator */
689 rcu_read_lock();
690 for_each_active_iommu(iommu, drhd) {
691 if (iommu != skip) {
692 if (domain && domain_use_first_level(domain)) {
693 if (!cap_fl1gp_support(iommu->cap))
694 mask = 0x1;
695 } else {
696 mask &= cap_super_page_val(iommu->cap);
697 }
698
699 if (!mask)
700 break;
701 }
702 }
703 rcu_read_unlock();
704
705 return fls(mask);
706 }
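/*
 * Example of the mask arithmetic above: cap_super_page_val() is a bitmap
 * with bit 0 = 2MiB and bit 1 = 1GiB support. If one unit reports 0x3 and
 * another only 0x1, the intersection is 0x1 and fls(0x1) = 1, so the
 * domain is limited to 2MiB superpages; if any unit reports 0, fls(0) = 0
 * and no superpages are used at all. For first-level tables the 1GiB case
 * is instead dropped whenever a unit lacks cap_fl1gp_support().
 */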
707
708 static int domain_update_device_node(struct dmar_domain *domain)
709 {
710 struct device_domain_info *info;
711 int nid = NUMA_NO_NODE;
712
713 assert_spin_locked(&device_domain_lock);
714
715 if (list_empty(&domain->devices))
716 return NUMA_NO_NODE;
717
718 list_for_each_entry(info, &domain->devices, link) {
719 if (!info->dev)
720 continue;
721
722 /*
723 * There could possibly be multiple device numa nodes as devices
724 * within the same domain may sit behind different IOMMUs. There
725 * is no perfect answer in such a situation, so we use a first-come,
726 * first-served policy.
727 */
728 nid = dev_to_node(info->dev);
729 if (nid != NUMA_NO_NODE)
730 break;
731 }
732
733 return nid;
734 }
735
736 /* Some capabilities may be different across iommus */
737 static void domain_update_iommu_cap(struct dmar_domain *domain)
738 {
739 domain_update_iommu_coherency(domain);
740 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
741 domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
742
743 /*
744 * If RHSA is missing, we should default to the device numa domain
745 * as a fallback.
746 */
747 if (domain->nid == NUMA_NO_NODE)
748 domain->nid = domain_update_device_node(domain);
749
750 /*
751 * First-level translation restricts the input-address to a
752 * canonical address (i.e., address bits 63:N have the same
753 * value as address bit [N-1], where N is 48-bits with 4-level
754 * paging and 57-bits with 5-level paging). Hence, skip bit
755 * [N-1].
756 */
757 if (domain_use_first_level(domain))
758 domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1);
759 else
760 domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
761 }
762
763 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
764 u8 devfn, int alloc)
765 {
766 struct root_entry *root = &iommu->root_entry[bus];
767 struct context_entry *context;
768 u64 *entry;
769
770 entry = &root->lo;
771 if (sm_supported(iommu)) {
772 if (devfn >= 0x80) {
773 devfn -= 0x80;
774 entry = &root->hi;
775 }
776 devfn *= 2;
777 }
778 if (*entry & 1)
779 context = phys_to_virt(*entry & VTD_PAGE_MASK);
780 else {
781 unsigned long phy_addr;
782 if (!alloc)
783 return NULL;
784
785 context = alloc_pgtable_page(iommu->node);
786 if (!context)
787 return NULL;
788
789 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
790 phy_addr = virt_to_phys((void *)context);
791 *entry = phy_addr | 1;
792 __iommu_flush_cache(iommu, entry, sizeof(*entry));
793 }
794 return &context[devfn];
795 }
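/*
 * Indexing example for the lookup above: in legacy mode root_entry.lo
 * points at a single 256-entry context table indexed directly by devfn.
 * In scalable mode each root entry provides two context tables (lo for
 * devfn 0x00-0x7f, hi for 0x80-0xff) and every device occupies a pair of
 * entries, hence the "devfn -= 0x80; devfn *= 2" adjustment. A made-up
 * device at bus 0x3a, devfn 0x85 therefore resolves through
 * root_entry[0x3a].hi at context index (0x85 - 0x80) * 2 = 0x0a.
 */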
796
797 static bool attach_deferred(struct device *dev)
798 {
799 return dev_iommu_priv_get(dev) == DEFER_DEVICE_DOMAIN_INFO;
800 }
801
802 /**
803 * is_downstream_to_pci_bridge - test if a device belongs to the PCI
804 * sub-hierarchy of a candidate PCI-PCI bridge
805 * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
806 * @bridge: the candidate PCI-PCI bridge
807 *
808 * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
809 */
810 static bool
811 is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
812 {
813 struct pci_dev *pdev, *pbridge;
814
815 if (!dev_is_pci(dev) || !dev_is_pci(bridge))
816 return false;
817
818 pdev = to_pci_dev(dev);
819 pbridge = to_pci_dev(bridge);
820
821 if (pbridge->subordinate &&
822 pbridge->subordinate->number <= pdev->bus->number &&
823 pbridge->subordinate->busn_res.end >= pdev->bus->number)
824 return true;
825
826 return false;
827 }
828
829 static bool quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
830 {
831 struct dmar_drhd_unit *drhd;
832 u32 vtbar;
833 int rc;
834
835 /* We know that this device on this chipset has its own IOMMU.
836 * If we find it under a different IOMMU, then the BIOS is lying
837 * to us. Hope that the IOMMU for this device is actually
838 * disabled, and it needs no translation...
839 */
840 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
841 if (rc) {
842 /* "can't" happen */
843 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
844 return false;
845 }
846 vtbar &= 0xffff0000;
847
848 /* we know that this iommu should be at offset 0xa000 from vtbar */
849 drhd = dmar_find_matched_drhd_unit(pdev);
850 if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
851 pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
852 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
853 return true;
854 }
855
856 return false;
857 }
858
859 static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev)
860 {
861 if (!iommu || iommu->drhd->ignored)
862 return true;
863
864 if (dev_is_pci(dev)) {
865 struct pci_dev *pdev = to_pci_dev(dev);
866
867 if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
868 pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SNB &&
869 quirk_ioat_snb_local_iommu(pdev))
870 return true;
871 }
872
873 return false;
874 }
875
876 struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
877 {
878 struct dmar_drhd_unit *drhd = NULL;
879 struct pci_dev *pdev = NULL;
880 struct intel_iommu *iommu;
881 struct device *tmp;
882 u16 segment = 0;
883 int i;
884
885 if (!dev)
886 return NULL;
887
888 if (dev_is_pci(dev)) {
889 struct pci_dev *pf_pdev;
890
891 pdev = pci_real_dma_dev(to_pci_dev(dev));
892
893 /* VFs aren't listed in scope tables; we need to look up
894 * the PF instead to find the IOMMU. */
895 pf_pdev = pci_physfn(pdev);
896 dev = &pf_pdev->dev;
897 segment = pci_domain_nr(pdev->bus);
898 } else if (has_acpi_companion(dev))
899 dev = &ACPI_COMPANION(dev)->dev;
900
901 rcu_read_lock();
902 for_each_iommu(iommu, drhd) {
903 if (pdev && segment != drhd->segment)
904 continue;
905
906 for_each_active_dev_scope(drhd->devices,
907 drhd->devices_cnt, i, tmp) {
908 if (tmp == dev) {
909 /* For a VF use its original BDF# not that of the PF
910 * which we used for the IOMMU lookup. Strictly speaking
911 * we could do this for all PCI devices; we only need to
912 * get the BDF# from the scope table for ACPI matches. */
913 if (pdev && pdev->is_virtfn)
914 goto got_pdev;
915
916 if (bus && devfn) {
917 *bus = drhd->devices[i].bus;
918 *devfn = drhd->devices[i].devfn;
919 }
920 goto out;
921 }
922
923 if (is_downstream_to_pci_bridge(dev, tmp))
924 goto got_pdev;
925 }
926
927 if (pdev && drhd->include_all) {
928 got_pdev:
929 if (bus && devfn) {
930 *bus = pdev->bus->number;
931 *devfn = pdev->devfn;
932 }
933 goto out;
934 }
935 }
936 iommu = NULL;
937 out:
938 if (iommu_is_dummy(iommu, dev))
939 iommu = NULL;
940
941 rcu_read_unlock();
942
943 return iommu;
944 }
945
946 static void domain_flush_cache(struct dmar_domain *domain,
947 void *addr, int size)
948 {
949 if (!domain->iommu_coherency)
950 clflush_cache_range(addr, size);
951 }
952
953 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
954 {
955 struct context_entry *context;
956 int ret = 0;
957 unsigned long flags;
958
959 spin_lock_irqsave(&iommu->lock, flags);
960 context = iommu_context_addr(iommu, bus, devfn, 0);
961 if (context)
962 ret = context_present(context);
963 spin_unlock_irqrestore(&iommu->lock, flags);
964 return ret;
965 }
966
967 static void free_context_table(struct intel_iommu *iommu)
968 {
969 int i;
970 unsigned long flags;
971 struct context_entry *context;
972
973 spin_lock_irqsave(&iommu->lock, flags);
974 if (!iommu->root_entry) {
975 goto out;
976 }
977 for (i = 0; i < ROOT_ENTRY_NR; i++) {
978 context = iommu_context_addr(iommu, i, 0, 0);
979 if (context)
980 free_pgtable_page(context);
981
982 if (!sm_supported(iommu))
983 continue;
984
985 context = iommu_context_addr(iommu, i, 0x80, 0);
986 if (context)
987 free_pgtable_page(context);
988
989 }
990 free_pgtable_page(iommu->root_entry);
991 iommu->root_entry = NULL;
992 out:
993 spin_unlock_irqrestore(&iommu->lock, flags);
994 }
995
996 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
997 unsigned long pfn, int *target_level)
998 {
999 struct dma_pte *parent, *pte;
1000 int level = agaw_to_level(domain->agaw);
1001 int offset;
1002
1003 BUG_ON(!domain->pgd);
1004
1005 if (!domain_pfn_supported(domain, pfn))
1006 /* Address beyond IOMMU's addressing capabilities. */
1007 return NULL;
1008
1009 parent = domain->pgd;
1010
1011 while (1) {
1012 void *tmp_page;
1013
1014 offset = pfn_level_offset(pfn, level);
1015 pte = &parent[offset];
1016 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
1017 break;
1018 if (level == *target_level)
1019 break;
1020
1021 if (!dma_pte_present(pte)) {
1022 uint64_t pteval;
1023
1024 tmp_page = alloc_pgtable_page(domain->nid);
1025
1026 if (!tmp_page)
1027 return NULL;
1028
1029 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
1030 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
1031 if (domain_use_first_level(domain)) {
1032 pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
1033 if (domain->domain.type == IOMMU_DOMAIN_DMA)
1034 pteval |= DMA_FL_PTE_ACCESS;
1035 }
1036 if (cmpxchg64(&pte->val, 0ULL, pteval))
1037 /* Someone else set it while we were thinking; use theirs. */
1038 free_pgtable_page(tmp_page);
1039 else
1040 domain_flush_cache(domain, pte, sizeof(*pte));
1041 }
1042 if (level == 1)
1043 break;
1044
1045 parent = phys_to_virt(dma_pte_addr(pte));
1046 level--;
1047 }
1048
1049 if (!*target_level)
1050 *target_level = level;
1051
1052 return pte;
1053 }
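/*
 * Worked example of the walk above, assuming a 4-level table (agaw 2) and
 * the illustrative IOVA 0x40201000 (1GiB + 2MiB + 4KiB): the DMA pfn is
 * 0x40201 and pfn_level_offset() yields
 *
 *	level 4: (0x40201 >> 27) & 0x1ff = 0
 *	level 3: (0x40201 >> 18) & 0x1ff = 1
 *	level 2: (0x40201 >>  9) & 0x1ff = 1
 *	level 1:  0x40201        & 0x1ff = 1
 *
 * so the walk descends through entry 0 of the top table and entry 1 of
 * each lower table until it reaches the requested *target_level.
 */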
1054
1055 /* return address's pte at specific level */
1056 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
1057 unsigned long pfn,
1058 int level, int *large_page)
1059 {
1060 struct dma_pte *parent, *pte;
1061 int total = agaw_to_level(domain->agaw);
1062 int offset;
1063
1064 parent = domain->pgd;
1065 while (level <= total) {
1066 offset = pfn_level_offset(pfn, total);
1067 pte = &parent[offset];
1068 if (level == total)
1069 return pte;
1070
1071 if (!dma_pte_present(pte)) {
1072 *large_page = total;
1073 break;
1074 }
1075
1076 if (dma_pte_superpage(pte)) {
1077 *large_page = total;
1078 return pte;
1079 }
1080
1081 parent = phys_to_virt(dma_pte_addr(pte));
1082 total--;
1083 }
1084 return NULL;
1085 }
1086
1087 /* clear last level pte, a tlb flush should be followed */
1088 static void dma_pte_clear_range(struct dmar_domain *domain,
1089 unsigned long start_pfn,
1090 unsigned long last_pfn)
1091 {
1092 unsigned int large_page;
1093 struct dma_pte *first_pte, *pte;
1094
1095 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1096 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1097 BUG_ON(start_pfn > last_pfn);
1098
1099 /* we don't need lock here; nobody else touches the iova range */
1100 do {
1101 large_page = 1;
1102 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
1103 if (!pte) {
1104 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
1105 continue;
1106 }
1107 do {
1108 dma_clear_pte(pte);
1109 start_pfn += lvl_to_nr_pages(large_page);
1110 pte++;
1111 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1112
1113 domain_flush_cache(domain, first_pte,
1114 (void *)pte - (void *)first_pte);
1115
1116 } while (start_pfn && start_pfn <= last_pfn);
1117 }
1118
1119 static void dma_pte_free_level(struct dmar_domain *domain, int level,
1120 int retain_level, struct dma_pte *pte,
1121 unsigned long pfn, unsigned long start_pfn,
1122 unsigned long last_pfn)
1123 {
1124 pfn = max(start_pfn, pfn);
1125 pte = &pte[pfn_level_offset(pfn, level)];
1126
1127 do {
1128 unsigned long level_pfn;
1129 struct dma_pte *level_pte;
1130
1131 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1132 goto next;
1133
1134 level_pfn = pfn & level_mask(level);
1135 level_pte = phys_to_virt(dma_pte_addr(pte));
1136
1137 if (level > 2) {
1138 dma_pte_free_level(domain, level - 1, retain_level,
1139 level_pte, level_pfn, start_pfn,
1140 last_pfn);
1141 }
1142
1143 /*
1144 * Free the page table if we're below the level we want to
1145 * retain and the range covers the entire table.
1146 */
1147 if (level < retain_level && !(start_pfn > level_pfn ||
1148 last_pfn < level_pfn + level_size(level) - 1)) {
1149 dma_clear_pte(pte);
1150 domain_flush_cache(domain, pte, sizeof(*pte));
1151 free_pgtable_page(level_pte);
1152 }
1153 next:
1154 pfn += level_size(level);
1155 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1156 }
1157
1158 /*
1159 * clear last level (leaf) ptes and free page table pages below the
1160 * level we wish to keep intact.
1161 */
1162 static void dma_pte_free_pagetable(struct dmar_domain *domain,
1163 unsigned long start_pfn,
1164 unsigned long last_pfn,
1165 int retain_level)
1166 {
1167 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1168 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1169 BUG_ON(start_pfn > last_pfn);
1170
1171 dma_pte_clear_range(domain, start_pfn, last_pfn);
1172
1173 /* We don't need lock here; nobody else touches the iova range */
1174 dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
1175 domain->pgd, 0, start_pfn, last_pfn);
1176
1177 /* free pgd */
1178 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1179 free_pgtable_page(domain->pgd);
1180 domain->pgd = NULL;
1181 }
1182 }
1183
1184 /* When a page at a given level is being unlinked from its parent, we don't
1185 need to *modify* it at all. All we need to do is make a list of all the
1186 pages which can be freed just as soon as we've flushed the IOTLB and we
1187 know the hardware page-walk will no longer touch them.
1188 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1189 be freed. */
1190 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1191 int level, struct dma_pte *pte,
1192 struct page *freelist)
1193 {
1194 struct page *pg;
1195
1196 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1197 pg->freelist = freelist;
1198 freelist = pg;
1199
1200 if (level == 1)
1201 return freelist;
1202
1203 pte = page_address(pg);
1204 do {
1205 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1206 freelist = dma_pte_list_pagetables(domain, level - 1,
1207 pte, freelist);
1208 pte++;
1209 } while (!first_pte_in_page(pte));
1210
1211 return freelist;
1212 }
1213
1214 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1215 struct dma_pte *pte, unsigned long pfn,
1216 unsigned long start_pfn,
1217 unsigned long last_pfn,
1218 struct page *freelist)
1219 {
1220 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1221
1222 pfn = max(start_pfn, pfn);
1223 pte = &pte[pfn_level_offset(pfn, level)];
1224
1225 do {
1226 unsigned long level_pfn;
1227
1228 if (!dma_pte_present(pte))
1229 goto next;
1230
1231 level_pfn = pfn & level_mask(level);
1232
1233 /* If range covers entire pagetable, free it */
1234 if (start_pfn <= level_pfn &&
1235 last_pfn >= level_pfn + level_size(level) - 1) {
1236 /* These subordinate page tables are going away entirely. Don't
1237 bother to clear them; we're just going to *free* them. */
1238 if (level > 1 && !dma_pte_superpage(pte))
1239 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1240
1241 dma_clear_pte(pte);
1242 if (!first_pte)
1243 first_pte = pte;
1244 last_pte = pte;
1245 } else if (level > 1) {
1246 /* Recurse down into a level that isn't *entirely* obsolete */
1247 freelist = dma_pte_clear_level(domain, level - 1,
1248 phys_to_virt(dma_pte_addr(pte)),
1249 level_pfn, start_pfn, last_pfn,
1250 freelist);
1251 }
1252 next:
1253 pfn += level_size(level);
1254 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1255
1256 if (first_pte)
1257 domain_flush_cache(domain, first_pte,
1258 (void *)++last_pte - (void *)first_pte);
1259
1260 return freelist;
1261 }
1262
1263 /* We can't just free the pages because the IOMMU may still be walking
1264 the page tables, and may have cached the intermediate levels. The
1265 pages can only be freed after the IOTLB flush has been done. */
1266 static struct page *domain_unmap(struct dmar_domain *domain,
1267 unsigned long start_pfn,
1268 unsigned long last_pfn)
1269 {
1270 struct page *freelist;
1271
1272 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1273 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1274 BUG_ON(start_pfn > last_pfn);
1275
1276 /* we don't need lock here; nobody else touches the iova range */
1277 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1278 domain->pgd, 0, start_pfn, last_pfn, NULL);
1279
1280 /* free pgd */
1281 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1282 struct page *pgd_page = virt_to_page(domain->pgd);
1283 pgd_page->freelist = freelist;
1284 freelist = pgd_page;
1285
1286 domain->pgd = NULL;
1287 }
1288
1289 return freelist;
1290 }
1291
1292 static void dma_free_pagelist(struct page *freelist)
1293 {
1294 struct page *pg;
1295
1296 while ((pg = freelist)) {
1297 freelist = pg->freelist;
1298 free_pgtable_page(page_address(pg));
1299 }
1300 }
1301
1302 static void iova_entry_free(unsigned long data)
1303 {
1304 struct page *freelist = (struct page *)data;
1305
1306 dma_free_pagelist(freelist);
1307 }
1308
1309 /* iommu handling */
1310 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1311 {
1312 struct root_entry *root;
1313 unsigned long flags;
1314
1315 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1316 if (!root) {
1317 pr_err("Allocating root entry for %s failed\n",
1318 iommu->name);
1319 return -ENOMEM;
1320 }
1321
1322 __iommu_flush_cache(iommu, root, ROOT_SIZE);
1323
1324 spin_lock_irqsave(&iommu->lock, flags);
1325 iommu->root_entry = root;
1326 spin_unlock_irqrestore(&iommu->lock, flags);
1327
1328 return 0;
1329 }
1330
1331 static void iommu_set_root_entry(struct intel_iommu *iommu)
1332 {
1333 u64 addr;
1334 u32 sts;
1335 unsigned long flag;
1336
1337 addr = virt_to_phys(iommu->root_entry);
1338 if (sm_supported(iommu))
1339 addr |= DMA_RTADDR_SMT;
1340
1341 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1342 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1343
1344 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1345
1346 /* Make sure hardware complete it */
1347 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1348 readl, (sts & DMA_GSTS_RTPS), sts);
1349
1350 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1351
1352 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1353 if (sm_supported(iommu))
1354 qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
1355 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1356 }
1357
1358 void iommu_flush_write_buffer(struct intel_iommu *iommu)
1359 {
1360 u32 val;
1361 unsigned long flag;
1362
1363 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1364 return;
1365
1366 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1367 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1368
1369 /* Make sure hardware complete it */
1370 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1371 readl, (!(val & DMA_GSTS_WBFS)), val);
1372
1373 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1374 }
1375
1376 /* return value determines if we need a write buffer flush */
1377 static void __iommu_flush_context(struct intel_iommu *iommu,
1378 u16 did, u16 source_id, u8 function_mask,
1379 u64 type)
1380 {
1381 u64 val = 0;
1382 unsigned long flag;
1383
1384 switch (type) {
1385 case DMA_CCMD_GLOBAL_INVL:
1386 val = DMA_CCMD_GLOBAL_INVL;
1387 break;
1388 case DMA_CCMD_DOMAIN_INVL:
1389 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1390 break;
1391 case DMA_CCMD_DEVICE_INVL:
1392 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1393 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1394 break;
1395 default:
1396 BUG();
1397 }
1398 val |= DMA_CCMD_ICC;
1399
1400 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1401 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1402
1403 /* Make sure hardware complete it */
1404 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1405 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1406
1407 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1408 }
1409
1410 /* return value determines if we need a write buffer flush */
1411 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1412 u64 addr, unsigned int size_order, u64 type)
1413 {
1414 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1415 u64 val = 0, val_iva = 0;
1416 unsigned long flag;
1417
1418 switch (type) {
1419 case DMA_TLB_GLOBAL_FLUSH:
1420 /* global flush doesn't need to set IVA_REG */
1421 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1422 break;
1423 case DMA_TLB_DSI_FLUSH:
1424 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1425 break;
1426 case DMA_TLB_PSI_FLUSH:
1427 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1428 /* IH bit is passed in as part of address */
1429 val_iva = size_order | addr;
1430 break;
1431 default:
1432 BUG();
1433 }
1434 /* Note: set drain read/write */
1435 #if 0
1436 /*
1437 * This is probably meant to be extra safe. It looks like we can
1438 * ignore it without any impact.
1439 */
1440 if (cap_read_drain(iommu->cap))
1441 val |= DMA_TLB_READ_DRAIN;
1442 #endif
1443 if (cap_write_drain(iommu->cap))
1444 val |= DMA_TLB_WRITE_DRAIN;
1445
1446 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1447 /* Note: Only uses first TLB reg currently */
1448 if (val_iva)
1449 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1450 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1451
1452 /* Make sure hardware complete it */
1453 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1454 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1455
1456 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1457
1458 /* check IOTLB invalidation granularity */
1459 if (DMA_TLB_IAIG(val) == 0)
1460 pr_err("Flush IOTLB failed\n");
1461 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1462 pr_debug("TLB flush request %Lx, actual %Lx\n",
1463 (unsigned long long)DMA_TLB_IIRG(type),
1464 (unsigned long long)DMA_TLB_IAIG(val));
1465 }
1466
1467 static struct device_domain_info *
iommu_support_dev_iotlb(struct dmar_domain * domain,struct intel_iommu * iommu,u8 bus,u8 devfn)1468 iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1469 u8 bus, u8 devfn)
1470 {
1471 struct device_domain_info *info;
1472
1473 assert_spin_locked(&device_domain_lock);
1474
1475 if (!iommu->qi)
1476 return NULL;
1477
1478 list_for_each_entry(info, &domain->devices, link)
1479 if (info->iommu == iommu && info->bus == bus &&
1480 info->devfn == devfn) {
1481 if (info->ats_supported && info->dev)
1482 return info;
1483 break;
1484 }
1485
1486 return NULL;
1487 }
1488
1489 static void domain_update_iotlb(struct dmar_domain *domain)
1490 {
1491 struct device_domain_info *info;
1492 bool has_iotlb_device = false;
1493
1494 assert_spin_locked(&device_domain_lock);
1495
1496 list_for_each_entry(info, &domain->devices, link) {
1497 struct pci_dev *pdev;
1498
1499 if (!info->dev || !dev_is_pci(info->dev))
1500 continue;
1501
1502 pdev = to_pci_dev(info->dev);
1503 if (pdev->ats_enabled) {
1504 has_iotlb_device = true;
1505 break;
1506 }
1507 }
1508
1509 domain->has_iotlb_device = has_iotlb_device;
1510 }
1511
1512 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1513 {
1514 struct pci_dev *pdev;
1515
1516 assert_spin_locked(&device_domain_lock);
1517
1518 if (!info || !dev_is_pci(info->dev))
1519 return;
1520
1521 pdev = to_pci_dev(info->dev);
1522 /* For an IOMMU that supports device IOTLB throttling (DIT), we assign
1523 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
1524 * queue depth at PF level. If DIT is not set, PFSID will be treated as
1525 * reserved, which should be set to 0.
1526 */
1527 if (!ecap_dit(info->iommu->ecap))
1528 info->pfsid = 0;
1529 else {
1530 struct pci_dev *pf_pdev;
1531
1532 /* pdev will be returned if device is not a vf */
1533 pf_pdev = pci_physfn(pdev);
1534 info->pfsid = pci_dev_id(pf_pdev);
1535 }
1536
1537 #ifdef CONFIG_INTEL_IOMMU_SVM
1538 /* The PCIe spec, in its wisdom, declares that the behaviour of
1539 the device if you enable PASID support after ATS support is
1540 undefined. So always enable PASID support on devices which
1541 have it, even if we can't yet know if we're ever going to
1542 use it. */
1543 if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1544 info->pasid_enabled = 1;
1545
1546 if (info->pri_supported &&
1547 (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1) &&
1548 !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1549 info->pri_enabled = 1;
1550 #endif
1551 if (info->ats_supported && pci_ats_page_aligned(pdev) &&
1552 !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1553 info->ats_enabled = 1;
1554 domain_update_iotlb(info->domain);
1555 info->ats_qdep = pci_ats_queue_depth(pdev);
1556 }
1557 }
1558
1559 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1560 {
1561 struct pci_dev *pdev;
1562
1563 assert_spin_locked(&device_domain_lock);
1564
1565 if (!dev_is_pci(info->dev))
1566 return;
1567
1568 pdev = to_pci_dev(info->dev);
1569
1570 if (info->ats_enabled) {
1571 pci_disable_ats(pdev);
1572 info->ats_enabled = 0;
1573 domain_update_iotlb(info->domain);
1574 }
1575 #ifdef CONFIG_INTEL_IOMMU_SVM
1576 if (info->pri_enabled) {
1577 pci_disable_pri(pdev);
1578 info->pri_enabled = 0;
1579 }
1580 if (info->pasid_enabled) {
1581 pci_disable_pasid(pdev);
1582 info->pasid_enabled = 0;
1583 }
1584 #endif
1585 }
1586
1587 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1588 u64 addr, unsigned mask)
1589 {
1590 u16 sid, qdep;
1591 unsigned long flags;
1592 struct device_domain_info *info;
1593
1594 if (!domain->has_iotlb_device)
1595 return;
1596
1597 spin_lock_irqsave(&device_domain_lock, flags);
1598 list_for_each_entry(info, &domain->devices, link) {
1599 if (!info->ats_enabled)
1600 continue;
1601
1602 sid = info->bus << 8 | info->devfn;
1603 qdep = info->ats_qdep;
1604 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1605 qdep, addr, mask);
1606 }
1607 spin_unlock_irqrestore(&device_domain_lock, flags);
1608 }
1609
1610 static void domain_flush_piotlb(struct intel_iommu *iommu,
1611 struct dmar_domain *domain,
1612 u64 addr, unsigned long npages, bool ih)
1613 {
1614 u16 did = domain->iommu_did[iommu->seq_id];
1615
1616 if (domain->default_pasid)
1617 qi_flush_piotlb(iommu, did, domain->default_pasid,
1618 addr, npages, ih);
1619
1620 if (!list_empty(&domain->devices))
1621 qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, npages, ih);
1622 }
1623
1624 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1625 struct dmar_domain *domain,
1626 unsigned long pfn, unsigned int pages,
1627 int ih, int map)
1628 {
1629 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1630 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1631 u16 did = domain->iommu_did[iommu->seq_id];
1632
1633 BUG_ON(pages == 0);
1634
1635 if (ih)
1636 ih = 1 << 6;
1637
1638 if (domain_use_first_level(domain)) {
1639 domain_flush_piotlb(iommu, domain, addr, pages, ih);
1640 } else {
1641 /*
1642 * Fallback to domain selective flush if no PSI support or
1643 * the size is too big. PSI requires page size to be 2 ^ x,
1644 * and the base address is naturally aligned to the size.
1645 */
1646 if (!cap_pgsel_inv(iommu->cap) ||
1647 mask > cap_max_amask_val(iommu->cap))
1648 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1649 DMA_TLB_DSI_FLUSH);
1650 else
1651 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1652 DMA_TLB_PSI_FLUSH);
1653 }
1654
1655 /*
1656 * In caching mode, changes of pages from non-present to present require
1657 * flush. However, device IOTLB doesn't need to be flushed in this case.
1658 */
1659 if (!cap_caching_mode(iommu->cap) || !map)
1660 iommu_flush_dev_iotlb(domain, addr, mask);
1661 }
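/*
 * Example of the invalidation sizing above: flushing 9 pages gives
 * mask = ilog2(__roundup_pow_of_two(9)) = ilog2(16) = 4, i.e. the hardware
 * is asked to invalidate an aligned block of 2^4 = 16 pages containing the
 * range. If the unit lacks page-selective invalidation or mask exceeds
 * cap_max_amask_val(), the code falls back to a domain-selective flush.
 */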
1662
1663 /* Notification for newly created mappings */
1664 static inline void __mapping_notify_one(struct intel_iommu *iommu,
1665 struct dmar_domain *domain,
1666 unsigned long pfn, unsigned int pages)
1667 {
1668 /*
1669 * It's a non-present to present mapping. Only flush if caching mode
1670 * and second level.
1671 */
1672 if (cap_caching_mode(iommu->cap) && !domain_use_first_level(domain))
1673 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1674 else
1675 iommu_flush_write_buffer(iommu);
1676 }
1677
1678 static void iommu_flush_iova(struct iova_domain *iovad)
1679 {
1680 struct dmar_domain *domain;
1681 int idx;
1682
1683 domain = container_of(iovad, struct dmar_domain, iovad);
1684
1685 for_each_domain_iommu(idx, domain) {
1686 struct intel_iommu *iommu = g_iommus[idx];
1687 u16 did = domain->iommu_did[iommu->seq_id];
1688
1689 if (domain_use_first_level(domain))
1690 domain_flush_piotlb(iommu, domain, 0, -1, 0);
1691 else
1692 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1693 DMA_TLB_DSI_FLUSH);
1694
1695 if (!cap_caching_mode(iommu->cap))
1696 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1697 0, MAX_AGAW_PFN_WIDTH);
1698 }
1699 }
1700
1701 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1702 {
1703 u32 pmen;
1704 unsigned long flags;
1705
1706 if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1707 return;
1708
1709 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1710 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1711 pmen &= ~DMA_PMEN_EPM;
1712 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1713
1714 /* wait for the protected region status bit to clear */
1715 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1716 readl, !(pmen & DMA_PMEN_PRS), pmen);
1717
1718 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1719 }
1720
1721 static void iommu_enable_translation(struct intel_iommu *iommu)
1722 {
1723 u32 sts;
1724 unsigned long flags;
1725
1726 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1727 iommu->gcmd |= DMA_GCMD_TE;
1728 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1729
1730 /* Make sure hardware complete it */
1731 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1732 readl, (sts & DMA_GSTS_TES), sts);
1733
1734 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1735 }
1736
1737 static void iommu_disable_translation(struct intel_iommu *iommu)
1738 {
1739 u32 sts;
1740 unsigned long flag;
1741
1742 if (iommu_skip_te_disable && iommu->drhd->gfx_dedicated &&
1743 (cap_read_drain(iommu->cap) || cap_write_drain(iommu->cap)))
1744 return;
1745
1746 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1747 iommu->gcmd &= ~DMA_GCMD_TE;
1748 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1749
1750 /* Make sure hardware complete it */
1751 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1752 readl, (!(sts & DMA_GSTS_TES)), sts);
1753
1754 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1755 }
1756
1757 static int iommu_init_domains(struct intel_iommu *iommu)
1758 {
1759 u32 ndomains, nlongs;
1760 size_t size;
1761
1762 ndomains = cap_ndoms(iommu->cap);
1763 pr_debug("%s: Number of Domains supported <%d>\n",
1764 iommu->name, ndomains);
1765 nlongs = BITS_TO_LONGS(ndomains);
1766
1767 spin_lock_init(&iommu->lock);
1768
1769 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1770 if (!iommu->domain_ids) {
1771 pr_err("%s: Allocating domain id array failed\n",
1772 iommu->name);
1773 return -ENOMEM;
1774 }
1775
1776 size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
1777 iommu->domains = kzalloc(size, GFP_KERNEL);
1778
1779 if (iommu->domains) {
1780 size = 256 * sizeof(struct dmar_domain *);
1781 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1782 }
1783
1784 if (!iommu->domains || !iommu->domains[0]) {
1785 pr_err("%s: Allocating domain array failed\n",
1786 iommu->name);
1787 kfree(iommu->domain_ids);
1788 kfree(iommu->domains);
1789 iommu->domain_ids = NULL;
1790 iommu->domains = NULL;
1791 return -ENOMEM;
1792 }
1793
1794 /*
1795 * If Caching mode is set, then invalid translations are tagged
1796 * with domain-id 0, hence we need to pre-allocate it. We also
1797 * use domain-id 0 as a marker for non-allocated domain-id, so
1798 * make sure it is not used for a real domain.
1799 */
1800 set_bit(0, iommu->domain_ids);
1801
1802 /*
1803 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
1804 * entry for first-level or pass-through translation modes should
1805 * be programmed with a domain id different from those used for
1806 * second-level or nested translation. We reserve a domain id for
1807 * this purpose.
1808 */
1809 if (sm_supported(iommu))
1810 set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1811
1812 return 0;
1813 }
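/*
 * Illustrative sizing example (added for clarity, not part of the original
 * source): if the hardware reports the VT-d maximum of 65536 domain IDs,
 * the domain_ids bitmap above needs BITS_TO_LONGS(65536) = 1024 unsigned
 * longs on a 64-bit kernel, and iommu->domains becomes a two-level array
 * of ALIGN(65536, 256) >> 8 = 256 second-level pointers, each of which
 * (like iommu->domains[0] above) lazily points to a chunk of 256
 * struct dmar_domain pointers.
 */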
1814
1815 static void disable_dmar_iommu(struct intel_iommu *iommu)
1816 {
1817 struct device_domain_info *info, *tmp;
1818 unsigned long flags;
1819
1820 if (!iommu->domains || !iommu->domain_ids)
1821 return;
1822
1823 spin_lock_irqsave(&device_domain_lock, flags);
1824 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1825 if (info->iommu != iommu)
1826 continue;
1827
1828 if (!info->dev || !info->domain)
1829 continue;
1830
1831 __dmar_remove_one_dev_info(info);
1832 }
1833 spin_unlock_irqrestore(&device_domain_lock, flags);
1834
1835 if (iommu->gcmd & DMA_GCMD_TE)
1836 iommu_disable_translation(iommu);
1837 }
1838
1839 static void free_dmar_iommu(struct intel_iommu *iommu)
1840 {
1841 if ((iommu->domains) && (iommu->domain_ids)) {
1842 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
1843 int i;
1844
1845 for (i = 0; i < elems; i++)
1846 kfree(iommu->domains[i]);
1847 kfree(iommu->domains);
1848 kfree(iommu->domain_ids);
1849 iommu->domains = NULL;
1850 iommu->domain_ids = NULL;
1851 }
1852
1853 g_iommus[iommu->seq_id] = NULL;
1854
1855 /* free context mapping */
1856 free_context_table(iommu);
1857
1858 #ifdef CONFIG_INTEL_IOMMU_SVM
1859 if (pasid_supported(iommu)) {
1860 if (ecap_prs(iommu->ecap))
1861 intel_svm_finish_prq(iommu);
1862 }
1863 if (vccap_pasid(iommu->vccap))
1864 ioasid_unregister_allocator(&iommu->pasid_allocator);
1865
1866 #endif
1867 }
1868
1869 /*
1870 * Check and return whether first level is used by default for
1871 * DMA translation.
1872 */
1873 static bool first_level_by_default(void)
1874 {
1875 struct dmar_drhd_unit *drhd;
1876 struct intel_iommu *iommu;
1877 static int first_level_support = -1;
1878
1879 if (likely(first_level_support != -1))
1880 return first_level_support;
1881
1882 first_level_support = 1;
1883
1884 rcu_read_lock();
1885 for_each_active_iommu(iommu, drhd) {
1886 if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) {
1887 first_level_support = 0;
1888 break;
1889 }
1890 }
1891 rcu_read_unlock();
1892
1893 return first_level_support;
1894 }
1895
1896 static struct dmar_domain *alloc_domain(int flags)
1897 {
1898 struct dmar_domain *domain;
1899
1900 domain = alloc_domain_mem();
1901 if (!domain)
1902 return NULL;
1903
1904 memset(domain, 0, sizeof(*domain));
1905 domain->nid = NUMA_NO_NODE;
1906 domain->flags = flags;
1907 if (first_level_by_default())
1908 domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
1909 domain->has_iotlb_device = false;
1910 INIT_LIST_HEAD(&domain->devices);
1911
1912 return domain;
1913 }
1914
1915 /* Must be called with iommu->lock */
1916 static int domain_attach_iommu(struct dmar_domain *domain,
1917 struct intel_iommu *iommu)
1918 {
1919 unsigned long ndomains;
1920 int num;
1921
1922 assert_spin_locked(&device_domain_lock);
1923 assert_spin_locked(&iommu->lock);
1924
1925 domain->iommu_refcnt[iommu->seq_id] += 1;
1926 domain->iommu_count += 1;
1927 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
1928 ndomains = cap_ndoms(iommu->cap);
1929 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1930
1931 if (num >= ndomains) {
1932 pr_err("%s: No free domain ids\n", iommu->name);
1933 domain->iommu_refcnt[iommu->seq_id] -= 1;
1934 domain->iommu_count -= 1;
1935 return -ENOSPC;
1936 }
1937
1938 set_bit(num, iommu->domain_ids);
1939 set_iommu_domain(iommu, num, domain);
1940
1941 domain->iommu_did[iommu->seq_id] = num;
1942 domain->nid = iommu->node;
1943
1944 domain_update_iommu_cap(domain);
1945 }
1946
1947 return 0;
1948 }
1949
1950 static int domain_detach_iommu(struct dmar_domain *domain,
1951 struct intel_iommu *iommu)
1952 {
1953 int num, count;
1954
1955 assert_spin_locked(&device_domain_lock);
1956 assert_spin_locked(&iommu->lock);
1957
1958 domain->iommu_refcnt[iommu->seq_id] -= 1;
1959 count = --domain->iommu_count;
1960 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
1961 num = domain->iommu_did[iommu->seq_id];
1962 clear_bit(num, iommu->domain_ids);
1963 set_iommu_domain(iommu, num, NULL);
1964
1965 domain_update_iommu_cap(domain);
1966 domain->iommu_did[iommu->seq_id] = 0;
1967 }
1968
1969 return count;
1970 }
1971
1972 static struct iova_domain reserved_iova_list;
1973 static struct lock_class_key reserved_rbtree_key;
1974
1975 static int dmar_init_reserved_ranges(void)
1976 {
1977 struct pci_dev *pdev = NULL;
1978 struct iova *iova;
1979 int i;
1980
1981 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
1982
1983 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1984 &reserved_rbtree_key);
1985
1986 /* IOAPIC ranges shouldn't be accessed by DMA */
1987 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1988 IOVA_PFN(IOAPIC_RANGE_END));
1989 if (!iova) {
1990 pr_err("Reserve IOAPIC range failed\n");
1991 return -ENODEV;
1992 }
1993
1994 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1995 for_each_pci_dev(pdev) {
1996 struct resource *r;
1997
1998 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1999 r = &pdev->resource[i];
2000 if (!r->flags || !(r->flags & IORESOURCE_MEM))
2001 continue;
2002 iova = reserve_iova(&reserved_iova_list,
2003 IOVA_PFN(r->start),
2004 IOVA_PFN(r->end));
2005 if (!iova) {
2006 pci_err(pdev, "Reserve iova for %pR failed\n", r);
2007 return -ENODEV;
2008 }
2009 }
2010 }
2011 return 0;
2012 }
2013
2014 static inline int guestwidth_to_adjustwidth(int gaw)
2015 {
2016 int agaw;
2017 int r = (gaw - 12) % 9;
2018
2019 if (r == 0)
2020 agaw = gaw;
2021 else
2022 agaw = gaw + 9 - r;
2023 if (agaw > 64)
2024 agaw = 64;
2025 return agaw;
2026 }
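/*
 * Worked example (illustrative, added for clarity): for gaw = 48,
 * (48 - 12) % 9 == 0, so agaw stays 48. For gaw = 52, (52 - 12) % 9 == 4,
 * so the width is rounded up to the next page-table level boundary,
 * 52 + 9 - 4 = 57; anything that would exceed 64 is clamped to 64.
 */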
2027
2028 static void domain_exit(struct dmar_domain *domain)
2029 {
2030
2031 /* Remove associated devices and clear attached or cached domains */
2032 domain_remove_dev_info(domain);
2033
2034 /* destroy iovas */
2035 if (domain->domain.type == IOMMU_DOMAIN_DMA)
2036 put_iova_domain(&domain->iovad);
2037
2038 if (domain->pgd) {
2039 struct page *freelist;
2040
2041 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
2042 dma_free_pagelist(freelist);
2043 }
2044
2045 free_domain_mem(domain);
2046 }
2047
2048 /*
2049 * Get the PASID directory size for scalable mode context entry.
2050 * Value of X in the PDTS field of a scalable mode context entry
2051 * indicates PASID directory with 2^(X + 7) entries.
2052 */
2053 static inline unsigned long context_get_sm_pds(struct pasid_table *table)
2054 {
2055 int pds, max_pde;
2056
2057 max_pde = table->max_pasid >> PASID_PDE_SHIFT;
2058 pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
2059 if (pds < 7)
2060 return 0;
2061
2062 return pds - 7;
2063 }
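/*
 * Worked example (illustrative; assumes PASID_PDE_SHIFT is 6, i.e. 64
 * PASID table entries per directory entry): a table with max_pasid =
 * 0x10000 (2^16 PASIDs) gives max_pde = 0x400, find_first_bit() returns
 * 10, and the function returns 10 - 7 = 3, which the PDTS field encodes
 * as a PASID directory of 2^(3 + 7) = 1024 entries.
 */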
2064
2065 /*
2066 * Set the RID_PASID field of a scalable mode context entry. The
2067 * IOMMU hardware will use the PASID value set in this field for
2068 * DMA translations of DMA requests without PASID.
2069 */
2070 static inline void
2071 context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
2072 {
2073 context->hi |= pasid & ((1 << 20) - 1);
2074 }
2075
2076 /*
2077 * Set the DTE(Device-TLB Enable) field of a scalable mode context
2078 * entry.
2079 */
2080 static inline void context_set_sm_dte(struct context_entry *context)
2081 {
2082 context->lo |= (1 << 2);
2083 }
2084
2085 /*
2086 * Set the PRE(Page Request Enable) field of a scalable mode context
2087 * entry.
2088 */
2089 static inline void context_set_sm_pre(struct context_entry *context)
2090 {
2091 context->lo |= (1 << 4);
2092 }
2093
2094 /* Convert value to context PASID directory size field coding. */
2095 #define context_pdts(pds) (((pds) & 0x7) << 9)
2096
2097 static int domain_context_mapping_one(struct dmar_domain *domain,
2098 struct intel_iommu *iommu,
2099 struct pasid_table *table,
2100 u8 bus, u8 devfn)
2101 {
2102 u16 did = domain->iommu_did[iommu->seq_id];
2103 int translation = CONTEXT_TT_MULTI_LEVEL;
2104 struct device_domain_info *info = NULL;
2105 struct context_entry *context;
2106 unsigned long flags;
2107 int ret;
2108
2109 WARN_ON(did == 0);
2110
2111 if (hw_pass_through && domain_type_is_si(domain))
2112 translation = CONTEXT_TT_PASS_THROUGH;
2113
2114 pr_debug("Set context mapping for %02x:%02x.%d\n",
2115 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
2116
2117 BUG_ON(!domain->pgd);
2118
2119 spin_lock_irqsave(&device_domain_lock, flags);
2120 spin_lock(&iommu->lock);
2121
2122 ret = -ENOMEM;
2123 context = iommu_context_addr(iommu, bus, devfn, 1);
2124 if (!context)
2125 goto out_unlock;
2126
2127 ret = 0;
2128 if (context_present(context))
2129 goto out_unlock;
2130
2131 /*
2132 * For kdump cases, old valid entries may be cached due to the
2133 * in-flight DMA and copied pgtable, but there is no unmapping
2134 * behaviour for them, thus we need an explicit cache flush for
2135 * the newly-mapped device. For kdump, at this point, the device
2136 * is supposed to have finished reset at its driver probe stage, so no
2137 * in-flight DMA will exist, and we don't need to worry about it
2138 * hereafter.
2139 */
2140 if (context_copied(context)) {
2141 u16 did_old = context_domain_id(context);
2142
2143 if (did_old < cap_ndoms(iommu->cap)) {
2144 iommu->flush.flush_context(iommu, did_old,
2145 (((u16)bus) << 8) | devfn,
2146 DMA_CCMD_MASK_NOBIT,
2147 DMA_CCMD_DEVICE_INVL);
2148 iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2149 DMA_TLB_DSI_FLUSH);
2150 }
2151 }
2152
2153 context_clear_entry(context);
2154
2155 if (sm_supported(iommu)) {
2156 unsigned long pds;
2157
2158 WARN_ON(!table);
2159
2160 /* Setup the PASID DIR pointer: */
2161 pds = context_get_sm_pds(table);
2162 context->lo = (u64)virt_to_phys(table->table) |
2163 context_pdts(pds);
2164
2165 /* Setup the RID_PASID field: */
2166 context_set_sm_rid2pasid(context, PASID_RID2PASID);
2167
2168 /*
2169 * Setup the Device-TLB enable bit and Page request
2170 * Enable bit:
2171 */
2172 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2173 if (info && info->ats_supported)
2174 context_set_sm_dte(context);
2175 if (info && info->pri_supported)
2176 context_set_sm_pre(context);
2177 } else {
2178 struct dma_pte *pgd = domain->pgd;
2179 int agaw;
2180
2181 context_set_domain_id(context, did);
2182
2183 if (translation != CONTEXT_TT_PASS_THROUGH) {
2184 /*
2185 * Skip top levels of the page table for an iommu which has
2186 * a smaller agaw than the default. Unnecessary for PT mode.
2187 */
2188 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2189 ret = -ENOMEM;
2190 pgd = phys_to_virt(dma_pte_addr(pgd));
2191 if (!dma_pte_present(pgd))
2192 goto out_unlock;
2193 }
2194
2195 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2196 if (info && info->ats_supported)
2197 translation = CONTEXT_TT_DEV_IOTLB;
2198 else
2199 translation = CONTEXT_TT_MULTI_LEVEL;
2200
2201 context_set_address_root(context, virt_to_phys(pgd));
2202 context_set_address_width(context, agaw);
2203 } else {
2204 /*
2205 * In pass through mode, AW must be programmed to
2206 * indicate the largest AGAW value supported by
2207 * hardware. And ASR is ignored by hardware.
2208 */
2209 context_set_address_width(context, iommu->msagaw);
2210 }
2211
2212 context_set_translation_type(context, translation);
2213 }
2214
2215 context_set_fault_enable(context);
2216 context_set_present(context);
2217 if (!ecap_coherent(iommu->ecap))
2218 clflush_cache_range(context, sizeof(*context));
2219
2220 /*
2221 * It's a non-present to present mapping. If hardware doesn't cache
2222 * non-present entries we only need to flush the write-buffer. If it
2223 * _does_ cache non-present entries, then it does so in the special
2224 * domain #0, which we have to flush:
2225 */
2226 if (cap_caching_mode(iommu->cap)) {
2227 iommu->flush.flush_context(iommu, 0,
2228 (((u16)bus) << 8) | devfn,
2229 DMA_CCMD_MASK_NOBIT,
2230 DMA_CCMD_DEVICE_INVL);
2231 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2232 } else {
2233 iommu_flush_write_buffer(iommu);
2234 }
2235 iommu_enable_dev_iotlb(info);
2236
2237 ret = 0;
2238
2239 out_unlock:
2240 spin_unlock(&iommu->lock);
2241 spin_unlock_irqrestore(&device_domain_lock, flags);
2242
2243 return ret;
2244 }
2245
2246 struct domain_context_mapping_data {
2247 struct dmar_domain *domain;
2248 struct intel_iommu *iommu;
2249 struct pasid_table *table;
2250 };
2251
2252 static int domain_context_mapping_cb(struct pci_dev *pdev,
2253 u16 alias, void *opaque)
2254 {
2255 struct domain_context_mapping_data *data = opaque;
2256
2257 return domain_context_mapping_one(data->domain, data->iommu,
2258 data->table, PCI_BUS_NUM(alias),
2259 alias & 0xff);
2260 }
2261
2262 static int
2263 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2264 {
2265 struct domain_context_mapping_data data;
2266 struct pasid_table *table;
2267 struct intel_iommu *iommu;
2268 u8 bus, devfn;
2269
2270 iommu = device_to_iommu(dev, &bus, &devfn);
2271 if (!iommu)
2272 return -ENODEV;
2273
2274 table = intel_pasid_get_table(dev);
2275
2276 if (!dev_is_pci(dev))
2277 return domain_context_mapping_one(domain, iommu, table,
2278 bus, devfn);
2279
2280 data.domain = domain;
2281 data.iommu = iommu;
2282 data.table = table;
2283
2284 return pci_for_each_dma_alias(to_pci_dev(dev),
2285 &domain_context_mapping_cb, &data);
2286 }
2287
2288 static int domain_context_mapped_cb(struct pci_dev *pdev,
2289 u16 alias, void *opaque)
2290 {
2291 struct intel_iommu *iommu = opaque;
2292
2293 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2294 }
2295
2296 static int domain_context_mapped(struct device *dev)
2297 {
2298 struct intel_iommu *iommu;
2299 u8 bus, devfn;
2300
2301 iommu = device_to_iommu(dev, &bus, &devfn);
2302 if (!iommu)
2303 return -ENODEV;
2304
2305 if (!dev_is_pci(dev))
2306 return device_context_mapped(iommu, bus, devfn);
2307
2308 return !pci_for_each_dma_alias(to_pci_dev(dev),
2309 domain_context_mapped_cb, iommu);
2310 }
2311
2312 /* Returns a number of VTD pages, but aligned to MM page size */
2313 static inline unsigned long aligned_nrpages(unsigned long host_addr,
2314 size_t size)
2315 {
2316 host_addr &= ~PAGE_MASK;
2317 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2318 }
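/*
 * Worked example (illustrative; assumes 4KiB MM pages, i.e.
 * PAGE_SHIFT == VTD_PAGE_SHIFT == 12): host_addr = 0x1234 and
 * size = 0x2000 give an in-page offset of 0x234, PAGE_ALIGN(0x2234) =
 * 0x3000, and therefore 3 VT-d pages. With larger MM pages the result
 * is still rounded up to a whole MM page worth of VT-d pages.
 */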
2319
2320 /* Return largest possible superpage level for a given mapping */
2321 static inline int hardware_largepage_caps(struct dmar_domain *domain,
2322 unsigned long iov_pfn,
2323 unsigned long phy_pfn,
2324 unsigned long pages)
2325 {
2326 int support, level = 1;
2327 unsigned long pfnmerge;
2328
2329 support = domain->iommu_superpage;
2330
2331 /* To use a large page, the virtual *and* physical addresses
2332 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2333 of them will mean we have to use smaller pages. So just
2334 merge them and check both at once. */
2335 pfnmerge = iov_pfn | phy_pfn;
2336
2337 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2338 pages >>= VTD_STRIDE_SHIFT;
2339 if (!pages)
2340 break;
2341 pfnmerge >>= VTD_STRIDE_SHIFT;
2342 level++;
2343 support--;
2344 }
2345 return level;
2346 }
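/*
 * Worked example (illustrative): with iov_pfn = 0x200, phy_pfn = 0x400 and
 * pages = 0x400, pfnmerge = 0x600 has its low 9 bits clear, so both
 * addresses are 2MiB aligned and at least one full 512-page superpage
 * fits; assuming domain->iommu_superpage >= 1, the loop returns level 2
 * (a 2MiB page). If either pfn had any of its low 9 bits set, the
 * function would return level 1 and only 4KiB pages would be used.
 */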
2347
2348 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2349 struct scatterlist *sg, unsigned long phys_pfn,
2350 unsigned long nr_pages, int prot)
2351 {
2352 struct dma_pte *first_pte = NULL, *pte = NULL;
2353 phys_addr_t pteval;
2354 unsigned long sg_res = 0;
2355 unsigned int largepage_lvl = 0;
2356 unsigned long lvl_pages = 0;
2357 u64 attr;
2358
2359 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2360
2361 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2362 return -EINVAL;
2363
2364 attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
2365 attr |= DMA_FL_PTE_PRESENT;
2366 if (domain_use_first_level(domain)) {
2367 attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
2368
2369 if (domain->domain.type == IOMMU_DOMAIN_DMA) {
2370 attr |= DMA_FL_PTE_ACCESS;
2371 if (prot & DMA_PTE_WRITE)
2372 attr |= DMA_FL_PTE_DIRTY;
2373 }
2374 }
2375
2376 if (!sg) {
2377 sg_res = nr_pages;
2378 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
2379 }
2380
2381 while (nr_pages > 0) {
2382 uint64_t tmp;
2383
2384 if (!sg_res) {
2385 unsigned int pgoff = sg->offset & ~PAGE_MASK;
2386
2387 sg_res = aligned_nrpages(sg->offset, sg->length);
2388 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
2389 sg->dma_length = sg->length;
2390 pteval = (sg_phys(sg) - pgoff) | attr;
2391 phys_pfn = pteval >> VTD_PAGE_SHIFT;
2392 }
2393
2394 if (!pte) {
2395 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2396
2397 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2398 if (!pte)
2399 return -ENOMEM;
2400 /* It is a large page */
2401 if (largepage_lvl > 1) {
2402 unsigned long nr_superpages, end_pfn;
2403
2404 pteval |= DMA_PTE_LARGE_PAGE;
2405 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2406
2407 nr_superpages = sg_res / lvl_pages;
2408 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2409
2410 /*
2411 * Ensure that old small page tables are
2412 * removed to make room for superpage(s).
2413 * We're adding new large pages, so make sure
2414 * we don't remove their parent tables.
2415 */
2416 dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2417 largepage_lvl + 1);
2418 } else {
2419 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2420 }
2421
2422 }
2423 /* We don't need lock here, nobody else
2424 * touches the iova range
2425 */
2426 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2427 if (tmp) {
2428 static int dumps = 5;
2429 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2430 iov_pfn, tmp, (unsigned long long)pteval);
2431 if (dumps) {
2432 dumps--;
2433 debug_dma_dump_mappings(NULL);
2434 }
2435 WARN_ON(1);
2436 }
2437
2438 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2439
2440 BUG_ON(nr_pages < lvl_pages);
2441 BUG_ON(sg_res < lvl_pages);
2442
2443 nr_pages -= lvl_pages;
2444 iov_pfn += lvl_pages;
2445 phys_pfn += lvl_pages;
2446 pteval += lvl_pages * VTD_PAGE_SIZE;
2447 sg_res -= lvl_pages;
2448
2449 /* If the next PTE would be the first in a new page, then we
2450 need to flush the cache on the entries we've just written.
2451 And then we'll need to recalculate 'pte', so clear it and
2452 let it get set again in the if (!pte) block above.
2453
2454 If we're done (!nr_pages) we need to flush the cache too.
2455
2456 Also if we've been setting superpages, we may need to
2457 recalculate 'pte' and switch back to smaller pages for the
2458 end of the mapping, if the trailing size is not enough to
2459 use another superpage (i.e. sg_res < lvl_pages). */
2460 pte++;
2461 if (!nr_pages || first_pte_in_page(pte) ||
2462 (largepage_lvl > 1 && sg_res < lvl_pages)) {
2463 domain_flush_cache(domain, first_pte,
2464 (void *)pte - (void *)first_pte);
2465 pte = NULL;
2466 }
2467
2468 if (!sg_res && nr_pages)
2469 sg = sg_next(sg);
2470 }
2471 return 0;
2472 }
2473
2474 static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2475 struct scatterlist *sg, unsigned long phys_pfn,
2476 unsigned long nr_pages, int prot)
2477 {
2478 int iommu_id, ret;
2479 struct intel_iommu *iommu;
2480
2481 /* Do the real mapping first */
2482 ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2483 if (ret)
2484 return ret;
2485
2486 for_each_domain_iommu(iommu_id, domain) {
2487 iommu = g_iommus[iommu_id];
2488 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2489 }
2490
2491 return 0;
2492 }
2493
2494 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2495 struct scatterlist *sg, unsigned long nr_pages,
2496 int prot)
2497 {
2498 return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2499 }
2500
2501 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2502 unsigned long phys_pfn, unsigned long nr_pages,
2503 int prot)
2504 {
2505 return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2506 }
2507
2508 static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
2509 {
2510 unsigned long flags;
2511 struct context_entry *context;
2512 u16 did_old;
2513
2514 if (!iommu)
2515 return;
2516
2517 spin_lock_irqsave(&iommu->lock, flags);
2518 context = iommu_context_addr(iommu, bus, devfn, 0);
2519 if (!context) {
2520 spin_unlock_irqrestore(&iommu->lock, flags);
2521 return;
2522 }
2523 did_old = context_domain_id(context);
2524 context_clear_entry(context);
2525 __iommu_flush_cache(iommu, context, sizeof(*context));
2526 spin_unlock_irqrestore(&iommu->lock, flags);
2527 iommu->flush.flush_context(iommu,
2528 did_old,
2529 (((u16)bus) << 8) | devfn,
2530 DMA_CCMD_MASK_NOBIT,
2531 DMA_CCMD_DEVICE_INVL);
2532
2533 if (sm_supported(iommu))
2534 qi_flush_pasid_cache(iommu, did_old, QI_PC_ALL_PASIDS, 0);
2535
2536 iommu->flush.flush_iotlb(iommu,
2537 did_old,
2538 0,
2539 0,
2540 DMA_TLB_DSI_FLUSH);
2541 }
2542
2543 static inline void unlink_domain_info(struct device_domain_info *info)
2544 {
2545 assert_spin_locked(&device_domain_lock);
2546 list_del(&info->link);
2547 list_del(&info->global);
2548 if (info->dev)
2549 dev_iommu_priv_set(info->dev, NULL);
2550 }
2551
2552 static void domain_remove_dev_info(struct dmar_domain *domain)
2553 {
2554 struct device_domain_info *info, *tmp;
2555 unsigned long flags;
2556
2557 spin_lock_irqsave(&device_domain_lock, flags);
2558 list_for_each_entry_safe(info, tmp, &domain->devices, link)
2559 __dmar_remove_one_dev_info(info);
2560 spin_unlock_irqrestore(&device_domain_lock, flags);
2561 }
2562
2563 struct dmar_domain *find_domain(struct device *dev)
2564 {
2565 struct device_domain_info *info;
2566
2567 if (unlikely(!dev || !dev->iommu))
2568 return NULL;
2569
2570 if (unlikely(attach_deferred(dev)))
2571 return NULL;
2572
2573 /* No lock here, assumes no domain exit in normal case */
2574 info = get_domain_info(dev);
2575 if (likely(info))
2576 return info->domain;
2577
2578 return NULL;
2579 }
2580
2581 static void do_deferred_attach(struct device *dev)
2582 {
2583 struct iommu_domain *domain;
2584
2585 dev_iommu_priv_set(dev, NULL);
2586 domain = iommu_get_domain_for_dev(dev);
2587 if (domain)
2588 intel_iommu_attach_device(domain, dev);
2589 }
2590
2591 static inline struct device_domain_info *
2592 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2593 {
2594 struct device_domain_info *info;
2595
2596 list_for_each_entry(info, &device_domain_list, global)
2597 if (info->segment == segment && info->bus == bus &&
2598 info->devfn == devfn)
2599 return info;
2600
2601 return NULL;
2602 }
2603
2604 static int domain_setup_first_level(struct intel_iommu *iommu,
2605 struct dmar_domain *domain,
2606 struct device *dev,
2607 u32 pasid)
2608 {
2609 struct dma_pte *pgd = domain->pgd;
2610 int agaw, level;
2611 int flags = 0;
2612
2613 /*
2614 * Skip top levels of the page table for an iommu which has
2615 * a smaller agaw than the default. Unnecessary for PT mode.
2616 */
2617 for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2618 pgd = phys_to_virt(dma_pte_addr(pgd));
2619 if (!dma_pte_present(pgd))
2620 return -ENOMEM;
2621 }
2622
2623 level = agaw_to_level(agaw);
2624 if (level != 4 && level != 5)
2625 return -EINVAL;
2626
2627 if (pasid != PASID_RID2PASID)
2628 flags |= PASID_FLAG_SUPERVISOR_MODE;
2629 if (level == 5)
2630 flags |= PASID_FLAG_FL5LP;
2631
2632 if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED)
2633 flags |= PASID_FLAG_PAGE_SNOOP;
2634
2635 return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
2636 domain->iommu_did[iommu->seq_id],
2637 flags);
2638 }
2639
2640 static bool dev_is_real_dma_subdevice(struct device *dev)
2641 {
2642 return dev && dev_is_pci(dev) &&
2643 pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
2644 }
2645
2646 static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2647 int bus, int devfn,
2648 struct device *dev,
2649 struct dmar_domain *domain)
2650 {
2651 struct dmar_domain *found = NULL;
2652 struct device_domain_info *info;
2653 unsigned long flags;
2654 int ret;
2655
2656 info = alloc_devinfo_mem();
2657 if (!info)
2658 return NULL;
2659
2660 if (!dev_is_real_dma_subdevice(dev)) {
2661 info->bus = bus;
2662 info->devfn = devfn;
2663 info->segment = iommu->segment;
2664 } else {
2665 struct pci_dev *pdev = to_pci_dev(dev);
2666
2667 info->bus = pdev->bus->number;
2668 info->devfn = pdev->devfn;
2669 info->segment = pci_domain_nr(pdev->bus);
2670 }
2671
2672 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2673 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2674 info->ats_qdep = 0;
2675 info->dev = dev;
2676 info->domain = domain;
2677 info->iommu = iommu;
2678 info->pasid_table = NULL;
2679 info->auxd_enabled = 0;
2680 INIT_LIST_HEAD(&info->auxiliary_domains);
2681
2682 if (dev && dev_is_pci(dev)) {
2683 struct pci_dev *pdev = to_pci_dev(info->dev);
2684
2685 if (ecap_dev_iotlb_support(iommu->ecap) &&
2686 pci_ats_supported(pdev) &&
2687 dmar_find_matched_atsr_unit(pdev))
2688 info->ats_supported = 1;
2689
2690 if (sm_supported(iommu)) {
2691 if (pasid_supported(iommu)) {
2692 int features = pci_pasid_features(pdev);
2693 if (features >= 0)
2694 info->pasid_supported = features | 1;
2695 }
2696
2697 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2698 pci_pri_supported(pdev))
2699 info->pri_supported = 1;
2700 }
2701 }
2702
2703 spin_lock_irqsave(&device_domain_lock, flags);
2704 if (dev)
2705 found = find_domain(dev);
2706
2707 if (!found) {
2708 struct device_domain_info *info2;
2709 info2 = dmar_search_domain_by_dev_info(info->segment, info->bus,
2710 info->devfn);
2711 if (info2) {
2712 found = info2->domain;
2713 info2->dev = dev;
2714 }
2715 }
2716
2717 if (found) {
2718 spin_unlock_irqrestore(&device_domain_lock, flags);
2719 free_devinfo_mem(info);
2720 /* Caller must free the original domain */
2721 return found;
2722 }
2723
2724 spin_lock(&iommu->lock);
2725 ret = domain_attach_iommu(domain, iommu);
2726 spin_unlock(&iommu->lock);
2727
2728 if (ret) {
2729 spin_unlock_irqrestore(&device_domain_lock, flags);
2730 free_devinfo_mem(info);
2731 return NULL;
2732 }
2733
2734 list_add(&info->link, &domain->devices);
2735 list_add(&info->global, &device_domain_list);
2736 if (dev)
2737 dev_iommu_priv_set(dev, info);
2738 spin_unlock_irqrestore(&device_domain_lock, flags);
2739
2740 /* PASID table is mandatory for a PCI device in scalable mode. */
2741 if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
2742 ret = intel_pasid_alloc_table(dev);
2743 if (ret) {
2744 dev_err(dev, "PASID table allocation failed\n");
2745 dmar_remove_one_dev_info(dev);
2746 return NULL;
2747 }
2748
2749 /* Setup the PASID entry for requests without PASID: */
2750 spin_lock_irqsave(&iommu->lock, flags);
2751 if (hw_pass_through && domain_type_is_si(domain))
2752 ret = intel_pasid_setup_pass_through(iommu, domain,
2753 dev, PASID_RID2PASID);
2754 else if (domain_use_first_level(domain))
2755 ret = domain_setup_first_level(iommu, domain, dev,
2756 PASID_RID2PASID);
2757 else
2758 ret = intel_pasid_setup_second_level(iommu, domain,
2759 dev, PASID_RID2PASID);
2760 spin_unlock_irqrestore(&iommu->lock, flags);
2761 if (ret) {
2762 dev_err(dev, "Setup RID2PASID failed\n");
2763 dmar_remove_one_dev_info(dev);
2764 return NULL;
2765 }
2766 }
2767
2768 if (dev && domain_context_mapping(domain, dev)) {
2769 dev_err(dev, "Domain context map failed\n");
2770 dmar_remove_one_dev_info(dev);
2771 return NULL;
2772 }
2773
2774 return domain;
2775 }
2776
2777 static int iommu_domain_identity_map(struct dmar_domain *domain,
2778 unsigned long first_vpfn,
2779 unsigned long last_vpfn)
2780 {
2781 /*
2782 * RMRR range might have overlap with physical memory range,
2783 * clear it first
2784 */
2785 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2786
2787 return __domain_mapping(domain, first_vpfn, NULL,
2788 first_vpfn, last_vpfn - first_vpfn + 1,
2789 DMA_PTE_READ|DMA_PTE_WRITE);
2790 }
2791
2792 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2793
2794 static int __init si_domain_init(int hw)
2795 {
2796 struct dmar_rmrr_unit *rmrr;
2797 struct device *dev;
2798 int i, nid, ret;
2799
2800 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2801 if (!si_domain)
2802 return -EFAULT;
2803
2804 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2805 domain_exit(si_domain);
2806 return -EFAULT;
2807 }
2808
2809 if (hw)
2810 return 0;
2811
2812 for_each_online_node(nid) {
2813 unsigned long start_pfn, end_pfn;
2814 int i;
2815
2816 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2817 ret = iommu_domain_identity_map(si_domain,
2818 mm_to_dma_pfn(start_pfn),
2819 mm_to_dma_pfn(end_pfn));
2820 if (ret)
2821 return ret;
2822 }
2823 }
2824
2825 /*
2826 * Identity map the RMRRs so that devices with RMRRs could also use
2827 * the si_domain.
2828 */
2829 for_each_rmrr_units(rmrr) {
2830 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2831 i, dev) {
2832 unsigned long long start = rmrr->base_address;
2833 unsigned long long end = rmrr->end_address;
2834
2835 if (WARN_ON(end < start ||
2836 end >> agaw_to_width(si_domain->agaw)))
2837 continue;
2838
2839 ret = iommu_domain_identity_map(si_domain,
2840 mm_to_dma_pfn(start >> PAGE_SHIFT),
2841 mm_to_dma_pfn(end >> PAGE_SHIFT));
2842 if (ret)
2843 return ret;
2844 }
2845 }
2846
2847 return 0;
2848 }
2849
2850 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2851 {
2852 struct dmar_domain *ndomain;
2853 struct intel_iommu *iommu;
2854 u8 bus, devfn;
2855
2856 iommu = device_to_iommu(dev, &bus, &devfn);
2857 if (!iommu)
2858 return -ENODEV;
2859
2860 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2861 if (ndomain != domain)
2862 return -EBUSY;
2863
2864 return 0;
2865 }
2866
2867 static bool device_has_rmrr(struct device *dev)
2868 {
2869 struct dmar_rmrr_unit *rmrr;
2870 struct device *tmp;
2871 int i;
2872
2873 rcu_read_lock();
2874 for_each_rmrr_units(rmrr) {
2875 /*
2876 * Return TRUE if this RMRR contains the device that
2877 * is passed in.
2878 */
2879 for_each_active_dev_scope(rmrr->devices,
2880 rmrr->devices_cnt, i, tmp)
2881 if (tmp == dev ||
2882 is_downstream_to_pci_bridge(dev, tmp)) {
2883 rcu_read_unlock();
2884 return true;
2885 }
2886 }
2887 rcu_read_unlock();
2888 return false;
2889 }
2890
2891 /**
2892 * device_rmrr_is_relaxable - Test whether the RMRR of this device
2893 * is relaxable (i.e. is allowed to be not enforced under some conditions)
2894 * @dev: device handle
2895 *
2896 * We assume that PCI USB devices with RMRRs have them largely
2897 * for historical reasons and that the RMRR space is not actively used post
2898 * boot. This exclusion may change if vendors begin to abuse it.
2899 *
2900 * The same exception is made for graphics devices, with the requirement that
2901 * any use of the RMRR regions will be torn down before assigning the device
2902 * to a guest.
2903 *
2904 * Return: true if the RMRR is relaxable, false otherwise
2905 */
2906 static bool device_rmrr_is_relaxable(struct device *dev)
2907 {
2908 struct pci_dev *pdev;
2909
2910 if (!dev_is_pci(dev))
2911 return false;
2912
2913 pdev = to_pci_dev(dev);
2914 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2915 return true;
2916 else
2917 return false;
2918 }
2919
2920 /*
2921 * There are a couple cases where we need to restrict the functionality of
2922 * devices associated with RMRRs. The first is when evaluating a device for
2923 * identity mapping because problems exist when devices are moved in and out
2924 * of domains and their respective RMRR information is lost. This means that
2925 * a device with associated RMRRs will never be in a "passthrough" domain.
2926 * The second is use of the device through the IOMMU API. This interface
2927 * expects to have full control of the IOVA space for the device. We cannot
2928 * satisfy both the requirement that RMRR access is maintained and have an
2929 * unencumbered IOVA space. We also have no ability to quiesce the device's
2930 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2931 * We therefore prevent devices associated with an RMRR from participating in
2932 * the IOMMU API, which eliminates them from device assignment.
2933 *
2934 * In both cases, devices which have relaxable RMRRs are not concerned by this
2935 * restriction. See device_rmrr_is_relaxable comment.
2936 */
2937 static bool device_is_rmrr_locked(struct device *dev)
2938 {
2939 if (!device_has_rmrr(dev))
2940 return false;
2941
2942 if (device_rmrr_is_relaxable(dev))
2943 return false;
2944
2945 return true;
2946 }
2947
2948 /*
2949 * Return the required default domain type for a specific device.
2950 *
2951 * @dev: the device in query
2953 *
2954 * Returns:
2955 * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
2956 * - IOMMU_DOMAIN_IDENTITY: device requires an identity mapping domain
2957 * - 0: both identity and dynamic domains work for this device
2958 */
2959 static int device_def_domain_type(struct device *dev)
2960 {
2961 if (dev_is_pci(dev)) {
2962 struct pci_dev *pdev = to_pci_dev(dev);
2963
2964 /*
2965 * Prevent any device marked as untrusted from getting
2966 * placed into the statically identity mapping domain.
2967 */
2968 if (pdev->untrusted)
2969 return IOMMU_DOMAIN_DMA;
2970
2971 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2972 return IOMMU_DOMAIN_IDENTITY;
2973
2974 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2975 return IOMMU_DOMAIN_IDENTITY;
2976 }
2977
2978 return 0;
2979 }
2980
2981 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2982 {
2983 /*
2984 * Start from a sane iommu hardware state.
2985 * If the queued invalidation is already initialized by us
2986 * (for example, while enabling interrupt-remapping) then
2987 * things are already rolling from a sane state.
2988 */
2989 if (!iommu->qi) {
2990 /*
2991 * Clear any previous faults.
2992 */
2993 dmar_fault(-1, iommu);
2994 /*
2995 * Disable queued invalidation if supported and already enabled
2996 * before OS handover.
2997 */
2998 dmar_disable_qi(iommu);
2999 }
3000
3001 if (dmar_enable_qi(iommu)) {
3002 /*
3003 * Queued Invalidate not enabled, use Register Based Invalidate
3004 */
3005 iommu->flush.flush_context = __iommu_flush_context;
3006 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
3007 pr_info("%s: Using Register based invalidation\n",
3008 iommu->name);
3009 } else {
3010 iommu->flush.flush_context = qi_flush_context;
3011 iommu->flush.flush_iotlb = qi_flush_iotlb;
3012 pr_info("%s: Using Queued invalidation\n", iommu->name);
3013 }
3014 }
3015
3016 static int copy_context_table(struct intel_iommu *iommu,
3017 struct root_entry *old_re,
3018 struct context_entry **tbl,
3019 int bus, bool ext)
3020 {
3021 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
3022 struct context_entry *new_ce = NULL, ce;
3023 struct context_entry *old_ce = NULL;
3024 struct root_entry re;
3025 phys_addr_t old_ce_phys;
3026
3027 tbl_idx = ext ? bus * 2 : bus;
3028 memcpy(&re, old_re, sizeof(re));
3029
3030 for (devfn = 0; devfn < 256; devfn++) {
3031 /* First calculate the correct index */
3032 idx = (ext ? devfn * 2 : devfn) % 256;
3033
3034 if (idx == 0) {
3035 /* First save what we may have and clean up */
3036 if (new_ce) {
3037 tbl[tbl_idx] = new_ce;
3038 __iommu_flush_cache(iommu, new_ce,
3039 VTD_PAGE_SIZE);
3040 pos = 1;
3041 }
3042
3043 if (old_ce)
3044 memunmap(old_ce);
3045
3046 ret = 0;
3047 if (devfn < 0x80)
3048 old_ce_phys = root_entry_lctp(&re);
3049 else
3050 old_ce_phys = root_entry_uctp(&re);
3051
3052 if (!old_ce_phys) {
3053 if (ext && devfn == 0) {
3054 /* No LCTP, try UCTP */
3055 devfn = 0x7f;
3056 continue;
3057 } else {
3058 goto out;
3059 }
3060 }
3061
3062 ret = -ENOMEM;
3063 old_ce = memremap(old_ce_phys, PAGE_SIZE,
3064 MEMREMAP_WB);
3065 if (!old_ce)
3066 goto out;
3067
3068 new_ce = alloc_pgtable_page(iommu->node);
3069 if (!new_ce)
3070 goto out_unmap;
3071
3072 ret = 0;
3073 }
3074
3075 /* Now copy the context entry */
3076 memcpy(&ce, old_ce + idx, sizeof(ce));
3077
3078 if (!__context_present(&ce))
3079 continue;
3080
3081 did = context_domain_id(&ce);
3082 if (did >= 0 && did < cap_ndoms(iommu->cap))
3083 set_bit(did, iommu->domain_ids);
3084
3085 /*
3086 * We need a marker for copied context entries. This
3087 * marker needs to work for the old format as well as
3088 * for extended context entries.
3089 *
3090 * Bit 67 of the context entry is used. In the old
3091 * format this bit is available to software, in the
3092 * extended format it is the PGE bit, but PGE is ignored
3093 * by HW if PASIDs are disabled (and thus still
3094 * available).
3095 *
3096 * So disable PASIDs first and then mark the entry
3097 * copied. This means that we don't copy PASID
3098 * translations from the old kernel, but this is fine as
3099 * faults there are not fatal.
3100 */
3101 context_clear_pasid_enable(&ce);
3102 context_set_copied(&ce);
3103
3104 new_ce[idx] = ce;
3105 }
3106
3107 tbl[tbl_idx + pos] = new_ce;
3108
3109 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3110
3111 out_unmap:
3112 memunmap(old_ce);
3113
3114 out:
3115 return ret;
3116 }
3117
3118 static int copy_translation_tables(struct intel_iommu *iommu)
3119 {
3120 struct context_entry **ctxt_tbls;
3121 struct root_entry *old_rt;
3122 phys_addr_t old_rt_phys;
3123 int ctxt_table_entries;
3124 unsigned long flags;
3125 u64 rtaddr_reg;
3126 int bus, ret;
3127 bool new_ext, ext;
3128
3129 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3130 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
3131 new_ext = !!ecap_ecs(iommu->ecap);
3132
3133 /*
3134 * The RTT bit can only be changed when translation is disabled,
3135 * but disabling translation means to open a window for data
3136 * corruption. So bail out and don't copy anything if we would
3137 * have to change the bit.
3138 */
3139 if (new_ext != ext)
3140 return -EINVAL;
3141
3142 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3143 if (!old_rt_phys)
3144 return -EINVAL;
3145
3146 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
3147 if (!old_rt)
3148 return -ENOMEM;
3149
3150 /* This is too big for the stack - allocate it from slab */
3151 ctxt_table_entries = ext ? 512 : 256;
3152 ret = -ENOMEM;
3153 ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
3154 if (!ctxt_tbls)
3155 goto out_unmap;
3156
3157 for (bus = 0; bus < 256; bus++) {
3158 ret = copy_context_table(iommu, &old_rt[bus],
3159 ctxt_tbls, bus, ext);
3160 if (ret) {
3161 pr_err("%s: Failed to copy context table for bus %d\n",
3162 iommu->name, bus);
3163 continue;
3164 }
3165 }
3166
3167 spin_lock_irqsave(&iommu->lock, flags);
3168
3169 /* Context tables are copied, now write them to the root_entry table */
3170 for (bus = 0; bus < 256; bus++) {
3171 int idx = ext ? bus * 2 : bus;
3172 u64 val;
3173
3174 if (ctxt_tbls[idx]) {
3175 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3176 iommu->root_entry[bus].lo = val;
3177 }
3178
3179 if (!ext || !ctxt_tbls[idx + 1])
3180 continue;
3181
3182 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3183 iommu->root_entry[bus].hi = val;
3184 }
3185
3186 spin_unlock_irqrestore(&iommu->lock, flags);
3187
3188 kfree(ctxt_tbls);
3189
3190 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3191
3192 ret = 0;
3193
3194 out_unmap:
3195 memunmap(old_rt);
3196
3197 return ret;
3198 }
3199
3200 #ifdef CONFIG_INTEL_IOMMU_SVM
3201 static ioasid_t intel_vcmd_ioasid_alloc(ioasid_t min, ioasid_t max, void *data)
3202 {
3203 struct intel_iommu *iommu = data;
3204 ioasid_t ioasid;
3205
3206 if (!iommu)
3207 return INVALID_IOASID;
3208 /*
3209 * VT-d virtual command interface always uses the full 20 bit
3210 * PASID range. Host can partition guest PASID range based on
3211 * policies but it is out of guest's control.
3212 */
3213 if (min < PASID_MIN || max > intel_pasid_max_id)
3214 return INVALID_IOASID;
3215
3216 if (vcmd_alloc_pasid(iommu, &ioasid))
3217 return INVALID_IOASID;
3218
3219 return ioasid;
3220 }
3221
3222 static void intel_vcmd_ioasid_free(ioasid_t ioasid, void *data)
3223 {
3224 struct intel_iommu *iommu = data;
3225
3226 if (!iommu)
3227 return;
3228 /*
3229 * Sanity checking of the ioasid owner is done at the upper layer, e.g. VFIO.
3230 * We can only free the PASID when all the devices are unbound.
3231 */
3232 if (ioasid_find(NULL, ioasid, NULL)) {
3233 pr_alert("Cannot free active IOASID %d\n", ioasid);
3234 return;
3235 }
3236 vcmd_free_pasid(iommu, ioasid);
3237 }
3238
3239 static void register_pasid_allocator(struct intel_iommu *iommu)
3240 {
3241 /*
3242 * If we are running in the host, there is no need for a custom
3243 * allocator because PASIDs are allocated from the host system-wide.
3244 */
3245 if (!cap_caching_mode(iommu->cap))
3246 return;
3247
3248 if (!sm_supported(iommu)) {
3249 pr_warn("VT-d Scalable Mode not enabled, no PASID allocation\n");
3250 return;
3251 }
3252
3253 /*
3254 * Register a custom PASID allocator if we are running in a guest,
3255 * where guest PASIDs must be obtained via the virtual command interface.
3256 * There can be multiple vIOMMUs in each guest but only one allocator
3257 * is active. All vIOMMU allocators will eventually be calling the same
3258 * host allocator.
3259 */
3260 if (!vccap_pasid(iommu->vccap))
3261 return;
3262
3263 pr_info("Register custom PASID allocator\n");
3264 iommu->pasid_allocator.alloc = intel_vcmd_ioasid_alloc;
3265 iommu->pasid_allocator.free = intel_vcmd_ioasid_free;
3266 iommu->pasid_allocator.pdata = (void *)iommu;
3267 if (ioasid_register_allocator(&iommu->pasid_allocator)) {
3268 pr_warn("Custom PASID allocator failed, scalable mode disabled\n");
3269 /*
3270 * Disable scalable mode on this IOMMU if there
3271 * is no custom allocator. Mixing SM capable vIOMMU
3272 * and non-SM vIOMMU are not supported.
3273 */
3274 intel_iommu_sm = 0;
3275 }
3276 }
3277 #endif
3278
3279 static int __init init_dmars(void)
3280 {
3281 struct dmar_drhd_unit *drhd;
3282 struct intel_iommu *iommu;
3283 int ret;
3284
3285 /*
3286 * for each drhd
3287 * allocate root
3288 * initialize and program root entry to not present
3289 * endfor
3290 */
3291 for_each_drhd_unit(drhd) {
3292 /*
3293 * lock not needed as this is only incremented in the
3294 * single-threaded kernel __init code path; all other
3295 * accesses are read only
3296 */
3297 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3298 g_num_of_iommus++;
3299 continue;
3300 }
3301 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3302 }
3303
3304 /* Preallocate enough resources for IOMMU hot-addition */
3305 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3306 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3307
3308 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3309 GFP_KERNEL);
3310 if (!g_iommus) {
3311 pr_err("Allocating global iommu array failed\n");
3312 ret = -ENOMEM;
3313 goto error;
3314 }
3315
3316 for_each_iommu(iommu, drhd) {
3317 if (drhd->ignored) {
3318 iommu_disable_translation(iommu);
3319 continue;
3320 }
3321
3322 /*
3323 * Find the max pasid size of all IOMMUs in the system.
3324 * We need to ensure the system pasid table is no bigger
3325 * than the smallest supported.
3326 */
3327 if (pasid_supported(iommu)) {
3328 u32 temp = 2 << ecap_pss(iommu->ecap);
3329
3330 intel_pasid_max_id = min_t(u32, temp,
3331 intel_pasid_max_id);
3332 }
3333
3334 g_iommus[iommu->seq_id] = iommu;
3335
3336 intel_iommu_init_qi(iommu);
3337
3338 ret = iommu_init_domains(iommu);
3339 if (ret)
3340 goto free_iommu;
3341
3342 init_translation_status(iommu);
3343
3344 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3345 iommu_disable_translation(iommu);
3346 clear_translation_pre_enabled(iommu);
3347 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3348 iommu->name);
3349 }
3350
3351 /*
3352 * TBD:
3353 * we could share the same root & context tables
3354 * among all IOMMUs. Need to split it later.
3355 */
3356 ret = iommu_alloc_root_entry(iommu);
3357 if (ret)
3358 goto free_iommu;
3359
3360 if (translation_pre_enabled(iommu)) {
3361 pr_info("Translation already enabled - trying to copy translation structures\n");
3362
3363 ret = copy_translation_tables(iommu);
3364 if (ret) {
3365 /*
3366 * We found the IOMMU with translation
3367 * enabled - but failed to copy over the
3368 * old root-entry table. Try to proceed
3369 * by disabling translation now and
3370 * allocating a clean root-entry table.
3371 * This might cause DMAR faults, but
3372 * probably the dump will still succeed.
3373 */
3374 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3375 iommu->name);
3376 iommu_disable_translation(iommu);
3377 clear_translation_pre_enabled(iommu);
3378 } else {
3379 pr_info("Copied translation tables from previous kernel for %s\n",
3380 iommu->name);
3381 }
3382 }
3383
3384 if (!ecap_pass_through(iommu->ecap))
3385 hw_pass_through = 0;
3386
3387 if (!intel_iommu_strict && cap_caching_mode(iommu->cap)) {
3388 pr_warn("Disable batched IOTLB flush due to virtualization");
3389 intel_iommu_strict = 1;
3390 }
3391 intel_svm_check(iommu);
3392 }
3393
3394 /*
3395 * Now that qi is enabled on all iommus, set the root entry and flush
3396 * caches. This is required on some Intel X58 chipsets, otherwise the
3397 * flush_context function will loop forever and the boot hangs.
3398 */
3399 for_each_active_iommu(iommu, drhd) {
3400 iommu_flush_write_buffer(iommu);
3401 #ifdef CONFIG_INTEL_IOMMU_SVM
3402 register_pasid_allocator(iommu);
3403 #endif
3404 iommu_set_root_entry(iommu);
3405 }
3406
3407 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3408 dmar_map_gfx = 0;
3409 #endif
3410
3411 if (!dmar_map_gfx)
3412 iommu_identity_mapping |= IDENTMAP_GFX;
3413
3414 check_tylersburg_isoch();
3415
3416 ret = si_domain_init(hw_pass_through);
3417 if (ret)
3418 goto free_iommu;
3419
3420 /*
3421 * for each drhd
3422 * enable fault log
3423 * global invalidate context cache
3424 * global invalidate iotlb
3425 * enable translation
3426 */
3427 for_each_iommu(iommu, drhd) {
3428 if (drhd->ignored) {
3429 /*
3430 * we always have to disable PMRs or DMA may fail on
3431 * this device
3432 */
3433 if (force_on)
3434 iommu_disable_protect_mem_regions(iommu);
3435 continue;
3436 }
3437
3438 iommu_flush_write_buffer(iommu);
3439
3440 #ifdef CONFIG_INTEL_IOMMU_SVM
3441 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3442 /*
3443 * Calling dmar_alloc_hwirq() with dmar_global_lock held
3444 * could cause a lock race condition.
3445 */
3446 up_write(&dmar_global_lock);
3447 ret = intel_svm_enable_prq(iommu);
3448 down_write(&dmar_global_lock);
3449 if (ret)
3450 goto free_iommu;
3451 }
3452 #endif
3453 ret = dmar_set_interrupt(iommu);
3454 if (ret)
3455 goto free_iommu;
3456 }
3457
3458 return 0;
3459
3460 free_iommu:
3461 for_each_active_iommu(iommu, drhd) {
3462 disable_dmar_iommu(iommu);
3463 free_dmar_iommu(iommu);
3464 }
3465
3466 kfree(g_iommus);
3467
3468 error:
3469 return ret;
3470 }
3471
3472 /* This takes a number of _MM_ pages, not VTD pages */
3473 static unsigned long intel_alloc_iova(struct device *dev,
3474 struct dmar_domain *domain,
3475 unsigned long nrpages, uint64_t dma_mask)
3476 {
3477 unsigned long iova_pfn;
3478
3479 /*
3480 * Restrict dma_mask to the width that the iommu can handle.
3481 * First-level translation restricts the input-address to a
3482 * canonical address (i.e., address bits 63:N have the same
3483 * value as address bit [N-1], where N is 48-bits with 4-level
3484 * paging and 57-bits with 5-level paging). Hence, skip bit
3485 * [N-1].
3486 */
3487 if (domain_use_first_level(domain))
3488 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw - 1),
3489 dma_mask);
3490 else
3491 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw),
3492 dma_mask);
3493
3494 /* Ensure we reserve the whole size-aligned region */
3495 nrpages = __roundup_pow_of_two(nrpages);
3496
3497 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
3498 /*
3499 * First try to allocate an io virtual address in
3500 * DMA_BIT_MASK(32) and if that fails then try allocating
3501 * from higher range
3502 */
3503 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3504 IOVA_PFN(DMA_BIT_MASK(32)), false);
3505 if (iova_pfn)
3506 return iova_pfn;
3507 }
3508 iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3509 IOVA_PFN(dma_mask), true);
3510 if (unlikely(!iova_pfn)) {
3511 dev_err_once(dev, "Allocating %ld-page iova failed\n",
3512 nrpages);
3513 return 0;
3514 }
3515
3516 return iova_pfn;
3517 }
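/*
 * Worked example (illustrative): for a domain with gaw = 48 using
 * first-level translation, dma_mask is clipped to DOMAIN_MAX_ADDR(47),
 * so bit 47 (the sign bit for 4-level paging) is never set and the IOVA
 * remains canonical; with second-level translation the full 48-bit range
 * DOMAIN_MAX_ADDR(48) is available. A request for 3 pages is rounded up
 * to 4 so that the whole size-aligned region is reserved.
 */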
3518
3519 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3520 size_t size, int dir, u64 dma_mask)
3521 {
3522 struct dmar_domain *domain;
3523 phys_addr_t start_paddr;
3524 unsigned long iova_pfn;
3525 int prot = 0;
3526 int ret;
3527 struct intel_iommu *iommu;
3528 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3529
3530 BUG_ON(dir == DMA_NONE);
3531
3532 if (unlikely(attach_deferred(dev)))
3533 do_deferred_attach(dev);
3534
3535 domain = find_domain(dev);
3536 if (!domain)
3537 return DMA_MAPPING_ERROR;
3538
3539 iommu = domain_get_iommu(domain);
3540 size = aligned_nrpages(paddr, size);
3541
3542 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3543 if (!iova_pfn)
3544 goto error;
3545
3546 /*
3547 * Check if DMAR supports zero-length reads on write-only
3548 * mappings.
3549 */
3550 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3551 !cap_zlr(iommu->cap))
3552 prot |= DMA_PTE_READ;
3553 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3554 prot |= DMA_PTE_WRITE;
3555 /*
3556 * paddr to (paddr + size) might cover a partial page; we should map the
3557 * whole page. Note: if two parts of one page are mapped separately, we
3558 * might have two guest addresses mapping to the same host paddr, but this
3559 * is not a big problem.
3560 */
3561 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
3562 mm_to_dma_pfn(paddr_pfn), size, prot);
3563 if (ret)
3564 goto error;
3565
3566 start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
3567 start_paddr += paddr & ~PAGE_MASK;
3568
3569 trace_map_single(dev, start_paddr, paddr, size << VTD_PAGE_SHIFT);
3570
3571 return start_paddr;
3572
3573 error:
3574 if (iova_pfn)
3575 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
3576 dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n",
3577 size, (unsigned long long)paddr, dir);
3578 return DMA_MAPPING_ERROR;
3579 }
3580
3581 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3582 unsigned long offset, size_t size,
3583 enum dma_data_direction dir,
3584 unsigned long attrs)
3585 {
3586 return __intel_map_single(dev, page_to_phys(page) + offset,
3587 size, dir, *dev->dma_mask);
3588 }
3589
3590 static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
3591 size_t size, enum dma_data_direction dir,
3592 unsigned long attrs)
3593 {
3594 return __intel_map_single(dev, phys_addr, size, dir, *dev->dma_mask);
3595 }
3596
3597 static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
3598 {
3599 struct dmar_domain *domain;
3600 unsigned long start_pfn, last_pfn;
3601 unsigned long nrpages;
3602 unsigned long iova_pfn;
3603 struct intel_iommu *iommu;
3604 struct page *freelist;
3605 struct pci_dev *pdev = NULL;
3606
3607 domain = find_domain(dev);
3608 BUG_ON(!domain);
3609
3610 iommu = domain_get_iommu(domain);
3611
3612 iova_pfn = IOVA_PFN(dev_addr);
3613
3614 nrpages = aligned_nrpages(dev_addr, size);
3615 start_pfn = mm_to_dma_pfn(iova_pfn);
3616 last_pfn = start_pfn + nrpages - 1;
3617
3618 if (dev_is_pci(dev))
3619 pdev = to_pci_dev(dev);
3620
3621 freelist = domain_unmap(domain, start_pfn, last_pfn);
3622 if (intel_iommu_strict || (pdev && pdev->untrusted) ||
3623 !has_iova_flush_queue(&domain->iovad)) {
3624 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
3625 nrpages, !freelist, 0);
3626 /* free iova */
3627 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
3628 dma_free_pagelist(freelist);
3629 } else {
3630 queue_iova(&domain->iovad, iova_pfn, nrpages,
3631 (unsigned long)freelist);
3632 /*
3633 * queue up the release of the unmap to save the 1/6th of the
3634 * cpu used up by the iotlb flush operation...
3635 */
3636 }
3637
3638 trace_unmap_single(dev, dev_addr, size);
3639 }
3640
3641 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3642 size_t size, enum dma_data_direction dir,
3643 unsigned long attrs)
3644 {
3645 intel_unmap(dev, dev_addr, size);
3646 }
3647
3648 static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
3649 size_t size, enum dma_data_direction dir, unsigned long attrs)
3650 {
3651 intel_unmap(dev, dev_addr, size);
3652 }
3653
3654 static void *intel_alloc_coherent(struct device *dev, size_t size,
3655 dma_addr_t *dma_handle, gfp_t flags,
3656 unsigned long attrs)
3657 {
3658 struct page *page = NULL;
3659 int order;
3660
3661 if (unlikely(attach_deferred(dev)))
3662 do_deferred_attach(dev);
3663
3664 size = PAGE_ALIGN(size);
3665 order = get_order(size);
3666
3667 if (gfpflags_allow_blocking(flags)) {
3668 unsigned int count = size >> PAGE_SHIFT;
3669
3670 page = dma_alloc_from_contiguous(dev, count, order,
3671 flags & __GFP_NOWARN);
3672 }
3673
3674 if (!page)
3675 page = alloc_pages(flags, order);
3676 if (!page)
3677 return NULL;
3678 memset(page_address(page), 0, size);
3679
3680 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3681 DMA_BIDIRECTIONAL,
3682 dev->coherent_dma_mask);
3683 if (*dma_handle != DMA_MAPPING_ERROR)
3684 return page_address(page);
3685 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3686 __free_pages(page, order);
3687
3688 return NULL;
3689 }
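/*
 * Editor's note (illustrative sketch, not part of this driver): consumers
 * reach intel_alloc_coherent()/intel_free_coherent() only through the
 * generic DMA API. A hypothetical PCI driver allocating a DMA ring would
 * do roughly:
 *
 *	void *cpu_addr;
 *	dma_addr_t dma_handle;
 *
 *	cpu_addr = dma_alloc_coherent(&pdev->dev, ring_size, &dma_handle,
 *				      GFP_KERNEL);
 *	if (!cpu_addr)
 *		return -ENOMEM;
 *	...
 *	dma_free_coherent(&pdev->dev, ring_size, cpu_addr, dma_handle);
 *
 * "pdev" and "ring_size" are placeholders; the dispatch into this file
 * happens via the dma_map_ops installed for the device.
 */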
3690
3691 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3692 dma_addr_t dma_handle, unsigned long attrs)
3693 {
3694 int order;
3695 struct page *page = virt_to_page(vaddr);
3696
3697 size = PAGE_ALIGN(size);
3698 order = get_order(size);
3699
3700 intel_unmap(dev, dma_handle, size);
3701 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3702 __free_pages(page, order);
3703 }
3704
3705 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3706 int nelems, enum dma_data_direction dir,
3707 unsigned long attrs)
3708 {
3709 dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3710 unsigned long nrpages = 0;
3711 struct scatterlist *sg;
3712 int i;
3713
3714 for_each_sg(sglist, sg, nelems, i) {
3715 nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3716 }
3717
3718 intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
3719
3720 trace_unmap_sg(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
3721 }
3722
3723 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3724 enum dma_data_direction dir, unsigned long attrs)
3725 {
3726 int i;
3727 struct dmar_domain *domain;
3728 size_t size = 0;
3729 int prot = 0;
3730 unsigned long iova_pfn;
3731 int ret;
3732 struct scatterlist *sg;
3733 unsigned long start_vpfn;
3734 struct intel_iommu *iommu;
3735
3736 BUG_ON(dir == DMA_NONE);
3737
3738 if (unlikely(attach_deferred(dev)))
3739 do_deferred_attach(dev);
3740
3741 domain = find_domain(dev);
3742 if (!domain)
3743 return 0;
3744
3745 iommu = domain_get_iommu(domain);
3746
3747 for_each_sg(sglist, sg, nelems, i)
3748 size += aligned_nrpages(sg->offset, sg->length);
3749
3750 iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3751 *dev->dma_mask);
3752 if (!iova_pfn) {
3753 sglist->dma_length = 0;
3754 return 0;
3755 }
3756
3757 /*
3758 * Check if DMAR supports zero-length reads on write-only
3759 * mappings.
3760 */
3761 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3762 !cap_zlr(iommu->cap))
3763 prot |= DMA_PTE_READ;
3764 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3765 prot |= DMA_PTE_WRITE;
3766
3767 start_vpfn = mm_to_dma_pfn(iova_pfn);
3768
3769 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3770 if (unlikely(ret)) {
3771 dma_pte_free_pagetable(domain, start_vpfn,
3772 start_vpfn + size - 1,
3773 agaw_to_level(domain->agaw) + 1);
3774 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
3775 return 0;
3776 }
3777
3778 for_each_sg(sglist, sg, nelems, i)
3779 trace_map_sg(dev, i + 1, nelems, sg);
3780
3781 return nelems;
3782 }
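/*
 * Editor's note (hedged usage sketch): intel_map_sg() is normally invoked
 * via dma_map_sg(). A caller that has already built a scatterlist might do:
 *
 *	int mapped = dma_map_sg(dev, sglist, nents, DMA_TO_DEVICE);
 *	if (!mapped)
 *		return -EIO;
 *	...
 *	dma_unmap_sg(dev, sglist, nents, DMA_TO_DEVICE);
 *
 * Note that on failure this implementation returns 0 rather than a negative
 * errno, which is what dma_map_sg() callers are expected to check for.
 */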
3783
3784 static u64 intel_get_required_mask(struct device *dev)
3785 {
3786 return DMA_BIT_MASK(32);
3787 }
3788
3789 static const struct dma_map_ops intel_dma_ops = {
3790 .alloc = intel_alloc_coherent,
3791 .free = intel_free_coherent,
3792 .map_sg = intel_map_sg,
3793 .unmap_sg = intel_unmap_sg,
3794 .map_page = intel_map_page,
3795 .unmap_page = intel_unmap_page,
3796 .map_resource = intel_map_resource,
3797 .unmap_resource = intel_unmap_resource,
3798 .dma_supported = dma_direct_supported,
3799 .mmap = dma_common_mmap,
3800 .get_sgtable = dma_common_get_sgtable,
3801 .alloc_pages = dma_common_alloc_pages,
3802 .free_pages = dma_common_free_pages,
3803 .get_required_mask = intel_get_required_mask,
3804 };
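/*
 * Editor's note: these callbacks are not called directly; they back the
 * generic streaming DMA API once intel_dma_ops is installed for a device.
 * An illustrative (hypothetical) single-page mapping through this table:
 *
 *	dma_addr_t handle = dma_map_page(dev, page, 0, PAGE_SIZE,
 *					 DMA_FROM_DEVICE);
 *	if (dma_mapping_error(dev, handle))
 *		return -ENOMEM;
 *	...
 *	dma_unmap_page(dev, handle, PAGE_SIZE, DMA_FROM_DEVICE);
 */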
3805
3806 static void
3807 bounce_sync_single(struct device *dev, dma_addr_t addr, size_t size,
3808 enum dma_data_direction dir, enum dma_sync_target target)
3809 {
3810 struct dmar_domain *domain;
3811 phys_addr_t tlb_addr;
3812
3813 domain = find_domain(dev);
3814 if (WARN_ON(!domain))
3815 return;
3816
3817 tlb_addr = intel_iommu_iova_to_phys(&domain->domain, addr);
3818 if (is_swiotlb_buffer(tlb_addr))
3819 swiotlb_tbl_sync_single(dev, tlb_addr, size, dir, target);
3820 }
3821
3822 static dma_addr_t
3823 bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size,
3824 enum dma_data_direction dir, unsigned long attrs,
3825 u64 dma_mask)
3826 {
3827 size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
3828 struct dmar_domain *domain;
3829 struct intel_iommu *iommu;
3830 unsigned long iova_pfn;
3831 unsigned long nrpages;
3832 phys_addr_t tlb_addr;
3833 int prot = 0;
3834 int ret;
3835
3836 if (unlikely(attach_deferred(dev)))
3837 do_deferred_attach(dev);
3838
3839 domain = find_domain(dev);
3840
3841 if (WARN_ON(dir == DMA_NONE || !domain))
3842 return DMA_MAPPING_ERROR;
3843
3844 iommu = domain_get_iommu(domain);
3845 if (WARN_ON(!iommu))
3846 return DMA_MAPPING_ERROR;
3847
3848 nrpages = aligned_nrpages(0, size);
3849 iova_pfn = intel_alloc_iova(dev, domain,
3850 dma_to_mm_pfn(nrpages), dma_mask);
3851 if (!iova_pfn)
3852 return DMA_MAPPING_ERROR;
3853
3854 /*
3855 * Check if DMAR supports zero-length reads on write-only
3856 * mappings.
3857 */
3858 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3859 !cap_zlr(iommu->cap))
3860 prot |= DMA_PTE_READ;
3861 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3862 prot |= DMA_PTE_WRITE;
3863
3864 /*
3865 * If both the physical buffer start address and size are
3866 * page aligned, we don't need to use a bounce page.
3867 */
3868 if (!IS_ALIGNED(paddr | size, VTD_PAGE_SIZE)) {
3869 tlb_addr = swiotlb_tbl_map_single(dev, paddr, size,
3870 aligned_size, dir, attrs);
3871 if (tlb_addr == DMA_MAPPING_ERROR) {
3872 goto swiotlb_error;
3873 } else {
3874 /* Cleanup the padding area. */
3875 void *padding_start = phys_to_virt(tlb_addr);
3876 size_t padding_size = aligned_size;
3877
3878 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
3879 (dir == DMA_TO_DEVICE ||
3880 dir == DMA_BIDIRECTIONAL)) {
3881 padding_start += size;
3882 padding_size -= size;
3883 }
3884
3885 memset(padding_start, 0, padding_size);
3886 }
3887 } else {
3888 tlb_addr = paddr;
3889 }
3890
3891 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
3892 tlb_addr >> VTD_PAGE_SHIFT, nrpages, prot);
3893 if (ret)
3894 goto mapping_error;
3895
3896 trace_bounce_map_single(dev, iova_pfn << PAGE_SHIFT, paddr, size);
3897
3898 return (phys_addr_t)iova_pfn << PAGE_SHIFT;
3899
3900 mapping_error:
3901 if (is_swiotlb_buffer(tlb_addr))
3902 swiotlb_tbl_unmap_single(dev, tlb_addr, size,
3903 aligned_size, dir, attrs);
3904 swiotlb_error:
3905 free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
3906 dev_err(dev, "Device bounce map: %zx@%llx dir %d --- failed\n",
3907 size, (unsigned long long)paddr, dir);
3908
3909 return DMA_MAPPING_ERROR;
3910 }
3911
3912 static void
3913 bounce_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
3914 enum dma_data_direction dir, unsigned long attrs)
3915 {
3916 size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
3917 struct dmar_domain *domain;
3918 phys_addr_t tlb_addr;
3919
3920 domain = find_domain(dev);
3921 if (WARN_ON(!domain))
3922 return;
3923
3924 tlb_addr = intel_iommu_iova_to_phys(&domain->domain, dev_addr);
3925 if (WARN_ON(!tlb_addr))
3926 return;
3927
3928 intel_unmap(dev, dev_addr, size);
3929 if (is_swiotlb_buffer(tlb_addr))
3930 swiotlb_tbl_unmap_single(dev, tlb_addr, size,
3931 aligned_size, dir, attrs);
3932
3933 trace_bounce_unmap_single(dev, dev_addr, size);
3934 }
3935
3936 static dma_addr_t
3937 bounce_map_page(struct device *dev, struct page *page, unsigned long offset,
3938 size_t size, enum dma_data_direction dir, unsigned long attrs)
3939 {
3940 return bounce_map_single(dev, page_to_phys(page) + offset,
3941 size, dir, attrs, *dev->dma_mask);
3942 }
3943
3944 static dma_addr_t
3945 bounce_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size,
3946 enum dma_data_direction dir, unsigned long attrs)
3947 {
3948 return bounce_map_single(dev, phys_addr, size,
3949 dir, attrs, *dev->dma_mask);
3950 }
3951
3952 static void
3953 bounce_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size,
3954 enum dma_data_direction dir, unsigned long attrs)
3955 {
3956 bounce_unmap_single(dev, dev_addr, size, dir, attrs);
3957 }
3958
3959 static void
3960 bounce_unmap_resource(struct device *dev, dma_addr_t dev_addr, size_t size,
3961 enum dma_data_direction dir, unsigned long attrs)
3962 {
3963 bounce_unmap_single(dev, dev_addr, size, dir, attrs);
3964 }
3965
3966 static void
3967 bounce_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3968 enum dma_data_direction dir, unsigned long attrs)
3969 {
3970 struct scatterlist *sg;
3971 int i;
3972
3973 for_each_sg(sglist, sg, nelems, i)
3974 bounce_unmap_page(dev, sg->dma_address,
3975 sg_dma_len(sg), dir, attrs);
3976 }
3977
3978 static int
3979 bounce_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3980 enum dma_data_direction dir, unsigned long attrs)
3981 {
3982 int i;
3983 struct scatterlist *sg;
3984
3985 for_each_sg(sglist, sg, nelems, i) {
3986 sg->dma_address = bounce_map_page(dev, sg_page(sg),
3987 sg->offset, sg->length,
3988 dir, attrs);
3989 if (sg->dma_address == DMA_MAPPING_ERROR)
3990 goto out_unmap;
3991 sg_dma_len(sg) = sg->length;
3992 }
3993
3994 for_each_sg(sglist, sg, nelems, i)
3995 trace_bounce_map_sg(dev, i + 1, nelems, sg);
3996
3997 return nelems;
3998
3999 out_unmap:
4000 bounce_unmap_sg(dev, sglist, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
4001 return 0;
4002 }
4003
4004 static void
4005 bounce_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
4006 size_t size, enum dma_data_direction dir)
4007 {
4008 bounce_sync_single(dev, addr, size, dir, SYNC_FOR_CPU);
4009 }
4010
4011 static void
4012 bounce_sync_single_for_device(struct device *dev, dma_addr_t addr,
4013 size_t size, enum dma_data_direction dir)
4014 {
4015 bounce_sync_single(dev, addr, size, dir, SYNC_FOR_DEVICE);
4016 }
4017
4018 static void
4019 bounce_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist,
4020 int nelems, enum dma_data_direction dir)
4021 {
4022 struct scatterlist *sg;
4023 int i;
4024
4025 for_each_sg(sglist, sg, nelems, i)
4026 bounce_sync_single(dev, sg_dma_address(sg),
4027 sg_dma_len(sg), dir, SYNC_FOR_CPU);
4028 }
4029
4030 static void
4031 bounce_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
4032 int nelems, enum dma_data_direction dir)
4033 {
4034 struct scatterlist *sg;
4035 int i;
4036
4037 for_each_sg(sglist, sg, nelems, i)
4038 bounce_sync_single(dev, sg_dma_address(sg),
4039 sg_dma_len(sg), dir, SYNC_FOR_DEVICE);
4040 }
4041
4042 static const struct dma_map_ops bounce_dma_ops = {
4043 .alloc = intel_alloc_coherent,
4044 .free = intel_free_coherent,
4045 .map_sg = bounce_map_sg,
4046 .unmap_sg = bounce_unmap_sg,
4047 .map_page = bounce_map_page,
4048 .unmap_page = bounce_unmap_page,
4049 .sync_single_for_cpu = bounce_sync_single_for_cpu,
4050 .sync_single_for_device = bounce_sync_single_for_device,
4051 .sync_sg_for_cpu = bounce_sync_sg_for_cpu,
4052 .sync_sg_for_device = bounce_sync_sg_for_device,
4053 .map_resource = bounce_map_resource,
4054 .unmap_resource = bounce_unmap_resource,
4055 .alloc_pages = dma_common_alloc_pages,
4056 .free_pages = dma_common_free_pages,
4057 .dma_supported = dma_direct_supported,
4058 };
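/*
 * Editor's note (assumption based on the surrounding code): bounce_dma_ops
 * differs from intel_dma_ops mainly in that buffers which are not page
 * aligned are first copied into swiotlb bounce pages (see
 * bounce_map_single()), so the extra sync_{single,sg}_for_{cpu,device}
 * hooks are needed to keep the bounce copy and the original buffer
 * coherent around DMA.
 */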
4059
4060 static inline int iommu_domain_cache_init(void)
4061 {
4062 int ret = 0;
4063
4064 iommu_domain_cache = kmem_cache_create("iommu_domain",
4065 sizeof(struct dmar_domain),
4066 0,
4067 SLAB_HWCACHE_ALIGN,
4068
4069 NULL);
4070 if (!iommu_domain_cache) {
4071 pr_err("Couldn't create iommu_domain cache\n");
4072 ret = -ENOMEM;
4073 }
4074
4075 return ret;
4076 }
4077
4078 static inline int iommu_devinfo_cache_init(void)
4079 {
4080 int ret = 0;
4081
4082 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
4083 sizeof(struct device_domain_info),
4084 0,
4085 SLAB_HWCACHE_ALIGN,
4086 NULL);
4087 if (!iommu_devinfo_cache) {
4088 pr_err("Couldn't create devinfo cache\n");
4089 ret = -ENOMEM;
4090 }
4091
4092 return ret;
4093 }
4094
4095 static int __init iommu_init_mempool(void)
4096 {
4097 int ret;
4098 ret = iova_cache_get();
4099 if (ret)
4100 return ret;
4101
4102 ret = iommu_domain_cache_init();
4103 if (ret)
4104 goto domain_error;
4105
4106 ret = iommu_devinfo_cache_init();
4107 if (!ret)
4108 return ret;
4109
4110 kmem_cache_destroy(iommu_domain_cache);
4111 domain_error:
4112 iova_cache_put();
4113
4114 return -ENOMEM;
4115 }
4116
4117 static void __init iommu_exit_mempool(void)
4118 {
4119 kmem_cache_destroy(iommu_devinfo_cache);
4120 kmem_cache_destroy(iommu_domain_cache);
4121 iova_cache_put();
4122 }
4123
4124 static void __init init_no_remapping_devices(void)
4125 {
4126 struct dmar_drhd_unit *drhd;
4127 struct device *dev;
4128 int i;
4129
4130 for_each_drhd_unit(drhd) {
4131 if (!drhd->include_all) {
4132 for_each_active_dev_scope(drhd->devices,
4133 drhd->devices_cnt, i, dev)
4134 break;
4135 /* ignore DMAR unit if no devices exist */
4136 if (i == drhd->devices_cnt)
4137 drhd->ignored = 1;
4138 }
4139 }
4140
4141 for_each_active_drhd_unit(drhd) {
4142 if (drhd->include_all)
4143 continue;
4144
4145 for_each_active_dev_scope(drhd->devices,
4146 drhd->devices_cnt, i, dev)
4147 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
4148 break;
4149 if (i < drhd->devices_cnt)
4150 continue;
4151
4152 /* This IOMMU has *only* gfx devices. Either bypass it or
4153 set the gfx_mapped flag, as appropriate */
4154 drhd->gfx_dedicated = 1;
4155 if (!dmar_map_gfx)
4156 drhd->ignored = 1;
4157 }
4158 }
4159
4160 #ifdef CONFIG_SUSPEND
4161 static int init_iommu_hw(void)
4162 {
4163 struct dmar_drhd_unit *drhd;
4164 struct intel_iommu *iommu = NULL;
4165
4166 for_each_active_iommu(iommu, drhd)
4167 if (iommu->qi)
4168 dmar_reenable_qi(iommu);
4169
4170 for_each_iommu(iommu, drhd) {
4171 if (drhd->ignored) {
4172 /*
4173 * we always have to disable PMRs or DMA may fail on
4174 * this device
4175 */
4176 if (force_on)
4177 iommu_disable_protect_mem_regions(iommu);
4178 continue;
4179 }
4180
4181 iommu_flush_write_buffer(iommu);
4182 iommu_set_root_entry(iommu);
4183 iommu_enable_translation(iommu);
4184 iommu_disable_protect_mem_regions(iommu);
4185 }
4186
4187 return 0;
4188 }
4189
4190 static void iommu_flush_all(void)
4191 {
4192 struct dmar_drhd_unit *drhd;
4193 struct intel_iommu *iommu;
4194
4195 for_each_active_iommu(iommu, drhd) {
4196 iommu->flush.flush_context(iommu, 0, 0, 0,
4197 DMA_CCMD_GLOBAL_INVL);
4198 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
4199 DMA_TLB_GLOBAL_FLUSH);
4200 }
4201 }
4202
4203 static int iommu_suspend(void)
4204 {
4205 struct dmar_drhd_unit *drhd;
4206 struct intel_iommu *iommu = NULL;
4207 unsigned long flag;
4208
4209 for_each_active_iommu(iommu, drhd) {
4210 iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
4211 GFP_ATOMIC);
4212 if (!iommu->iommu_state)
4213 goto nomem;
4214 }
4215
4216 iommu_flush_all();
4217
4218 for_each_active_iommu(iommu, drhd) {
4219 iommu_disable_translation(iommu);
4220
4221 raw_spin_lock_irqsave(&iommu->register_lock, flag);
4222
4223 iommu->iommu_state[SR_DMAR_FECTL_REG] =
4224 readl(iommu->reg + DMAR_FECTL_REG);
4225 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4226 readl(iommu->reg + DMAR_FEDATA_REG);
4227 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4228 readl(iommu->reg + DMAR_FEADDR_REG);
4229 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4230 readl(iommu->reg + DMAR_FEUADDR_REG);
4231
4232 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4233 }
4234 return 0;
4235
4236 nomem:
4237 for_each_active_iommu(iommu, drhd)
4238 kfree(iommu->iommu_state);
4239
4240 return -ENOMEM;
4241 }
4242
4243 static void iommu_resume(void)
4244 {
4245 struct dmar_drhd_unit *drhd;
4246 struct intel_iommu *iommu = NULL;
4247 unsigned long flag;
4248
4249 if (init_iommu_hw()) {
4250 if (force_on)
4251 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4252 else
4253 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
4254 return;
4255 }
4256
4257 for_each_active_iommu(iommu, drhd) {
4258
4259 raw_spin_lock_irqsave(&iommu->register_lock, flag);
4260
4261 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4262 iommu->reg + DMAR_FECTL_REG);
4263 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4264 iommu->reg + DMAR_FEDATA_REG);
4265 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4266 iommu->reg + DMAR_FEADDR_REG);
4267 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4268 iommu->reg + DMAR_FEUADDR_REG);
4269
4270 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4271 }
4272
4273 for_each_active_iommu(iommu, drhd)
4274 kfree(iommu->iommu_state);
4275 }
4276
4277 static struct syscore_ops iommu_syscore_ops = {
4278 .resume = iommu_resume,
4279 .suspend = iommu_suspend,
4280 };
4281
4282 static void __init init_iommu_pm_ops(void)
4283 {
4284 register_syscore_ops(&iommu_syscore_ops);
4285 }
4286
4287 #else
4288 static inline void init_iommu_pm_ops(void) {}
4289 #endif /* CONFIG_SUSPEND */
4290
4291 static int rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
4292 {
4293 if (!IS_ALIGNED(rmrr->base_address, PAGE_SIZE) ||
4294 !IS_ALIGNED(rmrr->end_address + 1, PAGE_SIZE) ||
4295 rmrr->end_address <= rmrr->base_address ||
4296 arch_rmrr_sanity_check(rmrr))
4297 return -EINVAL;
4298
4299 return 0;
4300 }
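/*
 * Editor's note (worked example with made-up values): an RMRR covering
 * [0xab800000, 0xab8fffff] passes the checks above, since both 0xab800000
 * and 0xab8fffff + 1 = 0xab900000 are 4KiB aligned and the end address is
 * above the base.
 */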
4301
4302 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
4303 {
4304 struct acpi_dmar_reserved_memory *rmrr;
4305 struct dmar_rmrr_unit *rmrru;
4306
4307 rmrr = (struct acpi_dmar_reserved_memory *)header;
4308 if (rmrr_sanity_check(rmrr)) {
4309 pr_warn(FW_BUG
4310 "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n"
4311 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4312 rmrr->base_address, rmrr->end_address,
4313 dmi_get_system_info(DMI_BIOS_VENDOR),
4314 dmi_get_system_info(DMI_BIOS_VERSION),
4315 dmi_get_system_info(DMI_PRODUCT_VERSION));
4316 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
4317 }
4318
4319 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4320 if (!rmrru)
4321 goto out;
4322
4323 rmrru->hdr = header;
4324
4325 rmrru->base_address = rmrr->base_address;
4326 rmrru->end_address = rmrr->end_address;
4327
4328 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4329 ((void *)rmrr) + rmrr->header.length,
4330 &rmrru->devices_cnt);
4331 if (rmrru->devices_cnt && rmrru->devices == NULL)
4332 goto free_rmrru;
4333
4334 list_add(&rmrru->list, &dmar_rmrr_units);
4335
4336 return 0;
4337 free_rmrru:
4338 kfree(rmrru);
4339 out:
4340 return -ENOMEM;
4341 }
4342
4343 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4344 {
4345 struct dmar_atsr_unit *atsru;
4346 struct acpi_dmar_atsr *tmp;
4347
4348 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list,
4349 dmar_rcu_check()) {
4350 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4351 if (atsr->segment != tmp->segment)
4352 continue;
4353 if (atsr->header.length != tmp->header.length)
4354 continue;
4355 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4356 return atsru;
4357 }
4358
4359 return NULL;
4360 }
4361
4362 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4363 {
4364 struct acpi_dmar_atsr *atsr;
4365 struct dmar_atsr_unit *atsru;
4366
4367 if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
4368 return 0;
4369
4370 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4371 atsru = dmar_find_atsr(atsr);
4372 if (atsru)
4373 return 0;
4374
4375 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
4376 if (!atsru)
4377 return -ENOMEM;
4378
4379 /*
4380 * If memory is allocated from slab by ACPI _DSM method, we need to
4381 * copy the memory content because the memory buffer will be freed
4382 * on return.
4383 */
4384 atsru->hdr = (void *)(atsru + 1);
4385 memcpy(atsru->hdr, hdr, hdr->length);
4386 atsru->include_all = atsr->flags & 0x1;
4387 if (!atsru->include_all) {
4388 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4389 (void *)atsr + atsr->header.length,
4390 &atsru->devices_cnt);
4391 if (atsru->devices_cnt && atsru->devices == NULL) {
4392 kfree(atsru);
4393 return -ENOMEM;
4394 }
4395 }
4396
4397 list_add_rcu(&atsru->list, &dmar_atsr_units);
4398
4399 return 0;
4400 }
4401
4402 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4403 {
4404 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4405 kfree(atsru);
4406 }
4407
4408 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4409 {
4410 struct acpi_dmar_atsr *atsr;
4411 struct dmar_atsr_unit *atsru;
4412
4413 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4414 atsru = dmar_find_atsr(atsr);
4415 if (atsru) {
4416 list_del_rcu(&atsru->list);
4417 synchronize_rcu();
4418 intel_iommu_free_atsr(atsru);
4419 }
4420
4421 return 0;
4422 }
4423
4424 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4425 {
4426 int i;
4427 struct device *dev;
4428 struct acpi_dmar_atsr *atsr;
4429 struct dmar_atsr_unit *atsru;
4430
4431 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4432 atsru = dmar_find_atsr(atsr);
4433 if (!atsru)
4434 return 0;
4435
4436 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
4437 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4438 i, dev)
4439 return -EBUSY;
4440 }
4441
4442 return 0;
4443 }
4444
4445 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4446 {
4447 int sp, ret;
4448 struct intel_iommu *iommu = dmaru->iommu;
4449
4450 if (g_iommus[iommu->seq_id])
4451 return 0;
4452
4453 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
4454 pr_warn("%s: Doesn't support hardware pass through.\n",
4455 iommu->name);
4456 return -ENXIO;
4457 }
4458 if (!ecap_sc_support(iommu->ecap) &&
4459 domain_update_iommu_snooping(iommu)) {
4460 pr_warn("%s: Doesn't support snooping.\n",
4461 iommu->name);
4462 return -ENXIO;
4463 }
4464 sp = domain_update_iommu_superpage(NULL, iommu) - 1;
4465 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
4466 pr_warn("%s: Doesn't support large page.\n",
4467 iommu->name);
4468 return -ENXIO;
4469 }
4470
4471 /*
4472 * Disable translation if already enabled prior to OS handover.
4473 */
4474 if (iommu->gcmd & DMA_GCMD_TE)
4475 iommu_disable_translation(iommu);
4476
4477 g_iommus[iommu->seq_id] = iommu;
4478 ret = iommu_init_domains(iommu);
4479 if (ret == 0)
4480 ret = iommu_alloc_root_entry(iommu);
4481 if (ret)
4482 goto out;
4483
4484 intel_svm_check(iommu);
4485
4486 if (dmaru->ignored) {
4487 /*
4488 * we always have to disable PMRs or DMA may fail on this device
4489 */
4490 if (force_on)
4491 iommu_disable_protect_mem_regions(iommu);
4492 return 0;
4493 }
4494
4495 intel_iommu_init_qi(iommu);
4496 iommu_flush_write_buffer(iommu);
4497
4498 #ifdef CONFIG_INTEL_IOMMU_SVM
4499 if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
4500 ret = intel_svm_enable_prq(iommu);
4501 if (ret)
4502 goto disable_iommu;
4503 }
4504 #endif
4505 ret = dmar_set_interrupt(iommu);
4506 if (ret)
4507 goto disable_iommu;
4508
4509 iommu_set_root_entry(iommu);
4510 iommu_enable_translation(iommu);
4511
4512 iommu_disable_protect_mem_regions(iommu);
4513 return 0;
4514
4515 disable_iommu:
4516 disable_dmar_iommu(iommu);
4517 out:
4518 free_dmar_iommu(iommu);
4519 return ret;
4520 }
4521
4522 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4523 {
4524 int ret = 0;
4525 struct intel_iommu *iommu = dmaru->iommu;
4526
4527 if (!intel_iommu_enabled)
4528 return 0;
4529 if (iommu == NULL)
4530 return -EINVAL;
4531
4532 if (insert) {
4533 ret = intel_iommu_add(dmaru);
4534 } else {
4535 disable_dmar_iommu(iommu);
4536 free_dmar_iommu(iommu);
4537 }
4538
4539 return ret;
4540 }
4541
4542 static void intel_iommu_free_dmars(void)
4543 {
4544 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4545 struct dmar_atsr_unit *atsru, *atsr_n;
4546
4547 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4548 list_del(&rmrru->list);
4549 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4550 kfree(rmrru);
4551 }
4552
4553 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4554 list_del(&atsru->list);
4555 intel_iommu_free_atsr(atsru);
4556 }
4557 }
4558
4559 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4560 {
4561 int i, ret = 1;
4562 struct pci_bus *bus;
4563 struct pci_dev *bridge = NULL;
4564 struct device *tmp;
4565 struct acpi_dmar_atsr *atsr;
4566 struct dmar_atsr_unit *atsru;
4567
4568 dev = pci_physfn(dev);
4569 for (bus = dev->bus; bus; bus = bus->parent) {
4570 bridge = bus->self;
4571 /* If it's an integrated device, allow ATS */
4572 if (!bridge)
4573 return 1;
4574 /* Connected via non-PCIe: no ATS */
4575 if (!pci_is_pcie(bridge) ||
4576 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
4577 return 0;
4578 /* If we found the root port, look it up in the ATSR */
4579 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
4580 break;
4581 }
4582
4583 rcu_read_lock();
4584 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4585 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4586 if (atsr->segment != pci_domain_nr(dev->bus))
4587 continue;
4588
4589 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
4590 if (tmp == &bridge->dev)
4591 goto out;
4592
4593 if (atsru->include_all)
4594 goto out;
4595 }
4596 ret = 0;
4597 out:
4598 rcu_read_unlock();
4599
4600 return ret;
4601 }
4602
4603 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4604 {
4605 int ret;
4606 struct dmar_rmrr_unit *rmrru;
4607 struct dmar_atsr_unit *atsru;
4608 struct acpi_dmar_atsr *atsr;
4609 struct acpi_dmar_reserved_memory *rmrr;
4610
4611 if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
4612 return 0;
4613
4614 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4615 rmrr = container_of(rmrru->hdr,
4616 struct acpi_dmar_reserved_memory, header);
4617 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4618 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4619 ((void *)rmrr) + rmrr->header.length,
4620 rmrr->segment, rmrru->devices,
4621 rmrru->devices_cnt);
4622 if (ret < 0)
4623 return ret;
4624 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4625 dmar_remove_dev_scope(info, rmrr->segment,
4626 rmrru->devices, rmrru->devices_cnt);
4627 }
4628 }
4629
4630 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4631 if (atsru->include_all)
4632 continue;
4633
4634 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4635 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4636 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4637 (void *)atsr + atsr->header.length,
4638 atsr->segment, atsru->devices,
4639 atsru->devices_cnt);
4640 if (ret > 0)
4641 break;
4642 else if (ret < 0)
4643 return ret;
4644 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4645 if (dmar_remove_dev_scope(info, atsr->segment,
4646 atsru->devices, atsru->devices_cnt))
4647 break;
4648 }
4649 }
4650
4651 return 0;
4652 }
4653
4654 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4655 unsigned long val, void *v)
4656 {
4657 struct memory_notify *mhp = v;
4658 unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4659 unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn +
4660 mhp->nr_pages - 1);
4661
4662 switch (val) {
4663 case MEM_GOING_ONLINE:
4664 if (iommu_domain_identity_map(si_domain,
4665 start_vpfn, last_vpfn)) {
4666 pr_warn("Failed to build identity map for [%lx-%lx]\n",
4667 start_vpfn, last_vpfn);
4668 return NOTIFY_BAD;
4669 }
4670 break;
4671
4672 case MEM_OFFLINE:
4673 case MEM_CANCEL_ONLINE:
4674 {
4675 struct dmar_drhd_unit *drhd;
4676 struct intel_iommu *iommu;
4677 struct page *freelist;
4678
4679 freelist = domain_unmap(si_domain,
4680 start_vpfn, last_vpfn);
4681
4682 rcu_read_lock();
4683 for_each_active_iommu(iommu, drhd)
4684 iommu_flush_iotlb_psi(iommu, si_domain,
4685 start_vpfn, mhp->nr_pages,
4686 !freelist, 0);
4687 rcu_read_unlock();
4688 dma_free_pagelist(freelist);
4689 }
4690 break;
4691 }
4692
4693 return NOTIFY_OK;
4694 }
4695
4696 static struct notifier_block intel_iommu_memory_nb = {
4697 .notifier_call = intel_iommu_memory_notifier,
4698 .priority = 0
4699 };
4700
4701 static void free_all_cpu_cached_iovas(unsigned int cpu)
4702 {
4703 int i;
4704
4705 for (i = 0; i < g_num_of_iommus; i++) {
4706 struct intel_iommu *iommu = g_iommus[i];
4707 struct dmar_domain *domain;
4708 int did;
4709
4710 if (!iommu)
4711 continue;
4712
4713 for (did = 0; did < cap_ndoms(iommu->cap); did++) {
4714 domain = get_iommu_domain(iommu, (u16)did);
4715
4716 if (!domain || domain->domain.type != IOMMU_DOMAIN_DMA)
4717 continue;
4718
4719 free_cpu_cached_iovas(cpu, &domain->iovad);
4720 }
4721 }
4722 }
4723
4724 static int intel_iommu_cpu_dead(unsigned int cpu)
4725 {
4726 free_all_cpu_cached_iovas(cpu);
4727 return 0;
4728 }
4729
4730 static void intel_disable_iommus(void)
4731 {
4732 struct intel_iommu *iommu = NULL;
4733 struct dmar_drhd_unit *drhd;
4734
4735 for_each_iommu(iommu, drhd)
4736 iommu_disable_translation(iommu);
4737 }
4738
4739 void intel_iommu_shutdown(void)
4740 {
4741 struct dmar_drhd_unit *drhd;
4742 struct intel_iommu *iommu = NULL;
4743
4744 if (no_iommu || dmar_disabled)
4745 return;
4746
4747 down_write(&dmar_global_lock);
4748
4749 /* Disable PMRs explicitly here. */
4750 for_each_iommu(iommu, drhd)
4751 iommu_disable_protect_mem_regions(iommu);
4752
4753 /* Make sure the IOMMUs are switched off */
4754 intel_disable_iommus();
4755
4756 up_write(&dmar_global_lock);
4757 }
4758
4759 static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4760 {
4761 struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4762
4763 return container_of(iommu_dev, struct intel_iommu, iommu);
4764 }
4765
4766 static ssize_t intel_iommu_show_version(struct device *dev,
4767 struct device_attribute *attr,
4768 char *buf)
4769 {
4770 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4771 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4772 return sprintf(buf, "%d:%d\n",
4773 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4774 }
4775 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4776
4777 static ssize_t intel_iommu_show_address(struct device *dev,
4778 struct device_attribute *attr,
4779 char *buf)
4780 {
4781 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4782 return sprintf(buf, "%llx\n", iommu->reg_phys);
4783 }
4784 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4785
4786 static ssize_t intel_iommu_show_cap(struct device *dev,
4787 struct device_attribute *attr,
4788 char *buf)
4789 {
4790 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4791 return sprintf(buf, "%llx\n", iommu->cap);
4792 }
4793 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4794
4795 static ssize_t intel_iommu_show_ecap(struct device *dev,
4796 struct device_attribute *attr,
4797 char *buf)
4798 {
4799 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4800 return sprintf(buf, "%llx\n", iommu->ecap);
4801 }
4802 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4803
4804 static ssize_t intel_iommu_show_ndoms(struct device *dev,
4805 struct device_attribute *attr,
4806 char *buf)
4807 {
4808 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4809 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4810 }
4811 static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4812
4813 static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4814 struct device_attribute *attr,
4815 char *buf)
4816 {
4817 struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4818 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4819 cap_ndoms(iommu->cap)));
4820 }
4821 static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4822
4823 static struct attribute *intel_iommu_attrs[] = {
4824 &dev_attr_version.attr,
4825 &dev_attr_address.attr,
4826 &dev_attr_cap.attr,
4827 &dev_attr_ecap.attr,
4828 &dev_attr_domains_supported.attr,
4829 &dev_attr_domains_used.attr,
4830 NULL,
4831 };
4832
4833 static struct attribute_group intel_iommu_group = {
4834 .name = "intel-iommu",
4835 .attrs = intel_iommu_attrs,
4836 };
4837
4838 const struct attribute_group *intel_iommu_groups[] = {
4839 &intel_iommu_group,
4840 NULL,
4841 };
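/*
 * Editor's note (hedged usage example): each DMAR unit is registered as an
 * iommu device with this attribute group, so the values above are typically
 * readable from userspace along the lines of:
 *
 *	$ cat /sys/class/iommu/dmar0/intel-iommu/cap
 *	$ cat /sys/class/iommu/dmar0/intel-iommu/domains_used
 *
 * The exact "dmar0" name depends on enumeration order on the machine.
 */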
4842
4843 static inline bool has_external_pci(void)
4844 {
4845 struct pci_dev *pdev = NULL;
4846
4847 for_each_pci_dev(pdev)
4848 if (pdev->external_facing)
4849 return true;
4850
4851 return false;
4852 }
4853
4854 static int __init platform_optin_force_iommu(void)
4855 {
4856 if (!dmar_platform_optin() || no_platform_optin || !has_external_pci())
4857 return 0;
4858
4859 if (no_iommu || dmar_disabled)
4860 pr_info("Intel-IOMMU force enabled due to platform opt in\n");
4861
4862 /*
4863 * If Intel-IOMMU is disabled by default, we will apply identity
4864 * map for all devices except those marked as being untrusted.
4865 */
4866 if (dmar_disabled)
4867 iommu_set_default_passthrough(false);
4868
4869 dmar_disabled = 0;
4870 no_iommu = 0;
4871
4872 return 1;
4873 }
4874
4875 static int __init probe_acpi_namespace_devices(void)
4876 {
4877 struct dmar_drhd_unit *drhd;
4878 /* To avoid a -Wunused-but-set-variable warning. */
4879 struct intel_iommu *iommu __maybe_unused;
4880 struct device *dev;
4881 int i, ret = 0;
4882
4883 for_each_active_iommu(iommu, drhd) {
4884 for_each_active_dev_scope(drhd->devices,
4885 drhd->devices_cnt, i, dev) {
4886 struct acpi_device_physical_node *pn;
4887 struct iommu_group *group;
4888 struct acpi_device *adev;
4889
4890 if (dev->bus != &acpi_bus_type)
4891 continue;
4892
4893 adev = to_acpi_device(dev);
4894 mutex_lock(&adev->physical_node_lock);
4895 list_for_each_entry(pn,
4896 &adev->physical_node_list, node) {
4897 group = iommu_group_get(pn->dev);
4898 if (group) {
4899 iommu_group_put(group);
4900 continue;
4901 }
4902
4903 pn->dev->bus->iommu_ops = &intel_iommu_ops;
4904 ret = iommu_probe_device(pn->dev);
4905 if (ret)
4906 break;
4907 }
4908 mutex_unlock(&adev->physical_node_lock);
4909
4910 if (ret)
4911 return ret;
4912 }
4913 }
4914
4915 return 0;
4916 }
4917
4918 int __init intel_iommu_init(void)
4919 {
4920 int ret = -ENODEV;
4921 struct dmar_drhd_unit *drhd;
4922 struct intel_iommu *iommu;
4923
4924 /*
4925 * Intel IOMMU is required for a TXT/tboot launch or platform
4926 * opt in, so enforce that.
4927 */
4928 force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) ||
4929 platform_optin_force_iommu();
4930
4931 if (iommu_init_mempool()) {
4932 if (force_on)
4933 panic("tboot: Failed to initialize iommu memory\n");
4934 return -ENOMEM;
4935 }
4936
4937 down_write(&dmar_global_lock);
4938 if (dmar_table_init()) {
4939 if (force_on)
4940 panic("tboot: Failed to initialize DMAR table\n");
4941 goto out_free_dmar;
4942 }
4943
4944 if (dmar_dev_scope_init() < 0) {
4945 if (force_on)
4946 panic("tboot: Failed to initialize DMAR device scope\n");
4947 goto out_free_dmar;
4948 }
4949
4950 up_write(&dmar_global_lock);
4951
4952 /*
4953 * The bus notifier takes the dmar_global_lock, so lockdep will
4954 * complain later when we register it under the lock.
4955 */
4956 dmar_register_bus_notifier();
4957
4958 down_write(&dmar_global_lock);
4959
4960 if (!no_iommu)
4961 intel_iommu_debugfs_init();
4962
4963 if (no_iommu || dmar_disabled) {
4964 /*
4965 * We exit the function here to ensure the IOMMU's remapping and
4966 * mempool aren't set up, which means that the IOMMU's PMRs
4967 * won't be disabled via the call to init_dmars(). So disable
4968 * them explicitly here. The PMRs were set up by tboot prior to
4969 * calling SENTER, but the kernel is expected to reset/tear
4970 * down the PMRs.
4971 */
4972 if (intel_iommu_tboot_noforce) {
4973 for_each_iommu(iommu, drhd)
4974 iommu_disable_protect_mem_regions(iommu);
4975 }
4976
4977 /*
4978 * Make sure the IOMMUs are switched off, even when we
4979 * boot into a kexec kernel and the previous kernel left
4980 * them enabled
4981 */
4982 intel_disable_iommus();
4983 goto out_free_dmar;
4984 }
4985
4986 if (list_empty(&dmar_rmrr_units))
4987 pr_info("No RMRR found\n");
4988
4989 if (list_empty(&dmar_atsr_units))
4990 pr_info("No ATSR found\n");
4991
4992 if (dmar_init_reserved_ranges()) {
4993 if (force_on)
4994 panic("tboot: Failed to reserve iommu ranges\n");
4995 goto out_free_reserved_range;
4996 }
4997
4998 if (dmar_map_gfx)
4999 intel_iommu_gfx_mapped = 1;
5000
5001 init_no_remapping_devices();
5002
5003 ret = init_dmars();
5004 if (ret) {
5005 if (force_on)
5006 panic("tboot: Failed to initialize DMARs\n");
5007 pr_err("Initialization failed\n");
5008 goto out_free_reserved_range;
5009 }
5010 up_write(&dmar_global_lock);
5011
5012 init_iommu_pm_ops();
5013
5014 down_read(&dmar_global_lock);
5015 for_each_active_iommu(iommu, drhd) {
5016 iommu_device_sysfs_add(&iommu->iommu, NULL,
5017 intel_iommu_groups,
5018 "%s", iommu->name);
5019 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
5020 iommu_device_register(&iommu->iommu);
5021 }
5022 up_read(&dmar_global_lock);
5023
5024 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
5025 if (si_domain && !hw_pass_through)
5026 register_memory_notifier(&intel_iommu_memory_nb);
5027 cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
5028 intel_iommu_cpu_dead);
5029
5030 down_read(&dmar_global_lock);
5031 if (probe_acpi_namespace_devices())
5032 pr_warn("ACPI name space devices didn't probe correctly\n");
5033
5034 /* Finally, we enable the DMA remapping hardware. */
5035 for_each_iommu(iommu, drhd) {
5036 if (!drhd->ignored && !translation_pre_enabled(iommu))
5037 iommu_enable_translation(iommu);
5038
5039 iommu_disable_protect_mem_regions(iommu);
5040 }
5041 up_read(&dmar_global_lock);
5042
5043 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
5044
5045 intel_iommu_enabled = 1;
5046
5047 return 0;
5048
5049 out_free_reserved_range:
5050 put_iova_domain(&reserved_iova_list);
5051 out_free_dmar:
5052 intel_iommu_free_dmars();
5053 up_write(&dmar_global_lock);
5054 iommu_exit_mempool();
5055 return ret;
5056 }
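/*
 * Editor's note: whether this init path ends up enabling translation is
 * governed by kernel parameters such as "intel_iommu=on" / "intel_iommu=off"
 * together with the platform opt-in handled by platform_optin_force_iommu()
 * above; see Documentation/admin-guide/kernel-parameters.txt for the full
 * list of options.
 */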
5057
5058 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
5059 {
5060 struct intel_iommu *iommu = opaque;
5061
5062 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
5063 return 0;
5064 }
5065
5066 /*
5067 * NB - intel-iommu lacks any sort of reference counting for the users of
5068 * dependent devices. If multiple endpoints have intersecting dependent
5069 * devices, unbinding the driver from any one of them will possibly leave
5070 * the others unable to operate.
5071 */
5072 static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
5073 {
5074 if (!iommu || !dev || !dev_is_pci(dev))
5075 return;
5076
5077 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
5078 }
5079
5080 static void __dmar_remove_one_dev_info(struct device_domain_info *info)
5081 {
5082 struct dmar_domain *domain;
5083 struct intel_iommu *iommu;
5084 unsigned long flags;
5085
5086 assert_spin_locked(&device_domain_lock);
5087
5088 if (WARN_ON(!info))
5089 return;
5090
5091 iommu = info->iommu;
5092 domain = info->domain;
5093
5094 if (info->dev) {
5095 if (dev_is_pci(info->dev) && sm_supported(iommu))
5096 intel_pasid_tear_down_entry(iommu, info->dev,
5097 PASID_RID2PASID, false);
5098
5099 iommu_disable_dev_iotlb(info);
5100 if (!dev_is_real_dma_subdevice(info->dev))
5101 domain_context_clear(iommu, info->dev);
5102 intel_pasid_free_table(info->dev);
5103 }
5104
5105 unlink_domain_info(info);
5106
5107 spin_lock_irqsave(&iommu->lock, flags);
5108 domain_detach_iommu(domain, iommu);
5109 spin_unlock_irqrestore(&iommu->lock, flags);
5110
5111 free_devinfo_mem(info);
5112 }
5113
5114 static void dmar_remove_one_dev_info(struct device *dev)
5115 {
5116 struct device_domain_info *info;
5117 unsigned long flags;
5118
5119 spin_lock_irqsave(&device_domain_lock, flags);
5120 info = get_domain_info(dev);
5121 if (info)
5122 __dmar_remove_one_dev_info(info);
5123 spin_unlock_irqrestore(&device_domain_lock, flags);
5124 }
5125
5126 static int md_domain_init(struct dmar_domain *domain, int guest_width)
5127 {
5128 int adjust_width;
5129
5130 /* calculate AGAW */
5131 domain->gaw = guest_width;
5132 adjust_width = guestwidth_to_adjustwidth(guest_width);
5133 domain->agaw = width_to_agaw(adjust_width);
5134
5135 domain->iommu_coherency = 0;
5136 domain->iommu_snooping = 0;
5137 domain->iommu_superpage = 0;
5138 domain->max_addr = 0;
5139
5140 /* always allocate the top pgd */
5141 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5142 if (!domain->pgd)
5143 return -ENOMEM;
5144 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
5145 return 0;
5146 }
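/*
 * Editor's note (illustrative): a guest width of 48 bits, for example,
 * corresponds to a 4-level VT-d page table; the width-to-agaw adjustment is
 * done by the helpers above, and the single top-level pgd page allocated
 * here anchors that table.
 */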
5147
5148 static void intel_init_iova_domain(struct dmar_domain *dmar_domain)
5149 {
5150 init_iova_domain(&dmar_domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
5151 copy_reserved_iova(&reserved_iova_list, &dmar_domain->iovad);
5152
5153 if (!intel_iommu_strict &&
5154 init_iova_flush_queue(&dmar_domain->iovad,
5155 iommu_flush_iova, iova_entry_free))
5156 pr_info("iova flush queue initialization failed\n");
5157 }
5158
5159 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
5160 {
5161 struct dmar_domain *dmar_domain;
5162 struct iommu_domain *domain;
5163
5164 switch (type) {
5165 case IOMMU_DOMAIN_DMA:
5166 case IOMMU_DOMAIN_UNMANAGED:
5167 dmar_domain = alloc_domain(0);
5168 if (!dmar_domain) {
5169 pr_err("Can't allocate dmar_domain\n");
5170 return NULL;
5171 }
5172 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
5173 pr_err("Domain initialization failed\n");
5174 domain_exit(dmar_domain);
5175 return NULL;
5176 }
5177
5178 if (type == IOMMU_DOMAIN_DMA)
5179 intel_init_iova_domain(dmar_domain);
5180
5181 domain = &dmar_domain->domain;
5182 domain->geometry.aperture_start = 0;
5183 domain->geometry.aperture_end =
5184 __DOMAIN_MAX_ADDR(dmar_domain->gaw);
5185 domain->geometry.force_aperture = true;
5186
5187 return domain;
5188 case IOMMU_DOMAIN_IDENTITY:
5189 return &si_domain->domain;
5190 default:
5191 return NULL;
5192 }
5193
5194 return NULL;
5195 }
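/*
 * Editor's note (hedged sketch): external users such as VFIO reach this
 * through the generic IOMMU API, roughly:
 *
 *	struct iommu_domain *domain = iommu_domain_alloc(&pci_bus_type);
 *	if (!domain)
 *		return -ENOMEM;
 *	...
 *	iommu_domain_free(domain);
 *
 * which lands here with type == IOMMU_DOMAIN_UNMANAGED.
 */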
5196
5197 static void intel_iommu_domain_free(struct iommu_domain *domain)
5198 {
5199 if (domain != &si_domain->domain)
5200 domain_exit(to_dmar_domain(domain));
5201 }
5202
5203 /*
5204 * Check whether a @domain could be attached to the @dev through the
5205 * aux-domain attach/detach APIs.
5206 */
5207 static inline bool
5208 is_aux_domain(struct device *dev, struct iommu_domain *domain)
5209 {
5210 struct device_domain_info *info = get_domain_info(dev);
5211
5212 return info && info->auxd_enabled &&
5213 domain->type == IOMMU_DOMAIN_UNMANAGED;
5214 }
5215
5216 static void auxiliary_link_device(struct dmar_domain *domain,
5217 struct device *dev)
5218 {
5219 struct device_domain_info *info = get_domain_info(dev);
5220
5221 assert_spin_locked(&device_domain_lock);
5222 if (WARN_ON(!info))
5223 return;
5224
5225 domain->auxd_refcnt++;
5226 list_add(&domain->auxd, &info->auxiliary_domains);
5227 }
5228
5229 static void auxiliary_unlink_device(struct dmar_domain *domain,
5230 struct device *dev)
5231 {
5232 struct device_domain_info *info = get_domain_info(dev);
5233
5234 assert_spin_locked(&device_domain_lock);
5235 if (WARN_ON(!info))
5236 return;
5237
5238 list_del(&domain->auxd);
5239 domain->auxd_refcnt--;
5240
5241 if (!domain->auxd_refcnt && domain->default_pasid > 0)
5242 ioasid_free(domain->default_pasid);
5243 }
5244
5245 static int aux_domain_add_dev(struct dmar_domain *domain,
5246 struct device *dev)
5247 {
5248 int ret;
5249 unsigned long flags;
5250 struct intel_iommu *iommu;
5251
5252 iommu = device_to_iommu(dev, NULL, NULL);
5253 if (!iommu)
5254 return -ENODEV;
5255
5256 if (domain->default_pasid <= 0) {
5257 u32 pasid;
5258
5259 /* No private data needed for the default pasid */
5260 pasid = ioasid_alloc(NULL, PASID_MIN,
5261 pci_max_pasids(to_pci_dev(dev)) - 1,
5262 NULL);
5263 if (pasid == INVALID_IOASID) {
5264 pr_err("Can't allocate default pasid\n");
5265 return -ENODEV;
5266 }
5267 domain->default_pasid = pasid;
5268 }
5269
5270 spin_lock_irqsave(&device_domain_lock, flags);
5271 /*
5272 * iommu->lock must be held to attach domain to iommu and setup the
5273 * pasid entry for second level translation.
5274 */
5275 spin_lock(&iommu->lock);
5276 ret = domain_attach_iommu(domain, iommu);
5277 if (ret)
5278 goto attach_failed;
5279
5280 /* Setup the PASID entry for mediated devices: */
5281 if (domain_use_first_level(domain))
5282 ret = domain_setup_first_level(iommu, domain, dev,
5283 domain->default_pasid);
5284 else
5285 ret = intel_pasid_setup_second_level(iommu, domain, dev,
5286 domain->default_pasid);
5287 if (ret)
5288 goto table_failed;
5289 spin_unlock(&iommu->lock);
5290
5291 auxiliary_link_device(domain, dev);
5292
5293 spin_unlock_irqrestore(&device_domain_lock, flags);
5294
5295 return 0;
5296
5297 table_failed:
5298 domain_detach_iommu(domain, iommu);
5299 attach_failed:
5300 spin_unlock(&iommu->lock);
5301 spin_unlock_irqrestore(&device_domain_lock, flags);
5302 if (!domain->auxd_refcnt && domain->default_pasid > 0)
5303 ioasid_free(domain->default_pasid);
5304
5305 return ret;
5306 }
5307
5308 static void aux_domain_remove_dev(struct dmar_domain *domain,
5309 struct device *dev)
5310 {
5311 struct device_domain_info *info;
5312 struct intel_iommu *iommu;
5313 unsigned long flags;
5314
5315 if (!is_aux_domain(dev, &domain->domain))
5316 return;
5317
5318 spin_lock_irqsave(&device_domain_lock, flags);
5319 info = get_domain_info(dev);
5320 iommu = info->iommu;
5321
5322 auxiliary_unlink_device(domain, dev);
5323
5324 spin_lock(&iommu->lock);
5325 intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid, false);
5326 domain_detach_iommu(domain, iommu);
5327 spin_unlock(&iommu->lock);
5328
5329 spin_unlock_irqrestore(&device_domain_lock, flags);
5330 }
5331
5332 static int prepare_domain_attach_device(struct iommu_domain *domain,
5333 struct device *dev)
5334 {
5335 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5336 struct intel_iommu *iommu;
5337 int addr_width;
5338
5339 iommu = device_to_iommu(dev, NULL, NULL);
5340 if (!iommu)
5341 return -ENODEV;
5342
5343 /* check if this iommu agaw is sufficient for max mapped address */
5344 addr_width = agaw_to_width(iommu->agaw);
5345 if (addr_width > cap_mgaw(iommu->cap))
5346 addr_width = cap_mgaw(iommu->cap);
5347
5348 if (dmar_domain->max_addr > (1LL << addr_width)) {
5349 dev_err(dev, "%s: iommu width (%d) is not "
5350 "sufficient for the mapped address (%llx)\n",
5351 __func__, addr_width, dmar_domain->max_addr);
5352 return -EFAULT;
5353 }
5354 dmar_domain->gaw = addr_width;
5355
5356 /*
5357 * Knock out extra levels of page tables if necessary
5358 */
5359 while (iommu->agaw < dmar_domain->agaw) {
5360 struct dma_pte *pte;
5361
5362 pte = dmar_domain->pgd;
5363 if (dma_pte_present(pte)) {
5364 dmar_domain->pgd = (struct dma_pte *)
5365 phys_to_virt(dma_pte_addr(pte));
5366 free_pgtable_page(pte);
5367 }
5368 dmar_domain->agaw--;
5369 }
5370
5371 return 0;
5372 }
5373
5374 static int intel_iommu_attach_device(struct iommu_domain *domain,
5375 struct device *dev)
5376 {
5377 int ret;
5378
5379 if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
5380 device_is_rmrr_locked(dev)) {
5381 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
5382 return -EPERM;
5383 }
5384
5385 if (is_aux_domain(dev, domain))
5386 return -EPERM;
5387
5388 /* normally dev is not mapped */
5389 if (unlikely(domain_context_mapped(dev))) {
5390 struct dmar_domain *old_domain;
5391
5392 old_domain = find_domain(dev);
5393 if (old_domain)
5394 dmar_remove_one_dev_info(dev);
5395 }
5396
5397 ret = prepare_domain_attach_device(domain, dev);
5398 if (ret)
5399 return ret;
5400
5401 return domain_add_dev_info(to_dmar_domain(domain), dev);
5402 }
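/*
 * Editor's note (hedged sketch): the usual caller is the IOMMU core on
 * behalf of a domain owner, e.g.
 *
 *	ret = iommu_attach_device(domain, dev);
 *	if (ret)
 *		return ret;
 *	...
 *	iommu_detach_device(domain, dev);
 *
 * RMRR-locked devices are rejected above with -EPERM before any state is
 * touched.
 */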
5403
5404 static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
5405 struct device *dev)
5406 {
5407 int ret;
5408
5409 if (!is_aux_domain(dev, domain))
5410 return -EPERM;
5411
5412 ret = prepare_domain_attach_device(domain, dev);
5413 if (ret)
5414 return ret;
5415
5416 return aux_domain_add_dev(to_dmar_domain(domain), dev);
5417 }
5418
5419 static void intel_iommu_detach_device(struct iommu_domain *domain,
5420 struct device *dev)
5421 {
5422 dmar_remove_one_dev_info(dev);
5423 }
5424
5425 static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
5426 struct device *dev)
5427 {
5428 aux_domain_remove_dev(to_dmar_domain(domain), dev);
5429 }
5430
5431 #ifdef CONFIG_INTEL_IOMMU_SVM
5432 /*
5433 * 2D array for converting and sanitizing IOMMU generic TLB granularity to
5434 * VT-d granularity. Invalidation is typically included in the unmap operation
5435 * as a result of DMA or VFIO unmap. However, for assigned devices guest
5436 * owns the first level page tables. Invalidations of translation caches in the
5437 * guest are trapped and passed down to the host.
5438 *
5439 * vIOMMU in the guest will only expose first level page tables, therefore
5440 * we do not support IOTLB granularity for requests without a PASID (second level).
5441 *
5442 * For example, to find the VT-d granularity encoding for IOTLB
5443 * type and page selective granularity within PASID:
5444 * X: indexed by iommu cache type
5445 * Y: indexed by enum iommu_inv_granularity
5446 * [IOMMU_CACHE_INV_TYPE_IOTLB][IOMMU_INV_GRANU_ADDR]
5447 */
5448
5449 static const int
5450 inv_type_granu_table[IOMMU_CACHE_INV_TYPE_NR][IOMMU_INV_GRANU_NR] = {
5451 /*
5452 * PASID based IOTLB invalidation: PASID selective (per PASID),
5453 * page selective (address granularity)
5454 */
5455 {-EINVAL, QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID},
5456 /* PASID based dev TLBs */
5457 {-EINVAL, -EINVAL, QI_DEV_IOTLB_GRAN_PASID_SEL},
5458 /* PASID cache */
5459 {-EINVAL, -EINVAL, -EINVAL}
5460 };
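/*
 * Editor's note (reading the table above): an IOTLB-type request with
 * address-selective granularity maps to QI_GRAN_PSI_PASID, a PASID-selective
 * one maps to QI_GRAN_NONG_PASID, and the -EINVAL slots mark combinations the
 * passdown path rejects.
 */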
5461
5462 static inline int to_vtd_granularity(int type, int granu)
5463 {
5464 return inv_type_granu_table[type][granu];
5465 }
5466
5467 static inline u64 to_vtd_size(u64 granu_size, u64 nr_granules)
5468 {
5469 u64 nr_pages = (granu_size * nr_granules) >> VTD_PAGE_SHIFT;
5470
5471 /* VT-d size is encoded as 2^size of 4K pages, 0 for 4k, 9 for 2MB, etc.
5472 * The IOMMU cache invalidate API passes granu_size in bytes, and the
5473 * number of granules of that size that are contiguous in memory.
5474 */
5475 return order_base_2(nr_pages);
5476 }
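/*
 * Editor's note (worked example with made-up numbers): granu_size = 4KiB and
 * nr_granules = 512 give nr_pages = 512, so to_vtd_size() returns
 * order_base_2(512) = 9, i.e. the 2MiB encoding mentioned above.
 */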
5477
5478 static int
5479 intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
5480 struct iommu_cache_invalidate_info *inv_info)
5481 {
5482 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5483 struct device_domain_info *info;
5484 struct intel_iommu *iommu;
5485 unsigned long flags;
5486 int cache_type;
5487 u8 bus, devfn;
5488 u16 did, sid;
5489 int ret = 0;
5490 u64 size = 0;
5491
5492 if (!inv_info || !dmar_domain)
5493 return -EINVAL;
5494
5495 if (!dev || !dev_is_pci(dev))
5496 return -ENODEV;
5497
5498 iommu = device_to_iommu(dev, &bus, &devfn);
5499 if (!iommu)
5500 return -ENODEV;
5501
5502 if (!(dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE))
5503 return -EINVAL;
5504
5505 spin_lock_irqsave(&device_domain_lock, flags);
5506 spin_lock(&iommu->lock);
5507 info = get_domain_info(dev);
5508 if (!info) {
5509 ret = -EINVAL;
5510 goto out_unlock;
5511 }
5512 did = dmar_domain->iommu_did[iommu->seq_id];
5513 sid = PCI_DEVID(bus, devfn);
5514
5515 /* Size is only valid in address selective invalidation */
5516 if (inv_info->granularity == IOMMU_INV_GRANU_ADDR)
5517 size = to_vtd_size(inv_info->granu.addr_info.granule_size,
5518 inv_info->granu.addr_info.nb_granules);
5519
5520 for_each_set_bit(cache_type,
5521 (unsigned long *)&inv_info->cache,
5522 IOMMU_CACHE_INV_TYPE_NR) {
5523 int granu = 0;
5524 u64 pasid = 0;
5525 u64 addr = 0;
5526
5527 granu = to_vtd_granularity(cache_type, inv_info->granularity);
5528 if (granu == -EINVAL) {
5529 pr_err_ratelimited("Invalid cache type and granu combination %d/%d\n",
5530 cache_type, inv_info->granularity);
5531 break;
5532 }
5533
5534 /*
5535 * PASID is stored in different locations based on the
5536 * granularity.
5537 */
5538 if (inv_info->granularity == IOMMU_INV_GRANU_PASID &&
5539 (inv_info->granu.pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID))
5540 pasid = inv_info->granu.pasid_info.pasid;
5541 else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
5542 (inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID))
5543 pasid = inv_info->granu.addr_info.pasid;
5544
5545 switch (BIT(cache_type)) {
5546 case IOMMU_CACHE_INV_TYPE_IOTLB:
5547 /* HW will ignore LSB bits based on address mask */
5548 if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
5549 size &&
5550 (inv_info->granu.addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) {
5551 pr_err_ratelimited("User address not aligned, 0x%llx, size order %llu\n",
5552 inv_info->granu.addr_info.addr, size);
5553 }
5554
5555 /*
5556 * If granu is PASID-selective, address is ignored.
5557 * We use npages = -1 to indicate that.
5558 */
5559 qi_flush_piotlb(iommu, did, pasid,
5560 mm_to_dma_pfn(inv_info->granu.addr_info.addr),
5561 (granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size,
5562 inv_info->granu.addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF);
5563
5564 if (!info->ats_enabled)
5565 break;
5566 /*
5567 * Always flush device IOTLB if ATS is enabled. vIOMMU
5568 * in the guest may assume IOTLB flush is inclusive,
5569 * which is more efficient.
5570 */
5571 fallthrough;
5572 case IOMMU_CACHE_INV_TYPE_DEV_IOTLB:
5573 /*
5574 * PASID-based device TLB invalidation does not support
5575 * IOMMU_INV_GRANU_PASID granularity; it only supports
5576 * IOMMU_INV_GRANU_ADDR.
5577 * The equivalent is to set the size to cover the entire
5578 * 64-bit address range. The user provides only PASID info
5579 * without address info, so we set addr to 0.
5580 */
5581 if (inv_info->granularity == IOMMU_INV_GRANU_PASID) {
5582 size = 64 - VTD_PAGE_SHIFT;
5583 addr = 0;
5584 } else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) {
5585 addr = inv_info->granu.addr_info.addr;
5586 }
5587
5588 if (info->ats_enabled)
5589 qi_flush_dev_iotlb_pasid(iommu, sid,
5590 info->pfsid, pasid,
5591 info->ats_qdep, addr,
5592 size);
5593 else
5594 pr_warn_ratelimited("Passdown device IOTLB flush w/o ATS!\n");
5595 break;
5596 default:
5597 dev_err_ratelimited(dev, "Unsupported IOMMU invalidation type %d\n",
5598 cache_type);
5599 ret = -EINVAL;
5600 }
5601 }
5602 out_unlock:
5603 spin_unlock(&iommu->lock);
5604 spin_unlock_irqrestore(&device_domain_lock, flags);
5605
5606 return ret;
5607 }
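/*
 * Illustrative invalidation descriptor for the path above (field names from
 * the code, values made up): an address-selective IOTLB flush of one 4KiB
 * page for a guest PASID sets cache = IOMMU_CACHE_INV_TYPE_IOTLB,
 * granularity = IOMMU_INV_GRANU_ADDR, granu.addr_info.{pasid, addr},
 * granule_size = 4096, nb_granules = 1 and IOMMU_INV_ADDR_FLAGS_PASID in
 * flags; the loop above turns that into a qi_flush_piotlb() of one page
 * and, if ATS is enabled, a qi_flush_dev_iotlb_pasid() as well.
 */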
5608 #endif
5609
5610 static int intel_iommu_map(struct iommu_domain *domain,
5611 unsigned long iova, phys_addr_t hpa,
5612 size_t size, int iommu_prot, gfp_t gfp)
5613 {
5614 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5615 u64 max_addr;
5616 int prot = 0;
5617 int ret;
5618
5619 if (iommu_prot & IOMMU_READ)
5620 prot |= DMA_PTE_READ;
5621 if (iommu_prot & IOMMU_WRITE)
5622 prot |= DMA_PTE_WRITE;
5623 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5624 prot |= DMA_PTE_SNP;
5625
5626 max_addr = iova + size;
5627 if (dmar_domain->max_addr < max_addr) {
5628 u64 end;
5629
5630 /* check if minimum agaw is sufficient for mapped address */
5631 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
5632 if (end < max_addr) {
5633 pr_err("%s: iommu width (%d) is not "
5634 "sufficient for the mapped address (%llx)\n",
5635 __func__, dmar_domain->gaw, max_addr);
5636 return -EFAULT;
5637 }
5638 dmar_domain->max_addr = max_addr;
5639 }
5640 /* Round up the size to the next multiple of PAGE_SIZE if it, together
5641    with the low bits of hpa, would take us onto the next page */
5642 size = aligned_nrpages(hpa, size);
5643 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5644 hpa >> VTD_PAGE_SHIFT, size, prot);
5645 return ret;
5646 }
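/*
 * Worked example of the rounding above: size = 0x2000 at hpa = 0x10000800
 * leaves 0x800 of offset inside the first 4KiB page, so aligned_nrpages()
 * returns 3 and three page-table entries are created even though the size
 * alone covers only two pages.
 */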
5647
5648 static size_t intel_iommu_unmap(struct iommu_domain *domain,
5649 unsigned long iova, size_t size,
5650 struct iommu_iotlb_gather *gather)
5651 {
5652 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5653 struct page *freelist = NULL;
5654 unsigned long start_pfn, last_pfn;
5655 unsigned int npages;
5656 int iommu_id, level = 0;
5657
5658 /* Cope with the horrid API, which requires us to unmap more than the
5659    size argument if the IOVA happens to fall in a large-page mapping. */
5660 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5661
5662 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5663 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
5664
5665 start_pfn = iova >> VTD_PAGE_SHIFT;
5666 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5667
5668 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5669
5670 npages = last_pfn - start_pfn + 1;
5671
5672 for_each_domain_iommu(iommu_id, dmar_domain)
5673 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5674 start_pfn, npages, !freelist, 0);
5675
5676 dma_free_pagelist(freelist);
5677
5678 if (dmar_domain->max_addr == iova + size)
5679 dmar_domain->max_addr = iova;
5680
5681 return size;
5682 }
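/*
 * Example of the size adjustment above: if the IOVA lies inside a 2MiB
 * superpage, pfn_to_dma_pte() reports level 2, the requested size is
 * rounded up to 2MiB, all 512 constituent 4KiB PFNs are flushed, and the
 * enlarged size is returned to the caller as the amount actually unmapped.
 */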
5683
5684 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
5685 dma_addr_t iova)
5686 {
5687 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5688 struct dma_pte *pte;
5689 int level = 0;
5690 u64 phys = 0;
5691
5692 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
5693 if (pte && dma_pte_present(pte))
5694 phys = dma_pte_addr(pte) +
5695 (iova & (BIT_MASK(level_to_offset_bits(level) +
5696 VTD_PAGE_SHIFT) - 1));
5697
5698 return phys;
5699 }
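/*
 * Example: for a 2MiB superpage (level 2), level_to_offset_bits() is 9, so
 * the low 21 bits of the IOVA are kept as the offset and added to
 * dma_pte_addr(pte) to form the returned physical address.
 */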
5700
5701 static inline bool scalable_mode_support(void)
5702 {
5703 struct dmar_drhd_unit *drhd;
5704 struct intel_iommu *iommu;
5705 bool ret = true;
5706
5707 rcu_read_lock();
5708 for_each_active_iommu(iommu, drhd) {
5709 if (!sm_supported(iommu)) {
5710 ret = false;
5711 break;
5712 }
5713 }
5714 rcu_read_unlock();
5715
5716 return ret;
5717 }
5718
5719 static inline bool iommu_pasid_support(void)
5720 {
5721 struct dmar_drhd_unit *drhd;
5722 struct intel_iommu *iommu;
5723 bool ret = true;
5724
5725 rcu_read_lock();
5726 for_each_active_iommu(iommu, drhd) {
5727 if (!pasid_supported(iommu)) {
5728 ret = false;
5729 break;
5730 }
5731 }
5732 rcu_read_unlock();
5733
5734 return ret;
5735 }
5736
5737 static inline bool nested_mode_support(void)
5738 {
5739 struct dmar_drhd_unit *drhd;
5740 struct intel_iommu *iommu;
5741 bool ret = true;
5742
5743 rcu_read_lock();
5744 for_each_active_iommu(iommu, drhd) {
5745 if (!sm_supported(iommu) || !ecap_nest(iommu->ecap)) {
5746 ret = false;
5747 break;
5748 }
5749 }
5750 rcu_read_unlock();
5751
5752 return ret;
5753 }
5754
5755 static bool intel_iommu_capable(enum iommu_cap cap)
5756 {
5757 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5758 return domain_update_iommu_snooping(NULL) == 1;
5759 if (cap == IOMMU_CAP_INTR_REMAP)
5760 return irq_remapping_enabled == 1;
5761
5762 return false;
5763 }
5764
5765 static struct iommu_device *intel_iommu_probe_device(struct device *dev)
5766 {
5767 struct intel_iommu *iommu;
5768
5769 iommu = device_to_iommu(dev, NULL, NULL);
5770 if (!iommu)
5771 return ERR_PTR(-ENODEV);
5772
5773 if (translation_pre_enabled(iommu))
5774 dev_iommu_priv_set(dev, DEFER_DEVICE_DOMAIN_INFO);
5775
5776 return &iommu->iommu;
5777 }
5778
5779 static void intel_iommu_release_device(struct device *dev)
5780 {
5781 struct intel_iommu *iommu;
5782
5783 iommu = device_to_iommu(dev, NULL, NULL);
5784 if (!iommu)
5785 return;
5786
5787 dmar_remove_one_dev_info(dev);
5788
5789 set_dma_ops(dev, NULL);
5790 }
5791
5792 static void intel_iommu_probe_finalize(struct device *dev)
5793 {
5794 struct iommu_domain *domain;
5795
5796 domain = iommu_get_domain_for_dev(dev);
5797 if (device_needs_bounce(dev))
5798 set_dma_ops(dev, &bounce_dma_ops);
5799 else if (domain && domain->type == IOMMU_DOMAIN_DMA)
5800 set_dma_ops(dev, &intel_dma_ops);
5801 else
5802 set_dma_ops(dev, NULL);
5803 }
5804
5805 static void intel_iommu_get_resv_regions(struct device *device,
5806 struct list_head *head)
5807 {
5808 int prot = DMA_PTE_READ | DMA_PTE_WRITE;
5809 struct iommu_resv_region *reg;
5810 struct dmar_rmrr_unit *rmrr;
5811 struct device *i_dev;
5812 int i;
5813
5814 down_read(&dmar_global_lock);
5815 for_each_rmrr_units(rmrr) {
5816 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5817 i, i_dev) {
5818 struct iommu_resv_region *resv;
5819 enum iommu_resv_type type;
5820 size_t length;
5821
5822 if (i_dev != device &&
5823 !is_downstream_to_pci_bridge(device, i_dev))
5824 continue;
5825
5826 length = rmrr->end_address - rmrr->base_address + 1;
5827
5828 type = device_rmrr_is_relaxable(device) ?
5829 IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
5830
5831 resv = iommu_alloc_resv_region(rmrr->base_address,
5832 length, prot, type);
5833 if (!resv)
5834 break;
5835
5836 list_add_tail(&resv->list, head);
5837 }
5838 }
5839 up_read(&dmar_global_lock);
5840
5841 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
5842 if (dev_is_pci(device)) {
5843 struct pci_dev *pdev = to_pci_dev(device);
5844
5845 if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
5846 reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
5847 IOMMU_RESV_DIRECT_RELAXABLE);
5848 if (reg)
5849 list_add_tail(&reg->list, head);
5850 }
5851 }
5852 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
5853
5854 reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5855 IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
5856 0, IOMMU_RESV_MSI);
5857 if (!reg)
5858 return;
5859 list_add_tail(&reg->list, head);
5860 }
5861
5862 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
5863 {
5864 struct device_domain_info *info;
5865 struct context_entry *context;
5866 struct dmar_domain *domain;
5867 unsigned long flags;
5868 u64 ctx_lo;
5869 int ret;
5870
5871 domain = find_domain(dev);
5872 if (!domain)
5873 return -EINVAL;
5874
5875 spin_lock_irqsave(&device_domain_lock, flags);
5876 spin_lock(&iommu->lock);
5877
5878 ret = -EINVAL;
5879 info = get_domain_info(dev);
5880 if (!info || !info->pasid_supported)
5881 goto out;
5882
5883 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5884 if (WARN_ON(!context))
5885 goto out;
5886
5887 ctx_lo = context[0].lo;
5888
5889 if (!(ctx_lo & CONTEXT_PASIDE)) {
5890 ctx_lo |= CONTEXT_PASIDE;
5891 context[0].lo = ctx_lo;
5892 wmb();
5893 iommu->flush.flush_context(iommu,
5894 domain->iommu_did[iommu->seq_id],
5895 PCI_DEVID(info->bus, info->devfn),
5896 DMA_CCMD_MASK_NOBIT,
5897 DMA_CCMD_DEVICE_INVL);
5898 }
5899
5900 /* Enable PASID support in the device, if it wasn't already */
5901 if (!info->pasid_enabled)
5902 iommu_enable_dev_iotlb(info);
5903
5904 ret = 0;
5905
5906 out:
5907 spin_unlock(&iommu->lock);
5908 spin_unlock_irqrestore(&device_domain_lock, flags);
5909
5910 return ret;
5911 }
5912
5913 static void intel_iommu_apply_resv_region(struct device *dev,
5914 struct iommu_domain *domain,
5915 struct iommu_resv_region *region)
5916 {
5917 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5918 unsigned long start, end;
5919
5920 start = IOVA_PFN(region->start);
5921 end = IOVA_PFN(region->start + region->length - 1);
5922
5923 WARN_ON_ONCE(!reserve_iova(&dmar_domain->iovad, start, end));
5924 }
5925
5926 static struct iommu_group *intel_iommu_device_group(struct device *dev)
5927 {
5928 if (dev_is_pci(dev))
5929 return pci_device_group(dev);
5930 return generic_device_group(dev);
5931 }
5932
5933 static int intel_iommu_enable_auxd(struct device *dev)
5934 {
5935 struct device_domain_info *info;
5936 struct intel_iommu *iommu;
5937 unsigned long flags;
5938 int ret;
5939
5940 iommu = device_to_iommu(dev, NULL, NULL);
5941 if (!iommu || dmar_disabled)
5942 return -EINVAL;
5943
5944 if (!sm_supported(iommu) || !pasid_supported(iommu))
5945 return -EINVAL;
5946
5947 ret = intel_iommu_enable_pasid(iommu, dev);
5948 if (ret)
5949 return -ENODEV;
5950
5951 spin_lock_irqsave(&device_domain_lock, flags);
5952 info = get_domain_info(dev);
5953 info->auxd_enabled = 1;
5954 spin_unlock_irqrestore(&device_domain_lock, flags);
5955
5956 return 0;
5957 }
5958
5959 static int intel_iommu_disable_auxd(struct device *dev)
5960 {
5961 struct device_domain_info *info;
5962 unsigned long flags;
5963
5964 spin_lock_irqsave(&device_domain_lock, flags);
5965 info = get_domain_info(dev);
5966 if (!WARN_ON(!info))
5967 info->auxd_enabled = 0;
5968 spin_unlock_irqrestore(&device_domain_lock, flags);
5969
5970 return 0;
5971 }
5972
5973 /*
5974 * A PCI Express Designated Vendor-Specific Extended Capability (DVSEC) is
5975 * defined in section 3.7 of the Intel Scalable I/O Virtualization technical
5976 * spec so that system software and tools can detect endpoint devices that
5977 * support Intel Scalable I/O Virtualization without a host driver dependency.
5978 *
5979 * Returns the offset of the matching extended capability structure within
5980 * the device's PCI configuration space, or 0 if the device does not support
5981 * it.
5982 */
5983 static int siov_find_pci_dvsec(struct pci_dev *pdev)
5984 {
5985 int pos;
5986 u16 vendor, id;
5987
5988 pos = pci_find_next_ext_capability(pdev, 0, 0x23);
5989 while (pos) {
5990 pci_read_config_word(pdev, pos + 4, &vendor);
5991 pci_read_config_word(pdev, pos + 8, &id);
5992 if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
5993 return pos;
5994
5995 pos = pci_find_next_ext_capability(pdev, pos, 0x23);
5996 }
5997
5998 return 0;
5999 }
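/*
 * Layout assumed by the config reads above, per the PCIe DVSEC definition:
 * extended capability ID 0x23 is DVSEC, the DVSEC vendor ID is the word at
 * offset +4 and the DVSEC ID is the word at offset +8, so a SIOV-capable
 * endpoint carries an Intel DVSEC with ID 5 at that position.
 */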
6000
6001 static bool
6002 intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
6003 {
6004 if (feat == IOMMU_DEV_FEAT_AUX) {
6005 int ret;
6006
6007 if (!dev_is_pci(dev) || dmar_disabled ||
6008 !scalable_mode_support() || !iommu_pasid_support())
6009 return false;
6010
6011 ret = pci_pasid_features(to_pci_dev(dev));
6012 if (ret < 0)
6013 return false;
6014
6015 return !!siov_find_pci_dvsec(to_pci_dev(dev));
6016 }
6017
6018 if (feat == IOMMU_DEV_FEAT_SVA) {
6019 struct device_domain_info *info = get_domain_info(dev);
6020
6021 return info && (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) &&
6022 info->pasid_supported && info->pri_supported &&
6023 info->ats_supported;
6024 }
6025
6026 return false;
6027 }
6028
6029 static int
6030 intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
6031 {
6032 if (feat == IOMMU_DEV_FEAT_AUX)
6033 return intel_iommu_enable_auxd(dev);
6034
6035 if (feat == IOMMU_DEV_FEAT_SVA) {
6036 struct device_domain_info *info = get_domain_info(dev);
6037
6038 if (!info)
6039 return -EINVAL;
6040
6041 if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE)
6042 return 0;
6043 }
6044
6045 return -ENODEV;
6046 }
6047
6048 static int
6049 intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
6050 {
6051 if (feat == IOMMU_DEV_FEAT_AUX)
6052 return intel_iommu_disable_auxd(dev);
6053
6054 return -ENODEV;
6055 }
6056
6057 static bool
6058 intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
6059 {
6060 struct device_domain_info *info = get_domain_info(dev);
6061
6062 if (feat == IOMMU_DEV_FEAT_AUX)
6063 return scalable_mode_support() && info && info->auxd_enabled;
6064
6065 return false;
6066 }
6067
6068 static int
6069 intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
6070 {
6071 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
6072
6073 return dmar_domain->default_pasid > 0 ?
6074 dmar_domain->default_pasid : -EINVAL;
6075 }
6076
6077 static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
6078 struct device *dev)
6079 {
6080 return attach_deferred(dev);
6081 }
6082
6083 static int
6084 intel_iommu_domain_set_attr(struct iommu_domain *domain,
6085 enum iommu_attr attr, void *data)
6086 {
6087 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
6088 unsigned long flags;
6089 int ret = 0;
6090
6091 if (domain->type != IOMMU_DOMAIN_UNMANAGED)
6092 return -EINVAL;
6093
6094 switch (attr) {
6095 case DOMAIN_ATTR_NESTING:
6096 spin_lock_irqsave(&device_domain_lock, flags);
6097 if (nested_mode_support() &&
6098 list_empty(&dmar_domain->devices)) {
6099 dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE;
6100 dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL;
6101 } else {
6102 ret = -ENODEV;
6103 }
6104 spin_unlock_irqrestore(&device_domain_lock, flags);
6105 break;
6106 default:
6107 ret = -EINVAL;
6108 break;
6109 }
6110
6111 return ret;
6112 }
6113
6114 /*
6115 * Check that the device does not live on an external-facing PCI port that is
6116 * marked as untrusted. Such devices should not be able to apply quirks and
6117 * thus bypass the IOMMU restrictions.
6118 */
6119 static bool risky_device(struct pci_dev *pdev)
6120 {
6121 if (pdev->untrusted) {
6122 pci_info(pdev,
6123 "Skipping IOMMU quirk for dev [%04X:%04X] on untrusted PCI link\n",
6124 pdev->vendor, pdev->device);
6125 pci_info(pdev, "Please check with your BIOS/Platform vendor about this\n");
6126 return true;
6127 }
6128 return false;
6129 }
6130
6131 const struct iommu_ops intel_iommu_ops = {
6132 .capable = intel_iommu_capable,
6133 .domain_alloc = intel_iommu_domain_alloc,
6134 .domain_free = intel_iommu_domain_free,
6135 .domain_set_attr = intel_iommu_domain_set_attr,
6136 .attach_dev = intel_iommu_attach_device,
6137 .detach_dev = intel_iommu_detach_device,
6138 .aux_attach_dev = intel_iommu_aux_attach_device,
6139 .aux_detach_dev = intel_iommu_aux_detach_device,
6140 .aux_get_pasid = intel_iommu_aux_get_pasid,
6141 .map = intel_iommu_map,
6142 .unmap = intel_iommu_unmap,
6143 .iova_to_phys = intel_iommu_iova_to_phys,
6144 .probe_device = intel_iommu_probe_device,
6145 .probe_finalize = intel_iommu_probe_finalize,
6146 .release_device = intel_iommu_release_device,
6147 .get_resv_regions = intel_iommu_get_resv_regions,
6148 .put_resv_regions = generic_iommu_put_resv_regions,
6149 .apply_resv_region = intel_iommu_apply_resv_region,
6150 .device_group = intel_iommu_device_group,
6151 .dev_has_feat = intel_iommu_dev_has_feat,
6152 .dev_feat_enabled = intel_iommu_dev_feat_enabled,
6153 .dev_enable_feat = intel_iommu_dev_enable_feat,
6154 .dev_disable_feat = intel_iommu_dev_disable_feat,
6155 .is_attach_deferred = intel_iommu_is_attach_deferred,
6156 .def_domain_type = device_def_domain_type,
6157 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
6158 #ifdef CONFIG_INTEL_IOMMU_SVM
6159 .cache_invalidate = intel_iommu_sva_invalidate,
6160 .sva_bind_gpasid = intel_svm_bind_gpasid,
6161 .sva_unbind_gpasid = intel_svm_unbind_gpasid,
6162 .sva_bind = intel_svm_bind,
6163 .sva_unbind = intel_svm_unbind,
6164 .sva_get_pasid = intel_svm_get_pasid,
6165 .page_response = intel_svm_page_response,
6166 #endif
6167 };
6168
6169 static void quirk_iommu_igfx(struct pci_dev *dev)
6170 {
6171 if (risky_device(dev))
6172 return;
6173
6174 pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
6175 dmar_map_gfx = 0;
6176 }
6177
6178 /* G4x/GM45 integrated gfx dmar support is totally busted. */
6179 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
6180 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
6181 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
6182 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
6183 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
6184 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
6185 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
6186
6187 /* Broadwell igfx malfunctions with dmar */
6188 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
6189 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
6190 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
6191 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
6192 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
6193 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
6194 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
6195 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
6196 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
6197 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
6198 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
6199 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
6200 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
6201 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
6202 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
6203 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
6204 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
6205 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
6206 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
6207 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
6208 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
6209 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
6210 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
6211 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
6212
6213 static void quirk_iommu_rwbf(struct pci_dev *dev)
6214 {
6215 if (risky_device(dev))
6216 return;
6217
6218 /*
6219 * Mobile 4 Series Chipset neglects to set RWBF capability,
6220 * but needs it. Same seems to hold for the desktop versions.
6221 */
6222 pci_info(dev, "Forcing write-buffer flush capability\n");
6223 rwbf_quirk = 1;
6224 }
6225
6226 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
6227 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
6228 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
6229 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
6230 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
6231 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
6232 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
6233
6234 #define GGC 0x52
6235 #define GGC_MEMORY_SIZE_MASK (0xf << 8)
6236 #define GGC_MEMORY_SIZE_NONE (0x0 << 8)
6237 #define GGC_MEMORY_SIZE_1M (0x1 << 8)
6238 #define GGC_MEMORY_SIZE_2M (0x3 << 8)
6239 #define GGC_MEMORY_VT_ENABLED (0x8 << 8)
6240 #define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
6241 #define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
6242 #define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
6243
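/*
 * Note on the GGC decoding below: the GGC_MEMORY_SIZE field occupies bits
 * 11:8 and bit 11 (GGC_MEMORY_VT_ENABLED) indicates that the BIOS set aside
 * space for the VT-d shadow GTT; for example, GGC_MEMORY_SIZE_2M_VT
 * (0x9 << 8) has that bit set while GGC_MEMORY_SIZE_2M (0x3 << 8) does not.
 */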
6244 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
6245 {
6246 unsigned short ggc;
6247
6248 if (risky_device(dev))
6249 return;
6250
6251 if (pci_read_config_word(dev, GGC, &ggc))
6252 return;
6253
6254 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
6255 pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
6256 dmar_map_gfx = 0;
6257 } else if (dmar_map_gfx) {
6258 /* we have to ensure the gfx device is idle before we flush */
6259 pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
6260 intel_iommu_strict = 1;
6261 }
6262 }
6263 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
6264 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
6265 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
6266 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
6267
6268 static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
6269 {
6270 unsigned short ver;
6271
6272 if (!IS_GFX_DEVICE(dev))
6273 return;
6274
6275 ver = (dev->device >> 8) & 0xff;
6276 if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
6277 ver != 0x4e && ver != 0x8a && ver != 0x98 &&
6278 ver != 0x9a)
6279 return;
6280
6281 if (risky_device(dev))
6282 return;
6283
6284 pci_info(dev, "Skip IOMMU disabling for graphics\n");
6285 iommu_skip_te_disable = 1;
6286 }
6287 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_igfx_skip_te_disable);
6288
6289 /* On Tylersburg chipsets, some BIOSes have been known to enable the
6290 ISOCH DMAR unit for the Azalia sound device, but not give it any
6291 TLB entries, which causes it to deadlock. Check for that. We do
6292 this in a function called from init_dmars(), instead of in a PCI
6293 quirk, because we don't want to print the obnoxious "BIOS broken"
6294 message if VT-d is actually disabled.
6295 */
6296 static void __init check_tylersburg_isoch(void)
6297 {
6298 struct pci_dev *pdev;
6299 uint32_t vtisochctrl;
6300
6301 /* If there's no Azalia in the system anyway, forget it. */
6302 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
6303 if (!pdev)
6304 return;
6305
6306 if (risky_device(pdev)) {
6307 pci_dev_put(pdev);
6308 return;
6309 }
6310
6311 pci_dev_put(pdev);
6312
6313 /* System Management Registers. Might be hidden, in which case
6314 we can't do the sanity check. But that's OK, because the
6315 known-broken BIOSes _don't_ actually hide it, so far. */
6316 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
6317 if (!pdev)
6318 return;
6319
6320 if (risky_device(pdev)) {
6321 pci_dev_put(pdev);
6322 return;
6323 }
6324
6325 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
6326 pci_dev_put(pdev);
6327 return;
6328 }
6329
6330 pci_dev_put(pdev);
6331
6332 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
6333 if (vtisochctrl & 1)
6334 return;
6335
6336 /* Drop all bits other than the number of TLB entries */
6337 vtisochctrl &= 0x1c;
6338
6339 /* If we have the recommended number of TLB entries (16), fine. */
6340 if (vtisochctrl == 0x10)
6341 return;
6342
6343 /* Zero TLB entries? You get to ride the short bus to school. */
6344 if (!vtisochctrl) {
6345 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
6346 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
6347 dmi_get_system_info(DMI_BIOS_VENDOR),
6348 dmi_get_system_info(DMI_BIOS_VERSION),
6349 dmi_get_system_info(DMI_PRODUCT_VERSION));
6350 iommu_identity_mapping |= IDENTMAP_AZALIA;
6351 return;
6352 }
6353
6354 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
6355 vtisochctrl);
6356 }
6357