1 /*
2  * Copyright © 2006-2014 Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * Authors: David Woodhouse <dwmw2@infradead.org>,
14  *          Ashok Raj <ashok.raj@intel.com>,
15  *          Shaohua Li <shaohua.li@intel.com>,
16  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17  *          Fenghua Yu <fenghua.yu@intel.com>
18  *          Joerg Roedel <jroedel@suse.de>
19  */
20 
21 #define pr_fmt(fmt)     "DMAR: " fmt
22 
23 #include <linux/init.h>
24 #include <linux/bitmap.h>
25 #include <linux/debugfs.h>
26 #include <linux/export.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/memory.h>
36 #include <linux/timer.h>
37 #include <linux/io.h>
38 #include <linux/iova.h>
39 #include <linux/iommu.h>
40 #include <linux/intel-iommu.h>
41 #include <linux/syscore_ops.h>
42 #include <linux/tboot.h>
43 #include <linux/dmi.h>
44 #include <linux/pci-ats.h>
45 #include <linux/memblock.h>
46 #include <linux/dma-contiguous.h>
47 #include <linux/crash_dump.h>
48 #include <asm/irq_remapping.h>
49 #include <asm/cacheflush.h>
50 #include <asm/iommu.h>
51 
52 #include "irq_remapping.h"
53 
54 #define ROOT_SIZE		VTD_PAGE_SIZE
55 #define CONTEXT_SIZE		VTD_PAGE_SIZE
56 
57 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
58 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
59 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
60 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
61 
62 #define IOAPIC_RANGE_START	(0xfee00000)
63 #define IOAPIC_RANGE_END	(0xfeefffff)
64 #define IOVA_START_ADDR		(0x1000)
65 
66 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
67 
68 #define MAX_AGAW_WIDTH 64
69 #define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
70 
71 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
72 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
73 
74 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
75    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
76 #define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
77 				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
78 #define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
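/*
 * Worked example (illustrative): with the default 48-bit guest address
 * width and VTD_PAGE_SHIFT == 12, __DOMAIN_MAX_PFN(48) is
 * (1ULL << 36) - 1, i.e. 2^36 pages of 4KiB covering 256TiB.
 * DOMAIN_MAX_PFN() additionally clamps the value to ULONG_MAX so that
 * 32-bit kernels can still keep PFNs in an unsigned long.
 */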
79 
80 /* IO virtual address start page frame number */
81 #define IOVA_START_PFN		(1)
82 
83 #define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
84 #define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
85 #define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))
86 
87 /* page table handling */
88 #define LEVEL_STRIDE		(9)
89 #define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
90 
91 /*
92  * This bitmap is used to advertise the page sizes our hardware supports
93  * to the IOMMU core, which will then use this information to split
94  * physically contiguous memory regions it is mapping into page sizes
95  * that we support.
96  *
97  * Traditionally the IOMMU core just handed us the mappings directly,
98  * after making sure the size is an order of a 4KiB page and that the
99  * mapping has natural alignment.
100  *
101  * To retain this behavior, we currently advertise that we support
102  * all page sizes that are an order of 4KiB.
103  *
104  * If at some point we'd like to utilize the IOMMU core's new behavior,
105  * we could change this to advertise the real page sizes we support.
106  */
107 #define INTEL_IOMMU_PGSIZES	(~0xFFFUL)
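/*
 * Illustration: ~0xFFFUL leaves every bit from 12 upwards set, so the
 * core sees 4KiB, 8KiB, 16KiB, ... 2MiB, 1GiB and beyond all advertised;
 * any power-of-two multiple of 4KiB is accepted as a mapping size.
 */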
108 
109 static inline int agaw_to_level(int agaw)
110 {
111 	return agaw + 2;
112 }
113 
114 static inline int agaw_to_width(int agaw)
115 {
116 	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
117 }
118 
119 static inline int width_to_agaw(int width)
120 {
121 	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
122 }
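/*
 * Worked example (illustrative): a 48-bit address width gives
 * width_to_agaw(48) = DIV_ROUND_UP(18, 9) = 2, and in the other
 * direction agaw_to_width(2) = 48 and agaw_to_level(2) = 4, i.e. a
 * 4-level page table; a 39-bit width maps to agaw 1 and a 3-level table.
 */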
123 
124 static inline unsigned int level_to_offset_bits(int level)
125 {
126 	return (level - 1) * LEVEL_STRIDE;
127 }
128 
129 static inline int pfn_level_offset(unsigned long pfn, int level)
130 {
131 	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
132 }
133 
134 static inline unsigned long level_mask(int level)
135 {
136 	return -1UL << level_to_offset_bits(level);
137 }
138 
139 static inline unsigned long level_size(int level)
140 {
141 	return 1UL << level_to_offset_bits(level);
142 }
143 
144 static inline unsigned long align_to_level(unsigned long pfn, int level)
145 {
146 	return (pfn + level_size(level) - 1) & level_mask(level);
147 }
148 
149 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
150 {
151 	return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
152 }
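/*
 * Worked example (illustrative): at level 2, level_to_offset_bits() is 9,
 * so level_size() and lvl_to_nr_pages() are both 512 pages and a single
 * level-2 entry spans 512 * 4KiB = 2MiB of IOVA space; a level-3 entry
 * spans 1GiB.
 */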
153 
154 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
155    are never going to work. */
156 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
157 {
158 	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
159 }
160 
161 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
162 {
163 	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
164 }
165 static inline unsigned long page_to_dma_pfn(struct page *pg)
166 {
167 	return mm_to_dma_pfn(page_to_pfn(pg));
168 }
169 static inline unsigned long virt_to_dma_pfn(void *p)
170 {
171 	return page_to_dma_pfn(virt_to_page(p));
172 }
173 
174 /* global iommu list, set NULL for ignored DMAR units */
175 static struct intel_iommu **g_iommus;
176 
177 static void __init check_tylersburg_isoch(void);
178 static int rwbf_quirk;
179 
180 /*
181  * set to 1 to panic the kernel if VT-d can't be successfully enabled
182  * (used when kernel is launched w/ TXT)
183  */
184 static int force_on = 0;
185 
186 /*
187  * 0: Present
188  * 1-11: Reserved
189  * 12-63: Context Ptr (12 - (haw-1))
190  * 64-127: Reserved
191  */
192 struct root_entry {
193 	u64	lo;
194 	u64	hi;
195 };
196 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
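/*
 * Illustration: with a 4KiB root table and 16-byte root entries this
 * works out to 256 root entries, one per PCI bus number.
 */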
197 
198 /*
199  * Take a root_entry and return the Lower Context Table Pointer (LCTP)
200  * if marked present.
201  */
202 static phys_addr_t root_entry_lctp(struct root_entry *re)
203 {
204 	if (!(re->lo & 1))
205 		return 0;
206 
207 	return re->lo & VTD_PAGE_MASK;
208 }
209 
210 /*
211  * Take a root_entry and return the Upper Context Table Pointer (UCTP)
212  * if marked present.
213  */
214 static phys_addr_t root_entry_uctp(struct root_entry *re)
215 {
216 	if (!(re->hi & 1))
217 		return 0;
218 
219 	return re->hi & VTD_PAGE_MASK;
220 }
221 /*
222  * low 64 bits:
223  * 0: present
224  * 1: fault processing disable
225  * 2-3: translation type
226  * 12-63: address space root
227  * high 64 bits:
228  * 0-2: address width
229  * 3-6: aval
230  * 8-23: domain id
231  */
232 struct context_entry {
233 	u64 lo;
234 	u64 hi;
235 };
236 
237 static inline void context_clear_pasid_enable(struct context_entry *context)
238 {
239 	context->lo &= ~(1ULL << 11);
240 }
241 
242 static inline bool context_pasid_enabled(struct context_entry *context)
243 {
244 	return !!(context->lo & (1ULL << 11));
245 }
246 
247 static inline void context_set_copied(struct context_entry *context)
248 {
249 	context->hi |= (1ull << 3);
250 }
251 
252 static inline bool context_copied(struct context_entry *context)
253 {
254 	return !!(context->hi & (1ULL << 3));
255 }
256 
257 static inline bool __context_present(struct context_entry *context)
258 {
259 	return (context->lo & 1);
260 }
261 
262 static inline bool context_present(struct context_entry *context)
263 {
264 	return context_pasid_enabled(context) ?
265 	     __context_present(context) :
266 	     __context_present(context) && !context_copied(context);
267 }
268 
269 static inline void context_set_present(struct context_entry *context)
270 {
271 	context->lo |= 1;
272 }
273 
274 static inline void context_set_fault_enable(struct context_entry *context)
275 {
276 	context->lo &= (((u64)-1) << 2) | 1;
277 }
278 
279 static inline void context_set_translation_type(struct context_entry *context,
280 						unsigned long value)
281 {
282 	context->lo &= (((u64)-1) << 4) | 3;
283 	context->lo |= (value & 3) << 2;
284 }
285 
286 static inline void context_set_address_root(struct context_entry *context,
287 					    unsigned long value)
288 {
289 	context->lo &= ~VTD_PAGE_MASK;
290 	context->lo |= value & VTD_PAGE_MASK;
291 }
292 
293 static inline void context_set_address_width(struct context_entry *context,
294 					     unsigned long value)
295 {
296 	context->hi |= value & 7;
297 }
298 
299 static inline void context_set_domain_id(struct context_entry *context,
300 					 unsigned long value)
301 {
302 	context->hi |= (value & ((1 << 16) - 1)) << 8;
303 }
304 
305 static inline int context_domain_id(struct context_entry *c)
306 {
307 	return((c->hi >> 8) & 0xffff);
308 }
309 
310 static inline void context_clear_entry(struct context_entry *context)
311 {
312 	context->lo = 0;
313 	context->hi = 0;
314 }
315 
316 /*
317  * 0: readable
318  * 1: writable
319  * 2-6: reserved
320  * 7: super page
321  * 8-10: available
322  * 11: snoop behavior
323  * 12-63: Host physical address
324  */
325 struct dma_pte {
326 	u64 val;
327 };
328 
329 static inline void dma_clear_pte(struct dma_pte *pte)
330 {
331 	pte->val = 0;
332 }
333 
334 static inline u64 dma_pte_addr(struct dma_pte *pte)
335 {
336 #ifdef CONFIG_64BIT
337 	return pte->val & VTD_PAGE_MASK;
338 #else
339 	/* Must have a full atomic 64-bit read */
340 	return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
341 #endif
342 }
343 
344 static inline bool dma_pte_present(struct dma_pte *pte)
345 {
346 	return (pte->val & 3) != 0;
347 }
348 
349 static inline bool dma_pte_superpage(struct dma_pte *pte)
350 {
351 	return (pte->val & DMA_PTE_LARGE_PAGE);
352 }
353 
354 static inline int first_pte_in_page(struct dma_pte *pte)
355 {
356 	return !((unsigned long)pte & ~VTD_PAGE_MASK);
357 }
358 
359 /*
360  * This domain is a static identity mapping domain.
361  *	1. This domain creates a static 1:1 mapping to all usable memory.
362  * 	2. It maps to each iommu if successful.
363  *	3. Each iommu maps to this domain if successful.
364  */
365 static struct dmar_domain *si_domain;
366 static int hw_pass_through = 1;
367 
368 /*
369  * Domain represents a virtual machine; more than one device
370  * across iommus may be owned by one domain, e.g. a kvm guest.
371  */
372 #define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 0)
373 
374 /* si_domain contains multiple devices */
375 #define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 1)
376 
377 #define for_each_domain_iommu(idx, domain)			\
378 	for (idx = 0; idx < g_num_of_iommus; idx++)		\
379 		if (domain->iommu_refcnt[idx])
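/*
 * Usage sketch (illustrative): iterate only over IOMMUs that actually
 * have devices of this domain attached, e.g.
 *
 *	for_each_domain_iommu(i, domain)
 *		if (!ecap_coherent(g_iommus[i]->ecap))
 *			...
 */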
380 
381 struct dmar_domain {
382 	int	nid;			/* node id */
383 
384 	unsigned	iommu_refcnt[DMAR_UNITS_SUPPORTED];
385 					/* Refcount of devices per iommu */
386 
387 
388 	u16		iommu_did[DMAR_UNITS_SUPPORTED];
389 					/* Domain ids per IOMMU. Use u16 since
390 					 * domain ids are 16 bit wide according
391 					 * to VT-d spec, section 9.3 */
392 
393 	struct list_head devices;	/* all devices' list */
394 	struct iova_domain iovad;	/* iova's that belong to this domain */
395 
396 	struct dma_pte	*pgd;		/* virtual address */
397 	int		gaw;		/* max guest address width */
398 
399 	/* adjusted guest address width, 0 is level 2 30-bit */
400 	int		agaw;
401 
402 	int		flags;		/* flags to find out type of domain */
403 
404 	int		iommu_coherency;/* indicate coherency of iommu access */
405 	int		iommu_snooping; /* indicate snooping control feature*/
406 	int		iommu_count;	/* reference count of iommu */
407 	int		iommu_superpage;/* Level of superpages supported:
408 					   0 == 4KiB (no superpages), 1 == 2MiB,
409 					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
410 	u64		max_addr;	/* maximum mapped address */
411 
412 	struct iommu_domain domain;	/* generic domain data structure for
413 					   iommu core */
414 };
415 
416 /* PCI domain-device relationship */
417 struct device_domain_info {
418 	struct list_head link;	/* link to domain siblings */
419 	struct list_head global; /* link to global list */
420 	u8 bus;			/* PCI bus number */
421 	u8 devfn;		/* PCI devfn number */
422 	u16 pfsid;		/* SRIOV physical function source ID */
423 	u8 pasid_supported:3;
424 	u8 pasid_enabled:1;
425 	u8 pri_supported:1;
426 	u8 pri_enabled:1;
427 	u8 ats_supported:1;
428 	u8 ats_enabled:1;
429 	u8 ats_qdep;
430 	struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
431 	struct intel_iommu *iommu; /* IOMMU used by this device */
432 	struct dmar_domain *domain; /* pointer to domain */
433 };
434 
435 struct dmar_rmrr_unit {
436 	struct list_head list;		/* list of rmrr units	*/
437 	struct acpi_dmar_header *hdr;	/* ACPI header		*/
438 	u64	base_address;		/* reserved base address*/
439 	u64	end_address;		/* reserved end address */
440 	struct dmar_dev_scope *devices;	/* target devices */
441 	int	devices_cnt;		/* target device count */
442 };
443 
444 struct dmar_atsr_unit {
445 	struct list_head list;		/* list of ATSR units */
446 	struct acpi_dmar_header *hdr;	/* ACPI header */
447 	struct dmar_dev_scope *devices;	/* target devices */
448 	int devices_cnt;		/* target device count */
449 	u8 include_all:1;		/* include all ports */
450 };
451 
452 static LIST_HEAD(dmar_atsr_units);
453 static LIST_HEAD(dmar_rmrr_units);
454 
455 #define for_each_rmrr_units(rmrr) \
456 	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
457 
458 static void flush_unmaps_timeout(unsigned long data);
459 
460 static DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
461 
462 #define HIGH_WATER_MARK 250
463 struct deferred_flush_tables {
464 	int next;
465 	struct iova *iova[HIGH_WATER_MARK];
466 	struct dmar_domain *domain[HIGH_WATER_MARK];
467 	struct page *freelist[HIGH_WATER_MARK];
468 };
469 
470 static struct deferred_flush_tables *deferred_flush;
471 
472 /* bitmap for indexing intel_iommus */
473 static int g_num_of_iommus;
474 
475 static DEFINE_SPINLOCK(async_umap_flush_lock);
476 static LIST_HEAD(unmaps_to_do);
477 
478 static int timer_on;
479 static long list_size;
480 
481 static void domain_exit(struct dmar_domain *domain);
482 static void domain_remove_dev_info(struct dmar_domain *domain);
483 static void dmar_remove_one_dev_info(struct dmar_domain *domain,
484 				     struct device *dev);
485 static void __dmar_remove_one_dev_info(struct device_domain_info *info);
486 static void domain_context_clear(struct intel_iommu *iommu,
487 				 struct device *dev);
488 static int domain_detach_iommu(struct dmar_domain *domain,
489 			       struct intel_iommu *iommu);
490 
491 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
492 int dmar_disabled = 0;
493 #else
494 int dmar_disabled = 1;
495 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
496 
497 int intel_iommu_enabled = 0;
498 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
499 
500 static int dmar_map_gfx = 1;
501 static int dmar_forcedac;
502 static int intel_iommu_strict;
503 static int intel_iommu_superpage = 1;
504 static int intel_iommu_ecs = 1;
505 static int intel_iommu_pasid28;
506 static int iommu_identity_mapping;
507 
508 #define IDENTMAP_ALL		1
509 #define IDENTMAP_GFX		2
510 #define IDENTMAP_AZALIA		4
511 
512 /* Broadwell and Skylake have broken ECS support — normal so-called "second
513  * level" translation of DMA requests-without-PASID doesn't actually happen
514  * unless you also set the NESTE bit in an extended context-entry. Which of
515  * course means that SVM doesn't work because it's trying to do nested
516  * translation of the physical addresses it finds in the process page tables,
517  * through the IOVA->phys mapping found in the "second level" page tables.
518  *
519  * The VT-d specification was retroactively changed to change the definition
520  * of the capability bits and pretend that Broadwell/Skylake never happened...
521  * but unfortunately the wrong bit was changed. It's ECS which is broken, but
522  * for some reason it was the PASID capability bit which was redefined (from
523  * bit 28 on BDW/SKL to bit 40 in future).
524  *
525  * So our test for ECS needs to eschew those implementations which set the old
526  * PASID capability bit 28, since those are the ones on which ECS is broken.
527  * Unless we are working around the 'pasid28' limitations, that is, by putting
528  * the device into passthrough mode for normal DMA and thus masking the bug.
529  */
530 #define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
531 			    (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
532 /* PASID support is thus enabled if ECS is enabled and *either* of the old
533  * or new capability bits is set. */
534 #define pasid_enabled(iommu) (ecs_enabled(iommu) &&			\
535 			      (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))
536 
537 int intel_iommu_gfx_mapped;
538 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
539 
540 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
541 static DEFINE_SPINLOCK(device_domain_lock);
542 static LIST_HEAD(device_domain_list);
543 
544 static const struct iommu_ops intel_iommu_ops;
545 
546 static bool translation_pre_enabled(struct intel_iommu *iommu)
547 {
548 	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
549 }
550 
551 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
552 {
553 	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
554 }
555 
556 static void init_translation_status(struct intel_iommu *iommu)
557 {
558 	u32 gsts;
559 
560 	gsts = readl(iommu->reg + DMAR_GSTS_REG);
561 	if (gsts & DMA_GSTS_TES)
562 		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
563 }
564 
565 /* Convert generic 'struct iommu_domain' to private struct dmar_domain */
566 static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
567 {
568 	return container_of(dom, struct dmar_domain, domain);
569 }
570 
571 static int __init intel_iommu_setup(char *str)
572 {
573 	if (!str)
574 		return -EINVAL;
575 	while (*str) {
576 		if (!strncmp(str, "on", 2)) {
577 			dmar_disabled = 0;
578 			pr_info("IOMMU enabled\n");
579 		} else if (!strncmp(str, "off", 3)) {
580 			dmar_disabled = 1;
581 			pr_info("IOMMU disabled\n");
582 		} else if (!strncmp(str, "igfx_off", 8)) {
583 			dmar_map_gfx = 0;
584 			pr_info("Disable GFX device mapping\n");
585 		} else if (!strncmp(str, "forcedac", 8)) {
586 			pr_info("Forcing DAC for PCI devices\n");
587 			dmar_forcedac = 1;
588 		} else if (!strncmp(str, "strict", 6)) {
589 			pr_info("Disable batched IOTLB flush\n");
590 			intel_iommu_strict = 1;
591 		} else if (!strncmp(str, "sp_off", 6)) {
592 			pr_info("Disable supported super page\n");
593 			intel_iommu_superpage = 0;
594 		} else if (!strncmp(str, "ecs_off", 7)) {
595 			printk(KERN_INFO
596 				"Intel-IOMMU: disable extended context table support\n");
597 			intel_iommu_ecs = 0;
598 		} else if (!strncmp(str, "pasid28", 7)) {
599 			printk(KERN_INFO
600 				"Intel-IOMMU: enable pre-production PASID support\n");
601 			intel_iommu_pasid28 = 1;
602 			iommu_identity_mapping |= IDENTMAP_GFX;
603 		}
604 
605 		str += strcspn(str, ",");
606 		while (*str == ',')
607 			str++;
608 	}
609 	return 0;
610 }
611 __setup("intel_iommu=", intel_iommu_setup);
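/*
 * Example (illustrative): booting with "intel_iommu=on,strict,sp_off"
 * walks the comma-separated list above and ends up with dmar_disabled = 0,
 * intel_iommu_strict = 1 and intel_iommu_superpage = 0.
 */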
612 
613 static struct kmem_cache *iommu_domain_cache;
614 static struct kmem_cache *iommu_devinfo_cache;
615 
616 static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
617 {
618 	struct dmar_domain **domains;
619 	int idx = did >> 8;
620 
621 	domains = iommu->domains[idx];
622 	if (!domains)
623 		return NULL;
624 
625 	return domains[did & 0xff];
626 }
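/*
 * Note (illustrative): domain IDs are resolved through a two-level table;
 * did >> 8 selects one of the lazily allocated 256-pointer chunks and
 * did & 0xff indexes within it, so e.g. domain-id 0x1234 lives in chunk
 * 0x12, slot 0x34.
 */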
627 
628 static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
629 			     struct dmar_domain *domain)
630 {
631 	struct dmar_domain **domains;
632 	int idx = did >> 8;
633 
634 	if (!iommu->domains[idx]) {
635 		size_t size = 256 * sizeof(struct dmar_domain *);
636 		iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
637 	}
638 
639 	domains = iommu->domains[idx];
640 	if (WARN_ON(!domains))
641 		return;
642 	else
643 		domains[did & 0xff] = domain;
644 }
645 
646 static inline void *alloc_pgtable_page(int node)
647 {
648 	struct page *page;
649 	void *vaddr = NULL;
650 
651 	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
652 	if (page)
653 		vaddr = page_address(page);
654 	return vaddr;
655 }
656 
657 static inline void free_pgtable_page(void *vaddr)
658 {
659 	free_page((unsigned long)vaddr);
660 }
661 
662 static inline void *alloc_domain_mem(void)
663 {
664 	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
665 }
666 
667 static void free_domain_mem(void *vaddr)
668 {
669 	kmem_cache_free(iommu_domain_cache, vaddr);
670 }
671 
672 static inline void * alloc_devinfo_mem(void)
673 {
674 	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
675 }
676 
677 static inline void free_devinfo_mem(void *vaddr)
678 {
679 	kmem_cache_free(iommu_devinfo_cache, vaddr);
680 }
681 
682 static inline int domain_type_is_vm(struct dmar_domain *domain)
683 {
684 	return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
685 }
686 
687 static inline int domain_type_is_si(struct dmar_domain *domain)
688 {
689 	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
690 }
691 
692 static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
693 {
694 	return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
695 				DOMAIN_FLAG_STATIC_IDENTITY);
696 }
697 
698 static inline int domain_pfn_supported(struct dmar_domain *domain,
699 				       unsigned long pfn)
700 {
701 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
702 
703 	return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
704 }
705 
706 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
707 {
708 	unsigned long sagaw;
709 	int agaw = -1;
710 
711 	sagaw = cap_sagaw(iommu->cap);
712 	for (agaw = width_to_agaw(max_gaw);
713 	     agaw >= 0; agaw--) {
714 		if (test_bit(agaw, &sagaw))
715 			break;
716 	}
717 
718 	return agaw;
719 }
720 
721 /*
722  * Calculate max SAGAW for each iommu.
723  */
724 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
725 {
726 	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
727 }
728 
729 /*
730  * calculate agaw for each iommu.
731  * "SAGAW" may be different across iommus; use a default agaw, and
732  * fall back to a smaller supported agaw for iommus that don't support the default.
733  */
734 int iommu_calculate_agaw(struct intel_iommu *iommu)
735 {
736 	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
737 }
738 
739 /* This function only returns a single iommu in a domain */
740 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
741 {
742 	int iommu_id;
743 
744 	/* si_domain and vm domain should not get here. */
745 	BUG_ON(domain_type_is_vm_or_si(domain));
746 	for_each_domain_iommu(iommu_id, domain)
747 		break;
748 
749 	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
750 		return NULL;
751 
752 	return g_iommus[iommu_id];
753 }
754 
755 static void domain_update_iommu_coherency(struct dmar_domain *domain)
756 {
757 	struct dmar_drhd_unit *drhd;
758 	struct intel_iommu *iommu;
759 	bool found = false;
760 	int i;
761 
762 	domain->iommu_coherency = 1;
763 
764 	for_each_domain_iommu(i, domain) {
765 		found = true;
766 		if (!ecap_coherent(g_iommus[i]->ecap)) {
767 			domain->iommu_coherency = 0;
768 			break;
769 		}
770 	}
771 	if (found)
772 		return;
773 
774 	/* No hardware attached; use lowest common denominator */
775 	rcu_read_lock();
776 	for_each_active_iommu(iommu, drhd) {
777 		if (!ecap_coherent(iommu->ecap)) {
778 			domain->iommu_coherency = 0;
779 			break;
780 		}
781 	}
782 	rcu_read_unlock();
783 }
784 
785 static int domain_update_iommu_snooping(struct intel_iommu *skip)
786 {
787 	struct dmar_drhd_unit *drhd;
788 	struct intel_iommu *iommu;
789 	int ret = 1;
790 
791 	rcu_read_lock();
792 	for_each_active_iommu(iommu, drhd) {
793 		if (iommu != skip) {
794 			if (!ecap_sc_support(iommu->ecap)) {
795 				ret = 0;
796 				break;
797 			}
798 		}
799 	}
800 	rcu_read_unlock();
801 
802 	return ret;
803 }
804 
805 static int domain_update_iommu_superpage(struct intel_iommu *skip)
806 {
807 	struct dmar_drhd_unit *drhd;
808 	struct intel_iommu *iommu;
809 	int mask = 0xf;
810 
811 	if (!intel_iommu_superpage) {
812 		return 0;
813 	}
814 
815 	/* set iommu_superpage to the smallest common denominator */
816 	rcu_read_lock();
817 	for_each_active_iommu(iommu, drhd) {
818 		if (iommu != skip) {
819 			mask &= cap_super_page_val(iommu->cap);
820 			if (!mask)
821 				break;
822 		}
823 	}
824 	rcu_read_unlock();
825 
826 	return fls(mask);
827 }
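/*
 * Worked example (illustrative): cap_super_page_val() reports 2MiB
 * support in bit 0 and 1GiB support in bit 1, so a common mask of 0x3
 * yields fls(0x3) = 2 (1GiB superpages usable), 0x1 yields 1 (2MiB only)
 * and 0 yields 0, i.e. plain 4KiB pages.
 */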
828 
829 /* Some capabilities may be different across iommus */
830 static void domain_update_iommu_cap(struct dmar_domain *domain)
831 {
832 	domain_update_iommu_coherency(domain);
833 	domain->iommu_snooping = domain_update_iommu_snooping(NULL);
834 	domain->iommu_superpage = domain_update_iommu_superpage(NULL);
835 }
836 
837 static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
838 						       u8 bus, u8 devfn, int alloc)
839 {
840 	struct root_entry *root = &iommu->root_entry[bus];
841 	struct context_entry *context;
842 	u64 *entry;
843 
844 	entry = &root->lo;
845 	if (ecs_enabled(iommu)) {
846 		if (devfn >= 0x80) {
847 			devfn -= 0x80;
848 			entry = &root->hi;
849 		}
850 		devfn *= 2;
851 	}
852 	if (*entry & 1)
853 		context = phys_to_virt(*entry & VTD_PAGE_MASK);
854 	else {
855 		unsigned long phy_addr;
856 		if (!alloc)
857 			return NULL;
858 
859 		context = alloc_pgtable_page(iommu->node);
860 		if (!context)
861 			return NULL;
862 
863 		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
864 		phy_addr = virt_to_phys((void *)context);
865 		*entry = phy_addr | 1;
866 		__iommu_flush_cache(iommu, entry, sizeof(*entry));
867 	}
868 	return &context[devfn];
869 }
870 
871 static int iommu_dummy(struct device *dev)
872 {
873 	return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
874 }
875 
876 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
877 {
878 	struct dmar_drhd_unit *drhd = NULL;
879 	struct intel_iommu *iommu;
880 	struct device *tmp;
881 	struct pci_dev *ptmp, *pdev = NULL;
882 	u16 segment = 0;
883 	int i;
884 
885 	if (iommu_dummy(dev))
886 		return NULL;
887 
888 	if (dev_is_pci(dev)) {
889 		struct pci_dev *pf_pdev;
890 
891 		pdev = to_pci_dev(dev);
892 		/* VFs aren't listed in scope tables; we need to look up
893 		 * the PF instead to find the IOMMU. */
894 		pf_pdev = pci_physfn(pdev);
895 		dev = &pf_pdev->dev;
896 		segment = pci_domain_nr(pdev->bus);
897 	} else if (has_acpi_companion(dev))
898 		dev = &ACPI_COMPANION(dev)->dev;
899 
900 	rcu_read_lock();
901 	for_each_active_iommu(iommu, drhd) {
902 		if (pdev && segment != drhd->segment)
903 			continue;
904 
905 		for_each_active_dev_scope(drhd->devices,
906 					  drhd->devices_cnt, i, tmp) {
907 			if (tmp == dev) {
908 				/* For a VF use its original BDF# not that of the PF
909 				 * which we used for the IOMMU lookup. Strictly speaking
910 				 * we could do this for all PCI devices; we only need to
911 				 * get the BDF# from the scope table for ACPI matches. */
912 				if (pdev && pdev->is_virtfn)
913 					goto got_pdev;
914 
915 				*bus = drhd->devices[i].bus;
916 				*devfn = drhd->devices[i].devfn;
917 				goto out;
918 			}
919 
920 			if (!pdev || !dev_is_pci(tmp))
921 				continue;
922 
923 			ptmp = to_pci_dev(tmp);
924 			if (ptmp->subordinate &&
925 			    ptmp->subordinate->number <= pdev->bus->number &&
926 			    ptmp->subordinate->busn_res.end >= pdev->bus->number)
927 				goto got_pdev;
928 		}
929 
930 		if (pdev && drhd->include_all) {
931 		got_pdev:
932 			*bus = pdev->bus->number;
933 			*devfn = pdev->devfn;
934 			goto out;
935 		}
936 	}
937 	iommu = NULL;
938  out:
939 	rcu_read_unlock();
940 
941 	return iommu;
942 }
943 
944 static void domain_flush_cache(struct dmar_domain *domain,
945 			       void *addr, int size)
946 {
947 	if (!domain->iommu_coherency)
948 		clflush_cache_range(addr, size);
949 }
950 
951 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
952 {
953 	struct context_entry *context;
954 	int ret = 0;
955 	unsigned long flags;
956 
957 	spin_lock_irqsave(&iommu->lock, flags);
958 	context = iommu_context_addr(iommu, bus, devfn, 0);
959 	if (context)
960 		ret = context_present(context);
961 	spin_unlock_irqrestore(&iommu->lock, flags);
962 	return ret;
963 }
964 
965 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
966 {
967 	struct context_entry *context;
968 	unsigned long flags;
969 
970 	spin_lock_irqsave(&iommu->lock, flags);
971 	context = iommu_context_addr(iommu, bus, devfn, 0);
972 	if (context) {
973 		context_clear_entry(context);
974 		__iommu_flush_cache(iommu, context, sizeof(*context));
975 	}
976 	spin_unlock_irqrestore(&iommu->lock, flags);
977 }
978 
979 static void free_context_table(struct intel_iommu *iommu)
980 {
981 	int i;
982 	unsigned long flags;
983 	struct context_entry *context;
984 
985 	spin_lock_irqsave(&iommu->lock, flags);
986 	if (!iommu->root_entry) {
987 		goto out;
988 	}
989 	for (i = 0; i < ROOT_ENTRY_NR; i++) {
990 		context = iommu_context_addr(iommu, i, 0, 0);
991 		if (context)
992 			free_pgtable_page(context);
993 
994 		if (!ecs_enabled(iommu))
995 			continue;
996 
997 		context = iommu_context_addr(iommu, i, 0x80, 0);
998 		if (context)
999 			free_pgtable_page(context);
1000 
1001 	}
1002 	free_pgtable_page(iommu->root_entry);
1003 	iommu->root_entry = NULL;
1004 out:
1005 	spin_unlock_irqrestore(&iommu->lock, flags);
1006 }
1007 
1008 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
1009 				      unsigned long pfn, int *target_level)
1010 {
1011 	struct dma_pte *parent, *pte = NULL;
1012 	int level = agaw_to_level(domain->agaw);
1013 	int offset;
1014 
1015 	BUG_ON(!domain->pgd);
1016 
1017 	if (!domain_pfn_supported(domain, pfn))
1018 		/* Address beyond IOMMU's addressing capabilities. */
1019 		return NULL;
1020 
1021 	parent = domain->pgd;
1022 
1023 	while (1) {
1024 		void *tmp_page;
1025 
1026 		offset = pfn_level_offset(pfn, level);
1027 		pte = &parent[offset];
1028 		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
1029 			break;
1030 		if (level == *target_level)
1031 			break;
1032 
1033 		if (!dma_pte_present(pte)) {
1034 			uint64_t pteval;
1035 
1036 			tmp_page = alloc_pgtable_page(domain->nid);
1037 
1038 			if (!tmp_page)
1039 				return NULL;
1040 
1041 			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
1042 			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
1043 			if (cmpxchg64(&pte->val, 0ULL, pteval))
1044 				/* Someone else set it while we were thinking; use theirs. */
1045 				free_pgtable_page(tmp_page);
1046 			else
1047 				domain_flush_cache(domain, pte, sizeof(*pte));
1048 		}
1049 		if (level == 1)
1050 			break;
1051 
1052 		parent = phys_to_virt(dma_pte_addr(pte));
1053 		level--;
1054 	}
1055 
1056 	if (!*target_level)
1057 		*target_level = level;
1058 
1059 	return pte;
1060 }
1061 
1062 
1063 /* return address's pte at specific level */
1064 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
1065 					 unsigned long pfn,
1066 					 int level, int *large_page)
1067 {
1068 	struct dma_pte *parent, *pte = NULL;
1069 	int total = agaw_to_level(domain->agaw);
1070 	int offset;
1071 
1072 	parent = domain->pgd;
1073 	while (level <= total) {
1074 		offset = pfn_level_offset(pfn, total);
1075 		pte = &parent[offset];
1076 		if (level == total)
1077 			return pte;
1078 
1079 		if (!dma_pte_present(pte)) {
1080 			*large_page = total;
1081 			break;
1082 		}
1083 
1084 		if (dma_pte_superpage(pte)) {
1085 			*large_page = total;
1086 			return pte;
1087 		}
1088 
1089 		parent = phys_to_virt(dma_pte_addr(pte));
1090 		total--;
1091 	}
1092 	return NULL;
1093 }
1094 
1095 /* clear last level pte, a tlb flush should be followed */
1096 static void dma_pte_clear_range(struct dmar_domain *domain,
1097 				unsigned long start_pfn,
1098 				unsigned long last_pfn)
1099 {
1100 	unsigned int large_page = 1;
1101 	struct dma_pte *first_pte, *pte;
1102 
1103 	BUG_ON(!domain_pfn_supported(domain, start_pfn));
1104 	BUG_ON(!domain_pfn_supported(domain, last_pfn));
1105 	BUG_ON(start_pfn > last_pfn);
1106 
1107 	/* we don't need lock here; nobody else touches the iova range */
1108 	do {
1109 		large_page = 1;
1110 		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
1111 		if (!pte) {
1112 			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
1113 			continue;
1114 		}
1115 		do {
1116 			dma_clear_pte(pte);
1117 			start_pfn += lvl_to_nr_pages(large_page);
1118 			pte++;
1119 		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1120 
1121 		domain_flush_cache(domain, first_pte,
1122 				   (void *)pte - (void *)first_pte);
1123 
1124 	} while (start_pfn && start_pfn <= last_pfn);
1125 }
1126 
1127 static void dma_pte_free_level(struct dmar_domain *domain, int level,
1128 			       struct dma_pte *pte, unsigned long pfn,
1129 			       unsigned long start_pfn, unsigned long last_pfn)
1130 {
1131 	pfn = max(start_pfn, pfn);
1132 	pte = &pte[pfn_level_offset(pfn, level)];
1133 
1134 	do {
1135 		unsigned long level_pfn;
1136 		struct dma_pte *level_pte;
1137 
1138 		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1139 			goto next;
1140 
1141 		level_pfn = pfn & level_mask(level);
1142 		level_pte = phys_to_virt(dma_pte_addr(pte));
1143 
1144 		if (level > 2)
1145 			dma_pte_free_level(domain, level - 1, level_pte,
1146 					   level_pfn, start_pfn, last_pfn);
1147 
1148 		/* If range covers entire pagetable, free it */
1149 		if (!(start_pfn > level_pfn ||
1150 		      last_pfn < level_pfn + level_size(level) - 1)) {
1151 			dma_clear_pte(pte);
1152 			domain_flush_cache(domain, pte, sizeof(*pte));
1153 			free_pgtable_page(level_pte);
1154 		}
1155 next:
1156 		pfn += level_size(level);
1157 	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1158 }
1159 
1160 /* free page table pages. last level pte should already be cleared */
1161 static void dma_pte_free_pagetable(struct dmar_domain *domain,
1162 				   unsigned long start_pfn,
1163 				   unsigned long last_pfn)
1164 {
1165 	BUG_ON(!domain_pfn_supported(domain, start_pfn));
1166 	BUG_ON(!domain_pfn_supported(domain, last_pfn));
1167 	BUG_ON(start_pfn > last_pfn);
1168 
1169 	dma_pte_clear_range(domain, start_pfn, last_pfn);
1170 
1171 	/* We don't need lock here; nobody else touches the iova range */
1172 	dma_pte_free_level(domain, agaw_to_level(domain->agaw),
1173 			   domain->pgd, 0, start_pfn, last_pfn);
1174 
1175 	/* free pgd */
1176 	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1177 		free_pgtable_page(domain->pgd);
1178 		domain->pgd = NULL;
1179 	}
1180 }
1181 
1182 /* When a page at a given level is being unlinked from its parent, we don't
1183    need to *modify* it at all. All we need to do is make a list of all the
1184    pages which can be freed just as soon as we've flushed the IOTLB and we
1185    know the hardware page-walk will no longer touch them.
1186    The 'pte' argument is the *parent* PTE, pointing to the page that is to
1187    be freed. */
1188 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1189 					    int level, struct dma_pte *pte,
1190 					    struct page *freelist)
1191 {
1192 	struct page *pg;
1193 
1194 	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1195 	pg->freelist = freelist;
1196 	freelist = pg;
1197 
1198 	if (level == 1)
1199 		return freelist;
1200 
1201 	pte = page_address(pg);
1202 	do {
1203 		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1204 			freelist = dma_pte_list_pagetables(domain, level - 1,
1205 							   pte, freelist);
1206 		pte++;
1207 	} while (!first_pte_in_page(pte));
1208 
1209 	return freelist;
1210 }
1211 
1212 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1213 					struct dma_pte *pte, unsigned long pfn,
1214 					unsigned long start_pfn,
1215 					unsigned long last_pfn,
1216 					struct page *freelist)
1217 {
1218 	struct dma_pte *first_pte = NULL, *last_pte = NULL;
1219 
1220 	pfn = max(start_pfn, pfn);
1221 	pte = &pte[pfn_level_offset(pfn, level)];
1222 
1223 	do {
1224 		unsigned long level_pfn;
1225 
1226 		if (!dma_pte_present(pte))
1227 			goto next;
1228 
1229 		level_pfn = pfn & level_mask(level);
1230 
1231 		/* If range covers entire pagetable, free it */
1232 		if (start_pfn <= level_pfn &&
1233 		    last_pfn >= level_pfn + level_size(level) - 1) {
1234 			/* These subordinate page tables are going away entirely. Don't
1235 			   bother to clear them; we're just going to *free* them. */
1236 			if (level > 1 && !dma_pte_superpage(pte))
1237 				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1238 
1239 			dma_clear_pte(pte);
1240 			if (!first_pte)
1241 				first_pte = pte;
1242 			last_pte = pte;
1243 		} else if (level > 1) {
1244 			/* Recurse down into a level that isn't *entirely* obsolete */
1245 			freelist = dma_pte_clear_level(domain, level - 1,
1246 						       phys_to_virt(dma_pte_addr(pte)),
1247 						       level_pfn, start_pfn, last_pfn,
1248 						       freelist);
1249 		}
1250 next:
1251 		pfn += level_size(level);
1252 	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1253 
1254 	if (first_pte)
1255 		domain_flush_cache(domain, first_pte,
1256 				   (void *)++last_pte - (void *)first_pte);
1257 
1258 	return freelist;
1259 }
1260 
1261 /* We can't just free the pages because the IOMMU may still be walking
1262    the page tables, and may have cached the intermediate levels. The
1263    pages can only be freed after the IOTLB flush has been done. */
1264 static struct page *domain_unmap(struct dmar_domain *domain,
1265 				 unsigned long start_pfn,
1266 				 unsigned long last_pfn)
1267 {
1268 	struct page *freelist = NULL;
1269 
1270 	BUG_ON(!domain_pfn_supported(domain, start_pfn));
1271 	BUG_ON(!domain_pfn_supported(domain, last_pfn));
1272 	BUG_ON(start_pfn > last_pfn);
1273 
1274 	/* we don't need lock here; nobody else touches the iova range */
1275 	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1276 				       domain->pgd, 0, start_pfn, last_pfn, NULL);
1277 
1278 	/* free pgd */
1279 	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1280 		struct page *pgd_page = virt_to_page(domain->pgd);
1281 		pgd_page->freelist = freelist;
1282 		freelist = pgd_page;
1283 
1284 		domain->pgd = NULL;
1285 	}
1286 
1287 	return freelist;
1288 }
1289 
1290 static void dma_free_pagelist(struct page *freelist)
1291 {
1292 	struct page *pg;
1293 
1294 	while ((pg = freelist)) {
1295 		freelist = pg->freelist;
1296 		free_pgtable_page(page_address(pg));
1297 	}
1298 }
1299 
1300 /* iommu handling */
1301 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1302 {
1303 	struct root_entry *root;
1304 	unsigned long flags;
1305 
1306 	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1307 	if (!root) {
1308 		pr_err("Allocating root entry for %s failed\n",
1309 			iommu->name);
1310 		return -ENOMEM;
1311 	}
1312 
1313 	__iommu_flush_cache(iommu, root, ROOT_SIZE);
1314 
1315 	spin_lock_irqsave(&iommu->lock, flags);
1316 	iommu->root_entry = root;
1317 	spin_unlock_irqrestore(&iommu->lock, flags);
1318 
1319 	return 0;
1320 }
1321 
1322 static void iommu_set_root_entry(struct intel_iommu *iommu)
1323 {
1324 	u64 addr;
1325 	u32 sts;
1326 	unsigned long flag;
1327 
1328 	addr = virt_to_phys(iommu->root_entry);
1329 	if (ecs_enabled(iommu))
1330 		addr |= DMA_RTADDR_RTT;
1331 
1332 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1333 	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1334 
1335 	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1336 
1337 	/* Make sure hardware completes it */
1338 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1339 		      readl, (sts & DMA_GSTS_RTPS), sts);
1340 
1341 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1342 }
1343 
1344 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1345 {
1346 	u32 val;
1347 	unsigned long flag;
1348 
1349 	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1350 		return;
1351 
1352 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1353 	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1354 
1355 	/* Make sure hardware completes it */
1356 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1357 		      readl, (!(val & DMA_GSTS_WBFS)), val);
1358 
1359 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1360 }
1361 
1362 /* return value determines if we need a write buffer flush */
1363 static void __iommu_flush_context(struct intel_iommu *iommu,
1364 				  u16 did, u16 source_id, u8 function_mask,
1365 				  u64 type)
1366 {
1367 	u64 val = 0;
1368 	unsigned long flag;
1369 
1370 	switch (type) {
1371 	case DMA_CCMD_GLOBAL_INVL:
1372 		val = DMA_CCMD_GLOBAL_INVL;
1373 		break;
1374 	case DMA_CCMD_DOMAIN_INVL:
1375 		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1376 		break;
1377 	case DMA_CCMD_DEVICE_INVL:
1378 		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1379 			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1380 		break;
1381 	default:
1382 		BUG();
1383 	}
1384 	val |= DMA_CCMD_ICC;
1385 
1386 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1387 	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1388 
1389 	/* Make sure hardware completes it */
1390 	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1391 		dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1392 
1393 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1394 }
1395 
1396 /* return value determines if we need a write buffer flush */
1397 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1398 				u64 addr, unsigned int size_order, u64 type)
1399 {
1400 	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1401 	u64 val = 0, val_iva = 0;
1402 	unsigned long flag;
1403 
1404 	switch (type) {
1405 	case DMA_TLB_GLOBAL_FLUSH:
1406 		/* global flush doesn't need to set IVA_REG */
1407 		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1408 		break;
1409 	case DMA_TLB_DSI_FLUSH:
1410 		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1411 		break;
1412 	case DMA_TLB_PSI_FLUSH:
1413 		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1414 		/* IH bit is passed in as part of address */
1415 		val_iva = size_order | addr;
1416 		break;
1417 	default:
1418 		BUG();
1419 	}
1420 	/* Note: set drain read/write */
1421 #if 0
1422 	/*
1423 	 * This is probably only there to be extra safe; it looks like we
1424 	 * can ignore it without any impact.
1425 	 */
1426 	if (cap_read_drain(iommu->cap))
1427 		val |= DMA_TLB_READ_DRAIN;
1428 #endif
1429 	if (cap_write_drain(iommu->cap))
1430 		val |= DMA_TLB_WRITE_DRAIN;
1431 
1432 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1433 	/* Note: Only uses first TLB reg currently */
1434 	if (val_iva)
1435 		dmar_writeq(iommu->reg + tlb_offset, val_iva);
1436 	dmar_writeq(iommu->reg + tlb_offset + 8, val);
1437 
1438 	/* Make sure hardware completes it */
1439 	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1440 		dmar_readq, (!(val & DMA_TLB_IVT)), val);
1441 
1442 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1443 
1444 	/* check IOTLB invalidation granularity */
1445 	if (DMA_TLB_IAIG(val) == 0)
1446 		pr_err("Flush IOTLB failed\n");
1447 	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1448 		pr_debug("TLB flush request %Lx, actual %Lx\n",
1449 			(unsigned long long)DMA_TLB_IIRG(type),
1450 			(unsigned long long)DMA_TLB_IAIG(val));
1451 }
1452 
1453 static struct device_domain_info *
1454 iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1455 			 u8 bus, u8 devfn)
1456 {
1457 	struct device_domain_info *info;
1458 
1459 	assert_spin_locked(&device_domain_lock);
1460 
1461 	if (!iommu->qi)
1462 		return NULL;
1463 
1464 	list_for_each_entry(info, &domain->devices, link)
1465 		if (info->iommu == iommu && info->bus == bus &&
1466 		    info->devfn == devfn) {
1467 			if (info->ats_supported && info->dev)
1468 				return info;
1469 			break;
1470 		}
1471 
1472 	return NULL;
1473 }
1474 
1475 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1476 {
1477 	struct pci_dev *pdev;
1478 
1479 	if (!info || !dev_is_pci(info->dev))
1480 		return;
1481 
1482 	pdev = to_pci_dev(info->dev);
1483 	/* For IOMMU that supports device IOTLB throttling (DIT), we assign
1484 	 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
1485 	 * queue depth at PF level. If DIT is not set, PFSID will be treated as
1486 	 * reserved, which should be set to 0.
1487 	 */
1488 	if (!ecap_dit(info->iommu->ecap))
1489 		info->pfsid = 0;
1490 	else {
1491 		struct pci_dev *pf_pdev;
1492 
1493 		/* pdev will be returned if device is not a vf */
1494 		pf_pdev = pci_physfn(pdev);
1495 		info->pfsid = PCI_DEVID(pf_pdev->bus->number, pf_pdev->devfn);
1496 	}
1497 
1498 #ifdef CONFIG_INTEL_IOMMU_SVM
1499 	/* The PCIe spec, in its wisdom, declares that the behaviour of
1500 	   the device if you enable PASID support after ATS support is
1501 	   undefined. So always enable PASID support on devices which
1502 	   have it, even if we can't yet know if we're ever going to
1503 	   use it. */
1504 	if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1505 		info->pasid_enabled = 1;
1506 
1507 	if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1508 		info->pri_enabled = 1;
1509 #endif
1510 	if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1511 		info->ats_enabled = 1;
1512 		info->ats_qdep = pci_ats_queue_depth(pdev);
1513 	}
1514 }
1515 
1516 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1517 {
1518 	struct pci_dev *pdev;
1519 
1520 	if (!dev_is_pci(info->dev))
1521 		return;
1522 
1523 	pdev = to_pci_dev(info->dev);
1524 
1525 	if (info->ats_enabled) {
1526 		pci_disable_ats(pdev);
1527 		info->ats_enabled = 0;
1528 	}
1529 #ifdef CONFIG_INTEL_IOMMU_SVM
1530 	if (info->pri_enabled) {
1531 		pci_disable_pri(pdev);
1532 		info->pri_enabled = 0;
1533 	}
1534 	if (info->pasid_enabled) {
1535 		pci_disable_pasid(pdev);
1536 		info->pasid_enabled = 0;
1537 	}
1538 #endif
1539 }
1540 
1541 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1542 				  u64 addr, unsigned mask)
1543 {
1544 	u16 sid, qdep;
1545 	unsigned long flags;
1546 	struct device_domain_info *info;
1547 
1548 	spin_lock_irqsave(&device_domain_lock, flags);
1549 	list_for_each_entry(info, &domain->devices, link) {
1550 		if (!info->ats_enabled)
1551 			continue;
1552 
1553 		sid = info->bus << 8 | info->devfn;
1554 		qdep = info->ats_qdep;
1555 		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1556 				qdep, addr, mask);
1557 	}
1558 	spin_unlock_irqrestore(&device_domain_lock, flags);
1559 }
1560 
1561 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1562 				  struct dmar_domain *domain,
1563 				  unsigned long pfn, unsigned int pages,
1564 				  int ih, int map)
1565 {
1566 	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1567 	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1568 	u16 did = domain->iommu_did[iommu->seq_id];
1569 
1570 	BUG_ON(pages == 0);
1571 
1572 	if (ih)
1573 		ih = 1 << 6;
1574 	/*
1575 	 * Fall back to a domain-selective flush if there is no PSI support or
1576 	 * the size is too big.
1577 	 * PSI requires the page size to be 2^x, and the base address to be
1578 	 * naturally aligned to that size.
1579 	 */
1580 	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1581 		iommu->flush.flush_iotlb(iommu, did, 0, 0,
1582 						DMA_TLB_DSI_FLUSH);
1583 	else
1584 		iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1585 						DMA_TLB_PSI_FLUSH);
1586 
1587 	/*
1588 	 * In caching mode, changes of pages from non-present to present require
1589 	 * flush. However, device IOTLB doesn't need to be flushed in this case.
1590 	 */
1591 	if (!cap_caching_mode(iommu->cap) || !map)
1592 		iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1593 				      addr, mask);
1594 }
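/*
 * Worked example (illustrative): flushing 9 pages rounds up to
 * __roundup_pow_of_two(9) = 16, so mask = ilog2(16) = 4 and the PSI
 * covers a naturally aligned 16-page (64KiB) region; if the IOMMU's
 * MAMV capability is smaller than 4, the code above falls back to a
 * domain-selective flush instead.
 */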
1595 
1596 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1597 {
1598 	u32 pmen;
1599 	unsigned long flags;
1600 
1601 	if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1602 		return;
1603 
1604 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1605 	pmen = readl(iommu->reg + DMAR_PMEN_REG);
1606 	pmen &= ~DMA_PMEN_EPM;
1607 	writel(pmen, iommu->reg + DMAR_PMEN_REG);
1608 
1609 	/* wait for the protected region status bit to clear */
1610 	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1611 		readl, !(pmen & DMA_PMEN_PRS), pmen);
1612 
1613 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1614 }
1615 
1616 static void iommu_enable_translation(struct intel_iommu *iommu)
1617 {
1618 	u32 sts;
1619 	unsigned long flags;
1620 
1621 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1622 	iommu->gcmd |= DMA_GCMD_TE;
1623 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1624 
1625 	/* Make sure hardware completes it */
1626 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1627 		      readl, (sts & DMA_GSTS_TES), sts);
1628 
1629 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1630 }
1631 
1632 static void iommu_disable_translation(struct intel_iommu *iommu)
1633 {
1634 	u32 sts;
1635 	unsigned long flag;
1636 
1637 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1638 	iommu->gcmd &= ~DMA_GCMD_TE;
1639 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1640 
1641 	/* Make sure hardware completes it */
1642 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1643 		      readl, (!(sts & DMA_GSTS_TES)), sts);
1644 
1645 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1646 }
1647 
1648 
1649 static int iommu_init_domains(struct intel_iommu *iommu)
1650 {
1651 	u32 ndomains, nlongs;
1652 	size_t size;
1653 
1654 	ndomains = cap_ndoms(iommu->cap);
1655 	pr_debug("%s: Number of Domains supported <%d>\n",
1656 		 iommu->name, ndomains);
1657 	nlongs = BITS_TO_LONGS(ndomains);
1658 
1659 	spin_lock_init(&iommu->lock);
1660 
1661 	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1662 	if (!iommu->domain_ids) {
1663 		pr_err("%s: Allocating domain id array failed\n",
1664 		       iommu->name);
1665 		return -ENOMEM;
1666 	}
1667 
1668 	size = ((ndomains >> 8) + 1) * sizeof(struct dmar_domain **);
1669 	iommu->domains = kzalloc(size, GFP_KERNEL);
1670 
1671 	if (iommu->domains) {
1672 		size = 256 * sizeof(struct dmar_domain *);
1673 		iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1674 	}
1675 
1676 	if (!iommu->domains || !iommu->domains[0]) {
1677 		pr_err("%s: Allocating domain array failed\n",
1678 		       iommu->name);
1679 		kfree(iommu->domain_ids);
1680 		kfree(iommu->domains);
1681 		iommu->domain_ids = NULL;
1682 		iommu->domains    = NULL;
1683 		return -ENOMEM;
1684 	}
1685 
1686 
1687 
1688 	/*
1689 	 * If Caching mode is set, then invalid translations are tagged
1690 	 * with domain-id 0, hence we need to pre-allocate it. We also
1691 	 * use domain-id 0 as a marker for non-allocated domain-id, so
1692 	 * make sure it is not used for a real domain.
1693 	 */
1694 	set_bit(0, iommu->domain_ids);
1695 
1696 	return 0;
1697 }
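/*
 * Editor's note -- an illustrative sketch, not taken from this file: the
 * accessors that pair with the allocation above (set_iommu_domain() /
 * get_iommu_domain(), not shown here) are assumed to treat the domain
 * pointer array as two levels, with the high bits of the domain-id picking
 * a 256-entry page and the low byte picking the slot, roughly:
 *
 *	struct dmar_domain **page = iommu->domains[did >> 8];
 *	struct dmar_domain *domain = page ? page[did & 0xff] : NULL;
 *
 * which is why iommu_init_domains() sizes the outer array as
 * ((ndomains >> 8) + 1) pointers and pre-allocates only page 0.
 */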
1698 
1699 static void disable_dmar_iommu(struct intel_iommu *iommu)
1700 {
1701 	struct device_domain_info *info, *tmp;
1702 	unsigned long flags;
1703 
1704 	if (!iommu->domains || !iommu->domain_ids)
1705 		return;
1706 
1707 again:
1708 	spin_lock_irqsave(&device_domain_lock, flags);
1709 	list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1710 		struct dmar_domain *domain;
1711 
1712 		if (info->iommu != iommu)
1713 			continue;
1714 
1715 		if (!info->dev || !info->domain)
1716 			continue;
1717 
1718 		domain = info->domain;
1719 
1720 		__dmar_remove_one_dev_info(info);
1721 
1722 		if (!domain_type_is_vm_or_si(domain)) {
1723 			/*
1724 			 * The domain_exit() function  can't be called under
1725 			 * device_domain_lock, as it takes this lock itself.
1726 			 * So release the lock here and re-run the loop
1727 			 * afterwards.
1728 			 */
1729 			spin_unlock_irqrestore(&device_domain_lock, flags);
1730 			domain_exit(domain);
1731 			goto again;
1732 		}
1733 	}
1734 	spin_unlock_irqrestore(&device_domain_lock, flags);
1735 
1736 	if (iommu->gcmd & DMA_GCMD_TE)
1737 		iommu_disable_translation(iommu);
1738 }
1739 
1740 static void free_dmar_iommu(struct intel_iommu *iommu)
1741 {
1742 	if ((iommu->domains) && (iommu->domain_ids)) {
1743 		int elems = (cap_ndoms(iommu->cap) >> 8) + 1;
1744 		int i;
1745 
1746 		for (i = 0; i < elems; i++)
1747 			kfree(iommu->domains[i]);
1748 		kfree(iommu->domains);
1749 		kfree(iommu->domain_ids);
1750 		iommu->domains = NULL;
1751 		iommu->domain_ids = NULL;
1752 	}
1753 
1754 	g_iommus[iommu->seq_id] = NULL;
1755 
1756 	/* free context mapping */
1757 	free_context_table(iommu);
1758 
1759 #ifdef CONFIG_INTEL_IOMMU_SVM
1760 	if (pasid_enabled(iommu)) {
1761 		if (ecap_prs(iommu->ecap))
1762 			intel_svm_finish_prq(iommu);
1763 		intel_svm_free_pasid_tables(iommu);
1764 	}
1765 #endif
1766 }
1767 
1768 static struct dmar_domain *alloc_domain(int flags)
1769 {
1770 	struct dmar_domain *domain;
1771 
1772 	domain = alloc_domain_mem();
1773 	if (!domain)
1774 		return NULL;
1775 
1776 	memset(domain, 0, sizeof(*domain));
1777 	domain->nid = -1;
1778 	domain->flags = flags;
1779 	INIT_LIST_HEAD(&domain->devices);
1780 
1781 	return domain;
1782 }
1783 
1784 /* Must be called with iommu->lock */
1785 static int domain_attach_iommu(struct dmar_domain *domain,
1786 			       struct intel_iommu *iommu)
1787 {
1788 	unsigned long ndomains;
1789 	int num;
1790 
1791 	assert_spin_locked(&device_domain_lock);
1792 	assert_spin_locked(&iommu->lock);
1793 
1794 	domain->iommu_refcnt[iommu->seq_id] += 1;
1795 	domain->iommu_count += 1;
1796 	if (domain->iommu_refcnt[iommu->seq_id] == 1) {
1797 		ndomains = cap_ndoms(iommu->cap);
1798 		num      = find_first_zero_bit(iommu->domain_ids, ndomains);
1799 
1800 		if (num >= ndomains) {
1801 			pr_err("%s: No free domain ids\n", iommu->name);
1802 			domain->iommu_refcnt[iommu->seq_id] -= 1;
1803 			domain->iommu_count -= 1;
1804 			return -ENOSPC;
1805 		}
1806 
1807 		set_bit(num, iommu->domain_ids);
1808 		set_iommu_domain(iommu, num, domain);
1809 
1810 		domain->iommu_did[iommu->seq_id] = num;
1811 		domain->nid			 = iommu->node;
1812 
1813 		domain_update_iommu_cap(domain);
1814 	}
1815 
1816 	return 0;
1817 }
1818 
1819 static int domain_detach_iommu(struct dmar_domain *domain,
1820 			       struct intel_iommu *iommu)
1821 {
1822 	int num, count = INT_MAX;
1823 
1824 	assert_spin_locked(&device_domain_lock);
1825 	assert_spin_locked(&iommu->lock);
1826 
1827 	domain->iommu_refcnt[iommu->seq_id] -= 1;
1828 	count = --domain->iommu_count;
1829 	if (domain->iommu_refcnt[iommu->seq_id] == 0) {
1830 		num = domain->iommu_did[iommu->seq_id];
1831 		clear_bit(num, iommu->domain_ids);
1832 		set_iommu_domain(iommu, num, NULL);
1833 
1834 		domain_update_iommu_cap(domain);
1835 		domain->iommu_did[iommu->seq_id] = 0;
1836 	}
1837 
1838 	return count;
1839 }
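/*
 * Editor's sketch (illustrative only, not part of the original source):
 * per the asserts above, callers take device_domain_lock first and then
 * iommu->lock before attaching or detaching, as dmar_insert_one_dev_info()
 * does further down:
 */
#if 0
	spin_lock_irqsave(&device_domain_lock, flags);
	spin_lock(&iommu->lock);
	ret = domain_attach_iommu(domain, iommu);
	spin_unlock(&iommu->lock);
	spin_unlock_irqrestore(&device_domain_lock, flags);
#endif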
1840 
1841 static struct iova_domain reserved_iova_list;
1842 static struct lock_class_key reserved_rbtree_key;
1843 
1844 static int dmar_init_reserved_ranges(void)
1845 {
1846 	struct pci_dev *pdev = NULL;
1847 	struct iova *iova;
1848 	int i;
1849 
1850 	init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
1851 			DMA_32BIT_PFN);
1852 
1853 	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1854 		&reserved_rbtree_key);
1855 
1856 	/* IOAPIC ranges shouldn't be accessed by DMA */
1857 	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1858 		IOVA_PFN(IOAPIC_RANGE_END));
1859 	if (!iova) {
1860 		pr_err("Reserve IOAPIC range failed\n");
1861 		return -ENODEV;
1862 	}
1863 
1864 	/* Reserve all PCI MMIO to avoid peer-to-peer access */
1865 	for_each_pci_dev(pdev) {
1866 		struct resource *r;
1867 
1868 		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1869 			r = &pdev->resource[i];
1870 			if (!r->flags || !(r->flags & IORESOURCE_MEM))
1871 				continue;
1872 			iova = reserve_iova(&reserved_iova_list,
1873 					    IOVA_PFN(r->start),
1874 					    IOVA_PFN(r->end));
1875 			if (!iova) {
1876 				pr_err("Reserve iova failed\n");
1877 				return -ENODEV;
1878 			}
1879 		}
1880 	}
1881 	return 0;
1882 }
1883 
1884 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1885 {
1886 	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1887 }
1888 
1889 static inline int guestwidth_to_adjustwidth(int gaw)
1890 {
1891 	int agaw;
1892 	int r = (gaw - 12) % 9;
1893 
1894 	if (r == 0)
1895 		agaw = gaw;
1896 	else
1897 		agaw = gaw + 9 - r;
1898 	if (agaw > 64)
1899 		agaw = 64;
1900 	return agaw;
1901 }
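/*
 * Editor's sketch (illustrative only, not part of the original source):
 * a hypothetical, never-called helper that exercises the rounding above.
 * A guest address width is rounded up to the next 12 + n*9 page-table
 * boundary and capped at 64 bits.
 */
static void __maybe_unused example_guestwidth_rounding(void)
{
	/* 39 = 12 + 3*9 is already a level boundary (3-level table) */
	WARN_ON(guestwidth_to_adjustwidth(39) != 39);
	/* 40 rounds up to the next boundary, 48 (4-level table) */
	WARN_ON(guestwidth_to_adjustwidth(40) != 48);
	/* 62 would round to 66, which is capped at 64 */
	WARN_ON(guestwidth_to_adjustwidth(62) != 64);
}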
1902 
1903 static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1904 		       int guest_width)
1905 {
1906 	int adjust_width, agaw;
1907 	unsigned long sagaw;
1908 
1909 	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
1910 			DMA_32BIT_PFN);
1911 	domain_reserve_special_ranges(domain);
1912 
1913 	/* calculate AGAW */
1914 	if (guest_width > cap_mgaw(iommu->cap))
1915 		guest_width = cap_mgaw(iommu->cap);
1916 	domain->gaw = guest_width;
1917 	adjust_width = guestwidth_to_adjustwidth(guest_width);
1918 	agaw = width_to_agaw(adjust_width);
1919 	sagaw = cap_sagaw(iommu->cap);
1920 	if (!test_bit(agaw, &sagaw)) {
1921 		/* hardware doesn't support it, choose a bigger one */
1922 		pr_debug("Hardware doesn't support agaw %d\n", agaw);
1923 		agaw = find_next_bit(&sagaw, 5, agaw);
1924 		if (agaw >= 5)
1925 			return -ENODEV;
1926 	}
1927 	domain->agaw = agaw;
1928 
1929 	if (ecap_coherent(iommu->ecap))
1930 		domain->iommu_coherency = 1;
1931 	else
1932 		domain->iommu_coherency = 0;
1933 
1934 	if (ecap_sc_support(iommu->ecap))
1935 		domain->iommu_snooping = 1;
1936 	else
1937 		domain->iommu_snooping = 0;
1938 
1939 	if (intel_iommu_superpage)
1940 		domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1941 	else
1942 		domain->iommu_superpage = 0;
1943 
1944 	domain->nid = iommu->node;
1945 
1946 	/* always allocate the top pgd */
1947 	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1948 	if (!domain->pgd)
1949 		return -ENOMEM;
1950 	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1951 	return 0;
1952 }
1953 
1954 static void domain_exit(struct dmar_domain *domain)
1955 {
1956 	struct page *freelist = NULL;
1957 
1958 	/* Domain 0 is reserved, so don't process it */
1959 	if (!domain)
1960 		return;
1961 
1962 	/* Flush any lazy unmaps that may reference this domain */
1963 	if (!intel_iommu_strict)
1964 		flush_unmaps_timeout(0);
1965 
1966 	/* Remove associated devices and clear attached or cached domains */
1967 	rcu_read_lock();
1968 	domain_remove_dev_info(domain);
1969 	rcu_read_unlock();
1970 
1971 	/* destroy iovas */
1972 	put_iova_domain(&domain->iovad);
1973 
1974 	freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1975 
1976 	dma_free_pagelist(freelist);
1977 
1978 	free_domain_mem(domain);
1979 }
1980 
1981 static int domain_context_mapping_one(struct dmar_domain *domain,
1982 				      struct intel_iommu *iommu,
1983 				      u8 bus, u8 devfn)
1984 {
1985 	u16 did = domain->iommu_did[iommu->seq_id];
1986 	int translation = CONTEXT_TT_MULTI_LEVEL;
1987 	struct device_domain_info *info = NULL;
1988 	struct context_entry *context;
1989 	unsigned long flags;
1990 	struct dma_pte *pgd;
1991 	int ret, agaw;
1992 
1993 	WARN_ON(did == 0);
1994 
1995 	if (hw_pass_through && domain_type_is_si(domain))
1996 		translation = CONTEXT_TT_PASS_THROUGH;
1997 
1998 	pr_debug("Set context mapping for %02x:%02x.%d\n",
1999 		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
2000 
2001 	BUG_ON(!domain->pgd);
2002 
2003 	spin_lock_irqsave(&device_domain_lock, flags);
2004 	spin_lock(&iommu->lock);
2005 
2006 	ret = -ENOMEM;
2007 	context = iommu_context_addr(iommu, bus, devfn, 1);
2008 	if (!context)
2009 		goto out_unlock;
2010 
2011 	ret = 0;
2012 	if (context_present(context))
2013 		goto out_unlock;
2014 
2015 	/*
2016 	 * For kdump cases, old valid entries may be cached due to the
2017 	 * in-flight DMA and copied pgtable, but there is no unmapping
2018 	 * behaviour for them, thus we need an explicit cache flush for
2019 	 * the newly-mapped device. For kdump, at this point, the device
2020 	 * is supposed to finish reset at its driver probe stage, so no
2021 	 * in-flight DMA will exist, and we don't need to worry about it
2022 	 * hereafter.
2023 	 */
2024 	if (context_copied(context)) {
2025 		u16 did_old = context_domain_id(context);
2026 
2027 		if (did_old >= 0 && did_old < cap_ndoms(iommu->cap)) {
2028 			iommu->flush.flush_context(iommu, did_old,
2029 						   (((u16)bus) << 8) | devfn,
2030 						   DMA_CCMD_MASK_NOBIT,
2031 						   DMA_CCMD_DEVICE_INVL);
2032 			iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2033 						 DMA_TLB_DSI_FLUSH);
2034 		}
2035 	}
2036 
2037 	pgd = domain->pgd;
2038 
2039 	context_clear_entry(context);
2040 	context_set_domain_id(context, did);
2041 
2042 	/*
2043 	 * Skip top levels of page tables for iommu which has less agaw
2044 	 * than default.  Unnecessary for PT mode.
2045 	 */
2046 	if (translation != CONTEXT_TT_PASS_THROUGH) {
2047 		for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2048 			ret = -ENOMEM;
2049 			pgd = phys_to_virt(dma_pte_addr(pgd));
2050 			if (!dma_pte_present(pgd))
2051 				goto out_unlock;
2052 		}
2053 
2054 		info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2055 		if (info && info->ats_supported)
2056 			translation = CONTEXT_TT_DEV_IOTLB;
2057 		else
2058 			translation = CONTEXT_TT_MULTI_LEVEL;
2059 
2060 		context_set_address_root(context, virt_to_phys(pgd));
2061 		context_set_address_width(context, agaw);
2062 	} else {
2063 		/*
2064 		 * In pass through mode, AW must be programmed to
2065 		 * indicate the largest AGAW value supported by
2066 		 * hardware. And ASR is ignored by hardware.
2067 		 */
2068 		context_set_address_width(context, iommu->msagaw);
2069 	}
2070 
2071 	context_set_translation_type(context, translation);
2072 	context_set_fault_enable(context);
2073 	context_set_present(context);
2074 	domain_flush_cache(domain, context, sizeof(*context));
2075 
2076 	/*
2077 	 * It's a non-present to present mapping. If hardware doesn't cache
2078 	 * non-present entries we only need to flush the write-buffer. If it
2079 	 * _does_ cache non-present entries, then it does so in the special
2080 	 * domain #0, which we have to flush:
2081 	 */
2082 	if (cap_caching_mode(iommu->cap)) {
2083 		iommu->flush.flush_context(iommu, 0,
2084 					   (((u16)bus) << 8) | devfn,
2085 					   DMA_CCMD_MASK_NOBIT,
2086 					   DMA_CCMD_DEVICE_INVL);
2087 		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2088 	} else {
2089 		iommu_flush_write_buffer(iommu);
2090 	}
2091 	iommu_enable_dev_iotlb(info);
2092 
2093 	ret = 0;
2094 
2095 out_unlock:
2096 	spin_unlock(&iommu->lock);
2097 	spin_unlock_irqrestore(&device_domain_lock, flags);
2098 
2099 	return ret;
2100 }
2101 
2102 struct domain_context_mapping_data {
2103 	struct dmar_domain *domain;
2104 	struct intel_iommu *iommu;
2105 };
2106 
2107 static int domain_context_mapping_cb(struct pci_dev *pdev,
2108 				     u16 alias, void *opaque)
2109 {
2110 	struct domain_context_mapping_data *data = opaque;
2111 
2112 	return domain_context_mapping_one(data->domain, data->iommu,
2113 					  PCI_BUS_NUM(alias), alias & 0xff);
2114 }
2115 
2116 static int
2117 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2118 {
2119 	struct intel_iommu *iommu;
2120 	u8 bus, devfn;
2121 	struct domain_context_mapping_data data;
2122 
2123 	iommu = device_to_iommu(dev, &bus, &devfn);
2124 	if (!iommu)
2125 		return -ENODEV;
2126 
2127 	if (!dev_is_pci(dev))
2128 		return domain_context_mapping_one(domain, iommu, bus, devfn);
2129 
2130 	data.domain = domain;
2131 	data.iommu = iommu;
2132 
2133 	return pci_for_each_dma_alias(to_pci_dev(dev),
2134 				      &domain_context_mapping_cb, &data);
2135 }
2136 
2137 static int domain_context_mapped_cb(struct pci_dev *pdev,
2138 				    u16 alias, void *opaque)
2139 {
2140 	struct intel_iommu *iommu = opaque;
2141 
2142 	return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2143 }
2144 
2145 static int domain_context_mapped(struct device *dev)
2146 {
2147 	struct intel_iommu *iommu;
2148 	u8 bus, devfn;
2149 
2150 	iommu = device_to_iommu(dev, &bus, &devfn);
2151 	if (!iommu)
2152 		return -ENODEV;
2153 
2154 	if (!dev_is_pci(dev))
2155 		return device_context_mapped(iommu, bus, devfn);
2156 
2157 	return !pci_for_each_dma_alias(to_pci_dev(dev),
2158 				       domain_context_mapped_cb, iommu);
2159 }
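/*
 * Editor's note (summary of the two helpers above, no new behaviour):
 * pci_for_each_dma_alias() stops as soon as the callback returns non-zero,
 * so domain_context_mapped_cb() returns the *negation* of
 * device_context_mapped() -- the walk aborts on the first alias that is
 * not mapped, and the outer negation in domain_context_mapped() turns
 * that back into "true only if every DMA alias has a context entry".
 */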
2160 
2161 /* Returns a number of VTD pages, but aligned to MM page size */
2162 static inline unsigned long aligned_nrpages(unsigned long host_addr,
2163 					    size_t size)
2164 {
2165 	host_addr &= ~PAGE_MASK;
2166 	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2167 }
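/*
 * Editor's sketch (illustrative only, not part of the original source,
 * assuming 4KiB pages where PAGE_SHIFT == VTD_PAGE_SHIFT): a hypothetical,
 * never-called helper showing that partial pages at either end are counted.
 */
static void __maybe_unused example_aligned_nrpages(void)
{
	/* one page worth of data starting on a page boundary -> one page */
	WARN_ON(aligned_nrpages(0x0, 0x1000) != 1);
	/* the same length starting mid-page straddles two pages */
	WARN_ON(aligned_nrpages(0x800, 0x1000) != 2);
}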
2168 
2169 /* Return largest possible superpage level for a given mapping */
2170 static inline int hardware_largepage_caps(struct dmar_domain *domain,
2171 					  unsigned long iov_pfn,
2172 					  unsigned long phy_pfn,
2173 					  unsigned long pages)
2174 {
2175 	int support, level = 1;
2176 	unsigned long pfnmerge;
2177 
2178 	support = domain->iommu_superpage;
2179 
2180 	/* To use a large page, the virtual *and* physical addresses
2181 	   must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2182 	   of them will mean we have to use smaller pages. So just
2183 	   merge them and check both at once. */
2184 	pfnmerge = iov_pfn | phy_pfn;
2185 
2186 	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2187 		pages >>= VTD_STRIDE_SHIFT;
2188 		if (!pages)
2189 			break;
2190 		pfnmerge >>= VTD_STRIDE_SHIFT;
2191 		level++;
2192 		support--;
2193 	}
2194 	return level;
2195 }
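/*
 * Editor's note (illustrative worked example, not part of the original
 * source; assumes VTD_STRIDE_SHIFT is 9, i.e. 512 PTEs per level): with
 * domain->iommu_superpage == 2, iov_pfn == 0x200, phy_pfn == 0x400 and
 * pages == 0x400 (4MiB):
 *
 *	pfnmerge = 0x600, low 9 bits clear -> take one iteration:
 *	pages    >>= 9 -> 2
 *	pfnmerge >>= 9 -> 3, level = 2
 *	3 has low bits set -> loop stops
 *
 * so the caller may use 2MiB (level-2) superpages here, but not 1GiB ones.
 */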
2196 
2197 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2198 			    struct scatterlist *sg, unsigned long phys_pfn,
2199 			    unsigned long nr_pages, int prot)
2200 {
2201 	struct dma_pte *first_pte = NULL, *pte = NULL;
2202 	phys_addr_t uninitialized_var(pteval);
2203 	unsigned long sg_res = 0;
2204 	unsigned int largepage_lvl = 0;
2205 	unsigned long lvl_pages = 0;
2206 
2207 	BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2208 
2209 	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2210 		return -EINVAL;
2211 
2212 	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2213 
2214 	if (!sg) {
2215 		sg_res = nr_pages;
2216 		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2217 	}
2218 
2219 	while (nr_pages > 0) {
2220 		uint64_t tmp;
2221 
2222 		if (!sg_res) {
2223 			unsigned int pgoff = sg->offset & ~PAGE_MASK;
2224 
2225 			sg_res = aligned_nrpages(sg->offset, sg->length);
2226 			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
2227 			sg->dma_length = sg->length;
2228 			pteval = (sg_phys(sg) - pgoff) | prot;
2229 			phys_pfn = pteval >> VTD_PAGE_SHIFT;
2230 		}
2231 
2232 		if (!pte) {
2233 			largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2234 
2235 			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2236 			if (!pte)
2237 				return -ENOMEM;
2238 			/* It is a large page */
2239 			if (largepage_lvl > 1) {
2240 				unsigned long nr_superpages, end_pfn;
2241 
2242 				pteval |= DMA_PTE_LARGE_PAGE;
2243 				lvl_pages = lvl_to_nr_pages(largepage_lvl);
2244 
2245 				nr_superpages = sg_res / lvl_pages;
2246 				end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2247 
2248 				/*
2249 				 * Ensure that old small page tables are
2250 				 * removed to make room for superpage(s).
2251 				 */
2252 				dma_pte_free_pagetable(domain, iov_pfn, end_pfn);
2253 			} else {
2254 				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2255 			}
2256 
2257 		}
2258 		/* We don't need a lock here, nobody else
2259 		 * touches the iova range
2260 		 */
2261 		tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2262 		if (tmp) {
2263 			static int dumps = 5;
2264 			pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2265 				iov_pfn, tmp, (unsigned long long)pteval);
2266 			if (dumps) {
2267 				dumps--;
2268 				debug_dma_dump_mappings(NULL);
2269 			}
2270 			WARN_ON(1);
2271 		}
2272 
2273 		lvl_pages = lvl_to_nr_pages(largepage_lvl);
2274 
2275 		BUG_ON(nr_pages < lvl_pages);
2276 		BUG_ON(sg_res < lvl_pages);
2277 
2278 		nr_pages -= lvl_pages;
2279 		iov_pfn += lvl_pages;
2280 		phys_pfn += lvl_pages;
2281 		pteval += lvl_pages * VTD_PAGE_SIZE;
2282 		sg_res -= lvl_pages;
2283 
2284 		/* If the next PTE would be the first in a new page, then we
2285 		   need to flush the cache on the entries we've just written.
2286 		   And then we'll need to recalculate 'pte', so clear it and
2287 		   let it get set again in the if (!pte) block above.
2288 
2289 		   If we're done (!nr_pages) we need to flush the cache too.
2290 
2291 		   Also if we've been setting superpages, we may need to
2292 		   recalculate 'pte' and switch back to smaller pages for the
2293 		   end of the mapping, if the trailing size is not enough to
2294 		   use another superpage (i.e. sg_res < lvl_pages). */
2295 		pte++;
2296 		if (!nr_pages || first_pte_in_page(pte) ||
2297 		    (largepage_lvl > 1 && sg_res < lvl_pages)) {
2298 			domain_flush_cache(domain, first_pte,
2299 					   (void *)pte - (void *)first_pte);
2300 			pte = NULL;
2301 		}
2302 
2303 		if (!sg_res && nr_pages)
2304 			sg = sg_next(sg);
2305 	}
2306 	return 0;
2307 }
2308 
2309 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2310 				    struct scatterlist *sg, unsigned long nr_pages,
2311 				    int prot)
2312 {
2313 	return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2314 }
2315 
2316 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2317 				     unsigned long phys_pfn, unsigned long nr_pages,
2318 				     int prot)
2319 {
2320 	return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2321 }
2322 
2323 static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
2324 {
2325 	if (!iommu)
2326 		return;
2327 
2328 	clear_context_table(iommu, bus, devfn);
2329 	iommu->flush.flush_context(iommu, 0, 0, 0,
2330 					   DMA_CCMD_GLOBAL_INVL);
2331 	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2332 }
2333 
2334 static inline void unlink_domain_info(struct device_domain_info *info)
2335 {
2336 	assert_spin_locked(&device_domain_lock);
2337 	list_del(&info->link);
2338 	list_del(&info->global);
2339 	if (info->dev)
2340 		info->dev->archdata.iommu = NULL;
2341 }
2342 
2343 static void domain_remove_dev_info(struct dmar_domain *domain)
2344 {
2345 	struct device_domain_info *info, *tmp;
2346 	unsigned long flags;
2347 
2348 	spin_lock_irqsave(&device_domain_lock, flags);
2349 	list_for_each_entry_safe(info, tmp, &domain->devices, link)
2350 		__dmar_remove_one_dev_info(info);
2351 	spin_unlock_irqrestore(&device_domain_lock, flags);
2352 }
2353 
2354 /*
2355  * find_domain
2356  * Note: struct device->archdata.iommu stores the info
2357  */
2358 static struct dmar_domain *find_domain(struct device *dev)
2359 {
2360 	struct device_domain_info *info;
2361 
2362 	/* No lock here, assumes no domain exit in normal case */
2363 	info = dev->archdata.iommu;
2364 	if (info)
2365 		return info->domain;
2366 	return NULL;
2367 }
2368 
2369 static inline struct device_domain_info *
2370 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2371 {
2372 	struct device_domain_info *info;
2373 
2374 	list_for_each_entry(info, &device_domain_list, global)
2375 		if (info->iommu->segment == segment && info->bus == bus &&
2376 		    info->devfn == devfn)
2377 			return info;
2378 
2379 	return NULL;
2380 }
2381 
2382 static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2383 						    int bus, int devfn,
2384 						    struct device *dev,
2385 						    struct dmar_domain *domain)
2386 {
2387 	struct dmar_domain *found = NULL;
2388 	struct device_domain_info *info;
2389 	unsigned long flags;
2390 	int ret;
2391 
2392 	info = alloc_devinfo_mem();
2393 	if (!info)
2394 		return NULL;
2395 
2396 	info->bus = bus;
2397 	info->devfn = devfn;
2398 	info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2399 	info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2400 	info->ats_qdep = 0;
2401 	info->dev = dev;
2402 	info->domain = domain;
2403 	info->iommu = iommu;
2404 
2405 	if (dev && dev_is_pci(dev)) {
2406 		struct pci_dev *pdev = to_pci_dev(info->dev);
2407 
2408 		if (ecap_dev_iotlb_support(iommu->ecap) &&
2409 		    pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2410 		    dmar_find_matched_atsr_unit(pdev))
2411 			info->ats_supported = 1;
2412 
2413 		if (ecs_enabled(iommu)) {
2414 			if (pasid_enabled(iommu)) {
2415 				int features = pci_pasid_features(pdev);
2416 				if (features >= 0)
2417 					info->pasid_supported = features | 1;
2418 			}
2419 
2420 			if (info->ats_supported && ecap_prs(iommu->ecap) &&
2421 			    pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2422 				info->pri_supported = 1;
2423 		}
2424 	}
2425 
2426 	spin_lock_irqsave(&device_domain_lock, flags);
2427 	if (dev)
2428 		found = find_domain(dev);
2429 
2430 	if (!found) {
2431 		struct device_domain_info *info2;
2432 		info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2433 		if (info2) {
2434 			found      = info2->domain;
2435 			info2->dev = dev;
2436 		}
2437 	}
2438 
2439 	if (found) {
2440 		spin_unlock_irqrestore(&device_domain_lock, flags);
2441 		free_devinfo_mem(info);
2442 		/* Caller must free the original domain */
2443 		return found;
2444 	}
2445 
2446 	spin_lock(&iommu->lock);
2447 	ret = domain_attach_iommu(domain, iommu);
2448 	spin_unlock(&iommu->lock);
2449 
2450 	if (ret) {
2451 		spin_unlock_irqrestore(&device_domain_lock, flags);
2452 		free_devinfo_mem(info);
2453 		return NULL;
2454 	}
2455 
2456 	list_add(&info->link, &domain->devices);
2457 	list_add(&info->global, &device_domain_list);
2458 	if (dev)
2459 		dev->archdata.iommu = info;
2460 	spin_unlock_irqrestore(&device_domain_lock, flags);
2461 
2462 	if (dev && domain_context_mapping(domain, dev)) {
2463 		pr_err("Domain context map for %s failed\n", dev_name(dev));
2464 		dmar_remove_one_dev_info(domain, dev);
2465 		return NULL;
2466 	}
2467 
2468 	return domain;
2469 }
2470 
2471 static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2472 {
2473 	*(u16 *)opaque = alias;
2474 	return 0;
2475 }
2476 
2477 /* domain is initialized */
2478 static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2479 {
2480 	struct device_domain_info *info = NULL;
2481 	struct dmar_domain *domain, *tmp;
2482 	struct intel_iommu *iommu;
2483 	u16 req_id, dma_alias;
2484 	unsigned long flags;
2485 	u8 bus, devfn;
2486 
2487 	domain = find_domain(dev);
2488 	if (domain)
2489 		return domain;
2490 
2491 	iommu = device_to_iommu(dev, &bus, &devfn);
2492 	if (!iommu)
2493 		return NULL;
2494 
2495 	req_id = ((u16)bus << 8) | devfn;
2496 
2497 	if (dev_is_pci(dev)) {
2498 		struct pci_dev *pdev = to_pci_dev(dev);
2499 
2500 		pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2501 
2502 		spin_lock_irqsave(&device_domain_lock, flags);
2503 		info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2504 						      PCI_BUS_NUM(dma_alias),
2505 						      dma_alias & 0xff);
2506 		if (info) {
2507 			iommu = info->iommu;
2508 			domain = info->domain;
2509 		}
2510 		spin_unlock_irqrestore(&device_domain_lock, flags);
2511 
2512 		/* DMA alias already has a domain, use it */
2513 		if (info)
2514 			goto found_domain;
2515 	}
2516 
2517 	/* Allocate and initialize new domain for the device */
2518 	domain = alloc_domain(0);
2519 	if (!domain)
2520 		return NULL;
2521 	if (domain_init(domain, iommu, gaw)) {
2522 		domain_exit(domain);
2523 		return NULL;
2524 	}
2525 
2526 	/* register PCI DMA alias device */
2527 	if (req_id != dma_alias && dev_is_pci(dev)) {
2528 		tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2529 					       dma_alias & 0xff, NULL, domain);
2530 
2531 		if (!tmp || tmp != domain) {
2532 			domain_exit(domain);
2533 			domain = tmp;
2534 		}
2535 
2536 		if (!domain)
2537 			return NULL;
2538 	}
2539 
2540 found_domain:
2541 	tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2542 
2543 	if (!tmp || tmp != domain) {
2544 		domain_exit(domain);
2545 		domain = tmp;
2546 	}
2547 
2548 	return domain;
2549 }
2550 
2551 static int iommu_domain_identity_map(struct dmar_domain *domain,
2552 				     unsigned long long start,
2553 				     unsigned long long end)
2554 {
2555 	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2556 	unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2557 
2558 	if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2559 			  dma_to_mm_pfn(last_vpfn))) {
2560 		pr_err("Reserving iova failed\n");
2561 		return -ENOMEM;
2562 	}
2563 
2564 	pr_debug("Mapping reserved region %llx-%llx\n", start, end);
2565 	/*
2566 	 * RMRR range might have overlap with physical memory range,
2567 	 * clear it first
2568 	 */
2569 	dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2570 
2571 	return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2572 				  last_vpfn - first_vpfn + 1,
2573 				  DMA_PTE_READ|DMA_PTE_WRITE);
2574 }
2575 
2576 static int domain_prepare_identity_map(struct device *dev,
2577 				       struct dmar_domain *domain,
2578 				       unsigned long long start,
2579 				       unsigned long long end)
2580 {
2581 	/* For _hardware_ passthrough, don't bother. But for software
2582 	   passthrough, we do it anyway -- it may indicate a memory
2583 	   range which is reserved in E820 and so didn't get set
2584 	   up to start with in si_domain */
2585 	if (domain == si_domain && hw_pass_through) {
2586 		pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2587 			dev_name(dev), start, end);
2588 		return 0;
2589 	}
2590 
2591 	pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2592 		dev_name(dev), start, end);
2593 
2594 	if (end < start) {
2595 		WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2596 			"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2597 			dmi_get_system_info(DMI_BIOS_VENDOR),
2598 			dmi_get_system_info(DMI_BIOS_VERSION),
2599 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
2600 		return -EIO;
2601 	}
2602 
2603 	if (end >> agaw_to_width(domain->agaw)) {
2604 		WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2605 		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2606 		     agaw_to_width(domain->agaw),
2607 		     dmi_get_system_info(DMI_BIOS_VENDOR),
2608 		     dmi_get_system_info(DMI_BIOS_VERSION),
2609 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
2610 		return -EIO;
2611 	}
2612 
2613 	return iommu_domain_identity_map(domain, start, end);
2614 }
2615 
2616 static int iommu_prepare_identity_map(struct device *dev,
2617 				      unsigned long long start,
2618 				      unsigned long long end)
2619 {
2620 	struct dmar_domain *domain;
2621 	int ret;
2622 
2623 	domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2624 	if (!domain)
2625 		return -ENOMEM;
2626 
2627 	ret = domain_prepare_identity_map(dev, domain, start, end);
2628 	if (ret)
2629 		domain_exit(domain);
2630 
2631 	return ret;
2632 }
2633 
2634 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2635 					 struct device *dev)
2636 {
2637 	if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2638 		return 0;
2639 	return iommu_prepare_identity_map(dev, rmrr->base_address,
2640 					  rmrr->end_address);
2641 }
2642 
2643 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2644 static inline void iommu_prepare_isa(void)
2645 {
2646 	struct pci_dev *pdev;
2647 	int ret;
2648 
2649 	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2650 	if (!pdev)
2651 		return;
2652 
2653 	pr_info("Prepare 0-16MiB unity mapping for LPC\n");
2654 	ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2655 
2656 	if (ret)
2657 		pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
2658 
2659 	pci_dev_put(pdev);
2660 }
2661 #else
2662 static inline void iommu_prepare_isa(void)
2663 {
2664 	return;
2665 }
2666 #endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
2667 
2668 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2669 
2670 static int __init si_domain_init(int hw)
2671 {
2672 	int nid, ret = 0;
2673 
2674 	si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2675 	if (!si_domain)
2676 		return -EFAULT;
2677 
2678 	if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2679 		domain_exit(si_domain);
2680 		return -EFAULT;
2681 	}
2682 
2683 	pr_debug("Identity mapping domain allocated\n");
2684 
2685 	if (hw)
2686 		return 0;
2687 
2688 	for_each_online_node(nid) {
2689 		unsigned long start_pfn, end_pfn;
2690 		int i;
2691 
2692 		for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2693 			ret = iommu_domain_identity_map(si_domain,
2694 					PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2695 			if (ret)
2696 				return ret;
2697 		}
2698 	}
2699 
2700 	return 0;
2701 }
2702 
2703 static int identity_mapping(struct device *dev)
2704 {
2705 	struct device_domain_info *info;
2706 
2707 	if (likely(!iommu_identity_mapping))
2708 		return 0;
2709 
2710 	info = dev->archdata.iommu;
2711 	if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2712 		return (info->domain == si_domain);
2713 
2714 	return 0;
2715 }
2716 
2717 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2718 {
2719 	struct dmar_domain *ndomain;
2720 	struct intel_iommu *iommu;
2721 	u8 bus, devfn;
2722 
2723 	iommu = device_to_iommu(dev, &bus, &devfn);
2724 	if (!iommu)
2725 		return -ENODEV;
2726 
2727 	ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2728 	if (ndomain != domain)
2729 		return -EBUSY;
2730 
2731 	return 0;
2732 }
2733 
2734 static bool device_has_rmrr(struct device *dev)
2735 {
2736 	struct dmar_rmrr_unit *rmrr;
2737 	struct device *tmp;
2738 	int i;
2739 
2740 	rcu_read_lock();
2741 	for_each_rmrr_units(rmrr) {
2742 		/*
2743 		 * Return TRUE if this RMRR contains the device that
2744 		 * is passed in.
2745 		 */
2746 		for_each_active_dev_scope(rmrr->devices,
2747 					  rmrr->devices_cnt, i, tmp)
2748 			if (tmp == dev) {
2749 				rcu_read_unlock();
2750 				return true;
2751 			}
2752 	}
2753 	rcu_read_unlock();
2754 	return false;
2755 }
2756 
2757 /*
2758  * There are a couple of cases where we need to restrict the functionality of
2759  * devices associated with RMRRs.  The first is when evaluating a device for
2760  * identity mapping because problems exist when devices are moved in and out
2761  * of domains and their respective RMRR information is lost.  This means that
2762  * a device with associated RMRRs will never be in a "passthrough" domain.
2763  * The second is use of the device through the IOMMU API.  This interface
2764  * expects to have full control of the IOVA space for the device.  We cannot
2765  * satisfy both the requirement that RMRR access is maintained and have an
2766  * unencumbered IOVA space.  We also have no ability to quiesce the device's
2767  * use of the RMRR space or even inform the IOMMU API user of the restriction.
2768  * We therefore prevent devices associated with an RMRR from participating in
2769  * the IOMMU API, which eliminates them from device assignment.
2770  *
2771  * In both cases we assume that PCI USB devices with RMRRs have them largely
2772  * for historical reasons and that the RMRR space is not actively used post
2773  * boot.  This exclusion may change if vendors begin to abuse it.
2774  *
2775  * The same exception is made for graphics devices, with the requirement that
2776  * any use of the RMRR regions will be torn down before assigning the device
2777  * to a guest.
2778  */
2779 static bool device_is_rmrr_locked(struct device *dev)
2780 {
2781 	if (!device_has_rmrr(dev))
2782 		return false;
2783 
2784 	if (dev_is_pci(dev)) {
2785 		struct pci_dev *pdev = to_pci_dev(dev);
2786 
2787 		if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2788 			return false;
2789 	}
2790 
2791 	return true;
2792 }
2793 
2794 static int iommu_should_identity_map(struct device *dev, int startup)
2795 {
2796 
2797 	if (dev_is_pci(dev)) {
2798 		struct pci_dev *pdev = to_pci_dev(dev);
2799 
2800 		if (device_is_rmrr_locked(dev))
2801 			return 0;
2802 
2803 		if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2804 			return 1;
2805 
2806 		if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2807 			return 1;
2808 
2809 		if (!(iommu_identity_mapping & IDENTMAP_ALL))
2810 			return 0;
2811 
2812 		/*
2813 		 * We want to start off with all devices in the 1:1 domain, and
2814 		 * take them out later if we find they can't access all of memory.
2815 		 *
2816 		 * However, we can't do this for PCI devices behind bridges,
2817 		 * because all PCI devices behind the same bridge will end up
2818 		 * with the same source-id on their transactions.
2819 		 *
2820 		 * Practically speaking, we can't change things around for these
2821 		 * devices at run-time, because we can't be sure there'll be no
2822 		 * DMA transactions in flight for any of their siblings.
2823 		 *
2824 		 * So PCI devices (unless they're on the root bus) as well as
2825 		 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2826 		 * the 1:1 domain, just in _case_ one of their siblings turns out
2827 		 * not to be able to map all of memory.
2828 		 */
2829 		if (!pci_is_pcie(pdev)) {
2830 			if (!pci_is_root_bus(pdev->bus))
2831 				return 0;
2832 			if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2833 				return 0;
2834 		} else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2835 			return 0;
2836 	} else {
2837 		if (device_has_rmrr(dev))
2838 			return 0;
2839 	}
2840 
2841 	/*
2842 	 * At boot time, we don't yet know if devices will be 64-bit capable.
2843 	 * Assume that they will — if they turn out not to be, then we can
2844 	 * take them out of the 1:1 domain later.
2845 	 */
2846 	if (!startup) {
2847 		/*
2848 		 * If the device's dma_mask is less than the system's memory
2849 		 * size then this is not a candidate for identity mapping.
2850 		 */
2851 		u64 dma_mask = *dev->dma_mask;
2852 
2853 		if (dev->coherent_dma_mask &&
2854 		    dev->coherent_dma_mask < dma_mask)
2855 			dma_mask = dev->coherent_dma_mask;
2856 
2857 		return dma_mask >= dma_get_required_mask(dev);
2858 	}
2859 
2860 	return 1;
2861 }
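/*
 * Editor's note (illustrative example, not part of the original source):
 * the !startup branch above is what demotes narrow devices at run time.
 * A device whose dma_mask is DMA_BIT_MASK(32) on a machine with RAM above
 * 4GiB sees dma_get_required_mask() larger than its mask, so the function
 * returns 0 and iommu_no_mapping() below moves the device out of
 * si_domain into a private remapping domain.
 */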
2862 
2863 static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2864 {
2865 	int ret;
2866 
2867 	if (!iommu_should_identity_map(dev, 1))
2868 		return 0;
2869 
2870 	ret = domain_add_dev_info(si_domain, dev);
2871 	if (!ret)
2872 		pr_info("%s identity mapping for device %s\n",
2873 			hw ? "Hardware" : "Software", dev_name(dev));
2874 	else if (ret == -ENODEV)
2875 		/* device not associated with an iommu */
2876 		ret = 0;
2877 
2878 	return ret;
2879 }
2880 
2881 
2882 static int __init iommu_prepare_static_identity_mapping(int hw)
2883 {
2884 	struct pci_dev *pdev = NULL;
2885 	struct dmar_drhd_unit *drhd;
2886 	struct intel_iommu *iommu;
2887 	struct device *dev;
2888 	int i;
2889 	int ret = 0;
2890 
2891 	for_each_pci_dev(pdev) {
2892 		ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2893 		if (ret)
2894 			return ret;
2895 	}
2896 
2897 	for_each_active_iommu(iommu, drhd)
2898 		for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2899 			struct acpi_device_physical_node *pn;
2900 			struct acpi_device *adev;
2901 
2902 			if (dev->bus != &acpi_bus_type)
2903 				continue;
2904 
2905 			adev = to_acpi_device(dev);
2906 			mutex_lock(&adev->physical_node_lock);
2907 			list_for_each_entry(pn, &adev->physical_node_list, node) {
2908 				ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2909 				if (ret)
2910 					break;
2911 			}
2912 			mutex_unlock(&adev->physical_node_lock);
2913 			if (ret)
2914 				return ret;
2915 		}
2916 
2917 	return 0;
2918 }
2919 
2920 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2921 {
2922 	/*
2923 	 * Start from the sane iommu hardware state.
2924 	 * If the queued invalidation is already initialized by us
2925 	 * (for example, while enabling interrupt-remapping) then
2926 	 * things are already rolling from a sane state.
2927 	 */
2928 	if (!iommu->qi) {
2929 		/*
2930 		 * Clear any previous faults.
2931 		 */
2932 		dmar_fault(-1, iommu);
2933 		/*
2934 		 * Disable queued invalidation if supported and already enabled
2935 		 * before OS handover.
2936 		 */
2937 		dmar_disable_qi(iommu);
2938 	}
2939 
2940 	if (dmar_enable_qi(iommu)) {
2941 		/*
2942 		 * Queued Invalidate not enabled, use Register Based Invalidate
2943 		 */
2944 		iommu->flush.flush_context = __iommu_flush_context;
2945 		iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2946 		pr_info("%s: Using Register based invalidation\n",
2947 			iommu->name);
2948 	} else {
2949 		iommu->flush.flush_context = qi_flush_context;
2950 		iommu->flush.flush_iotlb = qi_flush_iotlb;
2951 		pr_info("%s: Using Queued invalidation\n", iommu->name);
2952 	}
2953 }
2954 
2955 static int copy_context_table(struct intel_iommu *iommu,
2956 			      struct root_entry *old_re,
2957 			      struct context_entry **tbl,
2958 			      int bus, bool ext)
2959 {
2960 	int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
2961 	struct context_entry *new_ce = NULL, ce;
2962 	struct context_entry *old_ce = NULL;
2963 	struct root_entry re;
2964 	phys_addr_t old_ce_phys;
2965 
2966 	tbl_idx = ext ? bus * 2 : bus;
2967 	memcpy(&re, old_re, sizeof(re));
2968 
2969 	for (devfn = 0; devfn < 256; devfn++) {
2970 		/* First calculate the correct index */
2971 		idx = (ext ? devfn * 2 : devfn) % 256;
2972 
2973 		if (idx == 0) {
2974 			/* First save what we may have and clean up */
2975 			if (new_ce) {
2976 				tbl[tbl_idx] = new_ce;
2977 				__iommu_flush_cache(iommu, new_ce,
2978 						    VTD_PAGE_SIZE);
2979 				pos = 1;
2980 			}
2981 
2982 			if (old_ce)
2983 				memunmap(old_ce);
2984 
2985 			ret = 0;
2986 			if (devfn < 0x80)
2987 				old_ce_phys = root_entry_lctp(&re);
2988 			else
2989 				old_ce_phys = root_entry_uctp(&re);
2990 
2991 			if (!old_ce_phys) {
2992 				if (ext && devfn == 0) {
2993 					/* No LCTP, try UCTP */
2994 					devfn = 0x7f;
2995 					continue;
2996 				} else {
2997 					goto out;
2998 				}
2999 			}
3000 
3001 			ret = -ENOMEM;
3002 			old_ce = memremap(old_ce_phys, PAGE_SIZE,
3003 					MEMREMAP_WB);
3004 			if (!old_ce)
3005 				goto out;
3006 
3007 			new_ce = alloc_pgtable_page(iommu->node);
3008 			if (!new_ce)
3009 				goto out_unmap;
3010 
3011 			ret = 0;
3012 		}
3013 
3014 		/* Now copy the context entry */
3015 		memcpy(&ce, old_ce + idx, sizeof(ce));
3016 
3017 		if (!__context_present(&ce))
3018 			continue;
3019 
3020 		did = context_domain_id(&ce);
3021 		if (did >= 0 && did < cap_ndoms(iommu->cap))
3022 			set_bit(did, iommu->domain_ids);
3023 
3024 		/*
3025 		 * We need a marker for copied context entries. This
3026 		 * marker needs to work for the old format as well as
3027 		 * for extended context entries.
3028 		 *
3029 		 * Bit 67 of the context entry is used. In the old
3030 		 * format this bit is available to software, in the
3031 		 * extended format it is the PGE bit, but PGE is ignored
3032 		 * by HW if PASIDs are disabled (and thus still
3033 		 * available).
3034 		 *
3035 		 * So disable PASIDs first and then mark the entry
3036 		 * copied. This means that we don't copy PASID
3037 		 * translations from the old kernel, but this is fine as
3038 		 * faults there are not fatal.
3039 		 */
3040 		context_clear_pasid_enable(&ce);
3041 		context_set_copied(&ce);
3042 
3043 		new_ce[idx] = ce;
3044 	}
3045 
3046 	tbl[tbl_idx + pos] = new_ce;
3047 
3048 	__iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3049 
3050 out_unmap:
3051 	memunmap(old_ce);
3052 
3053 out:
3054 	return ret;
3055 }
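/*
 * Editor's note (summary of the copy above, no new behaviour): with the
 * extended root-entry format each bus owns two context tables, which is
 * why the indices are scaled by two when 'ext' is set --
 * ctxt_tbls[2 * bus] holds the copy for devfn 0x00-0x7f (taken from the
 * lower context table pointer) and ctxt_tbls[2 * bus + 1] the copy for
 * devfn 0x80-0xff (upper context table pointer); copy_translation_tables()
 * then writes them into root_entry[bus].lo and .hi respectively.
 */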
3056 
3057 static int copy_translation_tables(struct intel_iommu *iommu)
3058 {
3059 	struct context_entry **ctxt_tbls;
3060 	struct root_entry *old_rt;
3061 	phys_addr_t old_rt_phys;
3062 	int ctxt_table_entries;
3063 	unsigned long flags;
3064 	u64 rtaddr_reg;
3065 	int bus, ret;
3066 	bool new_ext, ext;
3067 
3068 	rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3069 	ext        = !!(rtaddr_reg & DMA_RTADDR_RTT);
3070 	new_ext    = !!ecap_ecs(iommu->ecap);
3071 
3072 	/*
3073 	 * The RTT bit can only be changed when translation is disabled,
3074 	 * but disabling translation means to open a window for data
3075 	 * corruption. So bail out and don't copy anything if we would
3076 	 * have to change the bit.
3077 	 */
3078 	if (new_ext != ext)
3079 		return -EINVAL;
3080 
3081 	old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3082 	if (!old_rt_phys)
3083 		return -EINVAL;
3084 
3085 	old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
3086 	if (!old_rt)
3087 		return -ENOMEM;
3088 
3089 	/* This is too big for the stack - allocate it from slab */
3090 	ctxt_table_entries = ext ? 512 : 256;
3091 	ret = -ENOMEM;
3092 	ctxt_tbls = kzalloc(ctxt_table_entries * sizeof(void *), GFP_KERNEL);
3093 	if (!ctxt_tbls)
3094 		goto out_unmap;
3095 
3096 	for (bus = 0; bus < 256; bus++) {
3097 		ret = copy_context_table(iommu, &old_rt[bus],
3098 					 ctxt_tbls, bus, ext);
3099 		if (ret) {
3100 			pr_err("%s: Failed to copy context table for bus %d\n",
3101 				iommu->name, bus);
3102 			continue;
3103 		}
3104 	}
3105 
3106 	spin_lock_irqsave(&iommu->lock, flags);
3107 
3108 	/* Context tables are copied, now write them to the root_entry table */
3109 	for (bus = 0; bus < 256; bus++) {
3110 		int idx = ext ? bus * 2 : bus;
3111 		u64 val;
3112 
3113 		if (ctxt_tbls[idx]) {
3114 			val = virt_to_phys(ctxt_tbls[idx]) | 1;
3115 			iommu->root_entry[bus].lo = val;
3116 		}
3117 
3118 		if (!ext || !ctxt_tbls[idx + 1])
3119 			continue;
3120 
3121 		val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3122 		iommu->root_entry[bus].hi = val;
3123 	}
3124 
3125 	spin_unlock_irqrestore(&iommu->lock, flags);
3126 
3127 	kfree(ctxt_tbls);
3128 
3129 	__iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3130 
3131 	ret = 0;
3132 
3133 out_unmap:
3134 	memunmap(old_rt);
3135 
3136 	return ret;
3137 }
3138 
3139 static int __init init_dmars(void)
3140 {
3141 	struct dmar_drhd_unit *drhd;
3142 	struct dmar_rmrr_unit *rmrr;
3143 	bool copied_tables = false;
3144 	struct device *dev;
3145 	struct intel_iommu *iommu;
3146 	int i, ret;
3147 
3148 	/*
3149 	 * for each drhd
3150 	 *    allocate root
3151 	 *    initialize and program root entry to not present
3152 	 * endfor
3153 	 */
3154 	for_each_drhd_unit(drhd) {
3155 		/*
3156 		 * lock not needed as this is only incremented in the single
3157 		 * threaded kernel __init code path; all other accesses are
3158 		 * read-only
3159 		 */
3160 		if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3161 			g_num_of_iommus++;
3162 			continue;
3163 		}
3164 		pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3165 	}
3166 
3167 	/* Preallocate enough resources for IOMMU hot-addition */
3168 	if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3169 		g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3170 
3171 	g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3172 			GFP_KERNEL);
3173 	if (!g_iommus) {
3174 		pr_err("Allocating global iommu array failed\n");
3175 		ret = -ENOMEM;
3176 		goto error;
3177 	}
3178 
3179 	deferred_flush = kzalloc(g_num_of_iommus *
3180 		sizeof(struct deferred_flush_tables), GFP_KERNEL);
3181 	if (!deferred_flush) {
3182 		ret = -ENOMEM;
3183 		goto free_g_iommus;
3184 	}
3185 
3186 	for_each_active_iommu(iommu, drhd) {
3187 		g_iommus[iommu->seq_id] = iommu;
3188 
3189 		intel_iommu_init_qi(iommu);
3190 
3191 		ret = iommu_init_domains(iommu);
3192 		if (ret)
3193 			goto free_iommu;
3194 
3195 		init_translation_status(iommu);
3196 
3197 		if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3198 			iommu_disable_translation(iommu);
3199 			clear_translation_pre_enabled(iommu);
3200 			pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3201 				iommu->name);
3202 		}
3203 
3204 		/*
3205 		 * TBD:
3206 		 * we could share the same root & context tables
3207 		 * among all IOMMUs. Need to split it later.
3208 		 */
3209 		ret = iommu_alloc_root_entry(iommu);
3210 		if (ret)
3211 			goto free_iommu;
3212 
3213 		if (translation_pre_enabled(iommu)) {
3214 			pr_info("Translation already enabled - trying to copy translation structures\n");
3215 
3216 			ret = copy_translation_tables(iommu);
3217 			if (ret) {
3218 				/*
3219 				 * We found the IOMMU with translation
3220 				 * enabled - but failed to copy over the
3221 				 * old root-entry table. Try to proceed
3222 				 * by disabling translation now and
3223 				 * allocating a clean root-entry table.
3224 				 * This might cause DMAR faults, but
3225 				 * probably the dump will still succeed.
3226 				 */
3227 				pr_err("Failed to copy translation tables from previous kernel for %s\n",
3228 				       iommu->name);
3229 				iommu_disable_translation(iommu);
3230 				clear_translation_pre_enabled(iommu);
3231 			} else {
3232 				pr_info("Copied translation tables from previous kernel for %s\n",
3233 					iommu->name);
3234 				copied_tables = true;
3235 			}
3236 		}
3237 
3238 		if (!ecap_pass_through(iommu->ecap))
3239 			hw_pass_through = 0;
3240 #ifdef CONFIG_INTEL_IOMMU_SVM
3241 		if (pasid_enabled(iommu))
3242 			intel_svm_alloc_pasid_tables(iommu);
3243 #endif
3244 	}
3245 
3246 	/*
3247 	 * Now that qi is enabled on all iommus, set the root entry and flush
3248 	 * caches. This is required on some Intel X58 chipsets, otherwise the
3249 	 * flush_context function will loop forever and the boot hangs.
3250 	 */
3251 	for_each_active_iommu(iommu, drhd) {
3252 		iommu_flush_write_buffer(iommu);
3253 		iommu_set_root_entry(iommu);
3254 		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3255 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3256 	}
3257 
3258 	if (iommu_pass_through)
3259 		iommu_identity_mapping |= IDENTMAP_ALL;
3260 
3261 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3262 	dmar_map_gfx = 0;
3263 #endif
3264 
3265 	if (!dmar_map_gfx)
3266 		iommu_identity_mapping |= IDENTMAP_GFX;
3267 
3268 	check_tylersburg_isoch();
3269 
3270 	if (iommu_identity_mapping) {
3271 		ret = si_domain_init(hw_pass_through);
3272 		if (ret)
3273 			goto free_iommu;
3274 	}
3275 
3276 
3277 	/*
3278 	 * If we copied translations from a previous kernel in the kdump
3279 	 * case, we can not assign the devices to domains now, as that
3280 	 * would eliminate the old mappings. So skip this part and defer
3281 	 * the assignment to device driver initialization time.
3282 	 */
3283 	if (copied_tables)
3284 		goto domains_done;
3285 
3286 	/*
3287 	 * If pass through is not set or not enabled, setup context entries for
3288 	 * identity mappings for rmrr, gfx, and isa and may fall back to static
3289 	 * identity mapping if iommu_identity_mapping is set.
3290 	 */
3291 	if (iommu_identity_mapping) {
3292 		ret = iommu_prepare_static_identity_mapping(hw_pass_through);
3293 		if (ret) {
3294 			pr_crit("Failed to setup IOMMU pass-through\n");
3295 			goto free_iommu;
3296 		}
3297 	}
3298 	/*
3299 	 * For each rmrr
3300 	 *   for each dev attached to rmrr
3301 	 *   do
3302 	 *     locate drhd for dev, alloc domain for dev
3303 	 *     allocate free domain
3304 	 *     allocate page table entries for rmrr
3305 	 *     if context not allocated for bus
3306 	 *           allocate and init context
3307 	 *           set present in root table for this bus
3308 	 *     init context with domain, translation etc
3309 	 *    endfor
3310 	 * endfor
3311 	 */
3312 	pr_info("Setting RMRR:\n");
3313 	for_each_rmrr_units(rmrr) {
3314 		/* some BIOSes list non-existent devices in the DMAR table. */
3315 		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3316 					  i, dev) {
3317 			ret = iommu_prepare_rmrr_dev(rmrr, dev);
3318 			if (ret)
3319 				pr_err("Mapping reserved region failed\n");
3320 		}
3321 	}
3322 
3323 	iommu_prepare_isa();
3324 
3325 domains_done:
3326 
3327 	/*
3328 	 * for each drhd
3329 	 *   enable fault log
3330 	 *   global invalidate context cache
3331 	 *   global invalidate iotlb
3332 	 *   enable translation
3333 	 */
3334 	for_each_iommu(iommu, drhd) {
3335 		if (drhd->ignored) {
3336 			/*
3337 			 * we always have to disable PMRs or DMA may fail on
3338 			 * this device
3339 			 */
3340 			if (force_on)
3341 				iommu_disable_protect_mem_regions(iommu);
3342 			continue;
3343 		}
3344 
3345 		iommu_flush_write_buffer(iommu);
3346 
3347 #ifdef CONFIG_INTEL_IOMMU_SVM
3348 		if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
3349 			ret = intel_svm_enable_prq(iommu);
3350 			if (ret)
3351 				goto free_iommu;
3352 		}
3353 #endif
3354 		ret = dmar_set_interrupt(iommu);
3355 		if (ret)
3356 			goto free_iommu;
3357 
3358 		if (!translation_pre_enabled(iommu))
3359 			iommu_enable_translation(iommu);
3360 
3361 		iommu_disable_protect_mem_regions(iommu);
3362 	}
3363 
3364 	return 0;
3365 
3366 free_iommu:
3367 	for_each_active_iommu(iommu, drhd) {
3368 		disable_dmar_iommu(iommu);
3369 		free_dmar_iommu(iommu);
3370 	}
3371 	kfree(deferred_flush);
3372 free_g_iommus:
3373 	kfree(g_iommus);
3374 error:
3375 	return ret;
3376 }
3377 
3378 /* This takes a number of _MM_ pages, not VTD pages */
3379 static struct iova *intel_alloc_iova(struct device *dev,
3380 				     struct dmar_domain *domain,
3381 				     unsigned long nrpages, uint64_t dma_mask)
3382 {
3383 	struct iova *iova = NULL;
3384 
3385 	/* Restrict dma_mask to the width that the iommu can handle */
3386 	dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
3387 	/* Ensure we reserve the whole size-aligned region */
3388 	nrpages = __roundup_pow_of_two(nrpages);
3389 
3390 	if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
3391 		/*
3392 		 * First try to allocate an io virtual address in
3393 		 * DMA_BIT_MASK(32) and if that fails then try allocating
3394 		 * from higher range
3395 		 */
3396 		iova = alloc_iova(&domain->iovad, nrpages,
3397 				  IOVA_PFN(DMA_BIT_MASK(32)), 1);
3398 		if (iova)
3399 			return iova;
3400 	}
3401 	iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
3402 	if (unlikely(!iova)) {
3403 		pr_err("Allocating %ld-page iova for %s failed",
3404 		       nrpages, dev_name(dev));
3405 		return NULL;
3406 	}
3407 
3408 	return iova;
3409 }
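/*
 * Editor's sketch (illustrative only, not part of the original source):
 * a hypothetical, never-called helper showing the size rounding performed
 * above -- the reservation is padded to a power-of-two number of MM pages
 * so the whole size-aligned region stays reserved.
 */
static void __maybe_unused example_iova_rounding(void)
{
	/* a 5-page request reserves 8 pages */
	WARN_ON(__roundup_pow_of_two(5) != 8);
	/* an exact power of two is left unchanged */
	WARN_ON(__roundup_pow_of_two(8) != 8);
}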
3410 
3411 static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
3412 {
3413 	struct dmar_rmrr_unit *rmrr;
3414 	struct dmar_domain *domain;
3415 	struct device *i_dev;
3416 	int i, ret;
3417 
3418 	domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3419 	if (!domain) {
3420 		pr_err("Allocating domain for %s failed\n",
3421 		       dev_name(dev));
3422 		return NULL;
3423 	}
3424 
3425 	/* We have a new domain - setup possible RMRRs for the device */
3426 	rcu_read_lock();
3427 	for_each_rmrr_units(rmrr) {
3428 		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3429 					  i, i_dev) {
3430 			if (i_dev != dev)
3431 				continue;
3432 
3433 			ret = domain_prepare_identity_map(dev, domain,
3434 							  rmrr->base_address,
3435 							  rmrr->end_address);
3436 			if (ret)
3437 				dev_err(dev, "Mapping reserved region failed\n");
3438 		}
3439 	}
3440 	rcu_read_unlock();
3441 
3442 	return domain;
3443 }
3444 
3445 static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
3446 {
3447 	struct device_domain_info *info;
3448 
3449 	/* No lock here, assumes no domain exit in normal case */
3450 	info = dev->archdata.iommu;
3451 	if (likely(info))
3452 		return info->domain;
3453 
3454 	return __get_valid_domain_for_dev(dev);
3455 }
3456 
3457 /* Check if the dev needs to go through the non-identity map and unmap process. */
3458 static int iommu_no_mapping(struct device *dev)
3459 {
3460 	int found;
3461 
3462 	if (iommu_dummy(dev))
3463 		return 1;
3464 
3465 	if (!iommu_identity_mapping)
3466 		return 0;
3467 
3468 	found = identity_mapping(dev);
3469 	if (found) {
3470 		if (iommu_should_identity_map(dev, 0))
3471 			return 1;
3472 		else {
3473 			/*
3474 			 * The 32-bit DMA device is removed from si_domain and falls
3475 			 * back to non-identity mapping.
3476 			 */
3477 			dmar_remove_one_dev_info(si_domain, dev);
3478 			pr_info("32bit %s uses non-identity mapping\n",
3479 				dev_name(dev));
3480 			return 0;
3481 		}
3482 	} else {
3483 		/*
3484 		 * When a 64-bit DMA device is detached from a VM, the device
3485 		 * is put back into si_domain for identity mapping.
3486 		 */
3487 		if (iommu_should_identity_map(dev, 0)) {
3488 			int ret;
3489 			ret = domain_add_dev_info(si_domain, dev);
3490 			if (!ret) {
3491 				pr_info("64bit %s uses identity mapping\n",
3492 					dev_name(dev));
3493 				return 1;
3494 			}
3495 		}
3496 	}
3497 
3498 	return 0;
3499 }
3500 
3501 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3502 				     size_t size, int dir, u64 dma_mask)
3503 {
3504 	struct dmar_domain *domain;
3505 	phys_addr_t start_paddr;
3506 	struct iova *iova;
3507 	int prot = 0;
3508 	int ret;
3509 	struct intel_iommu *iommu;
3510 	unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3511 
3512 	BUG_ON(dir == DMA_NONE);
3513 
3514 	if (iommu_no_mapping(dev))
3515 		return paddr;
3516 
3517 	domain = get_valid_domain_for_dev(dev);
3518 	if (!domain)
3519 		return 0;
3520 
3521 	iommu = domain_get_iommu(domain);
3522 	size = aligned_nrpages(paddr, size);
3523 
3524 	iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3525 	if (!iova)
3526 		goto error;
3527 
3528 	/*
3529 	 * Check if DMAR supports zero-length reads on write only
3530 	 * mappings..
3531 	 */
3532 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3533 			!cap_zlr(iommu->cap))
3534 		prot |= DMA_PTE_READ;
3535 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3536 		prot |= DMA_PTE_WRITE;
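	/*
	 * Note: when the IOMMU cannot do zero-length reads (!cap_zlr), the
	 * check above grants DMA_PTE_READ even for DMA_FROM_DEVICE, so a
	 * zero-length read issued by the device will not fault.
	 */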
3537 	/*
3538 	 * The range paddr..(paddr + size) might cover only part of a page, so we
3539 	 * should map the whole page.  Note: if two parts of one page are mapped
3540 	 * separately, we might end up with two guest addresses mapping to the
3541 	 * same host paddr, but this is not a big problem.
3542 	 */
3543 	ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
3544 				 mm_to_dma_pfn(paddr_pfn), size, prot);
3545 	if (ret)
3546 		goto error;
3547 
3548 	/* it's a non-present to present mapping. Only flush if caching mode */
3549 	if (cap_caching_mode(iommu->cap))
3550 		iommu_flush_iotlb_psi(iommu, domain,
3551 				      mm_to_dma_pfn(iova->pfn_lo),
3552 				      size, 0, 1);
3553 	else
3554 		iommu_flush_write_buffer(iommu);
3555 
3556 	start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3557 	start_paddr += paddr & ~PAGE_MASK;
3558 	return start_paddr;
3559 
3560 error:
3561 	if (iova)
3562 		__free_iova(&domain->iovad, iova);
3563 	pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
3564 		dev_name(dev), size, (unsigned long long)paddr, dir);
3565 	return 0;
3566 }
3567 
3568 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3569 				 unsigned long offset, size_t size,
3570 				 enum dma_data_direction dir,
3571 				 struct dma_attrs *attrs)
3572 {
3573 	return __intel_map_single(dev, page_to_phys(page) + offset, size,
3574 				  dir, *dev->dma_mask);
3575 }
3576 
3577 static void flush_unmaps(void)
3578 {
3579 	int i, j;
3580 
3581 	timer_on = 0;
3582 
3583 	/* just flush them all */
3584 	for (i = 0; i < g_num_of_iommus; i++) {
3585 		struct intel_iommu *iommu = g_iommus[i];
3586 		if (!iommu)
3587 			continue;
3588 
3589 		if (!deferred_flush[i].next)
3590 			continue;
3591 
3592 		/* In caching mode, global flushes make emulation expensive */
3593 		if (!cap_caching_mode(iommu->cap))
3594 			iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3595 					 DMA_TLB_GLOBAL_FLUSH);
3596 		for (j = 0; j < deferred_flush[i].next; j++) {
3597 			unsigned long mask;
3598 			struct iova *iova = deferred_flush[i].iova[j];
3599 			struct dmar_domain *domain = deferred_flush[i].domain[j];
3600 
3601 			/* On real hardware multiple invalidations are expensive */
3602 			if (cap_caching_mode(iommu->cap))
3603 				iommu_flush_iotlb_psi(iommu, domain,
3604 					iova->pfn_lo, iova_size(iova),
3605 					!deferred_flush[i].freelist[j], 0);
3606 			else {
3607 				mask = ilog2(mm_to_dma_pfn(iova_size(iova)));
3608 				iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3609 						(uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3610 			}
3611 			__free_iova(&deferred_flush[i].domain[j]->iovad, iova);
3612 			if (deferred_flush[i].freelist[j])
3613 				dma_free_pagelist(deferred_flush[i].freelist[j]);
3614 		}
3615 		deferred_flush[i].next = 0;
3616 	}
3617 
3618 	list_size = 0;
3619 }
3620 
3621 static void flush_unmaps_timeout(unsigned long data)
3622 {
3623 	unsigned long flags;
3624 
3625 	spin_lock_irqsave(&async_umap_flush_lock, flags);
3626 	flush_unmaps();
3627 	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3628 }
3629 
3630 static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
3631 {
3632 	unsigned long flags;
3633 	int next, iommu_id;
3634 	struct intel_iommu *iommu;
3635 
3636 	spin_lock_irqsave(&async_umap_flush_lock, flags);
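	/*
	 * list_size counts deferred entries across all IOMMUs; once it hits
	 * the high-water mark, flush synchronously to make room.
	 */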
3637 	if (list_size == HIGH_WATER_MARK)
3638 		flush_unmaps();
3639 
3640 	iommu = domain_get_iommu(dom);
3641 	iommu_id = iommu->seq_id;
3642 
3643 	next = deferred_flush[iommu_id].next;
3644 	deferred_flush[iommu_id].domain[next] = dom;
3645 	deferred_flush[iommu_id].iova[next] = iova;
3646 	deferred_flush[iommu_id].freelist[next] = freelist;
3647 	deferred_flush[iommu_id].next++;
3648 
3649 	if (!timer_on) {
3650 		mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3651 		timer_on = 1;
3652 	}
3653 	list_size++;
3654 	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3655 }
3656 
3657 static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
3658 {
3659 	struct dmar_domain *domain;
3660 	unsigned long start_pfn, last_pfn;
3661 	struct iova *iova;
3662 	struct intel_iommu *iommu;
3663 	struct page *freelist;
3664 
3665 	if (iommu_no_mapping(dev))
3666 		return;
3667 
3668 	domain = find_domain(dev);
3669 	BUG_ON(!domain);
3670 
3671 	iommu = domain_get_iommu(domain);
3672 
3673 	iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
3674 	if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3675 		      (unsigned long long)dev_addr))
3676 		return;
3677 
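	/* Convert the IOVA range from MM pages to an inclusive VT-d PFN range. */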
3678 	start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3679 	last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3680 
3681 	pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3682 		 dev_name(dev), start_pfn, last_pfn);
3683 
3684 	freelist = domain_unmap(domain, start_pfn, last_pfn);
3685 
3686 	if (intel_iommu_strict) {
3687 		iommu_flush_iotlb_psi(iommu, domain, start_pfn,
3688 				      last_pfn - start_pfn + 1, !freelist, 0);
3689 		/* free iova */
3690 		__free_iova(&domain->iovad, iova);
3691 		dma_free_pagelist(freelist);
3692 	} else {
3693 		add_unmap(domain, iova, freelist);
3694 		/*
3695 		 * queue up the release of the unmap to save the ~1/6th of the
3696 		 * CPU time used up by the IOTLB flush operation...
3697 		 */
3698 	}
3699 }
3700 
3701 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3702 			     size_t size, enum dma_data_direction dir,
3703 			     struct dma_attrs *attrs)
3704 {
3705 	intel_unmap(dev, dev_addr);
3706 }
3707 
3708 static void *intel_alloc_coherent(struct device *dev, size_t size,
3709 				  dma_addr_t *dma_handle, gfp_t flags,
3710 				  struct dma_attrs *attrs)
3711 {
3712 	struct page *page = NULL;
3713 	int order;
3714 
3715 	size = PAGE_ALIGN(size);
3716 	order = get_order(size);
3717 
3718 	if (!iommu_no_mapping(dev))
3719 		flags &= ~(GFP_DMA | GFP_DMA32);
3720 	else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3721 		if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
3722 			flags |= GFP_DMA;
3723 		else
3724 			flags |= GFP_DMA32;
3725 	}
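	/*
	 * When the device is translated by the IOMMU, the IOVA allocator
	 * already honours the coherent DMA mask, so the zone restrictions are
	 * dropped above; for passthrough devices the GFP zone is the only way
	 * to satisfy a small mask.
	 */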
3726 
3727 	if (gfpflags_allow_blocking(flags)) {
3728 		unsigned int count = size >> PAGE_SHIFT;
3729 
3730 		page = dma_alloc_from_contiguous(dev, count, order);
3731 		if (page && iommu_no_mapping(dev) &&
3732 		    page_to_phys(page) + size > dev->coherent_dma_mask) {
3733 			dma_release_from_contiguous(dev, page, count);
3734 			page = NULL;
3735 		}
3736 	}
3737 
3738 	if (!page)
3739 		page = alloc_pages(flags, order);
3740 	if (!page)
3741 		return NULL;
3742 	memset(page_address(page), 0, size);
3743 
3744 	*dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3745 					 DMA_BIDIRECTIONAL,
3746 					 dev->coherent_dma_mask);
3747 	if (*dma_handle)
3748 		return page_address(page);
3749 	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3750 		__free_pages(page, order);
3751 
3752 	return NULL;
3753 }
3754 
3755 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3756 				dma_addr_t dma_handle, struct dma_attrs *attrs)
3757 {
3758 	int order;
3759 	struct page *page = virt_to_page(vaddr);
3760 
3761 	size = PAGE_ALIGN(size);
3762 	order = get_order(size);
3763 
3764 	intel_unmap(dev, dma_handle);
3765 	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3766 		__free_pages(page, order);
3767 }
3768 
3769 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3770 			   int nelems, enum dma_data_direction dir,
3771 			   struct dma_attrs *attrs)
3772 {
3773 	intel_unmap(dev, sglist[0].dma_address);
3774 }
3775 
3776 static int intel_nontranslate_map_sg(struct device *hddev,
3777 	struct scatterlist *sglist, int nelems, int dir)
3778 {
3779 	int i;
3780 	struct scatterlist *sg;
3781 
3782 	for_each_sg(sglist, sg, nelems, i) {
3783 		BUG_ON(!sg_page(sg));
3784 		sg->dma_address = sg_phys(sg);
3785 		sg->dma_length = sg->length;
3786 	}
3787 	return nelems;
3788 }
3789 
3790 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3791 			enum dma_data_direction dir, struct dma_attrs *attrs)
3792 {
3793 	int i;
3794 	struct dmar_domain *domain;
3795 	size_t size = 0;
3796 	int prot = 0;
3797 	struct iova *iova = NULL;
3798 	int ret;
3799 	struct scatterlist *sg;
3800 	unsigned long start_vpfn;
3801 	struct intel_iommu *iommu;
3802 
3803 	BUG_ON(dir == DMA_NONE);
3804 	if (iommu_no_mapping(dev))
3805 		return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
3806 
3807 	domain = get_valid_domain_for_dev(dev);
3808 	if (!domain)
3809 		return 0;
3810 
3811 	iommu = domain_get_iommu(domain);
3812 
3813 	for_each_sg(sglist, sg, nelems, i)
3814 		size += aligned_nrpages(sg->offset, sg->length);
3815 
3816 	iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3817 				*dev->dma_mask);
3818 	if (!iova) {
3819 		sglist->dma_length = 0;
3820 		return 0;
3821 	}
3822 
3823 	/*
3824 	 * Check if DMAR supports zero-length reads on write only
3825 	 * mappings..
3826 	 */
3827 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3828 			!cap_zlr(iommu->cap))
3829 		prot |= DMA_PTE_READ;
3830 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3831 		prot |= DMA_PTE_WRITE;
3832 
3833 	start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3834 
3835 	ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3836 	if (unlikely(ret)) {
3837 		dma_pte_free_pagetable(domain, start_vpfn,
3838 				       start_vpfn + size - 1);
3839 		__free_iova(&domain->iovad, iova);
3840 		return 0;
3841 	}
3842 
3843 	/* it's a non-present to present mapping. Only flush if caching mode */
3844 	if (cap_caching_mode(iommu->cap))
3845 		iommu_flush_iotlb_psi(iommu, domain, start_vpfn, size, 0, 1);
3846 	else
3847 		iommu_flush_write_buffer(iommu);
3848 
3849 	return nelems;
3850 }
3851 
3852 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3853 {
3854 	return !dma_addr;
3855 }
3856 
3857 struct dma_map_ops intel_dma_ops = {
3858 	.alloc = intel_alloc_coherent,
3859 	.free = intel_free_coherent,
3860 	.map_sg = intel_map_sg,
3861 	.unmap_sg = intel_unmap_sg,
3862 	.map_page = intel_map_page,
3863 	.unmap_page = intel_unmap_page,
3864 	.mapping_error = intel_mapping_error,
3865 };
3866 
3867 static inline int iommu_domain_cache_init(void)
3868 {
3869 	int ret = 0;
3870 
3871 	iommu_domain_cache = kmem_cache_create("iommu_domain",
3872 					 sizeof(struct dmar_domain),
3873 					 0,
3874 					 SLAB_HWCACHE_ALIGN,
3875 
3876 					 NULL);
3877 	if (!iommu_domain_cache) {
3878 		pr_err("Couldn't create iommu_domain cache\n");
3879 		ret = -ENOMEM;
3880 	}
3881 
3882 	return ret;
3883 }
3884 
3885 static inline int iommu_devinfo_cache_init(void)
3886 {
3887 	int ret = 0;
3888 
3889 	iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3890 					 sizeof(struct device_domain_info),
3891 					 0,
3892 					 SLAB_HWCACHE_ALIGN,
3893 					 NULL);
3894 	if (!iommu_devinfo_cache) {
3895 		pr_err("Couldn't create devinfo cache\n");
3896 		ret = -ENOMEM;
3897 	}
3898 
3899 	return ret;
3900 }
3901 
3902 static int __init iommu_init_mempool(void)
3903 {
3904 	int ret;
3905 	ret = iova_cache_get();
3906 	if (ret)
3907 		return ret;
3908 
3909 	ret = iommu_domain_cache_init();
3910 	if (ret)
3911 		goto domain_error;
3912 
3913 	ret = iommu_devinfo_cache_init();
3914 	if (!ret)
3915 		return ret;
3916 
3917 	kmem_cache_destroy(iommu_domain_cache);
3918 domain_error:
3919 	iova_cache_put();
3920 
3921 	return -ENOMEM;
3922 }
3923 
3924 static void __init iommu_exit_mempool(void)
3925 {
3926 	kmem_cache_destroy(iommu_devinfo_cache);
3927 	kmem_cache_destroy(iommu_domain_cache);
3928 	iova_cache_put();
3929 }
3930 
3931 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3932 {
3933 	struct dmar_drhd_unit *drhd;
3934 	u32 vtbar;
3935 	int rc;
3936 
3937 	/* We know that this device on this chipset has its own IOMMU.
3938 	 * If we find it under a different IOMMU, then the BIOS is lying
3939 	 * to us. Hope that the IOMMU for this device is actually
3940 	 * disabled, and it needs no translation...
3941 	 */
3942 	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3943 	if (rc) {
3944 		/* "can't" happen */
3945 		dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3946 		return;
3947 	}
3948 	vtbar &= 0xffff0000;
3949 
3950 	/* we know that this IOMMU should be at offset 0xa000 from vtbar */
3951 	drhd = dmar_find_matched_drhd_unit(pdev);
3952 	if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
3953 		pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
3954 		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
3955 		pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3956 	}
3957 }
3958 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3959 
3960 static void __init init_no_remapping_devices(void)
3961 {
3962 	struct dmar_drhd_unit *drhd;
3963 	struct device *dev;
3964 	int i;
3965 
3966 	for_each_drhd_unit(drhd) {
3967 		if (!drhd->include_all) {
3968 			for_each_active_dev_scope(drhd->devices,
3969 						  drhd->devices_cnt, i, dev)
3970 				break;
3971 			/* ignore DMAR unit if no devices exist */
3972 			if (i == drhd->devices_cnt)
3973 				drhd->ignored = 1;
3974 		}
3975 	}
3976 
3977 	for_each_active_drhd_unit(drhd) {
3978 		if (drhd->include_all)
3979 			continue;
3980 
3981 		for_each_active_dev_scope(drhd->devices,
3982 					  drhd->devices_cnt, i, dev)
3983 			if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3984 				break;
3985 		if (i < drhd->devices_cnt)
3986 			continue;
3987 
3988 		/* This IOMMU has *only* gfx devices. Either bypass it or
3989 		   set the gfx_mapped flag, as appropriate */
3990 		if (!dmar_map_gfx) {
3991 			drhd->ignored = 1;
3992 			for_each_active_dev_scope(drhd->devices,
3993 						  drhd->devices_cnt, i, dev)
3994 				dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3995 		}
3996 	}
3997 }
3998 
3999 #ifdef CONFIG_SUSPEND
4000 static int init_iommu_hw(void)
4001 {
4002 	struct dmar_drhd_unit *drhd;
4003 	struct intel_iommu *iommu = NULL;
4004 
4005 	for_each_active_iommu(iommu, drhd)
4006 		if (iommu->qi)
4007 			dmar_reenable_qi(iommu);
4008 
4009 	for_each_iommu(iommu, drhd) {
4010 		if (drhd->ignored) {
4011 			/*
4012 			 * we always have to disable PMRs or DMA may fail on
4013 			 * this device
4014 			 */
4015 			if (force_on)
4016 				iommu_disable_protect_mem_regions(iommu);
4017 			continue;
4018 		}
4019 
4020 		iommu_flush_write_buffer(iommu);
4021 
4022 		iommu_set_root_entry(iommu);
4023 
4024 		iommu->flush.flush_context(iommu, 0, 0, 0,
4025 					   DMA_CCMD_GLOBAL_INVL);
4026 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4027 		iommu_enable_translation(iommu);
4028 		iommu_disable_protect_mem_regions(iommu);
4029 	}
4030 
4031 	return 0;
4032 }
4033 
4034 static void iommu_flush_all(void)
4035 {
4036 	struct dmar_drhd_unit *drhd;
4037 	struct intel_iommu *iommu;
4038 
4039 	for_each_active_iommu(iommu, drhd) {
4040 		iommu->flush.flush_context(iommu, 0, 0, 0,
4041 					   DMA_CCMD_GLOBAL_INVL);
4042 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
4043 					 DMA_TLB_GLOBAL_FLUSH);
4044 	}
4045 }
4046 
4047 static int iommu_suspend(void)
4048 {
4049 	struct dmar_drhd_unit *drhd;
4050 	struct intel_iommu *iommu = NULL;
4051 	unsigned long flag;
4052 
4053 	for_each_active_iommu(iommu, drhd) {
4054 		iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
4055 						 GFP_ATOMIC);
4056 		if (!iommu->iommu_state)
4057 			goto nomem;
4058 	}
4059 
4060 	iommu_flush_all();
4061 
4062 	for_each_active_iommu(iommu, drhd) {
4063 		iommu_disable_translation(iommu);
4064 
4065 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
4066 
4067 		iommu->iommu_state[SR_DMAR_FECTL_REG] =
4068 			readl(iommu->reg + DMAR_FECTL_REG);
4069 		iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4070 			readl(iommu->reg + DMAR_FEDATA_REG);
4071 		iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4072 			readl(iommu->reg + DMAR_FEADDR_REG);
4073 		iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4074 			readl(iommu->reg + DMAR_FEUADDR_REG);
4075 
4076 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4077 	}
4078 	return 0;
4079 
4080 nomem:
4081 	for_each_active_iommu(iommu, drhd)
4082 		kfree(iommu->iommu_state);
4083 
4084 	return -ENOMEM;
4085 }
4086 
4087 static void iommu_resume(void)
4088 {
4089 	struct dmar_drhd_unit *drhd;
4090 	struct intel_iommu *iommu = NULL;
4091 	unsigned long flag;
4092 
4093 	if (init_iommu_hw()) {
4094 		if (force_on)
4095 			panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4096 		else
4097 			WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
4098 		return;
4099 	}
4100 
4101 	for_each_active_iommu(iommu, drhd) {
4102 
4103 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
4104 
4105 		writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4106 			iommu->reg + DMAR_FECTL_REG);
4107 		writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4108 			iommu->reg + DMAR_FEDATA_REG);
4109 		writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4110 			iommu->reg + DMAR_FEADDR_REG);
4111 		writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4112 			iommu->reg + DMAR_FEUADDR_REG);
4113 
4114 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4115 	}
4116 
4117 	for_each_active_iommu(iommu, drhd)
4118 		kfree(iommu->iommu_state);
4119 }
4120 
4121 static struct syscore_ops iommu_syscore_ops = {
4122 	.resume		= iommu_resume,
4123 	.suspend	= iommu_suspend,
4124 };
4125 
4126 static void __init init_iommu_pm_ops(void)
4127 {
4128 	register_syscore_ops(&iommu_syscore_ops);
4129 }
4130 
4131 #else
4132 static inline void init_iommu_pm_ops(void) {}
4133 #endif	/* CONFIG_SUSPEND */
4134 
4135 
4136 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
4137 {
4138 	struct acpi_dmar_reserved_memory *rmrr;
4139 	struct dmar_rmrr_unit *rmrru;
4140 
4141 	rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4142 	if (!rmrru)
4143 		return -ENOMEM;
4144 
4145 	rmrru->hdr = header;
4146 	rmrr = (struct acpi_dmar_reserved_memory *)header;
4147 	rmrru->base_address = rmrr->base_address;
4148 	rmrru->end_address = rmrr->end_address;
4149 	rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4150 				((void *)rmrr) + rmrr->header.length,
4151 				&rmrru->devices_cnt);
4152 	if (rmrru->devices_cnt && rmrru->devices == NULL) {
4153 		kfree(rmrru);
4154 		return -ENOMEM;
4155 	}
4156 
4157 	list_add(&rmrru->list, &dmar_rmrr_units);
4158 
4159 	return 0;
4160 }
4161 
4162 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4163 {
4164 	struct dmar_atsr_unit *atsru;
4165 	struct acpi_dmar_atsr *tmp;
4166 
4167 	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4168 		tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4169 		if (atsr->segment != tmp->segment)
4170 			continue;
4171 		if (atsr->header.length != tmp->header.length)
4172 			continue;
4173 		if (memcmp(atsr, tmp, atsr->header.length) == 0)
4174 			return atsru;
4175 	}
4176 
4177 	return NULL;
4178 }
4179 
4180 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4181 {
4182 	struct acpi_dmar_atsr *atsr;
4183 	struct dmar_atsr_unit *atsru;
4184 
4185 	if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
4186 		return 0;
4187 
4188 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4189 	atsru = dmar_find_atsr(atsr);
4190 	if (atsru)
4191 		return 0;
4192 
4193 	atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
4194 	if (!atsru)
4195 		return -ENOMEM;
4196 
4197 	/*
4198 	 * If memory is allocated from slab by ACPI _DSM method, we need to
4199 	 * copy the memory content because the memory buffer will be freed
4200 	 * on return.
4201 	 */
4202 	atsru->hdr = (void *)(atsru + 1);
4203 	memcpy(atsru->hdr, hdr, hdr->length);
4204 	atsru->include_all = atsr->flags & 0x1;
4205 	if (!atsru->include_all) {
4206 		atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4207 				(void *)atsr + atsr->header.length,
4208 				&atsru->devices_cnt);
4209 		if (atsru->devices_cnt && atsru->devices == NULL) {
4210 			kfree(atsru);
4211 			return -ENOMEM;
4212 		}
4213 	}
4214 
4215 	list_add_rcu(&atsru->list, &dmar_atsr_units);
4216 
4217 	return 0;
4218 }
4219 
4220 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4221 {
4222 	dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4223 	kfree(atsru);
4224 }
4225 
4226 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4227 {
4228 	struct acpi_dmar_atsr *atsr;
4229 	struct dmar_atsr_unit *atsru;
4230 
4231 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4232 	atsru = dmar_find_atsr(atsr);
4233 	if (atsru) {
4234 		list_del_rcu(&atsru->list);
4235 		synchronize_rcu();
4236 		intel_iommu_free_atsr(atsru);
4237 	}
4238 
4239 	return 0;
4240 }
4241 
4242 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4243 {
4244 	int i;
4245 	struct device *dev;
4246 	struct acpi_dmar_atsr *atsr;
4247 	struct dmar_atsr_unit *atsru;
4248 
4249 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4250 	atsru = dmar_find_atsr(atsr);
4251 	if (!atsru)
4252 		return 0;
4253 
4254 	if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
4255 		for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4256 					  i, dev)
4257 			return -EBUSY;
4258 	}
4259 
4260 	return 0;
4261 }
4262 
4263 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4264 {
4265 	int sp, ret = 0;
4266 	struct intel_iommu *iommu = dmaru->iommu;
4267 
4268 	if (g_iommus[iommu->seq_id])
4269 		return 0;
4270 
4271 	if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
4272 		pr_warn("%s: Doesn't support hardware pass through.\n",
4273 			iommu->name);
4274 		return -ENXIO;
4275 	}
4276 	if (!ecap_sc_support(iommu->ecap) &&
4277 	    domain_update_iommu_snooping(iommu)) {
4278 		pr_warn("%s: Doesn't support snooping.\n",
4279 			iommu->name);
4280 		return -ENXIO;
4281 	}
4282 	sp = domain_update_iommu_superpage(iommu) - 1;
4283 	if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
4284 		pr_warn("%s: Doesn't support large page.\n",
4285 			iommu->name);
4286 		return -ENXIO;
4287 	}
4288 
4289 	/*
4290 	 * Disable translation if already enabled prior to OS handover.
4291 	 */
4292 	if (iommu->gcmd & DMA_GCMD_TE)
4293 		iommu_disable_translation(iommu);
4294 
4295 	g_iommus[iommu->seq_id] = iommu;
4296 	ret = iommu_init_domains(iommu);
4297 	if (ret == 0)
4298 		ret = iommu_alloc_root_entry(iommu);
4299 	if (ret)
4300 		goto out;
4301 
4302 #ifdef CONFIG_INTEL_IOMMU_SVM
4303 	if (pasid_enabled(iommu))
4304 		intel_svm_alloc_pasid_tables(iommu);
4305 #endif
4306 
4307 	if (dmaru->ignored) {
4308 		/*
4309 		 * we always have to disable PMRs or DMA may fail on this device
4310 		 */
4311 		if (force_on)
4312 			iommu_disable_protect_mem_regions(iommu);
4313 		return 0;
4314 	}
4315 
4316 	intel_iommu_init_qi(iommu);
4317 	iommu_flush_write_buffer(iommu);
4318 
4319 #ifdef CONFIG_INTEL_IOMMU_SVM
4320 	if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
4321 		ret = intel_svm_enable_prq(iommu);
4322 		if (ret)
4323 			goto disable_iommu;
4324 	}
4325 #endif
4326 	ret = dmar_set_interrupt(iommu);
4327 	if (ret)
4328 		goto disable_iommu;
4329 
4330 	iommu_set_root_entry(iommu);
4331 	iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4332 	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4333 	iommu_enable_translation(iommu);
4334 
4335 	iommu_disable_protect_mem_regions(iommu);
4336 	return 0;
4337 
4338 disable_iommu:
4339 	disable_dmar_iommu(iommu);
4340 out:
4341 	free_dmar_iommu(iommu);
4342 	return ret;
4343 }
4344 
4345 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4346 {
4347 	int ret = 0;
4348 	struct intel_iommu *iommu = dmaru->iommu;
4349 
4350 	if (!intel_iommu_enabled)
4351 		return 0;
4352 	if (iommu == NULL)
4353 		return -EINVAL;
4354 
4355 	if (insert) {
4356 		ret = intel_iommu_add(dmaru);
4357 	} else {
4358 		disable_dmar_iommu(iommu);
4359 		free_dmar_iommu(iommu);
4360 	}
4361 
4362 	return ret;
4363 }
4364 
4365 static void intel_iommu_free_dmars(void)
4366 {
4367 	struct dmar_rmrr_unit *rmrru, *rmrr_n;
4368 	struct dmar_atsr_unit *atsru, *atsr_n;
4369 
4370 	list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4371 		list_del(&rmrru->list);
4372 		dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4373 		kfree(rmrru);
4374 	}
4375 
4376 	list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4377 		list_del(&atsru->list);
4378 		intel_iommu_free_atsr(atsru);
4379 	}
4380 }
4381 
4382 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4383 {
4384 	int i, ret = 1;
4385 	struct pci_bus *bus;
4386 	struct pci_dev *bridge = NULL;
4387 	struct device *tmp;
4388 	struct acpi_dmar_atsr *atsr;
4389 	struct dmar_atsr_unit *atsru;
4390 
4391 	dev = pci_physfn(dev);
4392 	for (bus = dev->bus; bus; bus = bus->parent) {
4393 		bridge = bus->self;
4394 		/* If it's an integrated device, allow ATS */
4395 		if (!bridge)
4396 			return 1;
4397 		/* Connected via non-PCIe: no ATS */
4398 		if (!pci_is_pcie(bridge) ||
4399 		    pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
4400 			return 0;
4401 		/* If we found the root port, look it up in the ATSR */
4402 		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
4403 			break;
4404 	}
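	/* At this point 'bridge' is the PCIe root port above the device. */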
4405 
4406 	rcu_read_lock();
4407 	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4408 		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4409 		if (atsr->segment != pci_domain_nr(dev->bus))
4410 			continue;
4411 
4412 		for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
4413 			if (tmp == &bridge->dev)
4414 				goto out;
4415 
4416 		if (atsru->include_all)
4417 			goto out;
4418 	}
4419 	ret = 0;
4420 out:
4421 	rcu_read_unlock();
4422 
4423 	return ret;
4424 }
4425 
4426 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4427 {
4428 	int ret = 0;
4429 	struct dmar_rmrr_unit *rmrru;
4430 	struct dmar_atsr_unit *atsru;
4431 	struct acpi_dmar_atsr *atsr;
4432 	struct acpi_dmar_reserved_memory *rmrr;
4433 
4434 	if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
4435 		return 0;
4436 
4437 	list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4438 		rmrr = container_of(rmrru->hdr,
4439 				    struct acpi_dmar_reserved_memory, header);
4440 		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4441 			ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4442 				((void *)rmrr) + rmrr->header.length,
4443 				rmrr->segment, rmrru->devices,
4444 				rmrru->devices_cnt);
4445 			if (ret < 0)
4446 				return ret;
4447 		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4448 			dmar_remove_dev_scope(info, rmrr->segment,
4449 				rmrru->devices, rmrru->devices_cnt);
4450 		}
4451 	}
4452 
4453 	list_for_each_entry(atsru, &dmar_atsr_units, list) {
4454 		if (atsru->include_all)
4455 			continue;
4456 
4457 		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4458 		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4459 			ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4460 					(void *)atsr + atsr->header.length,
4461 					atsr->segment, atsru->devices,
4462 					atsru->devices_cnt);
4463 			if (ret > 0)
4464 				break;
4465 			else if (ret < 0)
4466 				return ret;
4467 		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4468 			if (dmar_remove_dev_scope(info, atsr->segment,
4469 					atsru->devices, atsru->devices_cnt))
4470 				break;
4471 		}
4472 	}
4473 
4474 	return 0;
4475 }
4476 
4477 /*
4478  * Here we only respond to a device being unbound from its driver.
4479  *
4480  * An added device is not attached to its DMAR domain here yet; that happens
4481  * when the device is first mapped to an IOVA.
4482  */
4483 static int device_notifier(struct notifier_block *nb,
4484 				  unsigned long action, void *data)
4485 {
4486 	struct device *dev = data;
4487 	struct dmar_domain *domain;
4488 
4489 	if (iommu_dummy(dev))
4490 		return 0;
4491 
4492 	if (action != BUS_NOTIFY_REMOVED_DEVICE)
4493 		return 0;
4494 
4495 	domain = find_domain(dev);
4496 	if (!domain)
4497 		return 0;
4498 
4499 	dmar_remove_one_dev_info(domain, dev);
4500 	if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
4501 		domain_exit(domain);
4502 
4503 	return 0;
4504 }
4505 
4506 static struct notifier_block device_nb = {
4507 	.notifier_call = device_notifier,
4508 };
4509 
4510 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4511 				       unsigned long val, void *v)
4512 {
4513 	struct memory_notify *mhp = v;
4514 	unsigned long long start, end;
4515 	unsigned long start_vpfn, last_vpfn;
4516 
4517 	switch (val) {
4518 	case MEM_GOING_ONLINE:
4519 		start = mhp->start_pfn << PAGE_SHIFT;
4520 		end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4521 		if (iommu_domain_identity_map(si_domain, start, end)) {
4522 			pr_warn("Failed to build identity map for [%llx-%llx]\n",
4523 				start, end);
4524 			return NOTIFY_BAD;
4525 		}
4526 		break;
4527 
4528 	case MEM_OFFLINE:
4529 	case MEM_CANCEL_ONLINE:
4530 		start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4531 		last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
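		/*
		 * Tear down the identity map covering the offlined range,
		 * one IOVA chunk at a time, flushing the IOTLBs as we go.
		 */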
4532 		while (start_vpfn <= last_vpfn) {
4533 			struct iova *iova;
4534 			struct dmar_drhd_unit *drhd;
4535 			struct intel_iommu *iommu;
4536 			struct page *freelist;
4537 
4538 			iova = find_iova(&si_domain->iovad, start_vpfn);
4539 			if (iova == NULL) {
4540 				pr_debug("Failed get IOVA for PFN %lx\n",
4541 					 start_vpfn);
4542 				break;
4543 			}
4544 
4545 			iova = split_and_remove_iova(&si_domain->iovad, iova,
4546 						     start_vpfn, last_vpfn);
4547 			if (iova == NULL) {
4548 				pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
4549 					start_vpfn, last_vpfn);
4550 				return NOTIFY_BAD;
4551 			}
4552 
4553 			freelist = domain_unmap(si_domain, iova->pfn_lo,
4554 					       iova->pfn_hi);
4555 
4556 			rcu_read_lock();
4557 			for_each_active_iommu(iommu, drhd)
4558 				iommu_flush_iotlb_psi(iommu, si_domain,
4559 					iova->pfn_lo, iova_size(iova),
4560 					!freelist, 0);
4561 			rcu_read_unlock();
4562 			dma_free_pagelist(freelist);
4563 
4564 			start_vpfn = iova->pfn_hi + 1;
4565 			free_iova_mem(iova);
4566 		}
4567 		break;
4568 	}
4569 
4570 	return NOTIFY_OK;
4571 }
4572 
4573 static struct notifier_block intel_iommu_memory_nb = {
4574 	.notifier_call = intel_iommu_memory_notifier,
4575 	.priority = 0
4576 };
4577 
4578 
4579 static ssize_t intel_iommu_show_version(struct device *dev,
4580 					struct device_attribute *attr,
4581 					char *buf)
4582 {
4583 	struct intel_iommu *iommu = dev_get_drvdata(dev);
4584 	u32 ver = readl(iommu->reg + DMAR_VER_REG);
4585 	return sprintf(buf, "%d:%d\n",
4586 		       DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4587 }
4588 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4589 
4590 static ssize_t intel_iommu_show_address(struct device *dev,
4591 					struct device_attribute *attr,
4592 					char *buf)
4593 {
4594 	struct intel_iommu *iommu = dev_get_drvdata(dev);
4595 	return sprintf(buf, "%llx\n", iommu->reg_phys);
4596 }
4597 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4598 
4599 static ssize_t intel_iommu_show_cap(struct device *dev,
4600 				    struct device_attribute *attr,
4601 				    char *buf)
4602 {
4603 	struct intel_iommu *iommu = dev_get_drvdata(dev);
4604 	return sprintf(buf, "%llx\n", iommu->cap);
4605 }
4606 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4607 
4608 static ssize_t intel_iommu_show_ecap(struct device *dev,
4609 				    struct device_attribute *attr,
4610 				    char *buf)
4611 {
4612 	struct intel_iommu *iommu = dev_get_drvdata(dev);
4613 	return sprintf(buf, "%llx\n", iommu->ecap);
4614 }
4615 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4616 
4617 static ssize_t intel_iommu_show_ndoms(struct device *dev,
4618 				      struct device_attribute *attr,
4619 				      char *buf)
4620 {
4621 	struct intel_iommu *iommu = dev_get_drvdata(dev);
4622 	return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4623 }
4624 static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4625 
4626 static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4627 					   struct device_attribute *attr,
4628 					   char *buf)
4629 {
4630 	struct intel_iommu *iommu = dev_get_drvdata(dev);
4631 	return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4632 						  cap_ndoms(iommu->cap)));
4633 }
4634 static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4635 
4636 static struct attribute *intel_iommu_attrs[] = {
4637 	&dev_attr_version.attr,
4638 	&dev_attr_address.attr,
4639 	&dev_attr_cap.attr,
4640 	&dev_attr_ecap.attr,
4641 	&dev_attr_domains_supported.attr,
4642 	&dev_attr_domains_used.attr,
4643 	NULL,
4644 };
4645 
4646 static struct attribute_group intel_iommu_group = {
4647 	.name = "intel-iommu",
4648 	.attrs = intel_iommu_attrs,
4649 };
4650 
4651 const struct attribute_group *intel_iommu_groups[] = {
4652 	&intel_iommu_group,
4653 	NULL,
4654 };
4655 
4656 int __init intel_iommu_init(void)
4657 {
4658 	int ret = -ENODEV;
4659 	struct dmar_drhd_unit *drhd;
4660 	struct intel_iommu *iommu;
4661 
4662 	/* VT-d is required for a TXT/tboot launch, so enforce that */
4663 	force_on = tboot_force_iommu();
4664 
4665 	if (iommu_init_mempool()) {
4666 		if (force_on)
4667 			panic("tboot: Failed to initialize iommu memory\n");
4668 		return -ENOMEM;
4669 	}
4670 
4671 	down_write(&dmar_global_lock);
4672 	if (dmar_table_init()) {
4673 		if (force_on)
4674 			panic("tboot: Failed to initialize DMAR table\n");
4675 		goto out_free_dmar;
4676 	}
4677 
4678 	if (dmar_dev_scope_init() < 0) {
4679 		if (force_on)
4680 			panic("tboot: Failed to initialize DMAR device scope\n");
4681 		goto out_free_dmar;
4682 	}
4683 
4684 	if (no_iommu || dmar_disabled)
4685 		goto out_free_dmar;
4686 
4687 	if (list_empty(&dmar_rmrr_units))
4688 		pr_info("No RMRR found\n");
4689 
4690 	if (list_empty(&dmar_atsr_units))
4691 		pr_info("No ATSR found\n");
4692 
4693 	if (dmar_init_reserved_ranges()) {
4694 		if (force_on)
4695 			panic("tboot: Failed to reserve iommu ranges\n");
4696 		goto out_free_reserved_range;
4697 	}
4698 
4699 	if (dmar_map_gfx)
4700 		intel_iommu_gfx_mapped = 1;
4701 
4702 	init_no_remapping_devices();
4703 
4704 	ret = init_dmars();
4705 	if (ret) {
4706 		if (force_on)
4707 			panic("tboot: Failed to initialize DMARs\n");
4708 		pr_err("Initialization failed\n");
4709 		goto out_free_reserved_range;
4710 	}
4711 	up_write(&dmar_global_lock);
4712 	pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4713 
4714 	init_timer(&unmap_timer);
4715 #ifdef CONFIG_SWIOTLB
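	/* The hardware IOMMU handles DMA mapping, so SWIOTLB bounce buffering is not needed. */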
4716 	swiotlb = 0;
4717 #endif
4718 	dma_ops = &intel_dma_ops;
4719 
4720 	init_iommu_pm_ops();
4721 
4722 	for_each_active_iommu(iommu, drhd)
4723 		iommu->iommu_dev = iommu_device_create(NULL, iommu,
4724 						       intel_iommu_groups,
4725 						       "%s", iommu->name);
4726 
4727 	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4728 	bus_register_notifier(&pci_bus_type, &device_nb);
4729 	if (si_domain && !hw_pass_through)
4730 		register_memory_notifier(&intel_iommu_memory_nb);
4731 
4732 	intel_iommu_enabled = 1;
4733 
4734 	return 0;
4735 
4736 out_free_reserved_range:
4737 	put_iova_domain(&reserved_iova_list);
4738 out_free_dmar:
4739 	intel_iommu_free_dmars();
4740 	up_write(&dmar_global_lock);
4741 	iommu_exit_mempool();
4742 	return ret;
4743 }
4744 
4745 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4746 {
4747 	struct intel_iommu *iommu = opaque;
4748 
4749 	domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4750 	return 0;
4751 }
4752 
4753 /*
4754  * NB - intel-iommu lacks any sort of reference counting for the users of
4755  * dependent devices.  If multiple endpoints have intersecting dependent
4756  * devices, unbinding the driver from any one of them will possibly leave
4757  * the others unable to operate.
4758  */
4759 static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
4760 {
4761 	if (!iommu || !dev || !dev_is_pci(dev))
4762 		return;
4763 
4764 	pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
4765 }
4766 
4767 static void __dmar_remove_one_dev_info(struct device_domain_info *info)
4768 {
4769 	struct intel_iommu *iommu;
4770 	unsigned long flags;
4771 
4772 	assert_spin_locked(&device_domain_lock);
4773 
4774 	if (WARN_ON(!info))
4775 		return;
4776 
4777 	iommu = info->iommu;
4778 
4779 	if (info->dev) {
4780 		iommu_disable_dev_iotlb(info);
4781 		domain_context_clear(iommu, info->dev);
4782 	}
4783 
4784 	unlink_domain_info(info);
4785 
4786 	spin_lock_irqsave(&iommu->lock, flags);
4787 	domain_detach_iommu(info->domain, iommu);
4788 	spin_unlock_irqrestore(&iommu->lock, flags);
4789 
4790 	free_devinfo_mem(info);
4791 }
4792 
4793 static void dmar_remove_one_dev_info(struct dmar_domain *domain,
4794 				     struct device *dev)
4795 {
4796 	struct device_domain_info *info;
4797 	unsigned long flags;
4798 
4799 	spin_lock_irqsave(&device_domain_lock, flags);
4800 	info = dev->archdata.iommu;
4801 	__dmar_remove_one_dev_info(info);
4802 	spin_unlock_irqrestore(&device_domain_lock, flags);
4803 }
4804 
4805 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4806 {
4807 	int adjust_width;
4808 
4809 	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
4810 			DMA_32BIT_PFN);
4811 	domain_reserve_special_ranges(domain);
4812 
4813 	/* calculate AGAW */
4814 	domain->gaw = guest_width;
4815 	adjust_width = guestwidth_to_adjustwidth(guest_width);
4816 	domain->agaw = width_to_agaw(adjust_width);
4817 
4818 	domain->iommu_coherency = 0;
4819 	domain->iommu_snooping = 0;
4820 	domain->iommu_superpage = 0;
4821 	domain->max_addr = 0;
4822 
4823 	/* always allocate the top pgd */
4824 	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4825 	if (!domain->pgd)
4826 		return -ENOMEM;
4827 	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4828 	return 0;
4829 }
4830 
4831 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4832 {
4833 	struct dmar_domain *dmar_domain;
4834 	struct iommu_domain *domain;
4835 
4836 	if (type != IOMMU_DOMAIN_UNMANAGED)
4837 		return NULL;
4838 
4839 	dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
4840 	if (!dmar_domain) {
4841 		pr_err("Can't allocate dmar_domain\n");
4842 		return NULL;
4843 	}
4844 	if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4845 		pr_err("Domain initialization failed\n");
4846 		domain_exit(dmar_domain);
4847 		return NULL;
4848 	}
4849 	domain_update_iommu_cap(dmar_domain);
4850 
4851 	domain = &dmar_domain->domain;
4852 	domain->geometry.aperture_start = 0;
4853 	domain->geometry.aperture_end   = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4854 	domain->geometry.force_aperture = true;
4855 
4856 	return domain;
4857 }
4858 
4859 static void intel_iommu_domain_free(struct iommu_domain *domain)
4860 {
4861 	domain_exit(to_dmar_domain(domain));
4862 }
4863 
4864 static int intel_iommu_attach_device(struct iommu_domain *domain,
4865 				     struct device *dev)
4866 {
4867 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4868 	struct intel_iommu *iommu;
4869 	int addr_width;
4870 	u8 bus, devfn;
4871 
4872 	if (device_is_rmrr_locked(dev)) {
4873 		dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement.  Contact your platform vendor.\n");
4874 		return -EPERM;
4875 	}
4876 
4877 	/* normally dev is not mapped */
4878 	if (unlikely(domain_context_mapped(dev))) {
4879 		struct dmar_domain *old_domain;
4880 
4881 		old_domain = find_domain(dev);
4882 		if (old_domain) {
4883 			rcu_read_lock();
4884 			dmar_remove_one_dev_info(old_domain, dev);
4885 			rcu_read_unlock();
4886 
4887 			if (!domain_type_is_vm_or_si(old_domain) &&
4888 			     list_empty(&old_domain->devices))
4889 				domain_exit(old_domain);
4890 		}
4891 	}
4892 
4893 	iommu = device_to_iommu(dev, &bus, &devfn);
4894 	if (!iommu)
4895 		return -ENODEV;
4896 
4897 	/* check if this iommu agaw is sufficient for max mapped address */
4898 	addr_width = agaw_to_width(iommu->agaw);
4899 	if (addr_width > cap_mgaw(iommu->cap))
4900 		addr_width = cap_mgaw(iommu->cap);
4901 
4902 	if (dmar_domain->max_addr > (1LL << addr_width)) {
4903 		pr_err("%s: iommu width (%d) is not "
4904 		       "sufficient for the mapped address (%llx)\n",
4905 		       __func__, addr_width, dmar_domain->max_addr);
4906 		return -EFAULT;
4907 	}
4908 	dmar_domain->gaw = addr_width;
4909 
4910 	/*
4911 	 * Knock out extra levels of page tables if necessary
4912 	 */
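	/*
	 * The domain may have been built with more page-table levels than
	 * this IOMMU's AGAW supports; descend into the first slot of each
	 * extra top level and free it until the depths match.
	 */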
4913 	while (iommu->agaw < dmar_domain->agaw) {
4914 		struct dma_pte *pte;
4915 
4916 		pte = dmar_domain->pgd;
4917 		if (dma_pte_present(pte)) {
4918 			dmar_domain->pgd = (struct dma_pte *)
4919 				phys_to_virt(dma_pte_addr(pte));
4920 			free_pgtable_page(pte);
4921 		}
4922 		dmar_domain->agaw--;
4923 	}
4924 
4925 	return domain_add_dev_info(dmar_domain, dev);
4926 }
4927 
4928 static void intel_iommu_detach_device(struct iommu_domain *domain,
4929 				      struct device *dev)
4930 {
4931 	dmar_remove_one_dev_info(to_dmar_domain(domain), dev);
4932 }
4933 
4934 static int intel_iommu_map(struct iommu_domain *domain,
4935 			   unsigned long iova, phys_addr_t hpa,
4936 			   size_t size, int iommu_prot)
4937 {
4938 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4939 	u64 max_addr;
4940 	int prot = 0;
4941 	int ret;
4942 
4943 	if (iommu_prot & IOMMU_READ)
4944 		prot |= DMA_PTE_READ;
4945 	if (iommu_prot & IOMMU_WRITE)
4946 		prot |= DMA_PTE_WRITE;
4947 	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4948 		prot |= DMA_PTE_SNP;
4949 
4950 	max_addr = iova + size;
4951 	if (dmar_domain->max_addr < max_addr) {
4952 		u64 end;
4953 
4954 		/* check if minimum agaw is sufficient for mapped address */
4955 		end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4956 		if (end < max_addr) {
4957 			pr_err("%s: iommu width (%d) is not "
4958 			       "sufficient for the mapped address (%llx)\n",
4959 			       __func__, dmar_domain->gaw, max_addr);
4960 			return -EFAULT;
4961 		}
4962 		dmar_domain->max_addr = max_addr;
4963 	}
4964 	/* Round up size to next multiple of PAGE_SIZE, if it and
4965 	   the low bits of hpa would take us onto the next page */
4966 	size = aligned_nrpages(hpa, size);
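	/* 'size' is now a VT-d page count rather than a byte count. */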
4967 	ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4968 				 hpa >> VTD_PAGE_SHIFT, size, prot);
4969 	return ret;
4970 }
4971 
4972 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4973 				unsigned long iova, size_t size)
4974 {
4975 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4976 	struct page *freelist = NULL;
4977 	struct intel_iommu *iommu;
4978 	unsigned long start_pfn, last_pfn;
4979 	unsigned int npages;
4980 	int iommu_id, level = 0;
4981 
4982 	/* Cope with horrid API which requires us to unmap more than the
4983 	   size argument if it happens to be a large-page mapping. */
4984 	BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
4985 
4986 	if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4987 		size = VTD_PAGE_SIZE << level_to_offset_bits(level);
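	/*
	 * A PTE found above level 1 is a superpage (2MiB/1GiB); the whole
	 * superpage goes away, so the size reported back to the caller is
	 * rounded up to match.
	 */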
4988 
4989 	start_pfn = iova >> VTD_PAGE_SHIFT;
4990 	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4991 
4992 	freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4993 
4994 	npages = last_pfn - start_pfn + 1;
4995 
4996 	for_each_domain_iommu(iommu_id, dmar_domain) {
4997 		iommu = g_iommus[iommu_id];
4998 
4999 		iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5000 				      start_pfn, npages, !freelist, 0);
5001 	}
5002 
5003 	dma_free_pagelist(freelist);
5004 
5005 	if (dmar_domain->max_addr == iova + size)
5006 		dmar_domain->max_addr = iova;
5007 
5008 	return size;
5009 }
5010 
5011 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
5012 					    dma_addr_t iova)
5013 {
5014 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5015 	struct dma_pte *pte;
5016 	int level = 0;
5017 	u64 phys = 0;
5018 
5019 	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
5020 	if (pte && dma_pte_present(pte))
5021 		phys = dma_pte_addr(pte) +
5022 			(iova & (BIT_MASK(level_to_offset_bits(level) +
5023 						VTD_PAGE_SHIFT) - 1));
5024 
5025 	return phys;
5026 }
5027 
5028 static bool intel_iommu_capable(enum iommu_cap cap)
5029 {
5030 	if (cap == IOMMU_CAP_CACHE_COHERENCY)
5031 		return domain_update_iommu_snooping(NULL) == 1;
5032 	if (cap == IOMMU_CAP_INTR_REMAP)
5033 		return irq_remapping_enabled == 1;
5034 
5035 	return false;
5036 }
5037 
5038 static int intel_iommu_add_device(struct device *dev)
5039 {
5040 	struct intel_iommu *iommu;
5041 	struct iommu_group *group;
5042 	u8 bus, devfn;
5043 
5044 	iommu = device_to_iommu(dev, &bus, &devfn);
5045 	if (!iommu)
5046 		return -ENODEV;
5047 
5048 	iommu_device_link(iommu->iommu_dev, dev);
5049 
5050 	group = iommu_group_get_for_dev(dev);
5051 
5052 	if (IS_ERR(group))
5053 		return PTR_ERR(group);
5054 
5055 	iommu_group_put(group);
5056 	return 0;
5057 }
5058 
5059 static void intel_iommu_remove_device(struct device *dev)
5060 {
5061 	struct intel_iommu *iommu;
5062 	u8 bus, devfn;
5063 
5064 	iommu = device_to_iommu(dev, &bus, &devfn);
5065 	if (!iommu)
5066 		return;
5067 
5068 	iommu_group_remove_device(dev);
5069 
5070 	iommu_device_unlink(iommu->iommu_dev, dev);
5071 }
5072 
5073 #ifdef CONFIG_INTEL_IOMMU_SVM
5074 #define MAX_NR_PASID_BITS (20)
5075 static inline unsigned long intel_iommu_get_pts(struct intel_iommu *iommu)
5076 {
5077 	/*
5078 	 * Convert ecap_pss to the extended context entry pts encoding; also
5079 	 * respect the soft pasid_max value set by the iommu.
5080 	 * - number of PASID bits = ecap_pss + 1
5081 	 * - number of PASID table entries = 2^(pts + 5)
5082 	 * Therefore, pts = ecap_pss - 4
5083 	 * e.g. KBL ecap_pss = 0x13, PASID has 20 bits, pts = 15
5084 	 */
5085 	if (ecap_pss(iommu->ecap) < 5)
5086 		return 0;
5087 
5088 	/* pasid_max is encoded as actual number of entries not the bits */
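	/*
	 * pasid_max is a power-of-two entry count, so find_first_bit()
	 * yields log2(pasid_max); subtracting 5 converts that into the pts
	 * encoding, where the table holds 2^(pts + 5) entries.
	 */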
5089 	return find_first_bit((unsigned long *)&iommu->pasid_max,
5090 			MAX_NR_PASID_BITS) - 5;
5091 }
5092 
5093 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
5094 {
5095 	struct device_domain_info *info;
5096 	struct context_entry *context;
5097 	struct dmar_domain *domain;
5098 	unsigned long flags;
5099 	u64 ctx_lo;
5100 	int ret;
5101 
5102 	domain = get_valid_domain_for_dev(sdev->dev);
5103 	if (!domain)
5104 		return -EINVAL;
5105 
5106 	spin_lock_irqsave(&device_domain_lock, flags);
5107 	spin_lock(&iommu->lock);
5108 
5109 	ret = -EINVAL;
5110 	info = sdev->dev->archdata.iommu;
5111 	if (!info || !info->pasid_supported)
5112 		goto out;
5113 
5114 	context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5115 	if (WARN_ON(!context))
5116 		goto out;
5117 
5118 	ctx_lo = context[0].lo;
5119 
5120 	sdev->did = domain->iommu_did[iommu->seq_id];
5121 	sdev->sid = PCI_DEVID(info->bus, info->devfn);
5122 
5123 	if (!(ctx_lo & CONTEXT_PASIDE)) {
5124 		context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
5125 		context[1].lo = (u64)virt_to_phys(iommu->pasid_table) |
5126 			intel_iommu_get_pts(iommu);
5127 
5128 		wmb();
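		/*
		 * The barrier above orders the PASID table pointer writes in
		 * context[1] before the PASIDE/type bits are set in
		 * context[0] below.
		 */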
5129 		/* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
5130 		 * extended to permit requests-with-PASID if the PASIDE bit
5131 		 * is set, which makes sense. For CONTEXT_TT_PASS_THROUGH,
5132 		 * however, the PASIDE bit is ignored and requests-with-PASID
5133 		 * are unconditionally blocked. Which makes less sense.
5134 		 * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
5135 		 * "guest mode" translation types depending on whether ATS
5136 		 * is available or not. Annoyingly, we can't use the new
5137 		 * modes *unless* PASIDE is set. */
5138 		if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
5139 			ctx_lo &= ~CONTEXT_TT_MASK;
5140 			if (info->ats_supported)
5141 				ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
5142 			else
5143 				ctx_lo |= CONTEXT_TT_PT_PASID << 2;
5144 		}
5145 		ctx_lo |= CONTEXT_PASIDE;
5146 		if (iommu->pasid_state_table)
5147 			ctx_lo |= CONTEXT_DINVE;
5148 		if (info->pri_supported)
5149 			ctx_lo |= CONTEXT_PRS;
5150 		context[0].lo = ctx_lo;
5151 		wmb();
5152 		iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
5153 					   DMA_CCMD_MASK_NOBIT,
5154 					   DMA_CCMD_DEVICE_INVL);
5155 	}
5156 
5157 	/* Enable PASID support in the device, if it wasn't already */
5158 	if (!info->pasid_enabled)
5159 		iommu_enable_dev_iotlb(info);
5160 
5161 	if (info->ats_enabled) {
5162 		sdev->dev_iotlb = 1;
5163 		sdev->qdep = info->ats_qdep;
5164 		if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
5165 			sdev->qdep = 0;
5166 	}
5167 	ret = 0;
5168 
5169  out:
5170 	spin_unlock(&iommu->lock);
5171 	spin_unlock_irqrestore(&device_domain_lock, flags);
5172 
5173 	return ret;
5174 }
5175 
5176 struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5177 {
5178 	struct intel_iommu *iommu;
5179 	u8 bus, devfn;
5180 
5181 	if (iommu_dummy(dev)) {
5182 		dev_warn(dev,
5183 			 "No IOMMU translation for device; cannot enable SVM\n");
5184 		return NULL;
5185 	}
5186 
5187 	iommu = device_to_iommu(dev, &bus, &devfn);
5188 	if (!iommu) {
5189 		dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
5190 		return NULL;
5191 	}
5192 
5193 	if (!iommu->pasid_table) {
5194 		dev_err(dev, "PASID not enabled on IOMMU; cannot enable SVM\n");
5195 		return NULL;
5196 	}
5197 
5198 	return iommu;
5199 }
5200 #endif /* CONFIG_INTEL_IOMMU_SVM */
5201 
5202 static const struct iommu_ops intel_iommu_ops = {
5203 	.capable	= intel_iommu_capable,
5204 	.domain_alloc	= intel_iommu_domain_alloc,
5205 	.domain_free	= intel_iommu_domain_free,
5206 	.attach_dev	= intel_iommu_attach_device,
5207 	.detach_dev	= intel_iommu_detach_device,
5208 	.map		= intel_iommu_map,
5209 	.unmap		= intel_iommu_unmap,
5210 	.map_sg		= default_iommu_map_sg,
5211 	.iova_to_phys	= intel_iommu_iova_to_phys,
5212 	.add_device	= intel_iommu_add_device,
5213 	.remove_device	= intel_iommu_remove_device,
5214 	.device_group   = pci_device_group,
5215 	.pgsize_bitmap	= INTEL_IOMMU_PGSIZES,
5216 };
5217 
5218 static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5219 {
5220 	/* G4x/GM45 integrated gfx dmar support is totally busted. */
5221 	pr_info("Disabling IOMMU for graphics on this chipset\n");
5222 	dmar_map_gfx = 0;
5223 }
5224 
5225 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5226 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5227 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5228 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5229 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5230 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5231 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5232 
5233 static void quirk_iommu_rwbf(struct pci_dev *dev)
5234 {
5235 	/*
5236 	 * Mobile 4 Series Chipset neglects to set RWBF capability,
5237 	 * but needs it. Same seems to hold for the desktop versions.
5238 	 */
5239 	pr_info("Forcing write-buffer flush capability\n");
5240 	rwbf_quirk = 1;
5241 }
5242 
5243 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
5244 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5245 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5246 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5247 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5248 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5249 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
5250 
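/*
 * GGC is assumed to be the GMCH Graphics Control register in the host
 * bridge's config space; the fields below describe how much stolen memory
 * the BIOS reserved for the GTT and whether a VT-d capable layout was
 * chosen.
 */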
5251 #define GGC 0x52
5252 #define GGC_MEMORY_SIZE_MASK	(0xf << 8)
5253 #define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
5254 #define GGC_MEMORY_SIZE_1M	(0x1 << 8)
5255 #define GGC_MEMORY_SIZE_2M	(0x3 << 8)
5256 #define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
5257 #define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
5258 #define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
5259 #define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)
5260 
5261 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
5262 {
5263 	unsigned short ggc;
5264 
5265 	if (pci_read_config_word(dev, GGC, &ggc))
5266 		return;
5267 
5268 	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
5269 		pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
5270 		dmar_map_gfx = 0;
5271 	} else if (dmar_map_gfx) {
5272 		/* we have to ensure the gfx device is idle before we flush */
5273 		pr_info("Disabling batched IOTLB flush on Ironlake\n");
5274 		intel_iommu_strict = 1;
5275 	}
5276 }
5277 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5278 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5279 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5280 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5281 
5282 /* On Tylersburg chipsets, some BIOSes have been known to enable the
5283    ISOCH DMAR unit for the Azalia sound device, but not give it any
5284    TLB entries, which causes it to deadlock. Check for that.  We do
5285    this in a function called from init_dmars(), instead of in a PCI
5286    quirk, because we don't want to print the obnoxious "BIOS broken"
5287    message if VT-d is actually disabled.
5288 */
5289 static void __init check_tylersburg_isoch(void)
5290 {
5291 	struct pci_dev *pdev;
5292 	uint32_t vtisochctrl;
5293 
5294 	/* If there's no Azalia in the system anyway, forget it. */
5295 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5296 	if (!pdev)
5297 		return;
5298 	pci_dev_put(pdev);
5299 
5300 	/* System Management Registers. Might be hidden, in which case
5301 	   we can't do the sanity check. But that's OK, because the
5302 	   known-broken BIOSes _don't_ actually hide it, so far. */
5303 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5304 	if (!pdev)
5305 		return;
5306 
5307 	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5308 		pci_dev_put(pdev);
5309 		return;
5310 	}
5311 
5312 	pci_dev_put(pdev);
5313 
5314 	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5315 	if (vtisochctrl & 1)
5316 		return;
5317 
5318 	/* Drop all bits other than the number of TLB entries */
5319 	vtisochctrl &= 0x1c;
5320 
5321 	/* If we have the recommended number of TLB entries (16), fine. */
5322 	if (vtisochctrl == 0x10)
5323 		return;
5324 
5325 	/* Zero TLB entries? You get to ride the short bus to school. */
5326 	if (!vtisochctrl) {
5327 		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5328 		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5329 		     dmi_get_system_info(DMI_BIOS_VENDOR),
5330 		     dmi_get_system_info(DMI_BIOS_VERSION),
5331 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
5332 		iommu_identity_mapping |= IDENTMAP_AZALIA;
5333 		return;
5334 	}
5335 
5336 	pr_warn("Recommended number of TLB entries for ISOCH unit is 16; your BIOS set %d\n",
5337 	       vtisochctrl);
5338 }
5339