1 /*
2  * Copyright © 2006-2014 Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * Authors: David Woodhouse <dwmw2@infradead.org>,
14  *          Ashok Raj <ashok.raj@intel.com>,
15  *          Shaohua Li <shaohua.li@intel.com>,
16  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17  *          Fenghua Yu <fenghua.yu@intel.com>
18  *          Joerg Roedel <jroedel@suse.de>
19  */
20 
21 #define pr_fmt(fmt)     "DMAR: " fmt
22 
23 #include <linux/init.h>
24 #include <linux/bitmap.h>
25 #include <linux/debugfs.h>
26 #include <linux/export.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/memory.h>
36 #include <linux/cpu.h>
37 #include <linux/timer.h>
38 #include <linux/io.h>
39 #include <linux/iova.h>
40 #include <linux/iommu.h>
41 #include <linux/intel-iommu.h>
42 #include <linux/syscore_ops.h>
43 #include <linux/tboot.h>
44 #include <linux/dmi.h>
45 #include <linux/pci-ats.h>
46 #include <linux/memblock.h>
47 #include <linux/dma-contiguous.h>
48 #include <linux/crash_dump.h>
49 #include <asm/irq_remapping.h>
50 #include <asm/cacheflush.h>
51 #include <asm/iommu.h>
52 
53 #include "irq_remapping.h"
54 
55 #define ROOT_SIZE		VTD_PAGE_SIZE
56 #define CONTEXT_SIZE		VTD_PAGE_SIZE
57 
58 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
59 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
60 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
61 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
62 
63 #define IOAPIC_RANGE_START	(0xfee00000)
64 #define IOAPIC_RANGE_END	(0xfeefffff)
65 #define IOVA_START_ADDR		(0x1000)
66 
67 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
68 
69 #define MAX_AGAW_WIDTH 64
70 #define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
71 
72 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
73 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
74 
75 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
76    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
77 #define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
78 				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
79 #define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
80 
81 /* IO virtual address start page frame number */
82 #define IOVA_START_PFN		(1)
83 
84 #define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
85 #define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
86 #define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))
87 
88 /* page table handling */
89 #define LEVEL_STRIDE		(9)
90 #define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
91 
92 /*
93  * This bitmap is used to advertise the page sizes our hardware supports
94  * to the IOMMU core, which will then use this information to split
95  * physically contiguous memory regions it is mapping into page sizes
96  * that we support.
97  *
98  * Traditionally the IOMMU core just handed us the mappings directly,
99  * after making sure the size is an order of a 4KiB page and that the
100  * mapping has natural alignment.
101  *
102  * To retain this behavior, we currently advertise that we support
103  * all page sizes that are an order of 4KiB.
104  *
105  * If at some point we'd like to utilize the IOMMU core's new behavior,
106  * we could change this to advertise the real page sizes we support.
107  */
108 #define INTEL_IOMMU_PGSIZES	(~0xFFFUL)
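/*
 * Illustrative example (added note, not in the original source): ~0xFFFUL
 * clears only bits 0-11, so every bit from 12 upwards is set.  Bit N of the
 * bitmap advertises support for a 2^N byte page, so this announces 4KiB,
 * 8KiB, 16KiB, ... i.e. every power-of-two multiple of 4KiB, matching the
 * behaviour described in the comment above.
 */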
109 
110 static inline int agaw_to_level(int agaw)
111 {
112 	return agaw + 2;
113 }
114 
115 static inline int agaw_to_width(int agaw)
116 {
117 	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
118 }
119 
120 static inline int width_to_agaw(int width)
121 {
122 	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
123 }
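/*
 * Worked example (illustrative): for the default 48-bit domain width,
 * width_to_agaw(48) = DIV_ROUND_UP(48 - 30, 9) = 2, agaw_to_level(2) gives a
 * 4-level page table, and agaw_to_width(2) = 30 + 2 * 9 = 48 bits again.
 * A 39-bit width maps to agaw 1 and a 3-level table.
 */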
124 
125 static inline unsigned int level_to_offset_bits(int level)
126 {
127 	return (level - 1) * LEVEL_STRIDE;
128 }
129 
130 static inline int pfn_level_offset(unsigned long pfn, int level)
131 {
132 	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
133 }
134 
135 static inline unsigned long level_mask(int level)
136 {
137 	return -1UL << level_to_offset_bits(level);
138 }
139 
140 static inline unsigned long level_size(int level)
141 {
142 	return 1UL << level_to_offset_bits(level);
143 }
144 
145 static inline unsigned long align_to_level(unsigned long pfn, int level)
146 {
147 	return (pfn + level_size(level) - 1) & level_mask(level);
148 }
149 
150 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
151 {
152 	return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
153 }
154 
155 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
156    are never going to work. */
157 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
158 {
159 	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
160 }
161 
162 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
163 {
164 	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
165 }
166 static inline unsigned long page_to_dma_pfn(struct page *pg)
167 {
168 	return mm_to_dma_pfn(page_to_pfn(pg));
169 }
170 static inline unsigned long virt_to_dma_pfn(void *p)
171 {
172 	return page_to_dma_pfn(virt_to_page(p));
173 }
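/*
 * Note (illustrative): on x86 PAGE_SHIFT == VTD_PAGE_SHIFT == 12, so these
 * conversions are identity operations.  The shifts only matter when MM pages
 * are larger than the 4KiB VT-d page, which is why VT-d pages must never be
 * larger than MM pages (see the comment above).
 */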
174 
175 /* global iommu list, set NULL for ignored DMAR units */
176 static struct intel_iommu **g_iommus;
177 
178 static void __init check_tylersburg_isoch(void);
179 static int rwbf_quirk;
180 
181 /*
182  * set to 1 to panic kernel if can't successfully enable VT-d
183  * (used when kernel is launched w/ TXT)
184  */
185 static int force_on = 0;
186 
187 /*
188  * 0: Present
189  * 1-11: Reserved
190  * 12-63: Context Ptr (12 - (haw-1))
191  * 64-127: Reserved
192  */
193 struct root_entry {
194 	u64	lo;
195 	u64	hi;
196 };
197 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
198 
199 /*
200  * Take a root_entry and return the Lower Context Table Pointer (LCTP)
201  * if marked present.
202  */
203 static phys_addr_t root_entry_lctp(struct root_entry *re)
204 {
205 	if (!(re->lo & 1))
206 		return 0;
207 
208 	return re->lo & VTD_PAGE_MASK;
209 }
210 
211 /*
212  * Take a root_entry and return the Upper Context Table Pointer (UCTP)
213  * if marked present.
214  */
215 static phys_addr_t root_entry_uctp(struct root_entry *re)
216 {
217 	if (!(re->hi & 1))
218 		return 0;
219 
220 	return re->hi & VTD_PAGE_MASK;
221 }
222 /*
223  * low 64 bits:
224  * 0: present
225  * 1: fault processing disable
226  * 2-3: translation type
227  * 12-63: address space root
228  * high 64 bits:
229  * 0-2: address width
230  * 3-6: aval
231  * 8-23: domain id
232  */
233 struct context_entry {
234 	u64 lo;
235 	u64 hi;
236 };
237 
238 static inline void context_clear_pasid_enable(struct context_entry *context)
239 {
240 	context->lo &= ~(1ULL << 11);
241 }
242 
243 static inline bool context_pasid_enabled(struct context_entry *context)
244 {
245 	return !!(context->lo & (1ULL << 11));
246 }
247 
248 static inline void context_set_copied(struct context_entry *context)
249 {
250 	context->hi |= (1ull << 3);
251 }
252 
253 static inline bool context_copied(struct context_entry *context)
254 {
255 	return !!(context->hi & (1ULL << 3));
256 }
257 
258 static inline bool __context_present(struct context_entry *context)
259 {
260 	return (context->lo & 1);
261 }
262 
263 static inline bool context_present(struct context_entry *context)
264 {
265 	return context_pasid_enabled(context) ?
266 	     __context_present(context) :
267 	     __context_present(context) && !context_copied(context);
268 }
269 
270 static inline void context_set_present(struct context_entry *context)
271 {
272 	context->lo |= 1;
273 }
274 
275 static inline void context_set_fault_enable(struct context_entry *context)
276 {
277 	context->lo &= (((u64)-1) << 2) | 1;
278 }
279 
280 static inline void context_set_translation_type(struct context_entry *context,
281 						unsigned long value)
282 {
283 	context->lo &= (((u64)-1) << 4) | 3;
284 	context->lo |= (value & 3) << 2;
285 }
286 
287 static inline void context_set_address_root(struct context_entry *context,
288 					    unsigned long value)
289 {
290 	context->lo &= ~VTD_PAGE_MASK;
291 	context->lo |= value & VTD_PAGE_MASK;
292 }
293 
294 static inline void context_set_address_width(struct context_entry *context,
295 					     unsigned long value)
296 {
297 	context->hi |= value & 7;
298 }
299 
300 static inline void context_set_domain_id(struct context_entry *context,
301 					 unsigned long value)
302 {
303 	context->hi |= (value & ((1 << 16) - 1)) << 8;
304 }
305 
306 static inline int context_domain_id(struct context_entry *c)
307 {
308 	return((c->hi >> 8) & 0xffff);
309 }
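/*
 * Example (illustrative): context_set_domain_id(c, 42) places 42 in bits
 * 8-23 of c->hi, and context_domain_id(c) reads the same 16-bit field back,
 * matching the layout described above struct context_entry.
 */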
310 
311 static inline void context_clear_entry(struct context_entry *context)
312 {
313 	context->lo = 0;
314 	context->hi = 0;
315 }
316 
317 /*
318  * 0: readable
319  * 1: writable
320  * 2-6: reserved
321  * 7: super page
322  * 8-10: available
323  * 11: snoop behavior
324  * 12-63: Host physical address
325  */
326 struct dma_pte {
327 	u64 val;
328 };
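/*
 * Example (illustrative): a present, writable 2MiB superpage entry has
 * DMA_PTE_READ and DMA_PTE_WRITE (bits 0-1) set together with
 * DMA_PTE_LARGE_PAGE (bit 7); dma_pte_addr() below masks everything outside
 * bits 12-63 to recover the host physical address.
 */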
329 
330 static inline void dma_clear_pte(struct dma_pte *pte)
331 {
332 	pte->val = 0;
333 }
334 
335 static inline u64 dma_pte_addr(struct dma_pte *pte)
336 {
337 #ifdef CONFIG_64BIT
338 	return pte->val & VTD_PAGE_MASK;
339 #else
340 	/* Must have a full atomic 64-bit read */
341 	return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
342 #endif
343 }
344 
345 static inline bool dma_pte_present(struct dma_pte *pte)
346 {
347 	return (pte->val & 3) != 0;
348 }
349 
350 static inline bool dma_pte_superpage(struct dma_pte *pte)
351 {
352 	return (pte->val & DMA_PTE_LARGE_PAGE);
353 }
354 
355 static inline int first_pte_in_page(struct dma_pte *pte)
356 {
357 	return !((unsigned long)pte & ~VTD_PAGE_MASK);
358 }
359 
360 /*
361  * This domain is a statically identity mapping domain.
362  *	1. This domain creates a static 1:1 mapping to all usable memory.
363  * 	2. It maps to each iommu if successful.
364  *	3. Each iommu maps to this domain if successful.
365  */
366 static struct dmar_domain *si_domain;
367 static int hw_pass_through = 1;
368 
369 /*
370  * A domain represents a virtual machine; more than one device
371  * across iommus may be owned by one domain, e.g. a kvm guest.
372  */
373 #define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 0)
374 
375 /* si_domain contains multiple devices */
376 #define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 1)
377 
378 #define for_each_domain_iommu(idx, domain)			\
379 	for (idx = 0; idx < g_num_of_iommus; idx++)		\
380 		if (domain->iommu_refcnt[idx])
381 
382 struct dmar_domain {
383 	int	nid;			/* node id */
384 
385 	unsigned	iommu_refcnt[DMAR_UNITS_SUPPORTED];
386 					/* Refcount of devices per iommu */
387 
388 
389 	u16		iommu_did[DMAR_UNITS_SUPPORTED];
390 					/* Domain ids per IOMMU. Use u16 since
391 					 * domain ids are 16 bit wide according
392 					 * to VT-d spec, section 9.3 */
393 
394 	bool has_iotlb_device;
395 	struct list_head devices;	/* all devices' list */
396 	struct iova_domain iovad;	/* iova's that belong to this domain */
397 
398 	struct dma_pte	*pgd;		/* virtual address */
399 	int		gaw;		/* max guest address width */
400 
401 	/* adjusted guest address width, 0 is level 2 30-bit */
402 	int		agaw;
403 
404 	int		flags;		/* flags to find out type of domain */
405 
406 	int		iommu_coherency;/* indicate coherency of iommu access */
407 	int		iommu_snooping; /* indicate snooping control feature*/
408 	int		iommu_count;	/* reference count of iommu */
409 	int		iommu_superpage;/* Level of superpages supported:
410 					   0 == 4KiB (no superpages), 1 == 2MiB,
411 					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
412 	u64		max_addr;	/* maximum mapped address */
413 
414 	struct iommu_domain domain;	/* generic domain data structure for
415 					   iommu core */
416 };
417 
418 /* PCI domain-device relationship */
419 struct device_domain_info {
420 	struct list_head link;	/* link to domain siblings */
421 	struct list_head global; /* link to global list */
422 	u8 bus;			/* PCI bus number */
423 	u8 devfn;		/* PCI devfn number */
424 	u8 pasid_supported:3;
425 	u8 pasid_enabled:1;
426 	u8 pri_supported:1;
427 	u8 pri_enabled:1;
428 	u8 ats_supported:1;
429 	u8 ats_enabled:1;
430 	u8 ats_qdep;
431 	struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
432 	struct intel_iommu *iommu; /* IOMMU used by this device */
433 	struct dmar_domain *domain; /* pointer to domain */
434 };
435 
436 struct dmar_rmrr_unit {
437 	struct list_head list;		/* list of rmrr units	*/
438 	struct acpi_dmar_header *hdr;	/* ACPI header		*/
439 	u64	base_address;		/* reserved base address*/
440 	u64	end_address;		/* reserved end address */
441 	struct dmar_dev_scope *devices;	/* target devices */
442 	int	devices_cnt;		/* target device count */
443 };
444 
445 struct dmar_atsr_unit {
446 	struct list_head list;		/* list of ATSR units */
447 	struct acpi_dmar_header *hdr;	/* ACPI header */
448 	struct dmar_dev_scope *devices;	/* target devices */
449 	int devices_cnt;		/* target device count */
450 	u8 include_all:1;		/* include all ports */
451 };
452 
453 static LIST_HEAD(dmar_atsr_units);
454 static LIST_HEAD(dmar_rmrr_units);
455 
456 #define for_each_rmrr_units(rmrr) \
457 	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
458 
459 static void flush_unmaps_timeout(unsigned long data);
460 
461 struct deferred_flush_entry {
462 	unsigned long iova_pfn;
463 	unsigned long nrpages;
464 	struct dmar_domain *domain;
465 	struct page *freelist;
466 };
467 
468 #define HIGH_WATER_MARK 250
469 struct deferred_flush_table {
470 	int next;
471 	struct deferred_flush_entry entries[HIGH_WATER_MARK];
472 };
473 
474 struct deferred_flush_data {
475 	spinlock_t lock;
476 	int timer_on;
477 	struct timer_list timer;
478 	long size;
479 	struct deferred_flush_table *tables;
480 };
481 
482 DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush);
483 
484 /* bitmap for indexing intel_iommus */
485 static int g_num_of_iommus;
486 
487 static void domain_exit(struct dmar_domain *domain);
488 static void domain_remove_dev_info(struct dmar_domain *domain);
489 static void dmar_remove_one_dev_info(struct dmar_domain *domain,
490 				     struct device *dev);
491 static void __dmar_remove_one_dev_info(struct device_domain_info *info);
492 static void domain_context_clear(struct intel_iommu *iommu,
493 				 struct device *dev);
494 static int domain_detach_iommu(struct dmar_domain *domain,
495 			       struct intel_iommu *iommu);
496 
497 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
498 int dmar_disabled = 0;
499 #else
500 int dmar_disabled = 1;
501 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
502 
503 int intel_iommu_enabled = 0;
504 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
505 
506 static int dmar_map_gfx = 1;
507 static int dmar_forcedac;
508 static int intel_iommu_strict;
509 static int intel_iommu_superpage = 1;
510 static int intel_iommu_ecs = 1;
511 static int intel_iommu_pasid28;
512 static int iommu_identity_mapping;
513 
514 #define IDENTMAP_ALL		1
515 #define IDENTMAP_GFX		2
516 #define IDENTMAP_AZALIA		4
517 
518 /* Broadwell and Skylake have broken ECS support — normal so-called "second
519  * level" translation of DMA requests-without-PASID doesn't actually happen
520  * unless you also set the NESTE bit in an extended context-entry. Which of
521  * course means that SVM doesn't work because it's trying to do nested
522  * translation of the physical addresses it finds in the process page tables,
523  * through the IOVA->phys mapping found in the "second level" page tables.
524  *
525  * The VT-d specification was retroactively changed to change the definition
526  * of the capability bits and pretend that Broadwell/Skylake never happened...
527  * but unfortunately the wrong bit was changed. It's ECS which is broken, but
528  * for some reason it was the PASID capability bit which was redefined (from
529  * bit 28 on BDW/SKL to bit 40 in future).
530  *
531  * So our test for ECS needs to eschew those implementations which set the old
532  * PASID capability bit 28, since those are the ones on which ECS is broken.
533  * Unless we are working around the 'pasid28' limitations, that is, by putting
534  * the device into passthrough mode for normal DMA and thus masking the bug.
535  */
536 #define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
537 			    (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
538 /* PASID support is thus enabled if ECS is enabled and *either* of the old
539  * or new capability bits are set. */
540 #define pasid_enabled(iommu) (ecs_enabled(iommu) &&			\
541 			      (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))
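/*
 * Worked example (illustrative, following the comment above): on an affected
 * Broadwell/Skylake IOMMU ecap_broken_pasid() is true, so ecs_enabled()
 * evaluates false unless the "pasid28" workaround was requested on the
 * command line; on later hardware only the redefined PASID capability bit is
 * set and ECS/PASID are used normally.
 */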
542 
543 int intel_iommu_gfx_mapped;
544 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
545 
546 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
547 static DEFINE_SPINLOCK(device_domain_lock);
548 static LIST_HEAD(device_domain_list);
549 
550 static const struct iommu_ops intel_iommu_ops;
551 
552 static bool translation_pre_enabled(struct intel_iommu *iommu)
553 {
554 	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
555 }
556 
557 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
558 {
559 	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
560 }
561 
562 static void init_translation_status(struct intel_iommu *iommu)
563 {
564 	u32 gsts;
565 
566 	gsts = readl(iommu->reg + DMAR_GSTS_REG);
567 	if (gsts & DMA_GSTS_TES)
568 		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
569 }
570 
571 /* Convert a generic 'struct iommu_domain' to our private 'struct dmar_domain' */
572 static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
573 {
574 	return container_of(dom, struct dmar_domain, domain);
575 }
576 
577 static int __init intel_iommu_setup(char *str)
578 {
579 	if (!str)
580 		return -EINVAL;
581 	while (*str) {
582 		if (!strncmp(str, "on", 2)) {
583 			dmar_disabled = 0;
584 			pr_info("IOMMU enabled\n");
585 		} else if (!strncmp(str, "off", 3)) {
586 			dmar_disabled = 1;
587 			pr_info("IOMMU disabled\n");
588 		} else if (!strncmp(str, "igfx_off", 8)) {
589 			dmar_map_gfx = 0;
590 			pr_info("Disable GFX device mapping\n");
591 		} else if (!strncmp(str, "forcedac", 8)) {
592 			pr_info("Forcing DAC for PCI devices\n");
593 			dmar_forcedac = 1;
594 		} else if (!strncmp(str, "strict", 6)) {
595 			pr_info("Disable batched IOTLB flush\n");
596 			intel_iommu_strict = 1;
597 		} else if (!strncmp(str, "sp_off", 6)) {
598 			pr_info("Disable supported super page\n");
599 			intel_iommu_superpage = 0;
600 		} else if (!strncmp(str, "ecs_off", 7)) {
601 			printk(KERN_INFO
602 				"Intel-IOMMU: disable extended context table support\n");
603 			intel_iommu_ecs = 0;
604 		} else if (!strncmp(str, "pasid28", 7)) {
605 			printk(KERN_INFO
606 				"Intel-IOMMU: enable pre-production PASID support\n");
607 			intel_iommu_pasid28 = 1;
608 			iommu_identity_mapping |= IDENTMAP_GFX;
609 		}
610 
611 		str += strcspn(str, ",");
612 		while (*str == ',')
613 			str++;
614 	}
615 	return 0;
616 }
617 __setup("intel_iommu=", intel_iommu_setup);
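/*
 * Usage example (illustrative): booting with "intel_iommu=on,strict,igfx_off"
 * enables the IOMMU, disables batched IOTLB flushing and skips translation
 * for the integrated graphics device; the options are comma separated and
 * parsed in order by the loop above.
 */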
618 
619 static struct kmem_cache *iommu_domain_cache;
620 static struct kmem_cache *iommu_devinfo_cache;
621 
622 static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
623 {
624 	struct dmar_domain **domains;
625 	int idx = did >> 8;
626 
627 	domains = iommu->domains[idx];
628 	if (!domains)
629 		return NULL;
630 
631 	return domains[did & 0xff];
632 }
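/*
 * Example (illustrative): iommu->domains is a two-level table of 256-entry
 * pages, so domain id 0x1234 is looked up at iommu->domains[0x12][0x34].
 * The second level is allocated lazily by set_iommu_domain() below.
 */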
633 
634 static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
635 			     struct dmar_domain *domain)
636 {
637 	struct dmar_domain **domains;
638 	int idx = did >> 8;
639 
640 	if (!iommu->domains[idx]) {
641 		size_t size = 256 * sizeof(struct dmar_domain *);
642 		iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
643 	}
644 
645 	domains = iommu->domains[idx];
646 	if (WARN_ON(!domains))
647 		return;
648 	else
649 		domains[did & 0xff] = domain;
650 }
651 
652 static inline void *alloc_pgtable_page(int node)
653 {
654 	struct page *page;
655 	void *vaddr = NULL;
656 
657 	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
658 	if (page)
659 		vaddr = page_address(page);
660 	return vaddr;
661 }
662 
663 static inline void free_pgtable_page(void *vaddr)
664 {
665 	free_page((unsigned long)vaddr);
666 }
667 
668 static inline void *alloc_domain_mem(void)
669 {
670 	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
671 }
672 
673 static void free_domain_mem(void *vaddr)
674 {
675 	kmem_cache_free(iommu_domain_cache, vaddr);
676 }
677 
678 static inline void * alloc_devinfo_mem(void)
679 {
680 	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
681 }
682 
683 static inline void free_devinfo_mem(void *vaddr)
684 {
685 	kmem_cache_free(iommu_devinfo_cache, vaddr);
686 }
687 
688 static inline int domain_type_is_vm(struct dmar_domain *domain)
689 {
690 	return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
691 }
692 
693 static inline int domain_type_is_si(struct dmar_domain *domain)
694 {
695 	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
696 }
697 
698 static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
699 {
700 	return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
701 				DOMAIN_FLAG_STATIC_IDENTITY);
702 }
703 
704 static inline int domain_pfn_supported(struct dmar_domain *domain,
705 				       unsigned long pfn)
706 {
707 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
708 
709 	return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
710 }
711 
712 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
713 {
714 	unsigned long sagaw;
715 	int agaw = -1;
716 
717 	sagaw = cap_sagaw(iommu->cap);
718 	for (agaw = width_to_agaw(max_gaw);
719 	     agaw >= 0; agaw--) {
720 		if (test_bit(agaw, &sagaw))
721 			break;
722 	}
723 
724 	return agaw;
725 }
726 
727 /*
728  * Calculate max SAGAW for each iommu.
729  */
730 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
731 {
732 	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
733 }
734 
735 /*
736  * calculate agaw for each iommu.
737  * "SAGAW" may be different across iommus; use a default agaw, and
738  * fall back to a smaller supported agaw for iommus that don't support the default.
739  */
740 int iommu_calculate_agaw(struct intel_iommu *iommu)
741 {
742 	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
743 }
744 
745 /* This function only returns a single iommu in a domain */
746 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
747 {
748 	int iommu_id;
749 
750 	/* si_domain and vm domain should not get here. */
751 	BUG_ON(domain_type_is_vm_or_si(domain));
752 	for_each_domain_iommu(iommu_id, domain)
753 		break;
754 
755 	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
756 		return NULL;
757 
758 	return g_iommus[iommu_id];
759 }
760 
761 static void domain_update_iommu_coherency(struct dmar_domain *domain)
762 {
763 	struct dmar_drhd_unit *drhd;
764 	struct intel_iommu *iommu;
765 	bool found = false;
766 	int i;
767 
768 	domain->iommu_coherency = 1;
769 
770 	for_each_domain_iommu(i, domain) {
771 		found = true;
772 		if (!ecap_coherent(g_iommus[i]->ecap)) {
773 			domain->iommu_coherency = 0;
774 			break;
775 		}
776 	}
777 	if (found)
778 		return;
779 
780 	/* No hardware attached; use lowest common denominator */
781 	rcu_read_lock();
782 	for_each_active_iommu(iommu, drhd) {
783 		if (!ecap_coherent(iommu->ecap)) {
784 			domain->iommu_coherency = 0;
785 			break;
786 		}
787 	}
788 	rcu_read_unlock();
789 }
790 
791 static int domain_update_iommu_snooping(struct intel_iommu *skip)
792 {
793 	struct dmar_drhd_unit *drhd;
794 	struct intel_iommu *iommu;
795 	int ret = 1;
796 
797 	rcu_read_lock();
798 	for_each_active_iommu(iommu, drhd) {
799 		if (iommu != skip) {
800 			if (!ecap_sc_support(iommu->ecap)) {
801 				ret = 0;
802 				break;
803 			}
804 		}
805 	}
806 	rcu_read_unlock();
807 
808 	return ret;
809 }
810 
811 static int domain_update_iommu_superpage(struct intel_iommu *skip)
812 {
813 	struct dmar_drhd_unit *drhd;
814 	struct intel_iommu *iommu;
815 	int mask = 0xf;
816 
817 	if (!intel_iommu_superpage) {
818 		return 0;
819 	}
820 
821 	/* set iommu_superpage to the smallest common denominator */
822 	rcu_read_lock();
823 	for_each_active_iommu(iommu, drhd) {
824 		if (iommu != skip) {
825 			mask &= cap_super_page_val(iommu->cap);
826 			if (!mask)
827 				break;
828 		}
829 	}
830 	rcu_read_unlock();
831 
832 	return fls(mask);
833 }
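/*
 * Example (illustrative): if one active IOMMU reports 2MiB and 1GiB super
 * pages (mask 0x3) and another only 2MiB (mask 0x1), the intersection is 0x1
 * and fls() returns 1, i.e. the domain is limited to 2MiB superpages.
 */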
834 
835 /* Some capabilities may be different across iommus */
836 static void domain_update_iommu_cap(struct dmar_domain *domain)
837 {
838 	domain_update_iommu_coherency(domain);
839 	domain->iommu_snooping = domain_update_iommu_snooping(NULL);
840 	domain->iommu_superpage = domain_update_iommu_superpage(NULL);
841 }
842 
843 static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
844 						       u8 bus, u8 devfn, int alloc)
845 {
846 	struct root_entry *root = &iommu->root_entry[bus];
847 	struct context_entry *context;
848 	u64 *entry;
849 
850 	entry = &root->lo;
851 	if (ecs_enabled(iommu)) {
852 		if (devfn >= 0x80) {
853 			devfn -= 0x80;
854 			entry = &root->hi;
855 		}
856 		devfn *= 2;
857 	}
858 	if (*entry & 1)
859 		context = phys_to_virt(*entry & VTD_PAGE_MASK);
860 	else {
861 		unsigned long phy_addr;
862 		if (!alloc)
863 			return NULL;
864 
865 		context = alloc_pgtable_page(iommu->node);
866 		if (!context)
867 			return NULL;
868 
869 		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
870 		phy_addr = virt_to_phys((void *)context);
871 		*entry = phy_addr | 1;
872 		__iommu_flush_cache(iommu, entry, sizeof(*entry));
873 	}
874 	return &context[devfn];
875 }
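/*
 * Example (illustrative): with extended context support (ECS), devfn 0x85
 * selects the upper half of the root entry (root->hi) and, after the
 * "devfn -= 0x80; devfn *= 2" adjustment above, lands at index 0x0a of the
 * extended context table, since each extended entry takes two slots.
 */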
876 
877 static int iommu_dummy(struct device *dev)
878 {
879 	return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
880 }
881 
882 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
883 {
884 	struct dmar_drhd_unit *drhd = NULL;
885 	struct intel_iommu *iommu;
886 	struct device *tmp;
887 	struct pci_dev *ptmp, *pdev = NULL;
888 	u16 segment = 0;
889 	int i;
890 
891 	if (iommu_dummy(dev))
892 		return NULL;
893 
894 	if (dev_is_pci(dev)) {
895 		struct pci_dev *pf_pdev;
896 
897 		pdev = to_pci_dev(dev);
898 		/* VFs aren't listed in scope tables; we need to look up
899 		 * the PF instead to find the IOMMU. */
900 		pf_pdev = pci_physfn(pdev);
901 		dev = &pf_pdev->dev;
902 		segment = pci_domain_nr(pdev->bus);
903 	} else if (has_acpi_companion(dev))
904 		dev = &ACPI_COMPANION(dev)->dev;
905 
906 	rcu_read_lock();
907 	for_each_active_iommu(iommu, drhd) {
908 		if (pdev && segment != drhd->segment)
909 			continue;
910 
911 		for_each_active_dev_scope(drhd->devices,
912 					  drhd->devices_cnt, i, tmp) {
913 			if (tmp == dev) {
914 				/* For a VF use its original BDF# not that of the PF
915 				 * which we used for the IOMMU lookup. Strictly speaking
916 				 * we could do this for all PCI devices; we only need to
917 				 * get the BDF# from the scope table for ACPI matches. */
918 				if (pdev && pdev->is_virtfn)
919 					goto got_pdev;
920 
921 				*bus = drhd->devices[i].bus;
922 				*devfn = drhd->devices[i].devfn;
923 				goto out;
924 			}
925 
926 			if (!pdev || !dev_is_pci(tmp))
927 				continue;
928 
929 			ptmp = to_pci_dev(tmp);
930 			if (ptmp->subordinate &&
931 			    ptmp->subordinate->number <= pdev->bus->number &&
932 			    ptmp->subordinate->busn_res.end >= pdev->bus->number)
933 				goto got_pdev;
934 		}
935 
936 		if (pdev && drhd->include_all) {
937 		got_pdev:
938 			*bus = pdev->bus->number;
939 			*devfn = pdev->devfn;
940 			goto out;
941 		}
942 	}
943 	iommu = NULL;
944  out:
945 	rcu_read_unlock();
946 
947 	return iommu;
948 }
949 
950 static void domain_flush_cache(struct dmar_domain *domain,
951 			       void *addr, int size)
952 {
953 	if (!domain->iommu_coherency)
954 		clflush_cache_range(addr, size);
955 }
956 
957 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
958 {
959 	struct context_entry *context;
960 	int ret = 0;
961 	unsigned long flags;
962 
963 	spin_lock_irqsave(&iommu->lock, flags);
964 	context = iommu_context_addr(iommu, bus, devfn, 0);
965 	if (context)
966 		ret = context_present(context);
967 	spin_unlock_irqrestore(&iommu->lock, flags);
968 	return ret;
969 }
970 
971 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
972 {
973 	struct context_entry *context;
974 	unsigned long flags;
975 
976 	spin_lock_irqsave(&iommu->lock, flags);
977 	context = iommu_context_addr(iommu, bus, devfn, 0);
978 	if (context) {
979 		context_clear_entry(context);
980 		__iommu_flush_cache(iommu, context, sizeof(*context));
981 	}
982 	spin_unlock_irqrestore(&iommu->lock, flags);
983 }
984 
985 static void free_context_table(struct intel_iommu *iommu)
986 {
987 	int i;
988 	unsigned long flags;
989 	struct context_entry *context;
990 
991 	spin_lock_irqsave(&iommu->lock, flags);
992 	if (!iommu->root_entry) {
993 		goto out;
994 	}
995 	for (i = 0; i < ROOT_ENTRY_NR; i++) {
996 		context = iommu_context_addr(iommu, i, 0, 0);
997 		if (context)
998 			free_pgtable_page(context);
999 
1000 		if (!ecs_enabled(iommu))
1001 			continue;
1002 
1003 		context = iommu_context_addr(iommu, i, 0x80, 0);
1004 		if (context)
1005 			free_pgtable_page(context);
1006 
1007 	}
1008 	free_pgtable_page(iommu->root_entry);
1009 	iommu->root_entry = NULL;
1010 out:
1011 	spin_unlock_irqrestore(&iommu->lock, flags);
1012 }
1013 
1014 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
1015 				      unsigned long pfn, int *target_level)
1016 {
1017 	struct dma_pte *parent, *pte = NULL;
1018 	int level = agaw_to_level(domain->agaw);
1019 	int offset;
1020 
1021 	BUG_ON(!domain->pgd);
1022 
1023 	if (!domain_pfn_supported(domain, pfn))
1024 		/* Address beyond IOMMU's addressing capabilities. */
1025 		return NULL;
1026 
1027 	parent = domain->pgd;
1028 
1029 	while (1) {
1030 		void *tmp_page;
1031 
1032 		offset = pfn_level_offset(pfn, level);
1033 		pte = &parent[offset];
1034 		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
1035 			break;
1036 		if (level == *target_level)
1037 			break;
1038 
1039 		if (!dma_pte_present(pte)) {
1040 			uint64_t pteval;
1041 
1042 			tmp_page = alloc_pgtable_page(domain->nid);
1043 
1044 			if (!tmp_page)
1045 				return NULL;
1046 
1047 			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
1048 			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
1049 			if (cmpxchg64(&pte->val, 0ULL, pteval))
1050 				/* Someone else set it while we were thinking; use theirs. */
1051 				free_pgtable_page(tmp_page);
1052 			else
1053 				domain_flush_cache(domain, pte, sizeof(*pte));
1054 		}
1055 		if (level == 1)
1056 			break;
1057 
1058 		parent = phys_to_virt(dma_pte_addr(pte));
1059 		level--;
1060 	}
1061 
1062 	if (!*target_level)
1063 		*target_level = level;
1064 
1065 	return pte;
1066 }
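/*
 * Walk example (illustrative): with agaw 2 (a 4-level, 48-bit table) the DMA
 * pfn is split into four 9-bit indexes by pfn_level_offset(), consumed from
 * level 4 down to level 1; passing *target_level == 2 stops the walk one
 * level early so the caller can install a 2MiB superpage PTE there.
 */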
1067 
1068 
1069 /* return address's pte at specific level */
1070 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
1071 					 unsigned long pfn,
1072 					 int level, int *large_page)
1073 {
1074 	struct dma_pte *parent, *pte = NULL;
1075 	int total = agaw_to_level(domain->agaw);
1076 	int offset;
1077 
1078 	parent = domain->pgd;
1079 	while (level <= total) {
1080 		offset = pfn_level_offset(pfn, total);
1081 		pte = &parent[offset];
1082 		if (level == total)
1083 			return pte;
1084 
1085 		if (!dma_pte_present(pte)) {
1086 			*large_page = total;
1087 			break;
1088 		}
1089 
1090 		if (dma_pte_superpage(pte)) {
1091 			*large_page = total;
1092 			return pte;
1093 		}
1094 
1095 		parent = phys_to_virt(dma_pte_addr(pte));
1096 		total--;
1097 	}
1098 	return NULL;
1099 }
1100 
1101 /* clear last level pte, a tlb flush should be followed */
1102 static void dma_pte_clear_range(struct dmar_domain *domain,
1103 				unsigned long start_pfn,
1104 				unsigned long last_pfn)
1105 {
1106 	unsigned int large_page = 1;
1107 	struct dma_pte *first_pte, *pte;
1108 
1109 	BUG_ON(!domain_pfn_supported(domain, start_pfn));
1110 	BUG_ON(!domain_pfn_supported(domain, last_pfn));
1111 	BUG_ON(start_pfn > last_pfn);
1112 
1113 	/* we don't need lock here; nobody else touches the iova range */
1114 	do {
1115 		large_page = 1;
1116 		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
1117 		if (!pte) {
1118 			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
1119 			continue;
1120 		}
1121 		do {
1122 			dma_clear_pte(pte);
1123 			start_pfn += lvl_to_nr_pages(large_page);
1124 			pte++;
1125 		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1126 
1127 		domain_flush_cache(domain, first_pte,
1128 				   (void *)pte - (void *)first_pte);
1129 
1130 	} while (start_pfn && start_pfn <= last_pfn);
1131 }
1132 
1133 static void dma_pte_free_level(struct dmar_domain *domain, int level,
1134 			       struct dma_pte *pte, unsigned long pfn,
1135 			       unsigned long start_pfn, unsigned long last_pfn)
1136 {
1137 	pfn = max(start_pfn, pfn);
1138 	pte = &pte[pfn_level_offset(pfn, level)];
1139 
1140 	do {
1141 		unsigned long level_pfn;
1142 		struct dma_pte *level_pte;
1143 
1144 		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1145 			goto next;
1146 
1147 		level_pfn = pfn & level_mask(level);
1148 		level_pte = phys_to_virt(dma_pte_addr(pte));
1149 
1150 		if (level > 2)
1151 			dma_pte_free_level(domain, level - 1, level_pte,
1152 					   level_pfn, start_pfn, last_pfn);
1153 
1154 		/* If range covers entire pagetable, free it */
1155 		if (!(start_pfn > level_pfn ||
1156 		      last_pfn < level_pfn + level_size(level) - 1)) {
1157 			dma_clear_pte(pte);
1158 			domain_flush_cache(domain, pte, sizeof(*pte));
1159 			free_pgtable_page(level_pte);
1160 		}
1161 next:
1162 		pfn += level_size(level);
1163 	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1164 }
1165 
1166 /* clear last level (leaf) ptes and free page table pages. */
1167 static void dma_pte_free_pagetable(struct dmar_domain *domain,
1168 				   unsigned long start_pfn,
1169 				   unsigned long last_pfn)
1170 {
1171 	BUG_ON(!domain_pfn_supported(domain, start_pfn));
1172 	BUG_ON(!domain_pfn_supported(domain, last_pfn));
1173 	BUG_ON(start_pfn > last_pfn);
1174 
1175 	dma_pte_clear_range(domain, start_pfn, last_pfn);
1176 
1177 	/* We don't need lock here; nobody else touches the iova range */
1178 	dma_pte_free_level(domain, agaw_to_level(domain->agaw),
1179 			   domain->pgd, 0, start_pfn, last_pfn);
1180 
1181 	/* free pgd */
1182 	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1183 		free_pgtable_page(domain->pgd);
1184 		domain->pgd = NULL;
1185 	}
1186 }
1187 
1188 /* When a page at a given level is being unlinked from its parent, we don't
1189    need to *modify* it at all. All we need to do is make a list of all the
1190    pages which can be freed just as soon as we've flushed the IOTLB and we
1191    know the hardware page-walk will no longer touch them.
1192    The 'pte' argument is the *parent* PTE, pointing to the page that is to
1193    be freed. */
1194 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1195 					    int level, struct dma_pte *pte,
1196 					    struct page *freelist)
1197 {
1198 	struct page *pg;
1199 
1200 	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1201 	pg->freelist = freelist;
1202 	freelist = pg;
1203 
1204 	if (level == 1)
1205 		return freelist;
1206 
1207 	pte = page_address(pg);
1208 	do {
1209 		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1210 			freelist = dma_pte_list_pagetables(domain, level - 1,
1211 							   pte, freelist);
1212 		pte++;
1213 	} while (!first_pte_in_page(pte));
1214 
1215 	return freelist;
1216 }
1217 
1218 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1219 					struct dma_pte *pte, unsigned long pfn,
1220 					unsigned long start_pfn,
1221 					unsigned long last_pfn,
1222 					struct page *freelist)
1223 {
1224 	struct dma_pte *first_pte = NULL, *last_pte = NULL;
1225 
1226 	pfn = max(start_pfn, pfn);
1227 	pte = &pte[pfn_level_offset(pfn, level)];
1228 
1229 	do {
1230 		unsigned long level_pfn;
1231 
1232 		if (!dma_pte_present(pte))
1233 			goto next;
1234 
1235 		level_pfn = pfn & level_mask(level);
1236 
1237 		/* If range covers entire pagetable, free it */
1238 		if (start_pfn <= level_pfn &&
1239 		    last_pfn >= level_pfn + level_size(level) - 1) {
1240 			/* These subordinate page tables are going away entirely. Don't
1241 			   bother to clear them; we're just going to *free* them. */
1242 			if (level > 1 && !dma_pte_superpage(pte))
1243 				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1244 
1245 			dma_clear_pte(pte);
1246 			if (!first_pte)
1247 				first_pte = pte;
1248 			last_pte = pte;
1249 		} else if (level > 1) {
1250 			/* Recurse down into a level that isn't *entirely* obsolete */
1251 			freelist = dma_pte_clear_level(domain, level - 1,
1252 						       phys_to_virt(dma_pte_addr(pte)),
1253 						       level_pfn, start_pfn, last_pfn,
1254 						       freelist);
1255 		}
1256 next:
1257 		pfn += level_size(level);
1258 	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1259 
1260 	if (first_pte)
1261 		domain_flush_cache(domain, first_pte,
1262 				   (void *)++last_pte - (void *)first_pte);
1263 
1264 	return freelist;
1265 }
1266 
1267 /* We can't just free the pages because the IOMMU may still be walking
1268    the page tables, and may have cached the intermediate levels. The
1269    pages can only be freed after the IOTLB flush has been done. */
1270 static struct page *domain_unmap(struct dmar_domain *domain,
1271 				 unsigned long start_pfn,
1272 				 unsigned long last_pfn)
1273 {
1274 	struct page *freelist = NULL;
1275 
1276 	BUG_ON(!domain_pfn_supported(domain, start_pfn));
1277 	BUG_ON(!domain_pfn_supported(domain, last_pfn));
1278 	BUG_ON(start_pfn > last_pfn);
1279 
1280 	/* we don't need lock here; nobody else touches the iova range */
1281 	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1282 				       domain->pgd, 0, start_pfn, last_pfn, NULL);
1283 
1284 	/* free pgd */
1285 	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1286 		struct page *pgd_page = virt_to_page(domain->pgd);
1287 		pgd_page->freelist = freelist;
1288 		freelist = pgd_page;
1289 
1290 		domain->pgd = NULL;
1291 	}
1292 
1293 	return freelist;
1294 }
1295 
1296 static void dma_free_pagelist(struct page *freelist)
1297 {
1298 	struct page *pg;
1299 
1300 	while ((pg = freelist)) {
1301 		freelist = pg->freelist;
1302 		free_pgtable_page(page_address(pg));
1303 	}
1304 }
1305 
1306 /* iommu handling */
1307 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1308 {
1309 	struct root_entry *root;
1310 	unsigned long flags;
1311 
1312 	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1313 	if (!root) {
1314 		pr_err("Allocating root entry for %s failed\n",
1315 			iommu->name);
1316 		return -ENOMEM;
1317 	}
1318 
1319 	__iommu_flush_cache(iommu, root, ROOT_SIZE);
1320 
1321 	spin_lock_irqsave(&iommu->lock, flags);
1322 	iommu->root_entry = root;
1323 	spin_unlock_irqrestore(&iommu->lock, flags);
1324 
1325 	return 0;
1326 }
1327 
1328 static void iommu_set_root_entry(struct intel_iommu *iommu)
1329 {
1330 	u64 addr;
1331 	u32 sts;
1332 	unsigned long flag;
1333 
1334 	addr = virt_to_phys(iommu->root_entry);
1335 	if (ecs_enabled(iommu))
1336 		addr |= DMA_RTADDR_RTT;
1337 
1338 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1339 	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1340 
1341 	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1342 
1343 	/* Make sure hardware complete it */
1344 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1345 		      readl, (sts & DMA_GSTS_RTPS), sts);
1346 
1347 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1348 }
1349 
1350 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1351 {
1352 	u32 val;
1353 	unsigned long flag;
1354 
1355 	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1356 		return;
1357 
1358 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1359 	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1360 
1361 	/* Make sure hardware complete it */
1362 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1363 		      readl, (!(val & DMA_GSTS_WBFS)), val);
1364 
1365 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1366 }
1367 
1368 /* return value determines if we need a write buffer flush */
1369 static void __iommu_flush_context(struct intel_iommu *iommu,
1370 				  u16 did, u16 source_id, u8 function_mask,
1371 				  u64 type)
1372 {
1373 	u64 val = 0;
1374 	unsigned long flag;
1375 
1376 	switch (type) {
1377 	case DMA_CCMD_GLOBAL_INVL:
1378 		val = DMA_CCMD_GLOBAL_INVL;
1379 		break;
1380 	case DMA_CCMD_DOMAIN_INVL:
1381 		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1382 		break;
1383 	case DMA_CCMD_DEVICE_INVL:
1384 		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1385 			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1386 		break;
1387 	default:
1388 		BUG();
1389 	}
1390 	val |= DMA_CCMD_ICC;
1391 
1392 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1393 	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1394 
1395 	/* Make sure hardware complete it */
1396 	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1397 		dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1398 
1399 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1400 }
1401 
1402 /* return value determines if we need a write buffer flush */
1403 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1404 				u64 addr, unsigned int size_order, u64 type)
1405 {
1406 	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1407 	u64 val = 0, val_iva = 0;
1408 	unsigned long flag;
1409 
1410 	switch (type) {
1411 	case DMA_TLB_GLOBAL_FLUSH:
1412 		/* global flush doesn't need set IVA_REG */
1413 		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1414 		break;
1415 	case DMA_TLB_DSI_FLUSH:
1416 		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1417 		break;
1418 	case DMA_TLB_PSI_FLUSH:
1419 		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1420 		/* IH bit is passed in as part of address */
1421 		val_iva = size_order | addr;
1422 		break;
1423 	default:
1424 		BUG();
1425 	}
1426 	/* Note: set drain read/write */
1427 #if 0
1428 	/*
1429 	 * This is probably meant to be extra safe. It looks like we can
1430 	 * ignore it without any impact.
1431 	 */
1432 	if (cap_read_drain(iommu->cap))
1433 		val |= DMA_TLB_READ_DRAIN;
1434 #endif
1435 	if (cap_write_drain(iommu->cap))
1436 		val |= DMA_TLB_WRITE_DRAIN;
1437 
1438 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1439 	/* Note: Only uses first TLB reg currently */
1440 	if (val_iva)
1441 		dmar_writeq(iommu->reg + tlb_offset, val_iva);
1442 	dmar_writeq(iommu->reg + tlb_offset + 8, val);
1443 
1444 	/* Make sure hardware complete it */
1445 	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1446 		dmar_readq, (!(val & DMA_TLB_IVT)), val);
1447 
1448 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1449 
1450 	/* check IOTLB invalidation granularity */
1451 	if (DMA_TLB_IAIG(val) == 0)
1452 		pr_err("Flush IOTLB failed\n");
1453 	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1454 		pr_debug("TLB flush request %Lx, actual %Lx\n",
1455 			(unsigned long long)DMA_TLB_IIRG(type),
1456 			(unsigned long long)DMA_TLB_IAIG(val));
1457 }
1458 
1459 static struct device_domain_info *
iommu_support_dev_iotlb(struct dmar_domain * domain,struct intel_iommu * iommu,u8 bus,u8 devfn)1460 iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1461 			 u8 bus, u8 devfn)
1462 {
1463 	struct device_domain_info *info;
1464 
1465 	assert_spin_locked(&device_domain_lock);
1466 
1467 	if (!iommu->qi)
1468 		return NULL;
1469 
1470 	list_for_each_entry(info, &domain->devices, link)
1471 		if (info->iommu == iommu && info->bus == bus &&
1472 		    info->devfn == devfn) {
1473 			if (info->ats_supported && info->dev)
1474 				return info;
1475 			break;
1476 		}
1477 
1478 	return NULL;
1479 }
1480 
1481 static void domain_update_iotlb(struct dmar_domain *domain)
1482 {
1483 	struct device_domain_info *info;
1484 	bool has_iotlb_device = false;
1485 
1486 	assert_spin_locked(&device_domain_lock);
1487 
1488 	list_for_each_entry(info, &domain->devices, link) {
1489 		struct pci_dev *pdev;
1490 
1491 		if (!info->dev || !dev_is_pci(info->dev))
1492 			continue;
1493 
1494 		pdev = to_pci_dev(info->dev);
1495 		if (pdev->ats_enabled) {
1496 			has_iotlb_device = true;
1497 			break;
1498 		}
1499 	}
1500 
1501 	domain->has_iotlb_device = has_iotlb_device;
1502 }
1503 
1504 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1505 {
1506 	struct pci_dev *pdev;
1507 
1508 	assert_spin_locked(&device_domain_lock);
1509 
1510 	if (!info || !dev_is_pci(info->dev))
1511 		return;
1512 
1513 	pdev = to_pci_dev(info->dev);
1514 
1515 #ifdef CONFIG_INTEL_IOMMU_SVM
1516 	/* The PCIe spec, in its wisdom, declares that the behaviour of
1517 	   the device if you enable PASID support after ATS support is
1518 	   undefined. So always enable PASID support on devices which
1519 	   have it, even if we can't yet know if we're ever going to
1520 	   use it. */
1521 	if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1522 		info->pasid_enabled = 1;
1523 
1524 	if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1525 		info->pri_enabled = 1;
1526 #endif
1527 	if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1528 		info->ats_enabled = 1;
1529 		domain_update_iotlb(info->domain);
1530 		info->ats_qdep = pci_ats_queue_depth(pdev);
1531 	}
1532 }
1533 
1534 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1535 {
1536 	struct pci_dev *pdev;
1537 
1538 	assert_spin_locked(&device_domain_lock);
1539 
1540 	if (!dev_is_pci(info->dev))
1541 		return;
1542 
1543 	pdev = to_pci_dev(info->dev);
1544 
1545 	if (info->ats_enabled) {
1546 		pci_disable_ats(pdev);
1547 		info->ats_enabled = 0;
1548 		domain_update_iotlb(info->domain);
1549 	}
1550 #ifdef CONFIG_INTEL_IOMMU_SVM
1551 	if (info->pri_enabled) {
1552 		pci_disable_pri(pdev);
1553 		info->pri_enabled = 0;
1554 	}
1555 	if (info->pasid_enabled) {
1556 		pci_disable_pasid(pdev);
1557 		info->pasid_enabled = 0;
1558 	}
1559 #endif
1560 }
1561 
1562 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1563 				  u64 addr, unsigned mask)
1564 {
1565 	u16 sid, qdep;
1566 	unsigned long flags;
1567 	struct device_domain_info *info;
1568 
1569 	if (!domain->has_iotlb_device)
1570 		return;
1571 
1572 	spin_lock_irqsave(&device_domain_lock, flags);
1573 	list_for_each_entry(info, &domain->devices, link) {
1574 		if (!info->ats_enabled)
1575 			continue;
1576 
1577 		sid = info->bus << 8 | info->devfn;
1578 		qdep = info->ats_qdep;
1579 		qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1580 	}
1581 	spin_unlock_irqrestore(&device_domain_lock, flags);
1582 }
1583 
1584 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1585 				  struct dmar_domain *domain,
1586 				  unsigned long pfn, unsigned int pages,
1587 				  int ih, int map)
1588 {
1589 	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1590 	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1591 	u16 did = domain->iommu_did[iommu->seq_id];
1592 
1593 	BUG_ON(pages == 0);
1594 
1595 	if (ih)
1596 		ih = 1 << 6;
1597 	/*
1598 	 * Fall back to domain-selective flush if there is no PSI support or the size is
1599 	 * too big.
1600 	 * PSI requires page size to be 2 ^ x, and the base address is naturally
1601 	 * aligned to the size
1602 	 */
1603 	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1604 		iommu->flush.flush_iotlb(iommu, did, 0, 0,
1605 						DMA_TLB_DSI_FLUSH);
1606 	else
1607 		iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1608 						DMA_TLB_PSI_FLUSH);
1609 
1610 	/*
1611 	 * In caching mode, changes of pages from non-present to present require
1612 	 * flush. However, device IOTLB doesn't need to be flushed in this case.
1613 	 */
1614 	if (!cap_caching_mode(iommu->cap) || !map)
1615 		iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1616 				      addr, mask);
1617 }
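/*
 * Example (illustrative): flushing 10 pages rounds up to 16, so mask = 4 and
 * hardware invalidates a 16-page (64KiB) aligned region containing addr; if
 * mask exceeded cap_max_amask_val() the code above would instead have fallen
 * back to a full domain-selective flush.
 */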
1618 
1619 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1620 {
1621 	u32 pmen;
1622 	unsigned long flags;
1623 
1624 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1625 	pmen = readl(iommu->reg + DMAR_PMEN_REG);
1626 	pmen &= ~DMA_PMEN_EPM;
1627 	writel(pmen, iommu->reg + DMAR_PMEN_REG);
1628 
1629 	/* wait for the protected region status bit to clear */
1630 	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1631 		readl, !(pmen & DMA_PMEN_PRS), pmen);
1632 
1633 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1634 }
1635 
1636 static void iommu_enable_translation(struct intel_iommu *iommu)
1637 {
1638 	u32 sts;
1639 	unsigned long flags;
1640 
1641 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1642 	iommu->gcmd |= DMA_GCMD_TE;
1643 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1644 
1645 	/* Make sure hardware complete it */
1646 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1647 		      readl, (sts & DMA_GSTS_TES), sts);
1648 
1649 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1650 }
1651 
1652 static void iommu_disable_translation(struct intel_iommu *iommu)
1653 {
1654 	u32 sts;
1655 	unsigned long flag;
1656 
1657 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1658 	iommu->gcmd &= ~DMA_GCMD_TE;
1659 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1660 
1661 	/* Make sure hardware complete it */
1662 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1663 		      readl, (!(sts & DMA_GSTS_TES)), sts);
1664 
1665 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1666 }
1667 
1668 
1669 static int iommu_init_domains(struct intel_iommu *iommu)
1670 {
1671 	u32 ndomains, nlongs;
1672 	size_t size;
1673 
1674 	ndomains = cap_ndoms(iommu->cap);
1675 	pr_debug("%s: Number of Domains supported <%d>\n",
1676 		 iommu->name, ndomains);
1677 	nlongs = BITS_TO_LONGS(ndomains);
1678 
1679 	spin_lock_init(&iommu->lock);
1680 
1681 	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1682 	if (!iommu->domain_ids) {
1683 		pr_err("%s: Allocating domain id array failed\n",
1684 		       iommu->name);
1685 		return -ENOMEM;
1686 	}
1687 
1688 	size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
1689 	iommu->domains = kzalloc(size, GFP_KERNEL);
1690 
1691 	if (iommu->domains) {
1692 		size = 256 * sizeof(struct dmar_domain *);
1693 		iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1694 	}
1695 
1696 	if (!iommu->domains || !iommu->domains[0]) {
1697 		pr_err("%s: Allocating domain array failed\n",
1698 		       iommu->name);
1699 		kfree(iommu->domain_ids);
1700 		kfree(iommu->domains);
1701 		iommu->domain_ids = NULL;
1702 		iommu->domains    = NULL;
1703 		return -ENOMEM;
1704 	}
1705 
1706 
1707 
1708 	/*
1709 	 * If Caching mode is set, then invalid translations are tagged
1710 	 * with domain-id 0, hence we need to pre-allocate it. We also
1711 	 * use domain-id 0 as a marker for non-allocated domain-id, so
1712 	 * make sure it is not used for a real domain.
1713 	 */
1714 	set_bit(0, iommu->domain_ids);
1715 
1716 	return 0;
1717 }
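/*
 * Sizing note (illustrative numbers, not taken from any particular
 * hardware): with cap_ndoms() == 65536, domain_ids above needs
 * BITS_TO_LONGS(65536) longs, while iommu->domains becomes a
 * two-level array of ALIGN(65536, 256) >> 8 == 256 top-level
 * pointers.  Only the first 256-entry second-level page is allocated
 * here; the remaining pages are presumably allocated on demand as
 * domain ids are handed out, which keeps the up-front cost small.
 */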
1718 
1719 static void disable_dmar_iommu(struct intel_iommu *iommu)
1720 {
1721 	struct device_domain_info *info, *tmp;
1722 	unsigned long flags;
1723 
1724 	if (!iommu->domains || !iommu->domain_ids)
1725 		return;
1726 
1727 again:
1728 	spin_lock_irqsave(&device_domain_lock, flags);
1729 	list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1730 		struct dmar_domain *domain;
1731 
1732 		if (info->iommu != iommu)
1733 			continue;
1734 
1735 		if (!info->dev || !info->domain)
1736 			continue;
1737 
1738 		domain = info->domain;
1739 
1740 		__dmar_remove_one_dev_info(info);
1741 
1742 		if (!domain_type_is_vm_or_si(domain)) {
1743 			/*
1744 			 * The domain_exit() function can't be called under
1745 			 * device_domain_lock, as it takes this lock itself.
1746 			 * So release the lock here and re-run the loop
1747 			 * afterwards.
1748 			 */
1749 			spin_unlock_irqrestore(&device_domain_lock, flags);
1750 			domain_exit(domain);
1751 			goto again;
1752 		}
1753 	}
1754 	spin_unlock_irqrestore(&device_domain_lock, flags);
1755 
1756 	if (iommu->gcmd & DMA_GCMD_TE)
1757 		iommu_disable_translation(iommu);
1758 }
1759 
1760 static void free_dmar_iommu(struct intel_iommu *iommu)
1761 {
1762 	if ((iommu->domains) && (iommu->domain_ids)) {
1763 		int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
1764 		int i;
1765 
1766 		for (i = 0; i < elems; i++)
1767 			kfree(iommu->domains[i]);
1768 		kfree(iommu->domains);
1769 		kfree(iommu->domain_ids);
1770 		iommu->domains = NULL;
1771 		iommu->domain_ids = NULL;
1772 	}
1773 
1774 	g_iommus[iommu->seq_id] = NULL;
1775 
1776 	/* free context mapping */
1777 	free_context_table(iommu);
1778 
1779 #ifdef CONFIG_INTEL_IOMMU_SVM
1780 	if (pasid_enabled(iommu)) {
1781 		if (ecap_prs(iommu->ecap))
1782 			intel_svm_finish_prq(iommu);
1783 		intel_svm_free_pasid_tables(iommu);
1784 	}
1785 #endif
1786 }
1787 
1788 static struct dmar_domain *alloc_domain(int flags)
1789 {
1790 	struct dmar_domain *domain;
1791 
1792 	domain = alloc_domain_mem();
1793 	if (!domain)
1794 		return NULL;
1795 
1796 	memset(domain, 0, sizeof(*domain));
1797 	domain->nid = -1;
1798 	domain->flags = flags;
1799 	domain->has_iotlb_device = false;
1800 	INIT_LIST_HEAD(&domain->devices);
1801 
1802 	return domain;
1803 }
1804 
1805 /* Must be called with iommu->lock */
1806 static int domain_attach_iommu(struct dmar_domain *domain,
1807 			       struct intel_iommu *iommu)
1808 {
1809 	unsigned long ndomains;
1810 	int num;
1811 
1812 	assert_spin_locked(&device_domain_lock);
1813 	assert_spin_locked(&iommu->lock);
1814 
1815 	domain->iommu_refcnt[iommu->seq_id] += 1;
1816 	domain->iommu_count += 1;
1817 	if (domain->iommu_refcnt[iommu->seq_id] == 1) {
1818 		ndomains = cap_ndoms(iommu->cap);
1819 		num      = find_first_zero_bit(iommu->domain_ids, ndomains);
1820 
1821 		if (num >= ndomains) {
1822 			pr_err("%s: No free domain ids\n", iommu->name);
1823 			domain->iommu_refcnt[iommu->seq_id] -= 1;
1824 			domain->iommu_count -= 1;
1825 			return -ENOSPC;
1826 		}
1827 
1828 		set_bit(num, iommu->domain_ids);
1829 		set_iommu_domain(iommu, num, domain);
1830 
1831 		domain->iommu_did[iommu->seq_id] = num;
1832 		domain->nid			 = iommu->node;
1833 
1834 		domain_update_iommu_cap(domain);
1835 	}
1836 
1837 	return 0;
1838 }
1839 
1840 static int domain_detach_iommu(struct dmar_domain *domain,
1841 			       struct intel_iommu *iommu)
1842 {
1843 	int num, count = INT_MAX;
1844 
1845 	assert_spin_locked(&device_domain_lock);
1846 	assert_spin_locked(&iommu->lock);
1847 
1848 	domain->iommu_refcnt[iommu->seq_id] -= 1;
1849 	count = --domain->iommu_count;
1850 	if (domain->iommu_refcnt[iommu->seq_id] == 0) {
1851 		num = domain->iommu_did[iommu->seq_id];
1852 		clear_bit(num, iommu->domain_ids);
1853 		set_iommu_domain(iommu, num, NULL);
1854 
1855 		domain_update_iommu_cap(domain);
1856 		domain->iommu_did[iommu->seq_id] = 0;
1857 	}
1858 
1859 	return count;
1860 }
1861 
1862 static struct iova_domain reserved_iova_list;
1863 static struct lock_class_key reserved_rbtree_key;
1864 
1865 static int dmar_init_reserved_ranges(void)
1866 {
1867 	struct pci_dev *pdev = NULL;
1868 	struct iova *iova;
1869 	int i;
1870 
1871 	init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
1872 			DMA_32BIT_PFN);
1873 
1874 	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1875 		&reserved_rbtree_key);
1876 
1877 	/* IOAPIC ranges shouldn't be accessed by DMA */
1878 	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1879 		IOVA_PFN(IOAPIC_RANGE_END));
1880 	if (!iova) {
1881 		pr_err("Reserve IOAPIC range failed\n");
1882 		return -ENODEV;
1883 	}
1884 
1885 	/* Reserve all PCI MMIO to avoid peer-to-peer access */
1886 	for_each_pci_dev(pdev) {
1887 		struct resource *r;
1888 
1889 		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1890 			r = &pdev->resource[i];
1891 			if (!r->flags || !(r->flags & IORESOURCE_MEM))
1892 				continue;
1893 			iova = reserve_iova(&reserved_iova_list,
1894 					    IOVA_PFN(r->start),
1895 					    IOVA_PFN(r->end));
1896 			if (!iova) {
1897 				pr_err("Reserve iova failed\n");
1898 				return -ENODEV;
1899 			}
1900 		}
1901 	}
1902 	return 0;
1903 }
1904 
1905 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1906 {
1907 	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1908 }
1909 
1910 static inline int guestwidth_to_adjustwidth(int gaw)
1911 {
1912 	int agaw;
1913 	int r = (gaw - 12) % 9;
1914 
1915 	if (r == 0)
1916 		agaw = gaw;
1917 	else
1918 		agaw = gaw + 9 - r;
1919 	if (agaw > 64)
1920 		agaw = 64;
1921 	return agaw;
1922 }
1923 
1924 static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1925 		       int guest_width)
1926 {
1927 	int adjust_width, agaw;
1928 	unsigned long sagaw;
1929 
1930 	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
1931 			DMA_32BIT_PFN);
1932 	domain_reserve_special_ranges(domain);
1933 
1934 	/* calculate AGAW */
1935 	if (guest_width > cap_mgaw(iommu->cap))
1936 		guest_width = cap_mgaw(iommu->cap);
1937 	domain->gaw = guest_width;
1938 	adjust_width = guestwidth_to_adjustwidth(guest_width);
1939 	agaw = width_to_agaw(adjust_width);
1940 	sagaw = cap_sagaw(iommu->cap);
1941 	if (!test_bit(agaw, &sagaw)) {
1942 		/* hardware doesn't support it, choose a bigger one */
1943 		pr_debug("Hardware doesn't support agaw %d\n", agaw);
1944 		agaw = find_next_bit(&sagaw, 5, agaw);
1945 		if (agaw >= 5)
1946 			return -ENODEV;
1947 	}
1948 	domain->agaw = agaw;
1949 
1950 	if (ecap_coherent(iommu->ecap))
1951 		domain->iommu_coherency = 1;
1952 	else
1953 		domain->iommu_coherency = 0;
1954 
1955 	if (ecap_sc_support(iommu->ecap))
1956 		domain->iommu_snooping = 1;
1957 	else
1958 		domain->iommu_snooping = 0;
1959 
1960 	if (intel_iommu_superpage)
1961 		domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1962 	else
1963 		domain->iommu_superpage = 0;
1964 
1965 	domain->nid = iommu->node;
1966 
1967 	/* always allocate the top pgd */
1968 	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1969 	if (!domain->pgd)
1970 		return -ENOMEM;
1971 	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1972 	return 0;
1973 }
1974 
1975 static void domain_exit(struct dmar_domain *domain)
1976 {
1977 	struct page *freelist = NULL;
1978 
1979 	/* Domain 0 is reserved, so don't process it */
1980 	if (!domain)
1981 		return;
1982 
1983 	/* Flush any lazy unmaps that may reference this domain */
1984 	if (!intel_iommu_strict) {
1985 		int cpu;
1986 
1987 		for_each_possible_cpu(cpu)
1988 			flush_unmaps_timeout(cpu);
1989 	}
1990 
1991 	/* Remove associated devices and clear attached or cached domains */
1992 	rcu_read_lock();
1993 	domain_remove_dev_info(domain);
1994 	rcu_read_unlock();
1995 
1996 	/* destroy iovas */
1997 	put_iova_domain(&domain->iovad);
1998 
1999 	freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
2000 
2001 	dma_free_pagelist(freelist);
2002 
2003 	free_domain_mem(domain);
2004 }
2005 
2006 static int domain_context_mapping_one(struct dmar_domain *domain,
2007 				      struct intel_iommu *iommu,
2008 				      u8 bus, u8 devfn)
2009 {
2010 	u16 did = domain->iommu_did[iommu->seq_id];
2011 	int translation = CONTEXT_TT_MULTI_LEVEL;
2012 	struct device_domain_info *info = NULL;
2013 	struct context_entry *context;
2014 	unsigned long flags;
2015 	struct dma_pte *pgd;
2016 	int ret, agaw;
2017 
2018 	WARN_ON(did == 0);
2019 
2020 	if (hw_pass_through && domain_type_is_si(domain))
2021 		translation = CONTEXT_TT_PASS_THROUGH;
2022 
2023 	pr_debug("Set context mapping for %02x:%02x.%d\n",
2024 		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
2025 
2026 	BUG_ON(!domain->pgd);
2027 
2028 	spin_lock_irqsave(&device_domain_lock, flags);
2029 	spin_lock(&iommu->lock);
2030 
2031 	ret = -ENOMEM;
2032 	context = iommu_context_addr(iommu, bus, devfn, 1);
2033 	if (!context)
2034 		goto out_unlock;
2035 
2036 	ret = 0;
2037 	if (context_present(context))
2038 		goto out_unlock;
2039 
2040 	/*
2041 	 * For kdump cases, old valid entries may be cached due to the
2042 	 * in-flight DMA and copied pgtable, but there is no unmapping
2043 	 * behaviour for them, thus we need an explicit cache flush for
2044 	 * the newly-mapped device. For kdump, at this point, the device
2045 	 * is supposed to finish reset at its driver probe stage, so no
2046 	 * in-flight DMA will exist, and we don't need to worry about
2047 	 * it hereafter.
2048 	 */
2049 	if (context_copied(context)) {
2050 		u16 did_old = context_domain_id(context);
2051 
2052 		if (did_old >= 0 && did_old < cap_ndoms(iommu->cap)) {
2053 			iommu->flush.flush_context(iommu, did_old,
2054 						   (((u16)bus) << 8) | devfn,
2055 						   DMA_CCMD_MASK_NOBIT,
2056 						   DMA_CCMD_DEVICE_INVL);
2057 			iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2058 						 DMA_TLB_DSI_FLUSH);
2059 		}
2060 	}
2061 
2062 	pgd = domain->pgd;
2063 
2064 	context_clear_entry(context);
2065 	context_set_domain_id(context, did);
2066 
2067 	/*
2068 	 * Skip top levels of page tables for an iommu which has less agaw
2069 	 * than the default.  Unnecessary for PT mode.
2070 	 */
2071 	if (translation != CONTEXT_TT_PASS_THROUGH) {
2072 		for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
2073 			ret = -ENOMEM;
2074 			pgd = phys_to_virt(dma_pte_addr(pgd));
2075 			if (!dma_pte_present(pgd))
2076 				goto out_unlock;
2077 		}
2078 
2079 		info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2080 		if (info && info->ats_supported)
2081 			translation = CONTEXT_TT_DEV_IOTLB;
2082 		else
2083 			translation = CONTEXT_TT_MULTI_LEVEL;
2084 
2085 		context_set_address_root(context, virt_to_phys(pgd));
2086 		context_set_address_width(context, iommu->agaw);
2087 	} else {
2088 		/*
2089 		 * In pass through mode, AW must be programmed to
2090 		 * indicate the largest AGAW value supported by
2091 		 * hardware. And ASR is ignored by hardware.
2092 		 */
2093 		context_set_address_width(context, iommu->msagaw);
2094 	}
2095 
2096 	context_set_translation_type(context, translation);
2097 	context_set_fault_enable(context);
2098 	context_set_present(context);
2099 	domain_flush_cache(domain, context, sizeof(*context));
2100 
2101 	/*
2102 	 * It's a non-present to present mapping. If hardware doesn't cache
2103 	 * non-present entries we only need to flush the write-buffer. If it
2104 	 * _does_ cache non-present entries, then it does so in the special
2105 	 * domain #0, which we have to flush:
2106 	 */
2107 	if (cap_caching_mode(iommu->cap)) {
2108 		iommu->flush.flush_context(iommu, 0,
2109 					   (((u16)bus) << 8) | devfn,
2110 					   DMA_CCMD_MASK_NOBIT,
2111 					   DMA_CCMD_DEVICE_INVL);
2112 		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2113 	} else {
2114 		iommu_flush_write_buffer(iommu);
2115 	}
2116 	iommu_enable_dev_iotlb(info);
2117 
2118 	ret = 0;
2119 
2120 out_unlock:
2121 	spin_unlock(&iommu->lock);
2122 	spin_unlock_irqrestore(&device_domain_lock, flags);
2123 
2124 	return ret;
2125 }
2126 
2127 struct domain_context_mapping_data {
2128 	struct dmar_domain *domain;
2129 	struct intel_iommu *iommu;
2130 };
2131 
2132 static int domain_context_mapping_cb(struct pci_dev *pdev,
2133 				     u16 alias, void *opaque)
2134 {
2135 	struct domain_context_mapping_data *data = opaque;
2136 
2137 	return domain_context_mapping_one(data->domain, data->iommu,
2138 					  PCI_BUS_NUM(alias), alias & 0xff);
2139 }
2140 
2141 static int
2142 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2143 {
2144 	struct intel_iommu *iommu;
2145 	u8 bus, devfn;
2146 	struct domain_context_mapping_data data;
2147 
2148 	iommu = device_to_iommu(dev, &bus, &devfn);
2149 	if (!iommu)
2150 		return -ENODEV;
2151 
2152 	if (!dev_is_pci(dev))
2153 		return domain_context_mapping_one(domain, iommu, bus, devfn);
2154 
2155 	data.domain = domain;
2156 	data.iommu = iommu;
2157 
2158 	return pci_for_each_dma_alias(to_pci_dev(dev),
2159 				      &domain_context_mapping_cb, &data);
2160 }
2161 
2162 static int domain_context_mapped_cb(struct pci_dev *pdev,
2163 				    u16 alias, void *opaque)
2164 {
2165 	struct intel_iommu *iommu = opaque;
2166 
2167 	return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2168 }
2169 
2170 static int domain_context_mapped(struct device *dev)
2171 {
2172 	struct intel_iommu *iommu;
2173 	u8 bus, devfn;
2174 
2175 	iommu = device_to_iommu(dev, &bus, &devfn);
2176 	if (!iommu)
2177 		return -ENODEV;
2178 
2179 	if (!dev_is_pci(dev))
2180 		return device_context_mapped(iommu, bus, devfn);
2181 
2182 	return !pci_for_each_dma_alias(to_pci_dev(dev),
2183 				       domain_context_mapped_cb, iommu);
2184 }
2185 
2186 /* Returns a number of VTD pages, but aligned to MM page size */
2187 static inline unsigned long aligned_nrpages(unsigned long host_addr,
2188 					    size_t size)
2189 {
2190 	host_addr &= ~PAGE_MASK;
2191 	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2192 }
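/*
 * Worked example: for host_addr 0x1200 and size 0x1000, the in-page
 * offset of 0x200 pushes the mapping across a page boundary, so
 * PAGE_ALIGN(0x200 + 0x1000) >> VTD_PAGE_SHIFT gives 2 VT-d pages
 * (assuming 4KiB MM and VT-d pages).
 */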
2193 
2194 /* Return largest possible superpage level for a given mapping */
2195 static inline int hardware_largepage_caps(struct dmar_domain *domain,
2196 					  unsigned long iov_pfn,
2197 					  unsigned long phy_pfn,
2198 					  unsigned long pages)
2199 {
2200 	int support, level = 1;
2201 	unsigned long pfnmerge;
2202 
2203 	support = domain->iommu_superpage;
2204 
2205 	/* To use a large page, the virtual *and* physical addresses
2206 	   must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2207 	   of them will mean we have to use smaller pages. So just
2208 	   merge them and check both at once. */
2209 	pfnmerge = iov_pfn | phy_pfn;
2210 
2211 	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2212 		pages >>= VTD_STRIDE_SHIFT;
2213 		if (!pages)
2214 			break;
2215 		pfnmerge >>= VTD_STRIDE_SHIFT;
2216 		level++;
2217 		support--;
2218 	}
2219 	return level;
2220 }
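/*
 * Worked example: with 2MiB superpages supported (iommu_superpage of
 * at least 1) and 4KiB base pages, an iov_pfn and phy_pfn that are
 * both 2MiB aligned (low 9 bits of the merged pfn clear) together
 * with a run of at least 512 pages give level 2, i.e. one 2MiB
 * superpage; any low bit set in either pfn keeps the mapping at
 * level 1 (4KiB pages).
 */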
2221 
2222 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2223 			    struct scatterlist *sg, unsigned long phys_pfn,
2224 			    unsigned long nr_pages, int prot)
2225 {
2226 	struct dma_pte *first_pte = NULL, *pte = NULL;
2227 	phys_addr_t uninitialized_var(pteval);
2228 	unsigned long sg_res = 0;
2229 	unsigned int largepage_lvl = 0;
2230 	unsigned long lvl_pages = 0;
2231 
2232 	BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2233 
2234 	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2235 		return -EINVAL;
2236 
2237 	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2238 
2239 	if (!sg) {
2240 		sg_res = nr_pages;
2241 		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2242 	}
2243 
2244 	while (nr_pages > 0) {
2245 		uint64_t tmp;
2246 
2247 		if (!sg_res) {
2248 			unsigned int pgoff = sg->offset & ~PAGE_MASK;
2249 
2250 			sg_res = aligned_nrpages(sg->offset, sg->length);
2251 			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
2252 			sg->dma_length = sg->length;
2253 			pteval = (sg_phys(sg) - pgoff) | prot;
2254 			phys_pfn = pteval >> VTD_PAGE_SHIFT;
2255 		}
2256 
2257 		if (!pte) {
2258 			largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2259 
2260 			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2261 			if (!pte)
2262 				return -ENOMEM;
2263 			/* It is a large page */
2264 			if (largepage_lvl > 1) {
2265 				unsigned long nr_superpages, end_pfn;
2266 
2267 				pteval |= DMA_PTE_LARGE_PAGE;
2268 				lvl_pages = lvl_to_nr_pages(largepage_lvl);
2269 
2270 				nr_superpages = sg_res / lvl_pages;
2271 				end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2272 
2273 				/*
2274 				 * Ensure that old small page tables are
2275 				 * removed to make room for superpage(s).
2276 				 */
2277 				dma_pte_free_pagetable(domain, iov_pfn, end_pfn);
2278 			} else {
2279 				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2280 			}
2281 
2282 		}
2283 		/* We don't need a lock here; nobody else
2284 		 * touches the iova range
2285 		 */
2286 		tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2287 		if (tmp) {
2288 			static int dumps = 5;
2289 			pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2290 				iov_pfn, tmp, (unsigned long long)pteval);
2291 			if (dumps) {
2292 				dumps--;
2293 				debug_dma_dump_mappings(NULL);
2294 			}
2295 			WARN_ON(1);
2296 		}
2297 
2298 		lvl_pages = lvl_to_nr_pages(largepage_lvl);
2299 
2300 		BUG_ON(nr_pages < lvl_pages);
2301 		BUG_ON(sg_res < lvl_pages);
2302 
2303 		nr_pages -= lvl_pages;
2304 		iov_pfn += lvl_pages;
2305 		phys_pfn += lvl_pages;
2306 		pteval += lvl_pages * VTD_PAGE_SIZE;
2307 		sg_res -= lvl_pages;
2308 
2309 		/* If the next PTE would be the first in a new page, then we
2310 		   need to flush the cache on the entries we've just written.
2311 		   And then we'll need to recalculate 'pte', so clear it and
2312 		   let it get set again in the if (!pte) block above.
2313 
2314 		   If we're done (!nr_pages) we need to flush the cache too.
2315 
2316 		   Also if we've been setting superpages, we may need to
2317 		   recalculate 'pte' and switch back to smaller pages for the
2318 		   end of the mapping, if the trailing size is not enough to
2319 		   use another superpage (i.e. sg_res < lvl_pages). */
2320 		pte++;
2321 		if (!nr_pages || first_pte_in_page(pte) ||
2322 		    (largepage_lvl > 1 && sg_res < lvl_pages)) {
2323 			domain_flush_cache(domain, first_pte,
2324 					   (void *)pte - (void *)first_pte);
2325 			pte = NULL;
2326 		}
2327 
2328 		if (!sg_res && nr_pages)
2329 			sg = sg_next(sg);
2330 	}
2331 	return 0;
2332 }
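/*
 * Example of the superpage handling above: mapping 513 contiguous,
 * 2MiB-aligned pages with superpage support writes one
 * DMA_PTE_LARGE_PAGE entry covering the first 512 pages (after
 * freeing any old small-page tables for that range), flushes the
 * cache because sg_res drops below lvl_pages, and then writes a
 * plain 4KiB PTE for the single trailing page.
 */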
2333 
2334 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2335 				    struct scatterlist *sg, unsigned long nr_pages,
2336 				    int prot)
2337 {
2338 	return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2339 }
2340 
2341 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2342 				     unsigned long phys_pfn, unsigned long nr_pages,
2343 				     int prot)
2344 {
2345 	return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2346 }
2347 
2348 static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
2349 {
2350 	if (!iommu)
2351 		return;
2352 
2353 	clear_context_table(iommu, bus, devfn);
2354 	iommu->flush.flush_context(iommu, 0, 0, 0,
2355 					   DMA_CCMD_GLOBAL_INVL);
2356 	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2357 }
2358 
2359 static inline void unlink_domain_info(struct device_domain_info *info)
2360 {
2361 	assert_spin_locked(&device_domain_lock);
2362 	list_del(&info->link);
2363 	list_del(&info->global);
2364 	if (info->dev)
2365 		info->dev->archdata.iommu = NULL;
2366 }
2367 
2368 static void domain_remove_dev_info(struct dmar_domain *domain)
2369 {
2370 	struct device_domain_info *info, *tmp;
2371 	unsigned long flags;
2372 
2373 	spin_lock_irqsave(&device_domain_lock, flags);
2374 	list_for_each_entry_safe(info, tmp, &domain->devices, link)
2375 		__dmar_remove_one_dev_info(info);
2376 	spin_unlock_irqrestore(&device_domain_lock, flags);
2377 }
2378 
2379 /*
2380  * find_domain
2381  * Note: we use struct device->archdata.iommu to store the info
2382  */
2383 static struct dmar_domain *find_domain(struct device *dev)
2384 {
2385 	struct device_domain_info *info;
2386 
2387 	/* No lock here, assumes no domain exit in normal case */
2388 	info = dev->archdata.iommu;
2389 	if (info)
2390 		return info->domain;
2391 	return NULL;
2392 }
2393 
2394 static inline struct device_domain_info *
2395 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2396 {
2397 	struct device_domain_info *info;
2398 
2399 	list_for_each_entry(info, &device_domain_list, global)
2400 		if (info->iommu->segment == segment && info->bus == bus &&
2401 		    info->devfn == devfn)
2402 			return info;
2403 
2404 	return NULL;
2405 }
2406 
2407 static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2408 						    int bus, int devfn,
2409 						    struct device *dev,
2410 						    struct dmar_domain *domain)
2411 {
2412 	struct dmar_domain *found = NULL;
2413 	struct device_domain_info *info;
2414 	unsigned long flags;
2415 	int ret;
2416 
2417 	info = alloc_devinfo_mem();
2418 	if (!info)
2419 		return NULL;
2420 
2421 	info->bus = bus;
2422 	info->devfn = devfn;
2423 	info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2424 	info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2425 	info->ats_qdep = 0;
2426 	info->dev = dev;
2427 	info->domain = domain;
2428 	info->iommu = iommu;
2429 
2430 	if (dev && dev_is_pci(dev)) {
2431 		struct pci_dev *pdev = to_pci_dev(info->dev);
2432 
2433 		if (ecap_dev_iotlb_support(iommu->ecap) &&
2434 		    pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2435 		    dmar_find_matched_atsr_unit(pdev))
2436 			info->ats_supported = 1;
2437 
2438 		if (ecs_enabled(iommu)) {
2439 			if (pasid_enabled(iommu)) {
2440 				int features = pci_pasid_features(pdev);
2441 				if (features >= 0)
2442 					info->pasid_supported = features | 1;
2443 			}
2444 
2445 			if (info->ats_supported && ecap_prs(iommu->ecap) &&
2446 			    pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2447 				info->pri_supported = 1;
2448 		}
2449 	}
2450 
2451 	spin_lock_irqsave(&device_domain_lock, flags);
2452 	if (dev)
2453 		found = find_domain(dev);
2454 
2455 	if (!found) {
2456 		struct device_domain_info *info2;
2457 		info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2458 		if (info2) {
2459 			found      = info2->domain;
2460 			info2->dev = dev;
2461 		}
2462 	}
2463 
2464 	if (found) {
2465 		spin_unlock_irqrestore(&device_domain_lock, flags);
2466 		free_devinfo_mem(info);
2467 		/* Caller must free the original domain */
2468 		return found;
2469 	}
2470 
2471 	spin_lock(&iommu->lock);
2472 	ret = domain_attach_iommu(domain, iommu);
2473 	spin_unlock(&iommu->lock);
2474 
2475 	if (ret) {
2476 		spin_unlock_irqrestore(&device_domain_lock, flags);
2477 		free_devinfo_mem(info);
2478 		return NULL;
2479 	}
2480 
2481 	list_add(&info->link, &domain->devices);
2482 	list_add(&info->global, &device_domain_list);
2483 	if (dev)
2484 		dev->archdata.iommu = info;
2485 	spin_unlock_irqrestore(&device_domain_lock, flags);
2486 
2487 	if (dev && domain_context_mapping(domain, dev)) {
2488 		pr_err("Domain context map for %s failed\n", dev_name(dev));
2489 		dmar_remove_one_dev_info(domain, dev);
2490 		return NULL;
2491 	}
2492 
2493 	return domain;
2494 }
2495 
2496 static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2497 {
2498 	*(u16 *)opaque = alias;
2499 	return 0;
2500 }
2501 
2502 static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
2503 {
2504 	struct device_domain_info *info = NULL;
2505 	struct dmar_domain *domain = NULL;
2506 	struct intel_iommu *iommu;
2507 	u16 req_id, dma_alias;
2508 	unsigned long flags;
2509 	u8 bus, devfn;
2510 
2511 	iommu = device_to_iommu(dev, &bus, &devfn);
2512 	if (!iommu)
2513 		return NULL;
2514 
2515 	req_id = ((u16)bus << 8) | devfn;
2516 
2517 	if (dev_is_pci(dev)) {
2518 		struct pci_dev *pdev = to_pci_dev(dev);
2519 
2520 		pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2521 
2522 		spin_lock_irqsave(&device_domain_lock, flags);
2523 		info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2524 						      PCI_BUS_NUM(dma_alias),
2525 						      dma_alias & 0xff);
2526 		if (info) {
2527 			iommu = info->iommu;
2528 			domain = info->domain;
2529 		}
2530 		spin_unlock_irqrestore(&device_domain_lock, flags);
2531 
2532 		/* DMA alias already has a domain, use it */
2533 		if (info)
2534 			goto out;
2535 	}
2536 
2537 	/* Allocate and initialize new domain for the device */
2538 	domain = alloc_domain(0);
2539 	if (!domain)
2540 		return NULL;
2541 	if (domain_init(domain, iommu, gaw)) {
2542 		domain_exit(domain);
2543 		return NULL;
2544 	}
2545 
2546 out:
2547 
2548 	return domain;
2549 }
2550 
2551 static struct dmar_domain *set_domain_for_dev(struct device *dev,
2552 					      struct dmar_domain *domain)
2553 {
2554 	struct intel_iommu *iommu;
2555 	struct dmar_domain *tmp;
2556 	u16 req_id, dma_alias;
2557 	u8 bus, devfn;
2558 
2559 	iommu = device_to_iommu(dev, &bus, &devfn);
2560 	if (!iommu)
2561 		return NULL;
2562 
2563 	req_id = ((u16)bus << 8) | devfn;
2564 
2565 	if (dev_is_pci(dev)) {
2566 		struct pci_dev *pdev = to_pci_dev(dev);
2567 
2568 		pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2569 
2570 		/* register PCI DMA alias device */
2571 		if (req_id != dma_alias) {
2572 			tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2573 					dma_alias & 0xff, NULL, domain);
2574 
2575 			if (!tmp || tmp != domain)
2576 				return tmp;
2577 		}
2578 	}
2579 
2580 	tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2581 	if (!tmp || tmp != domain)
2582 		return tmp;
2583 
2584 	return domain;
2585 }
2586 
2587 static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2588 {
2589 	struct dmar_domain *domain, *tmp;
2590 
2591 	domain = find_domain(dev);
2592 	if (domain)
2593 		goto out;
2594 
2595 	domain = find_or_alloc_domain(dev, gaw);
2596 	if (!domain)
2597 		goto out;
2598 
2599 	tmp = set_domain_for_dev(dev, domain);
2600 	if (!tmp || domain != tmp) {
2601 		domain_exit(domain);
2602 		domain = tmp;
2603 	}
2604 
2605 out:
2606 
2607 	return domain;
2608 }
2609 
2610 static int iommu_domain_identity_map(struct dmar_domain *domain,
2611 				     unsigned long long start,
2612 				     unsigned long long end)
2613 {
2614 	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2615 	unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2616 
2617 	if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2618 			  dma_to_mm_pfn(last_vpfn))) {
2619 		pr_err("Reserving iova failed\n");
2620 		return -ENOMEM;
2621 	}
2622 
2623 	pr_debug("Mapping reserved region %llx-%llx\n", start, end);
2624 	/*
2625 	 * The RMRR range might overlap with the physical memory range,
2626 	 * so clear it first
2627 	 */
2628 	dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2629 
2630 	return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2631 				  last_vpfn - first_vpfn + 1,
2632 				  DMA_PTE_READ|DMA_PTE_WRITE);
2633 }
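/*
 * Worked example: an RMRR of 0xd0000000 - 0xd00fffff gives
 * first_vpfn 0xd0000 and last_vpfn 0xd00ff, so 256 pages are
 * reserved in the iova allocator and then identity mapped
 * (iova pfn == phys pfn) with read/write permission.
 */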
2634 
2635 static int domain_prepare_identity_map(struct device *dev,
2636 				       struct dmar_domain *domain,
2637 				       unsigned long long start,
2638 				       unsigned long long end)
2639 {
2640 	/* For _hardware_ passthrough, don't bother. But for software
2641 	   passthrough, we do it anyway -- it may indicate a memory
2642 	   range which is reserved in E820 and so didn't get set
2643 	   up in si_domain to start with */
2644 	if (domain == si_domain && hw_pass_through) {
2645 		pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2646 			dev_name(dev), start, end);
2647 		return 0;
2648 	}
2649 
2650 	pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2651 		dev_name(dev), start, end);
2652 
2653 	if (end < start) {
2654 		WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2655 			"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2656 			dmi_get_system_info(DMI_BIOS_VENDOR),
2657 			dmi_get_system_info(DMI_BIOS_VERSION),
2658 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
2659 		return -EIO;
2660 	}
2661 
2662 	if (end >> agaw_to_width(domain->agaw)) {
2663 		WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2664 		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2665 		     agaw_to_width(domain->agaw),
2666 		     dmi_get_system_info(DMI_BIOS_VENDOR),
2667 		     dmi_get_system_info(DMI_BIOS_VERSION),
2668 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
2669 		return -EIO;
2670 	}
2671 
2672 	return iommu_domain_identity_map(domain, start, end);
2673 }
2674 
2675 static int iommu_prepare_identity_map(struct device *dev,
2676 				      unsigned long long start,
2677 				      unsigned long long end)
2678 {
2679 	struct dmar_domain *domain;
2680 	int ret;
2681 
2682 	domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2683 	if (!domain)
2684 		return -ENOMEM;
2685 
2686 	ret = domain_prepare_identity_map(dev, domain, start, end);
2687 	if (ret)
2688 		domain_exit(domain);
2689 
2690 	return ret;
2691 }
2692 
2693 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2694 					 struct device *dev)
2695 {
2696 	if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2697 		return 0;
2698 	return iommu_prepare_identity_map(dev, rmrr->base_address,
2699 					  rmrr->end_address);
2700 }
2701 
2702 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2703 static inline void iommu_prepare_isa(void)
2704 {
2705 	struct pci_dev *pdev;
2706 	int ret;
2707 
2708 	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2709 	if (!pdev)
2710 		return;
2711 
2712 	pr_info("Prepare 0-16MiB unity mapping for LPC\n");
2713 	ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2714 
2715 	if (ret)
2716 		pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
2717 
2718 	pci_dev_put(pdev);
2719 }
2720 #else
2721 static inline void iommu_prepare_isa(void)
2722 {
2723 	return;
2724 }
2725 #endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
2726 
2727 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2728 
2729 static int __init si_domain_init(int hw)
2730 {
2731 	int nid, ret = 0;
2732 
2733 	si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2734 	if (!si_domain)
2735 		return -EFAULT;
2736 
2737 	if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2738 		domain_exit(si_domain);
2739 		return -EFAULT;
2740 	}
2741 
2742 	pr_debug("Identity mapping domain allocated\n");
2743 
2744 	if (hw)
2745 		return 0;
2746 
2747 	for_each_online_node(nid) {
2748 		unsigned long start_pfn, end_pfn;
2749 		int i;
2750 
2751 		for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2752 			ret = iommu_domain_identity_map(si_domain,
2753 					PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2754 			if (ret)
2755 				return ret;
2756 		}
2757 	}
2758 
2759 	return 0;
2760 }
2761 
2762 static int identity_mapping(struct device *dev)
2763 {
2764 	struct device_domain_info *info;
2765 
2766 	if (likely(!iommu_identity_mapping))
2767 		return 0;
2768 
2769 	info = dev->archdata.iommu;
2770 	if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2771 		return (info->domain == si_domain);
2772 
2773 	return 0;
2774 }
2775 
2776 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2777 {
2778 	struct dmar_domain *ndomain;
2779 	struct intel_iommu *iommu;
2780 	u8 bus, devfn;
2781 
2782 	iommu = device_to_iommu(dev, &bus, &devfn);
2783 	if (!iommu)
2784 		return -ENODEV;
2785 
2786 	ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2787 	if (ndomain != domain)
2788 		return -EBUSY;
2789 
2790 	return 0;
2791 }
2792 
2793 static bool device_has_rmrr(struct device *dev)
2794 {
2795 	struct dmar_rmrr_unit *rmrr;
2796 	struct device *tmp;
2797 	int i;
2798 
2799 	rcu_read_lock();
2800 	for_each_rmrr_units(rmrr) {
2801 		/*
2802 		 * Return TRUE if this RMRR contains the device that
2803 		 * is passed in.
2804 		 */
2805 		for_each_active_dev_scope(rmrr->devices,
2806 					  rmrr->devices_cnt, i, tmp)
2807 			if (tmp == dev) {
2808 				rcu_read_unlock();
2809 				return true;
2810 			}
2811 	}
2812 	rcu_read_unlock();
2813 	return false;
2814 }
2815 
2816 /*
2817  * There are a couple of cases where we need to restrict the functionality of
2818  * devices associated with RMRRs.  The first is when evaluating a device for
2819  * identity mapping because problems exist when devices are moved in and out
2820  * of domains and their respective RMRR information is lost.  This means that
2821  * a device with associated RMRRs will never be in a "passthrough" domain.
2822  * The second is use of the device through the IOMMU API.  This interface
2823  * expects to have full control of the IOVA space for the device.  We cannot
2824  * satisfy both the requirement that RMRR access is maintained and have an
2825  * unencumbered IOVA space.  We also have no ability to quiesce the device's
2826  * use of the RMRR space or even inform the IOMMU API user of the restriction.
2827  * We therefore prevent devices associated with an RMRR from participating in
2828  * the IOMMU API, which eliminates them from device assignment.
2829  *
2830  * In both cases we assume that PCI USB devices with RMRRs have them largely
2831  * for historical reasons and that the RMRR space is not actively used post
2832  * boot.  This exclusion may change if vendors begin to abuse it.
2833  *
2834  * The same exception is made for graphics devices, with the requirement that
2835  * any use of the RMRR regions will be torn down before assigning the device
2836  * to a guest.
2837  */
2838 static bool device_is_rmrr_locked(struct device *dev)
2839 {
2840 	if (!device_has_rmrr(dev))
2841 		return false;
2842 
2843 	if (dev_is_pci(dev)) {
2844 		struct pci_dev *pdev = to_pci_dev(dev);
2845 
2846 		if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2847 			return false;
2848 	}
2849 
2850 	return true;
2851 }
2852 
2853 static int iommu_should_identity_map(struct device *dev, int startup)
2854 {
2855 
2856 	if (dev_is_pci(dev)) {
2857 		struct pci_dev *pdev = to_pci_dev(dev);
2858 
2859 		if (device_is_rmrr_locked(dev))
2860 			return 0;
2861 
2862 		if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2863 			return 1;
2864 
2865 		if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2866 			return 1;
2867 
2868 		if (!(iommu_identity_mapping & IDENTMAP_ALL))
2869 			return 0;
2870 
2871 		/*
2872 		 * We want to start off with all devices in the 1:1 domain, and
2873 		 * take them out later if we find they can't access all of memory.
2874 		 *
2875 		 * However, we can't do this for PCI devices behind bridges,
2876 		 * because all PCI devices behind the same bridge will end up
2877 		 * with the same source-id on their transactions.
2878 		 *
2879 		 * Practically speaking, we can't change things around for these
2880 		 * devices at run-time, because we can't be sure there'll be no
2881 		 * DMA transactions in flight for any of their siblings.
2882 		 *
2883 		 * So PCI devices (unless they're on the root bus) as well as
2884 		 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2885 		 * the 1:1 domain, just in _case_ one of their siblings turns out
2886 		 * not to be able to map all of memory.
2887 		 */
2888 		if (!pci_is_pcie(pdev)) {
2889 			if (!pci_is_root_bus(pdev->bus))
2890 				return 0;
2891 			if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2892 				return 0;
2893 		} else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2894 			return 0;
2895 	} else {
2896 		if (device_has_rmrr(dev))
2897 			return 0;
2898 	}
2899 
2900 	/*
2901 	 * At boot time, we don't yet know if devices will be 64-bit capable.
2902 	 * Assume that they will -- if they turn out not to be, then we can
2903 	 * take them out of the 1:1 domain later.
2904 	 */
2905 	if (!startup) {
2906 		/*
2907 		 * If the device's dma_mask is less than the system's memory
2908 		 * size then this is not a candidate for identity mapping.
2909 		 */
2910 		u64 dma_mask = *dev->dma_mask;
2911 
2912 		if (dev->coherent_dma_mask &&
2913 		    dev->coherent_dma_mask < dma_mask)
2914 			dma_mask = dev->coherent_dma_mask;
2915 
2916 		return dma_mask >= dma_get_required_mask(dev);
2917 	}
2918 
2919 	return 1;
2920 }
2921 
2922 static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2923 {
2924 	int ret;
2925 
2926 	if (!iommu_should_identity_map(dev, 1))
2927 		return 0;
2928 
2929 	ret = domain_add_dev_info(si_domain, dev);
2930 	if (!ret)
2931 		pr_info("%s identity mapping for device %s\n",
2932 			hw ? "Hardware" : "Software", dev_name(dev));
2933 	else if (ret == -ENODEV)
2934 		/* device not associated with an iommu */
2935 		ret = 0;
2936 
2937 	return ret;
2938 }
2939 
2940 
2941 static int __init iommu_prepare_static_identity_mapping(int hw)
2942 {
2943 	struct pci_dev *pdev = NULL;
2944 	struct dmar_drhd_unit *drhd;
2945 	struct intel_iommu *iommu;
2946 	struct device *dev;
2947 	int i;
2948 	int ret = 0;
2949 
2950 	for_each_pci_dev(pdev) {
2951 		ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2952 		if (ret)
2953 			return ret;
2954 	}
2955 
2956 	for_each_active_iommu(iommu, drhd)
2957 		for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2958 			struct acpi_device_physical_node *pn;
2959 			struct acpi_device *adev;
2960 
2961 			if (dev->bus != &acpi_bus_type)
2962 				continue;
2963 
2964 			adev = to_acpi_device(dev);
2965 			mutex_lock(&adev->physical_node_lock);
2966 			list_for_each_entry(pn, &adev->physical_node_list, node) {
2967 				ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2968 				if (ret)
2969 					break;
2970 			}
2971 			mutex_unlock(&adev->physical_node_lock);
2972 			if (ret)
2973 				return ret;
2974 		}
2975 
2976 	return 0;
2977 }
2978 
2979 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2980 {
2981 	/*
2982 	 * Start from a sane iommu hardware state.
2983 	 * If queued invalidation was already initialized by us
2984 	 * (for example, while enabling interrupt-remapping) then
2985 	 * we already have things rolling from a sane state.
2986 	 */
2987 	if (!iommu->qi) {
2988 		/*
2989 		 * Clear any previous faults.
2990 		 */
2991 		dmar_fault(-1, iommu);
2992 		/*
2993 		 * Disable queued invalidation if supported and already enabled
2994 		 * before OS handover.
2995 		 */
2996 		dmar_disable_qi(iommu);
2997 	}
2998 
2999 	if (dmar_enable_qi(iommu)) {
3000 		/*
3001 		 * Queued invalidation is not enabled; use register-based invalidation
3002 		 */
3003 		iommu->flush.flush_context = __iommu_flush_context;
3004 		iommu->flush.flush_iotlb = __iommu_flush_iotlb;
3005 		pr_info("%s: Using Register based invalidation\n",
3006 			iommu->name);
3007 	} else {
3008 		iommu->flush.flush_context = qi_flush_context;
3009 		iommu->flush.flush_iotlb = qi_flush_iotlb;
3010 		pr_info("%s: Using Queued invalidation\n", iommu->name);
3011 	}
3012 }
3013 
3014 static int copy_context_table(struct intel_iommu *iommu,
3015 			      struct root_entry *old_re,
3016 			      struct context_entry **tbl,
3017 			      int bus, bool ext)
3018 {
3019 	int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
3020 	struct context_entry *new_ce = NULL, ce;
3021 	struct context_entry *old_ce = NULL;
3022 	struct root_entry re;
3023 	phys_addr_t old_ce_phys;
3024 
3025 	tbl_idx = ext ? bus * 2 : bus;
3026 	memcpy(&re, old_re, sizeof(re));
3027 
3028 	for (devfn = 0; devfn < 256; devfn++) {
3029 		/* First calculate the correct index */
3030 		idx = (ext ? devfn * 2 : devfn) % 256;
3031 
3032 		if (idx == 0) {
3033 			/* First save what we may have and clean up */
3034 			if (new_ce) {
3035 				tbl[tbl_idx] = new_ce;
3036 				__iommu_flush_cache(iommu, new_ce,
3037 						    VTD_PAGE_SIZE);
3038 				pos = 1;
3039 			}
3040 
3041 			if (old_ce)
3042 				iounmap(old_ce);
3043 
3044 			ret = 0;
3045 			if (devfn < 0x80)
3046 				old_ce_phys = root_entry_lctp(&re);
3047 			else
3048 				old_ce_phys = root_entry_uctp(&re);
3049 
3050 			if (!old_ce_phys) {
3051 				if (ext && devfn == 0) {
3052 					/* No LCTP, try UCTP */
3053 					devfn = 0x7f;
3054 					continue;
3055 				} else {
3056 					goto out;
3057 				}
3058 			}
3059 
3060 			ret = -ENOMEM;
3061 			old_ce = memremap(old_ce_phys, PAGE_SIZE,
3062 					MEMREMAP_WB);
3063 			if (!old_ce)
3064 				goto out;
3065 
3066 			new_ce = alloc_pgtable_page(iommu->node);
3067 			if (!new_ce)
3068 				goto out_unmap;
3069 
3070 			ret = 0;
3071 		}
3072 
3073 		/* Now copy the context entry */
3074 		memcpy(&ce, old_ce + idx, sizeof(ce));
3075 
3076 		if (!__context_present(&ce))
3077 			continue;
3078 
3079 		did = context_domain_id(&ce);
3080 		if (did >= 0 && did < cap_ndoms(iommu->cap))
3081 			set_bit(did, iommu->domain_ids);
3082 
3083 		/*
3084 		 * We need a marker for copied context entries. This
3085 		 * marker needs to work for the old format as well as
3086 		 * for extended context entries.
3087 		 *
3088 		 * Bit 67 of the context entry is used. In the old
3089 		 * format this bit is available to software, in the
3090 		 * extended format it is the PGE bit, but PGE is ignored
3091 		 * by HW if PASIDs are disabled (and thus still
3092 		 * available).
3093 		 *
3094 		 * So disable PASIDs first and then mark the entry
3095 		 * copied. This means that we don't copy PASID
3096 		 * translations from the old kernel, but this is fine as
3097 		 * faults there are not fatal.
3098 		 */
3099 		context_clear_pasid_enable(&ce);
3100 		context_set_copied(&ce);
3101 
3102 		new_ce[idx] = ce;
3103 	}
3104 
3105 	tbl[tbl_idx + pos] = new_ce;
3106 
3107 	__iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3108 
3109 out_unmap:
3110 	memunmap(old_ce);
3111 
3112 out:
3113 	return ret;
3114 }
3115 
3116 static int copy_translation_tables(struct intel_iommu *iommu)
3117 {
3118 	struct context_entry **ctxt_tbls;
3119 	struct root_entry *old_rt;
3120 	phys_addr_t old_rt_phys;
3121 	int ctxt_table_entries;
3122 	unsigned long flags;
3123 	u64 rtaddr_reg;
3124 	int bus, ret;
3125 	bool new_ext, ext;
3126 
3127 	rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3128 	ext        = !!(rtaddr_reg & DMA_RTADDR_RTT);
3129 	new_ext    = !!ecap_ecs(iommu->ecap);
3130 
3131 	/*
3132 	 * The RTT bit can only be changed when translation is disabled,
3133 	 * but disabling translation would open a window for data
3134 	 * corruption. So bail out and don't copy anything if we would
3135 	 * have to change the bit.
3136 	 */
3137 	if (new_ext != ext)
3138 		return -EINVAL;
3139 
3140 	old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3141 	if (!old_rt_phys)
3142 		return -EINVAL;
3143 
3144 	old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
3145 	if (!old_rt)
3146 		return -ENOMEM;
3147 
3148 	/* This is too big for the stack - allocate it from slab */
3149 	ctxt_table_entries = ext ? 512 : 256;
3150 	ret = -ENOMEM;
3151 	ctxt_tbls = kzalloc(ctxt_table_entries * sizeof(void *), GFP_KERNEL);
3152 	if (!ctxt_tbls)
3153 		goto out_unmap;
3154 
3155 	for (bus = 0; bus < 256; bus++) {
3156 		ret = copy_context_table(iommu, &old_rt[bus],
3157 					 ctxt_tbls, bus, ext);
3158 		if (ret) {
3159 			pr_err("%s: Failed to copy context table for bus %d\n",
3160 				iommu->name, bus);
3161 			continue;
3162 		}
3163 	}
3164 
3165 	spin_lock_irqsave(&iommu->lock, flags);
3166 
3167 	/* Context tables are copied, now write them to the root_entry table */
3168 	for (bus = 0; bus < 256; bus++) {
3169 		int idx = ext ? bus * 2 : bus;
3170 		u64 val;
3171 
3172 		if (ctxt_tbls[idx]) {
3173 			val = virt_to_phys(ctxt_tbls[idx]) | 1;
3174 			iommu->root_entry[bus].lo = val;
3175 		}
3176 
3177 		if (!ext || !ctxt_tbls[idx + 1])
3178 			continue;
3179 
3180 		val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3181 		iommu->root_entry[bus].hi = val;
3182 	}
3183 
3184 	spin_unlock_irqrestore(&iommu->lock, flags);
3185 
3186 	kfree(ctxt_tbls);
3187 
3188 	__iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3189 
3190 	ret = 0;
3191 
3192 out_unmap:
3193 	memunmap(old_rt);
3194 
3195 	return ret;
3196 }
3197 
3198 static int __init init_dmars(void)
3199 {
3200 	struct dmar_drhd_unit *drhd;
3201 	struct dmar_rmrr_unit *rmrr;
3202 	bool copied_tables = false;
3203 	struct device *dev;
3204 	struct intel_iommu *iommu;
3205 	int i, ret, cpu;
3206 
3207 	/*
3208 	 * for each drhd
3209 	 *    allocate root
3210 	 *    initialize and program root entry to not present
3211 	 * endfor
3212 	 */
3213 	for_each_drhd_unit(drhd) {
3214 		/*
3215 		 * lock not needed as this is only incremented in the single
3216 		 * threaded kernel __init code path; all other accesses are
3217 		 * read only
3218 		 */
3219 		if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3220 			g_num_of_iommus++;
3221 			continue;
3222 		}
3223 		pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3224 	}
3225 
3226 	/* Preallocate enough resources for IOMMU hot-addition */
3227 	if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3228 		g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3229 
3230 	g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3231 			GFP_KERNEL);
3232 	if (!g_iommus) {
3233 		pr_err("Allocating global iommu array failed\n");
3234 		ret = -ENOMEM;
3235 		goto error;
3236 	}
3237 
3238 	for_each_possible_cpu(cpu) {
3239 		struct deferred_flush_data *dfd = per_cpu_ptr(&deferred_flush,
3240 							      cpu);
3241 
3242 		dfd->tables = kzalloc(g_num_of_iommus *
3243 				      sizeof(struct deferred_flush_table),
3244 				      GFP_KERNEL);
3245 		if (!dfd->tables) {
3246 			ret = -ENOMEM;
3247 			goto free_g_iommus;
3248 		}
3249 
3250 		spin_lock_init(&dfd->lock);
3251 		setup_timer(&dfd->timer, flush_unmaps_timeout, cpu);
3252 	}
3253 
3254 	for_each_active_iommu(iommu, drhd) {
3255 		g_iommus[iommu->seq_id] = iommu;
3256 
3257 		intel_iommu_init_qi(iommu);
3258 
3259 		ret = iommu_init_domains(iommu);
3260 		if (ret)
3261 			goto free_iommu;
3262 
3263 		init_translation_status(iommu);
3264 
3265 		if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3266 			iommu_disable_translation(iommu);
3267 			clear_translation_pre_enabled(iommu);
3268 			pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3269 				iommu->name);
3270 		}
3271 
3272 		/*
3273 		 * TBD:
3274 		 * we could share the same root & context tables
3275 		 * among all IOMMUs. Need to split it later.
3276 		 */
3277 		ret = iommu_alloc_root_entry(iommu);
3278 		if (ret)
3279 			goto free_iommu;
3280 
3281 		if (translation_pre_enabled(iommu)) {
3282 			pr_info("Translation already enabled - trying to copy translation structures\n");
3283 
3284 			ret = copy_translation_tables(iommu);
3285 			if (ret) {
3286 				/*
3287 				 * We found the IOMMU with translation
3288 				 * enabled - but failed to copy over the
3289 				 * old root-entry table. Try to proceed
3290 				 * by disabling translation now and
3291 				 * allocating a clean root-entry table.
3292 				 * This might cause DMAR faults, but
3293 				 * probably the dump will still succeed.
3294 				 */
3295 				pr_err("Failed to copy translation tables from previous kernel for %s\n",
3296 				       iommu->name);
3297 				iommu_disable_translation(iommu);
3298 				clear_translation_pre_enabled(iommu);
3299 			} else {
3300 				pr_info("Copied translation tables from previous kernel for %s\n",
3301 					iommu->name);
3302 				copied_tables = true;
3303 			}
3304 		}
3305 
3306 		if (!ecap_pass_through(iommu->ecap))
3307 			hw_pass_through = 0;
3308 #ifdef CONFIG_INTEL_IOMMU_SVM
3309 		if (pasid_enabled(iommu))
3310 			intel_svm_alloc_pasid_tables(iommu);
3311 #endif
3312 	}
3313 
3314 	/*
3315 	 * Now that qi is enabled on all iommus, set the root entry and flush
3316 	 * caches. This is required on some Intel X58 chipsets, otherwise the
3317 	 * flush_context function will loop forever and the boot hangs.
3318 	 */
3319 	for_each_active_iommu(iommu, drhd) {
3320 		iommu_flush_write_buffer(iommu);
3321 		iommu_set_root_entry(iommu);
3322 		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3323 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3324 	}
3325 
3326 	if (iommu_pass_through)
3327 		iommu_identity_mapping |= IDENTMAP_ALL;
3328 
3329 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3330 	iommu_identity_mapping |= IDENTMAP_GFX;
3331 #endif
3332 
3333 	check_tylersburg_isoch();
3334 
3335 	if (iommu_identity_mapping) {
3336 		ret = si_domain_init(hw_pass_through);
3337 		if (ret)
3338 			goto free_iommu;
3339 	}
3340 
3341 
3342 	/*
3343 	 * If we copied translations from a previous kernel in the kdump
3344 	 * case, we can not assign the devices to domains now, as that
3345 	 * would eliminate the old mappings. So skip this part and defer
3346 	 * the assignment to device driver initialization time.
3347 	 */
3348 	if (copied_tables)
3349 		goto domains_done;
3350 
3351 	/*
3352 	 * If pass through is not set or not enabled, setup context entries for
3353 	 * identity mappings for rmrr, gfx, and isa and may fall back to static
3354 	 * identity mapping if iommu_identity_mapping is set.
3355 	 */
3356 	if (iommu_identity_mapping) {
3357 		ret = iommu_prepare_static_identity_mapping(hw_pass_through);
3358 		if (ret) {
3359 			pr_crit("Failed to setup IOMMU pass-through\n");
3360 			goto free_iommu;
3361 		}
3362 	}
3363 	/*
3364 	 * For each rmrr
3365 	 *   for each dev attached to rmrr
3366 	 *   do
3367 	 *     locate drhd for dev, alloc domain for dev
3368 	 *     allocate free domain
3369 	 *     allocate page table entries for rmrr
3370 	 *     if context not allocated for bus
3371 	 *           allocate and init context
3372 	 *           set present in root table for this bus
3373 	 *     init context with domain, translation etc
3374 	 *    endfor
3375 	 * endfor
3376 	 */
3377 	pr_info("Setting RMRR:\n");
3378 	for_each_rmrr_units(rmrr) {
3379 		/* Some BIOSes list non-existent devices in the DMAR table. */
3380 		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3381 					  i, dev) {
3382 			ret = iommu_prepare_rmrr_dev(rmrr, dev);
3383 			if (ret)
3384 				pr_err("Mapping reserved region failed\n");
3385 		}
3386 	}
3387 
3388 	iommu_prepare_isa();
3389 
3390 domains_done:
3391 
3392 	/*
3393 	 * for each drhd
3394 	 *   enable fault log
3395 	 *   global invalidate context cache
3396 	 *   global invalidate iotlb
3397 	 *   enable translation
3398 	 */
3399 	for_each_iommu(iommu, drhd) {
3400 		if (drhd->ignored) {
3401 			/*
3402 			 * we always have to disable PMRs or DMA may fail on
3403 			 * this device
3404 			 */
3405 			if (force_on)
3406 				iommu_disable_protect_mem_regions(iommu);
3407 			continue;
3408 		}
3409 
3410 		iommu_flush_write_buffer(iommu);
3411 
3412 #ifdef CONFIG_INTEL_IOMMU_SVM
3413 		if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
3414 			ret = intel_svm_enable_prq(iommu);
3415 			if (ret)
3416 				goto free_iommu;
3417 		}
3418 #endif
3419 		ret = dmar_set_interrupt(iommu);
3420 		if (ret)
3421 			goto free_iommu;
3422 
3423 		if (!translation_pre_enabled(iommu))
3424 			iommu_enable_translation(iommu);
3425 
3426 		iommu_disable_protect_mem_regions(iommu);
3427 	}
3428 
3429 	return 0;
3430 
3431 free_iommu:
3432 	for_each_active_iommu(iommu, drhd) {
3433 		disable_dmar_iommu(iommu);
3434 		free_dmar_iommu(iommu);
3435 	}
3436 free_g_iommus:
3437 	for_each_possible_cpu(cpu)
3438 		kfree(per_cpu_ptr(&deferred_flush, cpu)->tables);
3439 	kfree(g_iommus);
3440 error:
3441 	return ret;
3442 }
3443 
3444 /* This takes a number of _MM_ pages, not VTD pages */
3445 static unsigned long intel_alloc_iova(struct device *dev,
3446 				     struct dmar_domain *domain,
3447 				     unsigned long nrpages, uint64_t dma_mask)
3448 {
3449 	unsigned long iova_pfn = 0;
3450 
3451 	/* Restrict dma_mask to the width that the iommu can handle */
3452 	dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
3453 	/* Ensure we reserve the whole size-aligned region */
3454 	nrpages = __roundup_pow_of_two(nrpages);
3455 
3456 	if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
3457 		/*
3458 		 * First try to allocate an io virtual address in
3459 		 * DMA_BIT_MASK(32) and if that fails then try allocating
3460 		 * from the higher range
3461 		 */
3462 		iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3463 					   IOVA_PFN(DMA_BIT_MASK(32)));
3464 		if (iova_pfn)
3465 			return iova_pfn;
3466 	}
3467 	iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, IOVA_PFN(dma_mask));
3468 	if (unlikely(!iova_pfn)) {
3469 		pr_err("Allocating %ld-page iova for %s failed\n",
3470 		       nrpages, dev_name(dev));
3471 		return 0;
3472 	}
3473 
3474 	return iova_pfn;
3475 }
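/*
 * Worked example: a request for 3 MM pages is rounded up to 4 so the
 * whole size-aligned region is reserved; with a 64-bit dma_mask and
 * dmar_forcedac unset, the allocation is first attempted below 4GiB
 * and only then from the full range, and 0 is returned on failure.
 */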
3476 
3477 static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
3478 {
3479 	struct dmar_domain *domain, *tmp;
3480 	struct dmar_rmrr_unit *rmrr;
3481 	struct device *i_dev;
3482 	int i, ret;
3483 
3484 	domain = find_domain(dev);
3485 	if (domain)
3486 		goto out;
3487 
3488 	domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3489 	if (!domain)
3490 		goto out;
3491 
3492 	/* We have a new domain - setup possible RMRRs for the device */
3493 	rcu_read_lock();
3494 	for_each_rmrr_units(rmrr) {
3495 		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3496 					  i, i_dev) {
3497 			if (i_dev != dev)
3498 				continue;
3499 
3500 			ret = domain_prepare_identity_map(dev, domain,
3501 							  rmrr->base_address,
3502 							  rmrr->end_address);
3503 			if (ret)
3504 				dev_err(dev, "Mapping reserved region failed\n");
3505 		}
3506 	}
3507 	rcu_read_unlock();
3508 
3509 	tmp = set_domain_for_dev(dev, domain);
3510 	if (!tmp || domain != tmp) {
3511 		domain_exit(domain);
3512 		domain = tmp;
3513 	}
3514 
3515 out:
3516 
3517 	if (!domain)
3518 		pr_err("Allocating domain for %s failed\n", dev_name(dev));
3519 
3520 
3521 	return domain;
3522 }
3523 
3524 static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
3525 {
3526 	struct device_domain_info *info;
3527 
3528 	/* No lock here, assumes no domain exit in normal case */
3529 	info = dev->archdata.iommu;
3530 	if (likely(info))
3531 		return info->domain;
3532 
3533 	return __get_valid_domain_for_dev(dev);
3534 }
3535 
3536 /* Check if the dev needs to go through non-identity map and unmap process.*/
3537 static int iommu_no_mapping(struct device *dev)
3538 {
3539 	int found;
3540 
3541 	if (iommu_dummy(dev))
3542 		return 1;
3543 
3544 	if (!iommu_identity_mapping)
3545 		return 0;
3546 
3547 	found = identity_mapping(dev);
3548 	if (found) {
3549 		if (iommu_should_identity_map(dev, 0))
3550 			return 1;
3551 		else {
3552 			/*
3553 			 * A 32 bit DMA device is removed from si_domain and falls
3554 			 * back to non-identity mapping.
3555 			 */
3556 			dmar_remove_one_dev_info(si_domain, dev);
3557 			pr_info("32bit %s uses non-identity mapping\n",
3558 				dev_name(dev));
3559 			return 0;
3560 		}
3561 	} else {
3562 		/*
3563 		 * When a 64 bit DMA device is detached from a VM, the device
3564 		 * is put back into si_domain for identity mapping.
3565 		 */
3566 		if (iommu_should_identity_map(dev, 0)) {
3567 			int ret;
3568 			ret = domain_add_dev_info(si_domain, dev);
3569 			if (!ret) {
3570 				pr_info("64bit %s uses identity mapping\n",
3571 					dev_name(dev));
3572 				return 1;
3573 			}
3574 		}
3575 	}
3576 
3577 	return 0;
3578 }
3579 
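/*
 * Map a physically contiguous buffer for DMA: allocate an IOVA, build the
 * page-table entries and flush as required.  Pass-through devices simply
 * get the physical address back; 0 is returned on failure.
 */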
3580 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3581 				     size_t size, int dir, u64 dma_mask)
3582 {
3583 	struct dmar_domain *domain;
3584 	phys_addr_t start_paddr;
3585 	unsigned long iova_pfn;
3586 	int prot = 0;
3587 	int ret;
3588 	struct intel_iommu *iommu;
3589 	unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3590 
3591 	BUG_ON(dir == DMA_NONE);
3592 
3593 	if (iommu_no_mapping(dev))
3594 		return paddr;
3595 
3596 	domain = get_valid_domain_for_dev(dev);
3597 	if (!domain)
3598 		return 0;
3599 
3600 	iommu = domain_get_iommu(domain);
3601 	size = aligned_nrpages(paddr, size);
3602 
3603 	iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3604 	if (!iova_pfn)
3605 		goto error;
3606 
3607 	/*
3608 	 * Check if DMAR supports zero-length reads on write-only
3609 	 * mappings.
3610 	 */
3611 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3612 			!cap_zlr(iommu->cap))
3613 		prot |= DMA_PTE_READ;
3614 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3615 		prot |= DMA_PTE_WRITE;
3616 	/*
3617 	 * paddr to (paddr + size) might cover partial pages, so map the whole
3618 	 * page.  Note: if two parts of one page are mapped separately, we
3619 	 * might have two guest addresses mapping to the same host paddr, but
3620 	 * this is not a big problem.
3621 	 */
3622 	ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
3623 				 mm_to_dma_pfn(paddr_pfn), size, prot);
3624 	if (ret)
3625 		goto error;
3626 
3627 	/* it's a non-present to present mapping. Only flush if caching mode */
3628 	if (cap_caching_mode(iommu->cap))
3629 		iommu_flush_iotlb_psi(iommu, domain,
3630 				      mm_to_dma_pfn(iova_pfn),
3631 				      size, 0, 1);
3632 	else
3633 		iommu_flush_write_buffer(iommu);
3634 
3635 	start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
3636 	start_paddr += paddr & ~PAGE_MASK;
3637 	return start_paddr;
3638 
3639 error:
3640 	if (iova_pfn)
3641 		free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
3642 	pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
3643 		dev_name(dev), size, (unsigned long long)paddr, dir);
3644 	return 0;
3645 }
3646 
3647 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3648 				 unsigned long offset, size_t size,
3649 				 enum dma_data_direction dir,
3650 				 unsigned long attrs)
3651 {
3652 	return __intel_map_single(dev, page_to_phys(page) + offset, size,
3653 				  dir, *dev->dma_mask);
3654 }
3655 
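/*
 * Drain this CPU's deferred-unmap tables: invalidate the IOTLB for each
 * queued range and release its IOVA and page-table pages.  Called with
 * flush_data->lock held.
 */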
3656 static void flush_unmaps(struct deferred_flush_data *flush_data)
3657 {
3658 	int i, j;
3659 
3660 	flush_data->timer_on = 0;
3661 
3662 	/* just flush them all */
3663 	for (i = 0; i < g_num_of_iommus; i++) {
3664 		struct intel_iommu *iommu = g_iommus[i];
3665 		struct deferred_flush_table *flush_table =
3666 				&flush_data->tables[i];
3667 		if (!iommu)
3668 			continue;
3669 
3670 		if (!flush_table->next)
3671 			continue;
3672 
3673 		/* In caching mode, global flushes make emulation expensive */
3674 		if (!cap_caching_mode(iommu->cap))
3675 			iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3676 					 DMA_TLB_GLOBAL_FLUSH);
3677 		for (j = 0; j < flush_table->next; j++) {
3678 			unsigned long mask;
3679 			struct deferred_flush_entry *entry =
3680 						&flush_table->entries[j];
3681 			unsigned long iova_pfn = entry->iova_pfn;
3682 			unsigned long nrpages = entry->nrpages;
3683 			struct dmar_domain *domain = entry->domain;
3684 			struct page *freelist = entry->freelist;
3685 
3686 			/* On real hardware multiple invalidations are expensive */
3687 			if (cap_caching_mode(iommu->cap))
3688 				iommu_flush_iotlb_psi(iommu, domain,
3689 					mm_to_dma_pfn(iova_pfn),
3690 					nrpages, !freelist, 0);
3691 			else {
3692 				mask = ilog2(nrpages);
3693 				iommu_flush_dev_iotlb(domain,
3694 						(uint64_t)iova_pfn << PAGE_SHIFT, mask);
3695 			}
3696 			free_iova_fast(&domain->iovad, iova_pfn, nrpages);
3697 			if (freelist)
3698 				dma_free_pagelist(freelist);
3699 		}
3700 		flush_table->next = 0;
3701 	}
3702 
3703 	flush_data->size = 0;
3704 }
3705 
3706 static void flush_unmaps_timeout(unsigned long cpuid)
3707 {
3708 	struct deferred_flush_data *flush_data = per_cpu_ptr(&deferred_flush, cpuid);
3709 	unsigned long flags;
3710 
3711 	spin_lock_irqsave(&flush_data->lock, flags);
3712 	flush_unmaps(flush_data);
3713 	spin_unlock_irqrestore(&flush_data->lock, flags);
3714 }
3715 
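/*
 * Queue an unmapped range on the current CPU's deferred-flush table and
 * arm the flush timer.  Once HIGH_WATER_MARK entries have accumulated,
 * every online CPU is drained synchronously.
 */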
3716 static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn,
3717 		      unsigned long nrpages, struct page *freelist)
3718 {
3719 	unsigned long flags;
3720 	int entry_id, iommu_id;
3721 	struct intel_iommu *iommu;
3722 	struct deferred_flush_entry *entry;
3723 	struct deferred_flush_data *flush_data;
3724 	unsigned int cpuid;
3725 
3726 	cpuid = get_cpu();
3727 	flush_data = per_cpu_ptr(&deferred_flush, cpuid);
3728 
3729 	/* Flush all CPUs' entries to avoid deferring too much.  If
3730 	 * this becomes a bottleneck, can just flush us, and rely on
3731 	 * flush timer for the rest.
3732 	 */
3733 	if (flush_data->size == HIGH_WATER_MARK) {
3734 		int cpu;
3735 
3736 		for_each_online_cpu(cpu)
3737 			flush_unmaps_timeout(cpu);
3738 	}
3739 
3740 	spin_lock_irqsave(&flush_data->lock, flags);
3741 
3742 	iommu = domain_get_iommu(dom);
3743 	iommu_id = iommu->seq_id;
3744 
3745 	entry_id = flush_data->tables[iommu_id].next;
3746 	++(flush_data->tables[iommu_id].next);
3747 
3748 	entry = &flush_data->tables[iommu_id].entries[entry_id];
3749 	entry->domain = dom;
3750 	entry->iova_pfn = iova_pfn;
3751 	entry->nrpages = nrpages;
3752 	entry->freelist = freelist;
3753 
3754 	if (!flush_data->timer_on) {
3755 		mod_timer(&flush_data->timer, jiffies + msecs_to_jiffies(10));
3756 		flush_data->timer_on = 1;
3757 	}
3758 	flush_data->size++;
3759 	spin_unlock_irqrestore(&flush_data->lock, flags);
3760 
3761 	put_cpu();
3762 }
3763 
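/*
 * Tear down the translation for a DMA handle.  In strict mode the IOTLB
 * is flushed and the IOVA freed immediately; otherwise the work is
 * deferred via add_unmap().
 */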
3764 static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
3765 {
3766 	struct dmar_domain *domain;
3767 	unsigned long start_pfn, last_pfn;
3768 	unsigned long nrpages;
3769 	unsigned long iova_pfn;
3770 	struct intel_iommu *iommu;
3771 	struct page *freelist;
3772 
3773 	if (iommu_no_mapping(dev))
3774 		return;
3775 
3776 	domain = find_domain(dev);
3777 	BUG_ON(!domain);
3778 
3779 	iommu = domain_get_iommu(domain);
3780 
3781 	iova_pfn = IOVA_PFN(dev_addr);
3782 
3783 	nrpages = aligned_nrpages(dev_addr, size);
3784 	start_pfn = mm_to_dma_pfn(iova_pfn);
3785 	last_pfn = start_pfn + nrpages - 1;
3786 
3787 	pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3788 		 dev_name(dev), start_pfn, last_pfn);
3789 
3790 	freelist = domain_unmap(domain, start_pfn, last_pfn);
3791 
3792 	if (intel_iommu_strict) {
3793 		iommu_flush_iotlb_psi(iommu, domain, start_pfn,
3794 				      nrpages, !freelist, 0);
3795 		/* free iova */
3796 		free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
3797 		dma_free_pagelist(freelist);
3798 	} else {
3799 		add_unmap(domain, iova_pfn, nrpages, freelist);
3800 		/*
3801 		 * queue up the release of the unmap to save the 1/6th of the
3802 		 * cpu used up by the iotlb flush operation...
3803 		 */
3804 	}
3805 }
3806 
3807 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3808 			     size_t size, enum dma_data_direction dir,
3809 			     unsigned long attrs)
3810 {
3811 	intel_unmap(dev, dev_addr, size);
3812 }
3813 
3814 static void *intel_alloc_coherent(struct device *dev, size_t size,
3815 				  dma_addr_t *dma_handle, gfp_t flags,
3816 				  unsigned long attrs)
3817 {
3818 	struct page *page = NULL;
3819 	int order;
3820 
3821 	size = PAGE_ALIGN(size);
3822 	order = get_order(size);
3823 
3824 	if (!iommu_no_mapping(dev))
3825 		flags &= ~(GFP_DMA | GFP_DMA32);
3826 	else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3827 		if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
3828 			flags |= GFP_DMA;
3829 		else
3830 			flags |= GFP_DMA32;
3831 	}
3832 
3833 	if (gfpflags_allow_blocking(flags)) {
3834 		unsigned int count = size >> PAGE_SHIFT;
3835 
3836 		page = dma_alloc_from_contiguous(dev, count, order);
3837 		if (page && iommu_no_mapping(dev) &&
3838 		    page_to_phys(page) + size > dev->coherent_dma_mask) {
3839 			dma_release_from_contiguous(dev, page, count);
3840 			page = NULL;
3841 		}
3842 	}
3843 
3844 	if (!page)
3845 		page = alloc_pages(flags, order);
3846 	if (!page)
3847 		return NULL;
3848 	memset(page_address(page), 0, size);
3849 
3850 	*dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3851 					 DMA_BIDIRECTIONAL,
3852 					 dev->coherent_dma_mask);
3853 	if (*dma_handle)
3854 		return page_address(page);
3855 	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3856 		__free_pages(page, order);
3857 
3858 	return NULL;
3859 }
3860 
3861 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3862 				dma_addr_t dma_handle, unsigned long attrs)
3863 {
3864 	int order;
3865 	struct page *page = virt_to_page(vaddr);
3866 
3867 	size = PAGE_ALIGN(size);
3868 	order = get_order(size);
3869 
3870 	intel_unmap(dev, dma_handle, size);
3871 	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3872 		__free_pages(page, order);
3873 }
3874 
3875 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3876 			   int nelems, enum dma_data_direction dir,
3877 			   unsigned long attrs)
3878 {
3879 	dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3880 	unsigned long nrpages = 0;
3881 	struct scatterlist *sg;
3882 	int i;
3883 
3884 	for_each_sg(sglist, sg, nelems, i) {
3885 		nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3886 	}
3887 
3888 	intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
3889 }
3890 
3891 static int intel_nontranslate_map_sg(struct device *hddev,
3892 	struct scatterlist *sglist, int nelems, int dir)
3893 {
3894 	int i;
3895 	struct scatterlist *sg;
3896 
3897 	for_each_sg(sglist, sg, nelems, i) {
3898 		BUG_ON(!sg_page(sg));
3899 		sg->dma_address = sg_phys(sg);
3900 		sg->dma_length = sg->length;
3901 	}
3902 	return nelems;
3903 }
3904 
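/*
 * Map a scatterlist into one contiguous IOVA range for @dev, flushing
 * the IOTLB only when caching mode requires it.
 */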
3905 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3906 			enum dma_data_direction dir, unsigned long attrs)
3907 {
3908 	int i;
3909 	struct dmar_domain *domain;
3910 	size_t size = 0;
3911 	int prot = 0;
3912 	unsigned long iova_pfn;
3913 	int ret;
3914 	struct scatterlist *sg;
3915 	unsigned long start_vpfn;
3916 	struct intel_iommu *iommu;
3917 
3918 	BUG_ON(dir == DMA_NONE);
3919 	if (iommu_no_mapping(dev))
3920 		return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
3921 
3922 	domain = get_valid_domain_for_dev(dev);
3923 	if (!domain)
3924 		return 0;
3925 
3926 	iommu = domain_get_iommu(domain);
3927 
3928 	for_each_sg(sglist, sg, nelems, i)
3929 		size += aligned_nrpages(sg->offset, sg->length);
3930 
3931 	iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3932 				*dev->dma_mask);
3933 	if (!iova_pfn) {
3934 		sglist->dma_length = 0;
3935 		return 0;
3936 	}
3937 
3938 	/*
3939 	 * Check if DMAR supports zero-length reads on write-only
3940 	 * mappings.
3941 	 */
3942 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3943 			!cap_zlr(iommu->cap))
3944 		prot |= DMA_PTE_READ;
3945 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3946 		prot |= DMA_PTE_WRITE;
3947 
3948 	start_vpfn = mm_to_dma_pfn(iova_pfn);
3949 
3950 	ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3951 	if (unlikely(ret)) {
3952 		dma_pte_free_pagetable(domain, start_vpfn,
3953 				       start_vpfn + size - 1);
3954 		free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
3955 		return 0;
3956 	}
3957 
3958 	/* it's a non-present to present mapping. Only flush if caching mode */
3959 	if (cap_caching_mode(iommu->cap))
3960 		iommu_flush_iotlb_psi(iommu, domain, start_vpfn, size, 0, 1);
3961 	else
3962 		iommu_flush_write_buffer(iommu);
3963 
3964 	return nelems;
3965 }
3966 
3967 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3968 {
3969 	return !dma_addr;
3970 }
3971 
3972 struct dma_map_ops intel_dma_ops = {
3973 	.alloc = intel_alloc_coherent,
3974 	.free = intel_free_coherent,
3975 	.map_sg = intel_map_sg,
3976 	.unmap_sg = intel_unmap_sg,
3977 	.map_page = intel_map_page,
3978 	.unmap_page = intel_unmap_page,
3979 	.mapping_error = intel_mapping_error,
3980 };
3981 
3982 static inline int iommu_domain_cache_init(void)
3983 {
3984 	int ret = 0;
3985 
3986 	iommu_domain_cache = kmem_cache_create("iommu_domain",
3987 					 sizeof(struct dmar_domain),
3988 					 0,
3989 					 SLAB_HWCACHE_ALIGN,
3990 
3991 					 NULL);
3992 	if (!iommu_domain_cache) {
3993 		pr_err("Couldn't create iommu_domain cache\n");
3994 		ret = -ENOMEM;
3995 	}
3996 
3997 	return ret;
3998 }
3999 
4000 static inline int iommu_devinfo_cache_init(void)
4001 {
4002 	int ret = 0;
4003 
4004 	iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
4005 					 sizeof(struct device_domain_info),
4006 					 0,
4007 					 SLAB_HWCACHE_ALIGN,
4008 					 NULL);
4009 	if (!iommu_devinfo_cache) {
4010 		pr_err("Couldn't create devinfo cache\n");
4011 		ret = -ENOMEM;
4012 	}
4013 
4014 	return ret;
4015 }
4016 
4017 static int __init iommu_init_mempool(void)
4018 {
4019 	int ret;
4020 	ret = iova_cache_get();
4021 	if (ret)
4022 		return ret;
4023 
4024 	ret = iommu_domain_cache_init();
4025 	if (ret)
4026 		goto domain_error;
4027 
4028 	ret = iommu_devinfo_cache_init();
4029 	if (!ret)
4030 		return ret;
4031 
4032 	kmem_cache_destroy(iommu_domain_cache);
4033 domain_error:
4034 	iova_cache_put();
4035 
4036 	return -ENOMEM;
4037 }
4038 
4039 static void __init iommu_exit_mempool(void)
4040 {
4041 	kmem_cache_destroy(iommu_devinfo_cache);
4042 	kmem_cache_destroy(iommu_domain_cache);
4043 	iova_cache_put();
4044 }
4045 
4046 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
4047 {
4048 	struct dmar_drhd_unit *drhd;
4049 	u32 vtbar;
4050 	int rc;
4051 
4052 	/* We know that this device on this chipset has its own IOMMU.
4053 	 * If we find it under a different IOMMU, then the BIOS is lying
4054 	 * to us. Hope that the IOMMU for this device is actually
4055 	 * disabled, and it needs no translation...
4056 	 */
4057 	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
4058 	if (rc) {
4059 		/* "can't" happen */
4060 		dev_info(&pdev->dev, "failed to run vt-d quirk\n");
4061 		return;
4062 	}
4063 	vtbar &= 0xffff0000;
4064 
4065 	/* we know that this IOMMU should be at offset 0xa000 from vtbar */
4066 	drhd = dmar_find_matched_drhd_unit(pdev);
4067 	if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
4068 			    TAINT_FIRMWARE_WORKAROUND,
4069 			    "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
4070 		pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4071 }
4072 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
4073 
4074 static void __init init_no_remapping_devices(void)
4075 {
4076 	struct dmar_drhd_unit *drhd;
4077 	struct device *dev;
4078 	int i;
4079 
4080 	for_each_drhd_unit(drhd) {
4081 		if (!drhd->include_all) {
4082 			for_each_active_dev_scope(drhd->devices,
4083 						  drhd->devices_cnt, i, dev)
4084 				break;
4085 			/* ignore DMAR unit if no devices exist */
4086 			if (i == drhd->devices_cnt)
4087 				drhd->ignored = 1;
4088 		}
4089 	}
4090 
4091 	for_each_active_drhd_unit(drhd) {
4092 		if (drhd->include_all)
4093 			continue;
4094 
4095 		for_each_active_dev_scope(drhd->devices,
4096 					  drhd->devices_cnt, i, dev)
4097 			if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
4098 				break;
4099 		if (i < drhd->devices_cnt)
4100 			continue;
4101 
4102 		/* This IOMMU has *only* gfx devices. Either bypass it or
4103 		   set the gfx_mapped flag, as appropriate */
4104 		if (dmar_map_gfx) {
4105 			intel_iommu_gfx_mapped = 1;
4106 		} else {
4107 			drhd->ignored = 1;
4108 			for_each_active_dev_scope(drhd->devices,
4109 						  drhd->devices_cnt, i, dev)
4110 				dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4111 		}
4112 	}
4113 }
4114 
4115 #ifdef CONFIG_SUSPEND
4116 static int init_iommu_hw(void)
4117 {
4118 	struct dmar_drhd_unit *drhd;
4119 	struct intel_iommu *iommu = NULL;
4120 
4121 	for_each_active_iommu(iommu, drhd)
4122 		if (iommu->qi)
4123 			dmar_reenable_qi(iommu);
4124 
4125 	for_each_iommu(iommu, drhd) {
4126 		if (drhd->ignored) {
4127 			/*
4128 			 * we always have to disable PMRs or DMA may fail on
4129 			 * this device
4130 			 */
4131 			if (force_on)
4132 				iommu_disable_protect_mem_regions(iommu);
4133 			continue;
4134 		}
4135 
4136 		iommu_flush_write_buffer(iommu);
4137 
4138 		iommu_set_root_entry(iommu);
4139 
4140 		iommu->flush.flush_context(iommu, 0, 0, 0,
4141 					   DMA_CCMD_GLOBAL_INVL);
4142 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4143 		iommu_enable_translation(iommu);
4144 		iommu_disable_protect_mem_regions(iommu);
4145 	}
4146 
4147 	return 0;
4148 }
4149 
4150 static void iommu_flush_all(void)
4151 {
4152 	struct dmar_drhd_unit *drhd;
4153 	struct intel_iommu *iommu;
4154 
4155 	for_each_active_iommu(iommu, drhd) {
4156 		iommu->flush.flush_context(iommu, 0, 0, 0,
4157 					   DMA_CCMD_GLOBAL_INVL);
4158 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
4159 					 DMA_TLB_GLOBAL_FLUSH);
4160 	}
4161 }
4162 
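/* Save the fault-event registers and disable translation on every IOMMU. */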
4163 static int iommu_suspend(void)
4164 {
4165 	struct dmar_drhd_unit *drhd;
4166 	struct intel_iommu *iommu = NULL;
4167 	unsigned long flag;
4168 
4169 	for_each_active_iommu(iommu, drhd) {
4170 		iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
4171 						 GFP_ATOMIC);
4172 		if (!iommu->iommu_state)
4173 			goto nomem;
4174 	}
4175 
4176 	iommu_flush_all();
4177 
4178 	for_each_active_iommu(iommu, drhd) {
4179 		iommu_disable_translation(iommu);
4180 
4181 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
4182 
4183 		iommu->iommu_state[SR_DMAR_FECTL_REG] =
4184 			readl(iommu->reg + DMAR_FECTL_REG);
4185 		iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4186 			readl(iommu->reg + DMAR_FEDATA_REG);
4187 		iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4188 			readl(iommu->reg + DMAR_FEADDR_REG);
4189 		iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4190 			readl(iommu->reg + DMAR_FEUADDR_REG);
4191 
4192 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4193 	}
4194 	return 0;
4195 
4196 nomem:
4197 	for_each_active_iommu(iommu, drhd)
4198 		kfree(iommu->iommu_state);
4199 
4200 	return -ENOMEM;
4201 }
4202 
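/* Re-enable translation via init_iommu_hw() and restore the saved fault-event registers. */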
4203 static void iommu_resume(void)
4204 {
4205 	struct dmar_drhd_unit *drhd;
4206 	struct intel_iommu *iommu = NULL;
4207 	unsigned long flag;
4208 
4209 	if (init_iommu_hw()) {
4210 		if (force_on)
4211 			panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4212 		else
4213 			WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
4214 		return;
4215 	}
4216 
4217 	for_each_active_iommu(iommu, drhd) {
4218 
4219 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
4220 
4221 		writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4222 			iommu->reg + DMAR_FECTL_REG);
4223 		writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4224 			iommu->reg + DMAR_FEDATA_REG);
4225 		writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4226 			iommu->reg + DMAR_FEADDR_REG);
4227 		writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4228 			iommu->reg + DMAR_FEUADDR_REG);
4229 
4230 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4231 	}
4232 
4233 	for_each_active_iommu(iommu, drhd)
4234 		kfree(iommu->iommu_state);
4235 }
4236 
4237 static struct syscore_ops iommu_syscore_ops = {
4238 	.resume		= iommu_resume,
4239 	.suspend	= iommu_suspend,
4240 };
4241 
4242 static void __init init_iommu_pm_ops(void)
4243 {
4244 	register_syscore_ops(&iommu_syscore_ops);
4245 }
4246 
4247 #else
4248 static inline void init_iommu_pm_ops(void) {}
4249 #endif	/* CONFIG_SUSPEND */
4250 
4251 
4252 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
4253 {
4254 	struct acpi_dmar_reserved_memory *rmrr;
4255 	struct dmar_rmrr_unit *rmrru;
4256 
4257 	rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4258 	if (!rmrru)
4259 		return -ENOMEM;
4260 
4261 	rmrru->hdr = header;
4262 	rmrr = (struct acpi_dmar_reserved_memory *)header;
4263 	rmrru->base_address = rmrr->base_address;
4264 	rmrru->end_address = rmrr->end_address;
4265 	rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4266 				((void *)rmrr) + rmrr->header.length,
4267 				&rmrru->devices_cnt);
4268 	if (rmrru->devices_cnt && rmrru->devices == NULL) {
4269 		kfree(rmrru);
4270 		return -ENOMEM;
4271 	}
4272 
4273 	list_add(&rmrru->list, &dmar_rmrr_units);
4274 
4275 	return 0;
4276 }
4277 
4278 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4279 {
4280 	struct dmar_atsr_unit *atsru;
4281 	struct acpi_dmar_atsr *tmp;
4282 
4283 	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4284 		tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4285 		if (atsr->segment != tmp->segment)
4286 			continue;
4287 		if (atsr->header.length != tmp->header.length)
4288 			continue;
4289 		if (memcmp(atsr, tmp, atsr->header.length) == 0)
4290 			return atsru;
4291 	}
4292 
4293 	return NULL;
4294 }
4295 
4296 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4297 {
4298 	struct acpi_dmar_atsr *atsr;
4299 	struct dmar_atsr_unit *atsru;
4300 
4301 	if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
4302 		return 0;
4303 
4304 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4305 	atsru = dmar_find_atsr(atsr);
4306 	if (atsru)
4307 		return 0;
4308 
4309 	atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
4310 	if (!atsru)
4311 		return -ENOMEM;
4312 
4313 	/*
4314 	 * If memory is allocated from slab by ACPI _DSM method, we need to
4315 	 * copy the memory content because the memory buffer will be freed
4316 	 * on return.
4317 	 */
4318 	atsru->hdr = (void *)(atsru + 1);
4319 	memcpy(atsru->hdr, hdr, hdr->length);
4320 	atsru->include_all = atsr->flags & 0x1;
4321 	if (!atsru->include_all) {
4322 		atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4323 				(void *)atsr + atsr->header.length,
4324 				&atsru->devices_cnt);
4325 		if (atsru->devices_cnt && atsru->devices == NULL) {
4326 			kfree(atsru);
4327 			return -ENOMEM;
4328 		}
4329 	}
4330 
4331 	list_add_rcu(&atsru->list, &dmar_atsr_units);
4332 
4333 	return 0;
4334 }
4335 
4336 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4337 {
4338 	dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4339 	kfree(atsru);
4340 }
4341 
4342 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4343 {
4344 	struct acpi_dmar_atsr *atsr;
4345 	struct dmar_atsr_unit *atsru;
4346 
4347 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4348 	atsru = dmar_find_atsr(atsr);
4349 	if (atsru) {
4350 		list_del_rcu(&atsru->list);
4351 		synchronize_rcu();
4352 		intel_iommu_free_atsr(atsru);
4353 	}
4354 
4355 	return 0;
4356 }
4357 
4358 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4359 {
4360 	int i;
4361 	struct device *dev;
4362 	struct acpi_dmar_atsr *atsr;
4363 	struct dmar_atsr_unit *atsru;
4364 
4365 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4366 	atsru = dmar_find_atsr(atsr);
4367 	if (!atsru)
4368 		return 0;
4369 
4370 	if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
4371 		for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4372 					  i, dev)
4373 			return -EBUSY;
4374 	}
4375 
4376 	return 0;
4377 }
4378 
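/*
 * Bring a hot-added DMAR unit online: check its capabilities against the
 * current configuration, then set up domains, the root entry, interrupts
 * and translation.
 */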
4379 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4380 {
4381 	int sp, ret = 0;
4382 	struct intel_iommu *iommu = dmaru->iommu;
4383 
4384 	if (g_iommus[iommu->seq_id])
4385 		return 0;
4386 
4387 	if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
4388 		pr_warn("%s: Doesn't support hardware pass through.\n",
4389 			iommu->name);
4390 		return -ENXIO;
4391 	}
4392 	if (!ecap_sc_support(iommu->ecap) &&
4393 	    domain_update_iommu_snooping(iommu)) {
4394 		pr_warn("%s: Doesn't support snooping.\n",
4395 			iommu->name);
4396 		return -ENXIO;
4397 	}
4398 	sp = domain_update_iommu_superpage(iommu) - 1;
4399 	if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
4400 		pr_warn("%s: Doesn't support large page.\n",
4401 			iommu->name);
4402 		return -ENXIO;
4403 	}
4404 
4405 	/*
4406 	 * Disable translation if already enabled prior to OS handover.
4407 	 */
4408 	if (iommu->gcmd & DMA_GCMD_TE)
4409 		iommu_disable_translation(iommu);
4410 
4411 	g_iommus[iommu->seq_id] = iommu;
4412 	ret = iommu_init_domains(iommu);
4413 	if (ret == 0)
4414 		ret = iommu_alloc_root_entry(iommu);
4415 	if (ret)
4416 		goto out;
4417 
4418 #ifdef CONFIG_INTEL_IOMMU_SVM
4419 	if (pasid_enabled(iommu))
4420 		intel_svm_alloc_pasid_tables(iommu);
4421 #endif
4422 
4423 	if (dmaru->ignored) {
4424 		/*
4425 		 * we always have to disable PMRs or DMA may fail on this device
4426 		 */
4427 		if (force_on)
4428 			iommu_disable_protect_mem_regions(iommu);
4429 		return 0;
4430 	}
4431 
4432 	intel_iommu_init_qi(iommu);
4433 	iommu_flush_write_buffer(iommu);
4434 
4435 #ifdef CONFIG_INTEL_IOMMU_SVM
4436 	if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
4437 		ret = intel_svm_enable_prq(iommu);
4438 		if (ret)
4439 			goto disable_iommu;
4440 	}
4441 #endif
4442 	ret = dmar_set_interrupt(iommu);
4443 	if (ret)
4444 		goto disable_iommu;
4445 
4446 	iommu_set_root_entry(iommu);
4447 	iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4448 	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4449 	iommu_enable_translation(iommu);
4450 
4451 	iommu_disable_protect_mem_regions(iommu);
4452 	return 0;
4453 
4454 disable_iommu:
4455 	disable_dmar_iommu(iommu);
4456 out:
4457 	free_dmar_iommu(iommu);
4458 	return ret;
4459 }
4460 
4461 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4462 {
4463 	int ret = 0;
4464 	struct intel_iommu *iommu = dmaru->iommu;
4465 
4466 	if (!intel_iommu_enabled)
4467 		return 0;
4468 	if (iommu == NULL)
4469 		return -EINVAL;
4470 
4471 	if (insert) {
4472 		ret = intel_iommu_add(dmaru);
4473 	} else {
4474 		disable_dmar_iommu(iommu);
4475 		free_dmar_iommu(iommu);
4476 	}
4477 
4478 	return ret;
4479 }
4480 
4481 static void intel_iommu_free_dmars(void)
4482 {
4483 	struct dmar_rmrr_unit *rmrru, *rmrr_n;
4484 	struct dmar_atsr_unit *atsru, *atsr_n;
4485 
4486 	list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4487 		list_del(&rmrru->list);
4488 		dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4489 		kfree(rmrru);
4490 	}
4491 
4492 	list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4493 		list_del(&atsru->list);
4494 		intel_iommu_free_atsr(atsru);
4495 	}
4496 }
4497 
4498 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4499 {
4500 	int i, ret = 1;
4501 	struct pci_bus *bus;
4502 	struct pci_dev *bridge = NULL;
4503 	struct device *tmp;
4504 	struct acpi_dmar_atsr *atsr;
4505 	struct dmar_atsr_unit *atsru;
4506 
4507 	dev = pci_physfn(dev);
4508 	for (bus = dev->bus; bus; bus = bus->parent) {
4509 		bridge = bus->self;
4510 		/* If it's an integrated device, allow ATS */
4511 		if (!bridge)
4512 			return 1;
4513 		/* Connected via non-PCIe: no ATS */
4514 		if (!pci_is_pcie(bridge) ||
4515 		    pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
4516 			return 0;
4517 		/* If we found the root port, look it up in the ATSR */
4518 		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
4519 			break;
4520 	}
4521 
4522 	rcu_read_lock();
4523 	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4524 		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4525 		if (atsr->segment != pci_domain_nr(dev->bus))
4526 			continue;
4527 
4528 		for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
4529 			if (tmp == &bridge->dev)
4530 				goto out;
4531 
4532 		if (atsru->include_all)
4533 			goto out;
4534 	}
4535 	ret = 0;
4536 out:
4537 	rcu_read_unlock();
4538 
4539 	return ret;
4540 }
4541 
4542 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4543 {
4544 	int ret = 0;
4545 	struct dmar_rmrr_unit *rmrru;
4546 	struct dmar_atsr_unit *atsru;
4547 	struct acpi_dmar_atsr *atsr;
4548 	struct acpi_dmar_reserved_memory *rmrr;
4549 
4550 	if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
4551 		return 0;
4552 
4553 	list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4554 		rmrr = container_of(rmrru->hdr,
4555 				    struct acpi_dmar_reserved_memory, header);
4556 		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4557 			ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4558 				((void *)rmrr) + rmrr->header.length,
4559 				rmrr->segment, rmrru->devices,
4560 				rmrru->devices_cnt);
4561 			if (ret < 0)
4562 				return ret;
4563 		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4564 			dmar_remove_dev_scope(info, rmrr->segment,
4565 				rmrru->devices, rmrru->devices_cnt);
4566 		}
4567 	}
4568 
4569 	list_for_each_entry(atsru, &dmar_atsr_units, list) {
4570 		if (atsru->include_all)
4571 			continue;
4572 
4573 		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4574 		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4575 			ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4576 					(void *)atsr + atsr->header.length,
4577 					atsr->segment, atsru->devices,
4578 					atsru->devices_cnt);
4579 			if (ret > 0)
4580 				break;
4581 			else if (ret < 0)
4582 				return ret;
4583 		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4584 			if (dmar_remove_dev_scope(info, atsr->segment,
4585 					atsru->devices, atsru->devices_cnt))
4586 				break;
4587 		}
4588 	}
4589 
4590 	return 0;
4591 }
4592 
4593 /*
4594  * Here we only respond to a device being unbound from its driver.
4595  *
4596  * A newly added device is not attached to its DMAR domain here yet. That
4597  * will happen when mapping the device to an iova.
4598  */
4599 static int device_notifier(struct notifier_block *nb,
4600 				  unsigned long action, void *data)
4601 {
4602 	struct device *dev = data;
4603 	struct dmar_domain *domain;
4604 
4605 	if (iommu_dummy(dev))
4606 		return 0;
4607 
4608 	if (action != BUS_NOTIFY_REMOVED_DEVICE)
4609 		return 0;
4610 
4611 	domain = find_domain(dev);
4612 	if (!domain)
4613 		return 0;
4614 
4615 	dmar_remove_one_dev_info(domain, dev);
4616 	if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
4617 		domain_exit(domain);
4618 
4619 	return 0;
4620 }
4621 
4622 static struct notifier_block device_nb = {
4623 	.notifier_call = device_notifier,
4624 };
4625 
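/*
 * Keep the static identity domain in sync with memory hotplug: map new
 * ranges as they come online, and unmap and flush them when they go
 * offline.
 */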
4626 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4627 				       unsigned long val, void *v)
4628 {
4629 	struct memory_notify *mhp = v;
4630 	unsigned long long start, end;
4631 	unsigned long start_vpfn, last_vpfn;
4632 
4633 	switch (val) {
4634 	case MEM_GOING_ONLINE:
4635 		start = mhp->start_pfn << PAGE_SHIFT;
4636 		end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4637 		if (iommu_domain_identity_map(si_domain, start, end)) {
4638 			pr_warn("Failed to build identity map for [%llx-%llx]\n",
4639 				start, end);
4640 			return NOTIFY_BAD;
4641 		}
4642 		break;
4643 
4644 	case MEM_OFFLINE:
4645 	case MEM_CANCEL_ONLINE:
4646 		start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4647 		last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4648 		while (start_vpfn <= last_vpfn) {
4649 			struct iova *iova;
4650 			struct dmar_drhd_unit *drhd;
4651 			struct intel_iommu *iommu;
4652 			struct page *freelist;
4653 
4654 			iova = find_iova(&si_domain->iovad, start_vpfn);
4655 			if (iova == NULL) {
4656 				pr_debug("Failed get IOVA for PFN %lx\n",
4657 					 start_vpfn);
4658 				break;
4659 			}
4660 
4661 			iova = split_and_remove_iova(&si_domain->iovad, iova,
4662 						     start_vpfn, last_vpfn);
4663 			if (iova == NULL) {
4664 				pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
4665 					start_vpfn, last_vpfn);
4666 				return NOTIFY_BAD;
4667 			}
4668 
4669 			freelist = domain_unmap(si_domain, iova->pfn_lo,
4670 					       iova->pfn_hi);
4671 
4672 			rcu_read_lock();
4673 			for_each_active_iommu(iommu, drhd)
4674 				iommu_flush_iotlb_psi(iommu, si_domain,
4675 					iova->pfn_lo, iova_size(iova),
4676 					!freelist, 0);
4677 			rcu_read_unlock();
4678 			dma_free_pagelist(freelist);
4679 
4680 			start_vpfn = iova->pfn_hi + 1;
4681 			free_iova_mem(iova);
4682 		}
4683 		break;
4684 	}
4685 
4686 	return NOTIFY_OK;
4687 }
4688 
4689 static struct notifier_block intel_iommu_memory_nb = {
4690 	.notifier_call = intel_iommu_memory_notifier,
4691 	.priority = 0
4692 };
4693 
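/* Release @cpu's cached IOVAs in every domain on every IOMMU. */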
4694 static void free_all_cpu_cached_iovas(unsigned int cpu)
4695 {
4696 	int i;
4697 
4698 	for (i = 0; i < g_num_of_iommus; i++) {
4699 		struct intel_iommu *iommu = g_iommus[i];
4700 		struct dmar_domain *domain;
4701 		int did;
4702 
4703 		if (!iommu)
4704 			continue;
4705 
4706 		for (did = 0; did < cap_ndoms(iommu->cap); did++) {
4707 			domain = get_iommu_domain(iommu, (u16)did);
4708 
4709 			if (!domain)
4710 				continue;
4711 			free_cpu_cached_iovas(cpu, &domain->iovad);
4712 		}
4713 	}
4714 }
4715 
4716 static int intel_iommu_cpu_notifier(struct notifier_block *nfb,
4717 				    unsigned long action, void *v)
4718 {
4719 	unsigned int cpu = (unsigned long)v;
4720 
4721 	switch (action) {
4722 	case CPU_DEAD:
4723 	case CPU_DEAD_FROZEN:
4724 		free_all_cpu_cached_iovas(cpu);
4725 		flush_unmaps_timeout(cpu);
4726 		break;
4727 	}
4728 	return NOTIFY_OK;
4729 }
4730 
4731 static struct notifier_block intel_iommu_cpu_nb = {
4732 	.notifier_call = intel_iommu_cpu_notifier,
4733 };
4734 
4735 static ssize_t intel_iommu_show_version(struct device *dev,
4736 					struct device_attribute *attr,
4737 					char *buf)
4738 {
4739 	struct intel_iommu *iommu = dev_get_drvdata(dev);
4740 	u32 ver = readl(iommu->reg + DMAR_VER_REG);
4741 	return sprintf(buf, "%d:%d\n",
4742 		       DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4743 }
4744 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4745 
4746 static ssize_t intel_iommu_show_address(struct device *dev,
4747 					struct device_attribute *attr,
4748 					char *buf)
4749 {
4750 	struct intel_iommu *iommu = dev_get_drvdata(dev);
4751 	return sprintf(buf, "%llx\n", iommu->reg_phys);
4752 }
4753 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4754 
4755 static ssize_t intel_iommu_show_cap(struct device *dev,
4756 				    struct device_attribute *attr,
4757 				    char *buf)
4758 {
4759 	struct intel_iommu *iommu = dev_get_drvdata(dev);
4760 	return sprintf(buf, "%llx\n", iommu->cap);
4761 }
4762 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4763 
4764 static ssize_t intel_iommu_show_ecap(struct device *dev,
4765 				    struct device_attribute *attr,
4766 				    char *buf)
4767 {
4768 	struct intel_iommu *iommu = dev_get_drvdata(dev);
4769 	return sprintf(buf, "%llx\n", iommu->ecap);
4770 }
4771 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4772 
4773 static ssize_t intel_iommu_show_ndoms(struct device *dev,
4774 				      struct device_attribute *attr,
4775 				      char *buf)
4776 {
4777 	struct intel_iommu *iommu = dev_get_drvdata(dev);
4778 	return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4779 }
4780 static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4781 
4782 static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4783 					   struct device_attribute *attr,
4784 					   char *buf)
4785 {
4786 	struct intel_iommu *iommu = dev_get_drvdata(dev);
4787 	return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4788 						  cap_ndoms(iommu->cap)));
4789 }
4790 static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4791 
4792 static struct attribute *intel_iommu_attrs[] = {
4793 	&dev_attr_version.attr,
4794 	&dev_attr_address.attr,
4795 	&dev_attr_cap.attr,
4796 	&dev_attr_ecap.attr,
4797 	&dev_attr_domains_supported.attr,
4798 	&dev_attr_domains_used.attr,
4799 	NULL,
4800 };
4801 
4802 static struct attribute_group intel_iommu_group = {
4803 	.name = "intel-iommu",
4804 	.attrs = intel_iommu_attrs,
4805 };
4806 
4807 const struct attribute_group *intel_iommu_groups[] = {
4808 	&intel_iommu_group,
4809 	NULL,
4810 };
4811 
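/*
 * Main initialization entry point: parse the DMAR table, initialize all
 * IOMMUs, install intel_dma_ops and register the PM, bus, memory and CPU
 * notifiers.
 */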
4812 int __init intel_iommu_init(void)
4813 {
4814 	int ret = -ENODEV;
4815 	struct dmar_drhd_unit *drhd;
4816 	struct intel_iommu *iommu;
4817 
4818 	/* VT-d is required for a TXT/tboot launch, so enforce that */
4819 	force_on = tboot_force_iommu();
4820 
4821 	if (iommu_init_mempool()) {
4822 		if (force_on)
4823 			panic("tboot: Failed to initialize iommu memory\n");
4824 		return -ENOMEM;
4825 	}
4826 
4827 	down_write(&dmar_global_lock);
4828 	if (dmar_table_init()) {
4829 		if (force_on)
4830 			panic("tboot: Failed to initialize DMAR table\n");
4831 		goto out_free_dmar;
4832 	}
4833 
4834 	if (dmar_dev_scope_init() < 0) {
4835 		if (force_on)
4836 			panic("tboot: Failed to initialize DMAR device scope\n");
4837 		goto out_free_dmar;
4838 	}
4839 
4840 	if (no_iommu || dmar_disabled)
4841 		goto out_free_dmar;
4842 
4843 	if (list_empty(&dmar_rmrr_units))
4844 		pr_info("No RMRR found\n");
4845 
4846 	if (list_empty(&dmar_atsr_units))
4847 		pr_info("No ATSR found\n");
4848 
4849 	if (dmar_init_reserved_ranges()) {
4850 		if (force_on)
4851 			panic("tboot: Failed to reserve iommu ranges\n");
4852 		goto out_free_reserved_range;
4853 	}
4854 
4855 	init_no_remapping_devices();
4856 
4857 	ret = init_dmars();
4858 	if (ret) {
4859 		if (force_on)
4860 			panic("tboot: Failed to initialize DMARs\n");
4861 		pr_err("Initialization failed\n");
4862 		goto out_free_reserved_range;
4863 	}
4864 	up_write(&dmar_global_lock);
4865 	pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4866 
4867 #ifdef CONFIG_SWIOTLB
4868 	swiotlb = 0;
4869 #endif
4870 	dma_ops = &intel_dma_ops;
4871 
4872 	init_iommu_pm_ops();
4873 
4874 	for_each_active_iommu(iommu, drhd)
4875 		iommu->iommu_dev = iommu_device_create(NULL, iommu,
4876 						       intel_iommu_groups,
4877 						       "%s", iommu->name);
4878 
4879 	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4880 	bus_register_notifier(&pci_bus_type, &device_nb);
4881 	if (si_domain && !hw_pass_through)
4882 		register_memory_notifier(&intel_iommu_memory_nb);
4883 	register_hotcpu_notifier(&intel_iommu_cpu_nb);
4884 
4885 	intel_iommu_enabled = 1;
4886 
4887 	return 0;
4888 
4889 out_free_reserved_range:
4890 	put_iova_domain(&reserved_iova_list);
4891 out_free_dmar:
4892 	intel_iommu_free_dmars();
4893 	up_write(&dmar_global_lock);
4894 	iommu_exit_mempool();
4895 	return ret;
4896 }
4897 
4898 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4899 {
4900 	struct intel_iommu *iommu = opaque;
4901 
4902 	domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4903 	return 0;
4904 }
4905 
4906 /*
4907  * NB - intel-iommu lacks any sort of reference counting for the users of
4908  * dependent devices.  If multiple endpoints have intersecting dependent
4909  * devices, unbinding the driver from any one of them will possibly leave
4910  * the others unable to operate.
4911  */
4912 static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
4913 {
4914 	if (!iommu || !dev || !dev_is_pci(dev))
4915 		return;
4916 
4917 	pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
4918 }
4919 
4920 static void __dmar_remove_one_dev_info(struct device_domain_info *info)
4921 {
4922 	struct intel_iommu *iommu;
4923 	unsigned long flags;
4924 
4925 	assert_spin_locked(&device_domain_lock);
4926 
4927 	if (WARN_ON(!info))
4928 		return;
4929 
4930 	iommu = info->iommu;
4931 
4932 	if (info->dev) {
4933 		iommu_disable_dev_iotlb(info);
4934 		domain_context_clear(iommu, info->dev);
4935 	}
4936 
4937 	unlink_domain_info(info);
4938 
4939 	spin_lock_irqsave(&iommu->lock, flags);
4940 	domain_detach_iommu(info->domain, iommu);
4941 	spin_unlock_irqrestore(&iommu->lock, flags);
4942 
4943 	free_devinfo_mem(info);
4944 }
4945 
4946 static void dmar_remove_one_dev_info(struct dmar_domain *domain,
4947 				     struct device *dev)
4948 {
4949 	struct device_domain_info *info;
4950 	unsigned long flags;
4951 
4952 	spin_lock_irqsave(&device_domain_lock, flags);
4953 	info = dev->archdata.iommu;
4954 	__dmar_remove_one_dev_info(info);
4955 	spin_unlock_irqrestore(&device_domain_lock, flags);
4956 }
4957 
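/* Initialize a domain allocated through the IOMMU API with the given guest address width. */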
4958 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4959 {
4960 	int adjust_width;
4961 
4962 	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
4963 			DMA_32BIT_PFN);
4964 	domain_reserve_special_ranges(domain);
4965 
4966 	/* calculate AGAW */
4967 	domain->gaw = guest_width;
4968 	adjust_width = guestwidth_to_adjustwidth(guest_width);
4969 	domain->agaw = width_to_agaw(adjust_width);
4970 
4971 	domain->iommu_coherency = 0;
4972 	domain->iommu_snooping = 0;
4973 	domain->iommu_superpage = 0;
4974 	domain->max_addr = 0;
4975 
4976 	/* always allocate the top pgd */
4977 	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4978 	if (!domain->pgd)
4979 		return -ENOMEM;
4980 	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4981 	return 0;
4982 }
4983 
4984 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4985 {
4986 	struct dmar_domain *dmar_domain;
4987 	struct iommu_domain *domain;
4988 
4989 	if (type != IOMMU_DOMAIN_UNMANAGED)
4990 		return NULL;
4991 
4992 	dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
4993 	if (!dmar_domain) {
4994 		pr_err("Can't allocate dmar_domain\n");
4995 		return NULL;
4996 	}
4997 	if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4998 		pr_err("Domain initialization failed\n");
4999 		domain_exit(dmar_domain);
5000 		return NULL;
5001 	}
5002 	domain_update_iommu_cap(dmar_domain);
5003 
5004 	domain = &dmar_domain->domain;
5005 	domain->geometry.aperture_start = 0;
5006 	domain->geometry.aperture_end   = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
5007 	domain->geometry.force_aperture = true;
5008 
5009 	return domain;
5010 }
5011 
5012 static void intel_iommu_domain_free(struct iommu_domain *domain)
5013 {
5014 	domain_exit(to_dmar_domain(domain));
5015 }
5016 
5017 static int intel_iommu_attach_device(struct iommu_domain *domain,
5018 				     struct device *dev)
5019 {
5020 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5021 	struct intel_iommu *iommu;
5022 	int addr_width;
5023 	u8 bus, devfn;
5024 
5025 	if (device_is_rmrr_locked(dev)) {
5026 		dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement.  Contact your platform vendor.\n");
5027 		return -EPERM;
5028 	}
5029 
5030 	/* normally dev is not mapped */
5031 	if (unlikely(domain_context_mapped(dev))) {
5032 		struct dmar_domain *old_domain;
5033 
5034 		old_domain = find_domain(dev);
5035 		if (old_domain) {
5036 			rcu_read_lock();
5037 			dmar_remove_one_dev_info(old_domain, dev);
5038 			rcu_read_unlock();
5039 
5040 			if (!domain_type_is_vm_or_si(old_domain) &&
5041 			     list_empty(&old_domain->devices))
5042 				domain_exit(old_domain);
5043 		}
5044 	}
5045 
5046 	iommu = device_to_iommu(dev, &bus, &devfn);
5047 	if (!iommu)
5048 		return -ENODEV;
5049 
5050 	/* check if this iommu agaw is sufficient for max mapped address */
5051 	addr_width = agaw_to_width(iommu->agaw);
5052 	if (addr_width > cap_mgaw(iommu->cap))
5053 		addr_width = cap_mgaw(iommu->cap);
5054 
5055 	if (dmar_domain->max_addr > (1LL << addr_width)) {
5056 		pr_err("%s: iommu width (%d) is not "
5057 		       "sufficient for the mapped address (%llx)\n",
5058 		       __func__, addr_width, dmar_domain->max_addr);
5059 		return -EFAULT;
5060 	}
5061 	dmar_domain->gaw = addr_width;
5062 
5063 	/*
5064 	 * Knock out extra levels of page tables if necessary
5065 	 */
5066 	while (iommu->agaw < dmar_domain->agaw) {
5067 		struct dma_pte *pte;
5068 
5069 		pte = dmar_domain->pgd;
5070 		if (dma_pte_present(pte)) {
5071 			dmar_domain->pgd = (struct dma_pte *)
5072 				phys_to_virt(dma_pte_addr(pte));
5073 			free_pgtable_page(pte);
5074 		}
5075 		dmar_domain->agaw--;
5076 	}
5077 
5078 	return domain_add_dev_info(dmar_domain, dev);
5079 }
5080 
5081 static void intel_iommu_detach_device(struct iommu_domain *domain,
5082 				      struct device *dev)
5083 {
5084 	dmar_remove_one_dev_info(to_dmar_domain(domain), dev);
5085 }
5086 
5087 static int intel_iommu_map(struct iommu_domain *domain,
5088 			   unsigned long iova, phys_addr_t hpa,
5089 			   size_t size, int iommu_prot)
5090 {
5091 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5092 	u64 max_addr;
5093 	int prot = 0;
5094 	int ret;
5095 
5096 	if (iommu_prot & IOMMU_READ)
5097 		prot |= DMA_PTE_READ;
5098 	if (iommu_prot & IOMMU_WRITE)
5099 		prot |= DMA_PTE_WRITE;
5100 	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5101 		prot |= DMA_PTE_SNP;
5102 
5103 	max_addr = iova + size;
5104 	if (dmar_domain->max_addr < max_addr) {
5105 		u64 end;
5106 
5107 		/* check if minimum agaw is sufficient for mapped address */
5108 		end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
5109 		if (end < max_addr) {
5110 			pr_err("%s: iommu width (%d) is not "
5111 			       "sufficient for the mapped address (%llx)\n",
5112 			       __func__, dmar_domain->gaw, max_addr);
5113 			return -EFAULT;
5114 		}
5115 		dmar_domain->max_addr = max_addr;
5116 	}
5117 	/* Round up size to next multiple of PAGE_SIZE, if it and
5118 	   the low bits of hpa would take us onto the next page */
5119 	size = aligned_nrpages(hpa, size);
5120 	ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5121 				 hpa >> VTD_PAGE_SHIFT, size, prot);
5122 	return ret;
5123 }
5124 
5125 static size_t intel_iommu_unmap(struct iommu_domain *domain,
5126 				unsigned long iova, size_t size)
5127 {
5128 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5129 	struct page *freelist = NULL;
5130 	struct intel_iommu *iommu;
5131 	unsigned long start_pfn, last_pfn;
5132 	unsigned int npages;
5133 	int iommu_id, level = 0;
5134 
5135 	/* Cope with horrid API which requires us to unmap more than the
5136 	   size argument if it happens to be a large-page mapping. */
5137 	BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5138 
5139 	if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5140 		size = VTD_PAGE_SIZE << level_to_offset_bits(level);
5141 
5142 	start_pfn = iova >> VTD_PAGE_SHIFT;
5143 	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5144 
5145 	freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5146 
5147 	npages = last_pfn - start_pfn + 1;
5148 
5149 	for_each_domain_iommu(iommu_id, dmar_domain) {
5150 		iommu = g_iommus[iommu_id];
5151 
5152 		iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5153 				      start_pfn, npages, !freelist, 0);
5154 	}
5155 
5156 	dma_free_pagelist(freelist);
5157 
5158 	if (dmar_domain->max_addr == iova + size)
5159 		dmar_domain->max_addr = iova;
5160 
5161 	return size;
5162 }
5163 
5164 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
5165 					    dma_addr_t iova)
5166 {
5167 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5168 	struct dma_pte *pte;
5169 	int level = 0;
5170 	u64 phys = 0;
5171 
5172 	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
5173 	if (pte)
5174 		phys = dma_pte_addr(pte);
5175 
5176 	return phys;
5177 }
5178 
5179 static bool intel_iommu_capable(enum iommu_cap cap)
5180 {
5181 	if (cap == IOMMU_CAP_CACHE_COHERENCY)
5182 		return domain_update_iommu_snooping(NULL) == 1;
5183 	if (cap == IOMMU_CAP_INTR_REMAP)
5184 		return irq_remapping_enabled == 1;
5185 
5186 	return false;
5187 }
5188 
5189 static int intel_iommu_add_device(struct device *dev)
5190 {
5191 	struct intel_iommu *iommu;
5192 	struct iommu_group *group;
5193 	u8 bus, devfn;
5194 
5195 	iommu = device_to_iommu(dev, &bus, &devfn);
5196 	if (!iommu)
5197 		return -ENODEV;
5198 
5199 	iommu_device_link(iommu->iommu_dev, dev);
5200 
5201 	group = iommu_group_get_for_dev(dev);
5202 
5203 	if (IS_ERR(group))
5204 		return PTR_ERR(group);
5205 
5206 	iommu_group_put(group);
5207 	return 0;
5208 }
5209 
5210 static void intel_iommu_remove_device(struct device *dev)
5211 {
5212 	struct intel_iommu *iommu;
5213 	u8 bus, devfn;
5214 
5215 	iommu = device_to_iommu(dev, &bus, &devfn);
5216 	if (!iommu)
5217 		return;
5218 
5219 	iommu_group_remove_device(dev);
5220 
5221 	iommu_device_unlink(iommu->iommu_dev, dev);
5222 }
5223 
5224 #ifdef CONFIG_INTEL_IOMMU_SVM
5225 #define MAX_NR_PASID_BITS (20)
5226 static inline unsigned long intel_iommu_get_pts(struct intel_iommu *iommu)
5227 {
5228 	/*
5229 	 * Convert ecap_pss to extend context entry pts encoding, also
5230 	 * respect the soft pasid_max value set by the iommu.
5231 	 * - number of PASID bits = ecap_pss + 1
5232 	 * - number of PASID table entries = 2^(pts + 5)
5233 	 * Therefore, pts = ecap_pss - 4
5234 	 * e.g. KBL ecap_pss = 0x13, PASID has 20 bits, pts = 15
5235 	 */
5236 	if (ecap_pss(iommu->ecap) < 5)
5237 		return 0;
5238 
5239 	/* pasid_max is encoded as actual number of entries not the bits */
5240 	return find_first_bit((unsigned long *)&iommu->pasid_max,
5241 			MAX_NR_PASID_BITS) - 5;
5242 }
5243 
5244 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
5245 {
5246 	struct device_domain_info *info;
5247 	struct context_entry *context;
5248 	struct dmar_domain *domain;
5249 	unsigned long flags;
5250 	u64 ctx_lo;
5251 	int ret;
5252 
5253 	domain = get_valid_domain_for_dev(sdev->dev);
5254 	if (!domain)
5255 		return -EINVAL;
5256 
5257 	spin_lock_irqsave(&device_domain_lock, flags);
5258 	spin_lock(&iommu->lock);
5259 
5260 	ret = -EINVAL;
5261 	info = sdev->dev->archdata.iommu;
5262 	if (!info || !info->pasid_supported)
5263 		goto out;
5264 
5265 	context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5266 	if (WARN_ON(!context))
5267 		goto out;
5268 
5269 	ctx_lo = context[0].lo;
5270 
5271 	sdev->did = domain->iommu_did[iommu->seq_id];
5272 	sdev->sid = PCI_DEVID(info->bus, info->devfn);
5273 
5274 	if (!(ctx_lo & CONTEXT_PASIDE)) {
5275 		context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
5276 		context[1].lo = (u64)virt_to_phys(iommu->pasid_table) |
5277 			intel_iommu_get_pts(iommu);
5278 
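		/*
		 * Make sure the PASID table pointers written to the upper
		 * half of the extended context entry above are globally
		 * visible before PASIDE is set in context[0].lo below, so
		 * the hardware never sees PASIDE with a stale PASID table
		 * pointer.
		 */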
5279 		wmb();
5280 		/* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
5281 		 * extended to permit requests-with-PASID if the PASIDE bit
5282 		 * is set, which makes sense. For CONTEXT_TT_PASS_THROUGH,
5283 		 * however, the PASIDE bit is ignored and requests-with-PASID
5284 		 * are unconditionally blocked, which makes less sense.
5285 		 * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
5286 		 * "guest mode" translation types depending on whether ATS
5287 		 * is available or not. Annoyingly, we can't use the new
5288 		 * modes *unless* PASIDE is set. */
5289 		if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
5290 			ctx_lo &= ~CONTEXT_TT_MASK;
5291 			if (info->ats_supported)
5292 				ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
5293 			else
5294 				ctx_lo |= CONTEXT_TT_PT_PASID << 2;
5295 		}
5296 		ctx_lo |= CONTEXT_PASIDE;
5297 		if (iommu->pasid_state_table)
5298 			ctx_lo |= CONTEXT_DINVE;
5299 		if (info->pri_supported)
5300 			ctx_lo |= CONTEXT_PRS;
5301 		context[0].lo = ctx_lo;
5302 		wmb();
5303 		iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
5304 					   DMA_CCMD_MASK_NOBIT,
5305 					   DMA_CCMD_DEVICE_INVL);
5306 	}
5307 
5308 	/* Enable PASID support in the device, if it wasn't already */
5309 	if (!info->pasid_enabled)
5310 		iommu_enable_dev_iotlb(info);
5311 
5312 	if (info->ats_enabled) {
5313 		sdev->dev_iotlb = 1;
5314 		sdev->qdep = info->ats_qdep;
5315 		if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
5316 			sdev->qdep = 0;
5317 	}
5318 	ret = 0;
5319 
5320  out:
5321 	spin_unlock(&iommu->lock);
5322 	spin_unlock_irqrestore(&device_domain_lock, flags);
5323 
5324 	return ret;
5325 }
5326 
5327 struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5328 {
5329 	struct intel_iommu *iommu;
5330 	u8 bus, devfn;
5331 
5332 	if (iommu_dummy(dev)) {
5333 		dev_warn(dev,
5334 			 "No IOMMU translation for device; cannot enable SVM\n");
5335 		return NULL;
5336 	}
5337 
5338 	iommu = device_to_iommu(dev, &bus, &devfn);
5339 	if (!iommu) {
5340 		dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
5341 		return NULL;
5342 	}
5343 
5344 	if (!iommu->pasid_table) {
5345 		dev_err(dev, "PASID not enabled on IOMMU; cannot enable SVM\n");
5346 		return NULL;
5347 	}
5348 
5349 	return iommu;
5350 }
5351 #endif /* CONFIG_INTEL_IOMMU_SVM */
5352 
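/*
 * Callback table handed to the generic IOMMU core.  The driver registers it
 * for PCI devices via bus_set_iommu() from intel_iommu_init(), after which
 * the core routes domain allocation, attach/detach, map/unmap and device
 * add/remove requests to the VT-d implementations above.
 */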
5353 static const struct iommu_ops intel_iommu_ops = {
5354 	.capable	= intel_iommu_capable,
5355 	.domain_alloc	= intel_iommu_domain_alloc,
5356 	.domain_free	= intel_iommu_domain_free,
5357 	.attach_dev	= intel_iommu_attach_device,
5358 	.detach_dev	= intel_iommu_detach_device,
5359 	.map		= intel_iommu_map,
5360 	.unmap		= intel_iommu_unmap,
5361 	.map_sg		= default_iommu_map_sg,
5362 	.iova_to_phys	= intel_iommu_iova_to_phys,
5363 	.add_device	= intel_iommu_add_device,
5364 	.remove_device	= intel_iommu_remove_device,
5365 	.device_group   = pci_device_group,
5366 	.pgsize_bitmap	= INTEL_IOMMU_PGSIZES,
5367 };
5368 
5369 static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5370 {
5371 	/* G4x/GM45 integrated gfx dmar support is totally busted. */
5372 	pr_info("Disabling IOMMU for graphics on this chipset\n");
5373 	dmar_map_gfx = 0;
5374 }
5375 
5376 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5377 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5378 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5379 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5380 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5381 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5382 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5383 
5384 static void quirk_iommu_rwbf(struct pci_dev *dev)
5385 {
5386 	/*
5387 	 * Mobile 4 Series Chipset neglects to set RWBF capability,
5388 	 * but needs it. Same seems to hold for the desktop versions.
5389 	 */
5390 	pr_info("Forcing write-buffer flush capability\n");
5391 	rwbf_quirk = 1;
5392 }
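/*
 * Note: with rwbf_quirk set, iommu_flush_write_buffer() performs the flush
 * even though the capability register does not advertise RWBF, compensating
 * for the chipsets fixed up below.
 */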
5393 
5394 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
5395 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5396 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5397 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5398 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5399 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5400 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
5401 
5402 #define GGC 0x52
5403 #define GGC_MEMORY_SIZE_MASK	(0xf << 8)
5404 #define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
5405 #define GGC_MEMORY_SIZE_1M	(0x1 << 8)
5406 #define GGC_MEMORY_SIZE_2M	(0x3 << 8)
5407 #define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
5408 #define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
5409 #define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
5410 #define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)
5411 
5412 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
5413 {
5414 	unsigned short ggc;
5415 
5416 	if (pci_read_config_word(dev, GGC, &ggc))
5417 		return;
5418 
5419 	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
5420 		pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
5421 		dmar_map_gfx = 0;
5422 	} else if (dmar_map_gfx) {
5423 		/* we have to ensure the gfx device is idle before we flush */
5424 		pr_info("Disabling batched IOTLB flush on Ironlake\n");
5425 		intel_iommu_strict = 1;
5426 	}
5427 }
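/*
 * Example decode (illustrative): a GGC graphics-mode field of
 * GGC_MEMORY_SIZE_2M_VT (0x9 << 8) has GGC_MEMORY_VT_ENABLED (bit 11) set,
 * so the quirk above only forces strict IOTLB flushing; a field of
 * GGC_MEMORY_SIZE_1M (0x1 << 8) has bit 11 clear, meaning the BIOS allocated
 * no VT-mode shadow GTT, and graphics translation is disabled entirely.
 */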
5428 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5429 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5430 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5431 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5432 
5433 /* On Tylersburg chipsets, some BIOSes have been known to enable the
5434    ISOCH DMAR unit for the Azalia sound device, but not give it any
5435    TLB entries, which causes it to deadlock. Check for that.  We do
5436    this in a function called from init_dmars(), instead of in a PCI
5437    quirk, because we don't want to print the obnoxious "BIOS broken"
5438    message if VT-d is actually disabled.
5439 */
5440 static void __init check_tylersburg_isoch(void)
5441 {
5442 	struct pci_dev *pdev;
5443 	uint32_t vtisochctrl;
5444 
5445 	/* If there's no Azalia in the system anyway, forget it. */
5446 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5447 	if (!pdev)
5448 		return;
5449 	pci_dev_put(pdev);
5450 
5451 	/* System Management Registers. Might be hidden, in which case
5452 	   we can't do the sanity check. But that's OK, because the
5453 	   known-broken BIOSes _don't_ actually hide it, so far. */
5454 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5455 	if (!pdev)
5456 		return;
5457 
5458 	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5459 		pci_dev_put(pdev);
5460 		return;
5461 	}
5462 
5463 	pci_dev_put(pdev);
5464 
5465 	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5466 	if (vtisochctrl & 1)
5467 		return;
5468 
5469 	/* Drop all bits other than the number of TLB entries */
5470 	vtisochctrl &= 0x1c;
5471 
5472 	/* If we have the recommended number of TLB entries (16), fine. */
5473 	if (vtisochctrl == 0x10)
5474 		return;
5475 
5476 	/* Zero TLB entries? You get to ride the short bus to school. */
5477 	if (!vtisochctrl) {
5478 		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5479 		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5480 		     dmi_get_system_info(DMI_BIOS_VENDOR),
5481 		     dmi_get_system_info(DMI_BIOS_VERSION),
5482 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
5483 		iommu_identity_mapping |= IDENTMAP_AZALIA;
5484 		return;
5485 	}
5486 
5487 	pr_warn("Recommended number of TLB entries for ISOCH unit is 16; your BIOS set %d\n",
5488 	       vtisochctrl);
5489 }
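/*
 * Example decode (illustrative): vtisochctrl bit 0 set means Azalia DMA is
 * routed to the non-isoch DMAR unit and nothing needs checking.  With bit 0
 * clear, the masked field (vtisochctrl & 0x1c) is treated as the TLB entry
 * count: 0x10 (16 entries) is fine, 0 triggers the WARN above and forces
 * identity mapping for Azalia, and any other value only earns the pr_warn.
 */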
5490