1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  * Author: Fenghua Yu <fenghua.yu@intel.com>
22  */
23 
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/export.h>
28 #include <linux/slab.h>
29 #include <linux/irq.h>
30 #include <linux/interrupt.h>
31 #include <linux/spinlock.h>
32 #include <linux/pci.h>
33 #include <linux/dmar.h>
34 #include <linux/dma-mapping.h>
35 #include <linux/mempool.h>
36 #include <linux/timer.h>
37 #include <linux/iova.h>
38 #include <linux/iommu.h>
39 #include <linux/intel-iommu.h>
40 #include <linux/syscore_ops.h>
41 #include <linux/tboot.h>
42 #include <linux/dmi.h>
43 #include <linux/pci-ats.h>
44 #include <linux/memblock.h>
45 #include <asm/irq_remapping.h>
46 #include <asm/cacheflush.h>
47 #include <asm/iommu.h>
48 
49 #include "irq_remapping.h"
50 #include "pci.h"
51 
52 #define ROOT_SIZE		VTD_PAGE_SIZE
53 #define CONTEXT_SIZE		VTD_PAGE_SIZE
54 
55 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
56 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
57 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
58 
59 #define IOAPIC_RANGE_START	(0xfee00000)
60 #define IOAPIC_RANGE_END	(0xfeefffff)
61 #define IOVA_START_ADDR		(0x1000)
62 
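/* A 48-bit width corresponds to agaw 2, i.e. a 4-level page table */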
63 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
64 
65 #define MAX_AGAW_WIDTH 64
66 
67 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
68 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
69 
70 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
71    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
72 #define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
73 				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
74 #define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
75 
76 #define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
77 #define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
78 #define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))
79 
80 /* page table handling */
81 #define LEVEL_STRIDE		(9)
82 #define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
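/* Each page-table level decodes LEVEL_STRIDE bits of the DMA pfn
 * (512 entries per 4KiB table), so e.g. a 48-bit address width needs
 * (48 - 12) / 9 = 4 levels.
 */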
83 
84 /*
85  * This bitmap is used to advertise the page sizes our hardware supports
86  * to the IOMMU core, which will then use this information to split
87  * physically contiguous memory regions it is mapping into page sizes
88  * that we support.
89  *
90  * Traditionally the IOMMU core just handed us the mappings directly,
91  * after making sure the size is an order of a 4KiB page and that the
92  * mapping has natural alignment.
93  *
94  * To retain this behavior, we currently advertise that we support
95  * all page sizes that are an order of 4KiB.
96  *
97  * If at some point we'd like to utilize the IOMMU core's new behavior,
98  * we could change this to advertise the real page sizes we support.
99  */
100 #define INTEL_IOMMU_PGSIZES	(~0xFFFUL)
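/* ~0xFFFUL sets every bit from bit 12 upwards, i.e. 4KiB, 8KiB, 16KiB, ...
 * -- every power-of-two size that is a multiple of 4KiB.
 */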
101 
102 static inline int agaw_to_level(int agaw)
103 {
104 	return agaw + 2;
105 }
106 
107 static inline int agaw_to_width(int agaw)
108 {
109 	return 30 + agaw * LEVEL_STRIDE;
110 }
111 
112 static inline int width_to_agaw(int width)
113 {
114 	return (width - 30) / LEVEL_STRIDE;
115 }
116 
117 static inline unsigned int level_to_offset_bits(int level)
118 {
119 	return (level - 1) * LEVEL_STRIDE;
120 }
121 
122 static inline int pfn_level_offset(unsigned long pfn, int level)
123 {
124 	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
125 }
126 
127 static inline unsigned long level_mask(int level)
128 {
129 	return -1UL << level_to_offset_bits(level);
130 }
131 
132 static inline unsigned long level_size(int level)
133 {
134 	return 1UL << level_to_offset_bits(level);
135 }
136 
137 static inline unsigned long align_to_level(unsigned long pfn, int level)
138 {
139 	return (pfn + level_size(level) - 1) & level_mask(level);
140 }
141 
142 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
143 {
144 	return  1 << ((lvl - 1) * LEVEL_STRIDE);
145 }
146 
147 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
148    are never going to work. */
149 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
150 {
151 	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
152 }
153 
154 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
155 {
156 	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
157 }
158 static inline unsigned long page_to_dma_pfn(struct page *pg)
159 {
160 	return mm_to_dma_pfn(page_to_pfn(pg));
161 }
162 static inline unsigned long virt_to_dma_pfn(void *p)
163 {
164 	return page_to_dma_pfn(virt_to_page(p));
165 }
166 
167 /* global iommu list, set NULL for ignored DMAR units */
168 static struct intel_iommu **g_iommus;
169 
170 static void __init check_tylersburg_isoch(void);
171 static int rwbf_quirk;
172 
173 /*
174  * set to 1 to panic kernel if can't successfully enable VT-d
175  * (used when kernel is launched w/ TXT)
176  */
177 static int force_on = 0;
178 
179 /*
180  * 0: Present
181  * 1-11: Reserved
182  * 12-63: Context Ptr (12 - (haw-1))
183  * 64-127: Reserved
184  */
185 struct root_entry {
186 	u64	val;
187 	u64	rsvd1;
188 };
189 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
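/* 4096 / 16 = 256 root entries, one per PCI bus number */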
190 static inline bool root_present(struct root_entry *root)
191 {
192 	return (root->val & 1);
193 }
194 static inline void set_root_present(struct root_entry *root)
195 {
196 	root->val |= 1;
197 }
198 static inline void set_root_value(struct root_entry *root, unsigned long value)
199 {
200 	root->val |= value & VTD_PAGE_MASK;
201 }
202 
203 static inline struct context_entry *
204 get_context_addr_from_root(struct root_entry *root)
205 {
206 	return (struct context_entry *)
207 		(root_present(root)?phys_to_virt(
208 		root->val & VTD_PAGE_MASK) :
209 		NULL);
210 }
211 
212 /*
213  * low 64 bits:
214  * 0: present
215  * 1: fault processing disable
216  * 2-3: translation type
217  * 12-63: address space root
218  * high 64 bits:
219  * 0-2: address width
220  * 3-6: aval
221  * 8-23: domain id
222  */
223 struct context_entry {
224 	u64 lo;
225 	u64 hi;
226 };
227 
228 static inline bool context_present(struct context_entry *context)
229 {
230 	return (context->lo & 1);
231 }
232 static inline void context_set_present(struct context_entry *context)
233 {
234 	context->lo |= 1;
235 }
236 
237 static inline void context_set_fault_enable(struct context_entry *context)
238 {
239 	context->lo &= (((u64)-1) << 2) | 1;
240 }
241 
242 static inline void context_set_translation_type(struct context_entry *context,
243 						unsigned long value)
244 {
245 	context->lo &= (((u64)-1) << 4) | 3;
246 	context->lo |= (value & 3) << 2;
247 }
248 
249 static inline void context_set_address_root(struct context_entry *context,
250 					    unsigned long value)
251 {
252 	context->lo |= value & VTD_PAGE_MASK;
253 }
254 
255 static inline void context_set_address_width(struct context_entry *context,
256 					     unsigned long value)
257 {
258 	context->hi |= value & 7;
259 }
260 
261 static inline void context_set_domain_id(struct context_entry *context,
262 					 unsigned long value)
263 {
264 	context->hi |= (value & ((1 << 16) - 1)) << 8;
265 }
266 
267 static inline void context_clear_entry(struct context_entry *context)
268 {
269 	context->lo = 0;
270 	context->hi = 0;
271 }
272 
273 /*
274  * 0: readable
275  * 1: writable
276  * 2-6: reserved
277  * 7: super page
278  * 8-10: available
279  * 11: snoop behavior
280  * 12-63: Host physical address
281  */
282 struct dma_pte {
283 	u64 val;
284 };
285 
286 static inline void dma_clear_pte(struct dma_pte *pte)
287 {
288 	pte->val = 0;
289 }
290 
291 static inline void dma_set_pte_readable(struct dma_pte *pte)
292 {
293 	pte->val |= DMA_PTE_READ;
294 }
295 
296 static inline void dma_set_pte_writable(struct dma_pte *pte)
297 {
298 	pte->val |= DMA_PTE_WRITE;
299 }
300 
301 static inline void dma_set_pte_snp(struct dma_pte *pte)
302 {
303 	pte->val |= DMA_PTE_SNP;
304 }
305 
306 static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
307 {
308 	pte->val = (pte->val & ~3) | (prot & 3);
309 }
310 
311 static inline u64 dma_pte_addr(struct dma_pte *pte)
312 {
313 #ifdef CONFIG_64BIT
314 	return pte->val & VTD_PAGE_MASK;
315 #else
316 	/* Must have a full atomic 64-bit read */
317 	return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
318 #endif
319 }
320 
321 static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
322 {
323 	pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
324 }
325 
326 static inline bool dma_pte_present(struct dma_pte *pte)
327 {
328 	return (pte->val & 3) != 0;
329 }
330 
331 static inline bool dma_pte_superpage(struct dma_pte *pte)
332 {
333 	return (pte->val & (1 << 7));
334 }
335 
336 static inline int first_pte_in_page(struct dma_pte *pte)
337 {
338 	return !((unsigned long)pte & ~VTD_PAGE_MASK);
339 }
340 
341 /*
342  * This domain is a statically identity mapping domain.
343  *	1. This domain creates a static 1:1 mapping to all usable memory.
344  * 	2. It maps to each iommu if successful.
345  *	3. Each iommu maps to this domain if successful.
346  */
347 static struct dmar_domain *si_domain;
348 static int hw_pass_through = 1;
349 
350 /* devices under the same p2p bridge are owned in one domain */
351 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
352 
353 /* domain represents a virtual machine, more than one device
354  * across iommus may be owned in one domain, e.g. kvm guest.
355  */
356 #define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 1)
357 
358 /* si_domain contains multiple devices */
359 #define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 2)
360 
361 /* define the limit of IOMMUs supported in each domain */
362 #ifdef	CONFIG_X86
363 # define	IOMMU_UNITS_SUPPORTED	MAX_IO_APICS
364 #else
365 # define	IOMMU_UNITS_SUPPORTED	64
366 #endif
367 
368 struct dmar_domain {
369 	int	id;			/* domain id */
370 	int	nid;			/* node id */
371 	DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
372 					/* bitmap of iommus this domain uses*/
373 
374 	struct list_head devices; 	/* all devices' list */
375 	struct iova_domain iovad;	/* iova's that belong to this domain */
376 
377 	struct dma_pte	*pgd;		/* virtual address */
378 	int		gaw;		/* max guest address width */
379 
380 	/* adjusted guest address width, 0 is level 2 30-bit */
381 	int		agaw;
382 
383 	int		flags;		/* flags to find out type of domain */
384 
385 	int		iommu_coherency;/* indicate coherency of iommu access */
386 	int		iommu_snooping; /* indicate snooping control feature*/
387 	int		iommu_count;	/* reference count of iommu */
388 	int		iommu_superpage;/* Level of superpages supported:
389 					   0 == 4KiB (no superpages), 1 == 2MiB,
390 					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
391 	spinlock_t	iommu_lock;	/* protect iommu set in domain */
392 	u64		max_addr;	/* maximum mapped address */
393 };
394 
395 /* PCI domain-device relationship */
396 struct device_domain_info {
397 	struct list_head link;	/* link to domain siblings */
398 	struct list_head global; /* link to global list */
399 	int segment;		/* PCI domain */
400 	u8 bus;			/* PCI bus number */
401 	u8 devfn;		/* PCI devfn number */
402 	struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
403 	struct intel_iommu *iommu; /* IOMMU used by this device */
404 	struct dmar_domain *domain; /* pointer to domain */
405 };
406 
407 static void flush_unmaps_timeout(unsigned long data);
408 
409 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
410 
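/*
 * Deferred ("lazy") unmapping: freed IOVAs are queued in these tables and
 * released in batches by flush_unmaps_timeout(), so a single IOTLB
 * invalidation can cover many unmaps.  Batching is disabled by
 * intel_iommu=strict.
 */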
411 #define HIGH_WATER_MARK 250
412 struct deferred_flush_tables {
413 	int next;
414 	struct iova *iova[HIGH_WATER_MARK];
415 	struct dmar_domain *domain[HIGH_WATER_MARK];
416 };
417 
418 static struct deferred_flush_tables *deferred_flush;
419 
420 /* bitmap for indexing intel_iommus */
421 static int g_num_of_iommus;
422 
423 static DEFINE_SPINLOCK(async_umap_flush_lock);
424 static LIST_HEAD(unmaps_to_do);
425 
426 static int timer_on;
427 static long list_size;
428 
429 static void domain_remove_dev_info(struct dmar_domain *domain);
430 
431 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
432 int dmar_disabled = 0;
433 #else
434 int dmar_disabled = 1;
435 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
436 
437 int intel_iommu_enabled = 0;
438 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
439 
440 static int dmar_map_gfx = 1;
441 static int dmar_forcedac;
442 static int intel_iommu_strict;
443 static int intel_iommu_superpage = 1;
444 
445 int intel_iommu_gfx_mapped;
446 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
447 
448 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
449 static DEFINE_SPINLOCK(device_domain_lock);
450 static LIST_HEAD(device_domain_list);
451 
452 static struct iommu_ops intel_iommu_ops;
453 
454 static int __init intel_iommu_setup(char *str)
455 {
456 	if (!str)
457 		return -EINVAL;
458 	while (*str) {
459 		if (!strncmp(str, "on", 2)) {
460 			dmar_disabled = 0;
461 			printk(KERN_INFO "Intel-IOMMU: enabled\n");
462 		} else if (!strncmp(str, "off", 3)) {
463 			dmar_disabled = 1;
464 			printk(KERN_INFO "Intel-IOMMU: disabled\n");
465 		} else if (!strncmp(str, "igfx_off", 8)) {
466 			dmar_map_gfx = 0;
467 			printk(KERN_INFO
468 				"Intel-IOMMU: disable GFX device mapping\n");
469 		} else if (!strncmp(str, "forcedac", 8)) {
470 			printk(KERN_INFO
471 				"Intel-IOMMU: Forcing DAC for PCI devices\n");
472 			dmar_forcedac = 1;
473 		} else if (!strncmp(str, "strict", 6)) {
474 			printk(KERN_INFO
475 				"Intel-IOMMU: disable batched IOTLB flush\n");
476 			intel_iommu_strict = 1;
477 		} else if (!strncmp(str, "sp_off", 6)) {
478 			printk(KERN_INFO
479 				"Intel-IOMMU: disable supported super page\n");
480 			intel_iommu_superpage = 0;
481 		}
482 
483 		str += strcspn(str, ",");
484 		while (*str == ',')
485 			str++;
486 	}
487 	return 0;
488 }
489 __setup("intel_iommu=", intel_iommu_setup);
490 
491 static struct kmem_cache *iommu_domain_cache;
492 static struct kmem_cache *iommu_devinfo_cache;
493 static struct kmem_cache *iommu_iova_cache;
494 
495 static inline void *alloc_pgtable_page(int node)
496 {
497 	struct page *page;
498 	void *vaddr = NULL;
499 
500 	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
501 	if (page)
502 		vaddr = page_address(page);
503 	return vaddr;
504 }
505 
506 static inline void free_pgtable_page(void *vaddr)
507 {
508 	free_page((unsigned long)vaddr);
509 }
510 
511 static inline void *alloc_domain_mem(void)
512 {
513 	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
514 }
515 
516 static void free_domain_mem(void *vaddr)
517 {
518 	kmem_cache_free(iommu_domain_cache, vaddr);
519 }
520 
521 static inline void * alloc_devinfo_mem(void)
522 {
523 	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
524 }
525 
526 static inline void free_devinfo_mem(void *vaddr)
527 {
528 	kmem_cache_free(iommu_devinfo_cache, vaddr);
529 }
530 
531 struct iova *alloc_iova_mem(void)
532 {
533 	return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
534 }
535 
536 void free_iova_mem(struct iova *iova)
537 {
538 	kmem_cache_free(iommu_iova_cache, iova);
539 }
540 
541 
542 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
543 {
544 	unsigned long sagaw;
545 	int agaw = -1;
546 
547 	sagaw = cap_sagaw(iommu->cap);
548 	for (agaw = width_to_agaw(max_gaw);
549 	     agaw >= 0; agaw--) {
550 		if (test_bit(agaw, &sagaw))
551 			break;
552 	}
553 
554 	return agaw;
555 }
556 
557 /*
558  * Calculate max SAGAW for each iommu.
559  */
560 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
561 {
562 	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
563 }
564 
565 /*
566  * calculate agaw for each iommu.
567  * "SAGAW" may be different across iommus, use a default agaw, and
568  * get a supported less agaw for iommus that don't support the default agaw.
569  */
570 int iommu_calculate_agaw(struct intel_iommu *iommu)
571 {
572 	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
573 }
574 
575 /* This function only returns a single iommu in a domain */
576 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
577 {
578 	int iommu_id;
579 
580 	/* si_domain and vm domain should not get here. */
581 	BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
582 	BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
583 
584 	iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
585 	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
586 		return NULL;
587 
588 	return g_iommus[iommu_id];
589 }
590 
591 static void domain_update_iommu_coherency(struct dmar_domain *domain)
592 {
593 	int i;
594 
595 	i = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
596 
597 	domain->iommu_coherency = i < g_num_of_iommus ? 1 : 0;
598 
599 	for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
600 		if (!ecap_coherent(g_iommus[i]->ecap)) {
601 			domain->iommu_coherency = 0;
602 			break;
603 		}
604 	}
605 }
606 
607 static void domain_update_iommu_snooping(struct dmar_domain *domain)
608 {
609 	int i;
610 
611 	domain->iommu_snooping = 1;
612 
613 	for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
614 		if (!ecap_sc_support(g_iommus[i]->ecap)) {
615 			domain->iommu_snooping = 0;
616 			break;
617 		}
618 	}
619 }
620 
621 static void domain_update_iommu_superpage(struct dmar_domain *domain)
622 {
623 	struct dmar_drhd_unit *drhd;
624 	struct intel_iommu *iommu = NULL;
625 	int mask = 0xf;
626 
627 	if (!intel_iommu_superpage) {
628 		domain->iommu_superpage = 0;
629 		return;
630 	}
631 
632 	/* set iommu_superpage to the smallest common denominator */
633 	for_each_active_iommu(iommu, drhd) {
634 		mask &= cap_super_page_val(iommu->cap);
635 		if (!mask) {
636 			break;
637 		}
638 	}
639 	domain->iommu_superpage = fls(mask);
640 }
641 
642 /* Some capabilities may be different across iommus */
643 static void domain_update_iommu_cap(struct dmar_domain *domain)
644 {
645 	domain_update_iommu_coherency(domain);
646 	domain_update_iommu_snooping(domain);
647 	domain_update_iommu_superpage(domain);
648 }
649 
650 static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
651 {
652 	struct dmar_drhd_unit *drhd = NULL;
653 	int i;
654 
655 	for_each_drhd_unit(drhd) {
656 		if (drhd->ignored)
657 			continue;
658 		if (segment != drhd->segment)
659 			continue;
660 
661 		for (i = 0; i < drhd->devices_cnt; i++) {
662 			if (drhd->devices[i] &&
663 			    drhd->devices[i]->bus->number == bus &&
664 			    drhd->devices[i]->devfn == devfn)
665 				return drhd->iommu;
666 			if (drhd->devices[i] &&
667 			    drhd->devices[i]->subordinate &&
668 			    drhd->devices[i]->subordinate->number <= bus &&
669 			    drhd->devices[i]->subordinate->busn_res.end >= bus)
670 				return drhd->iommu;
671 		}
672 
673 		if (drhd->include_all)
674 			return drhd->iommu;
675 	}
676 
677 	return NULL;
678 }
679 
680 static void domain_flush_cache(struct dmar_domain *domain,
681 			       void *addr, int size)
682 {
683 	if (!domain->iommu_coherency)
684 		clflush_cache_range(addr, size);
685 }
686 
687 /* Gets context entry for a given bus and devfn */
688 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
689 		u8 bus, u8 devfn)
690 {
691 	struct root_entry *root;
692 	struct context_entry *context;
693 	unsigned long phy_addr;
694 	unsigned long flags;
695 
696 	spin_lock_irqsave(&iommu->lock, flags);
697 	root = &iommu->root_entry[bus];
698 	context = get_context_addr_from_root(root);
699 	if (!context) {
700 		context = (struct context_entry *)
701 				alloc_pgtable_page(iommu->node);
702 		if (!context) {
703 			spin_unlock_irqrestore(&iommu->lock, flags);
704 			return NULL;
705 		}
706 		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
707 		phy_addr = virt_to_phys((void *)context);
708 		set_root_value(root, phy_addr);
709 		set_root_present(root);
710 		__iommu_flush_cache(iommu, root, sizeof(*root));
711 	}
712 	spin_unlock_irqrestore(&iommu->lock, flags);
713 	return &context[devfn];
714 }
715 
716 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
717 {
718 	struct root_entry *root;
719 	struct context_entry *context;
720 	int ret;
721 	unsigned long flags;
722 
723 	spin_lock_irqsave(&iommu->lock, flags);
724 	root = &iommu->root_entry[bus];
725 	context = get_context_addr_from_root(root);
726 	if (!context) {
727 		ret = 0;
728 		goto out;
729 	}
730 	ret = context_present(&context[devfn]);
731 out:
732 	spin_unlock_irqrestore(&iommu->lock, flags);
733 	return ret;
734 }
735 
736 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
737 {
738 	struct root_entry *root;
739 	struct context_entry *context;
740 	unsigned long flags;
741 
742 	spin_lock_irqsave(&iommu->lock, flags);
743 	root = &iommu->root_entry[bus];
744 	context = get_context_addr_from_root(root);
745 	if (context) {
746 		context_clear_entry(&context[devfn]);
747 		__iommu_flush_cache(iommu, &context[devfn], \
748 			sizeof(*context));
749 	}
750 	spin_unlock_irqrestore(&iommu->lock, flags);
751 }
752 
753 static void free_context_table(struct intel_iommu *iommu)
754 {
755 	struct root_entry *root;
756 	int i;
757 	unsigned long flags;
758 	struct context_entry *context;
759 
760 	spin_lock_irqsave(&iommu->lock, flags);
761 	if (!iommu->root_entry) {
762 		goto out;
763 	}
764 	for (i = 0; i < ROOT_ENTRY_NR; i++) {
765 		root = &iommu->root_entry[i];
766 		context = get_context_addr_from_root(root);
767 		if (context)
768 			free_pgtable_page(context);
769 	}
770 	free_pgtable_page(iommu->root_entry);
771 	iommu->root_entry = NULL;
772 out:
773 	spin_unlock_irqrestore(&iommu->lock, flags);
774 }
775 
776 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
777 				      unsigned long pfn, int target_level)
778 {
779 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
780 	struct dma_pte *parent, *pte = NULL;
781 	int level = agaw_to_level(domain->agaw);
782 	int offset;
783 
784 	BUG_ON(!domain->pgd);
785 	BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
786 	parent = domain->pgd;
787 
788 	while (level > 0) {
789 		void *tmp_page;
790 
791 		offset = pfn_level_offset(pfn, level);
792 		pte = &parent[offset];
793 		if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
794 			break;
795 		if (level == target_level)
796 			break;
797 
798 		if (!dma_pte_present(pte)) {
799 			uint64_t pteval;
800 
801 			tmp_page = alloc_pgtable_page(domain->nid);
802 
803 			if (!tmp_page)
804 				return NULL;
805 
806 			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
807 			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
808 			if (cmpxchg64(&pte->val, 0ULL, pteval)) {
809 				/* Someone else set it while we were thinking; use theirs. */
810 				free_pgtable_page(tmp_page);
811 			} else {
812 				dma_pte_addr(pte);
813 				domain_flush_cache(domain, pte, sizeof(*pte));
814 			}
815 		}
816 		parent = phys_to_virt(dma_pte_addr(pte));
817 		level--;
818 	}
819 
820 	return pte;
821 }
822 
823 
824 /* return address's pte at specific level */
825 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
826 					 unsigned long pfn,
827 					 int level, int *large_page)
828 {
829 	struct dma_pte *parent, *pte = NULL;
830 	int total = agaw_to_level(domain->agaw);
831 	int offset;
832 
833 	parent = domain->pgd;
834 	while (level <= total) {
835 		offset = pfn_level_offset(pfn, total);
836 		pte = &parent[offset];
837 		if (level == total)
838 			return pte;
839 
840 		if (!dma_pte_present(pte)) {
841 			*large_page = total;
842 			break;
843 		}
844 
845 		if (pte->val & DMA_PTE_LARGE_PAGE) {
846 			*large_page = total;
847 			return pte;
848 		}
849 
850 		parent = phys_to_virt(dma_pte_addr(pte));
851 		total--;
852 	}
853 	return NULL;
854 }
855 
856 /* clear last level pte, a tlb flush should follow */
857 static int dma_pte_clear_range(struct dmar_domain *domain,
858 				unsigned long start_pfn,
859 				unsigned long last_pfn)
860 {
861 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
862 	unsigned int large_page = 1;
863 	struct dma_pte *first_pte, *pte;
864 	int order;
865 
866 	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
867 	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
868 	BUG_ON(start_pfn > last_pfn);
869 
870 	/* we don't need lock here; nobody else touches the iova range */
871 	do {
872 		large_page = 1;
873 		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
874 		if (!pte) {
875 			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
876 			continue;
877 		}
878 		do {
879 			dma_clear_pte(pte);
880 			start_pfn += lvl_to_nr_pages(large_page);
881 			pte++;
882 		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));
883 
884 		domain_flush_cache(domain, first_pte,
885 				   (void *)pte - (void *)first_pte);
886 
887 	} while (start_pfn && start_pfn <= last_pfn);
888 
889 	order = (large_page - 1) * 9;
890 	return order;
891 }
892 
893 /* free page table pages. last level pte should already be cleared */
894 static void dma_pte_free_pagetable(struct dmar_domain *domain,
895 				   unsigned long start_pfn,
896 				   unsigned long last_pfn)
897 {
898 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
899 	struct dma_pte *first_pte, *pte;
900 	int total = agaw_to_level(domain->agaw);
901 	int level;
902 	unsigned long tmp;
903 	int large_page = 2;
904 
905 	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
906 	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
907 	BUG_ON(start_pfn > last_pfn);
908 
909 	/* We don't need lock here; nobody else touches the iova range */
910 	level = 2;
911 	while (level <= total) {
912 		tmp = align_to_level(start_pfn, level);
913 
914 		/* If we can't even clear one PTE at this level, we're done */
915 		if (tmp + level_size(level) - 1 > last_pfn)
916 			return;
917 
918 		do {
919 			large_page = level;
920 			first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
921 			if (large_page > level)
922 				level = large_page + 1;
923 			if (!pte) {
924 				tmp = align_to_level(tmp + 1, level + 1);
925 				continue;
926 			}
927 			do {
928 				if (dma_pte_present(pte)) {
929 					free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
930 					dma_clear_pte(pte);
931 				}
932 				pte++;
933 				tmp += level_size(level);
934 			} while (!first_pte_in_page(pte) &&
935 				 tmp + level_size(level) - 1 <= last_pfn);
936 
937 			domain_flush_cache(domain, first_pte,
938 					   (void *)pte - (void *)first_pte);
939 
940 		} while (tmp && tmp + level_size(level) - 1 <= last_pfn);
941 		level++;
942 	}
943 	/* free pgd */
944 	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
945 		free_pgtable_page(domain->pgd);
946 		domain->pgd = NULL;
947 	}
948 }
949 
950 /* iommu handling */
951 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
952 {
953 	struct root_entry *root;
954 	unsigned long flags;
955 
956 	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
957 	if (!root)
958 		return -ENOMEM;
959 
960 	__iommu_flush_cache(iommu, root, ROOT_SIZE);
961 
962 	spin_lock_irqsave(&iommu->lock, flags);
963 	iommu->root_entry = root;
964 	spin_unlock_irqrestore(&iommu->lock, flags);
965 
966 	return 0;
967 }
968 
969 static void iommu_set_root_entry(struct intel_iommu *iommu)
970 {
971 	void *addr;
972 	u32 sts;
973 	unsigned long flag;
974 
975 	addr = iommu->root_entry;
976 
977 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
978 	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
979 
980 	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
981 
982 	/* Make sure hardware complete it */
983 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
984 		      readl, (sts & DMA_GSTS_RTPS), sts);
985 
986 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
987 }
988 
989 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
990 {
991 	u32 val;
992 	unsigned long flag;
993 
994 	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
995 		return;
996 
997 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
998 	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
999 
1000 	/* Make sure hardware complete it */
1001 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1002 		      readl, (!(val & DMA_GSTS_WBFS)), val);
1003 
1004 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1005 }
1006 
1007 /* return value determines whether we need a write buffer flush */
1008 static void __iommu_flush_context(struct intel_iommu *iommu,
1009 				  u16 did, u16 source_id, u8 function_mask,
1010 				  u64 type)
1011 {
1012 	u64 val = 0;
1013 	unsigned long flag;
1014 
1015 	switch (type) {
1016 	case DMA_CCMD_GLOBAL_INVL:
1017 		val = DMA_CCMD_GLOBAL_INVL;
1018 		break;
1019 	case DMA_CCMD_DOMAIN_INVL:
1020 		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1021 		break;
1022 	case DMA_CCMD_DEVICE_INVL:
1023 		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1024 			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1025 		break;
1026 	default:
1027 		BUG();
1028 	}
1029 	val |= DMA_CCMD_ICC;
1030 
1031 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1032 	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1033 
1034 	/* Make sure hardware complete it */
1035 	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1036 		dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1037 
1038 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1039 }
1040 
1041 /* return value determines whether we need a write buffer flush */
1042 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1043 				u64 addr, unsigned int size_order, u64 type)
1044 {
1045 	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1046 	u64 val = 0, val_iva = 0;
1047 	unsigned long flag;
1048 
1049 	switch (type) {
1050 	case DMA_TLB_GLOBAL_FLUSH:
1051 		/* global flush doesn't need to set IVA_REG */
1052 		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1053 		break;
1054 	case DMA_TLB_DSI_FLUSH:
1055 		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1056 		break;
1057 	case DMA_TLB_PSI_FLUSH:
1058 		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1059 		/* Note: always flush non-leaf currently */
1060 		val_iva = size_order | addr;
1061 		break;
1062 	default:
1063 		BUG();
1064 	}
1065 	/* Note: set drain read/write */
1066 #if 0
1067 	/*
1068 	 * This is probably to be super secure.. Looks like we can
1069 	 * ignore it without any impact.
1070 	 */
1071 	if (cap_read_drain(iommu->cap))
1072 		val |= DMA_TLB_READ_DRAIN;
1073 #endif
1074 	if (cap_write_drain(iommu->cap))
1075 		val |= DMA_TLB_WRITE_DRAIN;
1076 
1077 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1078 	/* Note: Only uses first TLB reg currently */
1079 	if (val_iva)
1080 		dmar_writeq(iommu->reg + tlb_offset, val_iva);
1081 	dmar_writeq(iommu->reg + tlb_offset + 8, val);
1082 
1083 	/* Make sure hardware complete it */
1084 	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1085 		dmar_readq, (!(val & DMA_TLB_IVT)), val);
1086 
1087 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1088 
1089 	/* check IOTLB invalidation granularity */
1090 	if (DMA_TLB_IAIG(val) == 0)
1091 		printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1092 	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1093 		pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1094 			(unsigned long long)DMA_TLB_IIRG(type),
1095 			(unsigned long long)DMA_TLB_IAIG(val));
1096 }
1097 
1098 static struct device_domain_info *iommu_support_dev_iotlb(
1099 	struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1100 {
1101 	int found = 0;
1102 	unsigned long flags;
1103 	struct device_domain_info *info;
1104 	struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1105 
1106 	if (!ecap_dev_iotlb_support(iommu->ecap))
1107 		return NULL;
1108 
1109 	if (!iommu->qi)
1110 		return NULL;
1111 
1112 	spin_lock_irqsave(&device_domain_lock, flags);
1113 	list_for_each_entry(info, &domain->devices, link)
1114 		if (info->bus == bus && info->devfn == devfn) {
1115 			found = 1;
1116 			break;
1117 		}
1118 	spin_unlock_irqrestore(&device_domain_lock, flags);
1119 
1120 	if (!found || !info->dev)
1121 		return NULL;
1122 
1123 	if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1124 		return NULL;
1125 
1126 	if (!dmar_find_matched_atsr_unit(info->dev))
1127 		return NULL;
1128 
1129 	info->iommu = iommu;
1130 
1131 	return info;
1132 }
1133 
1134 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1135 {
1136 	if (!info)
1137 		return;
1138 
1139 	pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1140 }
1141 
1142 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1143 {
1144 	if (!info->dev || !pci_ats_enabled(info->dev))
1145 		return;
1146 
1147 	pci_disable_ats(info->dev);
1148 }
1149 
1150 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1151 				  u64 addr, unsigned mask)
1152 {
1153 	u16 sid, qdep;
1154 	unsigned long flags;
1155 	struct device_domain_info *info;
1156 
1157 	spin_lock_irqsave(&device_domain_lock, flags);
1158 	list_for_each_entry(info, &domain->devices, link) {
1159 		if (!info->dev || !pci_ats_enabled(info->dev))
1160 			continue;
1161 
1162 		sid = info->bus << 8 | info->devfn;
1163 		qdep = pci_ats_queue_depth(info->dev);
1164 		qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1165 	}
1166 	spin_unlock_irqrestore(&device_domain_lock, flags);
1167 }
1168 
1169 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1170 				  unsigned long pfn, unsigned int pages, int map)
1171 {
1172 	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1173 	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
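	/* e.g. pages = 5: __roundup_pow_of_two() = 8, mask = ilog2(8) = 3,
	 * so the PSI request covers 8 VT-d pages starting at addr.
	 */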
1174 
1175 	BUG_ON(pages == 0);
1176 
1177 	/*
1178 	 * Fallback to domain selective flush if no PSI support or the size is
1179 	 * too big.
1180 	 * PSI requires page size to be 2 ^ x, and the base address is naturally
1181 	 * aligned to the size
1182 	 */
1183 	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1184 		iommu->flush.flush_iotlb(iommu, did, 0, 0,
1185 						DMA_TLB_DSI_FLUSH);
1186 	else
1187 		iommu->flush.flush_iotlb(iommu, did, addr, mask,
1188 						DMA_TLB_PSI_FLUSH);
1189 
1190 	/*
1191 	 * In caching mode, changes of pages from non-present to present require
1192 	 * flush. However, device IOTLB doesn't need to be flushed in this case.
1193 	 */
1194 	if (!cap_caching_mode(iommu->cap) || !map)
1195 		iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1196 }
1197 
1198 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1199 {
1200 	u32 pmen;
1201 	unsigned long flags;
1202 
1203 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1204 	pmen = readl(iommu->reg + DMAR_PMEN_REG);
1205 	pmen &= ~DMA_PMEN_EPM;
1206 	writel(pmen, iommu->reg + DMAR_PMEN_REG);
1207 
1208 	/* wait for the protected region status bit to clear */
1209 	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1210 		readl, !(pmen & DMA_PMEN_PRS), pmen);
1211 
1212 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1213 }
1214 
1215 static int iommu_enable_translation(struct intel_iommu *iommu)
1216 {
1217 	u32 sts;
1218 	unsigned long flags;
1219 
1220 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1221 	iommu->gcmd |= DMA_GCMD_TE;
1222 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1223 
1224 	/* Make sure hardware complete it */
1225 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1226 		      readl, (sts & DMA_GSTS_TES), sts);
1227 
1228 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1229 	return 0;
1230 }
1231 
1232 static int iommu_disable_translation(struct intel_iommu *iommu)
1233 {
1234 	u32 sts;
1235 	unsigned long flag;
1236 
1237 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1238 	iommu->gcmd &= ~DMA_GCMD_TE;
1239 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1240 
1241 	/* Make sure hardware complete it */
1242 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1243 		      readl, (!(sts & DMA_GSTS_TES)), sts);
1244 
1245 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1246 	return 0;
1247 }
1248 
1249 
1250 static int iommu_init_domains(struct intel_iommu *iommu)
1251 {
1252 	unsigned long ndomains;
1253 	unsigned long nlongs;
1254 
1255 	ndomains = cap_ndoms(iommu->cap);
1256 	pr_debug("IOMMU %d: Number of Domains supported <%ld>\n", iommu->seq_id,
1257 			ndomains);
1258 	nlongs = BITS_TO_LONGS(ndomains);
1259 
1260 	spin_lock_init(&iommu->lock);
1261 
1262 	/* TBD: there might be 64K domains,
1263 	 * consider other allocation for future chip
1264 	 */
1265 	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1266 	if (!iommu->domain_ids) {
1267 		printk(KERN_ERR "Allocating domain id array failed\n");
1268 		return -ENOMEM;
1269 	}
1270 	iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1271 			GFP_KERNEL);
1272 	if (!iommu->domains) {
1273 		printk(KERN_ERR "Allocating domain array failed\n");
1274 		return -ENOMEM;
1275 	}
1276 
1277 	/*
1278 	 * if Caching mode is set, then invalid translations are tagged
1279 	 * with domainid 0. Hence we need to pre-allocate it.
1280 	 */
1281 	if (cap_caching_mode(iommu->cap))
1282 		set_bit(0, iommu->domain_ids);
1283 	return 0;
1284 }
1285 
1286 
1287 static void domain_exit(struct dmar_domain *domain);
1288 static void vm_domain_exit(struct dmar_domain *domain);
1289 
1290 void free_dmar_iommu(struct intel_iommu *iommu)
1291 {
1292 	struct dmar_domain *domain;
1293 	int i;
1294 	unsigned long flags;
1295 
1296 	if ((iommu->domains) && (iommu->domain_ids)) {
1297 		for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1298 			domain = iommu->domains[i];
1299 			clear_bit(i, iommu->domain_ids);
1300 
1301 			spin_lock_irqsave(&domain->iommu_lock, flags);
1302 			if (--domain->iommu_count == 0) {
1303 				if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1304 					vm_domain_exit(domain);
1305 				else
1306 					domain_exit(domain);
1307 			}
1308 			spin_unlock_irqrestore(&domain->iommu_lock, flags);
1309 		}
1310 	}
1311 
1312 	if (iommu->gcmd & DMA_GCMD_TE)
1313 		iommu_disable_translation(iommu);
1314 
1315 	if (iommu->irq) {
1316 		irq_set_handler_data(iommu->irq, NULL);
1317 		/* This will mask the irq */
1318 		free_irq(iommu->irq, iommu);
1319 		destroy_irq(iommu->irq);
1320 	}
1321 
1322 	kfree(iommu->domains);
1323 	kfree(iommu->domain_ids);
1324 
1325 	g_iommus[iommu->seq_id] = NULL;
1326 
1327 	/* if all iommus are freed, free g_iommus */
1328 	for (i = 0; i < g_num_of_iommus; i++) {
1329 		if (g_iommus[i])
1330 			break;
1331 	}
1332 
1333 	if (i == g_num_of_iommus)
1334 		kfree(g_iommus);
1335 
1336 	/* free context mapping */
1337 	free_context_table(iommu);
1338 }
1339 
1340 static struct dmar_domain *alloc_domain(void)
1341 {
1342 	struct dmar_domain *domain;
1343 
1344 	domain = alloc_domain_mem();
1345 	if (!domain)
1346 		return NULL;
1347 
1348 	domain->nid = -1;
1349 	memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
1350 	domain->flags = 0;
1351 
1352 	return domain;
1353 }
1354 
1355 static int iommu_attach_domain(struct dmar_domain *domain,
1356 			       struct intel_iommu *iommu)
1357 {
1358 	int num;
1359 	unsigned long ndomains;
1360 	unsigned long flags;
1361 
1362 	ndomains = cap_ndoms(iommu->cap);
1363 
1364 	spin_lock_irqsave(&iommu->lock, flags);
1365 
1366 	num = find_first_zero_bit(iommu->domain_ids, ndomains);
1367 	if (num >= ndomains) {
1368 		spin_unlock_irqrestore(&iommu->lock, flags);
1369 		printk(KERN_ERR "IOMMU: no free domain ids\n");
1370 		return -ENOMEM;
1371 	}
1372 
1373 	domain->id = num;
1374 	set_bit(num, iommu->domain_ids);
1375 	set_bit(iommu->seq_id, domain->iommu_bmp);
1376 	iommu->domains[num] = domain;
1377 	spin_unlock_irqrestore(&iommu->lock, flags);
1378 
1379 	return 0;
1380 }
1381 
1382 static void iommu_detach_domain(struct dmar_domain *domain,
1383 				struct intel_iommu *iommu)
1384 {
1385 	unsigned long flags;
1386 	int num, ndomains;
1387 	int found = 0;
1388 
1389 	spin_lock_irqsave(&iommu->lock, flags);
1390 	ndomains = cap_ndoms(iommu->cap);
1391 	for_each_set_bit(num, iommu->domain_ids, ndomains) {
1392 		if (iommu->domains[num] == domain) {
1393 			found = 1;
1394 			break;
1395 		}
1396 	}
1397 
1398 	if (found) {
1399 		clear_bit(num, iommu->domain_ids);
1400 		clear_bit(iommu->seq_id, domain->iommu_bmp);
1401 		iommu->domains[num] = NULL;
1402 	}
1403 	spin_unlock_irqrestore(&iommu->lock, flags);
1404 }
1405 
1406 static struct iova_domain reserved_iova_list;
1407 static struct lock_class_key reserved_rbtree_key;
1408 
1409 static int dmar_init_reserved_ranges(void)
1410 {
1411 	struct pci_dev *pdev = NULL;
1412 	struct iova *iova;
1413 	int i;
1414 
1415 	init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1416 
1417 	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1418 		&reserved_rbtree_key);
1419 
1420 	/* IOAPIC ranges shouldn't be accessed by DMA */
1421 	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1422 		IOVA_PFN(IOAPIC_RANGE_END));
1423 	if (!iova) {
1424 		printk(KERN_ERR "Reserve IOAPIC range failed\n");
1425 		return -ENODEV;
1426 	}
1427 
1428 	/* Reserve all PCI MMIO to avoid peer-to-peer access */
1429 	for_each_pci_dev(pdev) {
1430 		struct resource *r;
1431 
1432 		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1433 			r = &pdev->resource[i];
1434 			if (!r->flags || !(r->flags & IORESOURCE_MEM))
1435 				continue;
1436 			iova = reserve_iova(&reserved_iova_list,
1437 					    IOVA_PFN(r->start),
1438 					    IOVA_PFN(r->end));
1439 			if (!iova) {
1440 				printk(KERN_ERR "Reserve iova failed\n");
1441 				return -ENODEV;
1442 			}
1443 		}
1444 	}
1445 	return 0;
1446 }
1447 
1448 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1449 {
1450 	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1451 }
1452 
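/* Round a guest address width up to one the page-table levels can express:
 * 12 offset bits plus a multiple of 9, e.g. gaw 32 -> 39, gaw 48 -> 48.
 */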
1453 static inline int guestwidth_to_adjustwidth(int gaw)
1454 {
1455 	int agaw;
1456 	int r = (gaw - 12) % 9;
1457 
1458 	if (r == 0)
1459 		agaw = gaw;
1460 	else
1461 		agaw = gaw + 9 - r;
1462 	if (agaw > 64)
1463 		agaw = 64;
1464 	return agaw;
1465 }
1466 
1467 static int domain_init(struct dmar_domain *domain, int guest_width)
1468 {
1469 	struct intel_iommu *iommu;
1470 	int adjust_width, agaw;
1471 	unsigned long sagaw;
1472 
1473 	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1474 	spin_lock_init(&domain->iommu_lock);
1475 
1476 	domain_reserve_special_ranges(domain);
1477 
1478 	/* calculate AGAW */
1479 	iommu = domain_get_iommu(domain);
1480 	if (guest_width > cap_mgaw(iommu->cap))
1481 		guest_width = cap_mgaw(iommu->cap);
1482 	domain->gaw = guest_width;
1483 	adjust_width = guestwidth_to_adjustwidth(guest_width);
1484 	agaw = width_to_agaw(adjust_width);
1485 	sagaw = cap_sagaw(iommu->cap);
1486 	if (!test_bit(agaw, &sagaw)) {
1487 		/* hardware doesn't support it, choose a bigger one */
1488 		pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1489 		agaw = find_next_bit(&sagaw, 5, agaw);
1490 		if (agaw >= 5)
1491 			return -ENODEV;
1492 	}
1493 	domain->agaw = agaw;
1494 	INIT_LIST_HEAD(&domain->devices);
1495 
1496 	if (ecap_coherent(iommu->ecap))
1497 		domain->iommu_coherency = 1;
1498 	else
1499 		domain->iommu_coherency = 0;
1500 
1501 	if (ecap_sc_support(iommu->ecap))
1502 		domain->iommu_snooping = 1;
1503 	else
1504 		domain->iommu_snooping = 0;
1505 
1506 	domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1507 	domain->iommu_count = 1;
1508 	domain->nid = iommu->node;
1509 
1510 	/* always allocate the top pgd */
1511 	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1512 	if (!domain->pgd)
1513 		return -ENOMEM;
1514 	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1515 	return 0;
1516 }
1517 
1518 static void domain_exit(struct dmar_domain *domain)
1519 {
1520 	struct dmar_drhd_unit *drhd;
1521 	struct intel_iommu *iommu;
1522 
1523 	/* Domain 0 is reserved, so don't process it */
1524 	if (!domain)
1525 		return;
1526 
1527 	/* Flush any lazy unmaps that may reference this domain */
1528 	if (!intel_iommu_strict)
1529 		flush_unmaps_timeout(0);
1530 
1531 	domain_remove_dev_info(domain);
1532 	/* destroy iovas */
1533 	put_iova_domain(&domain->iovad);
1534 
1535 	/* clear ptes */
1536 	dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1537 
1538 	/* free page tables */
1539 	dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1540 
1541 	for_each_active_iommu(iommu, drhd)
1542 		if (test_bit(iommu->seq_id, domain->iommu_bmp))
1543 			iommu_detach_domain(domain, iommu);
1544 
1545 	free_domain_mem(domain);
1546 }
1547 
1548 static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1549 				 u8 bus, u8 devfn, int translation)
1550 {
1551 	struct context_entry *context;
1552 	unsigned long flags;
1553 	struct intel_iommu *iommu;
1554 	struct dma_pte *pgd;
1555 	unsigned long num;
1556 	unsigned long ndomains;
1557 	int id;
1558 	int agaw;
1559 	struct device_domain_info *info = NULL;
1560 
1561 	pr_debug("Set context mapping for %02x:%02x.%d\n",
1562 		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1563 
1564 	BUG_ON(!domain->pgd);
1565 	BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1566 	       translation != CONTEXT_TT_MULTI_LEVEL);
1567 
1568 	iommu = device_to_iommu(segment, bus, devfn);
1569 	if (!iommu)
1570 		return -ENODEV;
1571 
1572 	context = device_to_context_entry(iommu, bus, devfn);
1573 	if (!context)
1574 		return -ENOMEM;
1575 	spin_lock_irqsave(&iommu->lock, flags);
1576 	if (context_present(context)) {
1577 		spin_unlock_irqrestore(&iommu->lock, flags);
1578 		return 0;
1579 	}
1580 
1581 	id = domain->id;
1582 	pgd = domain->pgd;
1583 
1584 	if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1585 	    domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1586 		int found = 0;
1587 
1588 		/* find an available domain id for this device in iommu */
1589 		ndomains = cap_ndoms(iommu->cap);
1590 		for_each_set_bit(num, iommu->domain_ids, ndomains) {
1591 			if (iommu->domains[num] == domain) {
1592 				id = num;
1593 				found = 1;
1594 				break;
1595 			}
1596 		}
1597 
1598 		if (found == 0) {
1599 			num = find_first_zero_bit(iommu->domain_ids, ndomains);
1600 			if (num >= ndomains) {
1601 				spin_unlock_irqrestore(&iommu->lock, flags);
1602 				printk(KERN_ERR "IOMMU: no free domain ids\n");
1603 				return -EFAULT;
1604 			}
1605 
1606 			set_bit(num, iommu->domain_ids);
1607 			iommu->domains[num] = domain;
1608 			id = num;
1609 		}
1610 
1611 		/* Skip top levels of page tables for
1612 		 * iommu which has less agaw than default.
1613 		 * Unnecessary for PT mode.
1614 		 */
1615 		if (translation != CONTEXT_TT_PASS_THROUGH) {
1616 			for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1617 				pgd = phys_to_virt(dma_pte_addr(pgd));
1618 				if (!dma_pte_present(pgd)) {
1619 					spin_unlock_irqrestore(&iommu->lock, flags);
1620 					return -ENOMEM;
1621 				}
1622 			}
1623 		}
1624 	}
1625 
1626 	context_set_domain_id(context, id);
1627 
1628 	if (translation != CONTEXT_TT_PASS_THROUGH) {
1629 		info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1630 		translation = info ? CONTEXT_TT_DEV_IOTLB :
1631 				     CONTEXT_TT_MULTI_LEVEL;
1632 	}
1633 	/*
1634 	 * In pass through mode, AW must be programmed to indicate the largest
1635 	 * AGAW value supported by hardware. And ASR is ignored by hardware.
1636 	 */
1637 	if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1638 		context_set_address_width(context, iommu->msagaw);
1639 	else {
1640 		context_set_address_root(context, virt_to_phys(pgd));
1641 		context_set_address_width(context, iommu->agaw);
1642 	}
1643 
1644 	context_set_translation_type(context, translation);
1645 	context_set_fault_enable(context);
1646 	context_set_present(context);
1647 	domain_flush_cache(domain, context, sizeof(*context));
1648 
1649 	/*
1650 	 * It's a non-present to present mapping. If hardware doesn't cache
1651 	 * non-present entries we only need to flush the write-buffer. If it
1652 	 * _does_ cache non-present entries, then it does so in the special
1653 	 * domain #0, which we have to flush:
1654 	 */
1655 	if (cap_caching_mode(iommu->cap)) {
1656 		iommu->flush.flush_context(iommu, 0,
1657 					   (((u16)bus) << 8) | devfn,
1658 					   DMA_CCMD_MASK_NOBIT,
1659 					   DMA_CCMD_DEVICE_INVL);
1660 		iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
1661 	} else {
1662 		iommu_flush_write_buffer(iommu);
1663 	}
1664 	iommu_enable_dev_iotlb(info);
1665 	spin_unlock_irqrestore(&iommu->lock, flags);
1666 
1667 	spin_lock_irqsave(&domain->iommu_lock, flags);
1668 	if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1669 		domain->iommu_count++;
1670 		if (domain->iommu_count == 1)
1671 			domain->nid = iommu->node;
1672 		domain_update_iommu_cap(domain);
1673 	}
1674 	spin_unlock_irqrestore(&domain->iommu_lock, flags);
1675 	return 0;
1676 }
1677 
1678 static int
1679 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1680 			int translation)
1681 {
1682 	int ret;
1683 	struct pci_dev *tmp, *parent;
1684 
1685 	ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1686 					 pdev->bus->number, pdev->devfn,
1687 					 translation);
1688 	if (ret)
1689 		return ret;
1690 
1691 	/* dependent device mapping */
1692 	tmp = pci_find_upstream_pcie_bridge(pdev);
1693 	if (!tmp)
1694 		return 0;
1695 	/* Secondary interface's bus number and devfn 0 */
1696 	parent = pdev->bus->self;
1697 	while (parent != tmp) {
1698 		ret = domain_context_mapping_one(domain,
1699 						 pci_domain_nr(parent->bus),
1700 						 parent->bus->number,
1701 						 parent->devfn, translation);
1702 		if (ret)
1703 			return ret;
1704 		parent = parent->bus->self;
1705 	}
1706 	if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
1707 		return domain_context_mapping_one(domain,
1708 					pci_domain_nr(tmp->subordinate),
1709 					tmp->subordinate->number, 0,
1710 					translation);
1711 	else /* this is a legacy PCI bridge */
1712 		return domain_context_mapping_one(domain,
1713 						  pci_domain_nr(tmp->bus),
1714 						  tmp->bus->number,
1715 						  tmp->devfn,
1716 						  translation);
1717 }
1718 
1719 static int domain_context_mapped(struct pci_dev *pdev)
1720 {
1721 	int ret;
1722 	struct pci_dev *tmp, *parent;
1723 	struct intel_iommu *iommu;
1724 
1725 	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1726 				pdev->devfn);
1727 	if (!iommu)
1728 		return -ENODEV;
1729 
1730 	ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
1731 	if (!ret)
1732 		return ret;
1733 	/* dependent device mapping */
1734 	tmp = pci_find_upstream_pcie_bridge(pdev);
1735 	if (!tmp)
1736 		return ret;
1737 	/* Secondary interface's bus number and devfn 0 */
1738 	parent = pdev->bus->self;
1739 	while (parent != tmp) {
1740 		ret = device_context_mapped(iommu, parent->bus->number,
1741 					    parent->devfn);
1742 		if (!ret)
1743 			return ret;
1744 		parent = parent->bus->self;
1745 	}
1746 	if (pci_is_pcie(tmp))
1747 		return device_context_mapped(iommu, tmp->subordinate->number,
1748 					     0);
1749 	else
1750 		return device_context_mapped(iommu, tmp->bus->number,
1751 					     tmp->devfn);
1752 }
1753 
1754 /* Returns a number of VTD pages, but aligned to MM page size */
1755 static inline unsigned long aligned_nrpages(unsigned long host_addr,
1756 					    size_t size)
1757 {
1758 	host_addr &= ~PAGE_MASK;
1759 	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1760 }
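/*
 * Worked example (hypothetical values, assuming 4KiB MM pages): a 0x20-byte
 * buffer starting at host_addr 0x1ff0 straddles a page boundary and therefore
 * needs two VT-d pages:
 *
 *	host_addr &= ~PAGE_MASK;		->  0xff0
 *	PAGE_ALIGN(0xff0 + 0x20)		->  0x2000
 *	0x2000 >> VTD_PAGE_SHIFT		->  2
 */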
1761 
1762 /* Return largest possible superpage level for a given mapping */
1763 static inline int hardware_largepage_caps(struct dmar_domain *domain,
1764 					  unsigned long iov_pfn,
1765 					  unsigned long phy_pfn,
1766 					  unsigned long pages)
1767 {
1768 	int support, level = 1;
1769 	unsigned long pfnmerge;
1770 
1771 	support = domain->iommu_superpage;
1772 
1773 	/* To use a large page, the virtual *and* physical addresses
1774 	   must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1775 	   of them will mean we have to use smaller pages. So just
1776 	   merge them and check both at once. */
1777 	pfnmerge = iov_pfn | phy_pfn;
1778 
1779 	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1780 		pages >>= VTD_STRIDE_SHIFT;
1781 		if (!pages)
1782 			break;
1783 		pfnmerge >>= VTD_STRIDE_SHIFT;
1784 		level++;
1785 		support--;
1786 	}
1787 	return level;
1788 }
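/*
 * Worked example (hypothetical values): with domain->iommu_superpage = 1
 * (2MiB pages available), iov_pfn = 0x200, phy_pfn = 0x400 and pages = 0x400
 * (4MiB worth of 4KiB pages), pfnmerge = 0x600 has no bits below
 * VTD_STRIDE_SHIFT set, so the loop runs once and level 2 (2MiB PTEs) is
 * returned.  If either pfn were not 2MiB-aligned (say phy_pfn = 0x401), the
 * loop would exit immediately and the mapping would fall back to level 1
 * (4KiB PTEs).
 */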
1789 
1790 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1791 			    struct scatterlist *sg, unsigned long phys_pfn,
1792 			    unsigned long nr_pages, int prot)
1793 {
1794 	struct dma_pte *first_pte = NULL, *pte = NULL;
1795 	phys_addr_t uninitialized_var(pteval);
1796 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1797 	unsigned long sg_res;
1798 	unsigned int largepage_lvl = 0;
1799 	unsigned long lvl_pages = 0;
1800 
1801 	BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1802 
1803 	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1804 		return -EINVAL;
1805 
1806 	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1807 
1808 	if (sg)
1809 		sg_res = 0;
1810 	else {
1811 		sg_res = nr_pages + 1;
1812 		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1813 	}
1814 
1815 	while (nr_pages > 0) {
1816 		uint64_t tmp;
1817 
1818 		if (!sg_res) {
1819 			sg_res = aligned_nrpages(sg->offset, sg->length);
1820 			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1821 			sg->dma_length = sg->length;
1822 			pteval = page_to_phys(sg_page(sg)) | prot;
1823 			phys_pfn = pteval >> VTD_PAGE_SHIFT;
1824 		}
1825 
1826 		if (!pte) {
1827 			largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1828 
1829 			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
1830 			if (!pte)
1831 				return -ENOMEM;
1832 			/* It is a large page */
1833 			if (largepage_lvl > 1) {
1834 				pteval |= DMA_PTE_LARGE_PAGE;
1835 				/* Ensure that old small page tables are removed to make room
1836 				   for superpage, if they exist. */
1837 				dma_pte_clear_range(domain, iov_pfn,
1838 						    iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
1839 				dma_pte_free_pagetable(domain, iov_pfn,
1840 						       iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
1841 			} else {
1842 				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
1843 			}
1844 
1845 		}
1846 		/* We don't need a lock here; nobody else
1847 		 * touches the iova range.
1848 		 */
1849 		tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1850 		if (tmp) {
1851 			static int dumps = 5;
1852 			printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1853 			       iov_pfn, tmp, (unsigned long long)pteval);
1854 			if (dumps) {
1855 				dumps--;
1856 				debug_dma_dump_mappings(NULL);
1857 			}
1858 			WARN_ON(1);
1859 		}
1860 
1861 		lvl_pages = lvl_to_nr_pages(largepage_lvl);
1862 
1863 		BUG_ON(nr_pages < lvl_pages);
1864 		BUG_ON(sg_res < lvl_pages);
1865 
1866 		nr_pages -= lvl_pages;
1867 		iov_pfn += lvl_pages;
1868 		phys_pfn += lvl_pages;
1869 		pteval += lvl_pages * VTD_PAGE_SIZE;
1870 		sg_res -= lvl_pages;
1871 
1872 		/* If the next PTE would be the first in a new page, then we
1873 		   need to flush the cache on the entries we've just written.
1874 		   And then we'll need to recalculate 'pte', so clear it and
1875 		   let it get set again in the if (!pte) block above.
1876 
1877 		   If we're done (!nr_pages) we need to flush the cache too.
1878 
1879 		   Also if we've been setting superpages, we may need to
1880 		   recalculate 'pte' and switch back to smaller pages for the
1881 		   end of the mapping, if the trailing size is not enough to
1882 		   use another superpage (i.e. sg_res < lvl_pages). */
1883 		pte++;
1884 		if (!nr_pages || first_pte_in_page(pte) ||
1885 		    (largepage_lvl > 1 && sg_res < lvl_pages)) {
1886 			domain_flush_cache(domain, first_pte,
1887 					   (void *)pte - (void *)first_pte);
1888 			pte = NULL;
1889 		}
1890 
1891 		if (!sg_res && nr_pages)
1892 			sg = sg_next(sg);
1893 	}
1894 	return 0;
1895 }
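/*
 * Worked example of the bookkeeping above (hypothetical numbers): with 2MiB
 * superpages available and nr_pages = 0x210 at 2MiB-aligned iov_pfn/phys_pfn:
 *
 *	pass 1: largepage_lvl = 2, lvl_pages = 512, one 2MiB PTE is written
 *	        and nr_pages drops to 0x10; since sg_res < lvl_pages the PTEs
 *	        written so far are flushed and 'pte' is recalculated;
 *	pass 2: largepage_lvl = 1, so the remaining 16 pages get 4KiB PTEs,
 *	        with a final domain_flush_cache() once nr_pages reaches 0.
 */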
1896 
1897 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1898 				    struct scatterlist *sg, unsigned long nr_pages,
1899 				    int prot)
1900 {
1901 	return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1902 }
1903 
1904 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1905 				     unsigned long phys_pfn, unsigned long nr_pages,
1906 				     int prot)
1907 {
1908 	return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
1909 }
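/*
 * Usage sketch (illustrative only, not a call made here): a caller holding a
 * page-aligned IOVA and physical address could create a single read/write
 * mapping with the pfn-based wrapper:
 *
 *	domain_pfn_mapping(domain, iova >> VTD_PAGE_SHIFT,
 *			   phys >> VTD_PAGE_SHIFT, 1,
 *			   DMA_PTE_READ | DMA_PTE_WRITE);
 *
 * iommu_domain_identity_map() below does this for whole ranges, while
 * intel_map_sg() uses the scatterlist variant.
 */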
1910 
1911 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1912 {
1913 	if (!iommu)
1914 		return;
1915 
1916 	clear_context_table(iommu, bus, devfn);
1917 	iommu->flush.flush_context(iommu, 0, 0, 0,
1918 					   DMA_CCMD_GLOBAL_INVL);
1919 	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1920 }
1921 
1922 static inline void unlink_domain_info(struct device_domain_info *info)
1923 {
1924 	assert_spin_locked(&device_domain_lock);
1925 	list_del(&info->link);
1926 	list_del(&info->global);
1927 	if (info->dev)
1928 		info->dev->dev.archdata.iommu = NULL;
1929 }
1930 
1931 static void domain_remove_dev_info(struct dmar_domain *domain)
1932 {
1933 	struct device_domain_info *info;
1934 	unsigned long flags;
1935 	struct intel_iommu *iommu;
1936 
1937 	spin_lock_irqsave(&device_domain_lock, flags);
1938 	while (!list_empty(&domain->devices)) {
1939 		info = list_entry(domain->devices.next,
1940 			struct device_domain_info, link);
1941 		unlink_domain_info(info);
1942 		spin_unlock_irqrestore(&device_domain_lock, flags);
1943 
1944 		iommu_disable_dev_iotlb(info);
1945 		iommu = device_to_iommu(info->segment, info->bus, info->devfn);
1946 		iommu_detach_dev(iommu, info->bus, info->devfn);
1947 		free_devinfo_mem(info);
1948 
1949 		spin_lock_irqsave(&device_domain_lock, flags);
1950 	}
1951 	spin_unlock_irqrestore(&device_domain_lock, flags);
1952 }
1953 
1954 /*
1955  * find_domain
1956  * Note: we use struct pci_dev->dev.archdata.iommu to store the info
1957  */
1958 static struct dmar_domain *
1959 find_domain(struct pci_dev *pdev)
1960 {
1961 	struct device_domain_info *info;
1962 
1963 	/* No lock here, assumes no domain exit in normal case */
1964 	info = pdev->dev.archdata.iommu;
1965 	if (info)
1966 		return info->domain;
1967 	return NULL;
1968 }
1969 
1970 /* domain is initialized */
1971 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1972 {
1973 	struct dmar_domain *domain, *found = NULL;
1974 	struct intel_iommu *iommu;
1975 	struct dmar_drhd_unit *drhd;
1976 	struct device_domain_info *info, *tmp;
1977 	struct pci_dev *dev_tmp;
1978 	unsigned long flags;
1979 	int bus = 0, devfn = 0;
1980 	int segment;
1981 	int ret;
1982 
1983 	domain = find_domain(pdev);
1984 	if (domain)
1985 		return domain;
1986 
1987 	segment = pci_domain_nr(pdev->bus);
1988 
1989 	dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1990 	if (dev_tmp) {
1991 		if (pci_is_pcie(dev_tmp)) {
1992 			bus = dev_tmp->subordinate->number;
1993 			devfn = 0;
1994 		} else {
1995 			bus = dev_tmp->bus->number;
1996 			devfn = dev_tmp->devfn;
1997 		}
1998 		spin_lock_irqsave(&device_domain_lock, flags);
1999 		list_for_each_entry(info, &device_domain_list, global) {
2000 			if (info->segment == segment &&
2001 			    info->bus == bus && info->devfn == devfn) {
2002 				found = info->domain;
2003 				break;
2004 			}
2005 		}
2006 		spin_unlock_irqrestore(&device_domain_lock, flags);
2007 		/* pcie-pci bridge already has a domain, use it */
2008 		if (found) {
2009 			domain = found;
2010 			goto found_domain;
2011 		}
2012 	}
2013 
2014 	domain = alloc_domain();
2015 	if (!domain)
2016 		goto error;
2017 
2018 	/* Allocate new domain for the device */
2019 	drhd = dmar_find_matched_drhd_unit(pdev);
2020 	if (!drhd) {
2021 		printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
2022 			pci_name(pdev));
2023 		free_domain_mem(domain);
2024 		return NULL;
2025 	}
2026 	iommu = drhd->iommu;
2027 
2028 	ret = iommu_attach_domain(domain, iommu);
2029 	if (ret) {
2030 		free_domain_mem(domain);
2031 		goto error;
2032 	}
2033 
2034 	if (domain_init(domain, gaw)) {
2035 		domain_exit(domain);
2036 		goto error;
2037 	}
2038 
2039 	/* register pcie-to-pci device */
2040 	if (dev_tmp) {
2041 		info = alloc_devinfo_mem();
2042 		if (!info) {
2043 			domain_exit(domain);
2044 			goto error;
2045 		}
2046 		info->segment = segment;
2047 		info->bus = bus;
2048 		info->devfn = devfn;
2049 		info->dev = NULL;
2050 		info->domain = domain;
2051 		/* This domain is shared by devices under p2p bridge */
2052 		domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
2053 
2054 		/* pcie-to-pci bridge already has a domain, use it */
2055 		found = NULL;
2056 		spin_lock_irqsave(&device_domain_lock, flags);
2057 		list_for_each_entry(tmp, &device_domain_list, global) {
2058 			if (tmp->segment == segment &&
2059 			    tmp->bus == bus && tmp->devfn == devfn) {
2060 				found = tmp->domain;
2061 				break;
2062 			}
2063 		}
2064 		if (found) {
2065 			spin_unlock_irqrestore(&device_domain_lock, flags);
2066 			free_devinfo_mem(info);
2067 			domain_exit(domain);
2068 			domain = found;
2069 		} else {
2070 			list_add(&info->link, &domain->devices);
2071 			list_add(&info->global, &device_domain_list);
2072 			spin_unlock_irqrestore(&device_domain_lock, flags);
2073 		}
2074 	}
2075 
2076 found_domain:
2077 	info = alloc_devinfo_mem();
2078 	if (!info)
2079 		goto error;
2080 	info->segment = segment;
2081 	info->bus = pdev->bus->number;
2082 	info->devfn = pdev->devfn;
2083 	info->dev = pdev;
2084 	info->domain = domain;
2085 	spin_lock_irqsave(&device_domain_lock, flags);
2086 	/* somebody else beat us to it */
2087 	found = find_domain(pdev);
2088 	if (found != NULL) {
2089 		spin_unlock_irqrestore(&device_domain_lock, flags);
2090 		if (found != domain) {
2091 			domain_exit(domain);
2092 			domain = found;
2093 		}
2094 		free_devinfo_mem(info);
2095 		return domain;
2096 	}
2097 	list_add(&info->link, &domain->devices);
2098 	list_add(&info->global, &device_domain_list);
2099 	pdev->dev.archdata.iommu = info;
2100 	spin_unlock_irqrestore(&device_domain_lock, flags);
2101 	return domain;
2102 error:
2103 	/* recheck it here, maybe others set it */
2104 	return find_domain(pdev);
2105 }
2106 
2107 static int iommu_identity_mapping;
2108 #define IDENTMAP_ALL		1
2109 #define IDENTMAP_GFX		2
2110 #define IDENTMAP_AZALIA		4
2111 
2112 static int iommu_domain_identity_map(struct dmar_domain *domain,
2113 				     unsigned long long start,
2114 				     unsigned long long end)
2115 {
2116 	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2117 	unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2118 
2119 	if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2120 			  dma_to_mm_pfn(last_vpfn))) {
2121 		printk(KERN_ERR "IOMMU: reserve iova failed\n");
2122 		return -ENOMEM;
2123 	}
2124 
2125 	pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2126 		 start, end, domain->id);
2127 	/*
2128 	 * RMRR range might have overlap with physical memory range,
2129 	 * clear it first
2130 	 */
2131 	dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2132 
2133 	return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2134 				  last_vpfn - first_vpfn + 1,
2135 				  DMA_PTE_READ|DMA_PTE_WRITE);
2136 }
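/*
 * Worked example (hypothetical RMRR): for start = 0xbf800000 and
 * end = 0xbfffffff this reserves IOVA pfns 0xbf800-0xbffff and then calls
 *
 *	domain_pfn_mapping(domain, 0xbf800, 0xbf800, 0x800,
 *			   DMA_PTE_READ | DMA_PTE_WRITE);
 *
 * i.e. 2048 pages (8MiB) mapped 1:1, so DMA addresses equal physical
 * addresses inside the reserved region.
 */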
2137 
2138 static int iommu_prepare_identity_map(struct pci_dev *pdev,
2139 				      unsigned long long start,
2140 				      unsigned long long end)
2141 {
2142 	struct dmar_domain *domain;
2143 	int ret;
2144 
2145 	domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2146 	if (!domain)
2147 		return -ENOMEM;
2148 
2149 	/* For _hardware_ passthrough, don't bother. But for software
2150 	   passthrough, we do it anyway -- it may indicate a memory
2151 	   range which is reserved in E820, and so didn't get set
2152 	   up in si_domain to start with */
2153 	if (domain == si_domain && hw_pass_through) {
2154 		printk(KERN_INFO "Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2155 		       pci_name(pdev), start, end);
2156 		return 0;
2157 	}
2158 
2159 	printk(KERN_INFO
2160 	       "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2161 	       pci_name(pdev), start, end);
2162 
2163 	if (end < start) {
2164 		WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2165 			"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2166 			dmi_get_system_info(DMI_BIOS_VENDOR),
2167 			dmi_get_system_info(DMI_BIOS_VERSION),
2168 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
2169 		ret = -EIO;
2170 		goto error;
2171 	}
2172 
2173 	if (end >> agaw_to_width(domain->agaw)) {
2174 		WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2175 		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2176 		     agaw_to_width(domain->agaw),
2177 		     dmi_get_system_info(DMI_BIOS_VENDOR),
2178 		     dmi_get_system_info(DMI_BIOS_VERSION),
2179 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
2180 		ret = -EIO;
2181 		goto error;
2182 	}
2183 
2184 	ret = iommu_domain_identity_map(domain, start, end);
2185 	if (ret)
2186 		goto error;
2187 
2188 	/* context entry init */
2189 	ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
2190 	if (ret)
2191 		goto error;
2192 
2193 	return 0;
2194 
2195  error:
2196 	domain_exit(domain);
2197 	return ret;
2198 }
2199 
2200 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2201 	struct pci_dev *pdev)
2202 {
2203 	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2204 		return 0;
2205 	return iommu_prepare_identity_map(pdev, rmrr->base_address,
2206 		rmrr->end_address);
2207 }
2208 
2209 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2210 static inline void iommu_prepare_isa(void)
2211 {
2212 	struct pci_dev *pdev;
2213 	int ret;
2214 
2215 	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2216 	if (!pdev)
2217 		return;
2218 
2219 	printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2220 	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
2221 
2222 	if (ret)
2223 		printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2224 		       "floppy might not work\n");
2225 
2226 }
2227 #else
2228 static inline void iommu_prepare_isa(void)
2229 {
2230 	return;
2231 }
2232 #endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
2233 
2234 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2235 
2236 static int __init si_domain_init(int hw)
2237 {
2238 	struct dmar_drhd_unit *drhd;
2239 	struct intel_iommu *iommu;
2240 	int nid, ret = 0;
2241 
2242 	si_domain = alloc_domain();
2243 	if (!si_domain)
2244 		return -EFAULT;
2245 
2246 	pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2247 
2248 	for_each_active_iommu(iommu, drhd) {
2249 		ret = iommu_attach_domain(si_domain, iommu);
2250 		if (ret) {
2251 			domain_exit(si_domain);
2252 			return -EFAULT;
2253 		}
2254 	}
2255 
2256 	if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2257 		domain_exit(si_domain);
2258 		return -EFAULT;
2259 	}
2260 
2261 	si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2262 
2263 	if (hw)
2264 		return 0;
2265 
2266 	for_each_online_node(nid) {
2267 		unsigned long start_pfn, end_pfn;
2268 		int i;
2269 
2270 		for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2271 			ret = iommu_domain_identity_map(si_domain,
2272 					PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2273 			if (ret)
2274 				return ret;
2275 		}
2276 	}
2277 
2278 	return 0;
2279 }
2280 
2281 static void domain_remove_one_dev_info(struct dmar_domain *domain,
2282 					  struct pci_dev *pdev);
2283 static int identity_mapping(struct pci_dev *pdev)
2284 {
2285 	struct device_domain_info *info;
2286 
2287 	if (likely(!iommu_identity_mapping))
2288 		return 0;
2289 
2290 	info = pdev->dev.archdata.iommu;
2291 	if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2292 		return (info->domain == si_domain);
2293 
2294 	return 0;
2295 }
2296 
2297 static int domain_add_dev_info(struct dmar_domain *domain,
2298 			       struct pci_dev *pdev,
2299 			       int translation)
2300 {
2301 	struct device_domain_info *info;
2302 	unsigned long flags;
2303 	int ret;
2304 
2305 	info = alloc_devinfo_mem();
2306 	if (!info)
2307 		return -ENOMEM;
2308 
2309 	info->segment = pci_domain_nr(pdev->bus);
2310 	info->bus = pdev->bus->number;
2311 	info->devfn = pdev->devfn;
2312 	info->dev = pdev;
2313 	info->domain = domain;
2314 
2315 	spin_lock_irqsave(&device_domain_lock, flags);
2316 	list_add(&info->link, &domain->devices);
2317 	list_add(&info->global, &device_domain_list);
2318 	pdev->dev.archdata.iommu = info;
2319 	spin_unlock_irqrestore(&device_domain_lock, flags);
2320 
2321 	ret = domain_context_mapping(domain, pdev, translation);
2322 	if (ret) {
2323 		spin_lock_irqsave(&device_domain_lock, flags);
2324 		unlink_domain_info(info);
2325 		spin_unlock_irqrestore(&device_domain_lock, flags);
2326 		free_devinfo_mem(info);
2327 		return ret;
2328 	}
2329 
2330 	return 0;
2331 }
2332 
2333 static bool device_has_rmrr(struct pci_dev *dev)
2334 {
2335 	struct dmar_rmrr_unit *rmrr;
2336 	int i;
2337 
2338 	for_each_rmrr_units(rmrr) {
2339 		for (i = 0; i < rmrr->devices_cnt; i++) {
2340 			/*
2341 			 * Return TRUE if this RMRR contains the device that
2342 			 * is passed in.
2343 			 */
2344 			if (rmrr->devices[i] == dev)
2345 				return true;
2346 		}
2347 	}
2348 	return false;
2349 }
2350 
2351 static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2352 {
2353 
2354 	/*
2355 	 * We want to prevent any device associated with an RMRR from
2356 	 * getting placed into the SI Domain. This is done because
2357 	 * problems exist when devices are moved in and out of domains
2358 	 * and their respective RMRR info is lost. We exempt USB devices
2359 	 * from this process due to their usage of RMRRs that are known
2360 	 * to not be needed after BIOS hand-off to OS.
2361 	 */
2362 	if (device_has_rmrr(pdev) &&
2363 	    (pdev->class >> 8) != PCI_CLASS_SERIAL_USB)
2364 		return 0;
2365 
2366 	if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2367 		return 1;
2368 
2369 	if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2370 		return 1;
2371 
2372 	if (!(iommu_identity_mapping & IDENTMAP_ALL))
2373 		return 0;
2374 
2375 	/*
2376 	 * We want to start off with all devices in the 1:1 domain, and
2377 	 * take them out later if we find they can't access all of memory.
2378 	 *
2379 	 * However, we can't do this for PCI devices behind bridges,
2380 	 * because all PCI devices behind the same bridge will end up
2381 	 * with the same source-id on their transactions.
2382 	 *
2383 	 * Practically speaking, we can't change things around for these
2384 	 * devices at run-time, because we can't be sure there'll be no
2385 	 * DMA transactions in flight for any of their siblings.
2386 	 *
2387 	 * So PCI devices (unless they're on the root bus) as well as
2388 	 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2389 	 * the 1:1 domain, just in _case_ one of their siblings turns out
2390 	 * not to be able to map all of memory.
2391 	 */
2392 	if (!pci_is_pcie(pdev)) {
2393 		if (!pci_is_root_bus(pdev->bus))
2394 			return 0;
2395 		if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2396 			return 0;
2397 	} else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2398 		return 0;
2399 
2400 	/*
2401 	 * At boot time, we don't yet know if devices will be 64-bit capable.
2402 	 * Assume that they will -- if they turn out not to be, then we can
2403 	 * take them out of the 1:1 domain later.
2404 	 */
2405 	if (!startup) {
2406 		/*
2407 		 * If the device's dma_mask is less than the system's memory
2408 		 * size then this is not a candidate for identity mapping.
2409 		 */
2410 		u64 dma_mask = pdev->dma_mask;
2411 
2412 		if (pdev->dev.coherent_dma_mask &&
2413 		    pdev->dev.coherent_dma_mask < dma_mask)
2414 			dma_mask = pdev->dev.coherent_dma_mask;
2415 
2416 		return dma_mask >= dma_get_required_mask(&pdev->dev);
2417 	}
2418 
2419 	return 1;
2420 }
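/*
 * Worked example (approximate numbers): on a machine with 8GiB of RAM,
 * dma_get_required_mask() is roughly DMA_BIT_MASK(33).  A device whose
 * dma_mask is DMA_BIT_MASK(32) therefore fails the check above at run time
 * and is taken out of the 1:1 domain, while a DMA_BIT_MASK(64) device keeps
 * its identity mapping.
 */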
2421 
2422 static int __init iommu_prepare_static_identity_mapping(int hw)
2423 {
2424 	struct pci_dev *pdev = NULL;
2425 	int ret;
2426 
2427 	ret = si_domain_init(hw);
2428 	if (ret)
2429 		return -EFAULT;
2430 
2431 	for_each_pci_dev(pdev) {
2432 		if (iommu_should_identity_map(pdev, 1)) {
2433 			ret = domain_add_dev_info(si_domain, pdev,
2434 					     hw ? CONTEXT_TT_PASS_THROUGH :
2435 						  CONTEXT_TT_MULTI_LEVEL);
2436 			if (ret) {
2437 				/* device not associated with an iommu */
2438 				if (ret == -ENODEV)
2439 					continue;
2440 				return ret;
2441 			}
2442 			pr_info("IOMMU: %s identity mapping for device %s\n",
2443 				hw ? "hardware" : "software", pci_name(pdev));
2444 		}
2445 	}
2446 
2447 	return 0;
2448 }
2449 
2450 static int __init init_dmars(void)
2451 {
2452 	struct dmar_drhd_unit *drhd;
2453 	struct dmar_rmrr_unit *rmrr;
2454 	struct pci_dev *pdev;
2455 	struct intel_iommu *iommu;
2456 	int i, ret;
2457 
2458 	/*
2459 	 * for each drhd
2460 	 *    allocate root
2461 	 *    initialize and program root entry to not present
2462 	 * endfor
2463 	 */
2464 	for_each_drhd_unit(drhd) {
2465 		/*
2466 		 * lock not needed as this is only incremented in the
2467 		 * single-threaded kernel __init code path; all other accesses
2468 		 * are read-only
2469 		 */
2470 		if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
2471 			g_num_of_iommus++;
2472 			continue;
2473 		}
2474 		printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2475 			  IOMMU_UNITS_SUPPORTED);
2476 	}
2477 
2478 	g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2479 			GFP_KERNEL);
2480 	if (!g_iommus) {
2481 		printk(KERN_ERR "Allocating global iommu array failed\n");
2482 		ret = -ENOMEM;
2483 		goto error;
2484 	}
2485 
2486 	deferred_flush = kzalloc(g_num_of_iommus *
2487 		sizeof(struct deferred_flush_tables), GFP_KERNEL);
2488 	if (!deferred_flush) {
2489 		ret = -ENOMEM;
2490 		goto error;
2491 	}
2492 
2493 	for_each_drhd_unit(drhd) {
2494 		if (drhd->ignored)
2495 			continue;
2496 
2497 		iommu = drhd->iommu;
2498 		g_iommus[iommu->seq_id] = iommu;
2499 
2500 		ret = iommu_init_domains(iommu);
2501 		if (ret)
2502 			goto error;
2503 
2504 		/*
2505 		 * TBD:
2506 		 * we could share the same root & context tables
2507 		 * among all IOMMUs. Need to split it out later.
2508 		 */
2509 		ret = iommu_alloc_root_entry(iommu);
2510 		if (ret) {
2511 			printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2512 			goto error;
2513 		}
2514 		if (!ecap_pass_through(iommu->ecap))
2515 			hw_pass_through = 0;
2516 	}
2517 
2518 	/*
2519 	 * Start from a sane IOMMU hardware state.
2520 	 */
2521 	for_each_drhd_unit(drhd) {
2522 		if (drhd->ignored)
2523 			continue;
2524 
2525 		iommu = drhd->iommu;
2526 
2527 		/*
2528 		 * If the queued invalidation is already initialized by us
2529 		 * (for example, while enabling interrupt-remapping) then
2530 		 * things are already rolling from a sane state.
2531 		 */
2532 		if (iommu->qi)
2533 			continue;
2534 
2535 		/*
2536 		 * Clear any previous faults.
2537 		 */
2538 		dmar_fault(-1, iommu);
2539 		/*
2540 		 * Disable queued invalidation if supported and already enabled
2541 		 * before OS handover.
2542 		 */
2543 		dmar_disable_qi(iommu);
2544 	}
2545 
2546 	for_each_drhd_unit(drhd) {
2547 		if (drhd->ignored)
2548 			continue;
2549 
2550 		iommu = drhd->iommu;
2551 
2552 		if (dmar_enable_qi(iommu)) {
2553 			/*
2554 			 * Queued Invalidate not enabled, use Register Based
2555 			 * Invalidate
2556 			 */
2557 			iommu->flush.flush_context = __iommu_flush_context;
2558 			iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2559 			printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
2560 			       "invalidation\n",
2561 				iommu->seq_id,
2562 			       (unsigned long long)drhd->reg_base_addr);
2563 		} else {
2564 			iommu->flush.flush_context = qi_flush_context;
2565 			iommu->flush.flush_iotlb = qi_flush_iotlb;
2566 			printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
2567 			       "invalidation\n",
2568 				iommu->seq_id,
2569 			       (unsigned long long)drhd->reg_base_addr);
2570 		}
2571 	}
2572 
2573 	if (iommu_pass_through)
2574 		iommu_identity_mapping |= IDENTMAP_ALL;
2575 
2576 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2577 	iommu_identity_mapping |= IDENTMAP_GFX;
2578 #endif
2579 
2580 	check_tylersburg_isoch();
2581 
2582 	/*
2583 	 * If pass-through is not set or not enabled, set up context entries
2584 	 * for identity mappings for RMRR, GFX and ISA, possibly falling back
2585 	 * to static identity mapping if iommu_identity_mapping is set.
2586 	 */
2587 	if (iommu_identity_mapping) {
2588 		ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2589 		if (ret) {
2590 			printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2591 			goto error;
2592 		}
2593 	}
2594 	/*
2595 	 * For each rmrr
2596 	 *   for each dev attached to rmrr
2597 	 *   do
2598 	 *     locate drhd for dev, alloc domain for dev
2599 	 *     allocate free domain
2600 	 *     allocate page table entries for rmrr
2601 	 *     if context not allocated for bus
2602 	 *           allocate and init context
2603 	 *           set present in root table for this bus
2604 	 *     init context with domain, translation etc
2605 	 *    endfor
2606 	 * endfor
2607 	 */
2608 	printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2609 	for_each_rmrr_units(rmrr) {
2610 		for (i = 0; i < rmrr->devices_cnt; i++) {
2611 			pdev = rmrr->devices[i];
2612 			/*
2613 			 * some BIOSes list non-existent devices in the
2614 			 * DMAR table.
2615 			 */
2616 			if (!pdev)
2617 				continue;
2618 			ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2619 			if (ret)
2620 				printk(KERN_ERR
2621 				       "IOMMU: mapping reserved region failed\n");
2622 		}
2623 	}
2624 
2625 	iommu_prepare_isa();
2626 
2627 	/*
2628 	 * for each drhd
2629 	 *   enable fault log
2630 	 *   global invalidate context cache
2631 	 *   global invalidate iotlb
2632 	 *   enable translation
2633 	 */
2634 	for_each_drhd_unit(drhd) {
2635 		if (drhd->ignored) {
2636 			/*
2637 			 * we always have to disable PMRs or DMA may fail on
2638 			 * this device
2639 			 */
2640 			if (force_on)
2641 				iommu_disable_protect_mem_regions(drhd->iommu);
2642 			continue;
2643 		}
2644 		iommu = drhd->iommu;
2645 
2646 		iommu_flush_write_buffer(iommu);
2647 
2648 		ret = dmar_set_interrupt(iommu);
2649 		if (ret)
2650 			goto error;
2651 
2652 		iommu_set_root_entry(iommu);
2653 
2654 		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2655 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2656 
2657 		ret = iommu_enable_translation(iommu);
2658 		if (ret)
2659 			goto error;
2660 
2661 		iommu_disable_protect_mem_regions(iommu);
2662 	}
2663 
2664 	return 0;
2665 error:
2666 	for_each_drhd_unit(drhd) {
2667 		if (drhd->ignored)
2668 			continue;
2669 		iommu = drhd->iommu;
2670 		free_iommu(iommu);
2671 	}
2672 	kfree(g_iommus);
2673 	return ret;
2674 }
2675 
2676 /* This takes a number of _MM_ pages, not VTD pages */
2677 static struct iova *intel_alloc_iova(struct device *dev,
2678 				     struct dmar_domain *domain,
2679 				     unsigned long nrpages, uint64_t dma_mask)
2680 {
2681 	struct pci_dev *pdev = to_pci_dev(dev);
2682 	struct iova *iova = NULL;
2683 
2684 	/* Restrict dma_mask to the width that the iommu can handle */
2685 	dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2686 
2687 	if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2688 		/*
2689 		 * First try to allocate an io virtual address in
2690 		 * DMA_BIT_MASK(32) and if that fails then try allocating
2691 		 * from higher range
2692 		 */
2693 		iova = alloc_iova(&domain->iovad, nrpages,
2694 				  IOVA_PFN(DMA_BIT_MASK(32)), 1);
2695 		if (iova)
2696 			return iova;
2697 	}
2698 	iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2699 	if (unlikely(!iova)) {
2700 		printk(KERN_ERR "Allocating %ld-page iova for %s failed\n",
2701 		       nrpages, pci_name(pdev));
2702 		return NULL;
2703 	}
2704 
2705 	return iova;
2706 }
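/*
 * Illustrative behaviour: for a 64-bit capable device the first alloc_iova()
 * call above still tries to place the IOVA below 4GiB,
 *
 *	alloc_iova(&domain->iovad, nrpages, IOVA_PFN(DMA_BIT_MASK(32)), 1);
 *
 * and only falls back to the full mask if that range is exhausted.  Booting
 * with intel_iommu=forcedac (which sets dmar_forcedac) skips the below-4GiB
 * attempt.
 */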
2707 
2708 static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
2709 {
2710 	struct dmar_domain *domain;
2711 	int ret;
2712 
2713 	domain = get_domain_for_dev(pdev,
2714 			DEFAULT_DOMAIN_ADDRESS_WIDTH);
2715 	if (!domain) {
2716 		printk(KERN_ERR
2717 			"Allocating domain for %s failed\n", pci_name(pdev));
2718 		return NULL;
2719 	}
2720 
2721 	/* make sure context mapping is ok */
2722 	if (unlikely(!domain_context_mapped(pdev))) {
2723 		ret = domain_context_mapping(domain, pdev,
2724 					     CONTEXT_TT_MULTI_LEVEL);
2725 		if (ret) {
2726 			printk(KERN_ERR
2727 				"Domain context map for %s failed\n",
2728 				pci_name(pdev));
2729 			return NULL;
2730 		}
2731 	}
2732 
2733 	return domain;
2734 }
2735 
2736 static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2737 {
2738 	struct device_domain_info *info;
2739 
2740 	/* No lock here, assumes no domain exit in normal case */
2741 	info = dev->dev.archdata.iommu;
2742 	if (likely(info))
2743 		return info->domain;
2744 
2745 	return __get_valid_domain_for_dev(dev);
2746 }
2747 
2748 static int iommu_dummy(struct pci_dev *pdev)
2749 {
2750 	return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2751 }
2752 
2753 /* Check if the pdev needs to go through non-identity map and unmap process.*/
2754 static int iommu_no_mapping(struct device *dev)
2755 {
2756 	struct pci_dev *pdev;
2757 	int found;
2758 
2759 	if (unlikely(dev->bus != &pci_bus_type))
2760 		return 1;
2761 
2762 	pdev = to_pci_dev(dev);
2763 	if (iommu_dummy(pdev))
2764 		return 1;
2765 
2766 	if (!iommu_identity_mapping)
2767 		return 0;
2768 
2769 	found = identity_mapping(pdev);
2770 	if (found) {
2771 		if (iommu_should_identity_map(pdev, 0))
2772 			return 1;
2773 		else {
2774 			/*
2775 			 * Remove the 32-bit DMA device from si_domain and
2776 			 * fall back to non-identity mapping.
2777 			 */
2778 			domain_remove_one_dev_info(si_domain, pdev);
2779 			printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2780 			       pci_name(pdev));
2781 			return 0;
2782 		}
2783 	} else {
2784 		/*
2785 		 * If a 64-bit DMA device was detached from a VM, put it
2786 		 * back into si_domain for identity mapping.
2787 		 */
2788 		if (iommu_should_identity_map(pdev, 0)) {
2789 			int ret;
2790 			ret = domain_add_dev_info(si_domain, pdev,
2791 						  hw_pass_through ?
2792 						  CONTEXT_TT_PASS_THROUGH :
2793 						  CONTEXT_TT_MULTI_LEVEL);
2794 			if (!ret) {
2795 				printk(KERN_INFO "64bit %s uses identity mapping\n",
2796 				       pci_name(pdev));
2797 				return 1;
2798 			}
2799 		}
2800 	}
2801 
2802 	return 0;
2803 }
2804 
2805 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2806 				     size_t size, int dir, u64 dma_mask)
2807 {
2808 	struct pci_dev *pdev = to_pci_dev(hwdev);
2809 	struct dmar_domain *domain;
2810 	phys_addr_t start_paddr;
2811 	struct iova *iova;
2812 	int prot = 0;
2813 	int ret;
2814 	struct intel_iommu *iommu;
2815 	unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
2816 
2817 	BUG_ON(dir == DMA_NONE);
2818 
2819 	if (iommu_no_mapping(hwdev))
2820 		return paddr;
2821 
2822 	domain = get_valid_domain_for_dev(pdev);
2823 	if (!domain)
2824 		return 0;
2825 
2826 	iommu = domain_get_iommu(domain);
2827 	size = aligned_nrpages(paddr, size);
2828 
2829 	iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
2830 	if (!iova)
2831 		goto error;
2832 
2833 	/*
2834 	 * Check if DMAR supports zero-length reads on write only
2835 	 * mappings..
2836 	 */
2837 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2838 			!cap_zlr(iommu->cap))
2839 		prot |= DMA_PTE_READ;
2840 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2841 		prot |= DMA_PTE_WRITE;
2842 	/*
2843 	 * paddr to (paddr + size) might span partial pages, so we should map
2844 	 * whole pages.  Note: if two parts of one page are mapped separately,
2845 	 * we might have two guest addresses mapping to the same host paddr,
2846 	 * but this is not a big problem
2847 	 */
2848 	ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2849 				 mm_to_dma_pfn(paddr_pfn), size, prot);
2850 	if (ret)
2851 		goto error;
2852 
2853 	/* it's a non-present to present mapping. Only flush in caching mode */
2854 	if (cap_caching_mode(iommu->cap))
2855 		iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
2856 	else
2857 		iommu_flush_write_buffer(iommu);
2858 
2859 	start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2860 	start_paddr += paddr & ~PAGE_MASK;
2861 	return start_paddr;
2862 
2863 error:
2864 	if (iova)
2865 		__free_iova(&domain->iovad, iova);
2866 	printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
2867 		pci_name(pdev), size, (unsigned long long)paddr, dir);
2868 	return 0;
2869 }
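/*
 * Worked example (hypothetical values, assuming 4KiB pages): for
 * paddr = 0x12345678 and size = 0x100, aligned_nrpages() yields one page; if
 * the allocator hands back iova->pfn_lo = 0xffffe, the caller receives
 *
 *	(0xffffe << PAGE_SHIFT) + (0x12345678 & ~PAGE_MASK) = 0xffffe678
 *
 * while the whole surrounding page is mapped, as the comment above notes.
 */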
2870 
2871 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2872 				 unsigned long offset, size_t size,
2873 				 enum dma_data_direction dir,
2874 				 struct dma_attrs *attrs)
2875 {
2876 	return __intel_map_single(dev, page_to_phys(page) + offset, size,
2877 				  dir, to_pci_dev(dev)->dma_mask);
2878 }
2879 
2880 static void flush_unmaps(void)
2881 {
2882 	int i, j;
2883 
2884 	timer_on = 0;
2885 
2886 	/* just flush them all */
2887 	for (i = 0; i < g_num_of_iommus; i++) {
2888 		struct intel_iommu *iommu = g_iommus[i];
2889 		if (!iommu)
2890 			continue;
2891 
2892 		if (!deferred_flush[i].next)
2893 			continue;
2894 
2895 		/* In caching mode, global flushes make emulation expensive */
2896 		if (!cap_caching_mode(iommu->cap))
2897 			iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2898 					 DMA_TLB_GLOBAL_FLUSH);
2899 		for (j = 0; j < deferred_flush[i].next; j++) {
2900 			unsigned long mask;
2901 			struct iova *iova = deferred_flush[i].iova[j];
2902 			struct dmar_domain *domain = deferred_flush[i].domain[j];
2903 
2904 			/* On real hardware multiple invalidations are expensive */
2905 			if (cap_caching_mode(iommu->cap))
2906 				iommu_flush_iotlb_psi(iommu, domain->id,
2907 				iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2908 			else {
2909 				mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2910 				iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2911 						(uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2912 			}
2913 			__free_iova(&deferred_flush[i].domain[j]->iovad, iova);
2914 		}
2915 		deferred_flush[i].next = 0;
2916 	}
2917 
2918 	list_size = 0;
2919 }
2920 
2921 static void flush_unmaps_timeout(unsigned long data)
2922 {
2923 	unsigned long flags;
2924 
2925 	spin_lock_irqsave(&async_umap_flush_lock, flags);
2926 	flush_unmaps();
2927 	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2928 }
2929 
2930 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2931 {
2932 	unsigned long flags;
2933 	int next, iommu_id;
2934 	struct intel_iommu *iommu;
2935 
2936 	spin_lock_irqsave(&async_umap_flush_lock, flags);
2937 	if (list_size == HIGH_WATER_MARK)
2938 		flush_unmaps();
2939 
2940 	iommu = domain_get_iommu(dom);
2941 	iommu_id = iommu->seq_id;
2942 
2943 	next = deferred_flush[iommu_id].next;
2944 	deferred_flush[iommu_id].domain[next] = dom;
2945 	deferred_flush[iommu_id].iova[next] = iova;
2946 	deferred_flush[iommu_id].next++;
2947 
2948 	if (!timer_on) {
2949 		mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2950 		timer_on = 1;
2951 	}
2952 	list_size++;
2953 	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2954 }
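/*
 * Lifecycle sketch of the deferred-unmap path (a summary of the code above
 * and below, not additional logic):
 *
 *	intel_unmap_page()/intel_unmap_sg() below	(non-strict mode)
 *	  -> add_unmap(domain, iova)
 *	       - queue (domain, iova) in deferred_flush[iommu->seq_id]
 *	       - arm unmap_timer (~10ms) if it is not already pending
 *	  ...
 *	unmap_timer fires, or list_size hits HIGH_WATER_MARK
 *	  -> flush_unmaps(): one IOTLB flush per IOMMU (or per-IOVA PSI flushes
 *	     in caching mode), then __free_iova() for each queued entry
 */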
2955 
2956 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2957 			     size_t size, enum dma_data_direction dir,
2958 			     struct dma_attrs *attrs)
2959 {
2960 	struct pci_dev *pdev = to_pci_dev(dev);
2961 	struct dmar_domain *domain;
2962 	unsigned long start_pfn, last_pfn;
2963 	struct iova *iova;
2964 	struct intel_iommu *iommu;
2965 
2966 	if (iommu_no_mapping(dev))
2967 		return;
2968 
2969 	domain = find_domain(pdev);
2970 	BUG_ON(!domain);
2971 
2972 	iommu = domain_get_iommu(domain);
2973 
2974 	iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2975 	if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2976 		      (unsigned long long)dev_addr))
2977 		return;
2978 
2979 	start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2980 	last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2981 
2982 	pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2983 		 pci_name(pdev), start_pfn, last_pfn);
2984 
2985 	/*  clear the whole page */
2986 	dma_pte_clear_range(domain, start_pfn, last_pfn);
2987 
2988 	/* free page tables */
2989 	dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2990 
2991 	if (intel_iommu_strict) {
2992 		iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2993 				      last_pfn - start_pfn + 1, 0);
2994 		/* free iova */
2995 		__free_iova(&domain->iovad, iova);
2996 	} else {
2997 		add_unmap(domain, iova);
2998 		/*
2999 		 * queue up the release of the unmap to save roughly 1/6th of
3000 		 * the cpu time used up by the iotlb flush operation...
3001 		 */
3002 	}
3003 }
3004 
3005 static void *intel_alloc_coherent(struct device *hwdev, size_t size,
3006 				  dma_addr_t *dma_handle, gfp_t flags,
3007 				  struct dma_attrs *attrs)
3008 {
3009 	void *vaddr;
3010 	int order;
3011 
3012 	size = PAGE_ALIGN(size);
3013 	order = get_order(size);
3014 
3015 	if (!iommu_no_mapping(hwdev))
3016 		flags &= ~(GFP_DMA | GFP_DMA32);
3017 	else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
3018 		if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
3019 			flags |= GFP_DMA;
3020 		else
3021 			flags |= GFP_DMA32;
3022 	}
3023 
3024 	vaddr = (void *)__get_free_pages(flags, order);
3025 	if (!vaddr)
3026 		return NULL;
3027 	memset(vaddr, 0, size);
3028 
3029 	*dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
3030 					 DMA_BIDIRECTIONAL,
3031 					 hwdev->coherent_dma_mask);
3032 	if (*dma_handle)
3033 		return vaddr;
3034 	free_pages((unsigned long)vaddr, order);
3035 	return NULL;
3036 }
3037 
3038 static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
3039 				dma_addr_t dma_handle, struct dma_attrs *attrs)
3040 {
3041 	int order;
3042 
3043 	size = PAGE_ALIGN(size);
3044 	order = get_order(size);
3045 
3046 	intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
3047 	free_pages((unsigned long)vaddr, order);
3048 }
3049 
3050 static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
3051 			   int nelems, enum dma_data_direction dir,
3052 			   struct dma_attrs *attrs)
3053 {
3054 	struct pci_dev *pdev = to_pci_dev(hwdev);
3055 	struct dmar_domain *domain;
3056 	unsigned long start_pfn, last_pfn;
3057 	struct iova *iova;
3058 	struct intel_iommu *iommu;
3059 
3060 	if (iommu_no_mapping(hwdev))
3061 		return;
3062 
3063 	domain = find_domain(pdev);
3064 	BUG_ON(!domain);
3065 
3066 	iommu = domain_get_iommu(domain);
3067 
3068 	iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
3069 	if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3070 		      (unsigned long long)sglist[0].dma_address))
3071 		return;
3072 
3073 	start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3074 	last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3075 
3076 	/*  clear the whole page */
3077 	dma_pte_clear_range(domain, start_pfn, last_pfn);
3078 
3079 	/* free page tables */
3080 	dma_pte_free_pagetable(domain, start_pfn, last_pfn);
3081 
3082 	if (intel_iommu_strict) {
3083 		iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3084 				      last_pfn - start_pfn + 1, 0);
3085 		/* free iova */
3086 		__free_iova(&domain->iovad, iova);
3087 	} else {
3088 		add_unmap(domain, iova);
3089 		/*
3090 		 * queue up the release of the unmap to save roughly 1/6th of
3091 		 * the cpu time used up by the iotlb flush operation...
3092 		 */
3093 	}
3094 }
3095 
3096 static int intel_nontranslate_map_sg(struct device *hddev,
3097 	struct scatterlist *sglist, int nelems, int dir)
3098 {
3099 	int i;
3100 	struct scatterlist *sg;
3101 
3102 	for_each_sg(sglist, sg, nelems, i) {
3103 		BUG_ON(!sg_page(sg));
3104 		sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3105 		sg->dma_length = sg->length;
3106 	}
3107 	return nelems;
3108 }
3109 
3110 static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3111 			enum dma_data_direction dir, struct dma_attrs *attrs)
3112 {
3113 	int i;
3114 	struct pci_dev *pdev = to_pci_dev(hwdev);
3115 	struct dmar_domain *domain;
3116 	size_t size = 0;
3117 	int prot = 0;
3118 	struct iova *iova = NULL;
3119 	int ret;
3120 	struct scatterlist *sg;
3121 	unsigned long start_vpfn;
3122 	struct intel_iommu *iommu;
3123 
3124 	BUG_ON(dir == DMA_NONE);
3125 	if (iommu_no_mapping(hwdev))
3126 		return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
3127 
3128 	domain = get_valid_domain_for_dev(pdev);
3129 	if (!domain)
3130 		return 0;
3131 
3132 	iommu = domain_get_iommu(domain);
3133 
3134 	for_each_sg(sglist, sg, nelems, i)
3135 		size += aligned_nrpages(sg->offset, sg->length);
3136 
3137 	iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3138 				pdev->dma_mask);
3139 	if (!iova) {
3140 		sglist->dma_length = 0;
3141 		return 0;
3142 	}
3143 
3144 	/*
3145 	 * Check if DMAR supports zero-length reads on write only
3146 	 * mappings..
3147 	 */
3148 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3149 			!cap_zlr(iommu->cap))
3150 		prot |= DMA_PTE_READ;
3151 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3152 		prot |= DMA_PTE_WRITE;
3153 
3154 	start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3155 
3156 	ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3157 	if (unlikely(ret)) {
3158 		/*  clear the page */
3159 		dma_pte_clear_range(domain, start_vpfn,
3160 				    start_vpfn + size - 1);
3161 		/* free page tables */
3162 		dma_pte_free_pagetable(domain, start_vpfn,
3163 				       start_vpfn + size - 1);
3164 		/* free iova */
3165 		__free_iova(&domain->iovad, iova);
3166 		return 0;
3167 	}
3168 
3169 	/* it's a non-present to present mapping. Only flush in caching mode */
3170 	if (cap_caching_mode(iommu->cap))
3171 		iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
3172 	else
3173 		iommu_flush_write_buffer(iommu);
3174 
3175 	return nelems;
3176 }
3177 
3178 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3179 {
3180 	return !dma_addr;
3181 }
3182 
3183 struct dma_map_ops intel_dma_ops = {
3184 	.alloc = intel_alloc_coherent,
3185 	.free = intel_free_coherent,
3186 	.map_sg = intel_map_sg,
3187 	.unmap_sg = intel_unmap_sg,
3188 	.map_page = intel_map_page,
3189 	.unmap_page = intel_unmap_page,
3190 	.mapping_error = intel_mapping_error,
3191 };
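/*
 * Usage sketch (illustrative): once intel_iommu_init() installs these ops as
 * the platform dma_ops, an ordinary driver call such as
 *
 *	dma_addr_t handle = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
 *
 * is dispatched to .map_page (intel_map_page, then __intel_map_single), and
 * the matching dma_unmap_single() lands in .unmap_page (intel_unmap_page).
 * dma_alloc_coherent()/dma_free_coherent() reach .alloc and .free the same
 * way.
 */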
3192 
3193 static inline int iommu_domain_cache_init(void)
3194 {
3195 	int ret = 0;
3196 
3197 	iommu_domain_cache = kmem_cache_create("iommu_domain",
3198 					 sizeof(struct dmar_domain),
3199 					 0,
3200 					 SLAB_HWCACHE_ALIGN,
3201 
3202 					 NULL);
3203 	if (!iommu_domain_cache) {
3204 		printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3205 		ret = -ENOMEM;
3206 	}
3207 
3208 	return ret;
3209 }
3210 
3211 static inline int iommu_devinfo_cache_init(void)
3212 {
3213 	int ret = 0;
3214 
3215 	iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3216 					 sizeof(struct device_domain_info),
3217 					 0,
3218 					 SLAB_HWCACHE_ALIGN,
3219 					 NULL);
3220 	if (!iommu_devinfo_cache) {
3221 		printk(KERN_ERR "Couldn't create devinfo cache\n");
3222 		ret = -ENOMEM;
3223 	}
3224 
3225 	return ret;
3226 }
3227 
3228 static inline int iommu_iova_cache_init(void)
3229 {
3230 	int ret = 0;
3231 
3232 	iommu_iova_cache = kmem_cache_create("iommu_iova",
3233 					 sizeof(struct iova),
3234 					 0,
3235 					 SLAB_HWCACHE_ALIGN,
3236 					 NULL);
3237 	if (!iommu_iova_cache) {
3238 		printk(KERN_ERR "Couldn't create iova cache\n");
3239 		ret = -ENOMEM;
3240 	}
3241 
3242 	return ret;
3243 }
3244 
3245 static int __init iommu_init_mempool(void)
3246 {
3247 	int ret;
3248 	ret = iommu_iova_cache_init();
3249 	if (ret)
3250 		return ret;
3251 
3252 	ret = iommu_domain_cache_init();
3253 	if (ret)
3254 		goto domain_error;
3255 
3256 	ret = iommu_devinfo_cache_init();
3257 	if (!ret)
3258 		return ret;
3259 
3260 	kmem_cache_destroy(iommu_domain_cache);
3261 domain_error:
3262 	kmem_cache_destroy(iommu_iova_cache);
3263 
3264 	return -ENOMEM;
3265 }
3266 
3267 static void __init iommu_exit_mempool(void)
3268 {
3269 	kmem_cache_destroy(iommu_devinfo_cache);
3270 	kmem_cache_destroy(iommu_domain_cache);
3271 	kmem_cache_destroy(iommu_iova_cache);
3272 
3273 }
3274 
3275 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3276 {
3277 	struct dmar_drhd_unit *drhd;
3278 	u32 vtbar;
3279 	int rc;
3280 
3281 	/* We know that this device on this chipset has its own IOMMU.
3282 	 * If we find it under a different IOMMU, then the BIOS is lying
3283 	 * to us. Hope that the IOMMU for this device is actually
3284 	 * disabled, and it needs no translation...
3285 	 */
3286 	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3287 	if (rc) {
3288 		/* "can't" happen */
3289 		dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3290 		return;
3291 	}
3292 	vtbar &= 0xffff0000;
3293 
3294 	/* we know that this iommu should be at offset 0xa000 from vtbar */
3295 	drhd = dmar_find_matched_drhd_unit(pdev);
3296 	if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3297 			    TAINT_FIRMWARE_WORKAROUND,
3298 			    "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3299 		pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3300 }
3301 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3302 
3303 static void __init init_no_remapping_devices(void)
3304 {
3305 	struct dmar_drhd_unit *drhd;
3306 
3307 	for_each_drhd_unit(drhd) {
3308 		if (!drhd->include_all) {
3309 			int i;
3310 			for (i = 0; i < drhd->devices_cnt; i++)
3311 				if (drhd->devices[i] != NULL)
3312 					break;
3313 			/* ignore DMAR unit if no pci devices exist */
3314 			if (i == drhd->devices_cnt)
3315 				drhd->ignored = 1;
3316 		}
3317 	}
3318 
3319 	for_each_drhd_unit(drhd) {
3320 		int i;
3321 		if (drhd->ignored || drhd->include_all)
3322 			continue;
3323 
3324 		for (i = 0; i < drhd->devices_cnt; i++)
3325 			if (drhd->devices[i] &&
3326 			    !IS_GFX_DEVICE(drhd->devices[i]))
3327 				break;
3328 
3329 		if (i < drhd->devices_cnt)
3330 			continue;
3331 
3332 		/* This IOMMU has *only* gfx devices. Either bypass it or
3333 		   set the gfx_mapped flag, as appropriate */
3334 		if (dmar_map_gfx) {
3335 			intel_iommu_gfx_mapped = 1;
3336 		} else {
3337 			drhd->ignored = 1;
3338 			for (i = 0; i < drhd->devices_cnt; i++) {
3339 				if (!drhd->devices[i])
3340 					continue;
3341 				drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3342 			}
3343 		}
3344 	}
3345 }
3346 
3347 #ifdef CONFIG_SUSPEND
3348 static int init_iommu_hw(void)
3349 {
3350 	struct dmar_drhd_unit *drhd;
3351 	struct intel_iommu *iommu = NULL;
3352 
3353 	for_each_active_iommu(iommu, drhd)
3354 		if (iommu->qi)
3355 			dmar_reenable_qi(iommu);
3356 
3357 	for_each_iommu(iommu, drhd) {
3358 		if (drhd->ignored) {
3359 			/*
3360 			 * we always have to disable PMRs or DMA may fail on
3361 			 * this device
3362 			 */
3363 			if (force_on)
3364 				iommu_disable_protect_mem_regions(iommu);
3365 			continue;
3366 		}
3367 
3368 		iommu_flush_write_buffer(iommu);
3369 
3370 		iommu_set_root_entry(iommu);
3371 
3372 		iommu->flush.flush_context(iommu, 0, 0, 0,
3373 					   DMA_CCMD_GLOBAL_INVL);
3374 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3375 					 DMA_TLB_GLOBAL_FLUSH);
3376 		if (iommu_enable_translation(iommu))
3377 			return 1;
3378 		iommu_disable_protect_mem_regions(iommu);
3379 	}
3380 
3381 	return 0;
3382 }
3383 
3384 static void iommu_flush_all(void)
3385 {
3386 	struct dmar_drhd_unit *drhd;
3387 	struct intel_iommu *iommu;
3388 
3389 	for_each_active_iommu(iommu, drhd) {
3390 		iommu->flush.flush_context(iommu, 0, 0, 0,
3391 					   DMA_CCMD_GLOBAL_INVL);
3392 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3393 					 DMA_TLB_GLOBAL_FLUSH);
3394 	}
3395 }
3396 
3397 static int iommu_suspend(void)
3398 {
3399 	struct dmar_drhd_unit *drhd;
3400 	struct intel_iommu *iommu = NULL;
3401 	unsigned long flag;
3402 
3403 	for_each_active_iommu(iommu, drhd) {
3404 		iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3405 						 GFP_ATOMIC);
3406 		if (!iommu->iommu_state)
3407 			goto nomem;
3408 	}
3409 
3410 	iommu_flush_all();
3411 
3412 	for_each_active_iommu(iommu, drhd) {
3413 		iommu_disable_translation(iommu);
3414 
3415 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
3416 
3417 		iommu->iommu_state[SR_DMAR_FECTL_REG] =
3418 			readl(iommu->reg + DMAR_FECTL_REG);
3419 		iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3420 			readl(iommu->reg + DMAR_FEDATA_REG);
3421 		iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3422 			readl(iommu->reg + DMAR_FEADDR_REG);
3423 		iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3424 			readl(iommu->reg + DMAR_FEUADDR_REG);
3425 
3426 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3427 	}
3428 	return 0;
3429 
3430 nomem:
3431 	for_each_active_iommu(iommu, drhd)
3432 		kfree(iommu->iommu_state);
3433 
3434 	return -ENOMEM;
3435 }
3436 
3437 static void iommu_resume(void)
3438 {
3439 	struct dmar_drhd_unit *drhd;
3440 	struct intel_iommu *iommu = NULL;
3441 	unsigned long flag;
3442 
3443 	if (init_iommu_hw()) {
3444 		if (force_on)
3445 			panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3446 		else
3447 			WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3448 		return;
3449 	}
3450 
3451 	for_each_active_iommu(iommu, drhd) {
3452 
3453 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
3454 
3455 		writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3456 			iommu->reg + DMAR_FECTL_REG);
3457 		writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3458 			iommu->reg + DMAR_FEDATA_REG);
3459 		writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3460 			iommu->reg + DMAR_FEADDR_REG);
3461 		writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3462 			iommu->reg + DMAR_FEUADDR_REG);
3463 
3464 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3465 	}
3466 
3467 	for_each_active_iommu(iommu, drhd)
3468 		kfree(iommu->iommu_state);
3469 }
3470 
3471 static struct syscore_ops iommu_syscore_ops = {
3472 	.resume		= iommu_resume,
3473 	.suspend	= iommu_suspend,
3474 };
3475 
3476 static void __init init_iommu_pm_ops(void)
3477 {
3478 	register_syscore_ops(&iommu_syscore_ops);
3479 }
3480 
3481 #else
3482 static inline void init_iommu_pm_ops(void) {}
3483 #endif	/* CONFIG_SUSPEND */
3484 
3485 LIST_HEAD(dmar_rmrr_units);
3486 
3487 static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
3488 {
3489 	list_add(&rmrr->list, &dmar_rmrr_units);
3490 }
3491 
3492 
3493 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3494 {
3495 	struct acpi_dmar_reserved_memory *rmrr;
3496 	struct dmar_rmrr_unit *rmrru;
3497 
3498 	rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3499 	if (!rmrru)
3500 		return -ENOMEM;
3501 
3502 	rmrru->hdr = header;
3503 	rmrr = (struct acpi_dmar_reserved_memory *)header;
3504 	rmrru->base_address = rmrr->base_address;
3505 	rmrru->end_address = rmrr->end_address;
3506 
3507 	dmar_register_rmrr_unit(rmrru);
3508 	return 0;
3509 }
3510 
3511 static int __init
3512 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
3513 {
3514 	struct acpi_dmar_reserved_memory *rmrr;
3515 	int ret;
3516 
3517 	rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
3518 	ret = dmar_parse_dev_scope((void *)(rmrr + 1),
3519 		((void *)rmrr) + rmrr->header.length,
3520 		&rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
3521 
3522 	if (ret || (rmrru->devices_cnt == 0)) {
3523 		list_del(&rmrru->list);
3524 		kfree(rmrru);
3525 	}
3526 	return ret;
3527 }
3528 
3529 static LIST_HEAD(dmar_atsr_units);
3530 
3531 int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3532 {
3533 	struct acpi_dmar_atsr *atsr;
3534 	struct dmar_atsr_unit *atsru;
3535 
3536 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3537 	atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3538 	if (!atsru)
3539 		return -ENOMEM;
3540 
3541 	atsru->hdr = hdr;
3542 	atsru->include_all = atsr->flags & 0x1;
3543 
3544 	list_add(&atsru->list, &dmar_atsr_units);
3545 
3546 	return 0;
3547 }
3548 
3549 static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
3550 {
3551 	int rc;
3552 	struct acpi_dmar_atsr *atsr;
3553 
3554 	if (atsru->include_all)
3555 		return 0;
3556 
3557 	atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3558 	rc = dmar_parse_dev_scope((void *)(atsr + 1),
3559 				(void *)atsr + atsr->header.length,
3560 				&atsru->devices_cnt, &atsru->devices,
3561 				atsr->segment);
3562 	if (rc || !atsru->devices_cnt) {
3563 		list_del(&atsru->list);
3564 		kfree(atsru);
3565 	}
3566 
3567 	return rc;
3568 }
3569 
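/*
 * Return 1 if the root port above @dev is listed in an ATSR for its PCI
 * segment (or the segment has an "include all" ATSR), i.e. the platform
 * permits Address Translation Services for this device; 0 otherwise.
 */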
3570 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3571 {
3572 	int i;
3573 	struct pci_bus *bus;
3574 	struct acpi_dmar_atsr *atsr;
3575 	struct dmar_atsr_unit *atsru;
3576 
3577 	dev = pci_physfn(dev);
3578 
3579 	list_for_each_entry(atsru, &dmar_atsr_units, list) {
3580 		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3581 		if (atsr->segment == pci_domain_nr(dev->bus))
3582 			goto found;
3583 	}
3584 
3585 	return 0;
3586 
3587 found:
3588 	for (bus = dev->bus; bus; bus = bus->parent) {
3589 		struct pci_dev *bridge = bus->self;
3590 
3591 		if (!bridge || !pci_is_pcie(bridge) ||
3592 		    pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3593 			return 0;
3594 
3595 		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) {
3596 			for (i = 0; i < atsru->devices_cnt; i++)
3597 				if (atsru->devices[i] == bridge)
3598 					return 1;
3599 			break;
3600 		}
3601 	}
3602 
3603 	if (atsru->include_all)
3604 		return 1;
3605 
3606 	return 0;
3607 }
3608 
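/*
 * Second-stage parsing: resolve the device scope of every recorded RMRR and
 * ATSR entry now that PCI devices can be looked up.  Entries whose scope
 * resolves to no devices are dropped by the *_parse_dev() helpers.
 */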
3609 int __init dmar_parse_rmrr_atsr_dev(void)
3610 {
3611 	struct dmar_rmrr_unit *rmrr, *rmrr_n;
3612 	struct dmar_atsr_unit *atsr, *atsr_n;
3613 	int ret = 0;
3614 
3615 	list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
3616 		ret = rmrr_parse_dev(rmrr);
3617 		if (ret)
3618 			return ret;
3619 	}
3620 
3621 	list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
3622 		ret = atsr_parse_dev(atsr);
3623 		if (ret)
3624 			return ret;
3625 	}
3626 
3627 	return ret;
3628 }
3629 
3630 /*
3631  * Here we only respond to a device being unbound from its driver.
3632  *
3633  * A newly added device is not attached to its DMAR domain here yet; that
3634  * happens when the device is first mapped to an iova.
3635  */
3636 static int device_notifier(struct notifier_block *nb,
3637 				  unsigned long action, void *data)
3638 {
3639 	struct device *dev = data;
3640 	struct pci_dev *pdev = to_pci_dev(dev);
3641 	struct dmar_domain *domain;
3642 
3643 	if (iommu_no_mapping(dev))
3644 		return 0;
3645 
3646 	domain = find_domain(pdev);
3647 	if (!domain)
3648 		return 0;
3649 
3650 	if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
3651 		domain_remove_one_dev_info(domain, pdev);
3652 
3653 		if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3654 		    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3655 		    list_empty(&domain->devices))
3656 			domain_exit(domain);
3657 	}
3658 
3659 	return 0;
3660 }
3661 
3662 static struct notifier_block device_nb = {
3663 	.notifier_call = device_notifier,
3664 };
3665 
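/*
 * Main VT-d initialization entry point: parse the DMAR tables, disable any
 * translation left enabled by firmware, set up the DMA remapping engines
 * (init_dmars()), install intel_dma_ops as the DMA API backend and register
 * intel_iommu_ops with the generic IOMMU core.
 */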
3666 int __init intel_iommu_init(void)
3667 {
3668 	int ret = 0;
3669 	struct dmar_drhd_unit *drhd;
3670 
3671 	/* VT-d is required for a TXT/tboot launch, so enforce that */
3672 	force_on = tboot_force_iommu();
3673 
3674 	if (dmar_table_init()) {
3675 		if (force_on)
3676 			panic("tboot: Failed to initialize DMAR table\n");
3677 		return -ENODEV;
3678 	}
3679 
3680 	/*
3681 	 * Disable translation if already enabled prior to OS handover.
3682 	 */
3683 	for_each_drhd_unit(drhd) {
3684 		struct intel_iommu *iommu;
3685 
3686 		if (drhd->ignored)
3687 			continue;
3688 
3689 		iommu = drhd->iommu;
3690 		if (iommu->gcmd & DMA_GCMD_TE)
3691 			iommu_disable_translation(iommu);
3692 	}
3693 
3694 	if (dmar_dev_scope_init() < 0) {
3695 		if (force_on)
3696 			panic("tboot: Failed to initialize DMAR device scope\n");
3697 		return -ENODEV;
3698 	}
3699 
3700 	if (no_iommu || dmar_disabled)
3701 		return -ENODEV;
3702 
3703 	if (iommu_init_mempool()) {
3704 		if (force_on)
3705 			panic("tboot: Failed to initialize iommu memory\n");
3706 		return -ENODEV;
3707 	}
3708 
3709 	if (list_empty(&dmar_rmrr_units))
3710 		printk(KERN_INFO "DMAR: No RMRR found\n");
3711 
3712 	if (list_empty(&dmar_atsr_units))
3713 		printk(KERN_INFO "DMAR: No ATSR found\n");
3714 
3715 	if (dmar_init_reserved_ranges()) {
3716 		if (force_on)
3717 			panic("tboot: Failed to reserve iommu ranges\n");
3718 		return -ENODEV;
3719 	}
3720 
3721 	init_no_remapping_devices();
3722 
3723 	ret = init_dmars();
3724 	if (ret) {
3725 		if (force_on)
3726 			panic("tboot: Failed to initialize DMARs\n");
3727 		printk(KERN_ERR "IOMMU: dmar init failed\n");
3728 		put_iova_domain(&reserved_iova_list);
3729 		iommu_exit_mempool();
3730 		return ret;
3731 	}
3732 	printk(KERN_INFO
3733 	"PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3734 
3735 	init_timer(&unmap_timer);
3736 #ifdef CONFIG_SWIOTLB
3737 	swiotlb = 0;
3738 #endif
3739 	dma_ops = &intel_dma_ops;
3740 
3741 	init_iommu_pm_ops();
3742 
3743 	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
3744 
3745 	bus_register_notifier(&pci_bus_type, &device_nb);
3746 
3747 	intel_iommu_enabled = 1;
3748 
3749 	return 0;
3750 }
3751 
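/*
 * Devices behind a PCIe-to-PCI(-X) bridge issue DMA tagged with the bridge's
 * source-id, so context entries are also programmed for the bridges on the
 * path.  Tear those dependent context entries down when detaching @pdev.
 */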
3752 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3753 					   struct pci_dev *pdev)
3754 {
3755 	struct pci_dev *tmp, *parent;
3756 
3757 	if (!iommu || !pdev)
3758 		return;
3759 
3760 	/* dependent device detach */
3761 	tmp = pci_find_upstream_pcie_bridge(pdev);
3762 	/* Secondary interface's bus number and devfn 0 */
3763 	if (tmp) {
3764 		parent = pdev->bus->self;
3765 		while (parent != tmp) {
3766 			iommu_detach_dev(iommu, parent->bus->number,
3767 					 parent->devfn);
3768 			parent = parent->bus->self;
3769 		}
3770 		if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3771 			iommu_detach_dev(iommu,
3772 				tmp->subordinate->number, 0);
3773 		else /* this is a legacy PCI bridge */
3774 			iommu_detach_dev(iommu, tmp->bus->number,
3775 					 tmp->devfn);
3776 	}
3777 }
3778 
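/*
 * Detach one device from @domain: unlink its device_domain_info, clear its
 * context entry and device IOTLB, and, if it was the last device behind this
 * IOMMU, drop the IOMMU from the domain's bitmap (and release the hardware
 * domain id for non-VM, non-identity domains).
 */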
3779 static void domain_remove_one_dev_info(struct dmar_domain *domain,
3780 					  struct pci_dev *pdev)
3781 {
3782 	struct device_domain_info *info;
3783 	struct intel_iommu *iommu;
3784 	unsigned long flags;
3785 	int found = 0;
3786 	struct list_head *entry, *tmp;
3787 
3788 	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3789 				pdev->devfn);
3790 	if (!iommu)
3791 		return;
3792 
3793 	spin_lock_irqsave(&device_domain_lock, flags);
3794 	list_for_each_safe(entry, tmp, &domain->devices) {
3795 		info = list_entry(entry, struct device_domain_info, link);
3796 		if (info->segment == pci_domain_nr(pdev->bus) &&
3797 		    info->bus == pdev->bus->number &&
3798 		    info->devfn == pdev->devfn) {
3799 			unlink_domain_info(info);
3800 			spin_unlock_irqrestore(&device_domain_lock, flags);
3801 
3802 			iommu_disable_dev_iotlb(info);
3803 			iommu_detach_dev(iommu, info->bus, info->devfn);
3804 			iommu_detach_dependent_devices(iommu, pdev);
3805 			free_devinfo_mem(info);
3806 
3807 			spin_lock_irqsave(&device_domain_lock, flags);
3808 
3809 			if (found)
3810 				break;
3811 			else
3812 				continue;
3813 		}
3814 
3815 		/* if there are no other devices under the same iommu
3816 		 * owned by this domain, clear this iommu in iommu_bmp,
3817 		 * update iommu count and coherency
3818 		 */
3819 		if (iommu == device_to_iommu(info->segment, info->bus,
3820 					    info->devfn))
3821 			found = 1;
3822 	}
3823 
3824 	spin_unlock_irqrestore(&device_domain_lock, flags);
3825 
3826 	if (found == 0) {
3827 		unsigned long tmp_flags;
3828 		spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3829 		clear_bit(iommu->seq_id, domain->iommu_bmp);
3830 		domain->iommu_count--;
3831 		domain_update_iommu_cap(domain);
3832 		spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3833 
3834 		if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3835 		    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3836 			spin_lock_irqsave(&iommu->lock, tmp_flags);
3837 			clear_bit(domain->id, iommu->domain_ids);
3838 			iommu->domains[domain->id] = NULL;
3839 			spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3840 		}
3841 	}
3842 }
3843 
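/*
 * Detach every device from a virtual-machine domain, updating the domain's
 * IOMMU bitmap and capabilities as each IOMMU loses its last device.
 */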
3844 static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3845 {
3846 	struct device_domain_info *info;
3847 	struct intel_iommu *iommu;
3848 	unsigned long flags1, flags2;
3849 
3850 	spin_lock_irqsave(&device_domain_lock, flags1);
3851 	while (!list_empty(&domain->devices)) {
3852 		info = list_entry(domain->devices.next,
3853 			struct device_domain_info, link);
3854 		unlink_domain_info(info);
3855 		spin_unlock_irqrestore(&device_domain_lock, flags1);
3856 
3857 		iommu_disable_dev_iotlb(info);
3858 		iommu = device_to_iommu(info->segment, info->bus, info->devfn);
3859 		iommu_detach_dev(iommu, info->bus, info->devfn);
3860 		iommu_detach_dependent_devices(iommu, info->dev);
3861 
3862 		/* clear this iommu in iommu_bmp, update iommu count
3863 		 * and capabilities
3864 		 */
3865 		spin_lock_irqsave(&domain->iommu_lock, flags2);
3866 		if (test_and_clear_bit(iommu->seq_id,
3867 				       domain->iommu_bmp)) {
3868 			domain->iommu_count--;
3869 			domain_update_iommu_cap(domain);
3870 		}
3871 		spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3872 
3873 		free_devinfo_mem(info);
3874 		spin_lock_irqsave(&device_domain_lock, flags1);
3875 	}
3876 	spin_unlock_irqrestore(&device_domain_lock, flags1);
3877 }
3878 
3879 /* domain ids for virtual machine domains; these are never programmed into context entries */
3880 static unsigned long vm_domid;
3881 
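/*
 * Allocate a bare domain for the generic IOMMU API (device assignment).
 * VM domains draw their ids from the global vm_domid counter rather than
 * from any particular IOMMU's domain-id space.
 */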
3882 static struct dmar_domain *iommu_alloc_vm_domain(void)
3883 {
3884 	struct dmar_domain *domain;
3885 
3886 	domain = alloc_domain_mem();
3887 	if (!domain)
3888 		return NULL;
3889 
3890 	domain->id = vm_domid++;
3891 	domain->nid = -1;
3892 	memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
3893 	domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3894 
3895 	return domain;
3896 }
3897 
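/*
 * Initialize a VM domain: set up its iova allocator, reserve the special
 * ranges, derive the adjusted guest address width (AGAW) and allocate the
 * top-level page directory.
 */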
3898 static int md_domain_init(struct dmar_domain *domain, int guest_width)
3899 {
3900 	int adjust_width;
3901 
3902 	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3903 	spin_lock_init(&domain->iommu_lock);
3904 
3905 	domain_reserve_special_ranges(domain);
3906 
3907 	/* calculate AGAW */
3908 	domain->gaw = guest_width;
3909 	adjust_width = guestwidth_to_adjustwidth(guest_width);
3910 	domain->agaw = width_to_agaw(adjust_width);
3911 
3912 	INIT_LIST_HEAD(&domain->devices);
3913 
3914 	domain->iommu_count = 0;
3915 	domain->iommu_coherency = 0;
3916 	domain->iommu_snooping = 0;
3917 	domain->iommu_superpage = 0;
3918 	domain->max_addr = 0;
3919 	domain->nid = -1;
3920 
3921 	/* always allocate the top pgd */
3922 	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
3923 	if (!domain->pgd)
3924 		return -ENOMEM;
3925 	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3926 	return 0;
3927 }
3928 
3929 static void iommu_free_vm_domain(struct dmar_domain *domain)
3930 {
3931 	unsigned long flags;
3932 	struct dmar_drhd_unit *drhd;
3933 	struct intel_iommu *iommu;
3934 	unsigned long i;
3935 	unsigned long ndomains;
3936 
3937 	for_each_drhd_unit(drhd) {
3938 		if (drhd->ignored)
3939 			continue;
3940 		iommu = drhd->iommu;
3941 
3942 		ndomains = cap_ndoms(iommu->cap);
3943 		for_each_set_bit(i, iommu->domain_ids, ndomains) {
3944 			if (iommu->domains[i] == domain) {
3945 				spin_lock_irqsave(&iommu->lock, flags);
3946 				clear_bit(i, iommu->domain_ids);
3947 				iommu->domains[i] = NULL;
3948 				spin_unlock_irqrestore(&iommu->lock, flags);
3949 				break;
3950 			}
3951 		}
3952 	}
3953 }
3954 
3955 static void vm_domain_exit(struct dmar_domain *domain)
3956 {
3957 	/* Domain 0 is reserved, so don't process it */
3958 	if (!domain)
3959 		return;
3960 
3961 	vm_domain_remove_all_dev_info(domain);
3962 	/* destroy iovas */
3963 	put_iova_domain(&domain->iovad);
3964 
3965 	/* clear ptes */
3966 	dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3967 
3968 	/* free page tables */
3969 	dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3970 
3971 	iommu_free_vm_domain(domain);
3972 	free_domain_mem(domain);
3973 }
3974 
3975 static int intel_iommu_domain_init(struct iommu_domain *domain)
3976 {
3977 	struct dmar_domain *dmar_domain;
3978 
3979 	dmar_domain = iommu_alloc_vm_domain();
3980 	if (!dmar_domain) {
3981 		printk(KERN_ERR
3982 			"intel_iommu_domain_init: dmar_domain == NULL\n");
3983 		return -ENOMEM;
3984 	}
3985 	if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
3986 		printk(KERN_ERR
3987 			"intel_iommu_domain_init() failed\n");
3988 		vm_domain_exit(dmar_domain);
3989 		return -ENOMEM;
3990 	}
3991 	domain_update_iommu_cap(dmar_domain);
3992 	domain->priv = dmar_domain;
3993 
3994 	domain->geometry.aperture_start = 0;
3995 	domain->geometry.aperture_end   = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
3996 	domain->geometry.force_aperture = true;
3997 
3998 	return 0;
3999 }
4000 
4001 static void intel_iommu_domain_destroy(struct iommu_domain *domain)
4002 {
4003 	struct dmar_domain *dmar_domain = domain->priv;
4004 
4005 	domain->priv = NULL;
4006 	vm_domain_exit(dmar_domain);
4007 }
4008 
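/*
 * iommu_ops->attach_dev: move @dev into the caller's domain.  Any previous
 * attachment is torn down first, and the domain's address width is clamped
 * (dropping page-table levels if necessary) to what this IOMMU supports.
 */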
4009 static int intel_iommu_attach_device(struct iommu_domain *domain,
4010 				     struct device *dev)
4011 {
4012 	struct dmar_domain *dmar_domain = domain->priv;
4013 	struct pci_dev *pdev = to_pci_dev(dev);
4014 	struct intel_iommu *iommu;
4015 	int addr_width;
4016 
4017 	/* normally pdev is not mapped */
4018 	if (unlikely(domain_context_mapped(pdev))) {
4019 		struct dmar_domain *old_domain;
4020 
4021 		old_domain = find_domain(pdev);
4022 		if (old_domain) {
4023 			if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
4024 			    dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
4025 				domain_remove_one_dev_info(old_domain, pdev);
4026 			else
4027 				domain_remove_dev_info(old_domain);
4028 		}
4029 	}
4030 
4031 	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
4032 				pdev->devfn);
4033 	if (!iommu)
4034 		return -ENODEV;
4035 
4036 	/* check if this iommu agaw is sufficient for max mapped address */
4037 	addr_width = agaw_to_width(iommu->agaw);
4038 	if (addr_width > cap_mgaw(iommu->cap))
4039 		addr_width = cap_mgaw(iommu->cap);
4040 
4041 	if (dmar_domain->max_addr > (1LL << addr_width)) {
4042 		printk(KERN_ERR "%s: iommu width (%d) is not "
4043 		       "sufficient for the mapped address (%llx)\n",
4044 		       __func__, addr_width, dmar_domain->max_addr);
4045 		return -EFAULT;
4046 	}
4047 	dmar_domain->gaw = addr_width;
4048 
4049 	/*
4050 	 * Knock out extra levels of page tables if necessary
4051 	 */
4052 	while (iommu->agaw < dmar_domain->agaw) {
4053 		struct dma_pte *pte;
4054 
4055 		pte = dmar_domain->pgd;
4056 		if (dma_pte_present(pte)) {
4057 			dmar_domain->pgd = (struct dma_pte *)
4058 				phys_to_virt(dma_pte_addr(pte));
4059 			free_pgtable_page(pte);
4060 		}
4061 		dmar_domain->agaw--;
4062 	}
4063 
4064 	return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
4065 }
4066 
4067 static void intel_iommu_detach_device(struct iommu_domain *domain,
4068 				      struct device *dev)
4069 {
4070 	struct dmar_domain *dmar_domain = domain->priv;
4071 	struct pci_dev *pdev = to_pci_dev(dev);
4072 
4073 	domain_remove_one_dev_info(dmar_domain, pdev);
4074 }
4075 
4076 static int intel_iommu_map(struct iommu_domain *domain,
4077 			   unsigned long iova, phys_addr_t hpa,
4078 			   size_t size, int iommu_prot)
4079 {
4080 	struct dmar_domain *dmar_domain = domain->priv;
4081 	u64 max_addr;
4082 	int prot = 0;
4083 	int ret;
4084 
4085 	if (iommu_prot & IOMMU_READ)
4086 		prot |= DMA_PTE_READ;
4087 	if (iommu_prot & IOMMU_WRITE)
4088 		prot |= DMA_PTE_WRITE;
4089 	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4090 		prot |= DMA_PTE_SNP;
4091 
4092 	max_addr = iova + size;
4093 	if (dmar_domain->max_addr < max_addr) {
4094 		u64 end;
4095 
4096 		/* check if minimum agaw is sufficient for mapped address */
4097 		end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4098 		if (end < max_addr) {
4099 			printk(KERN_ERR "%s: iommu width (%d) is not "
4100 			       "sufficient for the mapped address (%llx)\n",
4101 			       __func__, dmar_domain->gaw, max_addr);
4102 			return -EFAULT;
4103 		}
4104 		dmar_domain->max_addr = max_addr;
4105 	}
4106 	/* Round up size to next multiple of PAGE_SIZE, if it and
4107 	   the low bits of hpa would take us onto the next page */
4108 	size = aligned_nrpages(hpa, size);
4109 	ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4110 				 hpa >> VTD_PAGE_SHIFT, size, prot);
4111 	return ret;
4112 }
4113 
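/*
 * iommu_ops->unmap: clear the PTEs backing [iova, iova + size) and return
 * the size that was actually unmapped (a power-of-two number of pages).
 */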
4114 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4115 			     unsigned long iova, size_t size)
4116 {
4117 	struct dmar_domain *dmar_domain = domain->priv;
4118 	int order;
4119 
4120 	order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
4121 			    (iova + size - 1) >> VTD_PAGE_SHIFT);
4122 
4123 	if (dmar_domain->max_addr == iova + size)
4124 		dmar_domain->max_addr = iova;
4125 
4126 	return PAGE_SIZE << order;
4127 }
4128 
4129 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4130 					    dma_addr_t iova)
4131 {
4132 	struct dmar_domain *dmar_domain = domain->priv;
4133 	struct dma_pte *pte;
4134 	u64 phys = 0;
4135 
4136 	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
4137 	if (pte)
4138 		phys = dma_pte_addr(pte);
4139 
4140 	return phys;
4141 }
4142 
4143 static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4144 				      unsigned long cap)
4145 {
4146 	struct dmar_domain *dmar_domain = domain->priv;
4147 
4148 	if (cap == IOMMU_CAP_CACHE_COHERENCY)
4149 		return dmar_domain->iommu_snooping;
4150 	if (cap == IOMMU_CAP_INTR_REMAP)
4151 		return irq_remapping_enabled;
4152 
4153 	return 0;
4154 }
4155 
4156 #define REQ_ACS_FLAGS	(PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
4157 
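/*
 * iommu_ops->add_device: place @dev into an iommu_group.  The group is keyed
 * on the closest upstream device that is DMA-isolated (on the root bus, or
 * with ACS enabled along the whole path), so non-isolated multifunction
 * devices and anything behind a PCIe-to-PCI bridge end up sharing a group.
 */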
4158 static int intel_iommu_add_device(struct device *dev)
4159 {
4160 	struct pci_dev *pdev = to_pci_dev(dev);
4161 	struct pci_dev *bridge, *dma_pdev = NULL;
4162 	struct iommu_group *group;
4163 	int ret;
4164 
4165 	if (!device_to_iommu(pci_domain_nr(pdev->bus),
4166 			     pdev->bus->number, pdev->devfn))
4167 		return -ENODEV;
4168 
4169 	bridge = pci_find_upstream_pcie_bridge(pdev);
4170 	if (bridge) {
4171 		if (pci_is_pcie(bridge))
4172 			dma_pdev = pci_get_domain_bus_and_slot(
4173 						pci_domain_nr(pdev->bus),
4174 						bridge->subordinate->number, 0);
4175 		if (!dma_pdev)
4176 			dma_pdev = pci_dev_get(bridge);
4177 	} else
4178 		dma_pdev = pci_dev_get(pdev);
4179 
4180 	/* Account for quirked devices */
4181 	swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
4182 
4183 	/*
4184 	 * If it's a multifunction device that does not support our
4185 	 * required ACS flags, add to the same group as function 0.
4186 	 */
4187 	if (dma_pdev->multifunction &&
4188 	    !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS))
4189 		swap_pci_ref(&dma_pdev,
4190 			     pci_get_slot(dma_pdev->bus,
4191 					  PCI_DEVFN(PCI_SLOT(dma_pdev->devfn),
4192 					  0)));
4193 
4194 	/*
4195 	 * Devices on the root bus go through the iommu.  If that's not us,
4196 	 * find the next upstream device and test ACS up to the root bus.
4197 	 * Finding the next device may require skipping virtual buses.
4198 	 */
4199 	while (!pci_is_root_bus(dma_pdev->bus)) {
4200 		struct pci_bus *bus = dma_pdev->bus;
4201 
4202 		while (!bus->self) {
4203 			if (!pci_is_root_bus(bus))
4204 				bus = bus->parent;
4205 			else
4206 				goto root_bus;
4207 		}
4208 
4209 		if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
4210 			break;
4211 
4212 		swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
4213 	}
4214 
4215 root_bus:
4216 	group = iommu_group_get(&dma_pdev->dev);
4217 	pci_dev_put(dma_pdev);
4218 	if (!group) {
4219 		group = iommu_group_alloc();
4220 		if (IS_ERR(group))
4221 			return PTR_ERR(group);
4222 	}
4223 
4224 	ret = iommu_group_add_device(group, dev);
4225 
4226 	iommu_group_put(group);
4227 	return ret;
4228 }
4229 
4230 static void intel_iommu_remove_device(struct device *dev)
4231 {
4232 	iommu_group_remove_device(dev);
4233 }
4234 
4235 static struct iommu_ops intel_iommu_ops = {
4236 	.domain_init	= intel_iommu_domain_init,
4237 	.domain_destroy = intel_iommu_domain_destroy,
4238 	.attach_dev	= intel_iommu_attach_device,
4239 	.detach_dev	= intel_iommu_detach_device,
4240 	.map		= intel_iommu_map,
4241 	.unmap		= intel_iommu_unmap,
4242 	.iova_to_phys	= intel_iommu_iova_to_phys,
4243 	.domain_has_cap = intel_iommu_domain_has_cap,
4244 	.add_device	= intel_iommu_add_device,
4245 	.remove_device	= intel_iommu_remove_device,
4246 	.pgsize_bitmap	= INTEL_IOMMU_PGSIZES,
4247 };
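/*
 * Illustrative only: the callbacks above are not called directly.  A consumer
 * such as VFIO or KVM device assignment reaches them through the generic
 * IOMMU API, roughly as follows (pdev here stands for whatever PCI device is
 * being assigned):
 *
 *	struct iommu_domain *dom = iommu_domain_alloc(&pci_bus_type);
 *
 *	iommu_attach_device(dom, &pdev->dev);	-> intel_iommu_attach_device()
 *	iommu_map(dom, iova, phys, size,
 *		  IOMMU_READ | IOMMU_WRITE);	-> intel_iommu_map()
 *	iommu_unmap(dom, iova, size);		-> intel_iommu_unmap()
 *	iommu_detach_device(dom, &pdev->dev);	-> intel_iommu_detach_device()
 *	iommu_domain_free(dom);			-> intel_iommu_domain_destroy()
 */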
4248 
4249 static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4250 {
4251 	/* G4x/GM45 integrated gfx dmar support is totally busted. */
4252 	printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4253 	dmar_map_gfx = 0;
4254 }
4255 
4256 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4257 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4258 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4259 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4260 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4261 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4262 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4263 
4264 static void quirk_iommu_rwbf(struct pci_dev *dev)
4265 {
4266 	/*
4267 	 * Mobile 4 Series Chipset neglects to set RWBF capability,
4268 	 * but needs it. Same seems to hold for the desktop versions.
4269 	 */
4270 	printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4271 	rwbf_quirk = 1;
4272 }
4273 
4274 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4275 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4276 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4277 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4278 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4279 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4280 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
4281 
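/*
 * GGC is the graphics control register in these host bridges' config space;
 * the *_VT fields report how much stolen memory the BIOS set aside for the
 * graphics translation table shadow needed when VT-d is enabled.
 */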
4282 #define GGC 0x52
4283 #define GGC_MEMORY_SIZE_MASK	(0xf << 8)
4284 #define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
4285 #define GGC_MEMORY_SIZE_1M	(0x1 << 8)
4286 #define GGC_MEMORY_SIZE_2M	(0x3 << 8)
4287 #define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
4288 #define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
4289 #define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
4290 #define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)
4291 
4292 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4293 {
4294 	unsigned short ggc;
4295 
4296 	if (pci_read_config_word(dev, GGC, &ggc))
4297 		return;
4298 
4299 	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4300 		printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4301 		dmar_map_gfx = 0;
4302 	} else if (dmar_map_gfx) {
4303 		/* we have to ensure the gfx device is idle before we flush */
4304 		printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4305 		intel_iommu_strict = 1;
4306 	}
4307 }
4308 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4309 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4310 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4311 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4312 
4313 /* On Tylersburg chipsets, some BIOSes have been known to enable the
4314    ISOCH DMAR unit for the Azalia sound device, but not give it any
4315    TLB entries, which causes it to deadlock. Check for that.  We do
4316    this in a function called from init_dmars(), instead of in a PCI
4317    quirk, because we don't want to print the obnoxious "BIOS broken"
4318    message if VT-d is actually disabled.
4319 */
4320 static void __init check_tylersburg_isoch(void)
4321 {
4322 	struct pci_dev *pdev;
4323 	uint32_t vtisochctrl;
4324 
4325 	/* If there's no Azalia in the system anyway, forget it. */
4326 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4327 	if (!pdev)
4328 		return;
4329 	pci_dev_put(pdev);
4330 
4331 	/* System Management Registers. Might be hidden, in which case
4332 	   we can't do the sanity check. But that's OK, because the
4333 	   known-broken BIOSes _don't_ actually hide it, so far. */
4334 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4335 	if (!pdev)
4336 		return;
4337 
4338 	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4339 		pci_dev_put(pdev);
4340 		return;
4341 	}
4342 
4343 	pci_dev_put(pdev);
4344 
4345 	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4346 	if (vtisochctrl & 1)
4347 		return;
4348 
4349 	/* Drop all bits other than the number of TLB entries */
4350 	vtisochctrl &= 0x1c;
4351 
4352 	/* If we have the recommended number of TLB entries (16), fine. */
4353 	if (vtisochctrl == 0x10)
4354 		return;
4355 
4356 	/* Zero TLB entries? You get to ride the short bus to school. */
4357 	if (!vtisochctrl) {
4358 		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4359 		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4360 		     dmi_get_system_info(DMI_BIOS_VENDOR),
4361 		     dmi_get_system_info(DMI_BIOS_VERSION),
4362 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
4363 		iommu_identity_mapping |= IDENTMAP_AZALIA;
4364 		return;
4365 	}
4366 
4367 	printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4368 	       vtisochctrl);
4369 }
4370