1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  * Author: Fenghua Yu <fenghua.yu@intel.com>
22  */
23 
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include <linux/iova.h>
37 #include <linux/iommu.h>
38 #include <linux/intel-iommu.h>
39 #include <asm/cacheflush.h>
40 #include <asm/iommu.h>
41 #include "pci.h"
42 
43 #define ROOT_SIZE		VTD_PAGE_SIZE
44 #define CONTEXT_SIZE		VTD_PAGE_SIZE
45 
46 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
47 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
48 
49 #define IOAPIC_RANGE_START	(0xfee00000)
50 #define IOAPIC_RANGE_END	(0xfeefffff)
51 #define IOVA_START_ADDR		(0x1000)
52 
53 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
54 
55 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
56 
57 #define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
58 #define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
59 #define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)
60 
61 /* global iommu list, set NULL for ignored DMAR units */
62 static struct intel_iommu **g_iommus;
63 
64 static int rwbf_quirk;
65 
66 /*
67  * 0: Present
68  * 1-11: Reserved
69  * 12-63: Context Ptr (12 - (haw-1))
70  * 64-127: Reserved
71  */
72 struct root_entry {
73 	u64	val;
74 	u64	rsvd1;
75 };
76 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
77 static inline bool root_present(struct root_entry *root)
78 {
79 	return (root->val & 1);
80 }
81 static inline void set_root_present(struct root_entry *root)
82 {
83 	root->val |= 1;
84 }
85 static inline void set_root_value(struct root_entry *root, unsigned long value)
86 {
87 	root->val |= value & VTD_PAGE_MASK;
88 }
89 
90 static inline struct context_entry *
91 get_context_addr_from_root(struct root_entry *root)
92 {
93 	return (struct context_entry *)
94 		(root_present(root)?phys_to_virt(
95 		root->val & VTD_PAGE_MASK) :
96 		NULL);
97 }
98 
99 /*
100  * low 64 bits:
101  * 0: present
102  * 1: fault processing disable
103  * 2-3: translation type
104  * 12-63: address space root
105  * high 64 bits:
106  * 0-2: address width
107  * 3-6: aval
108  * 8-23: domain id
109  */
110 struct context_entry {
111 	u64 lo;
112 	u64 hi;
113 };
114 
115 static inline bool context_present(struct context_entry *context)
116 {
117 	return (context->lo & 1);
118 }
119 static inline void context_set_present(struct context_entry *context)
120 {
121 	context->lo |= 1;
122 }
123 
124 static inline void context_set_fault_enable(struct context_entry *context)
125 {
126 	context->lo &= (((u64)-1) << 2) | 1;
127 }
128 
129 #define CONTEXT_TT_MULTI_LEVEL 0
130 
131 static inline void context_set_translation_type(struct context_entry *context,
132 						unsigned long value)
133 {
134 	context->lo &= (((u64)-1) << 4) | 3;
135 	context->lo |= (value & 3) << 2;
136 }
137 
138 static inline void context_set_address_root(struct context_entry *context,
139 					    unsigned long value)
140 {
141 	context->lo |= value & VTD_PAGE_MASK;
142 }
143 
144 static inline void context_set_address_width(struct context_entry *context,
145 					     unsigned long value)
146 {
147 	context->hi |= value & 7;
148 }
149 
150 static inline void context_set_domain_id(struct context_entry *context,
151 					 unsigned long value)
152 {
153 	context->hi |= (value & ((1 << 16) - 1)) << 8;
154 }
155 
156 static inline void context_clear_entry(struct context_entry *context)
157 {
158 	context->lo = 0;
159 	context->hi = 0;
160 }
161 
162 /*
163  * 0: readable
164  * 1: writable
165  * 2-6: reserved
166  * 7: super page
167  * 8-11: available
168  * 12-63: Host physical address
169  */
170 struct dma_pte {
171 	u64 val;
172 };
173 
174 static inline void dma_clear_pte(struct dma_pte *pte)
175 {
176 	pte->val = 0;
177 }
178 
179 static inline void dma_set_pte_readable(struct dma_pte *pte)
180 {
181 	pte->val |= DMA_PTE_READ;
182 }
183 
184 static inline void dma_set_pte_writable(struct dma_pte *pte)
185 {
186 	pte->val |= DMA_PTE_WRITE;
187 }
188 
189 static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
190 {
191 	pte->val = (pte->val & ~3) | (prot & 3);
192 }
193 
194 static inline u64 dma_pte_addr(struct dma_pte *pte)
195 {
196 	return (pte->val & VTD_PAGE_MASK);
197 }
198 
199 static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
200 {
201 	pte->val |= (addr & VTD_PAGE_MASK);
202 }
203 
204 static inline bool dma_pte_present(struct dma_pte *pte)
205 {
206 	return (pte->val & 3) != 0;
207 }
208 
209 /* devices under the same p2p bridge are owned in one domain */
210 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
211 
212 /* domain represents a virtual machine; more than one device
213  * across iommus may be owned in one domain, e.g. a kvm guest.
214  */
215 #define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 1)
216 
217 struct dmar_domain {
218 	int	id;			/* domain id */
219 	unsigned long iommu_bmp;	/* bitmap of iommus this domain uses*/
220 
221 	struct list_head devices; 	/* all devices' list */
222 	struct iova_domain iovad;	/* iova's that belong to this domain */
223 
224 	struct dma_pte	*pgd;		/* virtual address */
225 	spinlock_t	mapping_lock;	/* page table lock */
226 	int		gaw;		/* max guest address width */
227 
228 	/* adjusted guest address width, 0 is level 2 30-bit */
229 	int		agaw;
230 
231 	int		flags;		/* flags to find out type of domain */
232 
233 	int		iommu_coherency;/* indicate coherency of iommu access */
234 	int		iommu_count;	/* reference count of iommu */
235 	spinlock_t	iommu_lock;	/* protect iommu set in domain */
236 	u64		max_addr;	/* maximum mapped address */
237 };
238 
239 /* PCI domain-device relationship */
240 struct device_domain_info {
241 	struct list_head link;	/* link to domain siblings */
242 	struct list_head global; /* link to global list */
243 	u8 bus;			/* PCI bus number */
244 	u8 devfn;		/* PCI devfn number */
245 	struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
246 	struct dmar_domain *domain; /* pointer to domain */
247 };
248 
249 static void flush_unmaps_timeout(unsigned long data);
250 
251 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
252 
253 #define HIGH_WATER_MARK 250
254 struct deferred_flush_tables {
255 	int next;
256 	struct iova *iova[HIGH_WATER_MARK];
257 	struct dmar_domain *domain[HIGH_WATER_MARK];
258 };
259 
260 static struct deferred_flush_tables *deferred_flush;
261 
262 /* bitmap for indexing intel_iommus */
263 static int g_num_of_iommus;
264 
265 static DEFINE_SPINLOCK(async_umap_flush_lock);
266 static LIST_HEAD(unmaps_to_do);
267 
268 static int timer_on;
269 static long list_size;
270 
271 static void domain_remove_dev_info(struct dmar_domain *domain);
272 
273 #ifdef CONFIG_DMAR_DEFAULT_ON
274 int dmar_disabled = 0;
275 #else
276 int dmar_disabled = 1;
277 #endif /*CONFIG_DMAR_DEFAULT_ON*/
278 
279 static int __initdata dmar_map_gfx = 1;
280 static int dmar_forcedac;
281 static int intel_iommu_strict;
282 
283 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
284 static DEFINE_SPINLOCK(device_domain_lock);
285 static LIST_HEAD(device_domain_list);
286 
287 static struct iommu_ops intel_iommu_ops;
288 
289 static int __init intel_iommu_setup(char *str)
290 {
291 	if (!str)
292 		return -EINVAL;
293 	while (*str) {
294 		if (!strncmp(str, "on", 2)) {
295 			dmar_disabled = 0;
296 			printk(KERN_INFO "Intel-IOMMU: enabled\n");
297 		} else if (!strncmp(str, "off", 3)) {
298 			dmar_disabled = 1;
299 			printk(KERN_INFO "Intel-IOMMU: disabled\n");
300 		} else if (!strncmp(str, "igfx_off", 8)) {
301 			dmar_map_gfx = 0;
302 			printk(KERN_INFO
303 				"Intel-IOMMU: disable GFX device mapping\n");
304 		} else if (!strncmp(str, "forcedac", 8)) {
305 			printk(KERN_INFO
306 				"Intel-IOMMU: Forcing DAC for PCI devices\n");
307 			dmar_forcedac = 1;
308 		} else if (!strncmp(str, "strict", 6)) {
309 			printk(KERN_INFO
310 				"Intel-IOMMU: disable batched IOTLB flush\n");
311 			intel_iommu_strict = 1;
312 		}
313 
314 		str += strcspn(str, ",");
315 		while (*str == ',')
316 			str++;
317 	}
318 	return 0;
319 }
320 __setup("intel_iommu=", intel_iommu_setup);
321 
322 static struct kmem_cache *iommu_domain_cache;
323 static struct kmem_cache *iommu_devinfo_cache;
324 static struct kmem_cache *iommu_iova_cache;
325 
326 static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
327 {
328 	unsigned int flags;
329 	void *vaddr;
330 
331 	/* trying to avoid low memory issues */
332 	flags = current->flags & PF_MEMALLOC;
333 	current->flags |= PF_MEMALLOC;
334 	vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
335 	current->flags &= (~PF_MEMALLOC | flags);
336 	return vaddr;
337 }
338 
339 
340 static inline void *alloc_pgtable_page(void)
341 {
342 	unsigned int flags;
343 	void *vaddr;
344 
345 	/* trying to avoid low memory issues */
346 	flags = current->flags & PF_MEMALLOC;
347 	current->flags |= PF_MEMALLOC;
348 	vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
349 	current->flags &= (~PF_MEMALLOC | flags);
350 	return vaddr;
351 }
352 
353 static inline void free_pgtable_page(void *vaddr)
354 {
355 	free_page((unsigned long)vaddr);
356 }
357 
358 static inline void *alloc_domain_mem(void)
359 {
360 	return iommu_kmem_cache_alloc(iommu_domain_cache);
361 }
362 
363 static void free_domain_mem(void *vaddr)
364 {
365 	kmem_cache_free(iommu_domain_cache, vaddr);
366 }
367 
368 static inline void *alloc_devinfo_mem(void)
369 {
370 	return iommu_kmem_cache_alloc(iommu_devinfo_cache);
371 }
372 
373 static inline void free_devinfo_mem(void *vaddr)
374 {
375 	kmem_cache_free(iommu_devinfo_cache, vaddr);
376 }
377 
378 struct iova *alloc_iova_mem(void)
379 {
380 	return iommu_kmem_cache_alloc(iommu_iova_cache);
381 }
382 
383 void free_iova_mem(struct iova *iova)
384 {
385 	kmem_cache_free(iommu_iova_cache, iova);
386 }
387 
388 
389 static inline int width_to_agaw(int width);
390 
391 /* calculate agaw for each iommu.
392  * "SAGAW" may be different across iommus, so use a default agaw and
393  * fall back to a smaller supported agaw for iommus that don't support the default.
394  */
395 int iommu_calculate_agaw(struct intel_iommu *iommu)
396 {
397 	unsigned long sagaw;
398 	int agaw = -1;
399 
400 	sagaw = cap_sagaw(iommu->cap);
401 	for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
402 	     agaw >= 0; agaw--) {
403 		if (test_bit(agaw, &sagaw))
404 			break;
405 	}
406 
407 	return agaw;
408 }
409 
410 /* in native case, each domain is related to only one iommu */
411 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
412 {
413 	int iommu_id;
414 
415 	BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
416 
417 	iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
418 	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
419 		return NULL;
420 
421 	return g_iommus[iommu_id];
422 }
423 
424 /* "Coherency" capability may be different across iommus */
425 static void domain_update_iommu_coherency(struct dmar_domain *domain)
426 {
427 	int i;
428 
429 	domain->iommu_coherency = 1;
430 
431 	i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
432 	for (; i < g_num_of_iommus; ) {
433 		if (!ecap_coherent(g_iommus[i]->ecap)) {
434 			domain->iommu_coherency = 0;
435 			break;
436 		}
437 		i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
438 	}
439 }
440 
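/* Find the iommu (DRHD unit) that covers the given PCI bus/devfn */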
441 static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
442 {
443 	struct dmar_drhd_unit *drhd = NULL;
444 	int i;
445 
446 	for_each_drhd_unit(drhd) {
447 		if (drhd->ignored)
448 			continue;
449 
450 		for (i = 0; i < drhd->devices_cnt; i++)
451 			if (drhd->devices[i] &&
452 			    drhd->devices[i]->bus->number == bus &&
453 			    drhd->devices[i]->devfn == devfn)
454 				return drhd->iommu;
455 
456 		if (drhd->include_all)
457 			return drhd->iommu;
458 	}
459 
460 	return NULL;
461 }
462 
463 static void domain_flush_cache(struct dmar_domain *domain,
464 			       void *addr, int size)
465 {
466 	if (!domain->iommu_coherency)
467 		clflush_cache_range(addr, size);
468 }
469 
470 /* Gets context entry for a given bus and devfn */
471 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
472 		u8 bus, u8 devfn)
473 {
474 	struct root_entry *root;
475 	struct context_entry *context;
476 	unsigned long phy_addr;
477 	unsigned long flags;
478 
479 	spin_lock_irqsave(&iommu->lock, flags);
480 	root = &iommu->root_entry[bus];
481 	context = get_context_addr_from_root(root);
482 	if (!context) {
483 		context = (struct context_entry *)alloc_pgtable_page();
484 		if (!context) {
485 			spin_unlock_irqrestore(&iommu->lock, flags);
486 			return NULL;
487 		}
488 		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
489 		phy_addr = virt_to_phys((void *)context);
490 		set_root_value(root, phy_addr);
491 		set_root_present(root);
492 		__iommu_flush_cache(iommu, root, sizeof(*root));
493 	}
494 	spin_unlock_irqrestore(&iommu->lock, flags);
495 	return &context[devfn];
496 }
497 
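/* Check whether a present context entry already exists for bus/devfn */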
498 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
499 {
500 	struct root_entry *root;
501 	struct context_entry *context;
502 	int ret;
503 	unsigned long flags;
504 
505 	spin_lock_irqsave(&iommu->lock, flags);
506 	root = &iommu->root_entry[bus];
507 	context = get_context_addr_from_root(root);
508 	if (!context) {
509 		ret = 0;
510 		goto out;
511 	}
512 	ret = context_present(&context[devfn]);
513 out:
514 	spin_unlock_irqrestore(&iommu->lock, flags);
515 	return ret;
516 }
517 
518 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
519 {
520 	struct root_entry *root;
521 	struct context_entry *context;
522 	unsigned long flags;
523 
524 	spin_lock_irqsave(&iommu->lock, flags);
525 	root = &iommu->root_entry[bus];
526 	context = get_context_addr_from_root(root);
527 	if (context) {
528 		context_clear_entry(&context[devfn]);
529 		__iommu_flush_cache(iommu, &context[devfn], \
530 			sizeof(*context));
531 	}
532 	spin_unlock_irqrestore(&iommu->lock, flags);
533 }
534 
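/* Free all per-bus context tables and the root table itself */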
535 static void free_context_table(struct intel_iommu *iommu)
536 {
537 	struct root_entry *root;
538 	int i;
539 	unsigned long flags;
540 	struct context_entry *context;
541 
542 	spin_lock_irqsave(&iommu->lock, flags);
543 	if (!iommu->root_entry) {
544 		goto out;
545 	}
546 	for (i = 0; i < ROOT_ENTRY_NR; i++) {
547 		root = &iommu->root_entry[i];
548 		context = get_context_addr_from_root(root);
549 		if (context)
550 			free_pgtable_page(context);
551 	}
552 	free_pgtable_page(iommu->root_entry);
553 	iommu->root_entry = NULL;
554 out:
555 	spin_unlock_irqrestore(&iommu->lock, flags);
556 }
557 
558 /* page table handling */
559 #define LEVEL_STRIDE		(9)
560 #define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
561 
562 static inline int agaw_to_level(int agaw)
563 {
564 	return agaw + 2;
565 }
566 
567 static inline int agaw_to_width(int agaw)
568 {
569 	return 30 + agaw * LEVEL_STRIDE;
570 
571 }
572 
573 static inline int width_to_agaw(int width)
574 {
575 	return (width - 30) / LEVEL_STRIDE;
576 }
577 
578 static inline unsigned int level_to_offset_bits(int level)
579 {
580 	return (12 + (level - 1) * LEVEL_STRIDE);
581 }
582 
583 static inline int address_level_offset(u64 addr, int level)
584 {
585 	return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
586 }
587 
588 static inline u64 level_mask(int level)
589 {
590 	return ((u64)-1 << level_to_offset_bits(level));
591 }
592 
593 static inline u64 level_size(int level)
594 {
595 	return ((u64)1 << level_to_offset_bits(level));
596 }
597 
598 static inline u64 align_to_level(u64 addr, int level)
599 {
600 	return ((addr + level_size(level) - 1) & level_mask(level));
601 }
602 
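/* Walk the page table down to the leaf pte for addr, allocating intermediate levels as needed */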
603 static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
604 {
605 	int addr_width = agaw_to_width(domain->agaw);
606 	struct dma_pte *parent, *pte = NULL;
607 	int level = agaw_to_level(domain->agaw);
608 	int offset;
609 	unsigned long flags;
610 
611 	BUG_ON(!domain->pgd);
612 
613 	addr &= (((u64)1) << addr_width) - 1;
614 	parent = domain->pgd;
615 
616 	spin_lock_irqsave(&domain->mapping_lock, flags);
617 	while (level > 0) {
618 		void *tmp_page;
619 
620 		offset = address_level_offset(addr, level);
621 		pte = &parent[offset];
622 		if (level == 1)
623 			break;
624 
625 		if (!dma_pte_present(pte)) {
626 			tmp_page = alloc_pgtable_page();
627 
628 			if (!tmp_page) {
629 				spin_unlock_irqrestore(&domain->mapping_lock,
630 					flags);
631 				return NULL;
632 			}
633 			domain_flush_cache(domain, tmp_page, PAGE_SIZE);
634 			dma_set_pte_addr(pte, virt_to_phys(tmp_page));
635 			/*
636 			 * high level table always sets r/w, last level page
637 			 * table control read/write
638 			 */
639 			dma_set_pte_readable(pte);
640 			dma_set_pte_writable(pte);
641 			domain_flush_cache(domain, pte, sizeof(*pte));
642 		}
643 		parent = phys_to_virt(dma_pte_addr(pte));
644 		level--;
645 	}
646 
647 	spin_unlock_irqrestore(&domain->mapping_lock, flags);
648 	return pte;
649 }
650 
651 /* return address's pte at specific level */
652 static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
653 		int level)
654 {
655 	struct dma_pte *parent, *pte = NULL;
656 	int total = agaw_to_level(domain->agaw);
657 	int offset;
658 
659 	parent = domain->pgd;
660 	while (level <= total) {
661 		offset = address_level_offset(addr, total);
662 		pte = &parent[offset];
663 		if (level == total)
664 			return pte;
665 
666 		if (!dma_pte_present(pte))
667 			break;
668 		parent = phys_to_virt(dma_pte_addr(pte));
669 		total--;
670 	}
671 	return NULL;
672 }
673 
674 /* clear one page's page table */
675 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
676 {
677 	struct dma_pte *pte = NULL;
678 
679 	/* get last level pte */
680 	pte = dma_addr_level_pte(domain, addr, 1);
681 
682 	if (pte) {
683 		dma_clear_pte(pte);
684 		domain_flush_cache(domain, pte, sizeof(*pte));
685 	}
686 }
687 
688 /* clear last level pte; a tlb flush should follow */
689 static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
690 {
691 	int addr_width = agaw_to_width(domain->agaw);
692 
693 	start &= (((u64)1) << addr_width) - 1;
694 	end &= (((u64)1) << addr_width) - 1;
695 	/* in case it's a partial page */
696 	start = PAGE_ALIGN(start);
697 	end &= PAGE_MASK;
698 
699 	/* we don't need lock here, nobody else touches the iova range */
700 	while (start < end) {
701 		dma_pte_clear_one(domain, start);
702 		start += VTD_PAGE_SIZE;
703 	}
704 }
705 
706 /* free page table pages. last level pte should already be cleared */
707 static void dma_pte_free_pagetable(struct dmar_domain *domain,
708 	u64 start, u64 end)
709 {
710 	int addr_width = agaw_to_width(domain->agaw);
711 	struct dma_pte *pte;
712 	int total = agaw_to_level(domain->agaw);
713 	int level;
714 	u64 tmp;
715 
716 	start &= (((u64)1) << addr_width) - 1;
717 	end &= (((u64)1) << addr_width) - 1;
718 
719 	/* we don't need lock here, nobody else touches the iova range */
720 	level = 2;
721 	while (level <= total) {
722 		tmp = align_to_level(start, level);
723 		if (tmp >= end || (tmp + level_size(level) > end))
724 			return;
725 
726 		while (tmp < end) {
727 			pte = dma_addr_level_pte(domain, tmp, level);
728 			if (pte) {
729 				free_pgtable_page(
730 					phys_to_virt(dma_pte_addr(pte)));
731 				dma_clear_pte(pte);
732 				domain_flush_cache(domain, pte, sizeof(*pte));
733 			}
734 			tmp += level_size(level);
735 		}
736 		level++;
737 	}
738 	/* free pgd */
739 	if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
740 		free_pgtable_page(domain->pgd);
741 		domain->pgd = NULL;
742 	}
743 }
744 
745 /* iommu handling */
746 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
747 {
748 	struct root_entry *root;
749 	unsigned long flags;
750 
751 	root = (struct root_entry *)alloc_pgtable_page();
752 	if (!root)
753 		return -ENOMEM;
754 
755 	__iommu_flush_cache(iommu, root, ROOT_SIZE);
756 
757 	spin_lock_irqsave(&iommu->lock, flags);
758 	iommu->root_entry = root;
759 	spin_unlock_irqrestore(&iommu->lock, flags);
760 
761 	return 0;
762 }
763 
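/* Program the root table address into hardware and issue the Set Root Table Pointer command */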
764 static void iommu_set_root_entry(struct intel_iommu *iommu)
765 {
766 	void *addr;
767 	u32 cmd, sts;
768 	unsigned long flag;
769 
770 	addr = iommu->root_entry;
771 
772 	spin_lock_irqsave(&iommu->register_lock, flag);
773 	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
774 
775 	cmd = iommu->gcmd | DMA_GCMD_SRTP;
776 	writel(cmd, iommu->reg + DMAR_GCMD_REG);
777 
778 	/* Make sure hardware completes it */
779 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
780 		readl, (sts & DMA_GSTS_RTPS), sts);
781 
782 	spin_unlock_irqrestore(&iommu->register_lock, flag);
783 }
784 
785 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
786 {
787 	u32 val;
788 	unsigned long flag;
789 
790 	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
791 		return;
792 	val = iommu->gcmd | DMA_GCMD_WBF;
793 
794 	spin_lock_irqsave(&iommu->register_lock, flag);
795 	writel(val, iommu->reg + DMAR_GCMD_REG);
796 
797 	/* Make sure hardware completes it */
798 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
799 			readl, (!(val & DMA_GSTS_WBFS)), val);
800 
801 	spin_unlock_irqrestore(&iommu->register_lock, flag);
802 }
803 
804 /* return value determines if we need a write buffer flush */
805 static int __iommu_flush_context(struct intel_iommu *iommu,
806 	u16 did, u16 source_id, u8 function_mask, u64 type,
807 	int non_present_entry_flush)
808 {
809 	u64 val = 0;
810 	unsigned long flag;
811 
812 	/*
813 	 * In the non-present entry flush case, if hardware doesn't cache
814 	 * non-present entries we do nothing; if it does cache them, we flush
815 	 * the entries of domain 0 (the domain id used to cache any
816 	 * non-present entries)
817 	 */
818 	if (non_present_entry_flush) {
819 		if (!cap_caching_mode(iommu->cap))
820 			return 1;
821 		else
822 			did = 0;
823 	}
824 
825 	switch (type) {
826 	case DMA_CCMD_GLOBAL_INVL:
827 		val = DMA_CCMD_GLOBAL_INVL;
828 		break;
829 	case DMA_CCMD_DOMAIN_INVL:
830 		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
831 		break;
832 	case DMA_CCMD_DEVICE_INVL:
833 		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
834 			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
835 		break;
836 	default:
837 		BUG();
838 	}
839 	val |= DMA_CCMD_ICC;
840 
841 	spin_lock_irqsave(&iommu->register_lock, flag);
842 	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
843 
844 	/* Make sure hardware completes it */
845 	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
846 		dmar_readq, (!(val & DMA_CCMD_ICC)), val);
847 
848 	spin_unlock_irqrestore(&iommu->register_lock, flag);
849 
850 	/* flush context entry will implicitly flush write buffer */
851 	return 0;
852 }
853 
854 /* return value determines if we need a write buffer flush */
855 static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
856 	u64 addr, unsigned int size_order, u64 type,
857 	int non_present_entry_flush)
858 {
859 	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
860 	u64 val = 0, val_iva = 0;
861 	unsigned long flag;
862 
863 	/*
864 	 * In the non-present entry flush case, if hardware doesn't cache
865 	 * non-present entries we do nothing; if it does cache them, we flush
866 	 * the entries of domain 0 (the domain id used to cache any
867 	 * non-present entries)
868 	 */
869 	if (non_present_entry_flush) {
870 		if (!cap_caching_mode(iommu->cap))
871 			return 1;
872 		else
873 			did = 0;
874 	}
875 
876 	switch (type) {
877 	case DMA_TLB_GLOBAL_FLUSH:
878 		/* global flush doesn't need to set IVA_REG */
879 		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
880 		break;
881 	case DMA_TLB_DSI_FLUSH:
882 		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
883 		break;
884 	case DMA_TLB_PSI_FLUSH:
885 		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
886 		/* Note: always flush non-leaf currently */
887 		val_iva = size_order | addr;
888 		break;
889 	default:
890 		BUG();
891 	}
892 	/* Note: set drain read/write */
893 #if 0
894 	/*
895 	 * This is probably only needed to be extra safe. Looks like we can
896 	 * ignore it without any impact.
897 	 */
898 	if (cap_read_drain(iommu->cap))
899 		val |= DMA_TLB_READ_DRAIN;
900 #endif
901 	if (cap_write_drain(iommu->cap))
902 		val |= DMA_TLB_WRITE_DRAIN;
903 
904 	spin_lock_irqsave(&iommu->register_lock, flag);
905 	/* Note: Only uses first TLB reg currently */
906 	if (val_iva)
907 		dmar_writeq(iommu->reg + tlb_offset, val_iva);
908 	dmar_writeq(iommu->reg + tlb_offset + 8, val);
909 
910 	/* Make sure hardware completes it */
911 	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
912 		dmar_readq, (!(val & DMA_TLB_IVT)), val);
913 
914 	spin_unlock_irqrestore(&iommu->register_lock, flag);
915 
916 	/* check IOTLB invalidation granularity */
917 	if (DMA_TLB_IAIG(val) == 0)
918 		printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
919 	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
920 		pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
921 			(unsigned long long)DMA_TLB_IIRG(type),
922 			(unsigned long long)DMA_TLB_IAIG(val));
923 	/* flush iotlb entry will implicitly flush write buffer */
924 	return 0;
925 }
926 
927 static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
928 	u64 addr, unsigned int pages, int non_present_entry_flush)
929 {
930 	unsigned int mask;
931 
932 	BUG_ON(addr & (~VTD_PAGE_MASK));
933 	BUG_ON(pages == 0);
934 
935 	/* Fallback to domain selective flush if no PSI support */
936 	if (!cap_pgsel_inv(iommu->cap))
937 		return iommu->flush.flush_iotlb(iommu, did, 0, 0,
938 						DMA_TLB_DSI_FLUSH,
939 						non_present_entry_flush);
940 
941 	/*
942 	 * PSI requires page size to be 2 ^ x, and the base address is naturally
943 	 * aligned to the size
944 	 */
945 	mask = ilog2(__roundup_pow_of_two(pages));
946 	/* Fallback to domain selective flush if size is too big */
947 	if (mask > cap_max_amask_val(iommu->cap))
948 		return iommu->flush.flush_iotlb(iommu, did, 0, 0,
949 			DMA_TLB_DSI_FLUSH, non_present_entry_flush);
950 
951 	return iommu->flush.flush_iotlb(iommu, did, addr, mask,
952 					DMA_TLB_PSI_FLUSH,
953 					non_present_entry_flush);
954 }
955 
956 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
957 {
958 	u32 pmen;
959 	unsigned long flags;
960 
961 	spin_lock_irqsave(&iommu->register_lock, flags);
962 	pmen = readl(iommu->reg + DMAR_PMEN_REG);
963 	pmen &= ~DMA_PMEN_EPM;
964 	writel(pmen, iommu->reg + DMAR_PMEN_REG);
965 
966 	/* wait for the protected region status bit to clear */
967 	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
968 		readl, !(pmen & DMA_PMEN_PRS), pmen);
969 
970 	spin_unlock_irqrestore(&iommu->register_lock, flags);
971 }
972 
973 static int iommu_enable_translation(struct intel_iommu *iommu)
974 {
975 	u32 sts;
976 	unsigned long flags;
977 
978 	spin_lock_irqsave(&iommu->register_lock, flags);
979 	writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
980 
981 	/* Make sure hardware completes it */
982 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
983 		readl, (sts & DMA_GSTS_TES), sts);
984 
985 	iommu->gcmd |= DMA_GCMD_TE;
986 	spin_unlock_irqrestore(&iommu->register_lock, flags);
987 	return 0;
988 }
989 
990 static int iommu_disable_translation(struct intel_iommu *iommu)
991 {
992 	u32 sts;
993 	unsigned long flag;
994 
995 	spin_lock_irqsave(&iommu->register_lock, flag);
996 	iommu->gcmd &= ~DMA_GCMD_TE;
997 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
998 
999 	/* Make sure hardware completes it */
1000 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1001 		readl, (!(sts & DMA_GSTS_TES)), sts);
1002 
1003 	spin_unlock_irqrestore(&iommu->register_lock, flag);
1004 	return 0;
1005 }
1006 
1007 /* iommu interrupt handling. Most of it is MSI-like. */
1008 
1009 static const char *fault_reason_strings[] =
1010 {
1011 	"Software",
1012 	"Present bit in root entry is clear",
1013 	"Present bit in context entry is clear",
1014 	"Invalid context entry",
1015 	"Access beyond MGAW",
1016 	"PTE Write access is not set",
1017 	"PTE Read access is not set",
1018 	"Next page table ptr is invalid",
1019 	"Root table address invalid",
1020 	"Context table ptr is invalid",
1021 	"non-zero reserved fields in RTP",
1022 	"non-zero reserved fields in CTP",
1023 	"non-zero reserved fields in PTE",
1024 };
1025 #define MAX_FAULT_REASON_IDX 	(ARRAY_SIZE(fault_reason_strings) - 1)
1026 
1027 const char *dmar_get_fault_reason(u8 fault_reason)
1028 {
1029 	if (fault_reason > MAX_FAULT_REASON_IDX)
1030 		return "Unknown";
1031 	else
1032 		return fault_reason_strings[fault_reason];
1033 }
1034 
1035 void dmar_msi_unmask(unsigned int irq)
1036 {
1037 	struct intel_iommu *iommu = get_irq_data(irq);
1038 	unsigned long flag;
1039 
1040 	/* unmask it */
1041 	spin_lock_irqsave(&iommu->register_lock, flag);
1042 	writel(0, iommu->reg + DMAR_FECTL_REG);
1043 	/* Read a reg to force flush the posted write */
1044 	readl(iommu->reg + DMAR_FECTL_REG);
1045 	spin_unlock_irqrestore(&iommu->register_lock, flag);
1046 }
1047 
1048 void dmar_msi_mask(unsigned int irq)
1049 {
1050 	unsigned long flag;
1051 	struct intel_iommu *iommu = get_irq_data(irq);
1052 
1053 	/* mask it */
1054 	spin_lock_irqsave(&iommu->register_lock, flag);
1055 	writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1056 	/* Read a reg to force flush the posted write */
1057 	readl(iommu->reg + DMAR_FECTL_REG);
1058 	spin_unlock_irqrestore(&iommu->register_lock, flag);
1059 }
1060 
1061 void dmar_msi_write(int irq, struct msi_msg *msg)
1062 {
1063 	struct intel_iommu *iommu = get_irq_data(irq);
1064 	unsigned long flag;
1065 
1066 	spin_lock_irqsave(&iommu->register_lock, flag);
1067 	writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1068 	writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1069 	writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1070 	spin_unlock_irqrestore(&iommu->register_lock, flag);
1071 }
1072 
1073 void dmar_msi_read(int irq, struct msi_msg *msg)
1074 {
1075 	struct intel_iommu *iommu = get_irq_data(irq);
1076 	unsigned long flag;
1077 
1078 	spin_lock_irqsave(&iommu->register_lock, flag);
1079 	msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1080 	msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1081 	msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1082 	spin_unlock_irqrestore(&iommu->register_lock, flag);
1083 }
1084 
1085 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
1086 		u8 fault_reason, u16 source_id, unsigned long long addr)
1087 {
1088 	const char *reason;
1089 
1090 	reason = dmar_get_fault_reason(fault_reason);
1091 
1092 	printk(KERN_ERR
1093 		"DMAR:[%s] Request device [%02x:%02x.%d] "
1094 		"fault addr %llx \n"
1095 		"DMAR:[fault reason %02d] %s\n",
1096 		(type ? "DMA Read" : "DMA Write"),
1097 		(source_id >> 8), PCI_SLOT(source_id & 0xFF),
1098 		PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1099 	return 0;
1100 }
1101 
1102 #define PRIMARY_FAULT_REG_LEN (16)
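/* Fault interrupt handler: walk the fault recording registers and report each pending fault */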
1103 static irqreturn_t iommu_page_fault(int irq, void *dev_id)
1104 {
1105 	struct intel_iommu *iommu = dev_id;
1106 	int reg, fault_index;
1107 	u32 fault_status;
1108 	unsigned long flag;
1109 
1110 	spin_lock_irqsave(&iommu->register_lock, flag);
1111 	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1112 
1113 	/* TBD: ignore advanced fault log currently */
1114 	if (!(fault_status & DMA_FSTS_PPF))
1115 		goto clear_overflow;
1116 
1117 	fault_index = dma_fsts_fault_record_index(fault_status);
1118 	reg = cap_fault_reg_offset(iommu->cap);
1119 	while (1) {
1120 		u8 fault_reason;
1121 		u16 source_id;
1122 		u64 guest_addr;
1123 		int type;
1124 		u32 data;
1125 
1126 		/* highest 32 bits */
1127 		data = readl(iommu->reg + reg +
1128 				fault_index * PRIMARY_FAULT_REG_LEN + 12);
1129 		if (!(data & DMA_FRCD_F))
1130 			break;
1131 
1132 		fault_reason = dma_frcd_fault_reason(data);
1133 		type = dma_frcd_type(data);
1134 
1135 		data = readl(iommu->reg + reg +
1136 				fault_index * PRIMARY_FAULT_REG_LEN + 8);
1137 		source_id = dma_frcd_source_id(data);
1138 
1139 		guest_addr = dmar_readq(iommu->reg + reg +
1140 				fault_index * PRIMARY_FAULT_REG_LEN);
1141 		guest_addr = dma_frcd_page_addr(guest_addr);
1142 		/* clear the fault */
1143 		writel(DMA_FRCD_F, iommu->reg + reg +
1144 			fault_index * PRIMARY_FAULT_REG_LEN + 12);
1145 
1146 		spin_unlock_irqrestore(&iommu->register_lock, flag);
1147 
1148 		iommu_page_fault_do_one(iommu, type, fault_reason,
1149 				source_id, guest_addr);
1150 
1151 		fault_index++;
1152 		if (fault_index > cap_num_fault_regs(iommu->cap))
1153 			fault_index = 0;
1154 		spin_lock_irqsave(&iommu->register_lock, flag);
1155 	}
1156 clear_overflow:
1157 	/* clear primary fault overflow */
1158 	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1159 	if (fault_status & DMA_FSTS_PFO)
1160 		writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
1161 
1162 	spin_unlock_irqrestore(&iommu->register_lock, flag);
1163 	return IRQ_HANDLED;
1164 }
1165 
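/* Allocate an irq for fault reporting on this iommu and hook up the handler */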
1166 int dmar_set_interrupt(struct intel_iommu *iommu)
1167 {
1168 	int irq, ret;
1169 
1170 	irq = create_irq();
1171 	if (!irq) {
1172 		printk(KERN_ERR "IOMMU: no free vectors\n");
1173 		return -EINVAL;
1174 	}
1175 
1176 	set_irq_data(irq, iommu);
1177 	iommu->irq = irq;
1178 
1179 	ret = arch_setup_dmar_msi(irq);
1180 	if (ret) {
1181 		set_irq_data(irq, NULL);
1182 		iommu->irq = 0;
1183 		destroy_irq(irq);
1184 		return ret;
1185 	}
1186 
1187 	/* Force any pending faults to be cleared */
1188 	iommu_page_fault(irq, iommu);
1189 
1190 	ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
1191 	if (ret)
1192 		printk(KERN_ERR "IOMMU: can't request irq\n");
1193 	return ret;
1194 }
1195 
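/* Allocate the per-iommu domain id bitmap and domain pointer array */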
1196 static int iommu_init_domains(struct intel_iommu *iommu)
1197 {
1198 	unsigned long ndomains;
1199 	unsigned long nlongs;
1200 
1201 	ndomains = cap_ndoms(iommu->cap);
1202 	pr_debug("Number of Domains supported <%ld>\n", ndomains);
1203 	nlongs = BITS_TO_LONGS(ndomains);
1204 
1205 	/* TBD: there might be 64K domains,
1206 	 * consider other allocation for future chip
1207 	 */
1208 	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1209 	if (!iommu->domain_ids) {
1210 		printk(KERN_ERR "Allocating domain id array failed\n");
1211 		return -ENOMEM;
1212 	}
1213 	iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1214 			GFP_KERNEL);
1215 	if (!iommu->domains) {
1216 		printk(KERN_ERR "Allocating domain array failed\n");
1217 		kfree(iommu->domain_ids);
1218 		return -ENOMEM;
1219 	}
1220 
1221 	spin_lock_init(&iommu->lock);
1222 
1223 	/*
1224 	 * if Caching mode is set, then invalid translations are tagged
1225 	 * with domainid 0. Hence we need to pre-allocate it.
1226 	 */
1227 	if (cap_caching_mode(iommu->cap))
1228 		set_bit(0, iommu->domain_ids);
1229 	return 0;
1230 }
1231 
1232 
1233 static void domain_exit(struct dmar_domain *domain);
1234 static void vm_domain_exit(struct dmar_domain *domain);
1235 
1236 void free_dmar_iommu(struct intel_iommu *iommu)
1237 {
1238 	struct dmar_domain *domain;
1239 	int i;
1240 	unsigned long flags;
1241 
1242 	i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1243 	for (; i < cap_ndoms(iommu->cap); ) {
1244 		domain = iommu->domains[i];
1245 		clear_bit(i, iommu->domain_ids);
1246 
1247 		spin_lock_irqsave(&domain->iommu_lock, flags);
1248 		if (--domain->iommu_count == 0) {
1249 			if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1250 				vm_domain_exit(domain);
1251 			else
1252 				domain_exit(domain);
1253 		}
1254 		spin_unlock_irqrestore(&domain->iommu_lock, flags);
1255 
1256 		i = find_next_bit(iommu->domain_ids,
1257 			cap_ndoms(iommu->cap), i+1);
1258 	}
1259 
1260 	if (iommu->gcmd & DMA_GCMD_TE)
1261 		iommu_disable_translation(iommu);
1262 
1263 	if (iommu->irq) {
1264 		set_irq_data(iommu->irq, NULL);
1265 		/* This will mask the irq */
1266 		free_irq(iommu->irq, iommu);
1267 		destroy_irq(iommu->irq);
1268 	}
1269 
1270 	kfree(iommu->domains);
1271 	kfree(iommu->domain_ids);
1272 
1273 	g_iommus[iommu->seq_id] = NULL;
1274 
1275 	/* if all iommus are freed, free g_iommus */
1276 	for (i = 0; i < g_num_of_iommus; i++) {
1277 		if (g_iommus[i])
1278 			break;
1279 	}
1280 
1281 	if (i == g_num_of_iommus)
1282 		kfree(g_iommus);
1283 
1284 	/* free context mapping */
1285 	free_context_table(iommu);
1286 }
1287 
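/* Allocate a dmar_domain and claim a free domain id on this iommu */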
1288 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1289 {
1290 	unsigned long num;
1291 	unsigned long ndomains;
1292 	struct dmar_domain *domain;
1293 	unsigned long flags;
1294 
1295 	domain = alloc_domain_mem();
1296 	if (!domain)
1297 		return NULL;
1298 
1299 	ndomains = cap_ndoms(iommu->cap);
1300 
1301 	spin_lock_irqsave(&iommu->lock, flags);
1302 	num = find_first_zero_bit(iommu->domain_ids, ndomains);
1303 	if (num >= ndomains) {
1304 		spin_unlock_irqrestore(&iommu->lock, flags);
1305 		free_domain_mem(domain);
1306 		printk(KERN_ERR "IOMMU: no free domain ids\n");
1307 		return NULL;
1308 	}
1309 
1310 	set_bit(num, iommu->domain_ids);
1311 	domain->id = num;
1312 	memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1313 	set_bit(iommu->seq_id, &domain->iommu_bmp);
1314 	domain->flags = 0;
1315 	iommu->domains[num] = domain;
1316 	spin_unlock_irqrestore(&iommu->lock, flags);
1317 
1318 	return domain;
1319 }
1320 
1321 static void iommu_free_domain(struct dmar_domain *domain)
1322 {
1323 	unsigned long flags;
1324 	struct intel_iommu *iommu;
1325 
1326 	iommu = domain_get_iommu(domain);
1327 
1328 	spin_lock_irqsave(&iommu->lock, flags);
1329 	clear_bit(domain->id, iommu->domain_ids);
1330 	spin_unlock_irqrestore(&iommu->lock, flags);
1331 }
1332 
1333 static struct iova_domain reserved_iova_list;
1334 static struct lock_class_key reserved_alloc_key;
1335 static struct lock_class_key reserved_rbtree_key;
1336 
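/* Reserve iova ranges (IOAPIC, PCI MMIO) that must never be handed out for DMA */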
1337 static void dmar_init_reserved_ranges(void)
1338 {
1339 	struct pci_dev *pdev = NULL;
1340 	struct iova *iova;
1341 	int i;
1342 	u64 addr, size;
1343 
1344 	init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1345 
1346 	lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1347 		&reserved_alloc_key);
1348 	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1349 		&reserved_rbtree_key);
1350 
1351 	/* IOAPIC ranges shouldn't be accessed by DMA */
1352 	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1353 		IOVA_PFN(IOAPIC_RANGE_END));
1354 	if (!iova)
1355 		printk(KERN_ERR "Reserve IOAPIC range failed\n");
1356 
1357 	/* Reserve all PCI MMIO to avoid peer-to-peer access */
1358 	for_each_pci_dev(pdev) {
1359 		struct resource *r;
1360 
1361 		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1362 			r = &pdev->resource[i];
1363 			if (!r->flags || !(r->flags & IORESOURCE_MEM))
1364 				continue;
1365 			addr = r->start;
1366 			addr &= PAGE_MASK;
1367 			size = r->end - addr;
1368 			size = PAGE_ALIGN(size);
1369 			iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1370 				IOVA_PFN(size + addr) - 1);
1371 			if (!iova)
1372 				printk(KERN_ERR "Reserve iova failed\n");
1373 		}
1374 	}
1375 
1376 }
1377 
1378 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1379 {
1380 	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1381 }
1382 
1383 static inline int guestwidth_to_adjustwidth(int gaw)
1384 {
1385 	int agaw;
1386 	int r = (gaw - 12) % 9;
1387 
1388 	if (r == 0)
1389 		agaw = gaw;
1390 	else
1391 		agaw = gaw + 9 - r;
1392 	if (agaw > 64)
1393 		agaw = 64;
1394 	return agaw;
1395 }
1396 
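/* Set up the iova allocator, address widths and the top-level page table for a new domain */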
1397 static int domain_init(struct dmar_domain *domain, int guest_width)
1398 {
1399 	struct intel_iommu *iommu;
1400 	int adjust_width, agaw;
1401 	unsigned long sagaw;
1402 
1403 	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1404 	spin_lock_init(&domain->mapping_lock);
1405 	spin_lock_init(&domain->iommu_lock);
1406 
1407 	domain_reserve_special_ranges(domain);
1408 
1409 	/* calculate AGAW */
1410 	iommu = domain_get_iommu(domain);
1411 	if (guest_width > cap_mgaw(iommu->cap))
1412 		guest_width = cap_mgaw(iommu->cap);
1413 	domain->gaw = guest_width;
1414 	adjust_width = guestwidth_to_adjustwidth(guest_width);
1415 	agaw = width_to_agaw(adjust_width);
1416 	sagaw = cap_sagaw(iommu->cap);
1417 	if (!test_bit(agaw, &sagaw)) {
1418 		/* hardware doesn't support it, choose a bigger one */
1419 		pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1420 		agaw = find_next_bit(&sagaw, 5, agaw);
1421 		if (agaw >= 5)
1422 			return -ENODEV;
1423 	}
1424 	domain->agaw = agaw;
1425 	INIT_LIST_HEAD(&domain->devices);
1426 
1427 	if (ecap_coherent(iommu->ecap))
1428 		domain->iommu_coherency = 1;
1429 	else
1430 		domain->iommu_coherency = 0;
1431 
1432 	domain->iommu_count = 1;
1433 
1434 	/* always allocate the top pgd */
1435 	domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1436 	if (!domain->pgd)
1437 		return -ENOMEM;
1438 	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1439 	return 0;
1440 }
1441 
1442 static void domain_exit(struct dmar_domain *domain)
1443 {
1444 	u64 end;
1445 
1446 	/* Domain 0 is reserved, so don't process it */
1447 	if (!domain)
1448 		return;
1449 
1450 	domain_remove_dev_info(domain);
1451 	/* destroy iovas */
1452 	put_iova_domain(&domain->iovad);
1453 	end = DOMAIN_MAX_ADDR(domain->gaw);
1454 	end = end & (~PAGE_MASK);
1455 
1456 	/* clear ptes */
1457 	dma_pte_clear_range(domain, 0, end);
1458 
1459 	/* free page tables */
1460 	dma_pte_free_pagetable(domain, 0, end);
1461 
1462 	iommu_free_domain(domain);
1463 	free_domain_mem(domain);
1464 }
1465 
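/* Install a context entry so that bus/devfn translates through this domain's page tables */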
1466 static int domain_context_mapping_one(struct dmar_domain *domain,
1467 		u8 bus, u8 devfn)
1468 {
1469 	struct context_entry *context;
1470 	unsigned long flags;
1471 	struct intel_iommu *iommu;
1472 	struct dma_pte *pgd;
1473 	unsigned long num;
1474 	unsigned long ndomains;
1475 	int id;
1476 	int agaw;
1477 
1478 	pr_debug("Set context mapping for %02x:%02x.%d\n",
1479 		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1480 	BUG_ON(!domain->pgd);
1481 
1482 	iommu = device_to_iommu(bus, devfn);
1483 	if (!iommu)
1484 		return -ENODEV;
1485 
1486 	context = device_to_context_entry(iommu, bus, devfn);
1487 	if (!context)
1488 		return -ENOMEM;
1489 	spin_lock_irqsave(&iommu->lock, flags);
1490 	if (context_present(context)) {
1491 		spin_unlock_irqrestore(&iommu->lock, flags);
1492 		return 0;
1493 	}
1494 
1495 	id = domain->id;
1496 	pgd = domain->pgd;
1497 
1498 	if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
1499 		int found = 0;
1500 
1501 		/* find an available domain id for this device in iommu */
1502 		ndomains = cap_ndoms(iommu->cap);
1503 		num = find_first_bit(iommu->domain_ids, ndomains);
1504 		for (; num < ndomains; ) {
1505 			if (iommu->domains[num] == domain) {
1506 				id = num;
1507 				found = 1;
1508 				break;
1509 			}
1510 			num = find_next_bit(iommu->domain_ids,
1511 					    cap_ndoms(iommu->cap), num+1);
1512 		}
1513 
1514 		if (found == 0) {
1515 			num = find_first_zero_bit(iommu->domain_ids, ndomains);
1516 			if (num >= ndomains) {
1517 				spin_unlock_irqrestore(&iommu->lock, flags);
1518 				printk(KERN_ERR "IOMMU: no free domain ids\n");
1519 				return -EFAULT;
1520 			}
1521 
1522 			set_bit(num, iommu->domain_ids);
1523 			iommu->domains[num] = domain;
1524 			id = num;
1525 		}
1526 
1527 		/* Skip top levels of page tables for
1528 		 * iommus which have a smaller agaw than the default.
1529 		 */
1530 		for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1531 			pgd = phys_to_virt(dma_pte_addr(pgd));
1532 			if (!dma_pte_present(pgd)) {
1533 				spin_unlock_irqrestore(&iommu->lock, flags);
1534 				return -ENOMEM;
1535 			}
1536 		}
1537 	}
1538 
1539 	context_set_domain_id(context, id);
1540 	context_set_address_width(context, iommu->agaw);
1541 	context_set_address_root(context, virt_to_phys(pgd));
1542 	context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
1543 	context_set_fault_enable(context);
1544 	context_set_present(context);
1545 	domain_flush_cache(domain, context, sizeof(*context));
1546 
1547 	/* it's a non-present to present mapping */
1548 	if (iommu->flush.flush_context(iommu, domain->id,
1549 		(((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1550 		DMA_CCMD_DEVICE_INVL, 1))
1551 		iommu_flush_write_buffer(iommu);
1552 	else
1553 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1554 
1555 	spin_unlock_irqrestore(&iommu->lock, flags);
1556 
1557 	spin_lock_irqsave(&domain->iommu_lock, flags);
1558 	if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1559 		domain->iommu_count++;
1560 		domain_update_iommu_coherency(domain);
1561 	}
1562 	spin_unlock_irqrestore(&domain->iommu_lock, flags);
1563 	return 0;
1564 }
1565 
1566 static int
1567 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1568 {
1569 	int ret;
1570 	struct pci_dev *tmp, *parent;
1571 
1572 	ret = domain_context_mapping_one(domain, pdev->bus->number,
1573 		pdev->devfn);
1574 	if (ret)
1575 		return ret;
1576 
1577 	/* dependent device mapping */
1578 	tmp = pci_find_upstream_pcie_bridge(pdev);
1579 	if (!tmp)
1580 		return 0;
1581 	/* Secondary interface's bus number and devfn 0 */
1582 	parent = pdev->bus->self;
1583 	while (parent != tmp) {
1584 		ret = domain_context_mapping_one(domain, parent->bus->number,
1585 			parent->devfn);
1586 		if (ret)
1587 			return ret;
1588 		parent = parent->bus->self;
1589 	}
1590 	if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1591 		return domain_context_mapping_one(domain,
1592 			tmp->subordinate->number, 0);
1593 	else /* this is a legacy PCI bridge */
1594 		return domain_context_mapping_one(domain,
1595 			tmp->bus->number, tmp->devfn);
1596 }
1597 
1598 static int domain_context_mapped(struct pci_dev *pdev)
1599 {
1600 	int ret;
1601 	struct pci_dev *tmp, *parent;
1602 	struct intel_iommu *iommu;
1603 
1604 	iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
1605 	if (!iommu)
1606 		return -ENODEV;
1607 
1608 	ret = device_context_mapped(iommu,
1609 		pdev->bus->number, pdev->devfn);
1610 	if (!ret)
1611 		return ret;
1612 	/* dependent device mapping */
1613 	tmp = pci_find_upstream_pcie_bridge(pdev);
1614 	if (!tmp)
1615 		return ret;
1616 	/* Secondary interface's bus number and devfn 0 */
1617 	parent = pdev->bus->self;
1618 	while (parent != tmp) {
1619 		ret = device_context_mapped(iommu, parent->bus->number,
1620 			parent->devfn);
1621 		if (!ret)
1622 			return ret;
1623 		parent = parent->bus->self;
1624 	}
1625 	if (tmp->is_pcie)
1626 		return device_context_mapped(iommu,
1627 			tmp->subordinate->number, 0);
1628 	else
1629 		return device_context_mapped(iommu,
1630 			tmp->bus->number, tmp->devfn);
1631 }
1632 
1633 static int
1634 domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1635 			u64 hpa, size_t size, int prot)
1636 {
1637 	u64 start_pfn, end_pfn;
1638 	struct dma_pte *pte;
1639 	int index;
1640 	int addr_width = agaw_to_width(domain->agaw);
1641 
1642 	hpa &= (((u64)1) << addr_width) - 1;
1643 
1644 	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1645 		return -EINVAL;
1646 	iova &= PAGE_MASK;
1647 	start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1648 	end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
1649 	index = 0;
1650 	while (start_pfn < end_pfn) {
1651 		pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
1652 		if (!pte)
1653 			return -ENOMEM;
1654 		/* We don't need lock here, nobody else
1655 		 * touches the iova range
1656 		 */
1657 		BUG_ON(dma_pte_addr(pte));
1658 		dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
1659 		dma_set_pte_prot(pte, prot);
1660 		domain_flush_cache(domain, pte, sizeof(*pte));
1661 		start_pfn++;
1662 		index++;
1663 	}
1664 	return 0;
1665 }
1666 
1667 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1668 {
1669 	if (!iommu)
1670 		return;
1671 
1672 	clear_context_table(iommu, bus, devfn);
1673 	iommu->flush.flush_context(iommu, 0, 0, 0,
1674 					   DMA_CCMD_GLOBAL_INVL, 0);
1675 	iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1676 					 DMA_TLB_GLOBAL_FLUSH, 0);
1677 }
1678 
1679 static void domain_remove_dev_info(struct dmar_domain *domain)
1680 {
1681 	struct device_domain_info *info;
1682 	unsigned long flags;
1683 	struct intel_iommu *iommu;
1684 
1685 	spin_lock_irqsave(&device_domain_lock, flags);
1686 	while (!list_empty(&domain->devices)) {
1687 		info = list_entry(domain->devices.next,
1688 			struct device_domain_info, link);
1689 		list_del(&info->link);
1690 		list_del(&info->global);
1691 		if (info->dev)
1692 			info->dev->dev.archdata.iommu = NULL;
1693 		spin_unlock_irqrestore(&device_domain_lock, flags);
1694 
1695 		iommu = device_to_iommu(info->bus, info->devfn);
1696 		iommu_detach_dev(iommu, info->bus, info->devfn);
1697 		free_devinfo_mem(info);
1698 
1699 		spin_lock_irqsave(&device_domain_lock, flags);
1700 	}
1701 	spin_unlock_irqrestore(&device_domain_lock, flags);
1702 }
1703 
1704 /*
1705  * find_domain
1706  * Note: we use struct pci_dev->dev.archdata.iommu to store the info
1707  */
1708 static struct dmar_domain *
1709 find_domain(struct pci_dev *pdev)
1710 {
1711 	struct device_domain_info *info;
1712 
1713 	/* No lock here, assumes no domain exit in normal case */
1714 	info = pdev->dev.archdata.iommu;
1715 	if (info)
1716 		return info->domain;
1717 	return NULL;
1718 }
1719 
1720 /* domain is initialized */
1721 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1722 {
1723 	struct dmar_domain *domain, *found = NULL;
1724 	struct intel_iommu *iommu;
1725 	struct dmar_drhd_unit *drhd;
1726 	struct device_domain_info *info, *tmp;
1727 	struct pci_dev *dev_tmp;
1728 	unsigned long flags;
1729 	int bus = 0, devfn = 0;
1730 
1731 	domain = find_domain(pdev);
1732 	if (domain)
1733 		return domain;
1734 
1735 	dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1736 	if (dev_tmp) {
1737 		if (dev_tmp->is_pcie) {
1738 			bus = dev_tmp->subordinate->number;
1739 			devfn = 0;
1740 		} else {
1741 			bus = dev_tmp->bus->number;
1742 			devfn = dev_tmp->devfn;
1743 		}
1744 		spin_lock_irqsave(&device_domain_lock, flags);
1745 		list_for_each_entry(info, &device_domain_list, global) {
1746 			if (info->bus == bus && info->devfn == devfn) {
1747 				found = info->domain;
1748 				break;
1749 			}
1750 		}
1751 		spin_unlock_irqrestore(&device_domain_lock, flags);
1752 		/* pcie-pci bridge already has a domain, use it */
1753 		if (found) {
1754 			domain = found;
1755 			goto found_domain;
1756 		}
1757 	}
1758 
1759 	/* Allocate new domain for the device */
1760 	drhd = dmar_find_matched_drhd_unit(pdev);
1761 	if (!drhd) {
1762 		printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1763 			pci_name(pdev));
1764 		return NULL;
1765 	}
1766 	iommu = drhd->iommu;
1767 
1768 	domain = iommu_alloc_domain(iommu);
1769 	if (!domain)
1770 		goto error;
1771 
1772 	if (domain_init(domain, gaw)) {
1773 		domain_exit(domain);
1774 		goto error;
1775 	}
1776 
1777 	/* register pcie-to-pci device */
1778 	if (dev_tmp) {
1779 		info = alloc_devinfo_mem();
1780 		if (!info) {
1781 			domain_exit(domain);
1782 			goto error;
1783 		}
1784 		info->bus = bus;
1785 		info->devfn = devfn;
1786 		info->dev = NULL;
1787 		info->domain = domain;
1788 		/* This domain is shared by devices under p2p bridge */
1789 		domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
1790 
1791 		/* pcie-to-pci bridge already has a domain, use it */
1792 		found = NULL;
1793 		spin_lock_irqsave(&device_domain_lock, flags);
1794 		list_for_each_entry(tmp, &device_domain_list, global) {
1795 			if (tmp->bus == bus && tmp->devfn == devfn) {
1796 				found = tmp->domain;
1797 				break;
1798 			}
1799 		}
1800 		if (found) {
1801 			free_devinfo_mem(info);
1802 			domain_exit(domain);
1803 			domain = found;
1804 		} else {
1805 			list_add(&info->link, &domain->devices);
1806 			list_add(&info->global, &device_domain_list);
1807 		}
1808 		spin_unlock_irqrestore(&device_domain_lock, flags);
1809 	}
1810 
1811 found_domain:
1812 	info = alloc_devinfo_mem();
1813 	if (!info)
1814 		goto error;
1815 	info->bus = pdev->bus->number;
1816 	info->devfn = pdev->devfn;
1817 	info->dev = pdev;
1818 	info->domain = domain;
1819 	spin_lock_irqsave(&device_domain_lock, flags);
1820 	/* somebody else may have set up this device in the meantime */
1821 	found = find_domain(pdev);
1822 	if (found != NULL) {
1823 		spin_unlock_irqrestore(&device_domain_lock, flags);
1824 		if (found != domain) {
1825 			domain_exit(domain);
1826 			domain = found;
1827 		}
1828 		free_devinfo_mem(info);
1829 		return domain;
1830 	}
1831 	list_add(&info->link, &domain->devices);
1832 	list_add(&info->global, &device_domain_list);
1833 	pdev->dev.archdata.iommu = info;
1834 	spin_unlock_irqrestore(&device_domain_lock, flags);
1835 	return domain;
1836 error:
1837 	/* recheck here; another thread may have set it up meanwhile */
1838 	return find_domain(pdev);
1839 }
1840 
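/*
 * Install a 1:1 (identity) mapping of [start, end) for @pdev: the range
 * is first reserved in the domain's IOVA allocator so it can never be
 * handed out for regular DMA, then mapped read/write in the page tables
 * and bound to the device's context entry.  Used for RMRR regions and
 * for the graphics/ISA workarounds further down.
 */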
1841 static int iommu_prepare_identity_map(struct pci_dev *pdev,
1842 				      unsigned long long start,
1843 				      unsigned long long end)
1844 {
1845 	struct dmar_domain *domain;
1846 	unsigned long size;
1847 	unsigned long long base;
1848 	int ret;
1849 
1850 	printk(KERN_INFO
1851 		"IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1852 		pci_name(pdev), start, end);
1853 	/* page table init */
1854 	domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1855 	if (!domain)
1856 		return -ENOMEM;
1857 
1858 	/* The address might not be aligned */
1859 	base = start & PAGE_MASK;
1860 	size = end - base;
1861 	size = PAGE_ALIGN(size);
1862 	if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1863 			IOVA_PFN(base + size) - 1)) {
1864 		printk(KERN_ERR "IOMMU: reserve iova failed\n");
1865 		ret = -ENOMEM;
1866 		goto error;
1867 	}
1868 
1869 	pr_debug("Mapping reserved region %lx@%llx for %s\n",
1870 		size, base, pci_name(pdev));
1871 	/*
1872 	 * The RMRR range might overlap with physical memory that is
1873 	 * already mapped, so clear any existing mapping first
1874 	 */
1875 	dma_pte_clear_range(domain, base, base + size);
1876 
1877 	ret = domain_page_mapping(domain, base, base, size,
1878 		DMA_PTE_READ|DMA_PTE_WRITE);
1879 	if (ret)
1880 		goto error;
1881 
1882 	/* context entry init */
1883 	ret = domain_context_mapping(domain, pdev);
1884 	if (!ret)
1885 		return 0;
1886 error:
1887 	domain_exit(domain);
1888 	return ret;
1889 
1890 }
1891 
1892 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1893 	struct pci_dev *pdev)
1894 {
1895 	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1896 		return 0;
1897 	return iommu_prepare_identity_map(pdev, rmrr->base_address,
1898 		rmrr->end_address + 1);
1899 }
1900 
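/*
 * Graphics workaround: when CONFIG_DMAR_GFX_WA is set, every display-class
 * device that is not already bypassing the IOMMU gets an identity mapping
 * of all active memory regions on every online node, so DMA with raw
 * physical addresses keeps working.
 */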
1901 #ifdef CONFIG_DMAR_GFX_WA
1902 struct iommu_prepare_data {
1903 	struct pci_dev *pdev;
1904 	int ret;
1905 };
1906 
1907 static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1908 					 unsigned long end_pfn, void *datax)
1909 {
1910 	struct iommu_prepare_data *data;
1911 
1912 	data = (struct iommu_prepare_data *)datax;
1913 
1914 	data->ret = iommu_prepare_identity_map(data->pdev,
1915 				start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1916 	return data->ret;
1917 
1918 }
1919 
1920 static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1921 {
1922 	int nid;
1923 	struct iommu_prepare_data data;
1924 
1925 	data.pdev = pdev;
1926 	data.ret = 0;
1927 
1928 	for_each_online_node(nid) {
1929 		work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1930 		if (data.ret)
1931 			return data.ret;
1932 	}
1933 	return data.ret;
1934 }
1935 
1936 static void __init iommu_prepare_gfx_mapping(void)
1937 {
1938 	struct pci_dev *pdev = NULL;
1939 	int ret;
1940 
1941 	for_each_pci_dev(pdev) {
1942 		if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1943 				!IS_GFX_DEVICE(pdev))
1944 			continue;
1945 		printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1946 			pci_name(pdev));
1947 		ret = iommu_prepare_with_active_regions(pdev);
1948 		if (ret)
1949 			printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1950 	}
1951 }
1952 #else /* !CONFIG_DMAR_GFX_WA */
1953 static inline void iommu_prepare_gfx_mapping(void)
1954 {
1955 	return;
1956 }
1957 #endif
1958 
1959 #ifdef CONFIG_DMAR_FLOPPY_WA
1960 static inline void iommu_prepare_isa(void)
1961 {
1962 	struct pci_dev *pdev;
1963 	int ret;
1964 
1965 	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1966 	if (!pdev)
1967 		return;
1968 
1969 	printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1970 	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1971 
1972 	if (ret)
1973 		printk(KERN_ERR "IOMMU: Failed to create 0-16M identity map, "
1974 			"floppy might not work\n");
1975 
1976 }
1977 #else
1978 static inline void iommu_prepare_isa(void)
1979 {
1980 	return;
1981 }
1982 #endif /* !CONFIG_DMAR_FLOPPY_WA */
1983 
1984 static int __init init_dmars(void)
1985 {
1986 	struct dmar_drhd_unit *drhd;
1987 	struct dmar_rmrr_unit *rmrr;
1988 	struct pci_dev *pdev;
1989 	struct intel_iommu *iommu;
1990 	int i, ret, unit = 0;
1991 
1992 	/*
1993 	 * for each drhd
1994 	 *    allocate root
1995 	 *    initialize and program root entry to not present
1996 	 * endfor
1997 	 */
1998 	for_each_drhd_unit(drhd) {
1999 		g_num_of_iommus++;
2000 		/*
2001 		 * lock not needed as this is only incremented in the
2002 		 * single-threaded kernel __init code path; all other
2003 		 * accesses are read only
2004 		 */
2005 	}
2006 
2007 	g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2008 			GFP_KERNEL);
2009 	if (!g_iommus) {
2010 		printk(KERN_ERR "Allocating global iommu array failed\n");
2011 		ret = -ENOMEM;
2012 		goto error;
2013 	}
2014 
2015 	deferred_flush = kzalloc(g_num_of_iommus *
2016 		sizeof(struct deferred_flush_tables), GFP_KERNEL);
2017 	if (!deferred_flush) {
2018 		kfree(g_iommus);
2019 		ret = -ENOMEM;
2020 		goto error;
2021 	}
2022 
2023 	for_each_drhd_unit(drhd) {
2024 		if (drhd->ignored)
2025 			continue;
2026 
2027 		iommu = drhd->iommu;
2028 		g_iommus[iommu->seq_id] = iommu;
2029 
2030 		ret = iommu_init_domains(iommu);
2031 		if (ret)
2032 			goto error;
2033 
2034 		/*
2035 		 * TBD:
2036 		 * we could share the same root & context tables
2037 		 * among all IOMMUs. Need to split it later.
2038 		 */
2039 		ret = iommu_alloc_root_entry(iommu);
2040 		if (ret) {
2041 			printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2042 			goto error;
2043 		}
2044 	}
2045 
2046 	for_each_drhd_unit(drhd) {
2047 		if (drhd->ignored)
2048 			continue;
2049 
2050 		iommu = drhd->iommu;
2051 		if (dmar_enable_qi(iommu)) {
2052 			/*
2053 			 * Queued Invalidate not enabled, use Register Based
2054 			 * Invalidate
2055 			 */
2056 			iommu->flush.flush_context = __iommu_flush_context;
2057 			iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2058 			printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
2059 			       "invalidation\n",
2060 			       (unsigned long long)drhd->reg_base_addr);
2061 		} else {
2062 			iommu->flush.flush_context = qi_flush_context;
2063 			iommu->flush.flush_iotlb = qi_flush_iotlb;
2064 			printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
2065 			       "invalidation\n",
2066 			       (unsigned long long)drhd->reg_base_addr);
2067 		}
2068 	}
2069 
2070 	/*
2071 	 * For each rmrr
2072 	 *   for each dev attached to rmrr
2073 	 *   do
2074 	 *     locate drhd for dev, alloc domain for dev
2075 	 *     allocate free domain
2076 	 *     allocate page table entries for rmrr
2077 	 *     if context not allocated for bus
2078 	 *           allocate and init context
2079 	 *           set present in root table for this bus
2080 	 *     init context with domain, translation etc
2081 	 *    endfor
2082 	 * endfor
2083 	 */
2084 	for_each_rmrr_units(rmrr) {
2085 		for (i = 0; i < rmrr->devices_cnt; i++) {
2086 			pdev = rmrr->devices[i];
2087 			/* some BIOSes list non-existent devices in the DMAR table */
2088 			if (!pdev)
2089 				continue;
2090 			ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2091 			if (ret)
2092 				printk(KERN_ERR
2093 				 "IOMMU: mapping reserved region failed\n");
2094 		}
2095 	}
2096 
2097 	iommu_prepare_gfx_mapping();
2098 
2099 	iommu_prepare_isa();
2100 
2101 	/*
2102 	 * for each drhd
2103 	 *   enable fault log
2104 	 *   global invalidate context cache
2105 	 *   global invalidate iotlb
2106 	 *   enable translation
2107 	 */
2108 	for_each_drhd_unit(drhd) {
2109 		if (drhd->ignored)
2110 			continue;
2111 		iommu = drhd->iommu;
2112 		sprintf(iommu->name, "dmar%d", unit++);
2113 
2114 		iommu_flush_write_buffer(iommu);
2115 
2116 		ret = dmar_set_interrupt(iommu);
2117 		if (ret)
2118 			goto error;
2119 
2120 		iommu_set_root_entry(iommu);
2121 
2122 		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
2123 					   0);
2124 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
2125 					 0);
2126 		iommu_disable_protect_mem_regions(iommu);
2127 
2128 		ret = iommu_enable_translation(iommu);
2129 		if (ret)
2130 			goto error;
2131 	}
2132 
2133 	return 0;
2134 error:
2135 	for_each_drhd_unit(drhd) {
2136 		if (drhd->ignored)
2137 			continue;
2138 		iommu = drhd->iommu;
2139 		free_iommu(iommu);
2140 	}
2141 	kfree(g_iommus);
2142 	return ret;
2143 }
2144 
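/*
 * aligned_size() returns the buffer length rounded up to whole pages,
 * taking the offset within the first page into account.  For example,
 * assuming 4 KiB pages (PAGE_SIZE is architecture dependent):
 * host_addr = 0x12345678, size = 0x1000 gives 0x678 + 0x1000 = 0x1678,
 * which rounds up to 0x2000 -- the buffer straddles two pages.
 */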
2145 static inline u64 aligned_size(u64 host_addr, size_t size)
2146 {
2147 	u64 addr;
2148 	addr = (host_addr & (~PAGE_MASK)) + size;
2149 	return PAGE_ALIGN(addr);
2150 }
2151 
2152 struct iova *
2153 iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
2154 {
2155 	struct iova *piova;
2156 
2157 	/* Make sure it's in range */
2158 	end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
2159 	if (!size || (IOVA_START_ADDR + size > end))
2160 		return NULL;
2161 
2162 	piova = alloc_iova(&domain->iovad,
2163 			size >> PAGE_SHIFT, IOVA_PFN(end), 1);
2164 	return piova;
2165 }
2166 
2167 static struct iova *
2168 __intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
2169 		   size_t size, u64 dma_mask)
2170 {
2171 	struct pci_dev *pdev = to_pci_dev(dev);
2172 	struct iova *iova = NULL;
2173 
2174 	if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
2175 		iova = iommu_alloc_iova(domain, size, dma_mask);
2176 	else {
2177 		/*
2178 		 * First try to allocate an io virtual address in
2179 		 * DMA_32BIT_MASK and if that fails then try allocating
2180 		 * from higher range
2181 		 */
2182 		iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
2183 		if (!iova)
2184 			iova = iommu_alloc_iova(domain, size, dma_mask);
2185 	}
2186 
2187 	if (!iova) {
2188 		printk(KERN_ERR "Allocating iova for %s failed\n", pci_name(pdev));
2189 		return NULL;
2190 	}
2191 
2192 	return iova;
2193 }
2194 
2195 static struct dmar_domain *
2196 get_valid_domain_for_dev(struct pci_dev *pdev)
2197 {
2198 	struct dmar_domain *domain;
2199 	int ret;
2200 
2201 	domain = get_domain_for_dev(pdev,
2202 			DEFAULT_DOMAIN_ADDRESS_WIDTH);
2203 	if (!domain) {
2204 		printk(KERN_ERR
2205 			"Allocating domain for %s failed\n", pci_name(pdev));
2206 		return NULL;
2207 	}
2208 
2209 	/* make sure context mapping is ok */
2210 	if (unlikely(!domain_context_mapped(pdev))) {
2211 		ret = domain_context_mapping(domain, pdev);
2212 		if (ret) {
2213 			printk(KERN_ERR
2214 				"Domain context map for %s failed\n",
2215 				pci_name(pdev));
2216 			return NULL;
2217 		}
2218 	}
2219 
2220 	return domain;
2221 }
2222 
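/*
 * Core mapping path for a single buffer: look up (or create) the device's
 * domain, allocate an IOVA range covering all pages the buffer touches,
 * derive read/write protection from the DMA direction (read access is
 * also granted when the IOMMU cannot do zero-length reads, see cap_zlr),
 * install the page-table entries and finally flush the IOTLB for the
 * newly present mapping.
 */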
2223 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2224 				     size_t size, int dir, u64 dma_mask)
2225 {
2226 	struct pci_dev *pdev = to_pci_dev(hwdev);
2227 	struct dmar_domain *domain;
2228 	phys_addr_t start_paddr;
2229 	struct iova *iova;
2230 	int prot = 0;
2231 	int ret;
2232 	struct intel_iommu *iommu;
2233 
2234 	BUG_ON(dir == DMA_NONE);
2235 	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2236 		return paddr;
2237 
2238 	domain = get_valid_domain_for_dev(pdev);
2239 	if (!domain)
2240 		return 0;
2241 
2242 	iommu = domain_get_iommu(domain);
2243 	size = aligned_size((u64)paddr, size);
2244 
2245 	iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2246 	if (!iova)
2247 		goto error;
2248 
2249 	start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2250 
2251 	/*
2252 	 * Check if DMAR supports zero-length reads on write only
2253 	 * mappings..
2254 	 */
2255 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2256 			!cap_zlr(iommu->cap))
2257 		prot |= DMA_PTE_READ;
2258 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2259 		prot |= DMA_PTE_WRITE;
2260 	/*
2261 	 * paddr to (paddr + size) might span a partial page, so we map the
2262 	 * whole page.  Note: if two parts of one page are mapped separately,
2263 	 * we might end up with two guest addresses mapping to the same host
2264 	 * paddr, but this is not a big problem
2265 	 */
2266 	ret = domain_page_mapping(domain, start_paddr,
2267 		((u64)paddr) & PAGE_MASK, size, prot);
2268 	if (ret)
2269 		goto error;
2270 
2271 	/* it's a non-present to present mapping */
2272 	ret = iommu_flush_iotlb_psi(iommu, domain->id,
2273 			start_paddr, size >> VTD_PAGE_SHIFT, 1);
2274 	if (ret)
2275 		iommu_flush_write_buffer(iommu);
2276 
2277 	return start_paddr + ((u64)paddr & (~PAGE_MASK));
2278 
2279 error:
2280 	if (iova)
2281 		__free_iova(&domain->iovad, iova);
2282 	printk(KERN_ERR "Device %s request: %lx@%llx dir %d --- failed\n",
2283 		pci_name(pdev), size, (unsigned long long)paddr, dir);
2284 	return 0;
2285 }
2286 
2287 dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
2288 			    size_t size, int dir)
2289 {
2290 	return __intel_map_single(hwdev, paddr, size, dir,
2291 				  to_pci_dev(hwdev)->dma_mask);
2292 }
2293 
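/*
 * Deferred unmapping: instead of flushing the IOTLB on every unmap, freed
 * IOVAs are queued per IOMMU (see add_unmap() below) and released in
 * batches -- from a 10 ms timer or once HIGH_WATER_MARK entries have
 * accumulated -- after a single global IOTLB flush per IOMMU.
 */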
2294 static void flush_unmaps(void)
2295 {
2296 	int i, j;
2297 
2298 	timer_on = 0;
2299 
2300 	/* just flush them all */
2301 	for (i = 0; i < g_num_of_iommus; i++) {
2302 		struct intel_iommu *iommu = g_iommus[i];
2303 		if (!iommu)
2304 			continue;
2305 
2306 		if (deferred_flush[i].next) {
2307 			iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2308 						 DMA_TLB_GLOBAL_FLUSH, 0);
2309 			for (j = 0; j < deferred_flush[i].next; j++) {
2310 				__free_iova(&deferred_flush[i].domain[j]->iovad,
2311 						deferred_flush[i].iova[j]);
2312 			}
2313 			deferred_flush[i].next = 0;
2314 		}
2315 	}
2316 
2317 	list_size = 0;
2318 }
2319 
2320 static void flush_unmaps_timeout(unsigned long data)
2321 {
2322 	unsigned long flags;
2323 
2324 	spin_lock_irqsave(&async_umap_flush_lock, flags);
2325 	flush_unmaps();
2326 	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2327 }
2328 
2329 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2330 {
2331 	unsigned long flags;
2332 	int next, iommu_id;
2333 	struct intel_iommu *iommu;
2334 
2335 	spin_lock_irqsave(&async_umap_flush_lock, flags);
2336 	if (list_size == HIGH_WATER_MARK)
2337 		flush_unmaps();
2338 
2339 	iommu = domain_get_iommu(dom);
2340 	iommu_id = iommu->seq_id;
2341 
2342 	next = deferred_flush[iommu_id].next;
2343 	deferred_flush[iommu_id].domain[next] = dom;
2344 	deferred_flush[iommu_id].iova[next] = iova;
2345 	deferred_flush[iommu_id].next++;
2346 
2347 	if (!timer_on) {
2348 		mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2349 		timer_on = 1;
2350 	}
2351 	list_size++;
2352 	spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2353 }
2354 
2355 void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
2356 			int dir)
2357 {
2358 	struct pci_dev *pdev = to_pci_dev(dev);
2359 	struct dmar_domain *domain;
2360 	unsigned long start_addr;
2361 	struct iova *iova;
2362 	struct intel_iommu *iommu;
2363 
2364 	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2365 		return;
2366 	domain = find_domain(pdev);
2367 	BUG_ON(!domain);
2368 
2369 	iommu = domain_get_iommu(domain);
2370 
2371 	iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2372 	if (!iova)
2373 		return;
2374 
2375 	start_addr = iova->pfn_lo << PAGE_SHIFT;
2376 	size = aligned_size((u64)dev_addr, size);
2377 
2378 	pr_debug("Device %s unmapping: %lx@%llx\n",
2379 		pci_name(pdev), size, (unsigned long long)start_addr);
2380 
2381 	/*  clear the whole page */
2382 	dma_pte_clear_range(domain, start_addr, start_addr + size);
2383 	/* free page tables */
2384 	dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2385 	if (intel_iommu_strict) {
2386 		if (iommu_flush_iotlb_psi(iommu,
2387 			domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
2388 			iommu_flush_write_buffer(iommu);
2389 		/* free iova */
2390 		__free_iova(&domain->iovad, iova);
2391 	} else {
2392 		add_unmap(domain, iova);
2393 		/*
2394 		 * Queue up the release of the unmap to save the roughly 1/6th
2395 		 * of the CPU time otherwise spent on the iotlb flush operation.
2396 		 */
2397 	}
2398 }
2399 
2400 void *intel_alloc_coherent(struct device *hwdev, size_t size,
2401 			   dma_addr_t *dma_handle, gfp_t flags)
2402 {
2403 	void *vaddr;
2404 	int order;
2405 
2406 	size = PAGE_ALIGN(size);
2407 	order = get_order(size);
2408 	flags &= ~(GFP_DMA | GFP_DMA32);
2409 
2410 	vaddr = (void *)__get_free_pages(flags, order);
2411 	if (!vaddr)
2412 		return NULL;
2413 	memset(vaddr, 0, size);
2414 
2415 	*dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2416 					 DMA_BIDIRECTIONAL,
2417 					 hwdev->coherent_dma_mask);
2418 	if (*dma_handle)
2419 		return vaddr;
2420 	free_pages((unsigned long)vaddr, order);
2421 	return NULL;
2422 }
2423 
2424 void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2425 			 dma_addr_t dma_handle)
2426 {
2427 	int order;
2428 
2429 	size = PAGE_ALIGN(size);
2430 	order = get_order(size);
2431 
2432 	intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2433 	free_pages((unsigned long)vaddr, order);
2434 }
2435 
2436 #define SG_ENT_VIRT_ADDRESS(sg)	(sg_virt((sg)))
2437 
2438 void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2439 		    int nelems, int dir)
2440 {
2441 	int i;
2442 	struct pci_dev *pdev = to_pci_dev(hwdev);
2443 	struct dmar_domain *domain;
2444 	unsigned long start_addr;
2445 	struct iova *iova;
2446 	size_t size = 0;
2447 	void *addr;
2448 	struct scatterlist *sg;
2449 	struct intel_iommu *iommu;
2450 
2451 	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2452 		return;
2453 
2454 	domain = find_domain(pdev);
2455 	BUG_ON(!domain);
2456 
2457 	iommu = domain_get_iommu(domain);
2458 
2459 	iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2460 	if (!iova)
2461 		return;
2462 	for_each_sg(sglist, sg, nelems, i) {
2463 		addr = SG_ENT_VIRT_ADDRESS(sg);
2464 		size += aligned_size((u64)addr, sg->length);
2465 	}
2466 
2467 	start_addr = iova->pfn_lo << PAGE_SHIFT;
2468 
2469 	/*  clear the whole page */
2470 	dma_pte_clear_range(domain, start_addr, start_addr + size);
2471 	/* free page tables */
2472 	dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2473 
2474 	if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
2475 			size >> VTD_PAGE_SHIFT, 0))
2476 		iommu_flush_write_buffer(iommu);
2477 
2478 	/* free iova */
2479 	__free_iova(&domain->iovad, iova);
2480 }
2481 
2482 static int intel_nontranslate_map_sg(struct device *hddev,
2483 	struct scatterlist *sglist, int nelems, int dir)
2484 {
2485 	int i;
2486 	struct scatterlist *sg;
2487 
2488 	for_each_sg(sglist, sg, nelems, i) {
2489 		BUG_ON(!sg_page(sg));
2490 		sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2491 		sg->dma_length = sg->length;
2492 	}
2493 	return nelems;
2494 }
2495 
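/*
 * Map a scatterlist with one contiguous IOVA allocation: the page-aligned
 * total of all elements is reserved once, each element is then mapped at
 * an increasing offset, and its dma_address gets the intra-page offset
 * added back.  Returns the number of mapped elements, or 0 on failure.
 */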
2496 int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2497 		 int dir)
2498 {
2499 	void *addr;
2500 	int i;
2501 	struct pci_dev *pdev = to_pci_dev(hwdev);
2502 	struct dmar_domain *domain;
2503 	size_t size = 0;
2504 	int prot = 0;
2505 	size_t offset = 0;
2506 	struct iova *iova = NULL;
2507 	int ret;
2508 	struct scatterlist *sg;
2509 	unsigned long start_addr;
2510 	struct intel_iommu *iommu;
2511 
2512 	BUG_ON(dir == DMA_NONE);
2513 	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2514 		return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2515 
2516 	domain = get_valid_domain_for_dev(pdev);
2517 	if (!domain)
2518 		return 0;
2519 
2520 	iommu = domain_get_iommu(domain);
2521 
2522 	for_each_sg(sglist, sg, nelems, i) {
2523 		addr = SG_ENT_VIRT_ADDRESS(sg);
2524 		addr = (void *)virt_to_phys(addr);
2525 		size += aligned_size((u64)addr, sg->length);
2526 	}
2527 
2528 	iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2529 	if (!iova) {
2530 		sglist->dma_length = 0;
2531 		return 0;
2532 	}
2533 
2534 	/*
2535 	 * Check if DMAR supports zero-length reads on write only
2536 	 * mappings..
2537 	 */
2538 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2539 			!cap_zlr(iommu->cap))
2540 		prot |= DMA_PTE_READ;
2541 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2542 		prot |= DMA_PTE_WRITE;
2543 
2544 	start_addr = iova->pfn_lo << PAGE_SHIFT;
2545 	offset = 0;
2546 	for_each_sg(sglist, sg, nelems, i) {
2547 		addr = SG_ENT_VIRT_ADDRESS(sg);
2548 		addr = (void *)virt_to_phys(addr);
2549 		size = aligned_size((u64)addr, sg->length);
2550 		ret = domain_page_mapping(domain, start_addr + offset,
2551 			((u64)addr) & PAGE_MASK,
2552 			size, prot);
2553 		if (ret) {
2554 			/*  clear the page */
2555 			dma_pte_clear_range(domain, start_addr,
2556 				  start_addr + offset);
2557 			/* free page tables */
2558 			dma_pte_free_pagetable(domain, start_addr,
2559 				  start_addr + offset);
2560 			/* free iova */
2561 			__free_iova(&domain->iovad, iova);
2562 			return 0;
2563 		}
2564 		sg->dma_address = start_addr + offset +
2565 				((u64)addr & (~PAGE_MASK));
2566 		sg->dma_length = sg->length;
2567 		offset += size;
2568 	}
2569 
2570 	/* it's a non-present to present mapping */
2571 	if (iommu_flush_iotlb_psi(iommu, domain->id,
2572 			start_addr, offset >> VTD_PAGE_SHIFT, 1))
2573 		iommu_flush_write_buffer(iommu);
2574 	return nelems;
2575 }
2576 
2577 static struct dma_mapping_ops intel_dma_ops = {
2578 	.alloc_coherent = intel_alloc_coherent,
2579 	.free_coherent = intel_free_coherent,
2580 	.map_single = intel_map_single,
2581 	.unmap_single = intel_unmap_single,
2582 	.map_sg = intel_map_sg,
2583 	.unmap_sg = intel_unmap_sg,
2584 };
2585 
2586 static inline int iommu_domain_cache_init(void)
2587 {
2588 	int ret = 0;
2589 
2590 	iommu_domain_cache = kmem_cache_create("iommu_domain",
2591 					 sizeof(struct dmar_domain),
2592 					 0,
2593 					 SLAB_HWCACHE_ALIGN,
2594 
2595 					 NULL);
2596 	if (!iommu_domain_cache) {
2597 		printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2598 		ret = -ENOMEM;
2599 	}
2600 
2601 	return ret;
2602 }
2603 
2604 static inline int iommu_devinfo_cache_init(void)
2605 {
2606 	int ret = 0;
2607 
2608 	iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2609 					 sizeof(struct device_domain_info),
2610 					 0,
2611 					 SLAB_HWCACHE_ALIGN,
2612 					 NULL);
2613 	if (!iommu_devinfo_cache) {
2614 		printk(KERN_ERR "Couldn't create devinfo cache\n");
2615 		ret = -ENOMEM;
2616 	}
2617 
2618 	return ret;
2619 }
2620 
2621 static inline int iommu_iova_cache_init(void)
2622 {
2623 	int ret = 0;
2624 
2625 	iommu_iova_cache = kmem_cache_create("iommu_iova",
2626 					 sizeof(struct iova),
2627 					 0,
2628 					 SLAB_HWCACHE_ALIGN,
2629 					 NULL);
2630 	if (!iommu_iova_cache) {
2631 		printk(KERN_ERR "Couldn't create iova cache\n");
2632 		ret = -ENOMEM;
2633 	}
2634 
2635 	return ret;
2636 }
2637 
2638 static int __init iommu_init_mempool(void)
2639 {
2640 	int ret;
2641 	ret = iommu_iova_cache_init();
2642 	if (ret)
2643 		return ret;
2644 
2645 	ret = iommu_domain_cache_init();
2646 	if (ret)
2647 		goto domain_error;
2648 
2649 	ret = iommu_devinfo_cache_init();
2650 	if (!ret)
2651 		return ret;
2652 
2653 	kmem_cache_destroy(iommu_domain_cache);
2654 domain_error:
2655 	kmem_cache_destroy(iommu_iova_cache);
2656 
2657 	return -ENOMEM;
2658 }
2659 
2660 static void __init iommu_exit_mempool(void)
2661 {
2662 	kmem_cache_destroy(iommu_devinfo_cache);
2663 	kmem_cache_destroy(iommu_domain_cache);
2664 	kmem_cache_destroy(iommu_iova_cache);
2665 
2666 }
2667 
2668 static void __init init_no_remapping_devices(void)
2669 {
2670 	struct dmar_drhd_unit *drhd;
2671 
2672 	for_each_drhd_unit(drhd) {
2673 		if (!drhd->include_all) {
2674 			int i;
2675 			for (i = 0; i < drhd->devices_cnt; i++)
2676 				if (drhd->devices[i] != NULL)
2677 					break;
2678 			/* ignore this DMAR unit if no PCI devices exist under it */
2679 			if (i == drhd->devices_cnt)
2680 				drhd->ignored = 1;
2681 		}
2682 	}
2683 
2684 	if (dmar_map_gfx)
2685 		return;
2686 
2687 	for_each_drhd_unit(drhd) {
2688 		int i;
2689 		if (drhd->ignored || drhd->include_all)
2690 			continue;
2691 
2692 		for (i = 0; i < drhd->devices_cnt; i++)
2693 			if (drhd->devices[i] &&
2694 				!IS_GFX_DEVICE(drhd->devices[i]))
2695 				break;
2696 
2697 		if (i < drhd->devices_cnt)
2698 			continue;
2699 
2700 		/* bypass IOMMU if it is just for gfx devices */
2701 		drhd->ignored = 1;
2702 		for (i = 0; i < drhd->devices_cnt; i++) {
2703 			if (!drhd->devices[i])
2704 				continue;
2705 			drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
2706 		}
2707 	}
2708 }
2709 
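/*
 * Boot-time entry point: parse the DMAR table and device scopes, bail out
 * if the IOMMU is disabled or swiotlb is in use, set up the slab caches
 * and reserved IOVA ranges, bring up all DMAR units via init_dmars(), and
 * finally install intel_dma_ops and register the generic iommu_ops.
 */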
2710 int __init intel_iommu_init(void)
2711 {
2712 	int ret = 0;
2713 
2714 	if (dmar_table_init())
2715 		return 	-ENODEV;
2716 
2717 	if (dmar_dev_scope_init())
2718 		return 	-ENODEV;
2719 
2720 	/*
2721 	 * Check the need for DMA-remapping initialization now.
2722 	 * The initialization above is also used by interrupt remapping.
2723 	 */
2724 	if (no_iommu || swiotlb || dmar_disabled)
2725 		return -ENODEV;
2726 
2727 	iommu_init_mempool();
2728 	dmar_init_reserved_ranges();
2729 
2730 	init_no_remapping_devices();
2731 
2732 	ret = init_dmars();
2733 	if (ret) {
2734 		printk(KERN_ERR "IOMMU: dmar init failed\n");
2735 		put_iova_domain(&reserved_iova_list);
2736 		iommu_exit_mempool();
2737 		return ret;
2738 	}
2739 	printk(KERN_INFO
2740 	"PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2741 
2742 	init_timer(&unmap_timer);
2743 	force_iommu = 1;
2744 	dma_ops = &intel_dma_ops;
2745 
2746 	register_iommu(&intel_iommu_ops);
2747 
2748 	return 0;
2749 }
2750 
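/*
 * The functions below implement the generic iommu_ops interface on top of
 * "virtual machine" domains.  Such a domain is not tied to a single DMAR
 * unit: iommu_bmp and iommu_count track which IOMMUs currently have
 * devices attached, and cache coherency is recomputed as devices come
 * and go.
 */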
2751 static int vm_domain_add_dev_info(struct dmar_domain *domain,
2752 				  struct pci_dev *pdev)
2753 {
2754 	struct device_domain_info *info;
2755 	unsigned long flags;
2756 
2757 	info = alloc_devinfo_mem();
2758 	if (!info)
2759 		return -ENOMEM;
2760 
2761 	info->bus = pdev->bus->number;
2762 	info->devfn = pdev->devfn;
2763 	info->dev = pdev;
2764 	info->domain = domain;
2765 
2766 	spin_lock_irqsave(&device_domain_lock, flags);
2767 	list_add(&info->link, &domain->devices);
2768 	list_add(&info->global, &device_domain_list);
2769 	pdev->dev.archdata.iommu = info;
2770 	spin_unlock_irqrestore(&device_domain_lock, flags);
2771 
2772 	return 0;
2773 }
2774 
2775 static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
2776 					  struct pci_dev *pdev)
2777 {
2778 	struct device_domain_info *info;
2779 	struct intel_iommu *iommu;
2780 	unsigned long flags;
2781 	int found = 0;
2782 	struct list_head *entry, *tmp;
2783 
2784 	iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
2785 	if (!iommu)
2786 		return;
2787 
2788 	spin_lock_irqsave(&device_domain_lock, flags);
2789 	list_for_each_safe(entry, tmp, &domain->devices) {
2790 		info = list_entry(entry, struct device_domain_info, link);
2791 		if (info->bus == pdev->bus->number &&
2792 		    info->devfn == pdev->devfn) {
2793 			list_del(&info->link);
2794 			list_del(&info->global);
2795 			if (info->dev)
2796 				info->dev->dev.archdata.iommu = NULL;
2797 			spin_unlock_irqrestore(&device_domain_lock, flags);
2798 
2799 			iommu_detach_dev(iommu, info->bus, info->devfn);
2800 			free_devinfo_mem(info);
2801 
2802 			spin_lock_irqsave(&device_domain_lock, flags);
2803 
2804 			if (found)
2805 				break;
2806 			else
2807 				continue;
2808 		}
2809 
2810 		/* if there are no other devices under the same iommu
2811 		 * owned by this domain, clear this iommu from iommu_bmp
2812 		 * and update the iommu count and coherency
2813 		 */
2814 		if (device_to_iommu(info->bus, info->devfn) == iommu)
2815 			found = 1;
2816 	}
2817 
2818 	if (found == 0) {
2819 		unsigned long tmp_flags;
2820 		spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
2821 		clear_bit(iommu->seq_id, &domain->iommu_bmp);
2822 		domain->iommu_count--;
2823 		domain_update_iommu_coherency(domain);
2824 		spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
2825 	}
2826 
2827 	spin_unlock_irqrestore(&device_domain_lock, flags);
2828 }
2829 
2830 static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
2831 {
2832 	struct device_domain_info *info;
2833 	struct intel_iommu *iommu;
2834 	unsigned long flags1, flags2;
2835 
2836 	spin_lock_irqsave(&device_domain_lock, flags1);
2837 	while (!list_empty(&domain->devices)) {
2838 		info = list_entry(domain->devices.next,
2839 			struct device_domain_info, link);
2840 		list_del(&info->link);
2841 		list_del(&info->global);
2842 		if (info->dev)
2843 			info->dev->dev.archdata.iommu = NULL;
2844 
2845 		spin_unlock_irqrestore(&device_domain_lock, flags1);
2846 
2847 		iommu = device_to_iommu(info->bus, info->devfn);
2848 		iommu_detach_dev(iommu, info->bus, info->devfn);
2849 
2850 		/* clear this iommu in iommu_bmp, update iommu count
2851 		 * and coherency
2852 		 */
2853 		spin_lock_irqsave(&domain->iommu_lock, flags2);
2854 		if (test_and_clear_bit(iommu->seq_id,
2855 				       &domain->iommu_bmp)) {
2856 			domain->iommu_count--;
2857 			domain_update_iommu_coherency(domain);
2858 		}
2859 		spin_unlock_irqrestore(&domain->iommu_lock, flags2);
2860 
2861 		free_devinfo_mem(info);
2862 		spin_lock_irqsave(&device_domain_lock, flags1);
2863 	}
2864 	spin_unlock_irqrestore(&device_domain_lock, flags1);
2865 }
2866 
2867 /* domain id for virtual machine domains; it won't be set in the context entry */
2868 static unsigned long vm_domid;
2869 
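/*
 * A VM domain may span IOMMUs with different address widths; the smallest
 * adjusted guest address width (agaw) among them bounds the highest
 * address the domain can map, which intel_iommu_map_range() checks.
 */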
2870 static int vm_domain_min_agaw(struct dmar_domain *domain)
2871 {
2872 	int i;
2873 	int min_agaw = domain->agaw;
2874 
2875 	i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
2876 	for (; i < g_num_of_iommus; ) {
2877 		if (min_agaw > g_iommus[i]->agaw)
2878 			min_agaw = g_iommus[i]->agaw;
2879 
2880 		i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
2881 	}
2882 
2883 	return min_agaw;
2884 }
2885 
2886 static struct dmar_domain *iommu_alloc_vm_domain(void)
2887 {
2888 	struct dmar_domain *domain;
2889 
2890 	domain = alloc_domain_mem();
2891 	if (!domain)
2892 		return NULL;
2893 
2894 	domain->id = vm_domid++;
2895 	memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
2896 	domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
2897 
2898 	return domain;
2899 }
2900 
2901 static int vm_domain_init(struct dmar_domain *domain, int guest_width)
2902 {
2903 	int adjust_width;
2904 
2905 	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
2906 	spin_lock_init(&domain->mapping_lock);
2907 	spin_lock_init(&domain->iommu_lock);
2908 
2909 	domain_reserve_special_ranges(domain);
2910 
2911 	/* calculate AGAW */
2912 	domain->gaw = guest_width;
2913 	adjust_width = guestwidth_to_adjustwidth(guest_width);
2914 	domain->agaw = width_to_agaw(adjust_width);
2915 
2916 	INIT_LIST_HEAD(&domain->devices);
2917 
2918 	domain->iommu_count = 0;
2919 	domain->iommu_coherency = 0;
2920 	domain->max_addr = 0;
2921 
2922 	/* always allocate the top pgd */
2923 	domain->pgd = (struct dma_pte *)alloc_pgtable_page();
2924 	if (!domain->pgd)
2925 		return -ENOMEM;
2926 	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
2927 	return 0;
2928 }
2929 
2930 static void iommu_free_vm_domain(struct dmar_domain *domain)
2931 {
2932 	unsigned long flags;
2933 	struct dmar_drhd_unit *drhd;
2934 	struct intel_iommu *iommu;
2935 	unsigned long i;
2936 	unsigned long ndomains;
2937 
2938 	for_each_drhd_unit(drhd) {
2939 		if (drhd->ignored)
2940 			continue;
2941 		iommu = drhd->iommu;
2942 
2943 		ndomains = cap_ndoms(iommu->cap);
2944 		i = find_first_bit(iommu->domain_ids, ndomains);
2945 		for (; i < ndomains; ) {
2946 			if (iommu->domains[i] == domain) {
2947 				spin_lock_irqsave(&iommu->lock, flags);
2948 				clear_bit(i, iommu->domain_ids);
2949 				iommu->domains[i] = NULL;
2950 				spin_unlock_irqrestore(&iommu->lock, flags);
2951 				break;
2952 			}
2953 			i = find_next_bit(iommu->domain_ids, ndomains, i+1);
2954 		}
2955 	}
2956 }
2957 
2958 static void vm_domain_exit(struct dmar_domain *domain)
2959 {
2960 	u64 end;
2961 
2962 	/* Domain 0 is reserved, so don't process it */
2963 	if (!domain)
2964 		return;
2965 
2966 	vm_domain_remove_all_dev_info(domain);
2967 	/* destroy iovas */
2968 	put_iova_domain(&domain->iovad);
2969 	end = DOMAIN_MAX_ADDR(domain->gaw);
2970 	end = end & (~VTD_PAGE_MASK);
2971 
2972 	/* clear ptes */
2973 	dma_pte_clear_range(domain, 0, end);
2974 
2975 	/* free page tables */
2976 	dma_pte_free_pagetable(domain, 0, end);
2977 
2978 	iommu_free_vm_domain(domain);
2979 	free_domain_mem(domain);
2980 }
2981 
2982 static int intel_iommu_domain_init(struct iommu_domain *domain)
2983 {
2984 	struct dmar_domain *dmar_domain;
2985 
2986 	dmar_domain = iommu_alloc_vm_domain();
2987 	if (!dmar_domain) {
2988 		printk(KERN_ERR
2989 			"intel_iommu_domain_init: dmar_domain == NULL\n");
2990 		return -ENOMEM;
2991 	}
2992 	if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2993 		printk(KERN_ERR
2994 			"intel_iommu_domain_init() failed\n");
2995 		vm_domain_exit(dmar_domain);
2996 		return -ENOMEM;
2997 	}
2998 	domain->priv = dmar_domain;
2999 
3000 	return 0;
3001 }
3002 
3003 static void intel_iommu_domain_destroy(struct iommu_domain *domain)
3004 {
3005 	struct dmar_domain *dmar_domain = domain->priv;
3006 
3007 	domain->priv = NULL;
3008 	vm_domain_exit(dmar_domain);
3009 }
3010 
3011 static int intel_iommu_attach_device(struct iommu_domain *domain,
3012 				     struct device *dev)
3013 {
3014 	struct dmar_domain *dmar_domain = domain->priv;
3015 	struct pci_dev *pdev = to_pci_dev(dev);
3016 	struct intel_iommu *iommu;
3017 	int addr_width;
3018 	u64 end;
3019 	int ret;
3020 
3021 	/* normally pdev is not mapped */
3022 	if (unlikely(domain_context_mapped(pdev))) {
3023 		struct dmar_domain *old_domain;
3024 
3025 		old_domain = find_domain(pdev);
3026 		if (old_domain) {
3027 			if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
3028 				vm_domain_remove_one_dev_info(old_domain, pdev);
3029 			else
3030 				domain_remove_dev_info(old_domain);
3031 		}
3032 	}
3033 
3034 	iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
3035 	if (!iommu)
3036 		return -ENODEV;
3037 
3038 	/* check if this iommu agaw is sufficient for max mapped address */
3039 	addr_width = agaw_to_width(iommu->agaw);
3040 	end = DOMAIN_MAX_ADDR(addr_width);
3041 	end = end & VTD_PAGE_MASK;
3042 	if (end < dmar_domain->max_addr) {
3043 		printk(KERN_ERR "%s: iommu agaw (%d) is not "
3044 		       "sufficient for the mapped address (%llx)\n",
3045 		       __func__, iommu->agaw, dmar_domain->max_addr);
3046 		return -EFAULT;
3047 	}
3048 
3049 	ret = domain_context_mapping(dmar_domain, pdev);
3050 	if (ret)
3051 		return ret;
3052 
3053 	ret = vm_domain_add_dev_info(dmar_domain, pdev);
3054 	return ret;
3055 }
3056 
3057 static void intel_iommu_detach_device(struct iommu_domain *domain,
3058 				      struct device *dev)
3059 {
3060 	struct dmar_domain *dmar_domain = domain->priv;
3061 	struct pci_dev *pdev = to_pci_dev(dev);
3062 
3063 	vm_domain_remove_one_dev_info(dmar_domain, pdev);
3064 }
3065 
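/*
 * Before mapping, grow the domain's max_addr and verify it still fits
 * under DOMAIN_MAX_ADDR for the smallest agaw among the IOMMUs involved;
 * for example, a 48-bit address width allows addresses up to
 * 2^48 - 1 (just under 256 TiB).
 */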
3066 static int intel_iommu_map_range(struct iommu_domain *domain,
3067 				 unsigned long iova, phys_addr_t hpa,
3068 				 size_t size, int iommu_prot)
3069 {
3070 	struct dmar_domain *dmar_domain = domain->priv;
3071 	u64 max_addr;
3072 	int addr_width;
3073 	int prot = 0;
3074 	int ret;
3075 
3076 	if (iommu_prot & IOMMU_READ)
3077 		prot |= DMA_PTE_READ;
3078 	if (iommu_prot & IOMMU_WRITE)
3079 		prot |= DMA_PTE_WRITE;
3080 
3081 	max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size);
3082 	if (dmar_domain->max_addr < max_addr) {
3083 		int min_agaw;
3084 		u64 end;
3085 
3086 		/* check if minimum agaw is sufficient for mapped address */
3087 		min_agaw = vm_domain_min_agaw(dmar_domain);
3088 		addr_width = agaw_to_width(min_agaw);
3089 		end = DOMAIN_MAX_ADDR(addr_width);
3090 		end = end & VTD_PAGE_MASK;
3091 		if (end < max_addr) {
3092 			printk(KERN_ERR "%s: iommu agaw (%d) is not "
3093 			       "sufficient for the mapped address (%llx)\n",
3094 			       __func__, min_agaw, max_addr);
3095 			return -EFAULT;
3096 		}
3097 		dmar_domain->max_addr = max_addr;
3098 	}
3099 
3100 	ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot);
3101 	return ret;
3102 }
3103 
3104 static void intel_iommu_unmap_range(struct iommu_domain *domain,
3105 				    unsigned long iova, size_t size)
3106 {
3107 	struct dmar_domain *dmar_domain = domain->priv;
3108 	dma_addr_t base;
3109 
3110 	/* The address might not be aligned */
3111 	base = iova & VTD_PAGE_MASK;
3112 	size = VTD_PAGE_ALIGN(size);
3113 	dma_pte_clear_range(dmar_domain, base, base + size);
3114 
3115 	if (dmar_domain->max_addr == base + size)
3116 		dmar_domain->max_addr = base;
3117 }
3118 
3119 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3120 					    unsigned long iova)
3121 {
3122 	struct dmar_domain *dmar_domain = domain->priv;
3123 	struct dma_pte *pte;
3124 	u64 phys = 0;
3125 
3126 	pte = addr_to_dma_pte(dmar_domain, iova);
3127 	if (pte)
3128 		phys = dma_pte_addr(pte);
3129 
3130 	return phys;
3131 }
3132 
3133 static struct iommu_ops intel_iommu_ops = {
3134 	.domain_init	= intel_iommu_domain_init,
3135 	.domain_destroy = intel_iommu_domain_destroy,
3136 	.attach_dev	= intel_iommu_attach_device,
3137 	.detach_dev	= intel_iommu_detach_device,
3138 	.map		= intel_iommu_map_range,
3139 	.unmap		= intel_iommu_unmap_range,
3140 	.iova_to_phys	= intel_iommu_iova_to_phys,
3141 };
3142 
3143 static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
3144 {
3145 	/*
3146 	 * Mobile 4 Series Chipset neglects to set RWBF capability,
3147 	 * but needs it:
3148 	 */
3149 	printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
3150 	rwbf_quirk = 1;
3151 }
3152 
3153 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
3154