1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright © 2006-2014 Intel Corporation.
4  *
5  * Authors: David Woodhouse <dwmw2@infradead.org>,
6  *          Ashok Raj <ashok.raj@intel.com>,
7  *          Shaohua Li <shaohua.li@intel.com>,
8  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
9  *          Fenghua Yu <fenghua.yu@intel.com>
10  *          Joerg Roedel <jroedel@suse.de>
11  */
12 
13 #define pr_fmt(fmt)     "DMAR: " fmt
14 #define dev_fmt(fmt)    pr_fmt(fmt)
15 
16 #include <linux/init.h>
17 #include <linux/bitmap.h>
18 #include <linux/debugfs.h>
19 #include <linux/export.h>
20 #include <linux/slab.h>
21 #include <linux/irq.h>
22 #include <linux/interrupt.h>
23 #include <linux/spinlock.h>
24 #include <linux/pci.h>
25 #include <linux/dmar.h>
26 #include <linux/dma-mapping.h>
27 #include <linux/mempool.h>
28 #include <linux/memory.h>
29 #include <linux/cpu.h>
30 #include <linux/timer.h>
31 #include <linux/io.h>
32 #include <linux/iova.h>
33 #include <linux/iommu.h>
34 #include <linux/intel-iommu.h>
35 #include <linux/syscore_ops.h>
36 #include <linux/tboot.h>
37 #include <linux/dmi.h>
38 #include <linux/pci-ats.h>
39 #include <linux/memblock.h>
40 #include <linux/dma-contiguous.h>
41 #include <linux/dma-direct.h>
42 #include <linux/crash_dump.h>
43 #include <linux/numa.h>
44 #include <linux/swiotlb.h>
45 #include <asm/irq_remapping.h>
46 #include <asm/cacheflush.h>
47 #include <asm/iommu.h>
48 #include <trace/events/intel_iommu.h>
49 
50 #include "irq_remapping.h"
51 #include "intel-pasid.h"
52 
53 #define ROOT_SIZE		VTD_PAGE_SIZE
54 #define CONTEXT_SIZE		VTD_PAGE_SIZE
55 
56 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
57 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
58 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
59 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
60 
61 #define IOAPIC_RANGE_START	(0xfee00000)
62 #define IOAPIC_RANGE_END	(0xfeefffff)
63 #define IOVA_START_ADDR		(0x1000)
64 
65 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
66 
67 #define MAX_AGAW_WIDTH 64
68 #define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
69 
70 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
71 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
72 
73 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
74    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
75 #define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
76 				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
77 #define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
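
/*
 * Worked example (illustrative note, not part of the driver): with a
 * guest address width of gaw = 48 and VTD_PAGE_SHIFT = 12,
 *
 *	__DOMAIN_MAX_PFN(48)  == (1ULL << 36) - 1
 *	__DOMAIN_MAX_ADDR(48) == (1ULL << 48) - 1
 *
 * On 64-bit kernels DOMAIN_MAX_PFN(48) is the same value; on 32-bit
 * kernels the min_t() above clamps it to ULONG_MAX so PFNs still fit
 * in an 'unsigned long'.
 */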
78 
79 /* IO virtual address start page frame number */
80 #define IOVA_START_PFN		(1)
81 
82 #define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
83 
84 /* page table handling */
85 #define LEVEL_STRIDE		(9)
86 #define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
87 
88 /*
89  * This bitmap is used to advertise the page sizes our hardware supports
90  * to the IOMMU core, which will then use this information to split
91  * physically contiguous memory regions it is mapping into page sizes
92  * that we support.
93  *
94  * Traditionally the IOMMU core just handed us the mappings directly,
95  * after making sure the size is a power-of-two multiple of 4KiB and that the
96  * mapping has natural alignment.
97  *
98  * To retain this behavior, we currently advertise that we support
99  * all page sizes that are a power-of-two multiple of 4KiB.
100  *
101  * If at some point we'd like to utilize the IOMMU core's new behavior,
102  * we could change this to advertise the real page sizes we support.
103  */
104 #define INTEL_IOMMU_PGSIZES	(~0xFFFUL)
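
/*
 * Illustrative note: ~0xFFFUL clears bits 0-11 and sets every bit from
 * bit 12 up, so the advertised sizes are 4KiB, 8KiB, 16KiB and so on,
 * i.e. every power-of-two multiple of 4KiB. Advertising only the true
 * hardware page sizes would look roughly like the following instead
 * (assumption, not current driver behaviour):
 *
 *	#define INTEL_IOMMU_PGSIZES	(SZ_4K | SZ_2M | SZ_1G)
 */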
105 
106 static inline int agaw_to_level(int agaw)
107 {
108 	return agaw + 2;
109 }
110 
111 static inline int agaw_to_width(int agaw)
112 {
113 	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
114 }
115 
116 static inline int width_to_agaw(int width)
117 {
118 	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
119 }
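
/*
 * Worked example (illustrative): the helpers above encode the VT-d
 * geometry of LEVEL_STRIDE (9) translation bits per level on top of a
 * 30-bit, 2-level base:
 *
 *	agaw 1 -> 39-bit address width, 3-level page table
 *	agaw 2 -> 48-bit address width, 4-level page table
 *	agaw 3 -> 57-bit address width, 5-level page table
 *
 * width_to_agaw() is the inverse mapping, e.g. width_to_agaw(48) == 2.
 */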
120 
121 static inline unsigned int level_to_offset_bits(int level)
122 {
123 	return (level - 1) * LEVEL_STRIDE;
124 }
125 
126 static inline int pfn_level_offset(unsigned long pfn, int level)
127 {
128 	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
129 }
130 
131 static inline unsigned long level_mask(int level)
132 {
133 	return -1UL << level_to_offset_bits(level);
134 }
135 
136 static inline unsigned long level_size(int level)
137 {
138 	return 1UL << level_to_offset_bits(level);
139 }
140 
141 static inline unsigned long align_to_level(unsigned long pfn, int level)
142 {
143 	return (pfn + level_size(level) - 1) & level_mask(level);
144 }
145 
146 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
147 {
148 	return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
149 }
150 
151 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
152    are never going to work. */
153 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
154 {
155 	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
156 }
157 
158 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
159 {
160 	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
161 }
162 static inline unsigned long page_to_dma_pfn(struct page *pg)
163 {
164 	return mm_to_dma_pfn(page_to_pfn(pg));
165 }
166 static inline unsigned long virt_to_dma_pfn(void *p)
167 {
168 	return page_to_dma_pfn(virt_to_page(p));
169 }
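
/*
 * Illustrative note: with 4KiB kernel pages PAGE_SHIFT == VTD_PAGE_SHIFT,
 * so the two PFN spaces coincide and the shifts above are no-ops. They
 * only matter when MM pages are larger than VT-d pages; e.g. with a
 * (hypothetical here) 16KiB PAGE_SIZE, one mm pfn spans four VT-d pfns
 * and mm_to_dma_pfn(1) == 4.
 */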
170 
171 /* global iommu list, set NULL for ignored DMAR units */
172 static struct intel_iommu **g_iommus;
173 
174 static void __init check_tylersburg_isoch(void);
175 static int rwbf_quirk;
176 
177 /*
178  * set to 1 to panic the kernel if VT-d can't be enabled successfully
179  * (used when kernel is launched w/ TXT)
180  */
181 static int force_on = 0;
182 int intel_iommu_tboot_noforce;
183 static int no_platform_optin;
184 
185 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
186 
187 /*
188  * Take a root_entry and return the Lower Context Table Pointer (LCTP)
189  * if marked present.
190  */
191 static phys_addr_t root_entry_lctp(struct root_entry *re)
192 {
193 	if (!(re->lo & 1))
194 		return 0;
195 
196 	return re->lo & VTD_PAGE_MASK;
197 }
198 
199 /*
200  * Take a root_entry and return the Upper Context Table Pointer (UCTP)
201  * if marked present.
202  */
203 static phys_addr_t root_entry_uctp(struct root_entry *re)
204 {
205 	if (!(re->hi & 1))
206 		return 0;
207 
208 	return re->hi & VTD_PAGE_MASK;
209 }
210 
211 static inline void context_clear_pasid_enable(struct context_entry *context)
212 {
213 	context->lo &= ~(1ULL << 11);
214 }
215 
216 static inline bool context_pasid_enabled(struct context_entry *context)
217 {
218 	return !!(context->lo & (1ULL << 11));
219 }
220 
221 static inline void context_set_copied(struct context_entry *context)
222 {
223 	context->hi |= (1ull << 3);
224 }
225 
226 static inline bool context_copied(struct context_entry *context)
227 {
228 	return !!(context->hi & (1ULL << 3));
229 }
230 
231 static inline bool __context_present(struct context_entry *context)
232 {
233 	return (context->lo & 1);
234 }
235 
236 bool context_present(struct context_entry *context)
237 {
238 	return context_pasid_enabled(context) ?
239 	     __context_present(context) :
240 	     __context_present(context) && !context_copied(context);
241 }
242 
243 static inline void context_set_present(struct context_entry *context)
244 {
245 	context->lo |= 1;
246 }
247 
248 static inline void context_set_fault_enable(struct context_entry *context)
249 {
250 	context->lo &= (((u64)-1) << 2) | 1;
251 }
252 
253 static inline void context_set_translation_type(struct context_entry *context,
254 						unsigned long value)
255 {
256 	context->lo &= (((u64)-1) << 4) | 3;
257 	context->lo |= (value & 3) << 2;
258 }
259 
260 static inline void context_set_address_root(struct context_entry *context,
261 					    unsigned long value)
262 {
263 	context->lo &= ~VTD_PAGE_MASK;
264 	context->lo |= value & VTD_PAGE_MASK;
265 }
266 
267 static inline void context_set_address_width(struct context_entry *context,
268 					     unsigned long value)
269 {
270 	context->hi |= value & 7;
271 }
272 
273 static inline void context_set_domain_id(struct context_entry *context,
274 					 unsigned long value)
275 {
276 	context->hi |= (value & ((1 << 16) - 1)) << 8;
277 }
278 
279 static inline int context_domain_id(struct context_entry *c)
280 {
281 	return((c->hi >> 8) & 0xffff);
282 }
283 
284 static inline void context_clear_entry(struct context_entry *context)
285 {
286 	context->lo = 0;
287 	context->hi = 0;
288 }
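
/*
 * Context-entry layout as implied by the accessors above (descriptive
 * summary only, derived from the helpers rather than quoted from the
 * VT-d specification):
 *
 *	lo, bit 0      : present
 *	lo, bit 1      : fault processing disable
 *	lo, bits 2-3   : translation type
 *	lo, bit 11     : PASID enable
 *	lo, bits 12-63 : address root (page-table pointer)
 *	hi, bits 0-2   : address width (AGAW)
 *	hi, bit 3      : "copied" software flag
 *	hi, bits 8-23  : domain id
 */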
289 
290 /*
291  * This domain is a static identity mapping domain.
292  *	1. This domain creates a static 1:1 mapping to all usable memory.
293  *	2. It maps to each iommu if successful.
294  *	3. Each iommu maps to this domain if successful.
295  */
296 static struct dmar_domain *si_domain;
297 static int hw_pass_through = 1;
298 
299 /* si_domain contains multiple devices */
300 #define DOMAIN_FLAG_STATIC_IDENTITY		BIT(0)
301 
302 /*
303  * This is a DMA domain allocated through the iommu domain allocation
304  * interface. But one or more devices belonging to this domain have
305  * been chosen to use a private domain. We should avoid using the
306  * map/unmap/iova_to_phys APIs on it.
307  */
308 #define DOMAIN_FLAG_LOSE_CHILDREN		BIT(1)
309 
310 #define for_each_domain_iommu(idx, domain)			\
311 	for (idx = 0; idx < g_num_of_iommus; idx++)		\
312 		if (domain->iommu_refcnt[idx])
313 
314 struct dmar_rmrr_unit {
315 	struct list_head list;		/* list of rmrr units	*/
316 	struct acpi_dmar_header *hdr;	/* ACPI header		*/
317 	u64	base_address;		/* reserved base address*/
318 	u64	end_address;		/* reserved end address */
319 	struct dmar_dev_scope *devices;	/* target devices */
320 	int	devices_cnt;		/* target device count */
321 };
322 
323 struct dmar_atsr_unit {
324 	struct list_head list;		/* list of ATSR units */
325 	struct acpi_dmar_header *hdr;	/* ACPI header */
326 	struct dmar_dev_scope *devices;	/* target devices */
327 	int devices_cnt;		/* target device count */
328 	u8 include_all:1;		/* include all ports */
329 };
330 
331 static LIST_HEAD(dmar_atsr_units);
332 static LIST_HEAD(dmar_rmrr_units);
333 
334 #define for_each_rmrr_units(rmrr) \
335 	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
336 
337 /* number of IOMMUs in the system, used to size and index g_iommus */
338 static int g_num_of_iommus;
339 
340 static void domain_exit(struct dmar_domain *domain);
341 static void domain_remove_dev_info(struct dmar_domain *domain);
342 static void dmar_remove_one_dev_info(struct device *dev);
343 static void __dmar_remove_one_dev_info(struct device_domain_info *info);
344 static void domain_context_clear(struct intel_iommu *iommu,
345 				 struct device *dev);
346 static int domain_detach_iommu(struct dmar_domain *domain,
347 			       struct intel_iommu *iommu);
348 static bool device_is_rmrr_locked(struct device *dev);
349 static int intel_iommu_attach_device(struct iommu_domain *domain,
350 				     struct device *dev);
351 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
352 					    dma_addr_t iova);
353 
354 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
355 int dmar_disabled = 0;
356 #else
357 int dmar_disabled = 1;
358 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
359 
360 int intel_iommu_sm;
361 int intel_iommu_enabled = 0;
362 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
363 
364 static int dmar_map_gfx = 1;
365 static int dmar_forcedac;
366 static int intel_iommu_strict;
367 static int intel_iommu_superpage = 1;
368 static int iommu_identity_mapping;
369 static int intel_no_bounce;
370 
371 #define IDENTMAP_ALL		1
372 #define IDENTMAP_GFX		2
373 #define IDENTMAP_AZALIA		4
374 
375 int intel_iommu_gfx_mapped;
376 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
377 
378 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
379 #define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2))
380 static DEFINE_SPINLOCK(device_domain_lock);
381 static LIST_HEAD(device_domain_list);
382 
383 #define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) &&	\
384 				to_pci_dev(d)->untrusted)
385 
386 /*
387  * Iterate over elements in device_domain_list and call the specified
388  * callback @fn against each element.
389  */
390 int for_each_device_domain(int (*fn)(struct device_domain_info *info,
391 				     void *data), void *data)
392 {
393 	int ret = 0;
394 	unsigned long flags;
395 	struct device_domain_info *info;
396 
397 	spin_lock_irqsave(&device_domain_lock, flags);
398 	list_for_each_entry(info, &device_domain_list, global) {
399 		ret = fn(info, data);
400 		if (ret) {
401 			spin_unlock_irqrestore(&device_domain_lock, flags);
402 			return ret;
403 		}
404 	}
405 	spin_unlock_irqrestore(&device_domain_lock, flags);
406 
407 	return 0;
408 }
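
/*
 * Usage sketch (illustrative only, not part of the driver): a callback
 * passed to for_each_device_domain() returns 0 to continue the walk and
 * non-zero to stop early, e.g.:
 *
 *	static int count_dev_domain_info(struct device_domain_info *info,
 *					 void *data)
 *	{
 *		(*(int *)data)++;
 *		return 0;
 *	}
 *
 *	int count = 0;
 *	for_each_device_domain(count_dev_domain_info, &count);
 */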
409 
410 const struct iommu_ops intel_iommu_ops;
411 
412 static bool translation_pre_enabled(struct intel_iommu *iommu)
413 {
414 	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
415 }
416 
417 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
418 {
419 	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
420 }
421 
422 static void init_translation_status(struct intel_iommu *iommu)
423 {
424 	u32 gsts;
425 
426 	gsts = readl(iommu->reg + DMAR_GSTS_REG);
427 	if (gsts & DMA_GSTS_TES)
428 		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
429 }
430 
431 /* Convert a generic 'struct iommu_domain' to the private 'struct dmar_domain' */
432 static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
433 {
434 	return container_of(dom, struct dmar_domain, domain);
435 }
436 
437 static int __init intel_iommu_setup(char *str)
438 {
439 	if (!str)
440 		return -EINVAL;
441 	while (*str) {
442 		if (!strncmp(str, "on", 2)) {
443 			dmar_disabled = 0;
444 			pr_info("IOMMU enabled\n");
445 		} else if (!strncmp(str, "off", 3)) {
446 			dmar_disabled = 1;
447 			no_platform_optin = 1;
448 			pr_info("IOMMU disabled\n");
449 		} else if (!strncmp(str, "igfx_off", 8)) {
450 			dmar_map_gfx = 0;
451 			pr_info("Disable GFX device mapping\n");
452 		} else if (!strncmp(str, "forcedac", 8)) {
453 			pr_info("Forcing DAC for PCI devices\n");
454 			dmar_forcedac = 1;
455 		} else if (!strncmp(str, "strict", 6)) {
456 			pr_info("Disable batched IOTLB flush\n");
457 			intel_iommu_strict = 1;
458 		} else if (!strncmp(str, "sp_off", 6)) {
459 			pr_info("Disable supported super page\n");
460 			intel_iommu_superpage = 0;
461 		} else if (!strncmp(str, "sm_on", 5)) {
462 			pr_info("Intel-IOMMU: scalable mode supported\n");
463 			intel_iommu_sm = 1;
464 		} else if (!strncmp(str, "tboot_noforce", 13)) {
465 			printk(KERN_INFO
466 				"Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
467 			intel_iommu_tboot_noforce = 1;
468 		} else if (!strncmp(str, "nobounce", 8)) {
469 			pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n");
470 			intel_no_bounce = 1;
471 		}
472 
473 		str += strcspn(str, ",");
474 		while (*str == ',')
475 			str++;
476 	}
477 	return 0;
478 }
479 __setup("intel_iommu=", intel_iommu_setup);
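
/*
 * Example kernel command lines accepted by the parser above
 * (illustrative):
 *
 *	intel_iommu=on
 *	intel_iommu=on,sm_on
 *	intel_iommu=off
 *	intel_iommu=igfx_off,strict
 *
 * Options are comma separated; unrecognized tokens are silently skipped.
 */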
480 
481 static struct kmem_cache *iommu_domain_cache;
482 static struct kmem_cache *iommu_devinfo_cache;
483 
484 static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
485 {
486 	struct dmar_domain **domains;
487 	int idx = did >> 8;
488 
489 	domains = iommu->domains[idx];
490 	if (!domains)
491 		return NULL;
492 
493 	return domains[did & 0xff];
494 }
495 
496 static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
497 			     struct dmar_domain *domain)
498 {
499 	struct dmar_domain **domains;
500 	int idx = did >> 8;
501 
502 	if (!iommu->domains[idx]) {
503 		size_t size = 256 * sizeof(struct dmar_domain *);
504 		iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
505 	}
506 
507 	domains = iommu->domains[idx];
508 	if (WARN_ON(!domains))
509 		return;
510 	else
511 		domains[did & 0xff] = domain;
512 }
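
/*
 * Illustrative note: the per-iommu domain pointers live in a two-level,
 * lazily allocated array. A 16-bit domain id is split into did >> 8
 * (index of a 256-pointer chunk) and did & 0xff (slot within that
 * chunk), so e.g. did 0x1234 lands in iommu->domains[0x12][0x34].
 */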
513 
514 void *alloc_pgtable_page(int node)
515 {
516 	struct page *page;
517 	void *vaddr = NULL;
518 
519 	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
520 	if (page)
521 		vaddr = page_address(page);
522 	return vaddr;
523 }
524 
525 void free_pgtable_page(void *vaddr)
526 {
527 	free_page((unsigned long)vaddr);
528 }
529 
530 static inline void *alloc_domain_mem(void)
531 {
532 	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
533 }
534 
535 static void free_domain_mem(void *vaddr)
536 {
537 	kmem_cache_free(iommu_domain_cache, vaddr);
538 }
539 
540 static inline void * alloc_devinfo_mem(void)
541 {
542 	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
543 }
544 
545 static inline void free_devinfo_mem(void *vaddr)
546 {
547 	kmem_cache_free(iommu_devinfo_cache, vaddr);
548 }
549 
550 static inline int domain_type_is_si(struct dmar_domain *domain)
551 {
552 	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
553 }
554 
555 static inline int domain_pfn_supported(struct dmar_domain *domain,
556 				       unsigned long pfn)
557 {
558 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
559 
560 	return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
561 }
562 
563 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
564 {
565 	unsigned long sagaw;
566 	int agaw = -1;
567 
568 	sagaw = cap_sagaw(iommu->cap);
569 	for (agaw = width_to_agaw(max_gaw);
570 	     agaw >= 0; agaw--) {
571 		if (test_bit(agaw, &sagaw))
572 			break;
573 	}
574 
575 	return agaw;
576 }
577 
578 /*
579  * Calculate max SAGAW for each iommu.
580  */
581 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
582 {
583 	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
584 }
585 
586 /*
587  * Calculate agaw for each iommu.
588  * "SAGAW" may be different across iommus; use a default agaw and fall
589  * back to a smaller supported agaw for iommus that don't support it.
590  */
591 int iommu_calculate_agaw(struct intel_iommu *iommu)
592 {
593 	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
594 }
595 
596 /* This function only returns a single iommu in a domain */
597 struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
598 {
599 	int iommu_id;
600 
601 	/* si_domain and vm domain should not get here. */
602 	if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA))
603 		return NULL;
604 
605 	for_each_domain_iommu(iommu_id, domain)
606 		break;
607 
608 	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
609 		return NULL;
610 
611 	return g_iommus[iommu_id];
612 }
613 
614 static void domain_update_iommu_coherency(struct dmar_domain *domain)
615 {
616 	struct dmar_drhd_unit *drhd;
617 	struct intel_iommu *iommu;
618 	bool found = false;
619 	int i;
620 
621 	domain->iommu_coherency = 1;
622 
623 	for_each_domain_iommu(i, domain) {
624 		found = true;
625 		if (!ecap_coherent(g_iommus[i]->ecap)) {
626 			domain->iommu_coherency = 0;
627 			break;
628 		}
629 	}
630 	if (found)
631 		return;
632 
633 	/* No hardware attached; use lowest common denominator */
634 	rcu_read_lock();
635 	for_each_active_iommu(iommu, drhd) {
636 		if (!ecap_coherent(iommu->ecap)) {
637 			domain->iommu_coherency = 0;
638 			break;
639 		}
640 	}
641 	rcu_read_unlock();
642 }
643 
644 static int domain_update_iommu_snooping(struct intel_iommu *skip)
645 {
646 	struct dmar_drhd_unit *drhd;
647 	struct intel_iommu *iommu;
648 	int ret = 1;
649 
650 	rcu_read_lock();
651 	for_each_active_iommu(iommu, drhd) {
652 		if (iommu != skip) {
653 			if (!ecap_sc_support(iommu->ecap)) {
654 				ret = 0;
655 				break;
656 			}
657 		}
658 	}
659 	rcu_read_unlock();
660 
661 	return ret;
662 }
663 
664 static int domain_update_iommu_superpage(struct intel_iommu *skip)
665 {
666 	struct dmar_drhd_unit *drhd;
667 	struct intel_iommu *iommu;
668 	int mask = 0xf;
669 
670 	if (!intel_iommu_superpage) {
671 		return 0;
672 	}
673 
674 	/* set iommu_superpage to the smallest common denominator */
675 	rcu_read_lock();
676 	for_each_active_iommu(iommu, drhd) {
677 		if (iommu != skip) {
678 			mask &= cap_super_page_val(iommu->cap);
679 			if (!mask)
680 				break;
681 		}
682 	}
683 	rcu_read_unlock();
684 
685 	return fls(mask);
686 }
687 
688 /* Some capabilities may be different across iommus */
689 static void domain_update_iommu_cap(struct dmar_domain *domain)
690 {
691 	domain_update_iommu_coherency(domain);
692 	domain->iommu_snooping = domain_update_iommu_snooping(NULL);
693 	domain->iommu_superpage = domain_update_iommu_superpage(NULL);
694 }
695 
696 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
697 					 u8 devfn, int alloc)
698 {
699 	struct root_entry *root = &iommu->root_entry[bus];
700 	struct context_entry *context;
701 	u64 *entry;
702 
703 	entry = &root->lo;
704 	if (sm_supported(iommu)) {
705 		if (devfn >= 0x80) {
706 			devfn -= 0x80;
707 			entry = &root->hi;
708 		}
709 		devfn *= 2;
710 	}
711 	if (*entry & 1)
712 		context = phys_to_virt(*entry & VTD_PAGE_MASK);
713 	else {
714 		unsigned long phy_addr;
715 		if (!alloc)
716 			return NULL;
717 
718 		context = alloc_pgtable_page(iommu->node);
719 		if (!context)
720 			return NULL;
721 
722 		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
723 		phy_addr = virt_to_phys((void *)context);
724 		*entry = phy_addr | 1;
725 		__iommu_flush_cache(iommu, entry, sizeof(*entry));
726 	}
727 	return &context[devfn];
728 }
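
/*
 * Worked example (illustrative): in scalable mode the root entry is
 * split in two halves of 128 devfns and every device takes two context
 * slots, so for devfn 0x85 the lookup above uses root->hi and returns
 * &context[(0x85 - 0x80) * 2], i.e. &context[10]. In legacy mode the
 * same devfn simply indexes &context[0x85] under root->lo.
 */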
729 
730 static int iommu_dummy(struct device *dev)
731 {
732 	return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
733 }
734 
735 /**
736  * is_downstream_to_pci_bridge - test if a device belongs to the PCI
737  *				 sub-hierarchy of a candidate PCI-PCI bridge
738  * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy
739  * @bridge: the candidate PCI-PCI bridge
740  *
741  * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false.
742  */
743 static bool
744 is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
745 {
746 	struct pci_dev *pdev, *pbridge;
747 
748 	if (!dev_is_pci(dev) || !dev_is_pci(bridge))
749 		return false;
750 
751 	pdev = to_pci_dev(dev);
752 	pbridge = to_pci_dev(bridge);
753 
754 	if (pbridge->subordinate &&
755 	    pbridge->subordinate->number <= pdev->bus->number &&
756 	    pbridge->subordinate->busn_res.end >= pdev->bus->number)
757 		return true;
758 
759 	return false;
760 }
761 
762 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
763 {
764 	struct dmar_drhd_unit *drhd = NULL;
765 	struct intel_iommu *iommu;
766 	struct device *tmp;
767 	struct pci_dev *pdev = NULL;
768 	u16 segment = 0;
769 	int i;
770 
771 	if (iommu_dummy(dev))
772 		return NULL;
773 
774 	if (dev_is_pci(dev)) {
775 		struct pci_dev *pf_pdev;
776 
777 		pdev = to_pci_dev(dev);
778 
779 #ifdef CONFIG_X86
780 		/* VMD child devices currently cannot be handled individually */
781 		if (is_vmd(pdev->bus))
782 			return NULL;
783 #endif
784 
785 		/* VFs aren't listed in scope tables; we need to look up
786 		 * the PF instead to find the IOMMU. */
787 		pf_pdev = pci_physfn(pdev);
788 		dev = &pf_pdev->dev;
789 		segment = pci_domain_nr(pdev->bus);
790 	} else if (has_acpi_companion(dev))
791 		dev = &ACPI_COMPANION(dev)->dev;
792 
793 	rcu_read_lock();
794 	for_each_active_iommu(iommu, drhd) {
795 		if (pdev && segment != drhd->segment)
796 			continue;
797 
798 		for_each_active_dev_scope(drhd->devices,
799 					  drhd->devices_cnt, i, tmp) {
800 			if (tmp == dev) {
801 				/* For a VF use its original BDF# not that of the PF
802 				 * which we used for the IOMMU lookup. Strictly speaking
803 				 * we could do this for all PCI devices; we only need to
804 				 * get the BDF# from the scope table for ACPI matches. */
805 				if (pdev && pdev->is_virtfn)
806 					goto got_pdev;
807 
808 				*bus = drhd->devices[i].bus;
809 				*devfn = drhd->devices[i].devfn;
810 				goto out;
811 			}
812 
813 			if (is_downstream_to_pci_bridge(dev, tmp))
814 				goto got_pdev;
815 		}
816 
817 		if (pdev && drhd->include_all) {
818 		got_pdev:
819 			*bus = pdev->bus->number;
820 			*devfn = pdev->devfn;
821 			goto out;
822 		}
823 	}
824 	iommu = NULL;
825  out:
826 	rcu_read_unlock();
827 
828 	return iommu;
829 }
830 
831 static void domain_flush_cache(struct dmar_domain *domain,
832 			       void *addr, int size)
833 {
834 	if (!domain->iommu_coherency)
835 		clflush_cache_range(addr, size);
836 }
837 
838 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
839 {
840 	struct context_entry *context;
841 	int ret = 0;
842 	unsigned long flags;
843 
844 	spin_lock_irqsave(&iommu->lock, flags);
845 	context = iommu_context_addr(iommu, bus, devfn, 0);
846 	if (context)
847 		ret = context_present(context);
848 	spin_unlock_irqrestore(&iommu->lock, flags);
849 	return ret;
850 }
851 
852 static void free_context_table(struct intel_iommu *iommu)
853 {
854 	int i;
855 	unsigned long flags;
856 	struct context_entry *context;
857 
858 	spin_lock_irqsave(&iommu->lock, flags);
859 	if (!iommu->root_entry) {
860 		goto out;
861 	}
862 	for (i = 0; i < ROOT_ENTRY_NR; i++) {
863 		context = iommu_context_addr(iommu, i, 0, 0);
864 		if (context)
865 			free_pgtable_page(context);
866 
867 		if (!sm_supported(iommu))
868 			continue;
869 
870 		context = iommu_context_addr(iommu, i, 0x80, 0);
871 		if (context)
872 			free_pgtable_page(context);
873 
874 	}
875 	free_pgtable_page(iommu->root_entry);
876 	iommu->root_entry = NULL;
877 out:
878 	spin_unlock_irqrestore(&iommu->lock, flags);
879 }
880 
881 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
882 				      unsigned long pfn, int *target_level)
883 {
884 	struct dma_pte *parent, *pte;
885 	int level = agaw_to_level(domain->agaw);
886 	int offset;
887 
888 	BUG_ON(!domain->pgd);
889 
890 	if (!domain_pfn_supported(domain, pfn))
891 		/* Address beyond IOMMU's addressing capabilities. */
892 		return NULL;
893 
894 	parent = domain->pgd;
895 
896 	while (1) {
897 		void *tmp_page;
898 
899 		offset = pfn_level_offset(pfn, level);
900 		pte = &parent[offset];
901 		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
902 			break;
903 		if (level == *target_level)
904 			break;
905 
906 		if (!dma_pte_present(pte)) {
907 			uint64_t pteval;
908 
909 			tmp_page = alloc_pgtable_page(domain->nid);
910 
911 			if (!tmp_page)
912 				return NULL;
913 
914 			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
915 			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
916 			if (cmpxchg64(&pte->val, 0ULL, pteval))
917 				/* Someone else set it while we were thinking; use theirs. */
918 				free_pgtable_page(tmp_page);
919 			else
920 				domain_flush_cache(domain, pte, sizeof(*pte));
921 		}
922 		if (level == 1)
923 			break;
924 
925 		parent = phys_to_virt(dma_pte_addr(pte));
926 		level--;
927 	}
928 
929 	if (!*target_level)
930 		*target_level = level;
931 
932 	return pte;
933 }
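
/*
 * Illustrative walk-through: for a 4-level table (agaw 2) with
 * *target_level == 1, the loop above peels LEVEL_STRIDE bits of the pfn
 * per level via pfn_level_offset() and allocates missing intermediate
 * tables on the way down. The cmpxchg64() lets a concurrent walker that
 * installs the same intermediate table first win; the loser frees its
 * spare page and continues with the winner's table.
 */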
934 
935 /* return address's pte at specific level */
936 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
937 					 unsigned long pfn,
938 					 int level, int *large_page)
939 {
940 	struct dma_pte *parent, *pte;
941 	int total = agaw_to_level(domain->agaw);
942 	int offset;
943 
944 	parent = domain->pgd;
945 	while (level <= total) {
946 		offset = pfn_level_offset(pfn, total);
947 		pte = &parent[offset];
948 		if (level == total)
949 			return pte;
950 
951 		if (!dma_pte_present(pte)) {
952 			*large_page = total;
953 			break;
954 		}
955 
956 		if (dma_pte_superpage(pte)) {
957 			*large_page = total;
958 			return pte;
959 		}
960 
961 		parent = phys_to_virt(dma_pte_addr(pte));
962 		total--;
963 	}
964 	return NULL;
965 }
966 
967 /* clear last level pte; a tlb flush should follow */
968 static void dma_pte_clear_range(struct dmar_domain *domain,
969 				unsigned long start_pfn,
970 				unsigned long last_pfn)
971 {
972 	unsigned int large_page;
973 	struct dma_pte *first_pte, *pte;
974 
975 	BUG_ON(!domain_pfn_supported(domain, start_pfn));
976 	BUG_ON(!domain_pfn_supported(domain, last_pfn));
977 	BUG_ON(start_pfn > last_pfn);
978 
979 	/* we don't need lock here; nobody else touches the iova range */
980 	do {
981 		large_page = 1;
982 		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
983 		if (!pte) {
984 			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
985 			continue;
986 		}
987 		do {
988 			dma_clear_pte(pte);
989 			start_pfn += lvl_to_nr_pages(large_page);
990 			pte++;
991 		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));
992 
993 		domain_flush_cache(domain, first_pte,
994 				   (void *)pte - (void *)first_pte);
995 
996 	} while (start_pfn && start_pfn <= last_pfn);
997 }
998 
999 static void dma_pte_free_level(struct dmar_domain *domain, int level,
1000 			       int retain_level, struct dma_pte *pte,
1001 			       unsigned long pfn, unsigned long start_pfn,
1002 			       unsigned long last_pfn)
1003 {
1004 	pfn = max(start_pfn, pfn);
1005 	pte = &pte[pfn_level_offset(pfn, level)];
1006 
1007 	do {
1008 		unsigned long level_pfn;
1009 		struct dma_pte *level_pte;
1010 
1011 		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1012 			goto next;
1013 
1014 		level_pfn = pfn & level_mask(level);
1015 		level_pte = phys_to_virt(dma_pte_addr(pte));
1016 
1017 		if (level > 2) {
1018 			dma_pte_free_level(domain, level - 1, retain_level,
1019 					   level_pte, level_pfn, start_pfn,
1020 					   last_pfn);
1021 		}
1022 
1023 		/*
1024 		 * Free the page table if we're below the level we want to
1025 		 * retain and the range covers the entire table.
1026 		 */
1027 		if (level < retain_level && !(start_pfn > level_pfn ||
1028 		      last_pfn < level_pfn + level_size(level) - 1)) {
1029 			dma_clear_pte(pte);
1030 			domain_flush_cache(domain, pte, sizeof(*pte));
1031 			free_pgtable_page(level_pte);
1032 		}
1033 next:
1034 		pfn += level_size(level);
1035 	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1036 }
1037 
1038 /*
1039  * clear last level (leaf) ptes and free page table pages below the
1040  * level we wish to keep intact.
1041  */
1042 static void dma_pte_free_pagetable(struct dmar_domain *domain,
1043 				   unsigned long start_pfn,
1044 				   unsigned long last_pfn,
1045 				   int retain_level)
1046 {
1047 	BUG_ON(!domain_pfn_supported(domain, start_pfn));
1048 	BUG_ON(!domain_pfn_supported(domain, last_pfn));
1049 	BUG_ON(start_pfn > last_pfn);
1050 
1051 	dma_pte_clear_range(domain, start_pfn, last_pfn);
1052 
1053 	/* We don't need lock here; nobody else touches the iova range */
1054 	dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
1055 			   domain->pgd, 0, start_pfn, last_pfn);
1056 
1057 	/* free pgd */
1058 	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1059 		free_pgtable_page(domain->pgd);
1060 		domain->pgd = NULL;
1061 	}
1062 }
1063 
1064 /* When a page at a given level is being unlinked from its parent, we don't
1065    need to *modify* it at all. All we need to do is make a list of all the
1066    pages which can be freed just as soon as we've flushed the IOTLB and we
1067    know the hardware page-walk will no longer touch them.
1068    The 'pte' argument is the *parent* PTE, pointing to the page that is to
1069    be freed. */
1070 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1071 					    int level, struct dma_pte *pte,
1072 					    struct page *freelist)
1073 {
1074 	struct page *pg;
1075 
1076 	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1077 	pg->freelist = freelist;
1078 	freelist = pg;
1079 
1080 	if (level == 1)
1081 		return freelist;
1082 
1083 	pte = page_address(pg);
1084 	do {
1085 		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1086 			freelist = dma_pte_list_pagetables(domain, level - 1,
1087 							   pte, freelist);
1088 		pte++;
1089 	} while (!first_pte_in_page(pte));
1090 
1091 	return freelist;
1092 }
1093 
1094 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1095 					struct dma_pte *pte, unsigned long pfn,
1096 					unsigned long start_pfn,
1097 					unsigned long last_pfn,
1098 					struct page *freelist)
1099 {
1100 	struct dma_pte *first_pte = NULL, *last_pte = NULL;
1101 
1102 	pfn = max(start_pfn, pfn);
1103 	pte = &pte[pfn_level_offset(pfn, level)];
1104 
1105 	do {
1106 		unsigned long level_pfn;
1107 
1108 		if (!dma_pte_present(pte))
1109 			goto next;
1110 
1111 		level_pfn = pfn & level_mask(level);
1112 
1113 		/* If range covers entire pagetable, free it */
1114 		if (start_pfn <= level_pfn &&
1115 		    last_pfn >= level_pfn + level_size(level) - 1) {
1116 			/* These subordinate page tables are going away entirely. Don't
1117 			   bother to clear them; we're just going to *free* them. */
1118 			if (level > 1 && !dma_pte_superpage(pte))
1119 				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1120 
1121 			dma_clear_pte(pte);
1122 			if (!first_pte)
1123 				first_pte = pte;
1124 			last_pte = pte;
1125 		} else if (level > 1) {
1126 			/* Recurse down into a level that isn't *entirely* obsolete */
1127 			freelist = dma_pte_clear_level(domain, level - 1,
1128 						       phys_to_virt(dma_pte_addr(pte)),
1129 						       level_pfn, start_pfn, last_pfn,
1130 						       freelist);
1131 		}
1132 next:
1133 		pfn += level_size(level);
1134 	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1135 
1136 	if (first_pte)
1137 		domain_flush_cache(domain, first_pte,
1138 				   (void *)++last_pte - (void *)first_pte);
1139 
1140 	return freelist;
1141 }
1142 
1143 /* We can't just free the pages because the IOMMU may still be walking
1144    the page tables, and may have cached the intermediate levels. The
1145    pages can only be freed after the IOTLB flush has been done. */
1146 static struct page *domain_unmap(struct dmar_domain *domain,
1147 				 unsigned long start_pfn,
1148 				 unsigned long last_pfn)
1149 {
1150 	struct page *freelist;
1151 
1152 	BUG_ON(!domain_pfn_supported(domain, start_pfn));
1153 	BUG_ON(!domain_pfn_supported(domain, last_pfn));
1154 	BUG_ON(start_pfn > last_pfn);
1155 
1156 	/* we don't need lock here; nobody else touches the iova range */
1157 	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1158 				       domain->pgd, 0, start_pfn, last_pfn, NULL);
1159 
1160 	/* free pgd */
1161 	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1162 		struct page *pgd_page = virt_to_page(domain->pgd);
1163 		pgd_page->freelist = freelist;
1164 		freelist = pgd_page;
1165 
1166 		domain->pgd = NULL;
1167 	}
1168 
1169 	return freelist;
1170 }
1171 
1172 static void dma_free_pagelist(struct page *freelist)
1173 {
1174 	struct page *pg;
1175 
1176 	while ((pg = freelist)) {
1177 		freelist = pg->freelist;
1178 		free_pgtable_page(page_address(pg));
1179 	}
1180 }
1181 
1182 static void iova_entry_free(unsigned long data)
1183 {
1184 	struct page *freelist = (struct page *)data;
1185 
1186 	dma_free_pagelist(freelist);
1187 }
1188 
1189 /* iommu handling */
1190 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1191 {
1192 	struct root_entry *root;
1193 	unsigned long flags;
1194 
1195 	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1196 	if (!root) {
1197 		pr_err("Allocating root entry for %s failed\n",
1198 			iommu->name);
1199 		return -ENOMEM;
1200 	}
1201 
1202 	__iommu_flush_cache(iommu, root, ROOT_SIZE);
1203 
1204 	spin_lock_irqsave(&iommu->lock, flags);
1205 	iommu->root_entry = root;
1206 	spin_unlock_irqrestore(&iommu->lock, flags);
1207 
1208 	return 0;
1209 }
1210 
1211 static void iommu_set_root_entry(struct intel_iommu *iommu)
1212 {
1213 	u64 addr;
1214 	u32 sts;
1215 	unsigned long flag;
1216 
1217 	addr = virt_to_phys(iommu->root_entry);
1218 	if (sm_supported(iommu))
1219 		addr |= DMA_RTADDR_SMT;
1220 
1221 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1222 	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1223 
1224 	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1225 
1226 	/* Make sure hardware completes it */
1227 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1228 		      readl, (sts & DMA_GSTS_RTPS), sts);
1229 
1230 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1231 }
1232 
1233 void iommu_flush_write_buffer(struct intel_iommu *iommu)
1234 {
1235 	u32 val;
1236 	unsigned long flag;
1237 
1238 	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1239 		return;
1240 
1241 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1242 	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1243 
1244 	/* Make sure hardware completes it */
1245 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1246 		      readl, (!(val & DMA_GSTS_WBFS)), val);
1247 
1248 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1249 }
1250 
1251 /* return value determines if we need a write buffer flush */
1252 static void __iommu_flush_context(struct intel_iommu *iommu,
1253 				  u16 did, u16 source_id, u8 function_mask,
1254 				  u64 type)
1255 {
1256 	u64 val = 0;
1257 	unsigned long flag;
1258 
1259 	switch (type) {
1260 	case DMA_CCMD_GLOBAL_INVL:
1261 		val = DMA_CCMD_GLOBAL_INVL;
1262 		break;
1263 	case DMA_CCMD_DOMAIN_INVL:
1264 		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1265 		break;
1266 	case DMA_CCMD_DEVICE_INVL:
1267 		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1268 			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1269 		break;
1270 	default:
1271 		BUG();
1272 	}
1273 	val |= DMA_CCMD_ICC;
1274 
1275 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1276 	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1277 
1278 	/* Make sure hardware completes it */
1279 	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1280 		dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1281 
1282 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1283 }
1284 
1285 /* return value determines if we need a write buffer flush */
1286 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1287 				u64 addr, unsigned int size_order, u64 type)
1288 {
1289 	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1290 	u64 val = 0, val_iva = 0;
1291 	unsigned long flag;
1292 
1293 	switch (type) {
1294 	case DMA_TLB_GLOBAL_FLUSH:
1295 		/* global flush doesn't need to set IVA_REG */
1296 		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1297 		break;
1298 	case DMA_TLB_DSI_FLUSH:
1299 		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1300 		break;
1301 	case DMA_TLB_PSI_FLUSH:
1302 		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1303 		/* IH bit is passed in as part of address */
1304 		val_iva = size_order | addr;
1305 		break;
1306 	default:
1307 		BUG();
1308 	}
1309 	/* Note: set drain read/write */
1310 #if 0
1311 	/*
1312 	 * This is probably to be super secure.. Looks like we can
1313 	 * ignore it without any impact.
1314 	 */
1315 	if (cap_read_drain(iommu->cap))
1316 		val |= DMA_TLB_READ_DRAIN;
1317 #endif
1318 	if (cap_write_drain(iommu->cap))
1319 		val |= DMA_TLB_WRITE_DRAIN;
1320 
1321 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1322 	/* Note: Only uses first TLB reg currently */
1323 	if (val_iva)
1324 		dmar_writeq(iommu->reg + tlb_offset, val_iva);
1325 	dmar_writeq(iommu->reg + tlb_offset + 8, val);
1326 
1327 	/* Make sure hardware completes it */
1328 	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1329 		dmar_readq, (!(val & DMA_TLB_IVT)), val);
1330 
1331 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1332 
1333 	/* check IOTLB invalidation granularity */
1334 	if (DMA_TLB_IAIG(val) == 0)
1335 		pr_err("Flush IOTLB failed\n");
1336 	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1337 		pr_debug("TLB flush request %Lx, actual %Lx\n",
1338 			(unsigned long long)DMA_TLB_IIRG(type),
1339 			(unsigned long long)DMA_TLB_IAIG(val));
1340 }
1341 
1342 static struct device_domain_info *
iommu_support_dev_iotlb(struct dmar_domain * domain,struct intel_iommu * iommu,u8 bus,u8 devfn)1343 iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1344 			 u8 bus, u8 devfn)
1345 {
1346 	struct device_domain_info *info;
1347 
1348 	assert_spin_locked(&device_domain_lock);
1349 
1350 	if (!iommu->qi)
1351 		return NULL;
1352 
1353 	list_for_each_entry(info, &domain->devices, link)
1354 		if (info->iommu == iommu && info->bus == bus &&
1355 		    info->devfn == devfn) {
1356 			if (info->ats_supported && info->dev)
1357 				return info;
1358 			break;
1359 		}
1360 
1361 	return NULL;
1362 }
1363 
1364 static void domain_update_iotlb(struct dmar_domain *domain)
1365 {
1366 	struct device_domain_info *info;
1367 	bool has_iotlb_device = false;
1368 
1369 	assert_spin_locked(&device_domain_lock);
1370 
1371 	list_for_each_entry(info, &domain->devices, link) {
1372 		struct pci_dev *pdev;
1373 
1374 		if (!info->dev || !dev_is_pci(info->dev))
1375 			continue;
1376 
1377 		pdev = to_pci_dev(info->dev);
1378 		if (pdev->ats_enabled) {
1379 			has_iotlb_device = true;
1380 			break;
1381 		}
1382 	}
1383 
1384 	domain->has_iotlb_device = has_iotlb_device;
1385 }
1386 
1387 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1388 {
1389 	struct pci_dev *pdev;
1390 
1391 	assert_spin_locked(&device_domain_lock);
1392 
1393 	if (!info || !dev_is_pci(info->dev))
1394 		return;
1395 
1396 	pdev = to_pci_dev(info->dev);
1397 	/* For IOMMUs that support device IOTLB throttling (DIT), we assign
1398 	 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
1399 	 * queue depth at PF level. If DIT is not set, PFSID will be treated as
1400 	 * reserved, which should be set to 0.
1401 	 */
1402 	if (!ecap_dit(info->iommu->ecap))
1403 		info->pfsid = 0;
1404 	else {
1405 		struct pci_dev *pf_pdev;
1406 
1407 		/* pdev will be returned if device is not a vf */
1408 		pf_pdev = pci_physfn(pdev);
1409 		info->pfsid = pci_dev_id(pf_pdev);
1410 	}
1411 
1412 #ifdef CONFIG_INTEL_IOMMU_SVM
1413 	/* The PCIe spec, in its wisdom, declares that the behaviour of
1414 	   the device if you enable PASID support after ATS support is
1415 	   undefined. So always enable PASID support on devices which
1416 	   have it, even if we can't yet know if we're ever going to
1417 	   use it. */
1418 	if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1419 		info->pasid_enabled = 1;
1420 
1421 	if (info->pri_supported &&
1422 	    (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1)  &&
1423 	    !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1424 		info->pri_enabled = 1;
1425 #endif
1426 	if (!pdev->untrusted && info->ats_supported &&
1427 	    pci_ats_page_aligned(pdev) &&
1428 	    !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1429 		info->ats_enabled = 1;
1430 		domain_update_iotlb(info->domain);
1431 		info->ats_qdep = pci_ats_queue_depth(pdev);
1432 	}
1433 }
1434 
1435 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1436 {
1437 	struct pci_dev *pdev;
1438 
1439 	assert_spin_locked(&device_domain_lock);
1440 
1441 	if (!dev_is_pci(info->dev))
1442 		return;
1443 
1444 	pdev = to_pci_dev(info->dev);
1445 
1446 	if (info->ats_enabled) {
1447 		pci_disable_ats(pdev);
1448 		info->ats_enabled = 0;
1449 		domain_update_iotlb(info->domain);
1450 	}
1451 #ifdef CONFIG_INTEL_IOMMU_SVM
1452 	if (info->pri_enabled) {
1453 		pci_disable_pri(pdev);
1454 		info->pri_enabled = 0;
1455 	}
1456 	if (info->pasid_enabled) {
1457 		pci_disable_pasid(pdev);
1458 		info->pasid_enabled = 0;
1459 	}
1460 #endif
1461 }
1462 
1463 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1464 				  u64 addr, unsigned mask)
1465 {
1466 	u16 sid, qdep;
1467 	unsigned long flags;
1468 	struct device_domain_info *info;
1469 
1470 	if (!domain->has_iotlb_device)
1471 		return;
1472 
1473 	spin_lock_irqsave(&device_domain_lock, flags);
1474 	list_for_each_entry(info, &domain->devices, link) {
1475 		if (!info->ats_enabled)
1476 			continue;
1477 
1478 		sid = info->bus << 8 | info->devfn;
1479 		qdep = info->ats_qdep;
1480 		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1481 				qdep, addr, mask);
1482 	}
1483 	spin_unlock_irqrestore(&device_domain_lock, flags);
1484 }
1485 
1486 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1487 				  struct dmar_domain *domain,
1488 				  unsigned long pfn, unsigned int pages,
1489 				  int ih, int map)
1490 {
1491 	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1492 	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1493 	u16 did = domain->iommu_did[iommu->seq_id];
1494 
1495 	BUG_ON(pages == 0);
1496 
1497 	if (ih)
1498 		ih = 1 << 6;
1499 	/*
1500 	 * Fall back to domain-selective flush if there is no PSI support or
1501 	 * the size is too big.
1502 	 * PSI requires the page size to be 2 ^ x, and the base address to be
1503 	 * naturally aligned to the size.
1504 	 */
1505 	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1506 		iommu->flush.flush_iotlb(iommu, did, 0, 0,
1507 						DMA_TLB_DSI_FLUSH);
1508 	else
1509 		iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1510 						DMA_TLB_PSI_FLUSH);
1511 
1512 	/*
1513 	 * In caching mode, changes of pages from non-present to present require
1514 	 * flush. However, device IOTLB doesn't need to be flushed in this case.
1515 	 */
1516 	if (!cap_caching_mode(iommu->cap) || !map)
1517 		iommu_flush_dev_iotlb(domain, addr, mask);
1518 }
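
/*
 * Worked example (illustrative): flushing pages = 9 rounds up to 16, so
 * mask = ilog2(16) = 4 and the PSI invalidation covers a naturally
 * aligned 16-page (64KiB) window. If mask exceeded cap_max_amask_val(),
 * the code above would fall back to a domain-selective flush instead.
 */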
1519 
1520 /* Notification for newly created mappings */
1521 static inline void __mapping_notify_one(struct intel_iommu *iommu,
1522 					struct dmar_domain *domain,
1523 					unsigned long pfn, unsigned int pages)
1524 {
1525 	/* It's a non-present to present mapping. Only flush if caching mode */
1526 	if (cap_caching_mode(iommu->cap))
1527 		iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1528 	else
1529 		iommu_flush_write_buffer(iommu);
1530 }
1531 
1532 static void iommu_flush_iova(struct iova_domain *iovad)
1533 {
1534 	struct dmar_domain *domain;
1535 	int idx;
1536 
1537 	domain = container_of(iovad, struct dmar_domain, iovad);
1538 
1539 	for_each_domain_iommu(idx, domain) {
1540 		struct intel_iommu *iommu = g_iommus[idx];
1541 		u16 did = domain->iommu_did[iommu->seq_id];
1542 
1543 		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
1544 
1545 		if (!cap_caching_mode(iommu->cap))
1546 			iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1547 					      0, MAX_AGAW_PFN_WIDTH);
1548 	}
1549 }
1550 
1551 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1552 {
1553 	u32 pmen;
1554 	unsigned long flags;
1555 
1556 	if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap))
1557 		return;
1558 
1559 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1560 	pmen = readl(iommu->reg + DMAR_PMEN_REG);
1561 	pmen &= ~DMA_PMEN_EPM;
1562 	writel(pmen, iommu->reg + DMAR_PMEN_REG);
1563 
1564 	/* wait for the protected region status bit to clear */
1565 	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1566 		readl, !(pmen & DMA_PMEN_PRS), pmen);
1567 
1568 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1569 }
1570 
1571 static void iommu_enable_translation(struct intel_iommu *iommu)
1572 {
1573 	u32 sts;
1574 	unsigned long flags;
1575 
1576 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1577 	iommu->gcmd |= DMA_GCMD_TE;
1578 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1579 
1580 	/* Make sure hardware completes it */
1581 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1582 		      readl, (sts & DMA_GSTS_TES), sts);
1583 
1584 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1585 }
1586 
1587 static void iommu_disable_translation(struct intel_iommu *iommu)
1588 {
1589 	u32 sts;
1590 	unsigned long flag;
1591 
1592 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1593 	iommu->gcmd &= ~DMA_GCMD_TE;
1594 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1595 
1596 	/* Make sure hardware completes it */
1597 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1598 		      readl, (!(sts & DMA_GSTS_TES)), sts);
1599 
1600 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1601 }
1602 
1603 static int iommu_init_domains(struct intel_iommu *iommu)
1604 {
1605 	u32 ndomains, nlongs;
1606 	size_t size;
1607 
1608 	ndomains = cap_ndoms(iommu->cap);
1609 	pr_debug("%s: Number of Domains supported <%d>\n",
1610 		 iommu->name, ndomains);
1611 	nlongs = BITS_TO_LONGS(ndomains);
1612 
1613 	spin_lock_init(&iommu->lock);
1614 
1615 	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1616 	if (!iommu->domain_ids) {
1617 		pr_err("%s: Allocating domain id array failed\n",
1618 		       iommu->name);
1619 		return -ENOMEM;
1620 	}
1621 
1622 	size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
1623 	iommu->domains = kzalloc(size, GFP_KERNEL);
1624 
1625 	if (iommu->domains) {
1626 		size = 256 * sizeof(struct dmar_domain *);
1627 		iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1628 	}
1629 
1630 	if (!iommu->domains || !iommu->domains[0]) {
1631 		pr_err("%s: Allocating domain array failed\n",
1632 		       iommu->name);
1633 		kfree(iommu->domain_ids);
1634 		kfree(iommu->domains);
1635 		iommu->domain_ids = NULL;
1636 		iommu->domains    = NULL;
1637 		return -ENOMEM;
1638 	}
1639 
1640 	/*
1641 	 * If Caching mode is set, then invalid translations are tagged
1642 	 * with domain-id 0, hence we need to pre-allocate it. We also
1643 	 * use domain-id 0 as a marker for non-allocated domain-id, so
1644 	 * make sure it is not used for a real domain.
1645 	 */
1646 	set_bit(0, iommu->domain_ids);
1647 
1648 	/*
1649 	 * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
1650 	 * entry for first-level or pass-through translation modes should
1651 	 * be programmed with a domain id different from those used for
1652 	 * second-level or nested translation. We reserve a domain id for
1653 	 * this purpose.
1654 	 */
1655 	if (sm_supported(iommu))
1656 		set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1657 
1658 	return 0;
1659 }
1660 
1661 static void disable_dmar_iommu(struct intel_iommu *iommu)
1662 {
1663 	struct device_domain_info *info, *tmp;
1664 	unsigned long flags;
1665 
1666 	if (!iommu->domains || !iommu->domain_ids)
1667 		return;
1668 
1669 	spin_lock_irqsave(&device_domain_lock, flags);
1670 	list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1671 		if (info->iommu != iommu)
1672 			continue;
1673 
1674 		if (!info->dev || !info->domain)
1675 			continue;
1676 
1677 		__dmar_remove_one_dev_info(info);
1678 	}
1679 	spin_unlock_irqrestore(&device_domain_lock, flags);
1680 
1681 	if (iommu->gcmd & DMA_GCMD_TE)
1682 		iommu_disable_translation(iommu);
1683 }
1684 
1685 static void free_dmar_iommu(struct intel_iommu *iommu)
1686 {
1687 	if ((iommu->domains) && (iommu->domain_ids)) {
1688 		int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
1689 		int i;
1690 
1691 		for (i = 0; i < elems; i++)
1692 			kfree(iommu->domains[i]);
1693 		kfree(iommu->domains);
1694 		kfree(iommu->domain_ids);
1695 		iommu->domains = NULL;
1696 		iommu->domain_ids = NULL;
1697 	}
1698 
1699 	g_iommus[iommu->seq_id] = NULL;
1700 
1701 	/* free context mapping */
1702 	free_context_table(iommu);
1703 
1704 #ifdef CONFIG_INTEL_IOMMU_SVM
1705 	if (pasid_supported(iommu)) {
1706 		if (ecap_prs(iommu->ecap))
1707 			intel_svm_finish_prq(iommu);
1708 	}
1709 #endif
1710 }
1711 
1712 static struct dmar_domain *alloc_domain(int flags)
1713 {
1714 	struct dmar_domain *domain;
1715 
1716 	domain = alloc_domain_mem();
1717 	if (!domain)
1718 		return NULL;
1719 
1720 	memset(domain, 0, sizeof(*domain));
1721 	domain->nid = NUMA_NO_NODE;
1722 	domain->flags = flags;
1723 	domain->has_iotlb_device = false;
1724 	INIT_LIST_HEAD(&domain->devices);
1725 
1726 	return domain;
1727 }
1728 
1729 /* Must be called with device_domain_lock and iommu->lock held */
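/*
 * The first attachment of a domain to an IOMMU allocates a domain-id with
 * find_first_zero_bit(); ids 0 and (in scalable mode) FLPT_DEFAULT_DID were
 * pre-set in iommu_init_domains(), so they are never handed out here.
 */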
1730 static int domain_attach_iommu(struct dmar_domain *domain,
1731 			       struct intel_iommu *iommu)
1732 {
1733 	unsigned long ndomains;
1734 	int num;
1735 
1736 	assert_spin_locked(&device_domain_lock);
1737 	assert_spin_locked(&iommu->lock);
1738 
1739 	domain->iommu_refcnt[iommu->seq_id] += 1;
1740 	domain->iommu_count += 1;
1741 	if (domain->iommu_refcnt[iommu->seq_id] == 1) {
1742 		ndomains = cap_ndoms(iommu->cap);
1743 		num      = find_first_zero_bit(iommu->domain_ids, ndomains);
1744 
1745 		if (num >= ndomains) {
1746 			pr_err("%s: No free domain ids\n", iommu->name);
1747 			domain->iommu_refcnt[iommu->seq_id] -= 1;
1748 			domain->iommu_count -= 1;
1749 			return -ENOSPC;
1750 		}
1751 
1752 		set_bit(num, iommu->domain_ids);
1753 		set_iommu_domain(iommu, num, domain);
1754 
1755 		domain->iommu_did[iommu->seq_id] = num;
1756 		domain->nid			 = iommu->node;
1757 
1758 		domain_update_iommu_cap(domain);
1759 	}
1760 
1761 	return 0;
1762 }
1763 
1764 static int domain_detach_iommu(struct dmar_domain *domain,
1765 			       struct intel_iommu *iommu)
1766 {
1767 	int num, count;
1768 
1769 	assert_spin_locked(&device_domain_lock);
1770 	assert_spin_locked(&iommu->lock);
1771 
1772 	domain->iommu_refcnt[iommu->seq_id] -= 1;
1773 	count = --domain->iommu_count;
1774 	if (domain->iommu_refcnt[iommu->seq_id] == 0) {
1775 		num = domain->iommu_did[iommu->seq_id];
1776 		clear_bit(num, iommu->domain_ids);
1777 		set_iommu_domain(iommu, num, NULL);
1778 
1779 		domain_update_iommu_cap(domain);
1780 		domain->iommu_did[iommu->seq_id] = 0;
1781 	}
1782 
1783 	return count;
1784 }
1785 
1786 static struct iova_domain reserved_iova_list;
1787 static struct lock_class_key reserved_rbtree_key;
1788 
1789 static int dmar_init_reserved_ranges(void)
1790 {
1791 	struct pci_dev *pdev = NULL;
1792 	struct iova *iova;
1793 	int i;
1794 
1795 	init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
1796 
1797 	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1798 		&reserved_rbtree_key);
1799 
1800 	/* IOAPIC ranges shouldn't be accessed by DMA */
1801 	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1802 		IOVA_PFN(IOAPIC_RANGE_END));
1803 	if (!iova) {
1804 		pr_err("Reserve IOAPIC range failed\n");
1805 		return -ENODEV;
1806 	}
1807 
1808 	/* Reserve all PCI MMIO to avoid peer-to-peer access */
1809 	for_each_pci_dev(pdev) {
1810 		struct resource *r;
1811 
1812 		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1813 			r = &pdev->resource[i];
1814 			if (!r->flags || !(r->flags & IORESOURCE_MEM))
1815 				continue;
1816 			iova = reserve_iova(&reserved_iova_list,
1817 					    IOVA_PFN(r->start),
1818 					    IOVA_PFN(r->end));
1819 			if (!iova) {
1820 				pci_err(pdev, "Reserve iova for %pR failed\n", r);
1821 				return -ENODEV;
1822 			}
1823 		}
1824 	}
1825 	return 0;
1826 }
1827 
1828 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1829 {
1830 	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1831 }
1832 
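/*
 * guestwidth_to_adjustwidth() rounds the guest address width up so that
 * (gaw - 12) is a multiple of the 9-bit page-table stride, capped at 64.
 * e.g. gaw = 48 stays 48 (36 is already a multiple of 9), while gaw = 40
 * gives r = 1 and therefore agaw = 40 + 9 - 1 = 48.
 */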
1833 static inline int guestwidth_to_adjustwidth(int gaw)
1834 {
1835 	int agaw;
1836 	int r = (gaw - 12) % 9;
1837 
1838 	if (r == 0)
1839 		agaw = gaw;
1840 	else
1841 		agaw = gaw + 9 - r;
1842 	if (agaw > 64)
1843 		agaw = 64;
1844 	return agaw;
1845 }
1846 
1847 static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1848 		       int guest_width)
1849 {
1850 	int adjust_width, agaw;
1851 	unsigned long sagaw;
1852 	int err;
1853 
1854 	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
1855 
1856 	err = init_iova_flush_queue(&domain->iovad,
1857 				    iommu_flush_iova, iova_entry_free);
1858 	if (err)
1859 		return err;
1860 
1861 	domain_reserve_special_ranges(domain);
1862 
1863 	/* calculate AGAW */
1864 	if (guest_width > cap_mgaw(iommu->cap))
1865 		guest_width = cap_mgaw(iommu->cap);
1866 	domain->gaw = guest_width;
1867 	adjust_width = guestwidth_to_adjustwidth(guest_width);
1868 	agaw = width_to_agaw(adjust_width);
1869 	sagaw = cap_sagaw(iommu->cap);
1870 	if (!test_bit(agaw, &sagaw)) {
1871 		/* hardware doesn't support it, choose a bigger one */
1872 		pr_debug("Hardware doesn't support agaw %d\n", agaw);
1873 		agaw = find_next_bit(&sagaw, 5, agaw);
1874 		if (agaw >= 5)
1875 			return -ENODEV;
1876 	}
1877 	domain->agaw = agaw;
1878 
1879 	if (ecap_coherent(iommu->ecap))
1880 		domain->iommu_coherency = 1;
1881 	else
1882 		domain->iommu_coherency = 0;
1883 
1884 	if (ecap_sc_support(iommu->ecap))
1885 		domain->iommu_snooping = 1;
1886 	else
1887 		domain->iommu_snooping = 0;
1888 
1889 	if (intel_iommu_superpage)
1890 		domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1891 	else
1892 		domain->iommu_superpage = 0;
1893 
1894 	domain->nid = iommu->node;
1895 
1896 	/* always allocate the top pgd */
1897 	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1898 	if (!domain->pgd)
1899 		return -ENOMEM;
1900 	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1901 	return 0;
1902 }
1903 
1904 static void domain_exit(struct dmar_domain *domain)
1905 {
1906 
1907 	/* Remove associated devices and clear attached or cached domains */
1908 	domain_remove_dev_info(domain);
1909 
1910 	/* destroy iovas */
1911 	put_iova_domain(&domain->iovad);
1912 
1913 	if (domain->pgd) {
1914 		struct page *freelist;
1915 
1916 		freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1917 		dma_free_pagelist(freelist);
1918 	}
1919 
1920 	free_domain_mem(domain);
1921 }
1922 
1923 /*
1924  * Get the PASID directory size for scalable mode context entry.
1925  * Value of X in the PDTS field of a scalable mode context entry
1926  * indicates PASID directory with 2^(X + 7) entries.
1927  */
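/*
 * Worked example (assuming PASID_PDE_SHIFT is 6, i.e. 64 PASID-table
 * entries per directory entry): max_pasid = 1 << 20 gives
 * max_pde = 1 << 14, find_first_bit() returns 14 and pds becomes 7,
 * i.e. a directory of 2^(7 + 7) = 16384 entries.
 */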
1928 static inline unsigned long context_get_sm_pds(struct pasid_table *table)
1929 {
1930 	int pds, max_pde;
1931 
1932 	max_pde = table->max_pasid >> PASID_PDE_SHIFT;
1933 	pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
1934 	if (pds < 7)
1935 		return 0;
1936 
1937 	return pds - 7;
1938 }
1939 
1940 /*
1941  * Set the RID_PASID field of a scalable mode context entry. The
1942  * IOMMU hardware will use the PASID value set in this field for
1943  * DMA translations of DMA requests without PASID.
1944  */
1945 static inline void
1946 context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
1947 {
1948 	context->hi |= pasid & ((1 << 20) - 1);
1949 	context->hi |= (1 << 20);
1950 }
1951 
1952 /*
1953  * Set the DTE(Device-TLB Enable) field of a scalable mode context
1954  * entry.
1955  */
1956 static inline void context_set_sm_dte(struct context_entry *context)
1957 {
1958 	context->lo |= (1 << 2);
1959 }
1960 
1961 /*
1962  * Set the PRE(Page Request Enable) field of a scalable mode context
1963  * entry.
1964  */
1965 static inline void context_set_sm_pre(struct context_entry *context)
1966 {
1967 	context->lo |= (1 << 4);
1968 }
1969 
1970 /* Convert value to context PASID directory size field coding. */
1971 #define context_pdts(pds)	(((pds) & 0x7) << 9)
1972 
1973 static int domain_context_mapping_one(struct dmar_domain *domain,
1974 				      struct intel_iommu *iommu,
1975 				      struct pasid_table *table,
1976 				      u8 bus, u8 devfn)
1977 {
1978 	u16 did = domain->iommu_did[iommu->seq_id];
1979 	int translation = CONTEXT_TT_MULTI_LEVEL;
1980 	struct device_domain_info *info = NULL;
1981 	struct context_entry *context;
1982 	unsigned long flags;
1983 	int ret;
1984 
1985 	WARN_ON(did == 0);
1986 
1987 	if (hw_pass_through && domain_type_is_si(domain))
1988 		translation = CONTEXT_TT_PASS_THROUGH;
1989 
1990 	pr_debug("Set context mapping for %02x:%02x.%d\n",
1991 		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1992 
1993 	BUG_ON(!domain->pgd);
1994 
1995 	spin_lock_irqsave(&device_domain_lock, flags);
1996 	spin_lock(&iommu->lock);
1997 
1998 	ret = -ENOMEM;
1999 	context = iommu_context_addr(iommu, bus, devfn, 1);
2000 	if (!context)
2001 		goto out_unlock;
2002 
2003 	ret = 0;
2004 	if (context_present(context))
2005 		goto out_unlock;
2006 
2007 	/*
2008 	 * For kdump cases, old valid entries may be cached due to the
2009 	 * in-flight DMA and copied pgtable, but there is no unmapping
2010 	 * behaviour for them, thus we need an explicit cache flush for
2011 	 * the newly-mapped device. For kdump, at this point, the device
2012 	 * is supposed to finish reset at its driver probe stage, so no
2013 	 * in-flight DMA will exist, and we don't need to worry anymore
2014 	 * hereafter.
2015 	 */
2016 	if (context_copied(context)) {
2017 		u16 did_old = context_domain_id(context);
2018 
2019 		if (did_old < cap_ndoms(iommu->cap)) {
2020 			iommu->flush.flush_context(iommu, did_old,
2021 						   (((u16)bus) << 8) | devfn,
2022 						   DMA_CCMD_MASK_NOBIT,
2023 						   DMA_CCMD_DEVICE_INVL);
2024 			iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2025 						 DMA_TLB_DSI_FLUSH);
2026 		}
2027 	}
2028 
2029 	context_clear_entry(context);
2030 
2031 	if (sm_supported(iommu)) {
2032 		unsigned long pds;
2033 
2034 		WARN_ON(!table);
2035 
2036 		/* Setup the PASID DIR pointer: */
2037 		pds = context_get_sm_pds(table);
2038 		context->lo = (u64)virt_to_phys(table->table) |
2039 				context_pdts(pds);
2040 
2041 		/* Setup the RID_PASID field: */
2042 		context_set_sm_rid2pasid(context, PASID_RID2PASID);
2043 
2044 		/*
2045 		 * Setup the Device-TLB enable bit and Page request
2046 		 * Enable bit:
2047 		 */
2048 		info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2049 		if (info && info->ats_supported)
2050 			context_set_sm_dte(context);
2051 		if (info && info->pri_supported)
2052 			context_set_sm_pre(context);
2053 	} else {
2054 		struct dma_pte *pgd = domain->pgd;
2055 		int agaw;
2056 
2057 		context_set_domain_id(context, did);
2058 
2059 		if (translation != CONTEXT_TT_PASS_THROUGH) {
2060 			/*
2061 			 * Skip top levels of page tables for iommu which has
2062 			 * less agaw than default. Unnecessary for PT mode.
2063 			 */
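			/*
			 * e.g. a domain built with a 5-level (57-bit) table
			 * attached to an IOMMU that only supports 4-level
			 * walks descends through one top-level entry here so
			 * the context entry can point at a 4-level table.
			 */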
2064 			for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2065 				ret = -ENOMEM;
2066 				pgd = phys_to_virt(dma_pte_addr(pgd));
2067 				if (!dma_pte_present(pgd))
2068 					goto out_unlock;
2069 			}
2070 
2071 			info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2072 			if (info && info->ats_supported)
2073 				translation = CONTEXT_TT_DEV_IOTLB;
2074 			else
2075 				translation = CONTEXT_TT_MULTI_LEVEL;
2076 
2077 			context_set_address_root(context, virt_to_phys(pgd));
2078 			context_set_address_width(context, agaw);
2079 		} else {
2080 			/*
2081 			 * In pass through mode, AW must be programmed to
2082 			 * indicate the largest AGAW value supported by
2083 			 * hardware. And ASR is ignored by hardware.
2084 			 */
2085 			context_set_address_width(context, iommu->msagaw);
2086 		}
2087 
2088 		context_set_translation_type(context, translation);
2089 	}
2090 
2091 	context_set_fault_enable(context);
2092 	context_set_present(context);
2093 	domain_flush_cache(domain, context, sizeof(*context));
2094 
2095 	/*
2096 	 * It's a non-present to present mapping. If hardware doesn't cache
2097 	 * non-present entries we only need to flush the write-buffer. If it
2098 	 * _does_ cache non-present entries, then it does so in the special
2099 	 * domain #0, which we have to flush:
2100 	 */
2101 	if (cap_caching_mode(iommu->cap)) {
2102 		iommu->flush.flush_context(iommu, 0,
2103 					   (((u16)bus) << 8) | devfn,
2104 					   DMA_CCMD_MASK_NOBIT,
2105 					   DMA_CCMD_DEVICE_INVL);
2106 		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2107 	} else {
2108 		iommu_flush_write_buffer(iommu);
2109 	}
2110 	iommu_enable_dev_iotlb(info);
2111 
2112 	ret = 0;
2113 
2114 out_unlock:
2115 	spin_unlock(&iommu->lock);
2116 	spin_unlock_irqrestore(&device_domain_lock, flags);
2117 
2118 	return ret;
2119 }
2120 
2121 struct domain_context_mapping_data {
2122 	struct dmar_domain *domain;
2123 	struct intel_iommu *iommu;
2124 	struct pasid_table *table;
2125 };
2126 
2127 static int domain_context_mapping_cb(struct pci_dev *pdev,
2128 				     u16 alias, void *opaque)
2129 {
2130 	struct domain_context_mapping_data *data = opaque;
2131 
2132 	return domain_context_mapping_one(data->domain, data->iommu,
2133 					  data->table, PCI_BUS_NUM(alias),
2134 					  alias & 0xff);
2135 }
2136 
2137 static int
2138 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2139 {
2140 	struct domain_context_mapping_data data;
2141 	struct pasid_table *table;
2142 	struct intel_iommu *iommu;
2143 	u8 bus, devfn;
2144 
2145 	iommu = device_to_iommu(dev, &bus, &devfn);
2146 	if (!iommu)
2147 		return -ENODEV;
2148 
2149 	table = intel_pasid_get_table(dev);
2150 
2151 	if (!dev_is_pci(dev))
2152 		return domain_context_mapping_one(domain, iommu, table,
2153 						  bus, devfn);
2154 
2155 	data.domain = domain;
2156 	data.iommu = iommu;
2157 	data.table = table;
2158 
2159 	return pci_for_each_dma_alias(to_pci_dev(dev),
2160 				      &domain_context_mapping_cb, &data);
2161 }
2162 
2163 static int domain_context_mapped_cb(struct pci_dev *pdev,
2164 				    u16 alias, void *opaque)
2165 {
2166 	struct intel_iommu *iommu = opaque;
2167 
2168 	return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2169 }
2170 
2171 static int domain_context_mapped(struct device *dev)
2172 {
2173 	struct intel_iommu *iommu;
2174 	u8 bus, devfn;
2175 
2176 	iommu = device_to_iommu(dev, &bus, &devfn);
2177 	if (!iommu)
2178 		return -ENODEV;
2179 
2180 	if (!dev_is_pci(dev))
2181 		return device_context_mapped(iommu, bus, devfn);
2182 
2183 	return !pci_for_each_dma_alias(to_pci_dev(dev),
2184 				       domain_context_mapped_cb, iommu);
2185 }
2186 
2187 /* Returns a number of VTD pages, but aligned to MM page size */
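/*
 * e.g. with 4KiB pages, host_addr = 0x1234 and size = 0x2000 leave an
 * in-page offset of 0x234; PAGE_ALIGN(0x2234) = 0x3000, i.e. 3 VT-d pages.
 */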
2188 static inline unsigned long aligned_nrpages(unsigned long host_addr,
2189 					    size_t size)
2190 {
2191 	host_addr &= ~PAGE_MASK;
2192 	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2193 }
2194 
2195 /* Return largest possible superpage level for a given mapping */
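/*
 * e.g. with 2MiB superpage support (iommu_superpage == 1), an IOVA PFN and
 * physical PFN that are both 512-page aligned together with a run of at
 * least 512 pages yield level 2; any low stride bits set in either PFN keep
 * the result at level 1 (4KiB).
 */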
2196 static inline int hardware_largepage_caps(struct dmar_domain *domain,
2197 					  unsigned long iov_pfn,
2198 					  unsigned long phy_pfn,
2199 					  unsigned long pages)
2200 {
2201 	int support, level = 1;
2202 	unsigned long pfnmerge;
2203 
2204 	support = domain->iommu_superpage;
2205 
2206 	/* To use a large page, the virtual *and* physical addresses
2207 	   must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2208 	   of them will mean we have to use smaller pages. So just
2209 	   merge them and check both at once. */
2210 	pfnmerge = iov_pfn | phy_pfn;
2211 
2212 	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2213 		pages >>= VTD_STRIDE_SHIFT;
2214 		if (!pages)
2215 			break;
2216 		pfnmerge >>= VTD_STRIDE_SHIFT;
2217 		level++;
2218 		support--;
2219 	}
2220 	return level;
2221 }
2222 
2223 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2224 			    struct scatterlist *sg, unsigned long phys_pfn,
2225 			    unsigned long nr_pages, int prot)
2226 {
2227 	struct dma_pte *first_pte = NULL, *pte = NULL;
2228 	phys_addr_t uninitialized_var(pteval);
2229 	unsigned long sg_res = 0;
2230 	unsigned int largepage_lvl = 0;
2231 	unsigned long lvl_pages = 0;
2232 
2233 	BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2234 
2235 	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2236 		return -EINVAL;
2237 
2238 	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2239 
2240 	if (!sg) {
2241 		sg_res = nr_pages;
2242 		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2243 	}
2244 
2245 	while (nr_pages > 0) {
2246 		uint64_t tmp;
2247 
2248 		if (!sg_res) {
2249 			unsigned int pgoff = sg->offset & ~PAGE_MASK;
2250 
2251 			sg_res = aligned_nrpages(sg->offset, sg->length);
2252 			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
2253 			sg->dma_length = sg->length;
2254 			pteval = (sg_phys(sg) - pgoff) | prot;
2255 			phys_pfn = pteval >> VTD_PAGE_SHIFT;
2256 		}
2257 
2258 		if (!pte) {
2259 			largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2260 
2261 			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2262 			if (!pte)
2263 				return -ENOMEM;
2264 			/* It is a large page */
2265 			if (largepage_lvl > 1) {
2266 				unsigned long nr_superpages, end_pfn;
2267 
2268 				pteval |= DMA_PTE_LARGE_PAGE;
2269 				lvl_pages = lvl_to_nr_pages(largepage_lvl);
2270 
2271 				nr_superpages = sg_res / lvl_pages;
2272 				end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2273 
2274 				/*
2275 				 * Ensure that old small page tables are
2276 				 * removed to make room for superpage(s).
2277 				 * We're adding new large pages, so make sure
2278 				 * we don't remove their parent tables.
2279 				 */
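				/*
				 * e.g. at level 2 (2MiB pages) lvl_pages is
				 * 512; an sg_res of 1024 pages spans two
				 * superpages, so the old 4KiB tables covering
				 * iov_pfn .. iov_pfn + 1023 are freed while
				 * their level-3 parents are kept.
				 */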
2280 				dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2281 						       largepage_lvl + 1);
2282 			} else {
2283 				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2284 			}
2285 
2286 		}
2287 		/* We don't need a lock here; nobody else
2288 		 * touches the iova range
2289 		 */
2290 		tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2291 		if (tmp) {
2292 			static int dumps = 5;
2293 			pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2294 				iov_pfn, tmp, (unsigned long long)pteval);
2295 			if (dumps) {
2296 				dumps--;
2297 				debug_dma_dump_mappings(NULL);
2298 			}
2299 			WARN_ON(1);
2300 		}
2301 
2302 		lvl_pages = lvl_to_nr_pages(largepage_lvl);
2303 
2304 		BUG_ON(nr_pages < lvl_pages);
2305 		BUG_ON(sg_res < lvl_pages);
2306 
2307 		nr_pages -= lvl_pages;
2308 		iov_pfn += lvl_pages;
2309 		phys_pfn += lvl_pages;
2310 		pteval += lvl_pages * VTD_PAGE_SIZE;
2311 		sg_res -= lvl_pages;
2312 
2313 		/* If the next PTE would be the first in a new page, then we
2314 		   need to flush the cache on the entries we've just written.
2315 		   And then we'll need to recalculate 'pte', so clear it and
2316 		   let it get set again in the if (!pte) block above.
2317 
2318 		   If we're done (!nr_pages) we need to flush the cache too.
2319 
2320 		   Also if we've been setting superpages, we may need to
2321 		   recalculate 'pte' and switch back to smaller pages for the
2322 		   end of the mapping, if the trailing size is not enough to
2323 		   use another superpage (i.e. sg_res < lvl_pages). */
2324 		pte++;
2325 		if (!nr_pages || first_pte_in_page(pte) ||
2326 		    (largepage_lvl > 1 && sg_res < lvl_pages)) {
2327 			domain_flush_cache(domain, first_pte,
2328 					   (void *)pte - (void *)first_pte);
2329 			pte = NULL;
2330 		}
2331 
2332 		if (!sg_res && nr_pages)
2333 			sg = sg_next(sg);
2334 	}
2335 	return 0;
2336 }
2337 
2338 static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2339 			  struct scatterlist *sg, unsigned long phys_pfn,
2340 			  unsigned long nr_pages, int prot)
2341 {
2342 	int iommu_id, ret;
2343 	struct intel_iommu *iommu;
2344 
2345 	/* Do the real mapping first */
2346 	ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2347 	if (ret)
2348 		return ret;
2349 
2350 	for_each_domain_iommu(iommu_id, domain) {
2351 		iommu = g_iommus[iommu_id];
2352 		__mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2353 	}
2354 
2355 	return 0;
2356 }
2357 
2358 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2359 				    struct scatterlist *sg, unsigned long nr_pages,
2360 				    int prot)
2361 {
2362 	return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2363 }
2364 
2365 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2366 				     unsigned long phys_pfn, unsigned long nr_pages,
2367 				     int prot)
2368 {
2369 	return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2370 }
2371 
2372 static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
2373 {
2374 	unsigned long flags;
2375 	struct context_entry *context;
2376 	u16 did_old;
2377 
2378 	if (!iommu)
2379 		return;
2380 
2381 	spin_lock_irqsave(&iommu->lock, flags);
2382 	context = iommu_context_addr(iommu, bus, devfn, 0);
2383 	if (!context) {
2384 		spin_unlock_irqrestore(&iommu->lock, flags);
2385 		return;
2386 	}
2387 	did_old = context_domain_id(context);
2388 	context_clear_entry(context);
2389 	__iommu_flush_cache(iommu, context, sizeof(*context));
2390 	spin_unlock_irqrestore(&iommu->lock, flags);
2391 	iommu->flush.flush_context(iommu,
2392 				   did_old,
2393 				   (((u16)bus) << 8) | devfn,
2394 				   DMA_CCMD_MASK_NOBIT,
2395 				   DMA_CCMD_DEVICE_INVL);
2396 	iommu->flush.flush_iotlb(iommu,
2397 				 did_old,
2398 				 0,
2399 				 0,
2400 				 DMA_TLB_DSI_FLUSH);
2401 }
2402 
2403 static inline void unlink_domain_info(struct device_domain_info *info)
2404 {
2405 	assert_spin_locked(&device_domain_lock);
2406 	list_del(&info->link);
2407 	list_del(&info->global);
2408 	if (info->dev)
2409 		info->dev->archdata.iommu = NULL;
2410 }
2411 
2412 static void domain_remove_dev_info(struct dmar_domain *domain)
2413 {
2414 	struct device_domain_info *info, *tmp;
2415 	unsigned long flags;
2416 
2417 	spin_lock_irqsave(&device_domain_lock, flags);
2418 	list_for_each_entry_safe(info, tmp, &domain->devices, link)
2419 		__dmar_remove_one_dev_info(info);
2420 	spin_unlock_irqrestore(&device_domain_lock, flags);
2421 }
2422 
2423 /*
2424  * find_domain
2425  * Note: we use struct device->archdata.iommu to store the info
2426  */
2427 static struct dmar_domain *find_domain(struct device *dev)
2428 {
2429 	struct device_domain_info *info;
2430 
2431 	if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO)) {
2432 		struct iommu_domain *domain;
2433 
2434 		dev->archdata.iommu = NULL;
2435 		domain = iommu_get_domain_for_dev(dev);
2436 		if (domain)
2437 			intel_iommu_attach_device(domain, dev);
2438 	}
2439 
2440 	/* No lock here; we assume no domain exit in the normal case */
2441 	info = dev->archdata.iommu;
2442 
2443 	if (likely(info))
2444 		return info->domain;
2445 	return NULL;
2446 }
2447 
2448 static inline struct device_domain_info *
2449 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2450 {
2451 	struct device_domain_info *info;
2452 
2453 	list_for_each_entry(info, &device_domain_list, global)
2454 		if (info->iommu->segment == segment && info->bus == bus &&
2455 		    info->devfn == devfn)
2456 			return info;
2457 
2458 	return NULL;
2459 }
2460 
2461 static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2462 						    int bus, int devfn,
2463 						    struct device *dev,
2464 						    struct dmar_domain *domain)
2465 {
2466 	struct dmar_domain *found = NULL;
2467 	struct device_domain_info *info;
2468 	unsigned long flags;
2469 	int ret;
2470 
2471 	info = alloc_devinfo_mem();
2472 	if (!info)
2473 		return NULL;
2474 
2475 	info->bus = bus;
2476 	info->devfn = devfn;
2477 	info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2478 	info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2479 	info->ats_qdep = 0;
2480 	info->dev = dev;
2481 	info->domain = domain;
2482 	info->iommu = iommu;
2483 	info->pasid_table = NULL;
2484 	info->auxd_enabled = 0;
2485 	INIT_LIST_HEAD(&info->auxiliary_domains);
2486 
2487 	if (dev && dev_is_pci(dev)) {
2488 		struct pci_dev *pdev = to_pci_dev(info->dev);
2489 
2490 		if (!pdev->untrusted &&
2491 		    !pci_ats_disabled() &&
2492 		    ecap_dev_iotlb_support(iommu->ecap) &&
2493 		    pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2494 		    dmar_find_matched_atsr_unit(pdev))
2495 			info->ats_supported = 1;
2496 
2497 		if (sm_supported(iommu)) {
2498 			if (pasid_supported(iommu)) {
2499 				int features = pci_pasid_features(pdev);
2500 				if (features >= 0)
2501 					info->pasid_supported = features | 1;
2502 			}
2503 
2504 			if (info->ats_supported && ecap_prs(iommu->ecap) &&
2505 			    pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2506 				info->pri_supported = 1;
2507 		}
2508 	}
2509 
2510 	spin_lock_irqsave(&device_domain_lock, flags);
2511 	if (dev)
2512 		found = find_domain(dev);
2513 
2514 	if (!found) {
2515 		struct device_domain_info *info2;
2516 		info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2517 		if (info2) {
2518 			found      = info2->domain;
2519 			info2->dev = dev;
2520 		}
2521 	}
2522 
2523 	if (found) {
2524 		spin_unlock_irqrestore(&device_domain_lock, flags);
2525 		free_devinfo_mem(info);
2526 		/* Caller must free the original domain */
2527 		return found;
2528 	}
2529 
2530 	spin_lock(&iommu->lock);
2531 	ret = domain_attach_iommu(domain, iommu);
2532 	spin_unlock(&iommu->lock);
2533 
2534 	if (ret) {
2535 		spin_unlock_irqrestore(&device_domain_lock, flags);
2536 		free_devinfo_mem(info);
2537 		return NULL;
2538 	}
2539 
2540 	list_add(&info->link, &domain->devices);
2541 	list_add(&info->global, &device_domain_list);
2542 	if (dev)
2543 		dev->archdata.iommu = info;
2544 	spin_unlock_irqrestore(&device_domain_lock, flags);
2545 
2546 	/* PASID table is mandatory for a PCI device in scalable mode. */
2547 	if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
2548 		ret = intel_pasid_alloc_table(dev);
2549 		if (ret) {
2550 			dev_err(dev, "PASID table allocation failed\n");
2551 			dmar_remove_one_dev_info(dev);
2552 			return NULL;
2553 		}
2554 
2555 		/* Setup the PASID entry for requests without PASID: */
2556 		spin_lock(&iommu->lock);
2557 		if (hw_pass_through && domain_type_is_si(domain))
2558 			ret = intel_pasid_setup_pass_through(iommu, domain,
2559 					dev, PASID_RID2PASID);
2560 		else
2561 			ret = intel_pasid_setup_second_level(iommu, domain,
2562 					dev, PASID_RID2PASID);
2563 		spin_unlock(&iommu->lock);
2564 		if (ret) {
2565 			dev_err(dev, "Setup RID2PASID failed\n");
2566 			dmar_remove_one_dev_info(dev);
2567 			return NULL;
2568 		}
2569 	}
2570 
2571 	if (dev && domain_context_mapping(domain, dev)) {
2572 		dev_err(dev, "Domain context map failed\n");
2573 		dmar_remove_one_dev_info(dev);
2574 		return NULL;
2575 	}
2576 
2577 	return domain;
2578 }
2579 
2580 static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2581 {
2582 	*(u16 *)opaque = alias;
2583 	return 0;
2584 }
2585 
2586 static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
2587 {
2588 	struct device_domain_info *info;
2589 	struct dmar_domain *domain = NULL;
2590 	struct intel_iommu *iommu;
2591 	u16 dma_alias;
2592 	unsigned long flags;
2593 	u8 bus, devfn;
2594 
2595 	iommu = device_to_iommu(dev, &bus, &devfn);
2596 	if (!iommu)
2597 		return NULL;
2598 
2599 	if (dev_is_pci(dev)) {
2600 		struct pci_dev *pdev = to_pci_dev(dev);
2601 
2602 		pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2603 
2604 		spin_lock_irqsave(&device_domain_lock, flags);
2605 		info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2606 						      PCI_BUS_NUM(dma_alias),
2607 						      dma_alias & 0xff);
2608 		if (info) {
2609 			iommu = info->iommu;
2610 			domain = info->domain;
2611 		}
2612 		spin_unlock_irqrestore(&device_domain_lock, flags);
2613 
2614 		/* DMA alias already has a domain, use it */
2615 		if (info)
2616 			goto out;
2617 	}
2618 
2619 	/* Allocate and initialize new domain for the device */
2620 	domain = alloc_domain(0);
2621 	if (!domain)
2622 		return NULL;
2623 	if (domain_init(domain, iommu, gaw)) {
2624 		domain_exit(domain);
2625 		return NULL;
2626 	}
2627 
2628 out:
2629 	return domain;
2630 }
2631 
2632 static struct dmar_domain *set_domain_for_dev(struct device *dev,
2633 					      struct dmar_domain *domain)
2634 {
2635 	struct intel_iommu *iommu;
2636 	struct dmar_domain *tmp;
2637 	u16 req_id, dma_alias;
2638 	u8 bus, devfn;
2639 
2640 	iommu = device_to_iommu(dev, &bus, &devfn);
2641 	if (!iommu)
2642 		return NULL;
2643 
2644 	req_id = ((u16)bus << 8) | devfn;
2645 
2646 	if (dev_is_pci(dev)) {
2647 		struct pci_dev *pdev = to_pci_dev(dev);
2648 
2649 		pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2650 
2651 		/* register PCI DMA alias device */
2652 		if (req_id != dma_alias) {
2653 			tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2654 					dma_alias & 0xff, NULL, domain);
2655 
2656 			if (!tmp || tmp != domain)
2657 				return tmp;
2658 		}
2659 	}
2660 
2661 	tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2662 	if (!tmp || tmp != domain)
2663 		return tmp;
2664 
2665 	return domain;
2666 }
2667 
2668 static int iommu_domain_identity_map(struct dmar_domain *domain,
2669 				     unsigned long long start,
2670 				     unsigned long long end)
2671 {
2672 	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2673 	unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2674 
2675 	if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2676 			  dma_to_mm_pfn(last_vpfn))) {
2677 		pr_err("Reserving iova failed\n");
2678 		return -ENOMEM;
2679 	}
2680 
2681 	pr_debug("Mapping reserved region %llx-%llx\n", start, end);
2682 	/*
2683 	 * The RMRR range might overlap with the physical memory range;
2684 	 * clear it first
2685 	 */
2686 	dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2687 
2688 	return __domain_mapping(domain, first_vpfn, NULL,
2689 				first_vpfn, last_vpfn - first_vpfn + 1,
2690 				DMA_PTE_READ|DMA_PTE_WRITE);
2691 }
2692 
2693 static int domain_prepare_identity_map(struct device *dev,
2694 				       struct dmar_domain *domain,
2695 				       unsigned long long start,
2696 				       unsigned long long end)
2697 {
2698 	/* For _hardware_ passthrough, don't bother. But for software
2699 	   passthrough, we do it anyway -- it may indicate a memory
2700 	   range which is reserved in E820 and so didn't get set
2701 	   up in si_domain to start with */
2702 	if (domain == si_domain && hw_pass_through) {
2703 		dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
2704 			 start, end);
2705 		return 0;
2706 	}
2707 
2708 	dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end);
2709 
2710 	if (end < start) {
2711 		WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2712 			"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2713 			dmi_get_system_info(DMI_BIOS_VENDOR),
2714 			dmi_get_system_info(DMI_BIOS_VERSION),
2715 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
2716 		return -EIO;
2717 	}
2718 
2719 	if (end >> agaw_to_width(domain->agaw)) {
2720 		WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2721 		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2722 		     agaw_to_width(domain->agaw),
2723 		     dmi_get_system_info(DMI_BIOS_VENDOR),
2724 		     dmi_get_system_info(DMI_BIOS_VERSION),
2725 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
2726 		return -EIO;
2727 	}
2728 
2729 	return iommu_domain_identity_map(domain, start, end);
2730 }
2731 
2732 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2733 
2734 static int __init si_domain_init(int hw)
2735 {
2736 	struct dmar_rmrr_unit *rmrr;
2737 	struct device *dev;
2738 	int i, nid, ret;
2739 
2740 	si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2741 	if (!si_domain)
2742 		return -EFAULT;
2743 
2744 	if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2745 		domain_exit(si_domain);
2746 		return -EFAULT;
2747 	}
2748 
2749 	if (hw)
2750 		return 0;
2751 
2752 	for_each_online_node(nid) {
2753 		unsigned long start_pfn, end_pfn;
2754 		int i;
2755 
2756 		for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2757 			ret = iommu_domain_identity_map(si_domain,
2758 					PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2759 			if (ret)
2760 				return ret;
2761 		}
2762 	}
2763 
2764 	/*
2765 	 * Normally we use DMA domains for devices which have RMRRs. But we
2766  * lose this requirement for graphics and USB devices. Identity map
2767  * the RMRRs for graphics and USB devices so that they can use the
2768 	 * si_domain.
2769 	 */
2770 	for_each_rmrr_units(rmrr) {
2771 		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2772 					  i, dev) {
2773 			unsigned long long start = rmrr->base_address;
2774 			unsigned long long end = rmrr->end_address;
2775 
2776 			if (device_is_rmrr_locked(dev))
2777 				continue;
2778 
2779 			if (WARN_ON(end < start ||
2780 				    end >> agaw_to_width(si_domain->agaw)))
2781 				continue;
2782 
2783 			ret = iommu_domain_identity_map(si_domain, start, end);
2784 			if (ret)
2785 				return ret;
2786 		}
2787 	}
2788 
2789 	return 0;
2790 }
2791 
2792 static int identity_mapping(struct device *dev)
2793 {
2794 	struct device_domain_info *info;
2795 
2796 	info = dev->archdata.iommu;
2797 	if (info && info != DUMMY_DEVICE_DOMAIN_INFO && info != DEFER_DEVICE_DOMAIN_INFO)
2798 		return (info->domain == si_domain);
2799 
2800 	return 0;
2801 }
2802 
2803 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2804 {
2805 	struct dmar_domain *ndomain;
2806 	struct intel_iommu *iommu;
2807 	u8 bus, devfn;
2808 
2809 	iommu = device_to_iommu(dev, &bus, &devfn);
2810 	if (!iommu)
2811 		return -ENODEV;
2812 
2813 	ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2814 	if (ndomain != domain)
2815 		return -EBUSY;
2816 
2817 	return 0;
2818 }
2819 
2820 static bool device_has_rmrr(struct device *dev)
2821 {
2822 	struct dmar_rmrr_unit *rmrr;
2823 	struct device *tmp;
2824 	int i;
2825 
2826 	rcu_read_lock();
2827 	for_each_rmrr_units(rmrr) {
2828 		/*
2829 		 * Return TRUE if this RMRR contains the device that
2830 		 * is passed in.
2831 		 */
2832 		for_each_active_dev_scope(rmrr->devices,
2833 					  rmrr->devices_cnt, i, tmp)
2834 			if (tmp == dev ||
2835 			    is_downstream_to_pci_bridge(dev, tmp)) {
2836 				rcu_read_unlock();
2837 				return true;
2838 			}
2839 	}
2840 	rcu_read_unlock();
2841 	return false;
2842 }
2843 
2844 /**
2845  * device_rmrr_is_relaxable - Test whether the RMRR of this device
2846  * is relaxable (i.e. is allowed to be left unenforced under some conditions)
2847  * @dev: device handle
2848  *
2849  * We assume that PCI USB devices with RMRRs have them largely
2850  * for historical reasons and that the RMRR space is not actively used post
2851  * boot.  This exclusion may change if vendors begin to abuse it.
2852  *
2853  * The same exception is made for graphics devices, with the requirement that
2854  * any use of the RMRR regions will be torn down before assigning the device
2855  * to a guest.
2856  *
2857  * Return: true if the RMRR is relaxable, false otherwise
2858  */
2859 static bool device_rmrr_is_relaxable(struct device *dev)
2860 {
2861 	struct pci_dev *pdev;
2862 
2863 	if (!dev_is_pci(dev))
2864 		return false;
2865 
2866 	pdev = to_pci_dev(dev);
2867 	if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2868 		return true;
2869 	else
2870 		return false;
2871 }
2872 
2873 /*
2874  * There are a couple cases where we need to restrict the functionality of
2875  * devices associated with RMRRs.  The first is when evaluating a device for
2876  * identity mapping because problems exist when devices are moved in and out
2877  * of domains and their respective RMRR information is lost.  This means that
2878  * a device with associated RMRRs will never be in a "passthrough" domain.
2879  * The second is use of the device through the IOMMU API.  This interface
2880  * expects to have full control of the IOVA space for the device.  We cannot
2881  * satisfy both the requirement that RMRR access is maintained and have an
2882  * unencumbered IOVA space.  We also have no ability to quiesce the device's
2883  * use of the RMRR space or even inform the IOMMU API user of the restriction.
2884  * We therefore prevent devices associated with an RMRR from participating in
2885  * the IOMMU API, which eliminates them from device assignment.
2886  *
2887  * In both cases, devices which have relaxable RMRRs are not concerned by this
2888  * restriction. See device_rmrr_is_relaxable comment.
2889  */
2890 static bool device_is_rmrr_locked(struct device *dev)
2891 {
2892 	if (!device_has_rmrr(dev))
2893 		return false;
2894 
2895 	if (device_rmrr_is_relaxable(dev))
2896 		return false;
2897 
2898 	return true;
2899 }
2900 
2901 /*
2902  * Return the required default domain type for a specific device.
2903  *
2904  * @dev: the device in question
2906  *
2907  * Returns:
2908  *  - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
2909  *  - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain
2910  *  - 0: both identity and dynamic domains work for this device
2911  */
2912 static int device_def_domain_type(struct device *dev)
2913 {
2914 	if (dev_is_pci(dev)) {
2915 		struct pci_dev *pdev = to_pci_dev(dev);
2916 
2917 		if (device_is_rmrr_locked(dev))
2918 			return IOMMU_DOMAIN_DMA;
2919 
2920 		/*
2921 		 * Prevent any device marked as untrusted from getting
2922 		 * placed into the static identity mapping domain.
2923 		 */
2924 		if (pdev->untrusted)
2925 			return IOMMU_DOMAIN_DMA;
2926 
2927 		if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2928 			return IOMMU_DOMAIN_IDENTITY;
2929 
2930 		if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2931 			return IOMMU_DOMAIN_IDENTITY;
2932 
2933 		/*
2934 		 * We want to start off with all devices in the 1:1 domain, and
2935 		 * take them out later if we find they can't access all of memory.
2936 		 *
2937 		 * However, we can't do this for PCI devices behind bridges,
2938 		 * because all PCI devices behind the same bridge will end up
2939 		 * with the same source-id on their transactions.
2940 		 *
2941 		 * Practically speaking, we can't change things around for these
2942 		 * devices at run-time, because we can't be sure there'll be no
2943 		 * DMA transactions in flight for any of their siblings.
2944 		 *
2945 		 * So PCI devices (unless they're on the root bus) as well as
2946 		 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2947 		 * the 1:1 domain, just in _case_ one of their siblings turns out
2948 		 * not to be able to map all of memory.
2949 		 */
2950 		if (!pci_is_pcie(pdev)) {
2951 			if (!pci_is_root_bus(pdev->bus))
2952 				return IOMMU_DOMAIN_DMA;
2953 			if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2954 				return IOMMU_DOMAIN_DMA;
2955 		} else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2956 			return IOMMU_DOMAIN_DMA;
2957 	} else {
2958 		if (device_has_rmrr(dev))
2959 			return IOMMU_DOMAIN_DMA;
2960 	}
2961 
2962 	return (iommu_identity_mapping & IDENTMAP_ALL) ?
2963 			IOMMU_DOMAIN_IDENTITY : 0;
2964 }
2965 
2966 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2967 {
2968 	/*
2969 	 * Start from a sane iommu hardware state.
2970 	 * If queued invalidation was already initialized by us
2971 	 * (for example, while enabling interrupt-remapping) then
2972 	 * we already have things rolling from a sane state.
2973 	 */
2974 	if (!iommu->qi) {
2975 		/*
2976 		 * Clear any previous faults.
2977 		 */
2978 		dmar_fault(-1, iommu);
2979 		/*
2980 		 * Disable queued invalidation if supported and already enabled
2981 		 * before OS handover.
2982 		 */
2983 		dmar_disable_qi(iommu);
2984 	}
2985 
2986 	if (dmar_enable_qi(iommu)) {
2987 		/*
2988 		 * Queued Invalidate not enabled, use Register Based Invalidate
2989 		 */
2990 		iommu->flush.flush_context = __iommu_flush_context;
2991 		iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2992 		pr_info("%s: Using Register based invalidation\n",
2993 			iommu->name);
2994 	} else {
2995 		iommu->flush.flush_context = qi_flush_context;
2996 		iommu->flush.flush_iotlb = qi_flush_iotlb;
2997 		pr_info("%s: Using Queued invalidation\n", iommu->name);
2998 	}
2999 }
3000 
3001 static int copy_context_table(struct intel_iommu *iommu,
3002 			      struct root_entry *old_re,
3003 			      struct context_entry **tbl,
3004 			      int bus, bool ext)
3005 {
3006 	int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
3007 	struct context_entry *new_ce = NULL, ce;
3008 	struct context_entry *old_ce = NULL;
3009 	struct root_entry re;
3010 	phys_addr_t old_ce_phys;
3011 
3012 	tbl_idx = ext ? bus * 2 : bus;
3013 	memcpy(&re, old_re, sizeof(re));
3014 
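	/*
	 * In extended (ext) mode each context entry is twice the legacy
	 * size, so one 4KiB table covers only 128 functions: the root
	 * entry's lower pointer covers devfn 0x00-0x7f and the upper
	 * pointer covers 0x80-0xff.  Hence idx below uses devfn * 2 modulo
	 * 256 and two output tables are kept per bus (tbl_idx = bus * 2).
	 */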
3015 	for (devfn = 0; devfn < 256; devfn++) {
3016 		/* First calculate the correct index */
3017 		idx = (ext ? devfn * 2 : devfn) % 256;
3018 
3019 		if (idx == 0) {
3020 			/* First save what we may have and clean up */
3021 			if (new_ce) {
3022 				tbl[tbl_idx] = new_ce;
3023 				__iommu_flush_cache(iommu, new_ce,
3024 						    VTD_PAGE_SIZE);
3025 				pos = 1;
3026 			}
3027 
3028 			if (old_ce)
3029 				memunmap(old_ce);
3030 
3031 			ret = 0;
3032 			if (devfn < 0x80)
3033 				old_ce_phys = root_entry_lctp(&re);
3034 			else
3035 				old_ce_phys = root_entry_uctp(&re);
3036 
3037 			if (!old_ce_phys) {
3038 				if (ext && devfn == 0) {
3039 					/* No LCTP, try UCTP */
3040 					devfn = 0x7f;
3041 					continue;
3042 				} else {
3043 					goto out;
3044 				}
3045 			}
3046 
3047 			ret = -ENOMEM;
3048 			old_ce = memremap(old_ce_phys, PAGE_SIZE,
3049 					MEMREMAP_WB);
3050 			if (!old_ce)
3051 				goto out;
3052 
3053 			new_ce = alloc_pgtable_page(iommu->node);
3054 			if (!new_ce)
3055 				goto out_unmap;
3056 
3057 			ret = 0;
3058 		}
3059 
3060 		/* Now copy the context entry */
3061 		memcpy(&ce, old_ce + idx, sizeof(ce));
3062 
3063 		if (!__context_present(&ce))
3064 			continue;
3065 
3066 		did = context_domain_id(&ce);
3067 		if (did >= 0 && did < cap_ndoms(iommu->cap))
3068 			set_bit(did, iommu->domain_ids);
3069 
3070 		/*
3071 		 * We need a marker for copied context entries. This
3072 		 * marker needs to work for the old format as well as
3073 		 * for extended context entries.
3074 		 *
3075 		 * Bit 67 of the context entry is used. In the old
3076 		 * format this bit is available to software, in the
3077 		 * extended format it is the PGE bit, but PGE is ignored
3078 		 * by HW if PASIDs are disabled (and thus still
3079 		 * available).
3080 		 *
3081 		 * So disable PASIDs first and then mark the entry
3082 		 * copied. This means that we don't copy PASID
3083 		 * translations from the old kernel, but this is fine as
3084 		 * faults there are not fatal.
3085 		 */
3086 		context_clear_pasid_enable(&ce);
3087 		context_set_copied(&ce);
3088 
3089 		new_ce[idx] = ce;
3090 	}
3091 
3092 	tbl[tbl_idx + pos] = new_ce;
3093 
3094 	__iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3095 
3096 out_unmap:
3097 	memunmap(old_ce);
3098 
3099 out:
3100 	return ret;
3101 }
3102 
3103 static int copy_translation_tables(struct intel_iommu *iommu)
3104 {
3105 	struct context_entry **ctxt_tbls;
3106 	struct root_entry *old_rt;
3107 	phys_addr_t old_rt_phys;
3108 	int ctxt_table_entries;
3109 	unsigned long flags;
3110 	u64 rtaddr_reg;
3111 	int bus, ret;
3112 	bool new_ext, ext;
3113 
3114 	rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3115 	ext        = !!(rtaddr_reg & DMA_RTADDR_RTT);
3116 	new_ext    = !!ecap_ecs(iommu->ecap);
3117 
3118 	/*
3119 	 * The RTT bit can only be changed when translation is disabled,
3120 	 * but disabling translation means opening a window for data
3121 	 * corruption. So bail out and don't copy anything if we would
3122 	 * have to change the bit.
3123 	 */
3124 	if (new_ext != ext)
3125 		return -EINVAL;
3126 
3127 	old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3128 	if (!old_rt_phys)
3129 		return -EINVAL;
3130 
3131 	old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
3132 	if (!old_rt)
3133 		return -ENOMEM;
3134 
3135 	/* This is too big for the stack - allocate it from slab */
3136 	ctxt_table_entries = ext ? 512 : 256;
3137 	ret = -ENOMEM;
3138 	ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
3139 	if (!ctxt_tbls)
3140 		goto out_unmap;
3141 
3142 	for (bus = 0; bus < 256; bus++) {
3143 		ret = copy_context_table(iommu, &old_rt[bus],
3144 					 ctxt_tbls, bus, ext);
3145 		if (ret) {
3146 			pr_err("%s: Failed to copy context table for bus %d\n",
3147 				iommu->name, bus);
3148 			continue;
3149 		}
3150 	}
3151 
3152 	spin_lock_irqsave(&iommu->lock, flags);
3153 
3154 	/* Context tables are copied, now write them to the root_entry table */
3155 	for (bus = 0; bus < 256; bus++) {
3156 		int idx = ext ? bus * 2 : bus;
3157 		u64 val;
3158 
3159 		if (ctxt_tbls[idx]) {
3160 			val = virt_to_phys(ctxt_tbls[idx]) | 1;
3161 			iommu->root_entry[bus].lo = val;
3162 		}
3163 
3164 		if (!ext || !ctxt_tbls[idx + 1])
3165 			continue;
3166 
3167 		val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3168 		iommu->root_entry[bus].hi = val;
3169 	}
3170 
3171 	spin_unlock_irqrestore(&iommu->lock, flags);
3172 
3173 	kfree(ctxt_tbls);
3174 
3175 	__iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3176 
3177 	ret = 0;
3178 
3179 out_unmap:
3180 	memunmap(old_rt);
3181 
3182 	return ret;
3183 }
3184 
3185 static int __init init_dmars(void)
3186 {
3187 	struct dmar_drhd_unit *drhd;
3188 	struct intel_iommu *iommu;
3189 	int ret;
3190 
3191 	/*
3192 	 * for each drhd
3193 	 *    allocate root
3194 	 *    initialize and program root entry to not present
3195 	 * endfor
3196 	 */
3197 	for_each_drhd_unit(drhd) {
3198 		/*
3199 		 * No lock needed: this is only incremented in the single-
3200 		 * threaded kernel __init code path; all other accesses are
3201 		 * read only.
3202 		 */
3203 		if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3204 			g_num_of_iommus++;
3205 			continue;
3206 		}
3207 		pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3208 	}
3209 
3210 	/* Preallocate enough resources for IOMMU hot-addition */
3211 	if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3212 		g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3213 
3214 	g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3215 			GFP_KERNEL);
3216 	if (!g_iommus) {
3217 		pr_err("Allocating global iommu array failed\n");
3218 		ret = -ENOMEM;
3219 		goto error;
3220 	}
3221 
3222 	for_each_iommu(iommu, drhd) {
3223 		if (drhd->ignored) {
3224 			iommu_disable_translation(iommu);
3225 			continue;
3226 		}
3227 
3228 		/*
3229 		 * Find the max pasid size of all IOMMUs in the system.
3230 		 * We need to ensure the system pasid table is no bigger
3231 		 * than the smallest supported.
3232 		 */
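		/*
		 * Illustrative note, assuming ecap_pss() reports the supported
		 * PASID width minus one: a reported value of 19 means 20-bit
		 * PASIDs and caps intel_pasid_max_id at 2 << 19 = 1048576.
		 */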
3233 		if (pasid_supported(iommu)) {
3234 			u32 temp = 2 << ecap_pss(iommu->ecap);
3235 
3236 			intel_pasid_max_id = min_t(u32, temp,
3237 						   intel_pasid_max_id);
3238 		}
3239 
3240 		g_iommus[iommu->seq_id] = iommu;
3241 
3242 		intel_iommu_init_qi(iommu);
3243 
3244 		ret = iommu_init_domains(iommu);
3245 		if (ret)
3246 			goto free_iommu;
3247 
3248 		init_translation_status(iommu);
3249 
3250 		if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3251 			iommu_disable_translation(iommu);
3252 			clear_translation_pre_enabled(iommu);
3253 			pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3254 				iommu->name);
3255 		}
3256 
3257 		/*
3258 		 * TBD:
3259 		 * we could share the same root & context tables
3260 		 * among all IOMMUs. Need to split it later.
3261 		 */
3262 		ret = iommu_alloc_root_entry(iommu);
3263 		if (ret)
3264 			goto free_iommu;
3265 
3266 		if (translation_pre_enabled(iommu)) {
3267 			pr_info("Translation already enabled - trying to copy translation structures\n");
3268 
3269 			ret = copy_translation_tables(iommu);
3270 			if (ret) {
3271 				/*
3272 				 * We found the IOMMU with translation
3273 				 * enabled - but failed to copy over the
3274 				 * old root-entry table. Try to proceed
3275 				 * by disabling translation now and
3276 				 * allocating a clean root-entry table.
3277 				 * This might cause DMAR faults, but
3278 				 * probably the dump will still succeed.
3279 				 */
3280 				pr_err("Failed to copy translation tables from previous kernel for %s\n",
3281 				       iommu->name);
3282 				iommu_disable_translation(iommu);
3283 				clear_translation_pre_enabled(iommu);
3284 			} else {
3285 				pr_info("Copied translation tables from previous kernel for %s\n",
3286 					iommu->name);
3287 			}
3288 		}
3289 
3290 		if (!ecap_pass_through(iommu->ecap))
3291 			hw_pass_through = 0;
3292 #ifdef CONFIG_INTEL_IOMMU_SVM
3293 		if (pasid_supported(iommu))
3294 			intel_svm_init(iommu);
3295 #endif
3296 	}
3297 
3298 	/*
3299 	 * Now that qi is enabled on all iommus, set the root entry and flush
3300 	 * caches. This is required on some Intel X58 chipsets, otherwise the
3301 	 * flush_context function will loop forever and the boot hangs.
3302 	 */
3303 	for_each_active_iommu(iommu, drhd) {
3304 		iommu_flush_write_buffer(iommu);
3305 		iommu_set_root_entry(iommu);
3306 		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3307 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3308 	}
3309 
3310 	if (iommu_default_passthrough())
3311 		iommu_identity_mapping |= IDENTMAP_ALL;
3312 
3313 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3314 	dmar_map_gfx = 0;
3315 #endif
3316 
3317 	if (!dmar_map_gfx)
3318 		iommu_identity_mapping |= IDENTMAP_GFX;
3319 
3320 	check_tylersburg_isoch();
3321 
3322 	ret = si_domain_init(hw_pass_through);
3323 	if (ret)
3324 		goto free_iommu;
3325 
3326 	/*
3327 	 * for each drhd
3328 	 *   enable fault log
3329 	 *   global invalidate context cache
3330 	 *   global invalidate iotlb
3331 	 *   enable translation
3332 	 */
3333 	for_each_iommu(iommu, drhd) {
3334 		if (drhd->ignored) {
3335 			/*
3336 			 * we always have to disable PMRs or DMA may fail on
3337 			 * this device
3338 			 */
3339 			if (force_on)
3340 				iommu_disable_protect_mem_regions(iommu);
3341 			continue;
3342 		}
3343 
3344 		iommu_flush_write_buffer(iommu);
3345 
3346 #ifdef CONFIG_INTEL_IOMMU_SVM
3347 		if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
3348 			/*
3349 			 * Calling dmar_alloc_hwirq() with dmar_global_lock held
3350 			 * could cause a lock race condition, so drop the lock here.
3351 			 */
3352 			up_write(&dmar_global_lock);
3353 			ret = intel_svm_enable_prq(iommu);
3354 			down_write(&dmar_global_lock);
3355 			if (ret)
3356 				goto free_iommu;
3357 		}
3358 #endif
3359 		ret = dmar_set_interrupt(iommu);
3360 		if (ret)
3361 			goto free_iommu;
3362 	}
3363 
3364 	return 0;
3365 
3366 free_iommu:
3367 	for_each_active_iommu(iommu, drhd) {
3368 		disable_dmar_iommu(iommu);
3369 		free_dmar_iommu(iommu);
3370 	}
3371 
3372 	kfree(g_iommus);
3373 
3374 error:
3375 	return ret;
3376 }
3377 
3378 /* This takes a number of _MM_ pages, not VTD pages */
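/*
 * e.g. a 5-page request is rounded up to 8 IOVA pages to keep the region
 * size-aligned; with a 64-bit dma_mask (and forcedac not set) the allocator
 * first tries to place it below 4GiB and only then falls back to the full
 * mask.
 */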
3379 static unsigned long intel_alloc_iova(struct device *dev,
3380 				     struct dmar_domain *domain,
3381 				     unsigned long nrpages, uint64_t dma_mask)
3382 {
3383 	unsigned long iova_pfn;
3384 
3385 	/* Restrict dma_mask to the width that the iommu can handle */
3386 	dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
3387 	/* Ensure we reserve the whole size-aligned region */
3388 	nrpages = __roundup_pow_of_two(nrpages);
3389 
3390 	if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
3391 		/*
3392 		 * First try to allocate an io virtual address in
3393 		 * DMA_BIT_MASK(32) and if that fails then try allocating
3394 		 * from higher range
3395 		 */
3396 		iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3397 					   IOVA_PFN(DMA_BIT_MASK(32)), false);
3398 		if (iova_pfn)
3399 			return iova_pfn;
3400 	}
3401 	iova_pfn = alloc_iova_fast(&domain->iovad, nrpages,
3402 				   IOVA_PFN(dma_mask), true);
3403 	if (unlikely(!iova_pfn)) {
3404 		dev_err(dev, "Allocating %ld-page iova failed", nrpages);
3405 		return 0;
3406 	}
3407 
3408 	return iova_pfn;
3409 }
3410 
3411 static struct dmar_domain *get_private_domain_for_dev(struct device *dev)
3412 {
3413 	struct dmar_domain *domain, *tmp;
3414 	struct dmar_rmrr_unit *rmrr;
3415 	struct device *i_dev;
3416 	int i, ret;
3417 
3418 	/* The device should not be attached to any domain yet. */
3419 	domain = find_domain(dev);
3420 	if (domain)
3421 		return NULL;
3422 
3423 	domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3424 	if (!domain)
3425 		goto out;
3426 
3427 	/* We have a new domain - setup possible RMRRs for the device */
3428 	rcu_read_lock();
3429 	for_each_rmrr_units(rmrr) {
3430 		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3431 					  i, i_dev) {
3432 			if (i_dev != dev)
3433 				continue;
3434 
3435 			ret = domain_prepare_identity_map(dev, domain,
3436 							  rmrr->base_address,
3437 							  rmrr->end_address);
3438 			if (ret)
3439 				dev_err(dev, "Mapping reserved region failed\n");
3440 		}
3441 	}
3442 	rcu_read_unlock();
3443 
3444 	tmp = set_domain_for_dev(dev, domain);
3445 	if (!tmp || domain != tmp) {
3446 		domain_exit(domain);
3447 		domain = tmp;
3448 	}
3449 
3450 out:
3451 	if (!domain)
3452 		dev_err(dev, "Allocating domain failed\n");
3453 	else
3454 		domain->domain.type = IOMMU_DOMAIN_DMA;
3455 
3456 	return domain;
3457 }
3458 
3459 /* Check if the dev needs to go through the non-identity map and unmap process. */
3460 static bool iommu_need_mapping(struct device *dev)
3461 {
3462 	int ret;
3463 
3464 	if (iommu_dummy(dev))
3465 		return false;
3466 
3467 	ret = identity_mapping(dev);
3468 	if (ret) {
3469 		u64 dma_mask = *dev->dma_mask;
3470 
3471 		if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask)
3472 			dma_mask = dev->coherent_dma_mask;
3473 
3474 		if (dma_mask >= dma_direct_get_required_mask(dev))
3475 			return false;
3476 
3477 		/*
3478 		 * The 32-bit capable device is removed from si_domain and
3479 		 * falls back to a non-identity (DMA API) mapping.
3480 		 */
3481 		dmar_remove_one_dev_info(dev);
3482 		ret = iommu_request_dma_domain_for_dev(dev);
3483 		if (ret) {
3484 			struct iommu_domain *domain;
3485 			struct dmar_domain *dmar_domain;
3486 
3487 			domain = iommu_get_domain_for_dev(dev);
3488 			if (domain) {
3489 				dmar_domain = to_dmar_domain(domain);
3490 				dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
3491 			}
3492 			dmar_remove_one_dev_info(dev);
3493 			get_private_domain_for_dev(dev);
3494 		}
3495 
3496 		dev_info(dev, "32bit DMA uses non-identity mapping\n");
3497 	}
3498 
3499 	return true;
3500 }
3501 
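/*
 * Common single-buffer mapping path: look up the device's domain,
 * allocate an IOVA covering the page-aligned buffer, derive the PTE
 * protection bits from the DMA direction (and the zero-length-read
 * capability), and install the IOVA -> paddr mapping.  The returned DMA
 * address is the IOVA plus the sub-page offset of paddr.
 */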
3502 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3503 				     size_t size, int dir, u64 dma_mask)
3504 {
3505 	struct dmar_domain *domain;
3506 	phys_addr_t start_paddr;
3507 	unsigned long iova_pfn;
3508 	int prot = 0;
3509 	int ret;
3510 	struct intel_iommu *iommu;
3511 	unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3512 
3513 	BUG_ON(dir == DMA_NONE);
3514 
3515 	domain = find_domain(dev);
3516 	if (!domain)
3517 		return DMA_MAPPING_ERROR;
3518 
3519 	iommu = domain_get_iommu(domain);
3520 	size = aligned_nrpages(paddr, size);
3521 
3522 	iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3523 	if (!iova_pfn)
3524 		goto error;
3525 
3526 	/*
3527 	 * Check whether the DMAR unit supports zero-length reads on
3528 	 * write-only mappings.
3529 	 */
3530 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3531 			!cap_zlr(iommu->cap))
3532 		prot |= DMA_PTE_READ;
3533 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3534 		prot |= DMA_PTE_WRITE;
3535 	/*
3536 	 * The range paddr .. (paddr + size) may start or end on a partial
3537 	 * page, so map the whole page.  Note: if two parts of one page are
3538 	 * mapped separately, we may end up with two IOVAs pointing to the
3539 	 * same host paddr, but that is not a big problem.
3540 	 */
3541 	ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
3542 				 mm_to_dma_pfn(paddr_pfn), size, prot);
3543 	if (ret)
3544 		goto error;
3545 
3546 	start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
3547 	start_paddr += paddr & ~PAGE_MASK;
3548 
3549 	trace_map_single(dev, start_paddr, paddr, size << VTD_PAGE_SHIFT);
3550 
3551 	return start_paddr;
3552 
3553 error:
3554 	if (iova_pfn)
3555 		free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
3556 	dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n",
3557 		size, (unsigned long long)paddr, dir);
3558 	return DMA_MAPPING_ERROR;
3559 }
3560 
3561 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3562 				 unsigned long offset, size_t size,
3563 				 enum dma_data_direction dir,
3564 				 unsigned long attrs)
3565 {
3566 	if (iommu_need_mapping(dev))
3567 		return __intel_map_single(dev, page_to_phys(page) + offset,
3568 				size, dir, *dev->dma_mask);
3569 	return dma_direct_map_page(dev, page, offset, size, dir, attrs);
3570 }
3571 
3572 static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr,
3573 				     size_t size, enum dma_data_direction dir,
3574 				     unsigned long attrs)
3575 {
3576 	if (iommu_need_mapping(dev))
3577 		return __intel_map_single(dev, phys_addr, size, dir,
3578 				*dev->dma_mask);
3579 	return dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
3580 }
3581 
3582 static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
3583 {
3584 	struct dmar_domain *domain;
3585 	unsigned long start_pfn, last_pfn;
3586 	unsigned long nrpages;
3587 	unsigned long iova_pfn;
3588 	struct intel_iommu *iommu;
3589 	struct page *freelist;
3590 	struct pci_dev *pdev = NULL;
3591 
3592 	domain = find_domain(dev);
3593 	BUG_ON(!domain);
3594 
3595 	iommu = domain_get_iommu(domain);
3596 
3597 	iova_pfn = IOVA_PFN(dev_addr);
3598 
3599 	nrpages = aligned_nrpages(dev_addr, size);
3600 	start_pfn = mm_to_dma_pfn(iova_pfn);
3601 	last_pfn = start_pfn + nrpages - 1;
3602 
3603 	if (dev_is_pci(dev))
3604 		pdev = to_pci_dev(dev);
3605 
3606 	freelist = domain_unmap(domain, start_pfn, last_pfn);
3607 	if (intel_iommu_strict || (pdev && pdev->untrusted) ||
3608 			!has_iova_flush_queue(&domain->iovad)) {
3609 		iommu_flush_iotlb_psi(iommu, domain, start_pfn,
3610 				      nrpages, !freelist, 0);
3611 		/* free iova */
3612 		free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
3613 		dma_free_pagelist(freelist);
3614 	} else {
3615 		queue_iova(&domain->iovad, iova_pfn, nrpages,
3616 			   (unsigned long)freelist);
3617 		/*
3618 		 * Queue up the release of the unmap to save roughly 1/6th of
3619 		 * the CPU time used up by a synchronous IOTLB flush.
3620 		 */
3621 	}
3622 
3623 	trace_unmap_single(dev, dev_addr, size);
3624 }
3625 
3626 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3627 			     size_t size, enum dma_data_direction dir,
3628 			     unsigned long attrs)
3629 {
3630 	if (iommu_need_mapping(dev))
3631 		intel_unmap(dev, dev_addr, size);
3632 	else
3633 		dma_direct_unmap_page(dev, dev_addr, size, dir, attrs);
3634 }
3635 
3636 static void intel_unmap_resource(struct device *dev, dma_addr_t dev_addr,
3637 		size_t size, enum dma_data_direction dir, unsigned long attrs)
3638 {
3639 	if (iommu_need_mapping(dev))
3640 		intel_unmap(dev, dev_addr, size);
3641 }
3642 
3643 static void *intel_alloc_coherent(struct device *dev, size_t size,
3644 				  dma_addr_t *dma_handle, gfp_t flags,
3645 				  unsigned long attrs)
3646 {
3647 	struct page *page = NULL;
3648 	int order;
3649 
3650 	if (!iommu_need_mapping(dev))
3651 		return dma_direct_alloc(dev, size, dma_handle, flags, attrs);
3652 
3653 	size = PAGE_ALIGN(size);
3654 	order = get_order(size);
3655 
3656 	if (gfpflags_allow_blocking(flags)) {
3657 		unsigned int count = size >> PAGE_SHIFT;
3658 
3659 		page = dma_alloc_from_contiguous(dev, count, order,
3660 						 flags & __GFP_NOWARN);
3661 	}
3662 
3663 	if (!page)
3664 		page = alloc_pages(flags, order);
3665 	if (!page)
3666 		return NULL;
3667 	memset(page_address(page), 0, size);
3668 
3669 	*dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3670 					 DMA_BIDIRECTIONAL,
3671 					 dev->coherent_dma_mask);
3672 	if (*dma_handle != DMA_MAPPING_ERROR)
3673 		return page_address(page);
3674 	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3675 		__free_pages(page, order);
3676 
3677 	return NULL;
3678 }
3679 
3680 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3681 				dma_addr_t dma_handle, unsigned long attrs)
3682 {
3683 	int order;
3684 	struct page *page = virt_to_page(vaddr);
3685 
3686 	if (!iommu_need_mapping(dev))
3687 		return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
3688 
3689 	size = PAGE_ALIGN(size);
3690 	order = get_order(size);
3691 
3692 	intel_unmap(dev, dma_handle, size);
3693 	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3694 		__free_pages(page, order);
3695 }
3696 
3697 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3698 			   int nelems, enum dma_data_direction dir,
3699 			   unsigned long attrs)
3700 {
3701 	dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK;
3702 	unsigned long nrpages = 0;
3703 	struct scatterlist *sg;
3704 	int i;
3705 
3706 	if (!iommu_need_mapping(dev))
3707 		return dma_direct_unmap_sg(dev, sglist, nelems, dir, attrs);
3708 
3709 	for_each_sg(sglist, sg, nelems, i) {
3710 		nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg));
3711 	}
3712 
3713 	intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
3714 
3715 	trace_unmap_sg(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
3716 }
3717 
3718 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3719 			enum dma_data_direction dir, unsigned long attrs)
3720 {
3721 	int i;
3722 	struct dmar_domain *domain;
3723 	size_t size = 0;
3724 	int prot = 0;
3725 	unsigned long iova_pfn;
3726 	int ret;
3727 	struct scatterlist *sg;
3728 	unsigned long start_vpfn;
3729 	struct intel_iommu *iommu;
3730 
3731 	BUG_ON(dir == DMA_NONE);
3732 	if (!iommu_need_mapping(dev))
3733 		return dma_direct_map_sg(dev, sglist, nelems, dir, attrs);
3734 
3735 	domain = find_domain(dev);
3736 	if (!domain)
3737 		return 0;
3738 
3739 	iommu = domain_get_iommu(domain);
3740 
3741 	for_each_sg(sglist, sg, nelems, i)
3742 		size += aligned_nrpages(sg->offset, sg->length);
3743 
3744 	iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3745 				*dev->dma_mask);
3746 	if (!iova_pfn) {
3747 		sglist->dma_length = 0;
3748 		return 0;
3749 	}
3750 
3751 	/*
3752 	 * Check whether the DMAR unit supports zero-length reads on
3753 	 * write-only mappings.
3754 	 */
3755 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3756 			!cap_zlr(iommu->cap))
3757 		prot |= DMA_PTE_READ;
3758 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3759 		prot |= DMA_PTE_WRITE;
3760 
3761 	start_vpfn = mm_to_dma_pfn(iova_pfn);
3762 
3763 	ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3764 	if (unlikely(ret)) {
3765 		dma_pte_free_pagetable(domain, start_vpfn,
3766 				       start_vpfn + size - 1,
3767 				       agaw_to_level(domain->agaw) + 1);
3768 		free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
3769 		return 0;
3770 	}
3771 
3772 	trace_map_sg(dev, iova_pfn << PAGE_SHIFT,
3773 		     sg_phys(sglist), size << VTD_PAGE_SHIFT);
3774 
3775 	return nelems;
3776 }
3777 
3778 static u64 intel_get_required_mask(struct device *dev)
3779 {
3780 	if (!iommu_need_mapping(dev))
3781 		return dma_direct_get_required_mask(dev);
3782 	return DMA_BIT_MASK(32);
3783 }
3784 
3785 static const struct dma_map_ops intel_dma_ops = {
3786 	.alloc = intel_alloc_coherent,
3787 	.free = intel_free_coherent,
3788 	.map_sg = intel_map_sg,
3789 	.unmap_sg = intel_unmap_sg,
3790 	.map_page = intel_map_page,
3791 	.unmap_page = intel_unmap_page,
3792 	.map_resource = intel_map_resource,
3793 	.unmap_resource = intel_unmap_resource,
3794 	.dma_supported = dma_direct_supported,
3795 	.mmap = dma_common_mmap,
3796 	.get_sgtable = dma_common_get_sgtable,
3797 	.get_required_mask = intel_get_required_mask,
3798 };
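/*
 * These ops are installed as the global dma_ops in intel_iommu_init().
 * Each callback first checks iommu_need_mapping() and falls back to the
 * dma-direct implementation for devices that stay in the identity
 * (pass-through) domain.  Illustrative call path for a translated
 * device (a sketch, not taken from any particular driver):
 *
 *	dma_addr_t dma = dma_map_page(dev, page, 0, PAGE_SIZE,
 *				      DMA_TO_DEVICE);
 *		-> ops->map_page == intel_map_page()
 *		-> __intel_map_single() allocates an IOVA and maps it
 */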
3799 
3800 static void
3801 bounce_sync_single(struct device *dev, dma_addr_t addr, size_t size,
3802 		   enum dma_data_direction dir, enum dma_sync_target target)
3803 {
3804 	struct dmar_domain *domain;
3805 	phys_addr_t tlb_addr;
3806 
3807 	domain = find_domain(dev);
3808 	if (WARN_ON(!domain))
3809 		return;
3810 
3811 	tlb_addr = intel_iommu_iova_to_phys(&domain->domain, addr);
3812 	if (is_swiotlb_buffer(tlb_addr))
3813 		swiotlb_tbl_sync_single(dev, tlb_addr, size, dir, target);
3814 }
3815 
3816 static dma_addr_t
3817 bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size,
3818 		  enum dma_data_direction dir, unsigned long attrs,
3819 		  u64 dma_mask)
3820 {
3821 	size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
3822 	struct dmar_domain *domain;
3823 	struct intel_iommu *iommu;
3824 	unsigned long iova_pfn;
3825 	unsigned long nrpages;
3826 	phys_addr_t tlb_addr;
3827 	int prot = 0;
3828 	int ret;
3829 
3830 	domain = find_domain(dev);
3831 	if (WARN_ON(dir == DMA_NONE || !domain))
3832 		return DMA_MAPPING_ERROR;
3833 
3834 	iommu = domain_get_iommu(domain);
3835 	if (WARN_ON(!iommu))
3836 		return DMA_MAPPING_ERROR;
3837 
3838 	nrpages = aligned_nrpages(0, size);
3839 	iova_pfn = intel_alloc_iova(dev, domain,
3840 				    dma_to_mm_pfn(nrpages), dma_mask);
3841 	if (!iova_pfn)
3842 		return DMA_MAPPING_ERROR;
3843 
3844 	/*
3845 	 * Check whether the DMAR unit supports zero-length reads on
3846 	 * write-only mappings.
3847 	 */
3848 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3849 			!cap_zlr(iommu->cap))
3850 		prot |= DMA_PTE_READ;
3851 	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3852 		prot |= DMA_PTE_WRITE;
3853 
3854 	/*
3855 	 * If both the physical buffer start address and size are
3856 	 * page aligned, we don't need to use a bounce page.
3857 	 */
3858 	if (!IS_ALIGNED(paddr | size, VTD_PAGE_SIZE)) {
3859 		tlb_addr = swiotlb_tbl_map_single(dev,
3860 				__phys_to_dma(dev, io_tlb_start),
3861 				paddr, size, aligned_size, dir, attrs);
3862 		if (tlb_addr == DMA_MAPPING_ERROR) {
3863 			goto swiotlb_error;
3864 		} else {
3865 			/* Cleanup the padding area. */
3866 			void *padding_start = phys_to_virt(tlb_addr);
3867 			size_t padding_size = aligned_size;
3868 
3869 			if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
3870 			    (dir == DMA_TO_DEVICE ||
3871 			     dir == DMA_BIDIRECTIONAL)) {
3872 				padding_start += size;
3873 				padding_size -= size;
3874 			}
3875 
3876 			memset(padding_start, 0, padding_size);
3877 		}
3878 	} else {
3879 		tlb_addr = paddr;
3880 	}
3881 
3882 	ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
3883 				 tlb_addr >> VTD_PAGE_SHIFT, nrpages, prot);
3884 	if (ret)
3885 		goto mapping_error;
3886 
3887 	trace_bounce_map_single(dev, iova_pfn << PAGE_SHIFT, paddr, size);
3888 
3889 	return (phys_addr_t)iova_pfn << PAGE_SHIFT;
3890 
3891 mapping_error:
3892 	if (is_swiotlb_buffer(tlb_addr))
3893 		swiotlb_tbl_unmap_single(dev, tlb_addr, size,
3894 					 aligned_size, dir, attrs);
3895 swiotlb_error:
3896 	free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
3897 	dev_err(dev, "Device bounce map: %zx@%llx dir %d --- failed\n",
3898 		size, (unsigned long long)paddr, dir);
3899 
3900 	return DMA_MAPPING_ERROR;
3901 }
3902 
3903 static void
3904 bounce_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
3905 		    enum dma_data_direction dir, unsigned long attrs)
3906 {
3907 	size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
3908 	struct dmar_domain *domain;
3909 	phys_addr_t tlb_addr;
3910 
3911 	domain = find_domain(dev);
3912 	if (WARN_ON(!domain))
3913 		return;
3914 
3915 	tlb_addr = intel_iommu_iova_to_phys(&domain->domain, dev_addr);
3916 	if (WARN_ON(!tlb_addr))
3917 		return;
3918 
3919 	intel_unmap(dev, dev_addr, size);
3920 	if (is_swiotlb_buffer(tlb_addr))
3921 		swiotlb_tbl_unmap_single(dev, tlb_addr, size,
3922 					 aligned_size, dir, attrs);
3923 
3924 	trace_bounce_unmap_single(dev, dev_addr, size);
3925 }
3926 
3927 static dma_addr_t
3928 bounce_map_page(struct device *dev, struct page *page, unsigned long offset,
3929 		size_t size, enum dma_data_direction dir, unsigned long attrs)
3930 {
3931 	return bounce_map_single(dev, page_to_phys(page) + offset,
3932 				 size, dir, attrs, *dev->dma_mask);
3933 }
3934 
3935 static dma_addr_t
3936 bounce_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size,
3937 		    enum dma_data_direction dir, unsigned long attrs)
3938 {
3939 	return bounce_map_single(dev, phys_addr, size,
3940 				 dir, attrs, *dev->dma_mask);
3941 }
3942 
3943 static void
3944 bounce_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size,
3945 		  enum dma_data_direction dir, unsigned long attrs)
3946 {
3947 	bounce_unmap_single(dev, dev_addr, size, dir, attrs);
3948 }
3949 
3950 static void
3951 bounce_unmap_resource(struct device *dev, dma_addr_t dev_addr, size_t size,
3952 		      enum dma_data_direction dir, unsigned long attrs)
3953 {
3954 	bounce_unmap_single(dev, dev_addr, size, dir, attrs);
3955 }
3956 
3957 static void
3958 bounce_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3959 		enum dma_data_direction dir, unsigned long attrs)
3960 {
3961 	struct scatterlist *sg;
3962 	int i;
3963 
3964 	for_each_sg(sglist, sg, nelems, i)
3965 		bounce_unmap_page(dev, sg->dma_address,
3966 				  sg_dma_len(sg), dir, attrs);
3967 }
3968 
3969 static int
3970 bounce_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3971 	      enum dma_data_direction dir, unsigned long attrs)
3972 {
3973 	int i;
3974 	struct scatterlist *sg;
3975 
3976 	for_each_sg(sglist, sg, nelems, i) {
3977 		sg->dma_address = bounce_map_page(dev, sg_page(sg),
3978 						  sg->offset, sg->length,
3979 						  dir, attrs);
3980 		if (sg->dma_address == DMA_MAPPING_ERROR)
3981 			goto out_unmap;
3982 		sg_dma_len(sg) = sg->length;
3983 	}
3984 
3985 	return nelems;
3986 
3987 out_unmap:
3988 	bounce_unmap_sg(dev, sglist, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
3989 	return 0;
3990 }
3991 
3992 static void
3993 bounce_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
3994 			   size_t size, enum dma_data_direction dir)
3995 {
3996 	bounce_sync_single(dev, addr, size, dir, SYNC_FOR_CPU);
3997 }
3998 
3999 static void
4000 bounce_sync_single_for_device(struct device *dev, dma_addr_t addr,
4001 			      size_t size, enum dma_data_direction dir)
4002 {
4003 	bounce_sync_single(dev, addr, size, dir, SYNC_FOR_DEVICE);
4004 }
4005 
4006 static void
4007 bounce_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist,
4008 		       int nelems, enum dma_data_direction dir)
4009 {
4010 	struct scatterlist *sg;
4011 	int i;
4012 
4013 	for_each_sg(sglist, sg, nelems, i)
4014 		bounce_sync_single(dev, sg_dma_address(sg),
4015 				   sg_dma_len(sg), dir, SYNC_FOR_CPU);
4016 }
4017 
4018 static void
4019 bounce_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
4020 			  int nelems, enum dma_data_direction dir)
4021 {
4022 	struct scatterlist *sg;
4023 	int i;
4024 
4025 	for_each_sg(sglist, sg, nelems, i)
4026 		bounce_sync_single(dev, sg_dma_address(sg),
4027 				   sg_dma_len(sg), dir, SYNC_FOR_DEVICE);
4028 }
4029 
4030 static const struct dma_map_ops bounce_dma_ops = {
4031 	.alloc			= intel_alloc_coherent,
4032 	.free			= intel_free_coherent,
4033 	.map_sg			= bounce_map_sg,
4034 	.unmap_sg		= bounce_unmap_sg,
4035 	.map_page		= bounce_map_page,
4036 	.unmap_page		= bounce_unmap_page,
4037 	.sync_single_for_cpu	= bounce_sync_single_for_cpu,
4038 	.sync_single_for_device	= bounce_sync_single_for_device,
4039 	.sync_sg_for_cpu	= bounce_sync_sg_for_cpu,
4040 	.sync_sg_for_device	= bounce_sync_sg_for_device,
4041 	.map_resource		= bounce_map_resource,
4042 	.unmap_resource		= bounce_unmap_resource,
4043 	.dma_supported		= dma_direct_supported,
4044 };
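/*
 * Bounce-buffer variant of the DMA ops, intended for untrusted devices.
 * bounce_map_single() still allocates an IOVA and programs the page
 * tables, but any buffer that is not VTD_PAGE_SIZE aligned is routed
 * through a swiotlb slot first, so the device can never reach data that
 * happens to share a page with the mapped buffer.
 */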
4045 
4046 static inline int iommu_domain_cache_init(void)
4047 {
4048 	int ret = 0;
4049 
4050 	iommu_domain_cache = kmem_cache_create("iommu_domain",
4051 					 sizeof(struct dmar_domain),
4052 					 0,
4053 					 SLAB_HWCACHE_ALIGN,
4055 					 NULL);
4056 	if (!iommu_domain_cache) {
4057 		pr_err("Couldn't create iommu_domain cache\n");
4058 		ret = -ENOMEM;
4059 	}
4060 
4061 	return ret;
4062 }
4063 
4064 static inline int iommu_devinfo_cache_init(void)
4065 {
4066 	int ret = 0;
4067 
4068 	iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
4069 					 sizeof(struct device_domain_info),
4070 					 0,
4071 					 SLAB_HWCACHE_ALIGN,
4072 					 NULL);
4073 	if (!iommu_devinfo_cache) {
4074 		pr_err("Couldn't create devinfo cache\n");
4075 		ret = -ENOMEM;
4076 	}
4077 
4078 	return ret;
4079 }
4080 
4081 static int __init iommu_init_mempool(void)
4082 {
4083 	int ret;
4084 	ret = iova_cache_get();
4085 	if (ret)
4086 		return ret;
4087 
4088 	ret = iommu_domain_cache_init();
4089 	if (ret)
4090 		goto domain_error;
4091 
4092 	ret = iommu_devinfo_cache_init();
4093 	if (!ret)
4094 		return ret;
4095 
4096 	kmem_cache_destroy(iommu_domain_cache);
4097 domain_error:
4098 	iova_cache_put();
4099 
4100 	return -ENOMEM;
4101 }
4102 
4103 static void __init iommu_exit_mempool(void)
4104 {
4105 	kmem_cache_destroy(iommu_devinfo_cache);
4106 	kmem_cache_destroy(iommu_domain_cache);
4107 	iova_cache_put();
4108 }
4109 
4110 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
4111 {
4112 	struct dmar_drhd_unit *drhd;
4113 	u32 vtbar;
4114 	int rc;
4115 
4116 	/* We know that this device on this chipset has its own IOMMU.
4117 	 * If we find it under a different IOMMU, then the BIOS is lying
4118 	 * to us. Hope that the IOMMU for this device is actually
4119 	 * disabled, and it needs no translation...
4120 	 */
4121 	rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
4122 	if (rc) {
4123 		/* "can't" happen */
4124 		dev_info(&pdev->dev, "failed to run vt-d quirk\n");
4125 		return;
4126 	}
4127 	vtbar &= 0xffff0000;
4128 
4129 	/* we know that this IOMMU should be at offset 0xa000 from vtbar */
4130 	drhd = dmar_find_matched_drhd_unit(pdev);
4131 	if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
4132 			    TAINT_FIRMWARE_WORKAROUND,
4133 			    "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
4134 		pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4135 }
4136 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
4137 
4138 static void __init init_no_remapping_devices(void)
4139 {
4140 	struct dmar_drhd_unit *drhd;
4141 	struct device *dev;
4142 	int i;
4143 
4144 	for_each_drhd_unit(drhd) {
4145 		if (!drhd->include_all) {
4146 			for_each_active_dev_scope(drhd->devices,
4147 						  drhd->devices_cnt, i, dev)
4148 				break;
4149 			/* ignore DMAR unit if no devices exist */
4150 			if (i == drhd->devices_cnt)
4151 				drhd->ignored = 1;
4152 		}
4153 	}
4154 
4155 	for_each_active_drhd_unit(drhd) {
4156 		if (drhd->include_all)
4157 			continue;
4158 
4159 		for_each_active_dev_scope(drhd->devices,
4160 					  drhd->devices_cnt, i, dev)
4161 			if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
4162 				break;
4163 		if (i < drhd->devices_cnt)
4164 			continue;
4165 
4166 		/* This IOMMU has *only* gfx devices. Either bypass it or
4167 		   set the gfx_mapped flag, as appropriate */
4168 		if (!dmar_map_gfx) {
4169 			drhd->ignored = 1;
4170 			for_each_active_dev_scope(drhd->devices,
4171 						  drhd->devices_cnt, i, dev)
4172 				dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
4173 		}
4174 	}
4175 }
4176 
4177 #ifdef CONFIG_SUSPEND
4178 static int init_iommu_hw(void)
4179 {
4180 	struct dmar_drhd_unit *drhd;
4181 	struct intel_iommu *iommu = NULL;
4182 
4183 	for_each_active_iommu(iommu, drhd)
4184 		if (iommu->qi)
4185 			dmar_reenable_qi(iommu);
4186 
4187 	for_each_iommu(iommu, drhd) {
4188 		if (drhd->ignored) {
4189 			/*
4190 			 * we always have to disable PMRs or DMA may fail on
4191 			 * this device
4192 			 */
4193 			if (force_on)
4194 				iommu_disable_protect_mem_regions(iommu);
4195 			continue;
4196 		}
4197 
4198 		iommu_flush_write_buffer(iommu);
4199 
4200 		iommu_set_root_entry(iommu);
4201 
4202 		iommu->flush.flush_context(iommu, 0, 0, 0,
4203 					   DMA_CCMD_GLOBAL_INVL);
4204 		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4205 		iommu_enable_translation(iommu);
4206 		iommu_disable_protect_mem_regions(iommu);
4207 	}
4208 
4209 	return 0;
4210 }
4211 
4212 static void iommu_flush_all(void)
4213 {
4214 	struct dmar_drhd_unit *drhd;
4215 	struct intel_iommu *iommu;
4216 
4217 	for_each_active_iommu(iommu, drhd) {
4218 		iommu->flush.flush_context(iommu, 0, 0, 0,
4219 					   DMA_CCMD_GLOBAL_INVL);
4220 		iommu->flush.flush_iotlb(iommu, 0, 0, 0,
4221 					 DMA_TLB_GLOBAL_FLUSH);
4222 	}
4223 }
4224 
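/*
 * Suspend path: flush all context/IOTLB caches, disable translation and
 * save the fault-event registers (FECTL/FEDATA/FEADDR/FEUADDR) so that
 * iommu_resume() can restore them after init_iommu_hw() has re-enabled
 * the units.
 */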
4225 static int iommu_suspend(void)
4226 {
4227 	struct dmar_drhd_unit *drhd;
4228 	struct intel_iommu *iommu = NULL;
4229 	unsigned long flag;
4230 
4231 	for_each_active_iommu(iommu, drhd) {
4232 		iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
4233 						 GFP_ATOMIC);
4234 		if (!iommu->iommu_state)
4235 			goto nomem;
4236 	}
4237 
4238 	iommu_flush_all();
4239 
4240 	for_each_active_iommu(iommu, drhd) {
4241 		iommu_disable_translation(iommu);
4242 
4243 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
4244 
4245 		iommu->iommu_state[SR_DMAR_FECTL_REG] =
4246 			readl(iommu->reg + DMAR_FECTL_REG);
4247 		iommu->iommu_state[SR_DMAR_FEDATA_REG] =
4248 			readl(iommu->reg + DMAR_FEDATA_REG);
4249 		iommu->iommu_state[SR_DMAR_FEADDR_REG] =
4250 			readl(iommu->reg + DMAR_FEADDR_REG);
4251 		iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
4252 			readl(iommu->reg + DMAR_FEUADDR_REG);
4253 
4254 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4255 	}
4256 	return 0;
4257 
4258 nomem:
4259 	for_each_active_iommu(iommu, drhd)
4260 		kfree(iommu->iommu_state);
4261 
4262 	return -ENOMEM;
4263 }
4264 
4265 static void iommu_resume(void)
4266 {
4267 	struct dmar_drhd_unit *drhd;
4268 	struct intel_iommu *iommu = NULL;
4269 	unsigned long flag;
4270 
4271 	if (init_iommu_hw()) {
4272 		if (force_on)
4273 			panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4274 		else
4275 			WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
4276 		return;
4277 	}
4278 
4279 	for_each_active_iommu(iommu, drhd) {
4280 
4281 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
4282 
4283 		writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4284 			iommu->reg + DMAR_FECTL_REG);
4285 		writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4286 			iommu->reg + DMAR_FEDATA_REG);
4287 		writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4288 			iommu->reg + DMAR_FEADDR_REG);
4289 		writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4290 			iommu->reg + DMAR_FEUADDR_REG);
4291 
4292 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4293 	}
4294 
4295 	for_each_active_iommu(iommu, drhd)
4296 		kfree(iommu->iommu_state);
4297 }
4298 
4299 static struct syscore_ops iommu_syscore_ops = {
4300 	.resume		= iommu_resume,
4301 	.suspend	= iommu_suspend,
4302 };
4303 
4304 static void __init init_iommu_pm_ops(void)
4305 {
4306 	register_syscore_ops(&iommu_syscore_ops);
4307 }
4308 
4309 #else
4310 static inline void init_iommu_pm_ops(void) {}
4311 #endif	/* CONFIG_SUSPEND */
4312 
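/*
 * RMRR (Reserved Memory Region Reporting) structures in the DMAR table
 * describe memory ranges that firmware expects certain devices to keep
 * using (USB legacy emulation buffers are a typical example); they are
 * recorded here so identity mappings can later be set up for the
 * affected devices.
 */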
4313 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
4314 {
4315 	struct acpi_dmar_reserved_memory *rmrr;
4316 	struct dmar_rmrr_unit *rmrru;
4317 
4318 	rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4319 	if (!rmrru)
4320 		goto out;
4321 
4322 	rmrru->hdr = header;
4323 	rmrr = (struct acpi_dmar_reserved_memory *)header;
4324 	rmrru->base_address = rmrr->base_address;
4325 	rmrru->end_address = rmrr->end_address;
4326 
4327 	rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4328 				((void *)rmrr) + rmrr->header.length,
4329 				&rmrru->devices_cnt);
4330 	if (rmrru->devices_cnt && rmrru->devices == NULL)
4331 		goto free_rmrru;
4332 
4333 	list_add(&rmrru->list, &dmar_rmrr_units);
4334 
4335 	return 0;
4336 free_rmrru:
4337 	kfree(rmrru);
4338 out:
4339 	return -ENOMEM;
4340 }
4341 
4342 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4343 {
4344 	struct dmar_atsr_unit *atsru;
4345 	struct acpi_dmar_atsr *tmp;
4346 
4347 	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4348 		tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4349 		if (atsr->segment != tmp->segment)
4350 			continue;
4351 		if (atsr->header.length != tmp->header.length)
4352 			continue;
4353 		if (memcmp(atsr, tmp, atsr->header.length) == 0)
4354 			return atsru;
4355 	}
4356 
4357 	return NULL;
4358 }
4359 
4360 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4361 {
4362 	struct acpi_dmar_atsr *atsr;
4363 	struct dmar_atsr_unit *atsru;
4364 
4365 	if (system_state >= SYSTEM_RUNNING && !intel_iommu_enabled)
4366 		return 0;
4367 
4368 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4369 	atsru = dmar_find_atsr(atsr);
4370 	if (atsru)
4371 		return 0;
4372 
4373 	atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
4374 	if (!atsru)
4375 		return -ENOMEM;
4376 
4377 	/*
4378 	 * If the memory was allocated from the slab by an ACPI _DSM method,
4379 	 * we need to copy its contents because the memory buffer will be
4380 	 * freed on return.
4381 	 */
4382 	atsru->hdr = (void *)(atsru + 1);
4383 	memcpy(atsru->hdr, hdr, hdr->length);
4384 	atsru->include_all = atsr->flags & 0x1;
4385 	if (!atsru->include_all) {
4386 		atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4387 				(void *)atsr + atsr->header.length,
4388 				&atsru->devices_cnt);
4389 		if (atsru->devices_cnt && atsru->devices == NULL) {
4390 			kfree(atsru);
4391 			return -ENOMEM;
4392 		}
4393 	}
4394 
4395 	list_add_rcu(&atsru->list, &dmar_atsr_units);
4396 
4397 	return 0;
4398 }
4399 
4400 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4401 {
4402 	dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4403 	kfree(atsru);
4404 }
4405 
4406 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4407 {
4408 	struct acpi_dmar_atsr *atsr;
4409 	struct dmar_atsr_unit *atsru;
4410 
4411 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4412 	atsru = dmar_find_atsr(atsr);
4413 	if (atsru) {
4414 		list_del_rcu(&atsru->list);
4415 		synchronize_rcu();
4416 		intel_iommu_free_atsr(atsru);
4417 	}
4418 
4419 	return 0;
4420 }
4421 
4422 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4423 {
4424 	int i;
4425 	struct device *dev;
4426 	struct acpi_dmar_atsr *atsr;
4427 	struct dmar_atsr_unit *atsru;
4428 
4429 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4430 	atsru = dmar_find_atsr(atsr);
4431 	if (!atsru)
4432 		return 0;
4433 
4434 	if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
4435 		for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4436 					  i, dev)
4437 			return -EBUSY;
4438 	}
4439 
4440 	return 0;
4441 }
4442 
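/*
 * Bring a hot-added DRHD unit online.  The new IOMMU must not weaken
 * feature decisions already made for the running system (pass-through,
 * snoop control, super-page support); otherwise the hot-add is refused
 * with -ENXIO before any state is touched.
 */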
4443 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4444 {
4445 	int sp, ret;
4446 	struct intel_iommu *iommu = dmaru->iommu;
4447 
4448 	if (g_iommus[iommu->seq_id])
4449 		return 0;
4450 
4451 	if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
4452 		pr_warn("%s: Doesn't support hardware pass through.\n",
4453 			iommu->name);
4454 		return -ENXIO;
4455 	}
4456 	if (!ecap_sc_support(iommu->ecap) &&
4457 	    domain_update_iommu_snooping(iommu)) {
4458 		pr_warn("%s: Doesn't support snooping.\n",
4459 			iommu->name);
4460 		return -ENXIO;
4461 	}
4462 	sp = domain_update_iommu_superpage(iommu) - 1;
4463 	if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
4464 		pr_warn("%s: Doesn't support large page.\n",
4465 			iommu->name);
4466 		return -ENXIO;
4467 	}
4468 
4469 	/*
4470 	 * Disable translation if already enabled prior to OS handover.
4471 	 */
4472 	if (iommu->gcmd & DMA_GCMD_TE)
4473 		iommu_disable_translation(iommu);
4474 
4475 	g_iommus[iommu->seq_id] = iommu;
4476 	ret = iommu_init_domains(iommu);
4477 	if (ret == 0)
4478 		ret = iommu_alloc_root_entry(iommu);
4479 	if (ret)
4480 		goto out;
4481 
4482 #ifdef CONFIG_INTEL_IOMMU_SVM
4483 	if (pasid_supported(iommu))
4484 		intel_svm_init(iommu);
4485 #endif
4486 
4487 	if (dmaru->ignored) {
4488 		/*
4489 		 * we always have to disable PMRs or DMA may fail on this device
4490 		 */
4491 		if (force_on)
4492 			iommu_disable_protect_mem_regions(iommu);
4493 		return 0;
4494 	}
4495 
4496 	intel_iommu_init_qi(iommu);
4497 	iommu_flush_write_buffer(iommu);
4498 
4499 #ifdef CONFIG_INTEL_IOMMU_SVM
4500 	if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
4501 		ret = intel_svm_enable_prq(iommu);
4502 		if (ret)
4503 			goto disable_iommu;
4504 	}
4505 #endif
4506 	ret = dmar_set_interrupt(iommu);
4507 	if (ret)
4508 		goto disable_iommu;
4509 
4510 	iommu_set_root_entry(iommu);
4511 	iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4512 	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4513 	iommu_enable_translation(iommu);
4514 
4515 	iommu_disable_protect_mem_regions(iommu);
4516 	return 0;
4517 
4518 disable_iommu:
4519 	disable_dmar_iommu(iommu);
4520 out:
4521 	free_dmar_iommu(iommu);
4522 	return ret;
4523 }
4524 
4525 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4526 {
4527 	int ret = 0;
4528 	struct intel_iommu *iommu = dmaru->iommu;
4529 
4530 	if (!intel_iommu_enabled)
4531 		return 0;
4532 	if (iommu == NULL)
4533 		return -EINVAL;
4534 
4535 	if (insert) {
4536 		ret = intel_iommu_add(dmaru);
4537 	} else {
4538 		disable_dmar_iommu(iommu);
4539 		free_dmar_iommu(iommu);
4540 	}
4541 
4542 	return ret;
4543 }
4544 
4545 static void intel_iommu_free_dmars(void)
4546 {
4547 	struct dmar_rmrr_unit *rmrru, *rmrr_n;
4548 	struct dmar_atsr_unit *atsru, *atsr_n;
4549 
4550 	list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4551 		list_del(&rmrru->list);
4552 		dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4553 		kfree(rmrru);
4554 	}
4555 
4556 	list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4557 		list_del(&atsru->list);
4558 		intel_iommu_free_atsr(atsru);
4559 	}
4560 }
4561 
4562 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4563 {
4564 	int i, ret = 1;
4565 	struct pci_bus *bus;
4566 	struct pci_dev *bridge = NULL;
4567 	struct device *tmp;
4568 	struct acpi_dmar_atsr *atsr;
4569 	struct dmar_atsr_unit *atsru;
4570 
4571 	dev = pci_physfn(dev);
4572 	for (bus = dev->bus; bus; bus = bus->parent) {
4573 		bridge = bus->self;
4574 		/* If it's an integrated device, allow ATS */
4575 		if (!bridge)
4576 			return 1;
4577 		/* Connected via non-PCIe: no ATS */
4578 		if (!pci_is_pcie(bridge) ||
4579 		    pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
4580 			return 0;
4581 		/* If we found the root port, look it up in the ATSR */
4582 		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
4583 			break;
4584 	}
4585 
4586 	rcu_read_lock();
4587 	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4588 		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4589 		if (atsr->segment != pci_domain_nr(dev->bus))
4590 			continue;
4591 
4592 		for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
4593 			if (tmp == &bridge->dev)
4594 				goto out;
4595 
4596 		if (atsru->include_all)
4597 			goto out;
4598 	}
4599 	ret = 0;
4600 out:
4601 	rcu_read_unlock();
4602 
4603 	return ret;
4604 }
4605 
4606 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4607 {
4608 	int ret;
4609 	struct dmar_rmrr_unit *rmrru;
4610 	struct dmar_atsr_unit *atsru;
4611 	struct acpi_dmar_atsr *atsr;
4612 	struct acpi_dmar_reserved_memory *rmrr;
4613 
4614 	if (!intel_iommu_enabled && system_state >= SYSTEM_RUNNING)
4615 		return 0;
4616 
4617 	list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4618 		rmrr = container_of(rmrru->hdr,
4619 				    struct acpi_dmar_reserved_memory, header);
4620 		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4621 			ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4622 				((void *)rmrr) + rmrr->header.length,
4623 				rmrr->segment, rmrru->devices,
4624 				rmrru->devices_cnt);
4625 			if (ret < 0)
4626 				return ret;
4627 		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4628 			dmar_remove_dev_scope(info, rmrr->segment,
4629 				rmrru->devices, rmrru->devices_cnt);
4630 		}
4631 	}
4632 
4633 	list_for_each_entry(atsru, &dmar_atsr_units, list) {
4634 		if (atsru->include_all)
4635 			continue;
4636 
4637 		atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4638 		if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4639 			ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4640 					(void *)atsr + atsr->header.length,
4641 					atsr->segment, atsru->devices,
4642 					atsru->devices_cnt);
4643 			if (ret > 0)
4644 				break;
4645 			else if (ret < 0)
4646 				return ret;
4647 		} else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4648 			if (dmar_remove_dev_scope(info, atsr->segment,
4649 					atsru->devices, atsru->devices_cnt))
4650 				break;
4651 		}
4652 	}
4653 
4654 	return 0;
4655 }
4656 
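/*
 * Memory hotplug hook (registered only when a static identity domain is
 * in use without hardware pass-through): newly onlined ranges are added
 * to the si_domain identity map so pass-through devices can DMA to
 * them, and ranges going offline are unmapped again with a per-IOMMU
 * IOTLB flush.
 */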
4657 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4658 				       unsigned long val, void *v)
4659 {
4660 	struct memory_notify *mhp = v;
4661 	unsigned long long start, end;
4662 	unsigned long start_vpfn, last_vpfn;
4663 
4664 	switch (val) {
4665 	case MEM_GOING_ONLINE:
4666 		start = mhp->start_pfn << PAGE_SHIFT;
4667 		end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4668 		if (iommu_domain_identity_map(si_domain, start, end)) {
4669 			pr_warn("Failed to build identity map for [%llx-%llx]\n",
4670 				start, end);
4671 			return NOTIFY_BAD;
4672 		}
4673 		break;
4674 
4675 	case MEM_OFFLINE:
4676 	case MEM_CANCEL_ONLINE:
4677 		start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4678 		last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4679 		while (start_vpfn <= last_vpfn) {
4680 			struct iova *iova;
4681 			struct dmar_drhd_unit *drhd;
4682 			struct intel_iommu *iommu;
4683 			struct page *freelist;
4684 
4685 			iova = find_iova(&si_domain->iovad, start_vpfn);
4686 			if (iova == NULL) {
4687 				pr_debug("Failed get IOVA for PFN %lx\n",
4688 					 start_vpfn);
4689 				break;
4690 			}
4691 
4692 			iova = split_and_remove_iova(&si_domain->iovad, iova,
4693 						     start_vpfn, last_vpfn);
4694 			if (iova == NULL) {
4695 				pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
4696 					start_vpfn, last_vpfn);
4697 				return NOTIFY_BAD;
4698 			}
4699 
4700 			freelist = domain_unmap(si_domain, iova->pfn_lo,
4701 					       iova->pfn_hi);
4702 
4703 			rcu_read_lock();
4704 			for_each_active_iommu(iommu, drhd)
4705 				iommu_flush_iotlb_psi(iommu, si_domain,
4706 					iova->pfn_lo, iova_size(iova),
4707 					!freelist, 0);
4708 			rcu_read_unlock();
4709 			dma_free_pagelist(freelist);
4710 
4711 			start_vpfn = iova->pfn_hi + 1;
4712 			free_iova_mem(iova);
4713 		}
4714 		break;
4715 	}
4716 
4717 	return NOTIFY_OK;
4718 }
4719 
4720 static struct notifier_block intel_iommu_memory_nb = {
4721 	.notifier_call = intel_iommu_memory_notifier,
4722 	.priority = 0
4723 };
4724 
4725 static void free_all_cpu_cached_iovas(unsigned int cpu)
4726 {
4727 	int i;
4728 
4729 	for (i = 0; i < g_num_of_iommus; i++) {
4730 		struct intel_iommu *iommu = g_iommus[i];
4731 		struct dmar_domain *domain;
4732 		int did;
4733 
4734 		if (!iommu)
4735 			continue;
4736 
4737 		for (did = 0; did < cap_ndoms(iommu->cap); did++) {
4738 			domain = get_iommu_domain(iommu, (u16)did);
4739 
4740 			if (!domain)
4741 				continue;
4742 			free_cpu_cached_iovas(cpu, &domain->iovad);
4743 		}
4744 	}
4745 }
4746 
4747 static int intel_iommu_cpu_dead(unsigned int cpu)
4748 {
4749 	free_all_cpu_cached_iovas(cpu);
4750 	return 0;
4751 }
4752 
4753 static void intel_disable_iommus(void)
4754 {
4755 	struct intel_iommu *iommu = NULL;
4756 	struct dmar_drhd_unit *drhd;
4757 
4758 	for_each_iommu(iommu, drhd)
4759 		iommu_disable_translation(iommu);
4760 }
4761 
4762 static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
4763 {
4764 	struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
4765 
4766 	return container_of(iommu_dev, struct intel_iommu, iommu);
4767 }
4768 
4769 static ssize_t intel_iommu_show_version(struct device *dev,
4770 					struct device_attribute *attr,
4771 					char *buf)
4772 {
4773 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4774 	u32 ver = readl(iommu->reg + DMAR_VER_REG);
4775 	return sprintf(buf, "%d:%d\n",
4776 		       DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4777 }
4778 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4779 
4780 static ssize_t intel_iommu_show_address(struct device *dev,
4781 					struct device_attribute *attr,
4782 					char *buf)
4783 {
4784 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4785 	return sprintf(buf, "%llx\n", iommu->reg_phys);
4786 }
4787 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4788 
4789 static ssize_t intel_iommu_show_cap(struct device *dev,
4790 				    struct device_attribute *attr,
4791 				    char *buf)
4792 {
4793 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4794 	return sprintf(buf, "%llx\n", iommu->cap);
4795 }
4796 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4797 
4798 static ssize_t intel_iommu_show_ecap(struct device *dev,
4799 				    struct device_attribute *attr,
4800 				    char *buf)
4801 {
4802 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4803 	return sprintf(buf, "%llx\n", iommu->ecap);
4804 }
4805 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4806 
4807 static ssize_t intel_iommu_show_ndoms(struct device *dev,
4808 				      struct device_attribute *attr,
4809 				      char *buf)
4810 {
4811 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4812 	return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4813 }
4814 static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4815 
4816 static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4817 					   struct device_attribute *attr,
4818 					   char *buf)
4819 {
4820 	struct intel_iommu *iommu = dev_to_intel_iommu(dev);
4821 	return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4822 						  cap_ndoms(iommu->cap)));
4823 }
4824 static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4825 
4826 static struct attribute *intel_iommu_attrs[] = {
4827 	&dev_attr_version.attr,
4828 	&dev_attr_address.attr,
4829 	&dev_attr_cap.attr,
4830 	&dev_attr_ecap.attr,
4831 	&dev_attr_domains_supported.attr,
4832 	&dev_attr_domains_used.attr,
4833 	NULL,
4834 };
4835 
4836 static struct attribute_group intel_iommu_group = {
4837 	.name = "intel-iommu",
4838 	.attrs = intel_iommu_attrs,
4839 };
4840 
4841 const struct attribute_group *intel_iommu_groups[] = {
4842 	&intel_iommu_group,
4843 	NULL,
4844 };
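/*
 * The attributes above are exported per IOMMU unit via
 * iommu_device_sysfs_add() in intel_iommu_init(); they typically show
 * up as /sys/class/iommu/dmar<N>/intel-iommu/{version,address,cap,ecap,
 * domains_supported,domains_used} (path shown for illustration).
 */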
4845 
4846 static inline bool has_untrusted_dev(void)
4847 {
4848 	struct pci_dev *pdev = NULL;
4849 
4850 	for_each_pci_dev(pdev)
4851 		if (pdev->untrusted)
4852 			return true;
4853 
4854 	return false;
4855 }
4856 
4857 static int __init platform_optin_force_iommu(void)
4858 {
4859 	if (!dmar_platform_optin() || no_platform_optin || !has_untrusted_dev())
4860 		return 0;
4861 
4862 	if (no_iommu || dmar_disabled)
4863 		pr_info("Intel-IOMMU force enabled due to platform opt in\n");
4864 
4865 	/*
4866 	 * If the Intel IOMMU is disabled by default, apply an identity
4867 	 * map for all devices except those marked as untrusted.
4868 	 */
4869 	if (dmar_disabled)
4870 		iommu_identity_mapping |= IDENTMAP_ALL;
4871 
4872 	dmar_disabled = 0;
4873 	no_iommu = 0;
4874 
4875 	return 1;
4876 }
4877 
4878 static int __init probe_acpi_namespace_devices(void)
4879 {
4880 	struct dmar_drhd_unit *drhd;
4881 	/* To avoid a -Wunused-but-set-variable warning. */
4882 	struct intel_iommu *iommu __maybe_unused;
4883 	struct device *dev;
4884 	int i, ret = 0;
4885 
4886 	for_each_active_iommu(iommu, drhd) {
4887 		for_each_active_dev_scope(drhd->devices,
4888 					  drhd->devices_cnt, i, dev) {
4889 			struct acpi_device_physical_node *pn;
4890 			struct iommu_group *group;
4891 			struct acpi_device *adev;
4892 
4893 			if (dev->bus != &acpi_bus_type)
4894 				continue;
4895 
4896 			adev = to_acpi_device(dev);
4897 			mutex_lock(&adev->physical_node_lock);
4898 			list_for_each_entry(pn,
4899 					    &adev->physical_node_list, node) {
4900 				group = iommu_group_get(pn->dev);
4901 				if (group) {
4902 					iommu_group_put(group);
4903 					continue;
4904 				}
4905 
4906 				pn->dev->bus->iommu_ops = &intel_iommu_ops;
4907 				ret = iommu_probe_device(pn->dev);
4908 				if (ret)
4909 					break;
4910 			}
4911 			mutex_unlock(&adev->physical_node_lock);
4912 
4913 			if (ret)
4914 				return ret;
4915 		}
4916 	}
4917 
4918 	return 0;
4919 }
4920 
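/*
 * Main initialization entry point; on x86 this is normally invoked from
 * the arch IOMMU setup code.  Rough order: parse the DMAR table and
 * device scopes, bail out (after disabling any pre-enabled units) if
 * remapping is disabled, build the remapping structures via
 * init_dmars(), install intel_dma_ops, register sysfs, memory-hotplug
 * and CPU-hotplug hooks, and finally enable translation.
 */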
4921 int __init intel_iommu_init(void)
4922 {
4923 	int ret = -ENODEV;
4924 	struct dmar_drhd_unit *drhd;
4925 	struct intel_iommu *iommu;
4926 
4927 	/*
4928 	 * Intel IOMMU is required for a TXT/tboot launch or platform
4929 	 * opt in, so enforce that.
4930 	 */
4931 	force_on = tboot_force_iommu() || platform_optin_force_iommu();
4932 
4933 	if (iommu_init_mempool()) {
4934 		if (force_on)
4935 			panic("tboot: Failed to initialize iommu memory\n");
4936 		return -ENOMEM;
4937 	}
4938 
4939 	down_write(&dmar_global_lock);
4940 	if (dmar_table_init()) {
4941 		if (force_on)
4942 			panic("tboot: Failed to initialize DMAR table\n");
4943 		goto out_free_dmar;
4944 	}
4945 
4946 	if (dmar_dev_scope_init() < 0) {
4947 		if (force_on)
4948 			panic("tboot: Failed to initialize DMAR device scope\n");
4949 		goto out_free_dmar;
4950 	}
4951 
4952 	up_write(&dmar_global_lock);
4953 
4954 	/*
4955 	 * The bus notifier takes the dmar_global_lock, so lockdep will
4956 	 * complain later when we register it under the lock.
4957 	 */
4958 	dmar_register_bus_notifier();
4959 
4960 	down_write(&dmar_global_lock);
4961 
4962 	if (no_iommu || dmar_disabled) {
4963 		/*
4964 		 * We exit the function here to ensure the IOMMU's remapping and
4965 		 * mempool aren't set up, which means that the IOMMU's PMRs
4966 		 * won't be disabled via the call to init_dmars(). So disable
4967 		 * it explicitly here. The PMRs were setup by tboot prior to
4968 		 * calling SENTER, but the kernel is expected to reset/tear
4969 		 * down the PMRs.
4970 		 */
4971 		if (intel_iommu_tboot_noforce) {
4972 			for_each_iommu(iommu, drhd)
4973 				iommu_disable_protect_mem_regions(iommu);
4974 		}
4975 
4976 		/*
4977 		 * Make sure the IOMMUs are switched off, even when we
4978 		 * boot into a kexec kernel and the previous kernel left
4979 		 * them enabled
4980 		 */
4981 		intel_disable_iommus();
4982 		goto out_free_dmar;
4983 	}
4984 
4985 	if (list_empty(&dmar_rmrr_units))
4986 		pr_info("No RMRR found\n");
4987 
4988 	if (list_empty(&dmar_atsr_units))
4989 		pr_info("No ATSR found\n");
4990 
4991 	if (dmar_init_reserved_ranges()) {
4992 		if (force_on)
4993 			panic("tboot: Failed to reserve iommu ranges\n");
4994 		goto out_free_reserved_range;
4995 	}
4996 
4997 	if (dmar_map_gfx)
4998 		intel_iommu_gfx_mapped = 1;
4999 
5000 	init_no_remapping_devices();
5001 
5002 	ret = init_dmars();
5003 	if (ret) {
5004 		if (force_on)
5005 			panic("tboot: Failed to initialize DMARs\n");
5006 		pr_err("Initialization failed\n");
5007 		goto out_free_reserved_range;
5008 	}
5009 	up_write(&dmar_global_lock);
5010 
5011 #if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
5012 	/*
5013 	 * If the system has no untrusted device or the user has decided
5014 	 * to disable the bounce page mechanisms, we don't need swiotlb.
5015 	 * Record that here so the pre-allocated bounce pages can be
5016 	 * released later.
5017 	 */
5018 	if (!has_untrusted_dev() || intel_no_bounce)
5019 		swiotlb = 0;
5020 #endif
5021 	dma_ops = &intel_dma_ops;
5022 
5023 	init_iommu_pm_ops();
5024 
5025 	for_each_active_iommu(iommu, drhd) {
5026 		iommu_device_sysfs_add(&iommu->iommu, NULL,
5027 				       intel_iommu_groups,
5028 				       "%s", iommu->name);
5029 		iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
5030 		iommu_device_register(&iommu->iommu);
5031 	}
5032 
5033 	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
5034 	if (si_domain && !hw_pass_through)
5035 		register_memory_notifier(&intel_iommu_memory_nb);
5036 	cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
5037 			  intel_iommu_cpu_dead);
5038 
5039 	down_read(&dmar_global_lock);
5040 	if (probe_acpi_namespace_devices())
5041 		pr_warn("ACPI name space devices didn't probe correctly\n");
5042 	up_read(&dmar_global_lock);
5043 
5044 	/* Finally, we enable the DMA remapping hardware. */
5045 	for_each_iommu(iommu, drhd) {
5046 		if (!drhd->ignored && !translation_pre_enabled(iommu))
5047 			iommu_enable_translation(iommu);
5048 
5049 		iommu_disable_protect_mem_regions(iommu);
5050 	}
5051 	pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
5052 
5053 	intel_iommu_enabled = 1;
5054 	intel_iommu_debugfs_init();
5055 
5056 	return 0;
5057 
5058 out_free_reserved_range:
5059 	put_iova_domain(&reserved_iova_list);
5060 out_free_dmar:
5061 	intel_iommu_free_dmars();
5062 	up_write(&dmar_global_lock);
5063 	iommu_exit_mempool();
5064 	return ret;
5065 }
5066 
5067 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
5068 {
5069 	struct intel_iommu *iommu = opaque;
5070 
5071 	domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
5072 	return 0;
5073 }
5074 
5075 /*
5076  * NB - intel-iommu lacks any sort of reference counting for the users of
5077  * dependent devices.  If multiple endpoints have intersecting dependent
5078  * devices, unbinding the driver from any one of them will possibly leave
5079  * the others unable to operate.
5080  */
5081 static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
5082 {
5083 	if (!iommu || !dev || !dev_is_pci(dev))
5084 		return;
5085 
5086 	pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
5087 }
5088 
5089 static void __dmar_remove_one_dev_info(struct device_domain_info *info)
5090 {
5091 	struct dmar_domain *domain;
5092 	struct intel_iommu *iommu;
5093 	unsigned long flags;
5094 
5095 	assert_spin_locked(&device_domain_lock);
5096 
5097 	if (WARN_ON(!info))
5098 		return;
5099 
5100 	iommu = info->iommu;
5101 	domain = info->domain;
5102 
5103 	if (info->dev) {
5104 		if (dev_is_pci(info->dev) && sm_supported(iommu))
5105 			intel_pasid_tear_down_entry(iommu, info->dev,
5106 					PASID_RID2PASID);
5107 
5108 		iommu_disable_dev_iotlb(info);
5109 		domain_context_clear(iommu, info->dev);
5110 		intel_pasid_free_table(info->dev);
5111 	}
5112 
5113 	unlink_domain_info(info);
5114 
5115 	spin_lock_irqsave(&iommu->lock, flags);
5116 	domain_detach_iommu(domain, iommu);
5117 	spin_unlock_irqrestore(&iommu->lock, flags);
5118 
5119 	/* free the private domain */
5120 	if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN &&
5121 	    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
5122 	    list_empty(&domain->devices))
5123 		domain_exit(info->domain);
5124 
5125 	free_devinfo_mem(info);
5126 }
5127 
5128 static void dmar_remove_one_dev_info(struct device *dev)
5129 {
5130 	struct device_domain_info *info;
5131 	unsigned long flags;
5132 
5133 	spin_lock_irqsave(&device_domain_lock, flags);
5134 	info = dev->archdata.iommu;
5135 	if (info && info != DEFER_DEVICE_DOMAIN_INFO
5136 	    && info != DUMMY_DEVICE_DOMAIN_INFO)
5137 		__dmar_remove_one_dev_info(info);
5138 	spin_unlock_irqrestore(&device_domain_lock, flags);
5139 }
5140 
5141 static int md_domain_init(struct dmar_domain *domain, int guest_width)
5142 {
5143 	int adjust_width;
5144 
5145 	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
5146 	domain_reserve_special_ranges(domain);
5147 
5148 	/* calculate AGAW */
5149 	domain->gaw = guest_width;
5150 	adjust_width = guestwidth_to_adjustwidth(guest_width);
5151 	domain->agaw = width_to_agaw(adjust_width);
5152 
5153 	domain->iommu_coherency = 0;
5154 	domain->iommu_snooping = 0;
5155 	domain->iommu_superpage = 0;
5156 	domain->max_addr = 0;
5157 
5158 	/* always allocate the top pgd */
5159 	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5160 	if (!domain->pgd)
5161 		return -ENOMEM;
5162 	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
5163 	return 0;
5164 }
5165 
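/*
 * iommu core domain allocator: DMA and unmanaged domains get a fresh
 * dmar_domain with a DEFAULT_DOMAIN_ADDRESS_WIDTH guest address width
 * (DMA domains additionally get an IOVA flush queue), while identity
 * requests simply return the shared si_domain.
 */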
5166 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
5167 {
5168 	struct dmar_domain *dmar_domain;
5169 	struct iommu_domain *domain;
5170 
5171 	switch (type) {
5172 	case IOMMU_DOMAIN_DMA:
5173 	/* fallthrough */
5174 	case IOMMU_DOMAIN_UNMANAGED:
5175 		dmar_domain = alloc_domain(0);
5176 		if (!dmar_domain) {
5177 			pr_err("Can't allocate dmar_domain\n");
5178 			return NULL;
5179 		}
5180 		if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
5181 			pr_err("Domain initialization failed\n");
5182 			domain_exit(dmar_domain);
5183 			return NULL;
5184 		}
5185 
5186 		if (type == IOMMU_DOMAIN_DMA &&
5187 		    init_iova_flush_queue(&dmar_domain->iovad,
5188 					  iommu_flush_iova, iova_entry_free)) {
5189 			pr_warn("iova flush queue initialization failed\n");
5190 			intel_iommu_strict = 1;
5191 		}
5192 
5193 		domain_update_iommu_cap(dmar_domain);
5194 
5195 		domain = &dmar_domain->domain;
5196 		domain->geometry.aperture_start = 0;
5197 		domain->geometry.aperture_end   =
5198 				__DOMAIN_MAX_ADDR(dmar_domain->gaw);
5199 		domain->geometry.force_aperture = true;
5200 
5201 		return domain;
5202 	case IOMMU_DOMAIN_IDENTITY:
5203 		return &si_domain->domain;
5204 	default:
5205 		return NULL;
5206 	}
5207 
5208 	return NULL;
5209 }
5210 
5211 static void intel_iommu_domain_free(struct iommu_domain *domain)
5212 {
5213 	if (domain != &si_domain->domain)
5214 		domain_exit(to_dmar_domain(domain));
5215 }
5216 
5217 /*
5218  * Check whether a @domain could be attached to the @dev through the
5219  * aux-domain attach/detach APIs.
5220  */
5221 static inline bool
5222 is_aux_domain(struct device *dev, struct iommu_domain *domain)
5223 {
5224 	struct device_domain_info *info = dev->archdata.iommu;
5225 
5226 	return info && info->auxd_enabled &&
5227 			domain->type == IOMMU_DOMAIN_UNMANAGED;
5228 }
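
/*
 * Illustrative sketch only (not part of this driver): a mediated-device
 * style caller would normally reach the aux_* callbacks below through the
 * generic IOMMU aux-domain API, roughly:
 *
 *	if (iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_AUX) &&
 *	    !iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_AUX)) {
 *		domain = iommu_domain_alloc(dev->bus);
 *		if (!iommu_aux_attach_device(domain, dev))
 *			pasid = iommu_aux_get_pasid(domain, dev);
 *	}
 *
 * The exact caller-side flow depends on the IOMMU core of this kernel
 * version; see include/linux/iommu.h.
 */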
5229 
5230 static void auxiliary_link_device(struct dmar_domain *domain,
5231 				  struct device *dev)
5232 {
5233 	struct device_domain_info *info = dev->archdata.iommu;
5234 
5235 	assert_spin_locked(&device_domain_lock);
5236 	if (WARN_ON(!info))
5237 		return;
5238 
5239 	domain->auxd_refcnt++;
5240 	list_add(&domain->auxd, &info->auxiliary_domains);
5241 }
5242 
5243 static void auxiliary_unlink_device(struct dmar_domain *domain,
5244 				    struct device *dev)
5245 {
5246 	struct device_domain_info *info = dev->archdata.iommu;
5247 
5248 	assert_spin_locked(&device_domain_lock);
5249 	if (WARN_ON(!info))
5250 		return;
5251 
5252 	list_del(&domain->auxd);
5253 	domain->auxd_refcnt--;
5254 
5255 	if (!domain->auxd_refcnt && domain->default_pasid > 0)
5256 		intel_pasid_free_id(domain->default_pasid);
5257 }
5258 
5259 static int aux_domain_add_dev(struct dmar_domain *domain,
5260 			      struct device *dev)
5261 {
5262 	int ret;
5263 	u8 bus, devfn;
5264 	unsigned long flags;
5265 	struct intel_iommu *iommu;
5266 
5267 	iommu = device_to_iommu(dev, &bus, &devfn);
5268 	if (!iommu)
5269 		return -ENODEV;
5270 
5271 	if (domain->default_pasid <= 0) {
5272 		int pasid;
5273 
5274 		pasid = intel_pasid_alloc_id(domain, PASID_MIN,
5275 					     pci_max_pasids(to_pci_dev(dev)),
5276 					     GFP_KERNEL);
5277 		if (pasid <= 0) {
5278 			pr_err("Can't allocate default pasid\n");
5279 			return -ENODEV;
5280 		}
5281 		domain->default_pasid = pasid;
5282 	}
5283 
5284 	spin_lock_irqsave(&device_domain_lock, flags);
5285 	/*
5286 	 * iommu->lock must be held to attach domain to iommu and setup the
5287 	 * pasid entry for second level translation.
5288 	 */
5289 	spin_lock(&iommu->lock);
5290 	ret = domain_attach_iommu(domain, iommu);
5291 	if (ret)
5292 		goto attach_failed;
5293 
5294 	/* Setup the PASID entry for mediated devices: */
5295 	ret = intel_pasid_setup_second_level(iommu, domain, dev,
5296 					     domain->default_pasid);
5297 	if (ret)
5298 		goto table_failed;
5299 	spin_unlock(&iommu->lock);
5300 
5301 	auxiliary_link_device(domain, dev);
5302 
5303 	spin_unlock_irqrestore(&device_domain_lock, flags);
5304 
5305 	return 0;
5306 
5307 table_failed:
5308 	domain_detach_iommu(domain, iommu);
5309 attach_failed:
5310 	spin_unlock(&iommu->lock);
5311 	spin_unlock_irqrestore(&device_domain_lock, flags);
5312 	if (!domain->auxd_refcnt && domain->default_pasid > 0)
5313 		intel_pasid_free_id(domain->default_pasid);
5314 
5315 	return ret;
5316 }
5317 
5318 static void aux_domain_remove_dev(struct dmar_domain *domain,
5319 				  struct device *dev)
5320 {
5321 	struct device_domain_info *info;
5322 	struct intel_iommu *iommu;
5323 	unsigned long flags;
5324 
5325 	if (!is_aux_domain(dev, &domain->domain))
5326 		return;
5327 
5328 	spin_lock_irqsave(&device_domain_lock, flags);
5329 	info = dev->archdata.iommu;
5330 	iommu = info->iommu;
5331 
5332 	auxiliary_unlink_device(domain, dev);
5333 
5334 	spin_lock(&iommu->lock);
5335 	intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
5336 	domain_detach_iommu(domain, iommu);
5337 	spin_unlock(&iommu->lock);
5338 
5339 	spin_unlock_irqrestore(&device_domain_lock, flags);
5340 }
5341 
5342 static int prepare_domain_attach_device(struct iommu_domain *domain,
5343 					struct device *dev)
5344 {
5345 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5346 	struct intel_iommu *iommu;
5347 	int addr_width;
5348 	u8 bus, devfn;
5349 
5350 	iommu = device_to_iommu(dev, &bus, &devfn);
5351 	if (!iommu)
5352 		return -ENODEV;
5353 
5354 	/* check if this iommu agaw is sufficient for max mapped address */
5355 	addr_width = agaw_to_width(iommu->agaw);
5356 	if (addr_width > cap_mgaw(iommu->cap))
5357 		addr_width = cap_mgaw(iommu->cap);
5358 
5359 	if (dmar_domain->max_addr > (1LL << addr_width)) {
5360 		dev_err(dev, "%s: iommu width (%d) is not "
5361 		        "sufficient for the mapped address (%llx)\n",
5362 		        __func__, addr_width, dmar_domain->max_addr);
5363 		return -EFAULT;
5364 	}
5365 	dmar_domain->gaw = addr_width;
5366 
5367 	/*
5368 	 * Knock out extra levels of page tables if necessary
5369 	 */
5370 	while (iommu->agaw < dmar_domain->agaw) {
5371 		struct dma_pte *pte;
5372 
5373 		pte = dmar_domain->pgd;
5374 		if (dma_pte_present(pte)) {
5375 			dmar_domain->pgd = (struct dma_pte *)
5376 				phys_to_virt(dma_pte_addr(pte));
5377 			free_pgtable_page(pte);
5378 		}
5379 		dmar_domain->agaw--;
5380 	}
5381 
5382 	return 0;
5383 }
5384 
5385 static int intel_iommu_attach_device(struct iommu_domain *domain,
5386 				     struct device *dev)
5387 {
5388 	int ret;
5389 
5390 	if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
5391 	    device_is_rmrr_locked(dev)) {
5392 		dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement.  Contact your platform vendor.\n");
5393 		return -EPERM;
5394 	}
5395 
5396 	if (is_aux_domain(dev, domain))
5397 		return -EPERM;
5398 
5399 	/* normally dev is not mapped */
5400 	if (unlikely(domain_context_mapped(dev))) {
5401 		struct dmar_domain *old_domain;
5402 
5403 		old_domain = find_domain(dev);
5404 		if (old_domain)
5405 			dmar_remove_one_dev_info(dev);
5406 	}
5407 
5408 	ret = prepare_domain_attach_device(domain, dev);
5409 	if (ret)
5410 		return ret;
5411 
5412 	return domain_add_dev_info(to_dmar_domain(domain), dev);
5413 }
5414 
5415 static int intel_iommu_aux_attach_device(struct iommu_domain *domain,
5416 					 struct device *dev)
5417 {
5418 	int ret;
5419 
5420 	if (!is_aux_domain(dev, domain))
5421 		return -EPERM;
5422 
5423 	ret = prepare_domain_attach_device(domain, dev);
5424 	if (ret)
5425 		return ret;
5426 
5427 	return aux_domain_add_dev(to_dmar_domain(domain), dev);
5428 }
5429 
5430 static void intel_iommu_detach_device(struct iommu_domain *domain,
5431 				      struct device *dev)
5432 {
5433 	dmar_remove_one_dev_info(dev);
5434 }
5435 
5436 static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
5437 					  struct device *dev)
5438 {
5439 	aux_domain_remove_dev(to_dmar_domain(domain), dev);
5440 }
5441 
5442 static int intel_iommu_map(struct iommu_domain *domain,
5443 			   unsigned long iova, phys_addr_t hpa,
5444 			   size_t size, int iommu_prot)
5445 {
5446 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5447 	u64 max_addr;
5448 	int prot = 0;
5449 	int ret;
5450 
5451 	if (iommu_prot & IOMMU_READ)
5452 		prot |= DMA_PTE_READ;
5453 	if (iommu_prot & IOMMU_WRITE)
5454 		prot |= DMA_PTE_WRITE;
5455 	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
5456 		prot |= DMA_PTE_SNP;
5457 
5458 	max_addr = iova + size;
5459 	if (dmar_domain->max_addr < max_addr) {
5460 		u64 end;
5461 
5462 		/* check if minimum agaw is sufficient for mapped address */
5463 		end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
5464 		if (end < max_addr) {
5465 			pr_err("%s: iommu width (%d) is not "
5466 			       "sufficient for the mapped address (%llx)\n",
5467 			       __func__, dmar_domain->gaw, max_addr);
5468 			return -EFAULT;
5469 		}
5470 		dmar_domain->max_addr = max_addr;
5471 	}
5472 	/* Round up size to next multiple of PAGE_SIZE, if it and
5473 	   the low bits of hpa would take us onto the next page */
5474 	size = aligned_nrpages(hpa, size);
5475 	ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
5476 				 hpa >> VTD_PAGE_SHIFT, size, prot);
5477 	return ret;
5478 }
5479 
5480 static size_t intel_iommu_unmap(struct iommu_domain *domain,
5481 				unsigned long iova, size_t size,
5482 				struct iommu_iotlb_gather *gather)
5483 {
5484 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5485 	struct page *freelist = NULL;
5486 	unsigned long start_pfn, last_pfn;
5487 	unsigned int npages;
5488 	int iommu_id, level = 0;
5489 
5490 	/* Cope with horrid API which requires us to unmap more than the
5491 	   size argument if it happens to be a large-page mapping. */
5492 	BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
5493 
5494 	if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
5495 		size = VTD_PAGE_SIZE << level_to_offset_bits(level);
5496 
5497 	start_pfn = iova >> VTD_PAGE_SHIFT;
5498 	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
5499 
5500 	freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
5501 
5502 	npages = last_pfn - start_pfn + 1;
5503 
5504 	for_each_domain_iommu(iommu_id, dmar_domain)
5505 		iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
5506 				      start_pfn, npages, !freelist, 0);
5507 
5508 	dma_free_pagelist(freelist);
5509 
5510 	if (dmar_domain->max_addr == iova + size)
5511 		dmar_domain->max_addr = iova;
5512 
5513 	return size;
5514 }
5515 
5516 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
5517 					    dma_addr_t iova)
5518 {
5519 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5520 	struct dma_pte *pte;
5521 	int level = 0;
5522 	u64 phys = 0;
5523 
5524 	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
5525 	if (pte)
5526 		phys = dma_pte_addr(pte);
5527 
5528 	return phys;
5529 }
5530 
5531 static inline bool scalable_mode_support(void)
5532 {
5533 	struct dmar_drhd_unit *drhd;
5534 	struct intel_iommu *iommu;
5535 	bool ret = true;
5536 
5537 	rcu_read_lock();
5538 	for_each_active_iommu(iommu, drhd) {
5539 		if (!sm_supported(iommu)) {
5540 			ret = false;
5541 			break;
5542 		}
5543 	}
5544 	rcu_read_unlock();
5545 
5546 	return ret;
5547 }
5548 
5549 static inline bool iommu_pasid_support(void)
5550 {
5551 	struct dmar_drhd_unit *drhd;
5552 	struct intel_iommu *iommu;
5553 	bool ret = true;
5554 
5555 	rcu_read_lock();
5556 	for_each_active_iommu(iommu, drhd) {
5557 		if (!pasid_supported(iommu)) {
5558 			ret = false;
5559 			break;
5560 		}
5561 	}
5562 	rcu_read_unlock();
5563 
5564 	return ret;
5565 }
5566 
5567 static bool intel_iommu_capable(enum iommu_cap cap)
5568 {
5569 	if (cap == IOMMU_CAP_CACHE_COHERENCY)
5570 		return domain_update_iommu_snooping(NULL) == 1;
5571 	if (cap == IOMMU_CAP_INTR_REMAP)
5572 		return irq_remapping_enabled == 1;
5573 
5574 	return false;
5575 }
5576 
5577 static int intel_iommu_add_device(struct device *dev)
5578 {
5579 	struct dmar_domain *dmar_domain;
5580 	struct iommu_domain *domain;
5581 	struct intel_iommu *iommu;
5582 	struct iommu_group *group;
5583 	u8 bus, devfn;
5584 	int ret;
5585 
5586 	iommu = device_to_iommu(dev, &bus, &devfn);
5587 	if (!iommu)
5588 		return -ENODEV;
5589 
5590 	iommu_device_link(&iommu->iommu, dev);
5591 
5592 	if (translation_pre_enabled(iommu))
5593 		dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO;
5594 
5595 	group = iommu_group_get_for_dev(dev);
5596 
5597 	if (IS_ERR(group)) {
5598 		ret = PTR_ERR(group);
5599 		goto unlink;
5600 	}
5601 
5602 	iommu_group_put(group);
5603 
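	/*
	 * If the group's default domain type does not match what
	 * device_def_domain_type() wants for this device, ask the IOMMU
	 * core to switch it; if that fails, detach the device and fall
	 * back to a private identity or DMA domain for it alone.
	 */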
5604 	domain = iommu_get_domain_for_dev(dev);
5605 	dmar_domain = to_dmar_domain(domain);
5606 	if (domain->type == IOMMU_DOMAIN_DMA) {
5607 		if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) {
5608 			ret = iommu_request_dm_for_dev(dev);
5609 			if (ret) {
5610 				dmar_remove_one_dev_info(dev);
5611 				dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
5612 				domain_add_dev_info(si_domain, dev);
5613 				dev_info(dev,
5614 					 "Device uses a private identity domain.\n");
5615 			}
5616 		}
5617 	} else {
5618 		if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) {
5619 			ret = iommu_request_dma_domain_for_dev(dev);
5620 			if (ret) {
5621 				dmar_remove_one_dev_info(dev);
5622 				dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
5623 				if (!get_private_domain_for_dev(dev)) {
5624 					dev_warn(dev,
5625 						 "Failed to get a private domain.\n");
5626 					ret = -ENOMEM;
5627 					goto unlink;
5628 				}
5629 
5630 				dev_info(dev,
5631 					 "Device uses a private dma domain.\n");
5632 			}
5633 		}
5634 	}
5635 
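	/*
	 * Devices flagged by device_needs_bounce() (e.g. untrusted,
	 * externally attached devices) get the bounce-page DMA ops, which
	 * bounce unaligned DMA through swiotlb so the device never sees
	 * unrelated data sharing a page with its buffer.
	 */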
5636 	if (device_needs_bounce(dev)) {
5637 		dev_info(dev, "Use Intel IOMMU bounce page dma_ops\n");
5638 		set_dma_ops(dev, &bounce_dma_ops);
5639 	}
5640 
5641 	return 0;
5642 
5643 unlink:
5644 	iommu_device_unlink(&iommu->iommu, dev);
5645 	return ret;
5646 }
5647 
5648 static void intel_iommu_remove_device(struct device *dev)
5649 {
5650 	struct intel_iommu *iommu;
5651 	u8 bus, devfn;
5652 
5653 	iommu = device_to_iommu(dev, &bus, &devfn);
5654 	if (!iommu)
5655 		return;
5656 
5657 	dmar_remove_one_dev_info(dev);
5658 
5659 	iommu_group_remove_device(dev);
5660 
5661 	iommu_device_unlink(&iommu->iommu, dev);
5662 
5663 	if (device_needs_bounce(dev))
5664 		set_dma_ops(dev, NULL);
5665 }
5666 
5667 static void intel_iommu_get_resv_regions(struct device *device,
5668 					 struct list_head *head)
5669 {
5670 	int prot = DMA_PTE_READ | DMA_PTE_WRITE;
5671 	struct iommu_resv_region *reg;
5672 	struct dmar_rmrr_unit *rmrr;
5673 	struct device *i_dev;
5674 	int i;
5675 
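	/*
	 * Report each RMRR that targets this device (directly or through a
	 * PCI bridge) as a direct-mapped reserved region, then add the
	 * optional ISA/floppy workaround range and the IOAPIC MSI range.
	 */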
5676 	down_read(&dmar_global_lock);
5677 	for_each_rmrr_units(rmrr) {
5678 		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
5679 					  i, i_dev) {
5680 			struct iommu_resv_region *resv;
5681 			enum iommu_resv_type type;
5682 			size_t length;
5683 
5684 			if (i_dev != device &&
5685 			    !is_downstream_to_pci_bridge(device, i_dev))
5686 				continue;
5687 
5688 			length = rmrr->end_address - rmrr->base_address + 1;
5689 
5690 			type = device_rmrr_is_relaxable(device) ?
5691 				IOMMU_RESV_DIRECT_RELAXABLE : IOMMU_RESV_DIRECT;
5692 
5693 			resv = iommu_alloc_resv_region(rmrr->base_address,
5694 						       length, prot, type);
5695 			if (!resv)
5696 				break;
5697 
5698 			list_add_tail(&resv->list, head);
5699 		}
5700 	}
5701 	up_read(&dmar_global_lock);
5702 
5703 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
5704 	if (dev_is_pci(device)) {
5705 		struct pci_dev *pdev = to_pci_dev(device);
5706 
5707 		if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
5708 			reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
5709 						   IOMMU_RESV_DIRECT_RELAXABLE);
5710 			if (reg)
5711 				list_add_tail(&reg->list, head);
5712 		}
5713 	}
5714 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
5715 
5716 	reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
5717 				      IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
5718 				      0, IOMMU_RESV_MSI);
5719 	if (!reg)
5720 		return;
5721 	list_add_tail(&reg->list, head);
5722 }
5723 
5724 static void intel_iommu_put_resv_regions(struct device *dev,
5725 					 struct list_head *head)
5726 {
5727 	struct iommu_resv_region *entry, *next;
5728 
5729 	list_for_each_entry_safe(entry, next, head, list)
5730 		kfree(entry);
5731 }
5732 
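/*
 * Enable PASID support for @dev behind @iommu: set the PASID-enable bit in
 * the context entry (invalidating the context cache if it changed) and
 * enable PASID in the device itself.  Used by the aux-domain path below and
 * by the SVM code before per-PASID translation is set up.
 */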
5733 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
5734 {
5735 	struct device_domain_info *info;
5736 	struct context_entry *context;
5737 	struct dmar_domain *domain;
5738 	unsigned long flags;
5739 	u64 ctx_lo;
5740 	int ret;
5741 
5742 	domain = find_domain(dev);
5743 	if (!domain)
5744 		return -EINVAL;
5745 
5746 	spin_lock_irqsave(&device_domain_lock, flags);
5747 	spin_lock(&iommu->lock);
5748 
5749 	ret = -EINVAL;
5750 	info = dev->archdata.iommu;
5751 	if (!info || !info->pasid_supported)
5752 		goto out;
5753 
5754 	context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5755 	if (WARN_ON(!context))
5756 		goto out;
5757 
5758 	ctx_lo = context[0].lo;
5759 
5760 	if (!(ctx_lo & CONTEXT_PASIDE)) {
5761 		ctx_lo |= CONTEXT_PASIDE;
5762 		context[0].lo = ctx_lo;
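		/*
		 * Make sure the updated context entry is visible in memory
		 * before the context-cache invalidation below is issued.
		 */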
5763 		wmb();
5764 		iommu->flush.flush_context(iommu,
5765 					   domain->iommu_did[iommu->seq_id],
5766 					   PCI_DEVID(info->bus, info->devfn),
5767 					   DMA_CCMD_MASK_NOBIT,
5768 					   DMA_CCMD_DEVICE_INVL);
5769 	}
5770 
5771 	/* Enable PASID support in the device, if it wasn't already */
5772 	if (!info->pasid_enabled)
5773 		iommu_enable_dev_iotlb(info);
5774 
5775 	ret = 0;
5776 
5777  out:
5778 	spin_unlock(&iommu->lock);
5779 	spin_unlock_irqrestore(&device_domain_lock, flags);
5780 
5781 	return ret;
5782 }
5783 
5784 static void intel_iommu_apply_resv_region(struct device *dev,
5785 					  struct iommu_domain *domain,
5786 					  struct iommu_resv_region *region)
5787 {
5788 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5789 	unsigned long start, end;
5790 
5791 	start = IOVA_PFN(region->start);
5792 	end   = IOVA_PFN(region->start + region->length - 1);
5793 
5794 	WARN_ON_ONCE(!reserve_iova(&dmar_domain->iovad, start, end));
5795 }
5796 
5797 static struct iommu_group *intel_iommu_device_group(struct device *dev)
5798 {
5799 	if (dev_is_pci(dev))
5800 		return pci_device_group(dev);
5801 	return generic_device_group(dev);
5802 }
5803 
5804 #ifdef CONFIG_INTEL_IOMMU_SVM
5805 struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5806 {
5807 	struct intel_iommu *iommu;
5808 	u8 bus, devfn;
5809 
5810 	if (iommu_dummy(dev)) {
5811 		dev_warn(dev,
5812 			 "No IOMMU translation for device; cannot enable SVM\n");
5813 		return NULL;
5814 	}
5815 
5816 	iommu = device_to_iommu(dev, &bus, &devfn);
5817 	if (!iommu) {
5818 		dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
5819 		return NULL;
5820 	}
5821 
5822 	return iommu;
5823 }
5824 #endif /* CONFIG_INTEL_IOMMU_SVM */
5825 
5826 static int intel_iommu_enable_auxd(struct device *dev)
5827 {
5828 	struct device_domain_info *info;
5829 	struct intel_iommu *iommu;
5830 	unsigned long flags;
5831 	u8 bus, devfn;
5832 	int ret;
5833 
5834 	iommu = device_to_iommu(dev, &bus, &devfn);
5835 	if (!iommu || dmar_disabled)
5836 		return -EINVAL;
5837 
5838 	if (!sm_supported(iommu) || !pasid_supported(iommu))
5839 		return -EINVAL;
5840 
5841 	ret = intel_iommu_enable_pasid(iommu, dev);
5842 	if (ret)
5843 		return -ENODEV;
5844 
5845 	spin_lock_irqsave(&device_domain_lock, flags);
5846 	info = dev->archdata.iommu;
5847 	info->auxd_enabled = 1;
5848 	spin_unlock_irqrestore(&device_domain_lock, flags);
5849 
5850 	return 0;
5851 }
5852 
5853 static int intel_iommu_disable_auxd(struct device *dev)
5854 {
5855 	struct device_domain_info *info;
5856 	unsigned long flags;
5857 
5858 	spin_lock_irqsave(&device_domain_lock, flags);
5859 	info = dev->archdata.iommu;
5860 	if (!WARN_ON(!info))
5861 		info->auxd_enabled = 0;
5862 	spin_unlock_irqrestore(&device_domain_lock, flags);
5863 
5864 	return 0;
5865 }
5866 
5867 /*
5868  * A PCI Express Designated Vendor-Specific Extended Capability (DVSEC) is
5869  * defined in section 3.7 of the Intel Scalable I/O Virtualization spec so
5870  * that system software and tools can detect endpoint devices supporting
5871  * Intel Scalable I/O Virtualization without any host driver dependency.
5872  *
5873  * Returns the address of the matching extended capability structure within
5874  * the device's PCI configuration space or 0 if the device does not support
5875  * it.
5876  */
5877 static int siov_find_pci_dvsec(struct pci_dev *pdev)
5878 {
5879 	int pos;
5880 	u16 vendor, id;
5881 
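	/*
	 * Walk the PCI Express extended capability list for DVSEC entries
	 * (capability ID 0x23).  The DVSEC vendor ID is read from offset 4
	 * and the DVSEC ID from offset 8; Intel SIOV uses DVSEC ID 5.
	 */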
5882 	pos = pci_find_next_ext_capability(pdev, 0, 0x23);
5883 	while (pos) {
5884 		pci_read_config_word(pdev, pos + 4, &vendor);
5885 		pci_read_config_word(pdev, pos + 8, &id);
5886 		if (vendor == PCI_VENDOR_ID_INTEL && id == 5)
5887 			return pos;
5888 
5889 		pos = pci_find_next_ext_capability(pdev, pos, 0x23);
5890 	}
5891 
5892 	return 0;
5893 }
5894 
5895 static bool
5896 intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
5897 {
5898 	if (feat == IOMMU_DEV_FEAT_AUX) {
5899 		int ret;
5900 
5901 		if (!dev_is_pci(dev) || dmar_disabled ||
5902 		    !scalable_mode_support() || !iommu_pasid_support())
5903 			return false;
5904 
5905 		ret = pci_pasid_features(to_pci_dev(dev));
5906 		if (ret < 0)
5907 			return false;
5908 
5909 		return !!siov_find_pci_dvsec(to_pci_dev(dev));
5910 	}
5911 
5912 	return false;
5913 }
5914 
5915 static int
5916 intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
5917 {
5918 	if (feat == IOMMU_DEV_FEAT_AUX)
5919 		return intel_iommu_enable_auxd(dev);
5920 
5921 	return -ENODEV;
5922 }
5923 
5924 static int
5925 intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
5926 {
5927 	if (feat == IOMMU_DEV_FEAT_AUX)
5928 		return intel_iommu_disable_auxd(dev);
5929 
5930 	return -ENODEV;
5931 }
5932 
5933 static bool
5934 intel_iommu_dev_feat_enabled(struct device *dev, enum iommu_dev_features feat)
5935 {
5936 	struct device_domain_info *info = dev->archdata.iommu;
5937 
5938 	if (feat == IOMMU_DEV_FEAT_AUX)
5939 		return scalable_mode_support() && info && info->auxd_enabled;
5940 
5941 	return false;
5942 }
5943 
5944 static int
5945 intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
5946 {
5947 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
5948 
5949 	return dmar_domain->default_pasid > 0 ?
5950 			dmar_domain->default_pasid : -EINVAL;
5951 }
5952 
5953 static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
5954 					   struct device *dev)
5955 {
5956 	return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
5957 }
5958 
5959 const struct iommu_ops intel_iommu_ops = {
5960 	.capable		= intel_iommu_capable,
5961 	.domain_alloc		= intel_iommu_domain_alloc,
5962 	.domain_free		= intel_iommu_domain_free,
5963 	.attach_dev		= intel_iommu_attach_device,
5964 	.detach_dev		= intel_iommu_detach_device,
5965 	.aux_attach_dev		= intel_iommu_aux_attach_device,
5966 	.aux_detach_dev		= intel_iommu_aux_detach_device,
5967 	.aux_get_pasid		= intel_iommu_aux_get_pasid,
5968 	.map			= intel_iommu_map,
5969 	.unmap			= intel_iommu_unmap,
5970 	.iova_to_phys		= intel_iommu_iova_to_phys,
5971 	.add_device		= intel_iommu_add_device,
5972 	.remove_device		= intel_iommu_remove_device,
5973 	.get_resv_regions	= intel_iommu_get_resv_regions,
5974 	.put_resv_regions	= intel_iommu_put_resv_regions,
5975 	.apply_resv_region	= intel_iommu_apply_resv_region,
5976 	.device_group		= intel_iommu_device_group,
5977 	.dev_has_feat		= intel_iommu_dev_has_feat,
5978 	.dev_feat_enabled	= intel_iommu_dev_feat_enabled,
5979 	.dev_enable_feat	= intel_iommu_dev_enable_feat,
5980 	.dev_disable_feat	= intel_iommu_dev_disable_feat,
5981 	.is_attach_deferred	= intel_iommu_is_attach_deferred,
5982 	.pgsize_bitmap		= INTEL_IOMMU_PGSIZES,
5983 };
5984 
5985 static void quirk_iommu_igfx(struct pci_dev *dev)
5986 {
5987 	pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
5988 	dmar_map_gfx = 0;
5989 }
5990 
5991 /* G4x/GM45 integrated gfx dmar support is totally busted. */
5992 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
5993 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
5994 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
5995 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
5996 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
5997 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
5998 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
5999 
6000 /* Broadwell igfx malfunctions with dmar */
6001 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
6002 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
6003 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
6004 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
6005 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
6006 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
6007 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
6008 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
6009 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
6010 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
6011 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
6012 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
6013 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
6014 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
6015 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
6016 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
6017 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
6018 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
6019 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
6020 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
6021 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
6022 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
6023 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
6024 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
6025 
6026 static void quirk_iommu_rwbf(struct pci_dev *dev)
6027 {
6028 	/*
6029 	 * Mobile 4 Series Chipset neglects to set RWBF capability,
6030 	 * but needs it. Same seems to hold for the desktop versions.
6031 	 */
6032 	pci_info(dev, "Forcing write-buffer flush capability\n");
6033 	rwbf_quirk = 1;
6034 }
6035 
6036 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
6037 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
6038 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
6039 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
6040 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
6041 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
6042 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
6043 
6044 #define GGC 0x52
6045 #define GGC_MEMORY_SIZE_MASK	(0xf << 8)
6046 #define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
6047 #define GGC_MEMORY_SIZE_1M	(0x1 << 8)
6048 #define GGC_MEMORY_SIZE_2M	(0x3 << 8)
6049 #define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
6050 #define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
6051 #define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
6052 #define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)
6053 
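/*
 * GGC is the graphics control register (config offset 0x52) of the devices
 * quirked below; the GGC_MEMORY_*_VT values encode how much stolen memory
 * the BIOS reserved for the IOMMU's shadow GTT (none means graphics DMA
 * remapping cannot work, see quirk_calpella_no_shadow_gtt()).
 */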
6054 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
6055 {
6056 	unsigned short ggc;
6057 
6058 	if (pci_read_config_word(dev, GGC, &ggc))
6059 		return;
6060 
6061 	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
6062 		pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
6063 		dmar_map_gfx = 0;
6064 	} else if (dmar_map_gfx) {
6065 		/* we have to ensure the gfx device is idle before we flush */
6066 		pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
6067 		intel_iommu_strict = 1;
6068 	}
6069 }
6070 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
6071 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
6072 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
6073 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
6074 
6075 /* On Tylersburg chipsets, some BIOSes have been known to enable the
6076    ISOCH DMAR unit for the Azalia sound device, but not give it any
6077    TLB entries, which causes it to deadlock. Check for that.  We do
6078    this in a function called from init_dmars(), instead of in a PCI
6079    quirk, because we don't want to print the obnoxious "BIOS broken"
6080    message if VT-d is actually disabled.
6081 */
6082 static void __init check_tylersburg_isoch(void)
6083 {
6084 	struct pci_dev *pdev;
6085 	uint32_t vtisochctrl;
6086 
6087 	/* If there's no Azalia in the system anyway, forget it. */
6088 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
6089 	if (!pdev)
6090 		return;
6091 	pci_dev_put(pdev);
6092 
6093 	/* System Management Registers. Might be hidden, in which case
6094 	   we can't do the sanity check. But that's OK, because the
6095 	   known-broken BIOSes _don't_ actually hide it, so far. */
6096 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
6097 	if (!pdev)
6098 		return;
6099 
6100 	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
6101 		pci_dev_put(pdev);
6102 		return;
6103 	}
6104 
6105 	pci_dev_put(pdev);
6106 
6107 	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
6108 	if (vtisochctrl & 1)
6109 		return;
6110 
6111 	/* Drop all bits other than the number of TLB entries */
6112 	vtisochctrl &= 0x1c;
6113 
6114 	/* If we have the recommended number of TLB entries (16), fine. */
6115 	if (vtisochctrl == 0x10)
6116 		return;
6117 
6118 	/* Zero TLB entries? You get to ride the short bus to school. */
6119 	if (!vtisochctrl) {
6120 		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
6121 		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
6122 		     dmi_get_system_info(DMI_BIOS_VENDOR),
6123 		     dmi_get_system_info(DMI_BIOS_VERSION),
6124 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
6125 		iommu_identity_mapping |= IDENTMAP_AZALIA;
6126 		return;
6127 	}
6128 
6129 	pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
6130 	       vtisochctrl);
6131 }
6132