// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2006-2009, Intel Corporation.
 *
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 */

#include <linux/iova.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/bitops.h>
#include <linux/cpu.h>
#include <trace/hooks/iommu.h>

/* The anchor node sits above the top of the usable address space */
#define IOVA_ANCHOR	~0UL

#define IOVA_RANGE_CACHE_MAX_SIZE 6	/* log of max cached IOVA range size (in pages) */

static bool iova_rcache_insert(struct iova_domain *iovad,
			       unsigned long pfn,
			       unsigned long size);
static unsigned long iova_rcache_get(struct iova_domain *iovad,
				     unsigned long size,
				     unsigned long limit_pfn);
static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad);
static void free_iova_rcaches(struct iova_domain *iovad);
unsigned long iova_rcache_range(void)
{
	return PAGE_SIZE << (IOVA_RANGE_CACHE_MAX_SIZE - 1);
}

static int iova_cpuhp_dead(unsigned int cpu, struct hlist_node *node)
{
	struct iova_domain *iovad;

	iovad = hlist_entry_safe(node, struct iova_domain, cpuhp_dead);

	free_cpu_cached_iovas(cpu, iovad);
	return 0;
}

static void free_global_cached_iovas(struct iova_domain *iovad);

static struct iova *to_iova(struct rb_node *node)
{
	return rb_entry(node, struct iova, node);
}

void
init_iova_domain(struct iova_domain *iovad, unsigned long granule,
	unsigned long start_pfn)
{
	/*
	 * IOVA granularity will normally be equal to the smallest
	 * supported IOMMU page size; both *must* be capable of
	 * representing individual CPU pages exactly.
	 */
	BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));

	spin_lock_init(&iovad->iova_rbtree_lock);
	iovad->rbroot = RB_ROOT;
	iovad->cached_node = &iovad->anchor.node;
	iovad->cached32_node = &iovad->anchor.node;
	iovad->granule = granule;
	iovad->start_pfn = start_pfn;
	iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
	iovad->max32_alloc_size = iovad->dma_32bit_pfn;
	iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
	rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
	rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
	android_init_vendor_data(iovad, 1);
}
EXPORT_SYMBOL_GPL(init_iova_domain);
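
/*
 * Example usage (illustrative sketch): a hypothetical driver managing its
 * own IOVA space might set up a domain with a 4 KiB granule and tear it
 * down again as below. The name "my_domain" and the start_pfn of 1 are
 * made-up example values, not requirements of this API.
 *
 *	static struct iova_domain my_domain;
 *
 *	static int my_iova_setup(void)
 *	{
 *		int ret = iova_cache_get();
 *
 *		if (ret)
 *			return ret;
 *		init_iova_domain(&my_domain, SZ_4K, 1);
 *		return 0;
 *	}
 *
 *	static void my_iova_teardown(void)
 *	{
 *		put_iova_domain(&my_domain);
 *		iova_cache_put();
 *	}
 */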

static struct rb_node *
__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
{
	if (limit_pfn <= iovad->dma_32bit_pfn)
		return iovad->cached32_node;

	return iovad->cached_node;
}

static void
__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
{
	if (new->pfn_hi < iovad->dma_32bit_pfn)
		iovad->cached32_node = &new->node;
	else
		iovad->cached_node = &new->node;
}

static void
__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
{
	struct iova *cached_iova;

	cached_iova = to_iova(iovad->cached32_node);
	if (free == cached_iova ||
	    (free->pfn_hi < iovad->dma_32bit_pfn &&
	     free->pfn_lo >= cached_iova->pfn_lo))
		iovad->cached32_node = rb_next(&free->node);

	if (free->pfn_lo < iovad->dma_32bit_pfn)
		iovad->max32_alloc_size = iovad->dma_32bit_pfn;

	cached_iova = to_iova(iovad->cached_node);
	if (free->pfn_lo >= cached_iova->pfn_lo)
		iovad->cached_node = rb_next(&free->node);
}

static struct rb_node *iova_find_limit(struct iova_domain *iovad, unsigned long limit_pfn)
{
	struct rb_node *node, *next;
	/*
	 * Ideally what we'd like to judge here is whether limit_pfn is close
	 * enough to the highest-allocated IOVA that starting the allocation
	 * walk from the anchor node will be quicker than this initial work to
	 * find an exact starting point (especially if that ends up being the
	 * anchor node anyway). This is an incredibly crude approximation which
	 * only really helps the most likely case, but is at least trivially easy.
	 */
	if (limit_pfn > iovad->dma_32bit_pfn)
		return &iovad->anchor.node;

	node = iovad->rbroot.rb_node;
	while (to_iova(node)->pfn_hi < limit_pfn)
		node = node->rb_right;

search_left:
	while (node->rb_left && to_iova(node->rb_left)->pfn_lo >= limit_pfn)
		node = node->rb_left;

	if (!node->rb_left)
		return node;

	next = node->rb_left;
	while (next->rb_right) {
		next = next->rb_right;
		if (to_iova(next)->pfn_lo >= limit_pfn) {
			node = next;
			goto search_left;
		}
	}

	return node;
}

/* Insert the iova into domain rbtree by holding writer lock */
static void
iova_insert_rbtree(struct rb_root *root, struct iova *iova,
		   struct rb_node *start)
{
	struct rb_node **new, *parent = NULL;

	new = (start) ? &start : &(root->rb_node);
	/* Figure out where to put new node */
	while (*new) {
		struct iova *this = to_iova(*new);

		parent = *new;

		if (iova->pfn_lo < this->pfn_lo)
			new = &((*new)->rb_left);
		else if (iova->pfn_lo > this->pfn_lo)
			new = &((*new)->rb_right);
		else {
			WARN_ON(1); /* this should not happen */
			return;
		}
	}
	/* Add new node and rebalance tree. */
	rb_link_node(&iova->node, parent, new);
	rb_insert_color(&iova->node, root);
}

static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
		unsigned long size, unsigned long limit_pfn,
			struct iova *new, bool size_aligned)
{
	struct rb_node *curr, *prev;
	struct iova *curr_iova;
	unsigned long flags;
	unsigned long new_pfn, retry_pfn;
	unsigned long align_mask = ~0UL;
	unsigned long high_pfn = limit_pfn, low_pfn = iovad->start_pfn;

	if (size_aligned) {
		unsigned long shift = fls_long(size - 1);
		trace_android_rvh_iommu_limit_align_shift(iovad, size, &shift);
		align_mask <<= shift;
	}

	/* Walk the tree backwards */
	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	if (limit_pfn <= iovad->dma_32bit_pfn &&
			size >= iovad->max32_alloc_size)
		goto iova32_full;

	curr = __get_cached_rbnode(iovad, limit_pfn);
	curr_iova = to_iova(curr);
	retry_pfn = curr_iova->pfn_hi;

retry:
	do {
		high_pfn = min(high_pfn, curr_iova->pfn_lo);
		new_pfn = (high_pfn - size) & align_mask;
		prev = curr;
		curr = rb_prev(curr);
		curr_iova = to_iova(curr);
	} while (curr && new_pfn <= curr_iova->pfn_hi && new_pfn >= low_pfn);

	if (high_pfn < size || new_pfn < low_pfn) {
		if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) {
			high_pfn = limit_pfn;
			low_pfn = retry_pfn + 1;
			curr = iova_find_limit(iovad, limit_pfn);
			curr_iova = to_iova(curr);
			goto retry;
		}
		iovad->max32_alloc_size = size;
		goto iova32_full;
	}

	/* pfn_lo will point to size aligned address if size_aligned is set */
	new->pfn_lo = new_pfn;
	new->pfn_hi = new->pfn_lo + size - 1;

	/* If we have 'prev', it's a valid place to start the insertion. */
	iova_insert_rbtree(&iovad->rbroot, new, prev);
	__cached_rbnode_insert_update(iovad, new);

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return 0;

iova32_full:
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return -ENOMEM;
}

static struct kmem_cache *iova_cache;
static unsigned int iova_cache_users;
static DEFINE_MUTEX(iova_cache_mutex);

static struct iova *alloc_iova_mem(void)
{
	return kmem_cache_zalloc(iova_cache, GFP_ATOMIC | __GFP_NOWARN);
}

static void free_iova_mem(struct iova *iova)
{
	if (iova->pfn_lo != IOVA_ANCHOR)
		kmem_cache_free(iova_cache, iova);
}

int iova_cache_get(void)
{
	mutex_lock(&iova_cache_mutex);
	if (!iova_cache_users) {
		int ret;

		ret = cpuhp_setup_state_multi(CPUHP_IOMMU_IOVA_DEAD, "iommu/iova:dead", NULL,
					iova_cpuhp_dead);
		if (ret) {
			mutex_unlock(&iova_cache_mutex);
			pr_err("Couldn't register cpuhp handler\n");
			return ret;
		}

		iova_cache = kmem_cache_create(
			"iommu_iova", sizeof(struct iova), 0,
			SLAB_HWCACHE_ALIGN, NULL);
		if (!iova_cache) {
			cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
			mutex_unlock(&iova_cache_mutex);
			pr_err("Couldn't create iova cache\n");
			return -ENOMEM;
		}
	}

	iova_cache_users++;
	mutex_unlock(&iova_cache_mutex);

	return 0;
}
EXPORT_SYMBOL_GPL(iova_cache_get);

void iova_cache_put(void)
{
	mutex_lock(&iova_cache_mutex);
	if (WARN_ON(!iova_cache_users)) {
		mutex_unlock(&iova_cache_mutex);
		return;
	}
	iova_cache_users--;
	if (!iova_cache_users) {
		cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
		kmem_cache_destroy(iova_cache);
	}
	mutex_unlock(&iova_cache_mutex);
}
EXPORT_SYMBOL_GPL(iova_cache_put);

/**
 * alloc_iova - allocates an iova
 * @iovad: - iova domain in question
 * @size: - size of page frames to allocate
 * @limit_pfn: - max limit address
 * @size_aligned: - set if size_aligned address range is required
 * This function allocates an iova in the range iovad->start_pfn to limit_pfn,
 * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned
 * flag is set then the allocated address iova->pfn_lo will be naturally
 * aligned on roundup_power_of_two(size).
 */
struct iova *
alloc_iova(struct iova_domain *iovad, unsigned long size,
	unsigned long limit_pfn,
	bool size_aligned)
{
	struct iova *new_iova;
	int ret = -1;

	new_iova = alloc_iova_mem();
	if (!new_iova)
		return NULL;

	trace_android_rvh_iommu_alloc_insert_iova(iovad, size, limit_pfn + 1,
			new_iova, size_aligned, &ret);
	if (ret) {
		ret = __alloc_and_insert_iova_range(iovad, size,
			limit_pfn + 1, new_iova, size_aligned);
	}

	if (ret) {
		free_iova_mem(new_iova);
		return NULL;
	}

	return new_iova;
}
EXPORT_SYMBOL_GPL(alloc_iova);
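
/*
 * Example usage (illustrative sketch): allocating one page worth of IOVA
 * space below a 32-bit limit and releasing it again. "dom" is assumed to
 * be an already initialised iova_domain supplied by the caller.
 *
 *	struct iova *iova;
 *
 *	iova = alloc_iova(dom, 1, DMA_BIT_MASK(32) >> iova_shift(dom), true);
 *	if (!iova)
 *		return -ENOMEM;
 *	...
 *	__free_iova(dom, iova);
 */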

static struct iova *
private_find_iova(struct iova_domain *iovad, unsigned long pfn)
{
	struct rb_node *node = iovad->rbroot.rb_node;

	assert_spin_locked(&iovad->iova_rbtree_lock);

	while (node) {
		struct iova *iova = to_iova(node);

		if (pfn < iova->pfn_lo)
			node = node->rb_left;
		else if (pfn > iova->pfn_hi)
			node = node->rb_right;
		else
			return iova;	/* pfn falls within iova's range */
	}

	return NULL;
}

static void remove_iova(struct iova_domain *iovad, struct iova *iova)
{
	assert_spin_locked(&iovad->iova_rbtree_lock);
	__cached_rbnode_delete_update(iovad, iova);
	rb_erase(&iova->node, &iovad->rbroot);
}

/**
 * find_iova - finds an iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - page frame number
 * This function finds and returns an iova belonging to the
 * given domain which matches the given pfn.
 */
struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
{
	unsigned long flags;
	struct iova *iova;

	/* Take the lock so that no other thread is manipulating the rbtree */
	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	iova = private_find_iova(iovad, pfn);
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return iova;
}
EXPORT_SYMBOL_GPL(find_iova);
/**
 * __free_iova - frees the given iova
 * @iovad: iova domain in question.
 * @iova: iova in question.
 * Frees the given iova belonging to the given domain
 */
void
__free_iova(struct iova_domain *iovad, struct iova *iova)
{
	unsigned long flags;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	remove_iova(iovad, iova);
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(__free_iova);

/**
 * free_iova - finds and frees the iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - pfn that is allocated previously
 * This function finds an iova for a given pfn and then
 * frees the iova from that domain.
 */
void
free_iova(struct iova_domain *iovad, unsigned long pfn)
{
	unsigned long flags;
	struct iova *iova;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	iova = private_find_iova(iovad, pfn);
	if (!iova) {
		spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
		return;
	}
	remove_iova(iovad, iova);
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(free_iova);

/**
 * alloc_iova_fast - allocates an iova from rcache
 * @iovad: - iova domain in question
 * @size: - size of page frames to allocate
 * @limit_pfn: - max limit address
 * @flush_rcache: - set to flush rcache on regular allocation failure
 * This function tries to satisfy an iova allocation from the rcache,
 * and falls back to regular allocation on failure. If regular allocation
 * fails too and the flush_rcache flag is set then the rcache will be flushed.
 */
unsigned long
alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
		unsigned long limit_pfn, bool flush_rcache)
{
	unsigned long iova_pfn;
	struct iova *new_iova;

	/*
	 * Freeing non-power-of-two-sized allocations back into the IOVA caches
	 * will come back to bite us badly, so we have to waste a bit of space
	 * rounding up anything cacheable to make sure that can't happen. The
	 * order of the unadjusted size will still match upon freeing.
	 */
	if (size < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
		size = roundup_pow_of_two(size);

	iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
	if (iova_pfn)
		return iova_pfn;

retry:
	new_iova = alloc_iova(iovad, size, limit_pfn, true);
	if (!new_iova) {
		unsigned int cpu;

		if (!flush_rcache)
			return 0;

		/* Try replenishing IOVAs by flushing rcache. */
		flush_rcache = false;
		for_each_online_cpu(cpu)
			free_cpu_cached_iovas(cpu, iovad);
		free_global_cached_iovas(iovad);
		goto retry;
	}

	return new_iova->pfn_lo;
}
EXPORT_SYMBOL_GPL(alloc_iova_fast);

/**
 * free_iova_fast - free iova pfn range into rcache
 * @iovad: - iova domain in question.
 * @pfn: - pfn that is allocated previously
 * @size: - # of pages in range
 * This function frees an iova range by trying to put it into the rcache,
 * falling back to regular iova deallocation via free_iova() if this fails.
 */
void
free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
{
	if (iova_rcache_insert(iovad, pfn, size))
		return;

	free_iova(iovad, pfn);
}
EXPORT_SYMBOL_GPL(free_iova_fast);
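
/*
 * Example usage (illustrative sketch): how a hypothetical DMA-mapping-style
 * caller might pair the two fast-path helpers above. "dom", "npages" and
 * "dma_limit" are assumed to come from the caller, and the caches must have
 * been set up with iova_domain_init_rcaches() beforehand.
 *
 *	unsigned long pfn;
 *
 *	pfn = alloc_iova_fast(dom, npages, dma_limit >> iova_shift(dom), true);
 *	if (!pfn)
 *		return DMA_MAPPING_ERROR;
 *	...
 *	free_iova_fast(dom, pfn, npages);
 */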

static void iova_domain_free_rcaches(struct iova_domain *iovad)
{
	cpuhp_state_remove_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
					    &iovad->cpuhp_dead);
	free_iova_rcaches(iovad);
}

/**
 * put_iova_domain - destroys the iova domain
 * @iovad: - iova domain in question.
 * All the iovas in that domain are destroyed.
 */
void put_iova_domain(struct iova_domain *iovad)
{
	struct iova *iova, *tmp;

	if (iovad->rcaches)
		iova_domain_free_rcaches(iovad);

	rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
		free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(put_iova_domain);

static int
__is_range_overlap(struct rb_node *node,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova = to_iova(node);

	if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
		return 1;
	return 0;
}

static inline struct iova *
alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova;

	iova = alloc_iova_mem();
	if (iova) {
		iova->pfn_lo = pfn_lo;
		iova->pfn_hi = pfn_hi;
	}

	return iova;
}

static struct iova *
__insert_new_range(struct iova_domain *iovad,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova;

	iova = alloc_and_init_iova(pfn_lo, pfn_hi);
	if (iova)
		iova_insert_rbtree(&iovad->rbroot, iova, NULL);

	return iova;
}

static void
__adjust_overlap_range(struct iova *iova,
	unsigned long *pfn_lo, unsigned long *pfn_hi)
{
	if (*pfn_lo < iova->pfn_lo)
		iova->pfn_lo = *pfn_lo;
	if (*pfn_hi > iova->pfn_hi)
		*pfn_lo = iova->pfn_hi + 1;
}
/**
 * reserve_iova - reserves an iova in the given range
 * @iovad: - iova domain pointer
 * @pfn_lo: - lower page frame address
 * @pfn_hi: - higher pfn address
 * This function reserves the address range from pfn_lo to pfn_hi so
 * that this address is not dished out as part of alloc_iova.
 */
struct iova *
reserve_iova(struct iova_domain *iovad,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct rb_node *node;
	unsigned long flags;
	struct iova *iova;
	unsigned int overlap = 0;

	/* Don't allow nonsensical pfns */
	if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
		return NULL;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
		if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
			iova = to_iova(node);
			__adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
			if ((pfn_lo >= iova->pfn_lo) &&
				(pfn_hi <= iova->pfn_hi))
				goto finish;
			overlap = 1;

		} else if (overlap)
				break;
	}

	/*
	 * We are here either because this is the first reserved node
	 * or we need to insert the remaining non-overlapping address range.
	 */
	iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
finish:

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return iova;
}
EXPORT_SYMBOL_GPL(reserve_iova);
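
/*
 * Example usage (illustrative sketch): carving a hypothetical MSI doorbell
 * window out of the allocatable space so that alloc_iova() can never hand
 * those addresses out. "dom" and the address range are made-up example
 * values.
 *
 *	unsigned long lo = iova_pfn(dom, 0xfee00000);
 *	unsigned long hi = iova_pfn(dom, 0xfeefffff);
 *
 *	if (!reserve_iova(dom, lo, hi))
 *		return -ENOMEM;
 */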

/*
 * Magazine caches for IOVA ranges.  For an introduction to magazines,
 * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
 * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams.
 * For simplicity, we use a static magazine size and don't implement the
 * dynamic size tuning described in the paper.
 */

/*
 * As kmalloc's buffer size is fixed to power of 2, 127 is chosen to
 * assure size of 'iova_magazine' to be 1024 bytes, so that no memory
 * will be wasted: on 64-bit, the size field plus 127 pfns comes to
 * 128 * sizeof(unsigned long) = 1024 bytes.
 */
#define IOVA_MAG_SIZE 127
#define MAX_GLOBAL_MAGS 32	/* magazines per bin */

struct iova_magazine {
	unsigned long size;
	unsigned long pfns[IOVA_MAG_SIZE];
};

struct iova_cpu_rcache {
	spinlock_t lock;
	struct iova_magazine *loaded;
	struct iova_magazine *prev;
};

struct iova_rcache {
	spinlock_t lock;
	unsigned long depot_size;
	struct iova_magazine *depot[MAX_GLOBAL_MAGS];
	struct iova_cpu_rcache __percpu *cpu_rcaches;
};

static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
{
	return kzalloc(sizeof(struct iova_magazine), flags);
}

static void iova_magazine_free(struct iova_magazine *mag)
{
	kfree(mag);
}

static void
iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
{
	unsigned long flags;
	int i;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);

	for (i = 0 ; i < mag->size; ++i) {
		struct iova *iova = private_find_iova(iovad, mag->pfns[i]);

		if (WARN_ON(!iova))
			continue;

		remove_iova(iovad, iova);
		free_iova_mem(iova);
	}

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);

	mag->size = 0;
}

static bool iova_magazine_full(struct iova_magazine *mag)
{
	return mag->size == IOVA_MAG_SIZE;
}

static bool iova_magazine_empty(struct iova_magazine *mag)
{
	return mag->size == 0;
}

static unsigned long iova_magazine_pop(struct iova_magazine *mag,
				       unsigned long limit_pfn)
{
	int i;
	unsigned long pfn;

	/* Only fall back to the rbtree if we have no suitable pfns at all */
	for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
		if (i == 0)
			return 0;

	/* Swap it to pop it */
	pfn = mag->pfns[i];
	mag->pfns[i] = mag->pfns[--mag->size];

	return pfn;
}

static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
{
	mag->pfns[mag->size++] = pfn;
}

int iova_domain_init_rcaches(struct iova_domain *iovad)
{
	unsigned int cpu;
	int i, ret;

	iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_MAX_SIZE,
				 sizeof(struct iova_rcache),
				 GFP_KERNEL);
	if (!iovad->rcaches)
		return -ENOMEM;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		struct iova_cpu_rcache *cpu_rcache;
		struct iova_rcache *rcache;

		rcache = &iovad->rcaches[i];
		spin_lock_init(&rcache->lock);
		rcache->depot_size = 0;
		rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache),
						     cache_line_size());
		if (!rcache->cpu_rcaches) {
			ret = -ENOMEM;
			goto out_err;
		}
		for_each_possible_cpu(cpu) {
			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);

			spin_lock_init(&cpu_rcache->lock);
			cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
			cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
			if (!cpu_rcache->loaded || !cpu_rcache->prev) {
				ret = -ENOMEM;
				goto out_err;
			}
		}
	}

	ret = cpuhp_state_add_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
					       &iovad->cpuhp_dead);
	if (ret)
		goto out_err;
	return 0;

out_err:
	free_iova_rcaches(iovad);
	return ret;
}
EXPORT_SYMBOL_GPL(iova_domain_init_rcaches);
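
/*
 * Example usage (illustrative sketch): a hypothetical caller that wants the
 * alloc_iova_fast()/free_iova_fast() fast path enables the per-CPU and
 * depot caches right after initialising the domain ("my_domain" as in the
 * earlier sketch).
 *
 *	int ret;
 *
 *	init_iova_domain(&my_domain, SZ_4K, 1);
 *	ret = iova_domain_init_rcaches(&my_domain);
 *	if (ret)
 *		return ret;
 */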

/*
 * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
 * return true on success.  Can fail if rcache is full and we can't free
 * space; in that case free_iova_fast() (our only caller, via
 * iova_rcache_insert()) falls back to free_iova() and returns the IOVA
 * range to the rbtree instead.
 */
static bool __iova_rcache_insert(struct iova_domain *iovad,
				 struct iova_rcache *rcache,
				 unsigned long iova_pfn)
{
	struct iova_magazine *mag_to_free = NULL;
	struct iova_cpu_rcache *cpu_rcache;
	bool can_insert = false;
	unsigned long flags;

	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
	spin_lock_irqsave(&cpu_rcache->lock, flags);

	if (!iova_magazine_full(cpu_rcache->loaded)) {
		can_insert = true;
	} else if (!iova_magazine_full(cpu_rcache->prev)) {
		swap(cpu_rcache->prev, cpu_rcache->loaded);
		can_insert = true;
	} else {
		struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);

		if (new_mag) {
			spin_lock(&rcache->lock);
			if (rcache->depot_size < MAX_GLOBAL_MAGS) {
				rcache->depot[rcache->depot_size++] =
						cpu_rcache->loaded;
			} else {
				mag_to_free = cpu_rcache->loaded;
			}
			spin_unlock(&rcache->lock);

			cpu_rcache->loaded = new_mag;
			can_insert = true;
		}
	}

	if (can_insert)
		iova_magazine_push(cpu_rcache->loaded, iova_pfn);

	spin_unlock_irqrestore(&cpu_rcache->lock, flags);

	if (mag_to_free) {
		iova_magazine_free_pfns(mag_to_free, iovad);
		iova_magazine_free(mag_to_free);
	}

	return can_insert;
}

static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
			       unsigned long size)
{
	unsigned int log_size = order_base_2(size);

	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
		return false;

	return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
}

/*
 * Caller wants to allocate a new IOVA range from 'rcache'.  If we can
 * satisfy the request, return a matching non-NULL range and remove
 * it from the 'rcache'.
 */
static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
				       unsigned long limit_pfn)
{
	struct iova_cpu_rcache *cpu_rcache;
	unsigned long iova_pfn = 0;
	bool has_pfn = false;
	unsigned long flags;

	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
	spin_lock_irqsave(&cpu_rcache->lock, flags);

	if (!iova_magazine_empty(cpu_rcache->loaded)) {
		has_pfn = true;
	} else if (!iova_magazine_empty(cpu_rcache->prev)) {
		swap(cpu_rcache->prev, cpu_rcache->loaded);
		has_pfn = true;
	} else {
		spin_lock(&rcache->lock);
		if (rcache->depot_size > 0) {
			iova_magazine_free(cpu_rcache->loaded);
			cpu_rcache->loaded = rcache->depot[--rcache->depot_size];
			has_pfn = true;
		}
		spin_unlock(&rcache->lock);
	}

	if (has_pfn)
		iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);

	spin_unlock_irqrestore(&cpu_rcache->lock, flags);

	return iova_pfn;
}

/*
 * Try to satisfy IOVA allocation range from rcache.  Fail if requested
 * size is too big or the DMA limit we are given isn't satisfied by the
 * top element in the magazine.
 */
static unsigned long iova_rcache_get(struct iova_domain *iovad,
				     unsigned long size,
				     unsigned long limit_pfn)
{
	unsigned int log_size = order_base_2(size);

	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
		return 0;

	return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
}

/*
 * free rcache data structures.
 */
static void free_iova_rcaches(struct iova_domain *iovad)
{
	struct iova_rcache *rcache;
	struct iova_cpu_rcache *cpu_rcache;
	unsigned int cpu;
	int i, j;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		if (!rcache->cpu_rcaches)
			break;
		for_each_possible_cpu(cpu) {
			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
			iova_magazine_free(cpu_rcache->loaded);
			iova_magazine_free(cpu_rcache->prev);
		}
		free_percpu(rcache->cpu_rcaches);
		for (j = 0; j < rcache->depot_size; ++j)
			iova_magazine_free(rcache->depot[j]);
	}

	kfree(iovad->rcaches);
	iovad->rcaches = NULL;
}

/*
 * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
 */
static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
{
	struct iova_cpu_rcache *cpu_rcache;
	struct iova_rcache *rcache;
	unsigned long flags;
	int i;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
		spin_lock_irqsave(&cpu_rcache->lock, flags);
		iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
		iova_magazine_free_pfns(cpu_rcache->prev, iovad);
		spin_unlock_irqrestore(&cpu_rcache->lock, flags);
	}
}

/*
 * free all the IOVA ranges of global cache
 */
static void free_global_cached_iovas(struct iova_domain *iovad)
{
	struct iova_rcache *rcache;
	unsigned long flags;
	int i, j;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		spin_lock_irqsave(&rcache->lock, flags);
		for (j = 0; j < rcache->depot_size; ++j) {
			iova_magazine_free_pfns(rcache->depot[j], iovad);
			iova_magazine_free(rcache->depot[j]);
		}
		rcache->depot_size = 0;
		spin_unlock_irqrestore(&rcache->lock, flags);
	}
}
MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
MODULE_LICENSE("GPL");