1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright © 2006-2009, Intel Corporation.
4 *
5 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
6 */
7
8 #include <linux/iova.h>
9 #include <linux/module.h>
10 #include <linux/slab.h>
11 #include <linux/smp.h>
12 #include <linux/bitops.h>
13 #include <linux/cpu.h>
14 #include <linux/workqueue.h>
15 #include <trace/hooks/iommu.h>
16
17 /* The anchor node sits above the top of the usable address space */
18 #define IOVA_ANCHOR ~0UL
19
20 #define IOVA_RANGE_CACHE_MAX_SIZE 6 /* log of max cached IOVA range size (in pages) */
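/*
 * With the value above, only ranges of up to 1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)
 * pages (i.e. 32 pages, orders 0-5) are ever cached; larger allocations and
 * frees always go straight to the rbtree.
 */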
21
22 static bool iova_rcache_insert(struct iova_domain *iovad,
23 unsigned long pfn,
24 unsigned long size);
25 static unsigned long iova_rcache_get(struct iova_domain *iovad,
26 unsigned long size,
27 unsigned long limit_pfn);
28 static void free_iova_rcaches(struct iova_domain *iovad);
29 static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad);
30 static void free_global_cached_iovas(struct iova_domain *iovad);
31
32 static struct iova *to_iova(struct rb_node *node)
33 {
34 return rb_entry(node, struct iova, node);
35 }
36
37 void
38 init_iova_domain(struct iova_domain *iovad, unsigned long granule,
39 unsigned long start_pfn)
40 {
41 /*
42 * IOVA granularity will normally be equal to the smallest
43 * supported IOMMU page size; both *must* be capable of
44 * representing individual CPU pages exactly.
45 */
46 BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));
47
48 spin_lock_init(&iovad->iova_rbtree_lock);
49 iovad->rbroot = RB_ROOT;
50 iovad->cached_node = &iovad->anchor.node;
51 iovad->cached32_node = &iovad->anchor.node;
52 iovad->granule = granule;
53 iovad->start_pfn = start_pfn;
54 iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
55 iovad->max32_alloc_size = iovad->dma_32bit_pfn;
56 iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
57 rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
58 rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
59 android_init_vendor_data(iovad, 1);
60 }
61 EXPORT_SYMBOL_GPL(init_iova_domain);
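/*
 * Illustrative sketch (not part of this file) of how a user of this API
 * typically brings a domain up and tears it down; 'iovad', 'order' and
 * 'base_pfn' are hypothetical caller-side names:
 *
 *   iova_cache_get();
 *   init_iova_domain(iovad, 1UL << order, base_pfn);
 *   iova_domain_init_rcaches(iovad);
 *   ...
 *   put_iova_domain(iovad);
 *   iova_cache_put();
 */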
62
63 static struct rb_node *
64 __get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
65 {
66 if (limit_pfn <= iovad->dma_32bit_pfn)
67 return iovad->cached32_node;
68
69 return iovad->cached_node;
70 }
71
72 static void
73 __cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
74 {
75 if (new->pfn_hi < iovad->dma_32bit_pfn)
76 iovad->cached32_node = &new->node;
77 else
78 iovad->cached_node = &new->node;
79 }
80
81 static void
82 __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
83 {
84 struct iova *cached_iova;
85
86 cached_iova = to_iova(iovad->cached32_node);
87 if (free == cached_iova ||
88 (free->pfn_hi < iovad->dma_32bit_pfn &&
89 free->pfn_lo >= cached_iova->pfn_lo))
90 iovad->cached32_node = rb_next(&free->node);
91
92 if (free->pfn_lo < iovad->dma_32bit_pfn)
93 iovad->max32_alloc_size = iovad->dma_32bit_pfn;
94
95 cached_iova = to_iova(iovad->cached_node);
96 if (free->pfn_lo >= cached_iova->pfn_lo)
97 iovad->cached_node = rb_next(&free->node);
98 }
99
100 static struct rb_node *iova_find_limit(struct iova_domain *iovad, unsigned long limit_pfn)
101 {
102 struct rb_node *node, *next;
103 /*
104 * Ideally what we'd like to judge here is whether limit_pfn is close
105 * enough to the highest-allocated IOVA that starting the allocation
106 * walk from the anchor node will be quicker than this initial work to
107 * find an exact starting point (especially if that ends up being the
108 * anchor node anyway). This is an incredibly crude approximation which
109 * only really helps the most likely case, but is at least trivially easy.
110 */
111 if (limit_pfn > iovad->dma_32bit_pfn)
112 return &iovad->anchor.node;
113
114 node = iovad->rbroot.rb_node;
115 while (to_iova(node)->pfn_hi < limit_pfn)
116 node = node->rb_right;
117
118 search_left:
119 while (node->rb_left && to_iova(node->rb_left)->pfn_lo >= limit_pfn)
120 node = node->rb_left;
121
122 if (!node->rb_left)
123 return node;
124
125 next = node->rb_left;
126 while (next->rb_right) {
127 next = next->rb_right;
128 if (to_iova(next)->pfn_lo >= limit_pfn) {
129 node = next;
130 goto search_left;
131 }
132 }
133
134 return node;
135 }
136
137 /* Insert the iova into the domain rbtree; the caller must hold the rbtree lock */
138 static void
139 iova_insert_rbtree(struct rb_root *root, struct iova *iova,
140 struct rb_node *start)
141 {
142 struct rb_node **new, *parent = NULL;
143
144 new = (start) ? &start : &(root->rb_node);
145 /* Figure out where to put new node */
146 while (*new) {
147 struct iova *this = to_iova(*new);
148
149 parent = *new;
150
151 if (iova->pfn_lo < this->pfn_lo)
152 new = &((*new)->rb_left);
153 else if (iova->pfn_lo > this->pfn_lo)
154 new = &((*new)->rb_right);
155 else {
156 WARN_ON(1); /* this should not happen */
157 return;
158 }
159 }
160 /* Add new node and rebalance tree. */
161 rb_link_node(&iova->node, parent, new);
162 rb_insert_color(&iova->node, root);
163 }
164
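/*
 * Allocate 'size' pages below 'limit_pfn': take the rbtree lock and walk
 * the tree right to left (top-down in address), starting from the cached
 * node for the relevant (32-bit or full) range, trying to place a
 * (possibly size-aligned) range in each gap. If that fails and the walk
 * started from a cached node below limit_pfn, retry once from the node
 * found by iova_find_limit() to cover the region above the cached node.
 * On failure, max32_alloc_size is updated so that 32-bit-limited requests
 * of equal or larger size can fail fast until space is freed below the
 * 32-bit boundary again (see __cached_rbnode_delete_update()).
 */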
165 static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
166 unsigned long size, unsigned long limit_pfn,
167 struct iova *new, bool size_aligned)
168 {
169 struct rb_node *curr, *prev;
170 struct iova *curr_iova;
171 unsigned long flags;
172 unsigned long new_pfn, retry_pfn;
173 unsigned long align_mask = ~0UL;
174 unsigned long high_pfn = limit_pfn, low_pfn = iovad->start_pfn;
175
176 if (size_aligned) {
177 unsigned long shift = fls_long(size - 1);
178
179 trace_android_rvh_iommu_limit_align_shift(iovad, size, &shift);
180 align_mask <<= shift;
181 }
182
183 /* Walk the tree backwards */
184 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
185 if (limit_pfn <= iovad->dma_32bit_pfn &&
186 size >= iovad->max32_alloc_size)
187 goto iova32_full;
188
189 curr = __get_cached_rbnode(iovad, limit_pfn);
190 curr_iova = to_iova(curr);
191 retry_pfn = curr_iova->pfn_hi;
192
193 retry:
194 do {
195 high_pfn = min(high_pfn, curr_iova->pfn_lo);
196 new_pfn = (high_pfn - size) & align_mask;
197 prev = curr;
198 curr = rb_prev(curr);
199 curr_iova = to_iova(curr);
200 } while (curr && new_pfn <= curr_iova->pfn_hi && new_pfn >= low_pfn);
201
202 if (high_pfn < size || new_pfn < low_pfn) {
203 if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) {
204 high_pfn = limit_pfn;
205 low_pfn = retry_pfn + 1;
206 curr = iova_find_limit(iovad, limit_pfn);
207 curr_iova = to_iova(curr);
208 goto retry;
209 }
210 iovad->max32_alloc_size = size;
211 goto iova32_full;
212 }
213
214 /* pfn_lo will point to size aligned address if size_aligned is set */
215 new->pfn_lo = new_pfn;
216 new->pfn_hi = new->pfn_lo + size - 1;
217
218 /* If we have 'prev', it's a valid place to start the insertion. */
219 iova_insert_rbtree(&iovad->rbroot, new, prev);
220 __cached_rbnode_insert_update(iovad, new);
221
222 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
223 return 0;
224
225 iova32_full:
226 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
227 return -ENOMEM;
228 }
229
230 static struct kmem_cache *iova_cache;
231 static unsigned int iova_cache_users;
232 static DEFINE_MUTEX(iova_cache_mutex);
233
234 static struct iova *alloc_iova_mem(void)
235 {
236 return kmem_cache_zalloc(iova_cache, GFP_ATOMIC | __GFP_NOWARN);
237 }
238
239 static void free_iova_mem(struct iova *iova)
240 {
241 if (iova->pfn_lo != IOVA_ANCHOR)
242 kmem_cache_free(iova_cache, iova);
243 }
244
245 /**
246 * alloc_iova - allocates an iova
247 * @iovad: - iova domain in question
248 * @size: - number of page frames to allocate
249 * @limit_pfn: - max limit address
250 * @size_aligned: - set if size_aligned address range is required
251 * This function allocates an iova in the range iovad->start_pfn to limit_pfn,
252 * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned
253 * flag is set then the allocated address iova->pfn_lo will be naturally
254 * aligned on roundup_power_of_two(size).
255 */
256 struct iova *
257 alloc_iova(struct iova_domain *iovad, unsigned long size,
258 unsigned long limit_pfn,
259 bool size_aligned)
260 {
261 struct iova *new_iova;
262 int ret = -1;
263
264 new_iova = alloc_iova_mem();
265 if (!new_iova)
266 return NULL;
267
268 trace_android_rvh_iommu_alloc_insert_iova(iovad, size, limit_pfn + 1,
269 new_iova, size_aligned, &ret);
270 if (ret) {
271 ret = __alloc_and_insert_iova_range(iovad, size,
272 limit_pfn + 1, new_iova, size_aligned);
273 }
274
275 if (ret) {
276 free_iova_mem(new_iova);
277 return NULL;
278 }
279
280 return new_iova;
281 }
282 EXPORT_SYMBOL_GPL(alloc_iova);
283
284 static struct iova *
285 private_find_iova(struct iova_domain *iovad, unsigned long pfn)
286 {
287 struct rb_node *node = iovad->rbroot.rb_node;
288
289 assert_spin_locked(&iovad->iova_rbtree_lock);
290
291 while (node) {
292 struct iova *iova = to_iova(node);
293
294 if (pfn < iova->pfn_lo)
295 node = node->rb_left;
296 else if (pfn > iova->pfn_hi)
297 node = node->rb_right;
298 else
299 return iova; /* pfn falls within iova's range */
300 }
301
302 return NULL;
303 }
304
305 static void remove_iova(struct iova_domain *iovad, struct iova *iova)
306 {
307 assert_spin_locked(&iovad->iova_rbtree_lock);
308 __cached_rbnode_delete_update(iovad, iova);
309 rb_erase(&iova->node, &iovad->rbroot);
310 }
311
312 /**
313 * find_iova - finds an iova for a given pfn
314 * @iovad: - iova domain in question.
315 * @pfn: - page frame number
316 * This function finds and returns the iova belonging to the
317 * given domain whose range contains the given pfn.
318 */
319 struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
320 {
321 unsigned long flags;
322 struct iova *iova;
323
324 /* Take the lock so that no other thread is manipulating the rbtree */
325 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
326 iova = private_find_iova(iovad, pfn);
327 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
328 return iova;
329 }
330 EXPORT_SYMBOL_GPL(find_iova);
331
332 /**
333 * __free_iova - frees the given iova
334 * @iovad: iova domain in question.
335 * @iova: iova in question.
336 * Frees the given iova belonging to the given domain
337 */
338 void
339 __free_iova(struct iova_domain *iovad, struct iova *iova)
340 {
341 unsigned long flags;
342
343 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
344 remove_iova(iovad, iova);
345 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
346 free_iova_mem(iova);
347 }
348 EXPORT_SYMBOL_GPL(__free_iova);
349
350 /**
351 * free_iova - finds and frees the iova for a given pfn
352 * @iovad: - iova domain in question.
353 * @pfn: - pfn that is allocated previously
354 * This function finds the iova for a given pfn and then
355 * frees the iova from that domain.
356 */
357 void
358 free_iova(struct iova_domain *iovad, unsigned long pfn)
359 {
360 unsigned long flags;
361 struct iova *iova;
362
363 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
364 iova = private_find_iova(iovad, pfn);
365 if (!iova) {
366 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
367 return;
368 }
369 remove_iova(iovad, iova);
370 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
371 free_iova_mem(iova);
372 }
373 EXPORT_SYMBOL_GPL(free_iova);
374
375 /**
376 * alloc_iova_fast - allocates an iova from rcache
377 * @iovad: - iova domain in question
378 * @size: - number of page frames to allocate
379 * @limit_pfn: - max limit address
380 * @flush_rcache: - set to flush rcache on regular allocation failure
381 * This function tries to satisfy an iova allocation from the rcache,
382 * and falls back to regular allocation on failure. If regular allocation
383 * fails too and the flush_rcache flag is set then the rcache will be flushed.
384 */
385 unsigned long
386 alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
387 unsigned long limit_pfn, bool flush_rcache)
388 {
389 unsigned long iova_pfn;
390 struct iova *new_iova;
391
392 /*
393 * Freeing non-power-of-two-sized allocations back into the IOVA caches
394 * will come back to bite us badly, so we have to waste a bit of space
395 * rounding up anything cacheable to make sure that can't happen. The
396 * order of the unadjusted size will still match upon freeing.
397 */
398 if (size < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
399 size = roundup_pow_of_two(size);
400
401 iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
402 if (iova_pfn)
403 return iova_pfn;
404
405 retry:
406 new_iova = alloc_iova(iovad, size, limit_pfn, true);
407 if (!new_iova) {
408 unsigned int cpu;
409
410 if (!flush_rcache)
411 return 0;
412
413 /* Try replenishing IOVAs by flushing rcache. */
414 flush_rcache = false;
415 for_each_online_cpu(cpu)
416 free_cpu_cached_iovas(cpu, iovad);
417 free_global_cached_iovas(iovad);
418 goto retry;
419 }
420
421 return new_iova->pfn_lo;
422 }
423 EXPORT_SYMBOL_GPL(alloc_iova_fast);
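/*
 * Illustrative only (not part of this file): a DMA layer typically pairs
 * the fast allocator with the fast free, converting between bus addresses
 * and pfns with iova_shift()/iova_pfn(). 'nrpages' and 'dma_limit' are
 * hypothetical caller-side values; note that limit_pfn is inclusive here.
 *
 *   iova_pfn = alloc_iova_fast(iovad, nrpages,
 *                              dma_limit >> iova_shift(iovad), true);
 *   if (!iova_pfn)
 *           return 0;
 *   ...
 *   free_iova_fast(iovad, iova_pfn, nrpages);
 */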
424
425 /**
426 * free_iova_fast - free iova pfn range into rcache
427 * @iovad: - iova domain in question.
428 * @pfn: - pfn that is allocated previously
429 * @size: - # of pages in range
430 * This function frees an iova range by trying to put it into the rcache,
431 * falling back to regular iova deallocation via free_iova() if this fails.
432 */
433 void
434 free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
435 {
436 if (iova_rcache_insert(iovad, pfn, size))
437 return;
438
439 free_iova(iovad, pfn);
440 }
441 EXPORT_SYMBOL_GPL(free_iova_fast);
442
443 static void iova_domain_free_rcaches(struct iova_domain *iovad)
444 {
445 cpuhp_state_remove_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
446 &iovad->cpuhp_dead);
447 free_iova_rcaches(iovad);
448 }
449
450 /**
451 * put_iova_domain - destroys the iova domain
452 * @iovad: - iova domain in question.
453 * All the iovas in that domain are destroyed.
454 */
455 void put_iova_domain(struct iova_domain *iovad)
456 {
457 struct iova *iova, *tmp;
458
459 if (iovad->rcaches)
460 iova_domain_free_rcaches(iovad);
461
462 rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
463 free_iova_mem(iova);
464 }
465 EXPORT_SYMBOL_GPL(put_iova_domain);
466
467 static int
468 __is_range_overlap(struct rb_node *node,
469 unsigned long pfn_lo, unsigned long pfn_hi)
470 {
471 struct iova *iova = to_iova(node);
472
473 if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
474 return 1;
475 return 0;
476 }
477
478 static inline struct iova *
479 alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi)
480 {
481 struct iova *iova;
482
483 iova = alloc_iova_mem();
484 if (iova) {
485 iova->pfn_lo = pfn_lo;
486 iova->pfn_hi = pfn_hi;
487 }
488
489 return iova;
490 }
491
492 static struct iova *
493 __insert_new_range(struct iova_domain *iovad,
494 unsigned long pfn_lo, unsigned long pfn_hi)
495 {
496 struct iova *iova;
497
498 iova = alloc_and_init_iova(pfn_lo, pfn_hi);
499 if (iova)
500 iova_insert_rbtree(&iovad->rbroot, iova, NULL);
501
502 return iova;
503 }
504
505 static void
506 __adjust_overlap_range(struct iova *iova,
507 unsigned long *pfn_lo, unsigned long *pfn_hi)
508 {
509 if (*pfn_lo < iova->pfn_lo)
510 iova->pfn_lo = *pfn_lo;
511 if (*pfn_hi > iova->pfn_hi)
512 *pfn_lo = iova->pfn_hi + 1;
513 }
514
515 /**
516 * reserve_iova - reserves an iova in the given range
517 * @iovad: - iova domain pointer
518 * @pfn_lo: - lower page frame address
519 * @pfn_hi: - higher pfn address
520 * This function reserves the address range from pfn_lo to pfn_hi so
521 * that this address is not dished out as part of alloc_iova.
522 */
523 struct iova *
524 reserve_iova(struct iova_domain *iovad,
525 unsigned long pfn_lo, unsigned long pfn_hi)
526 {
527 struct rb_node *node;
528 unsigned long flags;
529 struct iova *iova;
530 unsigned int overlap = 0;
531
532 /* Don't allow nonsensical pfns */
533 if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
534 return NULL;
535
536 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
537 for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
538 if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
539 iova = to_iova(node);
540 __adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
541 if ((pfn_lo >= iova->pfn_lo) &&
542 (pfn_hi <= iova->pfn_hi))
543 goto finish;
544 overlap = 1;
545
546 } else if (overlap)
547 break;
548 }
549
550 /* We are here either because this is the first reserved node
551 * or we need to insert the remaining non-overlapping address range
552 */
553 iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
554 finish:
555
556 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
557 return iova;
558 }
559 EXPORT_SYMBOL_GPL(reserve_iova);
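/*
 * Illustrative only: to keep a bus-address window (e.g. a PCI bridge
 * aperture or an MSI doorbell) out of the allocator, a caller reserves it
 * by pfn; 'start' and 'end' are hypothetical bus addresses and iova_pfn()
 * is the helper from <linux/iova.h>:
 *
 *   reserve_iova(iovad, iova_pfn(iovad, start), iova_pfn(iovad, end));
 */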
560
561 /*
562 * Magazine caches for IOVA ranges. For an introduction to magazines,
563 * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
564 * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams.
565 * For simplicity, we use a static magazine size and don't implement the
566 * dynamic size tuning described in the paper.
567 */
568
569 /*
570 * As kmalloc's buffer sizes are fixed to powers of 2, 127 is chosen so
571 * that 'iova_magazine' is exactly 1024 bytes and no memory is wasted.
572 * Since only full magazines are inserted into the depot, we don't need
573 * to waste PFN capacity on a separate list head either.
574 */
575 #define IOVA_MAG_SIZE 127
576
577 #define IOVA_DEPOT_DELAY msecs_to_jiffies(100)
578
579 struct iova_magazine {
580 union {
581 unsigned long size;
582 struct iova_magazine *next;
583 };
584 unsigned long pfns[IOVA_MAG_SIZE];
585 };
586 static_assert(!(sizeof(struct iova_magazine) & (sizeof(struct iova_magazine) - 1)));
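/*
 * On a 64-bit build this works out to 8 + 127 * 8 = 1024 bytes (512 bytes
 * on 32-bit), so the static_assert above simply guards the power-of-two
 * size that the sizing comment relies on.
 */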
587
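/*
 * Two-level cache: each CPU keeps a 'loaded' and a 'prev' magazine of
 * recently freed pfns, and full magazines overflow into the per-rcache
 * depot (a singly linked list through 'next'). iova_depot_work_func()
 * later trims the depot back to roughly one magazine per online CPU.
 */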
588 struct iova_cpu_rcache {
589 spinlock_t lock;
590 struct iova_magazine *loaded;
591 struct iova_magazine *prev;
592 };
593
594 struct iova_rcache {
595 spinlock_t lock;
596 unsigned int depot_size;
597 struct iova_magazine *depot;
598 struct iova_cpu_rcache __percpu *cpu_rcaches;
599 struct iova_domain *iovad;
600 struct delayed_work work;
601 };
602
603 static struct kmem_cache *iova_magazine_cache;
604
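/* Largest range served from the rcaches: 32 pages, i.e. 128K with 4K pages. */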
605 unsigned long iova_rcache_range(void)
606 {
607 return PAGE_SIZE << (IOVA_RANGE_CACHE_MAX_SIZE - 1);
608 }
609
610 static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
611 {
612 struct iova_magazine *mag;
613
614 mag = kmem_cache_alloc(iova_magazine_cache, flags);
615 if (mag)
616 mag->size = 0;
617
618 return mag;
619 }
620
621 static void iova_magazine_free(struct iova_magazine *mag)
622 {
623 kmem_cache_free(iova_magazine_cache, mag);
624 }
625
626 static void
627 iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
628 {
629 unsigned long flags;
630 int i;
631
632 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
633
634 for (i = 0 ; i < mag->size; ++i) {
635 struct iova *iova = private_find_iova(iovad, mag->pfns[i]);
636
637 if (WARN_ON(!iova))
638 continue;
639
640 remove_iova(iovad, iova);
641 free_iova_mem(iova);
642 }
643
644 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
645
646 mag->size = 0;
647 }
648
649 static bool iova_magazine_full(struct iova_magazine *mag)
650 {
651 return mag->size == IOVA_MAG_SIZE;
652 }
653
654 static bool iova_magazine_empty(struct iova_magazine *mag)
655 {
656 return mag->size == 0;
657 }
658
659 static unsigned long iova_magazine_pop(struct iova_magazine *mag,
660 unsigned long limit_pfn)
661 {
662 int i;
663 unsigned long pfn;
664
665 /* Only fall back to the rbtree if we have no suitable pfns at all */
666 for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
667 if (i == 0)
668 return 0;
669
670 /* Swap it to pop it */
671 pfn = mag->pfns[i];
672 mag->pfns[i] = mag->pfns[--mag->size];
673
674 return pfn;
675 }
676
677 static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
678 {
679 mag->pfns[mag->size++] = pfn;
680 }
681
682 static struct iova_magazine *iova_depot_pop(struct iova_rcache *rcache)
683 {
684 struct iova_magazine *mag = rcache->depot;
685
686 rcache->depot = mag->next;
687 mag->size = IOVA_MAG_SIZE;
688 rcache->depot_size--;
689 return mag;
690 }
691
692 static void iova_depot_push(struct iova_rcache *rcache, struct iova_magazine *mag)
693 {
694 mag->next = rcache->depot;
695 rcache->depot = mag;
696 rcache->depot_size++;
697 }
698
699 static void iova_depot_work_func(struct work_struct *work)
700 {
701 struct iova_rcache *rcache = container_of(work, typeof(*rcache), work.work);
702 struct iova_magazine *mag = NULL;
703 unsigned long flags;
704
705 spin_lock_irqsave(&rcache->lock, flags);
706 if (rcache->depot_size > num_online_cpus())
707 mag = iova_depot_pop(rcache);
708 spin_unlock_irqrestore(&rcache->lock, flags);
709
710 if (mag) {
711 iova_magazine_free_pfns(mag, rcache->iovad);
712 iova_magazine_free(mag);
713 schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY);
714 }
715 }
716
717 int iova_domain_init_rcaches(struct iova_domain *iovad)
718 {
719 unsigned int cpu;
720 int i, ret;
721
722 iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_MAX_SIZE,
723 sizeof(struct iova_rcache),
724 GFP_KERNEL);
725 if (!iovad->rcaches)
726 return -ENOMEM;
727
728 for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
729 struct iova_cpu_rcache *cpu_rcache;
730 struct iova_rcache *rcache;
731
732 rcache = &iovad->rcaches[i];
733 spin_lock_init(&rcache->lock);
734 rcache->iovad = iovad;
735 INIT_DELAYED_WORK(&rcache->work, iova_depot_work_func);
736 rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache),
737 cache_line_size());
738 if (!rcache->cpu_rcaches) {
739 ret = -ENOMEM;
740 goto out_err;
741 }
742 for_each_possible_cpu(cpu) {
743 cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
744
745 spin_lock_init(&cpu_rcache->lock);
746 cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
747 cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
748 if (!cpu_rcache->loaded || !cpu_rcache->prev) {
749 ret = -ENOMEM;
750 goto out_err;
751 }
752 }
753 }
754
755 ret = cpuhp_state_add_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD,
756 &iovad->cpuhp_dead);
757 if (ret)
758 goto out_err;
759 return 0;
760
761 out_err:
762 free_iova_rcaches(iovad);
763 return ret;
764 }
765 EXPORT_SYMBOL_GPL(iova_domain_init_rcaches);
766
767 /*
768 * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
769 * return true on success. Can fail if rcache is full and we can't free
770 * space, in which case free_iova_fast() (our only caller) will return
771 * the IOVA range to the rbtree via free_iova() instead.
772 */
773 static bool __iova_rcache_insert(struct iova_domain *iovad,
774 struct iova_rcache *rcache,
775 unsigned long iova_pfn)
776 {
777 struct iova_cpu_rcache *cpu_rcache;
778 bool can_insert = false;
779 unsigned long flags;
780
781 cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
782 spin_lock_irqsave(&cpu_rcache->lock, flags);
783
784 if (!iova_magazine_full(cpu_rcache->loaded)) {
785 can_insert = true;
786 } else if (!iova_magazine_full(cpu_rcache->prev)) {
787 swap(cpu_rcache->prev, cpu_rcache->loaded);
788 can_insert = true;
789 } else {
790 struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);
791
792 if (new_mag) {
793 spin_lock(&rcache->lock);
794 iova_depot_push(rcache, cpu_rcache->loaded);
795 spin_unlock(&rcache->lock);
796 schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY);
797
798 cpu_rcache->loaded = new_mag;
799 can_insert = true;
800 }
801 }
802
803 if (can_insert)
804 iova_magazine_push(cpu_rcache->loaded, iova_pfn);
805
806 spin_unlock_irqrestore(&cpu_rcache->lock, flags);
807
808 return can_insert;
809 }
810
811 static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
812 unsigned long size)
813 {
814 unsigned int log_size = order_base_2(size);
815
816 if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
817 return false;
818
819 return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
820 }
821
822 /*
823 * Caller wants to allocate a new IOVA range from 'rcache'. If we can
824 * satisfy the request, return the base pfn of a matching range and
825 * remove it from the 'rcache'; otherwise return 0.
826 */
827 static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
828 unsigned long limit_pfn)
829 {
830 struct iova_cpu_rcache *cpu_rcache;
831 unsigned long iova_pfn = 0;
832 bool has_pfn = false;
833 unsigned long flags;
834
835 cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
836 spin_lock_irqsave(&cpu_rcache->lock, flags);
837
838 if (!iova_magazine_empty(cpu_rcache->loaded)) {
839 has_pfn = true;
840 } else if (!iova_magazine_empty(cpu_rcache->prev)) {
841 swap(cpu_rcache->prev, cpu_rcache->loaded);
842 has_pfn = true;
843 } else {
844 spin_lock(&rcache->lock);
845 if (rcache->depot) {
846 iova_magazine_free(cpu_rcache->loaded);
847 cpu_rcache->loaded = iova_depot_pop(rcache);
848 has_pfn = true;
849 }
850 spin_unlock(&rcache->lock);
851 }
852
853 if (has_pfn)
854 iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);
855
856 spin_unlock_irqrestore(&cpu_rcache->lock, flags);
857
858 return iova_pfn;
859 }
860
861 /*
862 * Try to satisfy IOVA allocation range from rcache. Fail if requested
863 * size is too big or the DMA limit we are given isn't satisfied by
864 * any element in the magazine.
865 */
866 static unsigned long iova_rcache_get(struct iova_domain *iovad,
867 unsigned long size,
868 unsigned long limit_pfn)
869 {
870 unsigned int log_size = order_base_2(size);
871
872 if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
873 return 0;
874
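/*
 * limit_pfn is exclusive here (callers pass limit + 1), so subtracting
 * size ensures any pfn popped from a magazine leaves room for the whole
 * range below the caller's original limit.
 */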
875 return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
876 }
877
878 /*
879 * free rcache data structures.
880 */
881 static void free_iova_rcaches(struct iova_domain *iovad)
882 {
883 struct iova_rcache *rcache;
884 struct iova_cpu_rcache *cpu_rcache;
885 unsigned int cpu;
886
887 for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
888 rcache = &iovad->rcaches[i];
889 if (!rcache->cpu_rcaches)
890 break;
891 for_each_possible_cpu(cpu) {
892 cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
893 iova_magazine_free(cpu_rcache->loaded);
894 iova_magazine_free(cpu_rcache->prev);
895 }
896 free_percpu(rcache->cpu_rcaches);
897 cancel_delayed_work_sync(&rcache->work);
898 while (rcache->depot)
899 iova_magazine_free(iova_depot_pop(rcache));
900 }
901
902 kfree(iovad->rcaches);
903 iovad->rcaches = NULL;
904 }
905
906 /*
907 * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
908 */
909 static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
910 {
911 struct iova_cpu_rcache *cpu_rcache;
912 struct iova_rcache *rcache;
913 unsigned long flags;
914 int i;
915
916 for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
917 rcache = &iovad->rcaches[i];
918 cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
919 spin_lock_irqsave(&cpu_rcache->lock, flags);
920 iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
921 iova_magazine_free_pfns(cpu_rcache->prev, iovad);
922 spin_unlock_irqrestore(&cpu_rcache->lock, flags);
923 }
924 }
925
926 /*
927 * free all the IOVA ranges held in the global depot caches
928 */
929 static void free_global_cached_iovas(struct iova_domain *iovad)
930 {
931 struct iova_rcache *rcache;
932 unsigned long flags;
933
934 for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
935 rcache = &iovad->rcaches[i];
936 spin_lock_irqsave(&rcache->lock, flags);
937 while (rcache->depot) {
938 struct iova_magazine *mag = iova_depot_pop(rcache);
939
940 iova_magazine_free_pfns(mag, iovad);
941 iova_magazine_free(mag);
942 }
943 spin_unlock_irqrestore(&rcache->lock, flags);
944 }
945 }
946
947 static int iova_cpuhp_dead(unsigned int cpu, struct hlist_node *node)
948 {
949 struct iova_domain *iovad;
950
951 iovad = hlist_entry_safe(node, struct iova_domain, cpuhp_dead);
952
953 free_cpu_cached_iovas(cpu, iovad);
954 return 0;
955 }
956
957 int iova_cache_get(void)
958 {
959 int err = -ENOMEM;
960
961 mutex_lock(&iova_cache_mutex);
962 if (!iova_cache_users) {
963 iova_cache = kmem_cache_create("iommu_iova", sizeof(struct iova), 0,
964 SLAB_HWCACHE_ALIGN, NULL);
965 if (!iova_cache)
966 goto out_err;
967
968 iova_magazine_cache = kmem_cache_create("iommu_iova_magazine",
969 sizeof(struct iova_magazine),
970 0, SLAB_HWCACHE_ALIGN, NULL);
971 if (!iova_magazine_cache)
972 goto out_err;
973
974 err = cpuhp_setup_state_multi(CPUHP_IOMMU_IOVA_DEAD, "iommu/iova:dead",
975 NULL, iova_cpuhp_dead);
976 if (err) {
977 pr_err("IOVA: Couldn't register cpuhp handler: %pe\n", ERR_PTR(err));
978 goto out_err;
979 }
980 }
981
982 iova_cache_users++;
983 mutex_unlock(&iova_cache_mutex);
984
985 return 0;
986
987 out_err:
988 kmem_cache_destroy(iova_cache);
989 kmem_cache_destroy(iova_magazine_cache);
990 mutex_unlock(&iova_cache_mutex);
991 return err;
992 }
993 EXPORT_SYMBOL_GPL(iova_cache_get);
994
995 void iova_cache_put(void)
996 {
997 mutex_lock(&iova_cache_mutex);
998 if (WARN_ON(!iova_cache_users)) {
999 mutex_unlock(&iova_cache_mutex);
1000 return;
1001 }
1002 iova_cache_users--;
1003 if (!iova_cache_users) {
1004 cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD);
1005 kmem_cache_destroy(iova_cache);
1006 kmem_cache_destroy(iova_magazine_cache);
1007 }
1008 mutex_unlock(&iova_cache_mutex);
1009 }
1010 EXPORT_SYMBOL_GPL(iova_cache_put);
1011
1012 MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
1013 MODULE_DESCRIPTION("IOMMU I/O Virtual Address management");
1014 MODULE_LICENSE("GPL");
1015