// SPDX-License-Identifier: GPL-2.0
/*
 * GCMA (Guaranteed Contiguous Memory Allocator)
 *
 */

#define pr_fmt(fmt) "gcma: " fmt

#include <linux/cleancache.h>
#include <linux/gcma.h>
#include <linux/hashtable.h>
#include <linux/highmem.h>
#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/xarray.h>
#include <trace/hooks/mm.h>
#include "gcma_sysfs.h"
#include "internal.h"

/*
 * Reused struct page fields while a page is owned by GCMA:
 *
 * page->page_type : area id
 * page->mapping : struct gcma_inode
 * page->index : page offset from inode
 */

/*
 * Lock ordering:
 *
 * inode->lock
 * 	lru_lock
 * 	hash_lock
 * 	page_area_lock
 */
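
/*
 * For illustration only (a sketch, not a verbatim call path): erasing a
 * cached page nests the locks above roughly as
 *
 *	xa_lock_irqsave(&inode->pages, flags);	<- "inode->lock"
 *	  spin_lock(&lru_lock);			<- via delete_page_from_lru()
 *	  spin_lock(&gcma_fs->hash_lock);	<- via check_and_remove_inode()
 *	  spin_lock(&area->free_pages_lock);	<- via gcma_put_page()
 *	xa_unlock_irqrestore(&inode->pages, flags);
 */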

static inline int get_area_id(struct page *page)
{
	return page->page_type;
}

static inline void set_area_id(struct page *page, int id)
{
	page->page_type = id;
}

static inline unsigned long get_inode_index(struct page *page)
{
	return page->index;
}

static inline void set_inode_index(struct page *page, unsigned long index)
{
	page->index = index;
}

static inline struct gcma_inode *get_inode_mapping(struct page *page)
{
	/*
	 * We do not cast into struct gcma_inode* directly to avoid
	 * "casting from randomized structure pointer type" error when
	 * CONFIG_RANDSTRUCT is enabled.
	 */
	return (void *)page->mapping;
}

static inline void set_inode_mapping(struct page *page,
				     struct gcma_inode *inode)
{
	page->mapping = (struct address_space *)inode;
}

#define GCMA_HASH_BITS	10

/*
 * The cleancache API (e.g., cleancache_putpage) is called under IRQ-disabled
 * context. Thus, the locks taken in the cleancache API path should take
 * care of the irq locking.
 */

static DEFINE_SPINLOCK(gcma_fs_lock);
static DEFINE_IDR(gcma_fs_idr);

#define MAX_EVICT_BATCH 64UL
#define MAX_GCMA_AREAS 64

/* This list contains cache pages in LRU order. */
static LIST_HEAD(gcma_lru);
static DEFINE_SPINLOCK(lru_lock);

static atomic_t nr_gcma_area = ATOMIC_INIT(0);

/* represents a reserved memory range */
struct gcma_area {
	struct list_head free_pages;
	spinlock_t free_pages_lock;
	/* both start_pfn and end_pfn are inclusive */
	unsigned long start_pfn;
	unsigned long end_pfn;
};

static struct gcma_area areas[MAX_GCMA_AREAS];

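/*
 * Find the area that @page belongs to. @start_id is checked first as a fast
 * path, presumably for callers walking contiguous pfns within one area (see
 * gcma_free_range()). Returns -1 if no registered area contains the page.
 */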
static int lookup_area_id(struct page *page, int start_id)
{
	int id, nr_area;
	unsigned long pfn = page_to_pfn(page);
	struct gcma_area *area;

	area = &areas[start_id];
	if (pfn >= area->start_pfn && pfn <= area->end_pfn)
		return start_id;

	nr_area = atomic_read(&nr_gcma_area);
	for (id = 0; id < nr_area; id++) {
		area = &areas[id];
		if (pfn >= area->start_pfn && pfn <= area->end_pfn)
			return id;
	}

	return -1;
}

/* represents each file system instance hosted by the cleancache */
struct gcma_fs {
	spinlock_t hash_lock;
	DECLARE_HASHTABLE(inode_hash, GCMA_HASH_BITS);
};

/*
 * @gcma_inode represents each inode in @gcma_fs
 *
 * The gcma_inode will be freed by RCU (except for invalidate_inode)
 * when the last page in its xarray is freed.
 */
struct gcma_inode {
	struct cleancache_filekey key;
	struct hlist_node hash;
	refcount_t ref_count;

	struct xarray pages;
	struct rcu_head rcu;
	struct gcma_fs *gcma_fs;
};

static struct kmem_cache *slab_gcma_inode;

static void add_page_to_lru(struct page *page)
{
	VM_BUG_ON(!irqs_disabled());
	VM_BUG_ON(!list_empty(&page->lru));

	spin_lock(&lru_lock);
	list_add(&page->lru, &gcma_lru);
	spin_unlock(&lru_lock);
}

static void rotate_lru_page(struct page *page)
{
	VM_BUG_ON(!irqs_disabled());

	spin_lock(&lru_lock);
	if (!list_empty(&page->lru))
		list_move(&page->lru, &gcma_lru);
	spin_unlock(&lru_lock);
}

static void delete_page_from_lru(struct page *page)
{
	VM_BUG_ON(!irqs_disabled());

	spin_lock(&lru_lock);
	if (!list_empty(&page->lru))
		list_del_init(&page->lru);
	spin_unlock(&lru_lock);
}

/*
 * GCMAFree means the page is currently free in the GCMA so it can be
 * allocated as a cache page.
 */
static void SetPageGCMAFree(struct page *page)
{
	SetPagePrivate(page);
}

static int PageGCMAFree(struct page *page)
{
	return PagePrivate(page);
}

static void ClearPageGCMAFree(struct page *page)
{
	ClearPagePrivate(page);
}

static void reset_gcma_page(struct page *page)
{
	set_inode_mapping(page, NULL);
	set_inode_index(page, 0);
}

static struct gcma_fs *find_gcma_fs(int hash_id)
{
	struct gcma_fs *ret;

	rcu_read_lock();
	ret = idr_find(&gcma_fs_idr, hash_id);
	rcu_read_unlock();

	return ret;
}

static struct gcma_inode *alloc_gcma_inode(struct gcma_fs *gcma_fs,
					struct cleancache_filekey *key)
{
	struct gcma_inode *inode;

	inode = kmem_cache_alloc(slab_gcma_inode, GFP_ATOMIC|__GFP_NOWARN);
	if (inode) {
		memcpy(&inode->key, key, sizeof(*key));
		xa_init_flags(&inode->pages, XA_FLAGS_LOCK_IRQ);
		INIT_HLIST_NODE(&inode->hash);
		inode->gcma_fs = gcma_fs;
		refcount_set(&inode->ref_count, 1);
	}

	return inode;
}

static void gcma_inode_free(struct rcu_head *rcu)
{
	struct gcma_inode *inode = container_of(rcu, struct gcma_inode, rcu);

	VM_BUG_ON(!xa_empty(&inode->pages));
	kmem_cache_free(slab_gcma_inode, inode);
}

static bool get_gcma_inode(struct gcma_inode *inode)
{
	return refcount_inc_not_zero(&inode->ref_count);
}

static void put_gcma_inode(struct gcma_inode *inode)
{
	if (refcount_dec_and_test(&inode->ref_count))
		call_rcu(&inode->rcu, gcma_inode_free);
}

static struct gcma_inode *find_and_get_gcma_inode(struct gcma_fs *gcma_fs,
						struct cleancache_filekey *key)
{
	struct gcma_inode *tmp, *inode = NULL;

	rcu_read_lock();
	hash_for_each_possible_rcu(gcma_fs->inode_hash, tmp, hash, key->u.ino) {
		if (memcmp(&tmp->key, key, sizeof(*key)))
			continue;
		if (get_gcma_inode(tmp)) {
			inode = tmp;
			break;
		}
	}
	rcu_read_unlock();

	return inode;
}

static struct gcma_inode *add_gcma_inode(struct gcma_fs *gcma_fs,
					 struct cleancache_filekey *key)
{
	struct gcma_inode *inode, *tmp;

	inode = alloc_gcma_inode(gcma_fs, key);
	if (!inode)
		return ERR_PTR(-ENOMEM);

	spin_lock(&gcma_fs->hash_lock);
	tmp = find_and_get_gcma_inode(gcma_fs, key);
	if (tmp) {
		spin_unlock(&gcma_fs->hash_lock);
		/* someone already added it */
		put_gcma_inode(inode);
		put_gcma_inode(tmp);
		return ERR_PTR(-EEXIST);
	}

	get_gcma_inode(inode);
	hash_add_rcu(gcma_fs->inode_hash, &inode->hash, key->u.ino);
	spin_unlock(&gcma_fs->hash_lock);

	return inode;
}

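/*
 * register_gcma_area() - donate a reserved physical memory range to GCMA.
 * @name: human-readable name, used only for logging
 * @base: physical base address of the range
 * @size: size of the range in bytes
 *
 * All pages in the range start out on the per-area free list and may be
 * handed out as cleancache pages until gcma_alloc_range() reclaims them.
 * Typically this would be called from a reserved-memory setup path during
 * boot, before the range is ever passed to gcma_alloc_range().
 */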
int register_gcma_area(const char *name, phys_addr_t base, phys_addr_t size)
{
	unsigned long i;
	struct page *page;
	struct gcma_area *area;
	unsigned long pfn = PFN_DOWN(base);
	unsigned long page_count = size >> PAGE_SHIFT;
	int area_id;

	area_id = atomic_fetch_inc(&nr_gcma_area);
	if (area_id >= MAX_GCMA_AREAS) {
		atomic_dec(&nr_gcma_area);
		pr_err("Failed to register new area due to lack of space\n");
		return -ENOMEM;
	}

	area = &areas[area_id];
	INIT_LIST_HEAD(&area->free_pages);
	spin_lock_init(&area->free_pages_lock);

	gcma_stat_add(TOTAL_PAGE, page_count);
	for (i = 0; i < page_count; i++) {
		page = pfn_to_page(pfn + i);
		set_area_id(page, area_id);
		reset_gcma_page(page);
		SetPageGCMAFree(page);
		list_add(&page->lru, &area->free_pages);
	}

	area->start_pfn = pfn;
	area->end_pfn = pfn + page_count - 1;

	pr_info("Reserved memory: created GCMA memory pool at %pa, size %lu MiB for %s\n",
		 &base, (unsigned long)size / SZ_1M, name ? : "none");

	return 0;
}
EXPORT_SYMBOL_GPL(register_gcma_area);

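/*
 * Hypothetical usage sketch (not part of this file): a platform driver that
 * owns a reserved-memory carveout could donate it like
 *
 *	err = register_gcma_area("camera_pool", rmem->base, rmem->size);
 *	if (err)
 *		pr_err("gcma donation failed: %d\n", err);
 *
 * where @rmem would be the struct reserved_mem describing the carveout.
 */
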
static void page_area_lock(struct page *page)
{
	struct gcma_area *area;

	VM_BUG_ON(!irqs_disabled());

	area = &areas[get_area_id(page)];
	spin_lock(&area->free_pages_lock);
}

static void page_area_unlock(struct page *page)
{
	struct gcma_area *area;

	area = &areas[get_area_id(page)];
	spin_unlock(&area->free_pages_lock);
}

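/*
 * Grab a free page from the first area that has one, remove it from the
 * area's free list and hand it out with a refcount of one. Returns NULL
 * when every area is exhausted; the caller kicks the LRU eviction worker
 * in that case.
 */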
static struct page *gcma_alloc_page(void)
{
	int i, nr_area;
	struct gcma_area *area;
	struct page *page = NULL;

	VM_BUG_ON(!irqs_disabled());

	nr_area = atomic_read(&nr_gcma_area);

	for (i = 0; i < nr_area; i++) {
		area = &areas[i];
		spin_lock(&area->free_pages_lock);
		if (list_empty(&area->free_pages)) {
			spin_unlock(&area->free_pages_lock);
			continue;
		}

		page = list_last_entry(&area->free_pages, struct page, lru);
		list_del_init(&page->lru);

		ClearPageGCMAFree(page);
		set_page_count(page, 1);
		spin_unlock(&area->free_pages_lock);
		gcma_stat_inc(CACHED_PAGE);
		break;
	}

	return page;
}

/* The caller must hold page_area_lock */
static void __gcma_free_page(struct page *page)
{
	struct gcma_area *area = &areas[get_area_id(page)];

	reset_gcma_page(page);
	VM_BUG_ON(!list_empty(&page->lru));
	list_add(&page->lru, &area->free_pages);
	SetPageGCMAFree(page);
}

static void gcma_free_page(struct page *page)
{
	__gcma_free_page(page);
	gcma_stat_dec(CACHED_PAGE);
}

static inline void gcma_get_page(struct page *page)
{
	get_page(page);
}

static inline bool gcma_get_page_unless_zero(struct page *page)
{
	return get_page_unless_zero(page);
}

static void gcma_put_page(struct page *page)
{
	if (put_page_testzero(page)) {
		unsigned long flags;

		local_irq_save(flags);
		VM_BUG_ON(!list_empty(&page->lru));
		page_area_lock(page);
		gcma_free_page(page);
		page_area_unlock(page);
		local_irq_restore(flags);
	}
}

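/*
 * Insert @page into @inode's xarray at @index and link the page back to the
 * inode via the reused struct page fields. The first page stored in a fresh
 * inode also hashes the inode into @gcma_fs->inode_hash, taking an extra
 * inode reference that is dropped in check_and_remove_inode().
 */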
static int gcma_store_page(struct gcma_inode *inode, unsigned long index,
			   struct page *page, struct cleancache_filekey *key)
{
	int err = xa_err(__xa_store(&inode->pages, index,
				page, GFP_ATOMIC|__GFP_NOWARN));

	if (!err) {
		struct gcma_fs *gcma_fs;

		gcma_get_page(page);
		set_inode_mapping(page, inode);
		set_inode_index(page, index);

		gcma_fs = inode->gcma_fs;
		spin_lock(&gcma_fs->hash_lock);
		if (hlist_unhashed(&inode->hash)) {
			get_gcma_inode(inode);
			hash_add_rcu(gcma_fs->inode_hash, &inode->hash, key->u.ino);
		}
		spin_unlock(&gcma_fs->hash_lock);
	}

	return err;
}

static void check_and_remove_inode(struct gcma_inode *inode)
{
	struct gcma_fs *gcma_fs = inode->gcma_fs;

	/* Pairs with the hash_add/get in gcma_store_page */
	if (!xa_empty(&inode->pages))
		return;

	spin_lock(&gcma_fs->hash_lock);
	if (!hlist_unhashed(&inode->hash)) {
		hlist_del_init_rcu(&inode->hash);
		refcount_dec(&inode->ref_count);
	}
	spin_unlock(&gcma_fs->hash_lock);
}

static void gcma_erase_page(struct gcma_inode *inode, unsigned long index,
			    struct page *page, bool put_page)
{
	void *old;

	lockdep_assert_held(&inode->pages.xa_lock);

	/* The inode refcount will decrease when the page is freed */
	old = __xa_erase(&inode->pages, index);
	VM_BUG_ON(old == 0);
	delete_page_from_lru(page);
	if (put_page)
		gcma_put_page(page);

	check_and_remove_inode(inode);
}

/*
 * @page's refcount is zero now so no one can access this page
 */
static void isolate_gcma_page(struct gcma_inode *inode, struct page *page)
{
	VM_BUG_ON(!list_empty(&page->lru));
	page_area_lock(page);
	reset_gcma_page(page);
	page_area_unlock(page);
	gcma_stat_dec(CACHED_PAGE);
}

/*
 * Discard cached pages to prepare for allocating in the range.
 *
 * Every path that elevates the page refcount (e.g., gcma_get_page) is
 * supposed to release the refcount quickly, under an irq-disabled spinlock
 * context that doesn't allow preemption. Thus, this logic makes forward
 * progress with just retrying.
 */
static void __gcma_discard_range(struct gcma_area *area,
				unsigned long start_pfn,
				unsigned long end_pfn)
{
	unsigned long pfn;
	struct page *page;
	unsigned long scanned = 0;

	local_irq_disable();

	for (pfn = start_pfn; pfn <= end_pfn; pfn++) {
		struct gcma_inode *inode;
		unsigned long index;
again:
		if (!(++scanned % XA_CHECK_SCHED)) {
			/* let in any pending interrupt */
			local_irq_enable();
			cond_resched();
			local_irq_disable();
		}

		page = pfn_to_page(pfn);
		page_area_lock(page);
		if (PageGCMAFree(page)) {
			/*
			 * Isolate page from the free list to prevent further
			 * allocation.
			 */
			ClearPageGCMAFree(page);
			list_del_init(&page->lru);
			page_area_unlock(page);
			continue;
		}

		/* To guarantee gcma_inode is not freed */
		rcu_read_lock();
		if (!gcma_get_page_unless_zero(page)) {
			page_area_unlock(page);
			rcu_read_unlock();
			/*
			 * The page is being freed but did not reach
			 * the free list.
			 */
			goto again;
		}

		inode = get_inode_mapping(page);
		index = get_inode_index(page);
		page_area_unlock(page);

		/*
		 * The page was just allocated and is not stored yet. Just retry.
		 */
		if (!inode) {
			gcma_put_page(page);
			rcu_read_unlock();
			goto again;
		}

		if (!get_gcma_inode(inode)) {
			gcma_put_page(page);
			rcu_read_unlock();
			goto again;
		}
		rcu_read_unlock();

		/*
		 * From now on, the page and inode are never freed, pinned by
		 * the page's and inode's refcounts.
		 */
		xa_lock(&inode->pages);
		/*
		 * If the page is not attached to the inode or has already
		 * been erased, just retry.
		 */
		if (xa_load(&inode->pages, index) != page) {
			xa_unlock(&inode->pages);
			gcma_put_page(page);
			put_gcma_inode(inode);
			goto again;
		}

		/*
		 * If someone is holding the refcount, wait for them to finish
		 * the work. In theory this could livelock if someone
		 * repeatedly took and released the refcount in parallel, but
		 * that should be extremely rare.
		 *
		 * Expect a refcount of two: one from the xarray and one from
		 * this function.
		 */
		if (!page_ref_freeze(page, 2)) {
			xa_unlock(&inode->pages);
			gcma_put_page(page);
			put_gcma_inode(inode);
			goto again;
		}

		gcma_erase_page(inode, index, page, false);
		xa_unlock(&inode->pages);

		isolate_gcma_page(inode, page);
		gcma_stat_inc(DISCARDED_PAGE);
		put_gcma_inode(inode);
	}
	local_irq_enable();
}

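/*
 * gcma_alloc_range() - claim [@start_pfn, @end_pfn] (inclusive) for a caller.
 *
 * Cached and free GCMA pages overlapping the range are discarded or isolated
 * via __gcma_discard_range(), after which the pages are handed out with a
 * refcount of one. The range is presumably expected to lie entirely within
 * previously registered GCMA areas.
 */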
void gcma_alloc_range(unsigned long start_pfn, unsigned long end_pfn)
{
	int i;
	unsigned long pfn;
	struct gcma_area *area;
	int nr_area = atomic_read(&nr_gcma_area);

	for (i = 0; i < nr_area; i++) {
		unsigned long s_pfn, e_pfn;

		area = &areas[i];
		if (area->end_pfn < start_pfn)
			continue;

		if (area->start_pfn > end_pfn)
			continue;

		s_pfn = max(start_pfn, area->start_pfn);
		e_pfn = min(end_pfn, area->end_pfn);

		__gcma_discard_range(area, s_pfn, e_pfn);
	}
	gcma_stat_add(ALLOCATED_PAGE, end_pfn - start_pfn + 1);

	/*
	 * GCMA returns pages with refcount 1 and expects them to have
	 * the same refcount 1 when they are freed.
	 */
	for (pfn = start_pfn; pfn <= end_pfn; pfn++)
		set_page_count(pfn_to_page(pfn), 1);
}
EXPORT_SYMBOL_GPL(gcma_alloc_range);

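/*
 * gcma_free_range() - return [@start_pfn, @end_pfn] (inclusive) to GCMA.
 *
 * The counterpart of gcma_alloc_range(): each page must come back with the
 * refcount of one it was handed out with, and is put back on its area's
 * free list so it can serve as a cleancache page again.
 */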
void gcma_free_range(unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long pfn;
	struct page *page;
	unsigned long scanned = 0;
	int area_id, start_id = 0;

	VM_BUG_ON(irqs_disabled());

	/* The caller should ensure no other users when freeing */
	for (pfn = start_pfn; pfn <= end_pfn; pfn++)
		WARN_ON(!page_ref_dec_and_test(pfn_to_page(pfn)));

	local_irq_disable();

	for (pfn = start_pfn; pfn <= end_pfn; pfn++) {
		if (!(++scanned % XA_CHECK_SCHED)) {
			local_irq_enable();
			/* let in any pending interrupt */
			cond_resched();
			local_irq_disable();
		}

		page = pfn_to_page(pfn);
		VM_BUG_ON(PageGCMAFree(page));

		area_id = lookup_area_id(page, start_id);
		VM_BUG_ON(area_id == -1);
		if (start_id != area_id)
			start_id = area_id;
		/* The struct page fields would be contaminated so reset them */
		set_area_id(page, area_id);
		INIT_LIST_HEAD(&page->lru);
		page_area_lock(page);
		__gcma_free_page(page);
		page_area_unlock(page);
	}

	local_irq_enable();
	gcma_stat_sub(ALLOCATED_PAGE, end_pfn - start_pfn + 1);
}
EXPORT_SYMBOL_GPL(gcma_free_range);

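/*
 * Illustrative pairing of the two exported range calls (hypothetical caller,
 * e.g. a contiguous allocator draining its GCMA-donated area):
 *
 *	gcma_alloc_range(pfn, pfn + nr_pages - 1);
 *	... use the physically contiguous pages ...
 *	gcma_free_range(pfn, pfn + nr_pages - 1);
 */

/*
 * Evict up to @nr_request pages from the tail of the global LRU, in batches
 * of MAX_EVICT_BATCH, dropping them from their inodes' xarrays and freeing
 * them back to their areas.
 */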
static void evict_gcma_lru_pages(unsigned long nr_request)
{
	unsigned long nr_evicted = 0;

	while (nr_request) {
		struct page *pages[MAX_EVICT_BATCH];
		int i;
		unsigned long isolated = 0;
		unsigned long flags;
		struct page *page, *tmp;
		struct gcma_inode *inode;
		unsigned long index;

		/* gcma_inode will not be freed */
		rcu_read_lock();
		spin_lock_irqsave(&lru_lock, flags);
		if (list_empty(&gcma_lru)) {
			spin_unlock_irqrestore(&lru_lock, flags);
			rcu_read_unlock();
			break;
		}

		list_for_each_entry_safe_reverse(page, tmp, &gcma_lru, lru) {
			if (isolated == MAX_EVICT_BATCH || !nr_request)
				break;
			nr_request--;
			if (!gcma_get_page_unless_zero(page))
				continue;

			inode = get_inode_mapping(page);
			if (!get_gcma_inode(inode)) {
				gcma_put_page(page);
				continue;
			}

			/* From now on, gcma_inode is safe to access */
			list_del_init(&page->lru);
			pages[isolated++] = page;
		}
		spin_unlock_irqrestore(&lru_lock, flags);
		rcu_read_unlock();

		/* From now on, pages in the list will never be freed */
		for (i = 0; i < isolated; i++) {
			page = pages[i];
			inode = get_inode_mapping(page);
			index = get_inode_index(page);

			xa_lock_irqsave(&inode->pages, flags);
			if (xa_load(&inode->pages, index) == page)
				gcma_erase_page(inode, index, page, true);
			xa_unlock_irqrestore(&inode->pages, flags);
			put_gcma_inode(inode);
			gcma_put_page(page);
		}
		nr_evicted += isolated;
	}

	gcma_stat_add(EVICTED_PAGE, nr_evicted);
}

static void evict_gcma_pages(struct work_struct *work)
{
	evict_gcma_lru_pages(MAX_EVICT_BATCH);
}

static DECLARE_WORK(lru_evict_work, evict_gcma_pages);

/*
 * We want to store only workingset pages in the GCMA to increase the hit
 * ratio, so there are four cases:
 *
 * @page is workingset but GCMA doesn't have @page: create new gcma page
 * @page is workingset and GCMA has @page: overwrite the stale data
 * @page is !workingset and GCMA doesn't have @page: just bail out
 * @page is !workingset and GCMA has @page: remove the stale @page
 */
static void gcma_cc_store_page(int hash_id, struct cleancache_filekey key,
			       pgoff_t offset, struct page *page)
{
	struct gcma_fs *gcma_fs;
	struct gcma_inode *inode;
	struct page *g_page;
	void *src, *dst;
	bool is_new = false;
	bool workingset = PageWorkingset(page);
	bool bypass = false;
	bool allow_nonworkingset = false;

	trace_android_vh_gcma_cc_store_page_bypass(&bypass);
	if (bypass)
		return;
	/*
	 * This cleancache function is called with irqs disabled, so every
	 * lock in this function must take care of the irq if it is also
	 * used in non-irq-disabled context.
	 */
	VM_BUG_ON(!irqs_disabled());

	gcma_fs = find_gcma_fs(hash_id);
	if (!gcma_fs)
		return;

	trace_android_vh_gcma_cc_allow_nonworkingset(&allow_nonworkingset);
find_inode:
	inode = find_and_get_gcma_inode(gcma_fs, &key);
	if (!inode) {
		if (!workingset && !allow_nonworkingset)
			return;
		inode = add_gcma_inode(gcma_fs, &key);
		if (!IS_ERR(inode))
			goto load_page;
		/*
		 * If someone just added a new inode under us, retry to find it.
		 */
		if (PTR_ERR(inode) == -EEXIST)
			goto find_inode;
		return;
	}

load_page:
	VM_BUG_ON(!inode);

	xa_lock(&inode->pages);
	g_page = xa_load(&inode->pages, offset);
	if (g_page) {
		if (!workingset && !allow_nonworkingset) {
			gcma_erase_page(inode, offset, g_page, true);
			goto out_unlock;
		}
		goto copy;
	}

	if (!workingset && !allow_nonworkingset)
		goto out_unlock;

	g_page = gcma_alloc_page();
	if (!g_page) {
		queue_work(system_unbound_wq, &lru_evict_work);
		goto out_unlock;
	}

	if (gcma_store_page(inode, offset, g_page, &key)) {
		gcma_put_page(g_page);
		goto out_unlock;
	}

	gcma_put_page(g_page);
	is_new = true;
copy:
	src = kmap_atomic(page);
	dst = kmap_atomic(g_page);
	memcpy(dst, src, PAGE_SIZE);
	kunmap_atomic(dst);
	kunmap_atomic(src);

	if (is_new)
		add_page_to_lru(g_page);
	else
		rotate_lru_page(g_page);

	gcma_stat_inc(STORED_PAGE);
out_unlock:
	/*
	 * If the inode was just created but we failed to add the gcma page,
	 * remove the inode from the hash.
	 */
	check_and_remove_inode(inode);
	xa_unlock(&inode->pages);
	put_gcma_inode(inode);
}

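/*
 * Cleancache "get" hook: copy the cached page at @offset in @key's inode
 * into @page. Returns 0 on a hit and -1 on a miss; a hit also rotates the
 * gcma page to the head of the LRU.
 */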
static int gcma_cc_load_page(int hash_id, struct cleancache_filekey key,
			pgoff_t offset, struct page *page)
{
	struct gcma_fs *gcma_fs;
	struct gcma_inode *inode;
	struct page *g_page;
	void *src, *dst;

	VM_BUG_ON(irqs_disabled());

	gcma_fs = find_gcma_fs(hash_id);
	if (!gcma_fs)
		return -1;

	inode = find_and_get_gcma_inode(gcma_fs, &key);
	if (!inode)
		return -1;

	xa_lock_irq(&inode->pages);
	g_page = xa_load(&inode->pages, offset);
	if (!g_page) {
		xa_unlock_irq(&inode->pages);
		put_gcma_inode(inode);
		return -1;
	}

	src = kmap_atomic(g_page);
	dst = kmap_atomic(page);
	memcpy(dst, src, PAGE_SIZE);
	kunmap_atomic(dst);
	kunmap_atomic(src);
	rotate_lru_page(g_page);
	xa_unlock_irq(&inode->pages);

	put_gcma_inode(inode);
	gcma_stat_inc(LOADED_PAGE);

	return 0;
}

static void gcma_cc_invalidate_page(int hash_id, struct cleancache_filekey key,
				pgoff_t offset)
{
	struct gcma_fs *gcma_fs;
	struct gcma_inode *inode;
	struct page *g_page;
	unsigned long flags;

	gcma_fs = find_gcma_fs(hash_id);
	if (!gcma_fs)
		return;

	inode = find_and_get_gcma_inode(gcma_fs, &key);
	if (!inode)
		return;

	xa_lock_irqsave(&inode->pages, flags);
	g_page = xa_load(&inode->pages, offset);
	if (!g_page)
		goto out;
	gcma_erase_page(inode, offset, g_page, true);
out:
	xa_unlock_irqrestore(&inode->pages, flags);
	put_gcma_inode(inode);
}

static void gcma_erase_all_pages(struct gcma_inode *inode)
{
	struct page *page;
	unsigned long flags;

	XA_STATE(xas, &inode->pages, 0);

	xas_lock_irqsave(&xas, flags);
	if (xa_empty(&inode->pages))
		goto out;
	xas_for_each(&xas, page, ULONG_MAX)
		gcma_erase_page(inode, xas.xa_index, page, true);
out:
	xas_unlock_irqrestore(&xas, flags);
}

static void __gcma_cc_invalidate_inode(struct gcma_fs *gcma_fs,
				       struct cleancache_filekey *key)
{
	struct gcma_inode *inode;

	inode = find_and_get_gcma_inode(gcma_fs, key);
	if (!inode)
		return;

	gcma_erase_all_pages(inode);
	put_gcma_inode(inode);
}

static void gcma_cc_invalidate_inode(int hash_id, struct cleancache_filekey key)
{
	struct gcma_fs *gcma_fs;

	gcma_fs = find_gcma_fs(hash_id);
	if (!gcma_fs)
		return;

	__gcma_cc_invalidate_inode(gcma_fs, &key);
}

static void gcma_cc_invalidate_fs(int hash_id)
{
	struct gcma_fs *gcma_fs;
	struct gcma_inode *inode;
	int cursor, i;
	struct hlist_node *tmp;

	gcma_fs = find_gcma_fs(hash_id);
	if (!gcma_fs)
		return;

	VM_BUG_ON(irqs_disabled());

	/*
	 * No need to hold any lock here since this function is called when
	 * the fs is unmounted. IOW, an inode insert/delete race cannot happen.
	 */
	hash_for_each_safe(gcma_fs->inode_hash, cursor, tmp, inode, hash)
		__gcma_cc_invalidate_inode(gcma_fs, &inode->key);

	synchronize_rcu();

	for (i = 0; i < HASH_SIZE(gcma_fs->inode_hash); i++)
		VM_BUG_ON(!hlist_empty(&gcma_fs->inode_hash[i]));

	spin_lock(&gcma_fs_lock);
	idr_remove(&gcma_fs_idr, hash_id);
	spin_unlock(&gcma_fs_lock);
	pr_info("removed hash_id %d\n", hash_id);

	kfree(gcma_fs);
}

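/*
 * Called when a filesystem enables cleancache. Refuses to register a new
 * instance unless at least one GCMA area has been donated and the fs page
 * size matches PAGE_SIZE; on success returns the idr-allocated hash_id that
 * the other hooks use to look up this instance.
 */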
static int gcma_cc_init_fs(size_t page_size)
{
	int hash_id;
	struct gcma_fs *gcma_fs;

	if (atomic_read(&nr_gcma_area) == 0)
		return -ENOMEM;

	if (page_size != PAGE_SIZE)
		return -EOPNOTSUPP;

	gcma_fs = kzalloc(sizeof(struct gcma_fs), GFP_KERNEL);
	if (!gcma_fs)
		return -ENOMEM;

	spin_lock_init(&gcma_fs->hash_lock);
	hash_init(gcma_fs->inode_hash);

	idr_preload(GFP_KERNEL);

	spin_lock(&gcma_fs_lock);
	hash_id = idr_alloc(&gcma_fs_idr, gcma_fs, 0, 0, GFP_NOWAIT);
	spin_unlock(&gcma_fs_lock);

	idr_preload_end();

	if (hash_id < 0) {
		pr_warn("too many gcma instances\n");
		kfree(gcma_fs);
	}

	return hash_id;
}

static int gcma_cc_init_shared_fs(uuid_t *uuid, size_t pagesize)
{
	return -1;
}

struct cleancache_ops gcma_cleancache_ops = {
	.init_fs = gcma_cc_init_fs,
	.init_shared_fs = gcma_cc_init_shared_fs,
	.get_page = gcma_cc_load_page,
	.put_page = gcma_cc_store_page,
	.invalidate_page = gcma_cc_invalidate_page,
	.invalidate_inode = gcma_cc_invalidate_inode,
	.invalidate_fs = gcma_cc_invalidate_fs,
};

static int __init gcma_init(void)
{
	slab_gcma_inode = KMEM_CACHE(gcma_inode, 0);
	if (!slab_gcma_inode)
		return -ENOMEM;

	cleancache_register_ops(&gcma_cleancache_ops);

	return 0;
}

core_initcall(gcma_init);
1054