// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 */
#include "internal.h"
#include <linux/pagevec.h>

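/*
 * Reuse a page from the caller-supplied pool if one is available;
 * otherwise fall back to the page allocator with @gfp.  Pooled pages
 * must have a reference count of exactly 1.
 */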
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
{
	struct page *page;

	if (!list_empty(pool)) {
		page = lru_to_page(pool);
		DBG_BUGON(page_ref_count(page) != 1);
		list_del(&page->lru);
	} else {
		page = alloc_page(gfp);
	}
	return page;
}

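/*
 * A minimal usage sketch (hypothetical caller, not part of this file):
 *
 *	LIST_HEAD(pool);
 *	struct page *page = erofs_allocpage(&pool, GFP_NOFS);
 *
 *	if (page) {
 *		...
 *		// recycle into the pool; the page must hold a single
 *		// reference when it is added back
 *		list_add(&page->lru, &pool);
 *	}
 */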
#ifdef CONFIG_EROFS_FS_ZIP
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

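/*
 * Lockless reference grab: wait until the workgroup is no longer frozen
 * (refcount > 0), then bump the refcount with cmpxchg so a concurrent
 * freezer cannot slip in between the read and the increment.  Returns 0
 * on success and -1 if the workgroup is being (or has been) freed.
 */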
static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
	int o;

repeat:
	o = erofs_wait_on_workgroup_freezed(grp);
	if (o <= 0)
		return -1;

	if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
		goto repeat;

	/* pairs with the shrink count increase in erofs_workgroup_put */
	if (o == 1)
		atomic_long_dec(&erofs_global_shrink_cnt);
	return 0;
}

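/*
 * Look up a workgroup by index under RCU.  If a concurrent freezer owns
 * the entry, back off (dropping the RCU read lock so the writer can make
 * progress) and retry the lookup.
 */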
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_workgroup *grp;

repeat:
	rcu_read_lock();
	grp = xa_load(&sbi->managed_pslots, index);
	if (grp) {
		if (erofs_workgroup_get(grp)) {
			/* leave the RCU read section before retrying */
			rcu_read_unlock();
			goto repeat;
		}

		DBG_BUGON(index != grp->index);
	}
	rcu_read_unlock();
	return grp;
}

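/*
 * Insert @grp into the managed XArray at @grp->index.  If another thread
 * has already inserted a workgroup there, try to take a reference on the
 * in-tree one and return it instead; the caller's candidate is then
 * dropped.  On XArray allocation failure an ERR_PTR() is returned.
 */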
struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
					       struct erofs_workgroup *grp)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	struct erofs_workgroup *pre;

	/*
	 * Bump up the reference count before making this workgroup
	 * visible to others in the XArray; otherwise lookups that are
	 * not serialized by xa_lock could trigger a use-after-free.
	 */
	atomic_inc(&grp->refcount);

repeat:
	xa_lock(&sbi->managed_pslots);
	pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
			   NULL, grp, GFP_NOFS);
	if (pre) {
		if (xa_is_err(pre)) {
			pre = ERR_PTR(xa_err(pre));
		} else if (erofs_workgroup_get(pre)) {
			/* failed to take a reference on the in-tree one; retry */
			xa_unlock(&sbi->managed_pslots);
			cond_resched();
			goto repeat;
		}
		atomic_dec(&grp->refcount);
		grp = pre;
	}
	xa_unlock(&sbi->managed_pslots);
	return grp;
}

static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	atomic_long_dec(&erofs_global_shrink_cnt);
	erofs_workgroup_free_rcu(grp);
}

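/*
 * Drop one reference.  When the count falls to 1 only the managed pslot
 * still holds the workgroup, so it becomes reclaimable and the global
 * shrink count goes up; when it reaches 0 the workgroup is freed.
 */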
int erofs_workgroup_put(struct erofs_workgroup *grp)
{
	int count = atomic_dec_return(&grp->refcount);

	if (count == 1)
		atomic_long_inc(&erofs_global_shrink_cnt);
	else if (!count)
		__erofs_workgroup_free(grp);
	return count;
}

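/*
 * Try to reclaim a single workgroup: freeze it so no new references can
 * be taken, strip its cached pages, then erase it from the XArray and
 * free it.  Must be called with sbi->managed_pslots xa_lock held.
 */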
static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp)
{
	/*
	 * If managed cache is on, the refcount of a workgroup
	 * itself could be < 0 (frozen). In other words, there is
	 * no guarantee that all refcounts are > 0.
	 */
	if (!erofs_workgroup_try_to_freeze(grp, 1))
		return false;

	/*
	 * Note that all cached pages should be detached before
	 * the workgroup is deleted from the XArray. Otherwise
	 * some cached pages could remain attached to the orphaned
	 * old workgroup while the new one is available in the tree.
	 */
	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
		erofs_workgroup_unfreeze(grp, 1);
		return false;
	}

	/*
	 * It's impossible to fail after the workgroup is frozen,
	 * but in order to catch unexpected race conditions, add a
	 * DBG_BUGON to observe this in advance.
	 */
	DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp);

	/* the last refcount should be connected with its managed pslot */
	erofs_workgroup_unfreeze(grp, 0);
	__erofs_workgroup_free(grp);
	return true;
}

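/*
 * Walk the managed XArray and reclaim up to @nr_shrink workgroups,
 * temporarily dropping xa_lock after each successful release so other
 * users are not starved.  Returns the number of workgroups freed.
 */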
static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
					      unsigned long nr_shrink)
{
	struct erofs_workgroup *grp;
	unsigned int freed = 0;
	unsigned long index;

	xa_lock(&sbi->managed_pslots);
	xa_for_each(&sbi->managed_pslots, index, grp) {
		/* try to shrink each valid workgroup */
		if (!erofs_try_to_release_workgroup(sbi, grp))
			continue;
		xa_unlock(&sbi->managed_pslots);

		++freed;
		if (!--nr_shrink)
			return freed;
		xa_lock(&sbi->managed_pslots);
	}
	xa_unlock(&sbi->managed_pslots);
	return freed;
}

/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

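/*
 * Hook a freshly mounted instance into the global superblock list so
 * the shrinker can reach its workgroups; erofs_shrinker_unregister()
 * below tears this down at umount time, flushing all remaining
 * workgroups first.
 */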
void erofs_shrinker_register(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_init(&sbi->umount_mutex);

	spin_lock(&erofs_sb_list_lock);
	list_add(&sbi->list, &erofs_sb_list);
	spin_unlock(&erofs_sb_list_lock);
}

void erofs_shrinker_unregister(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	mutex_lock(&sbi->umount_mutex);
	/* clean up all remaining workgroups in memory */
	erofs_shrink_workstation(sbi, ~0UL);

	spin_lock(&erofs_sb_list_lock);
	list_del(&sbi->list);
	spin_unlock(&erofs_sb_list_lock);
	mutex_unlock(&sbi->umount_mutex);
}

static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt);
}

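/*
 * Shrinker callback: scan the mounted EROFS instances round-robin.
 * A per-run sequence number marks superblocks already visited so the
 * walk terminates even though entries are moved to the list tail for
 * fairness; superblocks in the middle of umount are skipped via
 * mutex_trylock().
 */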
static unsigned long erofs_shrink_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct erofs_sb_info *sbi;
	struct list_head *p;

	unsigned long nr = sc->nr_to_scan;
	unsigned int run_no;
	unsigned long freed = 0;

	spin_lock(&erofs_sb_list_lock);
	do {
		run_no = ++shrinker_run_no;
	} while (run_no == 0);

	/* Iterate over all mounted superblocks and try to shrink them */
	p = erofs_sb_list.next;
	while (p != &erofs_sb_list) {
		sbi = list_entry(p, struct erofs_sb_info, list);

		/*
		 * We move the ones we do to the end of the list, so we stop
		 * when we see one we have already done.
		 */
		if (sbi->shrinker_run_no == run_no)
			break;

		if (!mutex_trylock(&sbi->umount_mutex)) {
			p = p->next;
			continue;
		}

		spin_unlock(&erofs_sb_list_lock);
		sbi->shrinker_run_no = run_no;

		freed += erofs_shrink_workstation(sbi, nr - freed);

		spin_lock(&erofs_sb_list_lock);
		/* Get the next list element before we move this one */
		p = p->next;

		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
		 */
		list_move_tail(&sbi->list, &erofs_sb_list);
		mutex_unlock(&sbi->umount_mutex);

		if (freed >= nr)
			break;
	}
	spin_unlock(&erofs_sb_list_lock);
	return freed;
}

static struct shrinker erofs_shrinker_info = {
	.scan_objects = erofs_shrink_scan,
	.count_objects = erofs_shrink_count,
	.seeks = DEFAULT_SEEKS,
};

int __init erofs_init_shrinker(void)
{
	return register_shrinker(&erofs_shrinker_info);
}

void erofs_exit_shrinker(void)
{
	unregister_shrinker(&erofs_shrinker_info);
}
#endif	/* CONFIG_EROFS_FS_ZIP */