1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (C) 2018 HUAWEI, Inc.
4 * https://www.huawei.com/
5 * Created by Gao Xiang <gaoxiang25@huawei.com>
6 */
7 #include "internal.h"
8 #include <linux/pagevec.h>
9
/*
 * Hand out one page: reuse a page from @pool when the pool is not empty,
 * otherwise allocate a new one with the @gfp flags.
 *
 * Returns the page taken from the pool, or whatever alloc_page() returned
 * when the pool was empty.
 */
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
{
	struct page *page;

	/* nothing pooled: go straight to the page allocator */
	if (list_empty(pool))
		return alloc_page(gfp);

	page = lru_to_page(pool);
	/* debug check: a pooled page is expected to hold exactly one reference */
	DBG_BUGON(page_ref_count(page) != 1);
	/* unlink it so the pool no longer refers to the page */
	list_del(&page->lru);
	return page;
}
23
24 #ifdef CONFIG_EROFS_FS_ZIP
25 /* global shrink count (for all mounted EROFS instances) */
26 static atomic_long_t erofs_global_shrink_cnt;
27
erofs_workgroup_get(struct erofs_workgroup * grp)28 static int erofs_workgroup_get(struct erofs_workgroup *grp)
29 {
30 int o;
31
32 repeat:
33 o = erofs_wait_on_workgroup_freezed(grp);
34 if (o <= 0)
35 return -1;
36
37 if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
38 goto repeat;
39
40 /* decrease refcount paired by erofs_workgroup_put */
41 if (o == 1)
42 atomic_long_dec(&erofs_global_shrink_cnt);
43 return 0;
44 }
45
/*
 * Look up the workgroup stored at @index in the managed_pslots XArray of
 * @sb and take a reference on it.
 *
 * Returns the workgroup with one extra reference held, or NULL when the
 * slot is empty.  The lookup is repeated for as long as an entry is found
 * but erofs_workgroup_get() refuses to hand out a reference.
 */
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_workgroup *grp;

	for (;;) {
		rcu_read_lock();
		grp = xa_load(&sbi->managed_pslots, index);
		/* done when the slot is empty or a reference was obtained */
		if (!grp || !erofs_workgroup_get(grp))
			break;

		/* prefer to relax the rcu read side before trying again */
		rcu_read_unlock();
	}

	/* debug check: the entry found must be the one indexed by @index */
	if (grp)
		DBG_BUGON(index != grp->index);
	rcu_read_unlock();
	return grp;
}
67
erofs_insert_workgroup(struct super_block * sb,struct erofs_workgroup * grp)68 struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
69 struct erofs_workgroup *grp)
70 {
71 struct erofs_sb_info *const sbi = EROFS_SB(sb);
72 struct erofs_workgroup *pre;
73
74 /*
75 * Bump up a reference count before making this visible
76 * to others for the XArray in order to avoid potential
77 * UAF without serialized by xa_lock.
78 */
79 atomic_inc(&grp->refcount);
80
81 repeat:
82 xa_lock(&sbi->managed_pslots);
83 pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
84 NULL, grp, GFP_NOFS);
85 if (pre) {
86 if (xa_is_err(pre)) {
87 pre = ERR_PTR(xa_err(pre));
88 } else if (erofs_workgroup_get(pre)) {
89 /* try to legitimize the current in-tree one */
90 xa_unlock(&sbi->managed_pslots);
91 cond_resched();
92 goto repeat;
93 }
94 atomic_dec(&grp->refcount);
95 grp = pre;
96 }
97 xa_unlock(&sbi->managed_pslots);
98 return grp;
99 }
100
__erofs_workgroup_free(struct erofs_workgroup * grp)101 static void __erofs_workgroup_free(struct erofs_workgroup *grp)
102 {
103 atomic_long_dec(&erofs_global_shrink_cnt);
104 erofs_workgroup_free_rcu(grp);
105 }
106
erofs_workgroup_put(struct erofs_workgroup * grp)107 int erofs_workgroup_put(struct erofs_workgroup *grp)
108 {
109 int count = atomic_dec_return(&grp->refcount);
110
111 if (count == 1)
112 atomic_long_inc(&erofs_global_shrink_cnt);
113 else if (!count)
114 __erofs_workgroup_free(grp);
115 return count;
116 }
117
erofs_try_to_release_workgroup(struct erofs_sb_info * sbi,struct erofs_workgroup * grp)118 static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
119 struct erofs_workgroup *grp)
120 {
121 /*
122 * If managed cache is on, refcount of workgroups
123 * themselves could be < 0 (freezed). In other words,
124 * there is no guarantee that all refcounts > 0.
125 */
126 if (!erofs_workgroup_try_to_freeze(grp, 1))
127 return false;
128
129 /*
130 * Note that all cached pages should be unattached
131 * before deleted from the XArray. Otherwise some
132 * cached pages could be still attached to the orphan
133 * old workgroup when the new one is available in the tree.
134 */
135 if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
136 erofs_workgroup_unfreeze(grp, 1);
137 return false;
138 }
139
140 /*
141 * It's impossible to fail after the workgroup is freezed,
142 * however in order to avoid some race conditions, add a
143 * DBG_BUGON to observe this in advance.
144 */
145 DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp);
146
147 /* last refcount should be connected with its managed pslot. */
148 erofs_workgroup_unfreeze(grp, 0);
149 __erofs_workgroup_free(grp);
150 return true;
151 }
152
erofs_shrink_workstation(struct erofs_sb_info * sbi,unsigned long nr_shrink)153 static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
154 unsigned long nr_shrink)
155 {
156 struct erofs_workgroup *grp;
157 unsigned int freed = 0;
158 unsigned long index;
159
160 xa_lock(&sbi->managed_pslots);
161 xa_for_each(&sbi->managed_pslots, index, grp) {
162 /* try to shrink each valid workgroup */
163 if (!erofs_try_to_release_workgroup(sbi, grp))
164 continue;
165 xa_unlock(&sbi->managed_pslots);
166
167 ++freed;
168 if (!--nr_shrink)
169 return freed;
170 xa_lock(&sbi->managed_pslots);
171 }
172 xa_unlock(&sbi->managed_pslots);
173 return freed;
174 }
175
/* protects the list of mounted instances, 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

/* stamp of the latest shrinker pass; protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;
182
erofs_shrinker_register(struct super_block * sb)183 void erofs_shrinker_register(struct super_block *sb)
184 {
185 struct erofs_sb_info *sbi = EROFS_SB(sb);
186
187 mutex_init(&sbi->umount_mutex);
188
189 spin_lock(&erofs_sb_list_lock);
190 list_add(&sbi->list, &erofs_sb_list);
191 spin_unlock(&erofs_sb_list_lock);
192 }
193
erofs_shrinker_unregister(struct super_block * sb)194 void erofs_shrinker_unregister(struct super_block *sb)
195 {
196 struct erofs_sb_info *const sbi = EROFS_SB(sb);
197
198 mutex_lock(&sbi->umount_mutex);
199 /* clean up all remaining workgroups in memory */
200 erofs_shrink_workstation(sbi, ~0UL);
201
202 spin_lock(&erofs_sb_list_lock);
203 list_del(&sbi->list);
204 spin_unlock(&erofs_sb_list_lock);
205 mutex_unlock(&sbi->umount_mutex);
206 }
207
erofs_shrink_count(struct shrinker * shrink,struct shrink_control * sc)208 static unsigned long erofs_shrink_count(struct shrinker *shrink,
209 struct shrink_control *sc)
210 {
211 return atomic_long_read(&erofs_global_shrink_cnt);
212 }
213
erofs_shrink_scan(struct shrinker * shrink,struct shrink_control * sc)214 static unsigned long erofs_shrink_scan(struct shrinker *shrink,
215 struct shrink_control *sc)
216 {
217 struct erofs_sb_info *sbi;
218 struct list_head *p;
219
220 unsigned long nr = sc->nr_to_scan;
221 unsigned int run_no;
222 unsigned long freed = 0;
223
224 spin_lock(&erofs_sb_list_lock);
225 do {
226 run_no = ++shrinker_run_no;
227 } while (run_no == 0);
228
229 /* Iterate over all mounted superblocks and try to shrink them */
230 p = erofs_sb_list.next;
231 while (p != &erofs_sb_list) {
232 sbi = list_entry(p, struct erofs_sb_info, list);
233
234 /*
235 * We move the ones we do to the end of the list, so we stop
236 * when we see one we have already done.
237 */
238 if (sbi->shrinker_run_no == run_no)
239 break;
240
241 if (!mutex_trylock(&sbi->umount_mutex)) {
242 p = p->next;
243 continue;
244 }
245
246 spin_unlock(&erofs_sb_list_lock);
247 sbi->shrinker_run_no = run_no;
248
249 freed += erofs_shrink_workstation(sbi, nr - freed);
250
251 spin_lock(&erofs_sb_list_lock);
252 /* Get the next list element before we move this one */
253 p = p->next;
254
255 /*
256 * Move this one to the end of the list to provide some
257 * fairness.
258 */
259 list_move_tail(&sbi->list, &erofs_sb_list);
260 mutex_unlock(&sbi->umount_mutex);
261
262 if (freed >= nr)
263 break;
264 }
265 spin_unlock(&erofs_sb_list_lock);
266 return freed;
267 }
268
269 static struct shrinker erofs_shrinker_info = {
270 .scan_objects = erofs_shrink_scan,
271 .count_objects = erofs_shrink_count,
272 .seeks = DEFAULT_SEEKS,
273 };
274
erofs_init_shrinker(void)275 int __init erofs_init_shrinker(void)
276 {
277 return register_shrinker(&erofs_shrinker_info);
278 }
279
erofs_exit_shrinker(void)280 void erofs_exit_shrinker(void)
281 {
282 unregister_shrinker(&erofs_shrinker_info);
283 }
#endif /* CONFIG_EROFS_FS_ZIP */
285
286