// SPDX-License-Identifier: GPL-2.0
#include <linux/memcontrol.h>
#include <linux/rwsem.h>
#include <linux/shrinker.h>
#include <linux/rculist.h>
#include <trace/events/vmscan.h>

#undef CREATE_TRACE_POINTS
#include <trace/hooks/vmscan.h>

#include "internal.h"

LIST_HEAD(shrinker_list);
DEFINE_MUTEX(shrinker_mutex);

#ifdef CONFIG_MEMCG
static int shrinker_nr_max;

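/*
 * Per-memcg, per-node shrinker state lives in a shrinker_info: a flexible
 * array of pointers to shrinker_info_unit, each unit covering
 * SHRINKER_UNIT_BITS shrinker IDs with a bitmap of potentially non-empty
 * shrinkers (unit->map) and one deferred-work counter per shrinker
 * (unit->nr_deferred[]).  shrinker_nr_max is the current capacity in
 * shrinker IDs, rounded up to a multiple of SHRINKER_UNIT_BITS by
 * expand_shrinker_info().
 */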
static inline int shrinker_unit_size(int nr_items)
{
	return (DIV_ROUND_UP(nr_items, SHRINKER_UNIT_BITS) * sizeof(struct shrinker_info_unit *));
}

static inline void shrinker_unit_free(struct shrinker_info *info, int start)
{
	struct shrinker_info_unit **unit;
	int nr, i;

	if (!info)
		return;

	unit = info->unit;
	nr = DIV_ROUND_UP(info->map_nr_max, SHRINKER_UNIT_BITS);

	for (i = start; i < nr; i++) {
		if (!unit[i])
			break;

		kfree(unit[i]);
		unit[i] = NULL;
	}
}

static inline int shrinker_unit_alloc(struct shrinker_info *new,
				       struct shrinker_info *old, int nid)
{
	struct shrinker_info_unit *unit;
	int nr = DIV_ROUND_UP(new->map_nr_max, SHRINKER_UNIT_BITS);
	int start = old ? DIV_ROUND_UP(old->map_nr_max, SHRINKER_UNIT_BITS) : 0;
	int i;

	for (i = start; i < nr; i++) {
		unit = kzalloc_node(sizeof(*unit), GFP_KERNEL, nid);
		if (!unit) {
			shrinker_unit_free(new, start);
			return -ENOMEM;
		}

		new->unit[i] = unit;
	}

	return 0;
}

void free_shrinker_info(struct mem_cgroup *memcg)
{
	struct mem_cgroup_per_node *pn;
	struct shrinker_info *info;
	int nid;

	for_each_node(nid) {
		pn = memcg->nodeinfo[nid];
		info = rcu_dereference_protected(pn->shrinker_info, true);
		shrinker_unit_free(info, 0);
		kvfree(info);
		rcu_assign_pointer(pn->shrinker_info, NULL);
	}
}

int alloc_shrinker_info(struct mem_cgroup *memcg)
{
	int nid, ret = 0;
	int array_size = 0;

	mutex_lock(&shrinker_mutex);
	array_size = shrinker_unit_size(shrinker_nr_max);
	for_each_node(nid) {
		struct shrinker_info *info = kvzalloc_node(sizeof(*info) + array_size,
							   GFP_KERNEL, nid);
		if (!info)
			goto err;
		info->map_nr_max = shrinker_nr_max;
		if (shrinker_unit_alloc(info, NULL, nid)) {
			kvfree(info);
			goto err;
		}
		rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info);
	}
	mutex_unlock(&shrinker_mutex);

	return ret;

err:
	mutex_unlock(&shrinker_mutex);
	free_shrinker_info(memcg);
	return -ENOMEM;
}

static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
						     int nid)
{
	return rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_info,
					 lockdep_is_held(&shrinker_mutex));
}

static int expand_one_shrinker_info(struct mem_cgroup *memcg, int new_size,
				    int old_size, int new_nr_max)
{
	struct shrinker_info *new, *old;
	struct mem_cgroup_per_node *pn;
	int nid;

	for_each_node(nid) {
		pn = memcg->nodeinfo[nid];
		old = shrinker_info_protected(memcg, nid);
		/* Not yet online memcg */
		if (!old)
			return 0;

		/* Already expanded this shrinker_info */
		if (new_nr_max <= old->map_nr_max)
			continue;

		new = kvzalloc_node(sizeof(*new) + new_size, GFP_KERNEL, nid);
		if (!new)
			return -ENOMEM;

		new->map_nr_max = new_nr_max;

		memcpy(new->unit, old->unit, old_size);
		if (shrinker_unit_alloc(new, old, nid)) {
			kvfree(new);
			return -ENOMEM;
		}

		rcu_assign_pointer(pn->shrinker_info, new);
		kvfree_rcu(old, rcu);
	}

	return 0;
}

static int expand_shrinker_info(int new_id)
{
	int ret = 0;
	int new_nr_max = round_up(new_id + 1, SHRINKER_UNIT_BITS);
	int new_size, old_size = 0;
	struct mem_cgroup *memcg;

	if (!root_mem_cgroup)
		goto out;

	lockdep_assert_held(&shrinker_mutex);

	new_size = shrinker_unit_size(new_nr_max);
	old_size = shrinker_unit_size(shrinker_nr_max);

	memcg = mem_cgroup_iter(NULL, NULL, NULL);
	do {
		ret = expand_one_shrinker_info(memcg, new_size, old_size,
					       new_nr_max);
		if (ret) {
			mem_cgroup_iter_break(NULL, memcg);
			goto out;
		}
	} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
out:
	if (!ret)
		shrinker_nr_max = new_nr_max;

	return ret;
}

static inline int shrinker_id_to_index(int shrinker_id)
{
	return shrinker_id / SHRINKER_UNIT_BITS;
}

static inline int shrinker_id_to_offset(int shrinker_id)
{
	return shrinker_id % SHRINKER_UNIT_BITS;
}

static inline int calc_shrinker_id(int index, int offset)
{
	return index * SHRINKER_UNIT_BITS + offset;
}

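/*
 * Example: with SHRINKER_UNIT_BITS == 64, shrinker id 70 lives in
 * info->unit[1] at bit offset 6, and calc_shrinker_id(1, 6) maps back to 70.
 */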
void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
{
	if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) {
		struct shrinker_info *info;
		struct shrinker_info_unit *unit;

		rcu_read_lock();
		info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
		unit = info->unit[shrinker_id_to_index(shrinker_id)];
		if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) {
			/* Pairs with smp mb in shrink_slab() */
			smp_mb__before_atomic();
			set_bit(shrinker_id_to_offset(shrinker_id), unit->map);
		}
		rcu_read_unlock();
	}
}

static DEFINE_IDR(shrinker_idr);

static int shrinker_memcg_alloc(struct shrinker *shrinker)
{
	int id, ret = -ENOMEM;

	if (mem_cgroup_disabled())
		return -ENOSYS;

	mutex_lock(&shrinker_mutex);
	id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
	if (id < 0)
		goto unlock;

	if (id >= shrinker_nr_max) {
		if (expand_shrinker_info(id)) {
			idr_remove(&shrinker_idr, id);
			goto unlock;
		}
	}
	shrinker->id = id;
	ret = 0;
unlock:
	mutex_unlock(&shrinker_mutex);
	return ret;
}

static void shrinker_memcg_remove(struct shrinker *shrinker)
{
	int id = shrinker->id;

	BUG_ON(id < 0);

	lockdep_assert_held(&shrinker_mutex);

	idr_remove(&shrinker_idr, id);
}

static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
				   struct mem_cgroup *memcg)
{
	struct shrinker_info *info;
	struct shrinker_info_unit *unit;
	long nr_deferred;

	rcu_read_lock();
	info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
	unit = info->unit[shrinker_id_to_index(shrinker->id)];
	nr_deferred = atomic_long_xchg(&unit->nr_deferred[shrinker_id_to_offset(shrinker->id)], 0);
	rcu_read_unlock();

	return nr_deferred;
}

static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
				  struct mem_cgroup *memcg)
{
	struct shrinker_info *info;
	struct shrinker_info_unit *unit;
	long nr_deferred;

	rcu_read_lock();
	info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
	unit = info->unit[shrinker_id_to_index(shrinker->id)];
	nr_deferred =
		atomic_long_add_return(nr, &unit->nr_deferred[shrinker_id_to_offset(shrinker->id)]);
	rcu_read_unlock();

	return nr_deferred;
}

void reparent_shrinker_deferred(struct mem_cgroup *memcg)
{
	int nid, index, offset;
	long nr;
	struct mem_cgroup *parent;
	struct shrinker_info *child_info, *parent_info;
	struct shrinker_info_unit *child_unit, *parent_unit;

	parent = parent_mem_cgroup(memcg);
	if (!parent)
		parent = root_mem_cgroup;

	/* Prevent concurrent shrinker_info expansion */
	mutex_lock(&shrinker_mutex);
	for_each_node(nid) {
		child_info = shrinker_info_protected(memcg, nid);
		parent_info = shrinker_info_protected(parent, nid);
		for (index = 0; index < shrinker_id_to_index(child_info->map_nr_max); index++) {
			child_unit = child_info->unit[index];
			parent_unit = parent_info->unit[index];
			for (offset = 0; offset < SHRINKER_UNIT_BITS; offset++) {
				nr = atomic_long_read(&child_unit->nr_deferred[offset]);
				atomic_long_add(nr, &parent_unit->nr_deferred[offset]);
			}
		}
	}
	mutex_unlock(&shrinker_mutex);
}
#else
static int shrinker_memcg_alloc(struct shrinker *shrinker)
{
	return -ENOSYS;
}

static void shrinker_memcg_remove(struct shrinker *shrinker)
{
}

static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
				   struct mem_cgroup *memcg)
{
	return 0;
}

static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
				  struct mem_cgroup *memcg)
{
	return 0;
}
#endif /* CONFIG_MEMCG */

static long xchg_nr_deferred(struct shrinker *shrinker,
			     struct shrink_control *sc)
{
	int nid = sc->nid;

	if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
		nid = 0;

	if (sc->memcg &&
	    (shrinker->flags & SHRINKER_MEMCG_AWARE))
		return xchg_nr_deferred_memcg(nid, shrinker,
					      sc->memcg);

	return atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
}


static long add_nr_deferred(long nr, struct shrinker *shrinker,
			    struct shrink_control *sc)
{
	int nid = sc->nid;

	if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
		nid = 0;

	if (sc->memcg &&
	    (shrinker->flags & SHRINKER_MEMCG_AWARE))
		return add_nr_deferred_memcg(nr, nid, shrinker,
					     sc->memcg);

	return atomic_long_add_return(nr, &shrinker->nr_deferred[nid]);
}

#define SHRINK_BATCH 128

static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
				    struct shrinker *shrinker, int priority)
{
	unsigned long freed = 0;
	unsigned long long delta;
	long total_scan;
	long freeable;
	long nr;
	long new_nr;
	long batch_size = shrinker->batch ? shrinker->batch
					  : SHRINK_BATCH;
	long scanned = 0, next_deferred;

	freeable = shrinker->count_objects(shrinker, shrinkctl);
	trace_android_vh_do_shrink_slab(shrinker, &freeable);
	trace_android_vh_do_shrink_slab_ex(shrinkctl, shrinker, &freeable, priority);
	if (freeable == 0 || freeable == SHRINK_EMPTY)
		return freeable;

	/*
	 * copy the current shrinker scan count into a local variable
	 * and zero it so that other concurrent shrinker invocations
	 * don't also do this scanning work.
	 */
	nr = xchg_nr_deferred(shrinker, shrinkctl);

	if (shrinker->seeks) {
		delta = freeable >> priority;
		delta *= 4;
		do_div(delta, shrinker->seeks);
	} else {
		/*
		 * These objects don't require any IO to create. Trim
		 * them aggressively under memory pressure to keep
		 * them from causing refetches in the IO caches.
		 */
		delta = freeable / 2;
	}

	total_scan = nr >> priority;
	total_scan += delta;
	total_scan = min(total_scan, (2 * freeable));
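	/*
	 * Worked example (assuming shrinker->seeks == DEFAULT_SEEKS == 2):
	 * with freeable == 10000 and priority == 12, delta is
	 * (10000 >> 12) * 4 / 2 == 4, so each pass at the lowest reclaim
	 * pressure asks for only a handful of objects; any deferred work
	 * (nr) is added on top and the total is capped at 2 * freeable.
	 */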

	trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
				   freeable, delta, total_scan, priority);

	/*
	 * Normally, we should not scan less than batch_size objects in one
	 * pass to avoid too frequent shrinker calls, but if the slab has less
	 * than batch_size objects in total and we are really tight on memory,
	 * we will try to reclaim all available objects, otherwise we can end
	 * up failing allocations although there are plenty of reclaimable
	 * objects spread over several slabs with usage less than the
	 * batch_size.
	 *
	 * We detect the "tight on memory" situations by looking at the total
	 * number of objects we want to scan (total_scan). If it is greater
	 * than the total number of objects on slab (freeable), we must be
	 * scanning at high prio and therefore should try to reclaim as much as
	 * possible.
	 */
	while (total_scan >= batch_size ||
	       total_scan >= freeable) {
		unsigned long ret;
		unsigned long nr_to_scan = min(batch_size, total_scan);

		shrinkctl->nr_to_scan = nr_to_scan;
		shrinkctl->nr_scanned = nr_to_scan;
		ret = shrinker->scan_objects(shrinker, shrinkctl);
		if (ret == SHRINK_STOP)
			break;
		freed += ret;

		count_vm_events(SLABS_SCANNED, shrinkctl->nr_scanned);
		total_scan -= shrinkctl->nr_scanned;
		scanned += shrinkctl->nr_scanned;

		cond_resched();
	}

	/*
	 * The deferred work is increased by any new work (delta) that wasn't
	 * done, and decreased by old deferred work that was done now.
	 *
	 * It is capped at twice the number of freeable items.
	 */
	next_deferred = max_t(long, (nr + delta - scanned), 0);
	next_deferred = min(next_deferred, (2 * freeable));
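	/*
	 * Example: if 100 deferred objects were picked up above (nr == 100),
	 * 40 new ones were added this pass (delta == 40) and 90 objects were
	 * actually scanned, then 50 objects are carried over to the next
	 * invocation of this shrinker.
	 */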

	/*
	 * move the unused scan count back into the shrinker in a
	 * manner that handles concurrent updates.
	 */
	new_nr = add_nr_deferred(next_deferred, shrinker, shrinkctl);

	trace_mm_shrink_slab_end(shrinker, shrinkctl->nid, freed, nr, new_nr, total_scan);
	return freed;
}

#ifdef CONFIG_MEMCG
static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
			struct mem_cgroup *memcg, int priority)
{
	struct shrinker_info *info;
	unsigned long ret, freed = 0;
	int offset, index = 0;

	if (!mem_cgroup_online(memcg))
		return 0;

	/*
	 * Lockless algorithm of the memcg shrink.
	 *
	 * The shrinker_info may be freed asynchronously via RCU in
	 * expand_one_shrinker_info(), so rcu_read_lock() needs to be used
	 * to ensure the existence of the shrinker_info.
	 *
	 * The shrinker_info_unit is never freed unless its corresponding memcg
	 * is destroyed. Here we already hold the refcount of memcg, so the
	 * memcg will not be destroyed, and of course shrinker_info_unit will
	 * not be freed.
	 *
	 * So in the memcg shrink:
	 *  step 1: use rcu_read_lock() to guarantee existence of the
	 *          shrinker_info.
	 *  step 2: after getting shrinker_info_unit we can safely release the
	 *          RCU lock.
	 *  step 3: traverse the bitmap and calculate shrinker_id
	 *  step 4: use rcu_read_lock() to guarantee existence of the shrinker.
	 *  step 5: use shrinker_id to find the shrinker, then use
	 *          shrinker_try_get() to guarantee existence of the shrinker,
	 *          then we can release the RCU lock and call do_shrink_slab(),
	 *          which may sleep.
	 *  step 6: do shrinker_put() paired with step 5 to put the refcount,
	 *          if the refcount reaches 0, then wake up the waiter in
	 *          shrinker_free() by calling complete().
	 *          Note: unlike the global shrink, we don't need to acquire
	 *                the RCU lock to guarantee existence of the shrinker,
	 *                because we don't need to use this shrinker to
	 *                traverse the next shrinker in the bitmap.
	 *  step 7: we have already exited the RCU read-side critical section
	 *          before calling do_shrink_slab(), so the shrinker_info may be
	 *          released in expand_one_shrinker_info(); go back to step 1
	 *          to reacquire the shrinker_info.
	 */
again:
	rcu_read_lock();
	info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
	if (unlikely(!info))
		goto unlock;

	if (index < shrinker_id_to_index(info->map_nr_max)) {
		struct shrinker_info_unit *unit;

		unit = info->unit[index];

		rcu_read_unlock();

		for_each_set_bit(offset, unit->map, SHRINKER_UNIT_BITS) {
			struct shrink_control sc = {
				.gfp_mask = gfp_mask,
				.nid = nid,
				.memcg = memcg,
			};
			struct shrinker *shrinker;
			int shrinker_id = calc_shrinker_id(index, offset);

			rcu_read_lock();
			shrinker = idr_find(&shrinker_idr, shrinker_id);
			if (unlikely(!shrinker || !shrinker_try_get(shrinker))) {
				clear_bit(offset, unit->map);
				rcu_read_unlock();
				continue;
			}
			rcu_read_unlock();

			/* Call non-slab shrinkers even though kmem is disabled */
			if (!memcg_kmem_online() &&
			    !(shrinker->flags & SHRINKER_NONSLAB))
				continue;

			ret = do_shrink_slab(&sc, shrinker, priority);
			if (ret == SHRINK_EMPTY) {
				clear_bit(offset, unit->map);
				/*
				 * After the shrinker reported that it had no objects to
				 * free, but before we cleared the corresponding bit in
				 * the memcg shrinker map, a new object might have been
				 * added. To make sure we have the bit set in this
				 * case, we invoke the shrinker one more time and reset
				 * the bit if it reports that it is not empty anymore.
				 * The memory barrier here pairs with the barrier in
				 * set_shrinker_bit():
				 *
				 * list_lru_add()     shrink_slab_memcg()
				 *   list_add_tail()    clear_bit()
				 *   <MB>               <MB>
				 *   set_bit()          do_shrink_slab()
				 */
				smp_mb__after_atomic();
				ret = do_shrink_slab(&sc, shrinker, priority);
				if (ret == SHRINK_EMPTY)
					ret = 0;
				else
					set_shrinker_bit(memcg, nid, shrinker_id);
			}
			freed += ret;
			shrinker_put(shrinker);
		}

		index++;
		goto again;
	}
unlock:
	rcu_read_unlock();
	return freed;
}
#else /* !CONFIG_MEMCG */
static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
			struct mem_cgroup *memcg, int priority)
{
	return 0;
}
#endif /* CONFIG_MEMCG */

/**
 * shrink_slab - shrink slab caches
 * @gfp_mask: allocation context
 * @nid: node whose slab caches to target
 * @memcg: memory cgroup whose slab caches to target
 * @priority: the reclaim priority
 *
 * Call the shrink functions to age shrinkable caches.
 *
 * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set,
 * unaware shrinkers will receive a node id of 0 instead.
 *
 * @memcg specifies the memory cgroup to target. Unaware shrinkers
 * are called only if it is the root cgroup.
 *
 * @priority is sc->priority; the number of freeable objects is shifted
 * right by @priority to get the scan target.
 *
 * Returns the number of reclaimed slab objects.
 */
unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
			  int priority)
{
	unsigned long ret, freed = 0;
	struct shrinker *shrinker;
	bool bypass = false;

	trace_android_vh_shrink_slab_bypass(gfp_mask, nid, memcg, priority, &bypass);
	if (bypass)
		return 0;

	/*
	 * The root memcg might be allocated even though memcg is disabled
	 * via the "cgroup_disable=memory" boot parameter.  This could make
	 * mem_cgroup_is_root() return false, then just run memcg slab
	 * shrink, but skip global shrink.  This may result in premature
	 * oom.
	 */
	if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg))
		return shrink_slab_memcg(gfp_mask, nid, memcg, priority);

	/*
	 * Lockless algorithm of the global shrink.
	 *
	 * In the unregistration step, the shrinker will be freed asynchronously
	 * via RCU after its refcount reaches 0. So both rcu_read_lock() and
	 * shrinker_try_get() can be used to ensure the existence of the shrinker.
	 *
	 * So in the global shrink:
	 *  step 1: use rcu_read_lock() to guarantee existence of the shrinker
	 *          and the validity of the shrinker_list walk.
	 *  step 2: use shrinker_try_get() to try to get the refcount; if successful,
	 *          then the existence of the shrinker can also be guaranteed,
	 *          so we can release the RCU lock and call do_shrink_slab(),
	 *          which may sleep.
	 *  step 3: *MUST* reacquire the RCU lock before calling shrinker_put(),
	 *          which ensures that neither this shrinker nor the next shrinker
	 *          will be freed in the next traversal operation.
	 *  step 4: do shrinker_put() paired with step 2 to put the refcount,
	 *          if the refcount reaches 0, then wake up the waiter in
	 *          shrinker_free() by calling complete().
	 */
	rcu_read_lock();
	list_for_each_entry_rcu(shrinker, &shrinker_list, list) {
		struct shrink_control sc = {
			.gfp_mask = gfp_mask,
			.nid = nid,
			.memcg = memcg,
		};

		if (!shrinker_try_get(shrinker))
			continue;

		rcu_read_unlock();

		ret = do_shrink_slab(&sc, shrinker, priority);
		if (ret == SHRINK_EMPTY)
			ret = 0;
		freed += ret;

		rcu_read_lock();
		shrinker_put(shrinker);
	}

	rcu_read_unlock();
	cond_resched();
	return freed;
}
EXPORT_SYMBOL_GPL(shrink_slab);

struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...)
{
	struct shrinker *shrinker;
	unsigned int size;
	va_list ap;
	int err;

	shrinker = kzalloc(sizeof(struct shrinker), GFP_KERNEL);
	if (!shrinker)
		return NULL;

	va_start(ap, fmt);
	err = shrinker_debugfs_name_alloc(shrinker, fmt, ap);
	va_end(ap);
	if (err)
		goto err_name;

	shrinker->flags = flags | SHRINKER_ALLOCATED;
	shrinker->seeks = DEFAULT_SEEKS;

	if (flags & SHRINKER_MEMCG_AWARE) {
		err = shrinker_memcg_alloc(shrinker);
		if (err == -ENOSYS) {
			/* Memcg is not supported, fall back to a non-memcg-aware shrinker. */
			shrinker->flags &= ~SHRINKER_MEMCG_AWARE;
			goto non_memcg;
		}

		if (err)
			goto err_flags;

		return shrinker;
	}

non_memcg:
	/*
	 * nr_deferred is maintained at the per-memcg level for memcg-aware
	 * shrinkers, so only allocate nr_deferred in the following cases:
	 *  - non-memcg-aware shrinkers
	 *  - !CONFIG_MEMCG
	 *  - memcg is disabled by the kernel command line
	 */
	size = sizeof(*shrinker->nr_deferred);
	if (flags & SHRINKER_NUMA_AWARE)
		size *= nr_node_ids;

	shrinker->nr_deferred = kzalloc(size, GFP_KERNEL);
	if (!shrinker->nr_deferred)
		goto err_flags;

	return shrinker;

err_flags:
	shrinker_debugfs_name_free(shrinker);
err_name:
	kfree(shrinker);
	return NULL;
}
EXPORT_SYMBOL_GPL(shrinker_alloc);

void shrinker_register(struct shrinker *shrinker)
{
	if (unlikely(!(shrinker->flags & SHRINKER_ALLOCATED))) {
		pr_warn("Must use shrinker_alloc() to dynamically allocate the shrinker");
		return;
	}

	mutex_lock(&shrinker_mutex);
	list_add_tail_rcu(&shrinker->list, &shrinker_list);
	shrinker->flags |= SHRINKER_REGISTERED;
	shrinker_debugfs_add(shrinker);
	mutex_unlock(&shrinker_mutex);

	init_completion(&shrinker->done);
	/*
	 * Now the shrinker is fully set up, take the first reference to it to
	 * indicate that lookup operations are now allowed to use it via
	 * shrinker_try_get().
	 */
	refcount_set(&shrinker->refcount, 1);
}
EXPORT_SYMBOL_GPL(shrinker_register);
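
/*
 * Typical lifecycle of a dynamically allocated shrinker (illustrative
 * sketch only; my_cache, my_cache_count and my_cache_scan are placeholder
 * names, not part of this file):
 *
 *	shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE, "my_cache-shrinker");
 *	if (!shrinker)
 *		return -ENOMEM;
 *	shrinker->count_objects = my_cache_count;
 *	shrinker->scan_objects = my_cache_scan;
 *	shrinker->private_data = my_cache;
 *	shrinker_register(shrinker);
 *
 *	and on teardown:
 *	shrinker_free(shrinker);
 */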

static void shrinker_free_rcu_cb(struct rcu_head *head)
{
	struct shrinker *shrinker = container_of(head, struct shrinker, rcu);

	kfree(shrinker->nr_deferred);
	kfree(shrinker);
}

void shrinker_free(struct shrinker *shrinker)
{
	struct dentry *debugfs_entry = NULL;
	int debugfs_id;

	if (!shrinker)
		return;

	if (shrinker->flags & SHRINKER_REGISTERED) {
		/* drop the initial refcount */
		shrinker_put(shrinker);
		/*
		 * Wait for all lookups of the shrinker to complete. After that,
		 * no shrinker is running or will run again, so it can safely be
		 * freed asynchronously via RCU, together with the structure the
		 * shrinker is embedded in, such as a super_block.
		 */
		wait_for_completion(&shrinker->done);
	}

	mutex_lock(&shrinker_mutex);
	if (shrinker->flags & SHRINKER_REGISTERED) {
		/*
		 * Now we can safely remove it from the shrinker_list and then
		 * free it.
		 */
		list_del_rcu(&shrinker->list);
		debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id);
		shrinker->flags &= ~SHRINKER_REGISTERED;
	}

	shrinker_debugfs_name_free(shrinker);

	if (shrinker->flags & SHRINKER_MEMCG_AWARE)
		shrinker_memcg_remove(shrinker);
	mutex_unlock(&shrinker_mutex);

	if (debugfs_entry)
		shrinker_debugfs_remove(debugfs_entry, debugfs_id);

	call_rcu(&shrinker->rcu, shrinker_free_rcu_cb);
}
EXPORT_SYMBOL_GPL(shrinker_free);