// SPDX-License-Identifier: GPL-2.0
/*
 * mm/memcg_control.c
 *
 * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd.
 */
#include <linux/memcontrol.h>
#include <linux/types.h>
#include <linux/cgroup-defs.h>
#include <linux/cgroup.h>
#include <linux/zswapd.h>
#include "internal.h"

#include "zswapd_internal.h"

#ifdef CONFIG_HYPERHOLD_MEMCG

struct list_head score_head;
bool score_head_inited;
DEFINE_RWLOCK(score_list_lock);
DEFINE_MUTEX(reclaim_para_lock);

/**
 * get_next_memcg - iterate over the memory cgroup score_list
 * @prev: previously returned memcg, or %NULL on the first invocation
 *
 * Returns a reference to the next memcg on score_list after @prev,
 * or %NULL once a full round-trip has been made.
 *
 * The caller must pass the returned memcg back in @prev on the next
 * invocation so its reference is dropped, or use get_next_memcg_break()
 * to cancel a walk before the round-trip is complete. A usage sketch
 * follows get_next_memcg_break() below.
 */
struct mem_cgroup *get_next_memcg(struct mem_cgroup *prev)
{
	struct mem_cgroup *memcg = NULL;
	struct list_head *pos = NULL;
	unsigned long flags;

	if (unlikely(!score_head_inited))
		return NULL;

	read_lock_irqsave(&score_list_lock, flags);

	if (unlikely(!prev))
		pos = &score_head;
	else
		pos = &(prev->score_node);

	if (list_empty(pos)) /* deleted node */
		goto unlock;

	if (pos->next == &score_head)
		goto unlock;

	memcg = list_entry(pos->next,
			struct mem_cgroup, score_node);

	if (!css_tryget(&memcg->css))
		memcg = NULL;

unlock:
	read_unlock_irqrestore(&score_list_lock, flags);

	if (prev)
		css_put(&prev->css);

	return memcg;
}

void get_next_memcg_break(struct mem_cgroup *memcg)
{
	if (memcg)
		css_put(&memcg->css);
}
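
/*
 * Illustrative sketch only (not from the original source): the intended
 * pairing of get_next_memcg() and get_next_memcg_break(), assuming a
 * hypothetical should_stop() predicate supplied by the caller.
 *
 *	struct mem_cgroup *memcg = NULL;
 *
 *	while ((memcg = get_next_memcg(memcg))) {
 *		if (should_stop(memcg)) {
 *			get_next_memcg_break(memcg);
 *			break;
 *		}
 *	}
 */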

struct mem_cgroup *get_prev_memcg(struct mem_cgroup *next)
{
	struct mem_cgroup *memcg = NULL;
	struct list_head *pos = NULL;
	unsigned long flags;

	if (unlikely(!score_head_inited))
		return NULL;

	read_lock_irqsave(&score_list_lock, flags);

	if (unlikely(!next))
		pos = &score_head;
	else
		pos = &next->score_node;

	if (list_empty(pos)) /* deleted node */
		goto unlock;

	if (pos->prev == &score_head)
		goto unlock;

	memcg = list_entry(pos->prev,
			struct mem_cgroup, score_node);

	if (unlikely(!memcg))
		goto unlock;

	if (!css_tryget(&memcg->css))
		memcg = NULL;

unlock:
	read_unlock_irqrestore(&score_list_lock, flags);

	if (next)
		css_put(&next->css);
	return memcg;
}

void get_prev_memcg_break(struct mem_cgroup *memcg)
{
	if (memcg)
		css_put(&memcg->css);
}

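/*
 * Re-position @target within score_list according to its current
 * app_score, holding score_list_lock for writing.
 */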
void memcg_app_score_update(struct mem_cgroup *target)
{
	struct list_head *pos = NULL;
	struct list_head *tmp;
	unsigned long flags;

	write_lock_irqsave(&score_list_lock, flags);
	list_for_each_prev_safe(pos, tmp, &score_head) {
		struct mem_cgroup *memcg = list_entry(pos,
				struct mem_cgroup, score_node);
		if (atomic64_read(&memcg->memcg_reclaimed.app_score) <
			atomic64_read(&target->memcg_reclaimed.app_score))
			break;
	}
	list_move_tail(&target->score_node, pos);
	write_unlock_irqrestore(&score_list_lock, flags);
}

static u64 mem_cgroup_app_score_read(struct cgroup_subsys_state *css,
				struct cftype *cft)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);

	return atomic64_read(&memcg->memcg_reclaimed.app_score);
}

static int mem_cgroup_app_score_write(struct cgroup_subsys_state *css,
				struct cftype *cft, u64 val)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);

	if (val > MAX_APP_SCORE)
		return -EINVAL;

	if (atomic64_read(&memcg->memcg_reclaimed.app_score) != val) {
		atomic64_set(&memcg->memcg_reclaimed.app_score, val);
		memcg_app_score_update(memcg);
	}

	return 0;
}

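/*
 * Isolate pages from @lru of @lruvec onto @page_list, skipping pages
 * that are unevictable or anonymous but not swap-backed; returns the
 * number of pages isolated.
 */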
static unsigned long move_pages_to_page_list(struct lruvec *lruvec, enum lru_list lru,
					     struct list_head *page_list)
{
	struct list_head *src = &lruvec->lists[lru];
	unsigned long nr_isolated = 0;
	struct page *page;

	while (!list_empty(src)) {
		page = lru_to_page(src);

		if (PageUnevictable(page))
			continue;

		if (likely(get_page_unless_zero(page))) {
			if (isolate_lru_page(page)) {
				put_page(page);
				continue;
			}
			put_page(page);
		} else {
			continue;
		}

		if (PageUnevictable(page)) {
			putback_lru_page(page);
			continue;
		}

		if (PageAnon(page) && !PageSwapBacked(page)) {
			putback_lru_page(page);
			continue;
		}

		list_add(&page->lru, page_list);
		nr_isolated++;
	}

	return nr_isolated;
}

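/*
 * Reclaim the inactive anonymous pages of @memcg on @pgdat: isolate
 * them, run them through shrink_page_list(), and put any pages that
 * were not freed back on the LRU. Returns the number of pages reclaimed.
 */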
unsigned long reclaim_all_anon_memcg(struct pglist_data *pgdat, struct mem_cgroup *memcg)
{
	struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
	unsigned long nr_reclaimed;
	LIST_HEAD(page_list);
	struct page *page;
	struct reclaim_stat stat = {};
	struct scan_control sc = {
		.gfp_mask = GFP_KERNEL,
		.may_writepage = 1,
		.may_unmap = 1,
		.may_swap = 1,
	};

#ifdef CONFIG_RECLAIM_ACCT
	reclaimacct_substage_start(RA_SHRINKANON);
#endif
	count_vm_event(FREEZE_RECLAIME_COUNT);
	move_pages_to_page_list(lruvec, LRU_INACTIVE_ANON, &page_list);

	nr_reclaimed = shrink_page_list(&page_list, pgdat, &sc, &stat, true);
	count_vm_event(FREEZE_RECLAIMED);

	while (!list_empty(&page_list)) {
		page = lru_to_page(&page_list);
		list_del(&page->lru);
		putback_lru_page(page);
	}

#ifdef CONFIG_RECLAIM_ACCT
	reclaimacct_substage_end(RA_SHRINKANON, nr_reclaimed, NULL);
#endif

	return nr_reclaimed;
}

static ssize_t memcg_force_shrink_anon(struct kernfs_open_file *of,
				   char *buf, size_t nbytes,
				   loff_t off)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
	struct pglist_data *pgdat;
	int nid;

	for_each_online_node(nid) {
		pgdat = NODE_DATA(nid);
		reclaim_all_anon_memcg(pgdat, memcg);
	}

	return nbytes;
}

static int memcg_name_show(struct seq_file *m, void *v)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));

	seq_printf(m, "%s\n", memcg->name);
	return 0;
}

static ssize_t memcg_name_write(struct kernfs_open_file *of, char *buf,
				     size_t nbytes, loff_t off)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));

	buf = strstrip(buf);
	if (nbytes >= MEM_CGROUP_NAME_MAX_LEN)
		return -EINVAL;

	mutex_lock(&reclaim_para_lock);
	if (memcg)
		strcpy(memcg->name, buf);
	mutex_unlock(&reclaim_para_lock);

	return nbytes;
}

static int memcg_total_info_per_app_show(struct seq_file *m, void *v)
{
	struct mem_cgroup *memcg = NULL;
	struct mem_cgroup_per_node *mz = NULL;
	struct lruvec *lruvec = NULL;
	unsigned long anon_size;
	unsigned long zram_compress_size;
	unsigned long eswap_compress_size;

	while ((memcg = get_next_memcg(memcg))) {
		mz = mem_cgroup_nodeinfo(memcg, 0);
		if (!mz) {
			get_next_memcg_break(memcg);
			return 0;
		}

		lruvec = &mz->lruvec;
		if (!lruvec) {
			get_next_memcg_break(memcg);
			return 0;
		}

		anon_size = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, MAX_NR_ZONES) +
			    lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, MAX_NR_ZONES);
		zram_compress_size = memcg_data_size(memcg, CACHE_SIZE);
		eswap_compress_size = memcg_data_size(memcg, SWAP_SIZE);
		anon_size *= PAGE_SIZE / SZ_1K;
		zram_compress_size /= SZ_1K;
		eswap_compress_size /= SZ_1K;

		if (!strlen(memcg->name))
			continue;

		seq_printf(m, "%s %lu %lu %lu\n", memcg->name, anon_size,
			   zram_compress_size, eswap_compress_size);
	}

	return 0;
}

static int memcg_ub_ufs2zram_ratio_write(struct cgroup_subsys_state *css,
					 struct cftype *cft, u64 val)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
	const unsigned int ratio = 100;

	if (val > ratio)
		return -EINVAL;

	atomic64_set(&memcg->memcg_reclaimed.ub_ufs2zram_ratio, val);

	return 0;
}

static u64 memcg_ub_ufs2zram_ratio_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);

	return atomic64_read(&memcg->memcg_reclaimed.ub_ufs2zram_ratio);
}

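/*
 * Swap ub_ufs2zram_ratio percent of the memcg's swapped-out (eswap)
 * data back in.
 */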
static int memcg_force_swapin_write(struct cgroup_subsys_state *css, struct cftype *cft, u64 val)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
	u64 size;
	const unsigned int ratio = 100;

	size = memcg_data_size(memcg, SWAP_SIZE);
	size = div_u64(atomic64_read(&memcg->memcg_reclaimed.ub_ufs2zram_ratio) * size, ratio);

	swapin_memcg(memcg, size);

	return 0;
}

#ifdef CONFIG_MEM_PURGEABLE
static unsigned long purgeable_memcg_node(pg_data_t *pgdata,
	struct scan_control *sc, struct mem_cgroup *memcg)
{
	unsigned long nr = 0;
	struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdata);

	if (!lruvec)
		return 0;

	shrink_list(LRU_ACTIVE_PURGEABLE, -1, lruvec, sc);
	nr += shrink_list(LRU_INACTIVE_PURGEABLE, -1, lruvec, sc);

	pr_info("reclaim %lu purgeable pages\n", nr);
	return nr;
}

static int memcg_force_shrink_purgeable_bysize(struct cgroup_subsys_state *css,
	struct cftype *cft, u64 reclaim_size)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
	struct scan_control sc = {
		.gfp_mask = GFP_KERNEL,
		.order = 0,
		.priority = DEF_PRIORITY,
		.may_deactivate = DEACTIVATE_ANON,
		.may_writepage = 1,
		.may_unmap = 1,
		.may_swap = 1,
		.reclaim_idx = MAX_NR_ZONES - 1,
	};
	int nid = 0;

	if (!memcg)
		return 0;

	if (reclaim_size == 0) {
		pr_err("reclaim_size is zero, skip shrink\n");
		return 0;
	}

	sc.nr_to_reclaim = div_u64(reclaim_size, PAGE_SIZE);

	for_each_node_state(nid, N_MEMORY)
		purgeable_memcg_node(NODE_DATA(nid), &sc, memcg);

	return 0;
}
#endif

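/*
 * Control files registered on the memory cgroup via
 * cgroup_add_legacy_cftypes() in memcg_policy_init() below.
 */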
static struct cftype memcg_policy_files[] = {
	{
		.name = "name",
		.write = memcg_name_write,
		.seq_show = memcg_name_show,
	},
	{
		.name = "ub_ufs2zram_ratio",
		.write_u64 = memcg_ub_ufs2zram_ratio_write,
		.read_u64 = memcg_ub_ufs2zram_ratio_read,
	},
	{
		.name = "total_info_per_app",
		.seq_show = memcg_total_info_per_app_show,
	},
	{
		.name = "app_score",
		.write_u64 = mem_cgroup_app_score_write,
		.read_u64 = mem_cgroup_app_score_read,
	},
	{
		.name = "force_shrink_anon",
		.write = memcg_force_shrink_anon,
	},
	{
		.name = "force_swapin",
		.write_u64 = memcg_force_swapin_write,
	},
#ifdef CONFIG_MEM_PURGEABLE
	{
		.name = "force_shrink_purgeable_bysize",
		.write_u64 = memcg_force_shrink_purgeable_bysize,
	},
#endif
	{ },	/* terminate */
};

static int __init memcg_policy_init(void)
{
	if (!mem_cgroup_disabled())
		WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys,
						memcg_policy_files));

	return 0;
}
subsys_initcall(memcg_policy_init);
#else
struct mem_cgroup *get_next_memcg(struct mem_cgroup *prev)
{
	return NULL;
}

void get_next_memcg_break(struct mem_cgroup *memcg)
{
}

struct mem_cgroup *get_prev_memcg(struct mem_cgroup *next)
{
	return NULL;
}

void get_prev_memcg_break(struct mem_cgroup *memcg)
{
}

static u64 mem_cgroup_app_score_read(struct cgroup_subsys_state *css,
				struct cftype *cft)
{
	return 0;
}

static int mem_cgroup_app_score_write(struct cgroup_subsys_state *css,
				struct cftype *cft, u64 val)
{
	return 0;
}

void memcg_app_score_update(struct mem_cgroup *target)
{
}
#endif