// SPDX-License-Identifier: GPL-2.0
/*
 * mm/memcg_control.c
 *
 * Copyright (c) 2020-2022 Huawei Technologies Co., Ltd.
 */
#include <linux/memcontrol.h>
#include <linux/types.h>
#include <linux/cgroup-defs.h>
#include <linux/cgroup.h>
#include <linux/zswapd.h>
#include "internal.h"

#include "zswapd_internal.h"

#ifdef CONFIG_HYPERHOLD_MEMCG

struct list_head score_head;
bool score_head_inited;
DEFINE_RWLOCK(score_list_lock);
DEFINE_MUTEX(reclaim_para_lock);

/**
 * get_next_memcg - iterate over the memory cgroup score_list
 * @prev: previously returned memcg, NULL on first invocation
 *
 * Returns a reference to the next memcg on score_list after @prev,
 * or %NULL after a full round-trip.
 *
 * The caller must pass the return value back in @prev on subsequent
 * invocations for reference counting, or use get_next_memcg_break()
 * to cancel a walk before the round-trip is complete.
 */
struct mem_cgroup *get_next_memcg(struct mem_cgroup *prev)
{
	struct mem_cgroup *memcg = NULL;
	struct list_head *pos = NULL;
	unsigned long flags;

	if (unlikely(!score_head_inited))
		return NULL;

	read_lock_irqsave(&score_list_lock, flags);

	if (unlikely(!prev))
		pos = &score_head;
	else
		pos = &prev->score_node;

	if (list_empty(pos)) /* deleted node */
		goto unlock;

	if (pos->next == &score_head)
		goto unlock;

	memcg = list_entry(pos->next, struct mem_cgroup, score_node);

	if (!css_tryget(&memcg->css))
		memcg = NULL;

unlock:
	read_unlock_irqrestore(&score_list_lock, flags);

	if (prev)
		css_put(&prev->css);

	return memcg;
}

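/*
 * Illustrative sketch of a score_list walk (not part of this file):
 * each call drops the reference on @prev and takes one on the memcg it
 * returns, so a caller that stops early must hand the last memcg back
 * via get_next_memcg_break(). should_stop() below is a hypothetical
 * placeholder for the caller's own exit condition.
 *
 *	struct mem_cgroup *memcg = NULL;
 *
 *	while ((memcg = get_next_memcg(memcg))) {
 *		if (should_stop(memcg)) {
 *			get_next_memcg_break(memcg);
 *			break;
 *		}
 *		// ... use memcg ...
 *	}
 */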
void get_next_memcg_break(struct mem_cgroup *memcg)
{
	if (memcg)
		css_put(&memcg->css);
}

struct mem_cgroup *get_prev_memcg(struct mem_cgroup *next)
{
	struct mem_cgroup *memcg = NULL;
	struct list_head *pos = NULL;
	unsigned long flags;

	if (unlikely(!score_head_inited))
		return NULL;

	read_lock_irqsave(&score_list_lock, flags);

	if (unlikely(!next))
		pos = &score_head;
	else
		pos = &next->score_node;

	if (list_empty(pos)) /* deleted node */
		goto unlock;

	if (pos->prev == &score_head)
		goto unlock;

	memcg = list_entry(pos->prev, struct mem_cgroup, score_node);

	if (unlikely(!memcg))
		goto unlock;

	if (!css_tryget(&memcg->css))
		memcg = NULL;

unlock:
	read_unlock_irqrestore(&score_list_lock, flags);

	if (next)
		css_put(&next->css);
	return memcg;
}

void get_prev_memcg_break(struct mem_cgroup *memcg)
{
	if (memcg)
		css_put(&memcg->css);
}

/* re-position @target on score_list according to its current app_score */
void memcg_app_score_update(struct mem_cgroup *target)
{
	struct list_head *pos = NULL;
	struct list_head *tmp;
	unsigned long flags;

	write_lock_irqsave(&score_list_lock, flags);
	list_for_each_prev_safe(pos, tmp, &score_head) {
		struct mem_cgroup *memcg = list_entry(pos,
				struct mem_cgroup, score_node);
		if (atomic64_read(&memcg->memcg_reclaimed.app_score) <
		    atomic64_read(&target->memcg_reclaimed.app_score))
			break;
	}
	list_move_tail(&target->score_node, pos);
	write_unlock_irqrestore(&score_list_lock, flags);
}

static u64 mem_cgroup_app_score_read(struct cgroup_subsys_state *css,
				     struct cftype *cft)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);

	return atomic64_read(&memcg->memcg_reclaimed.app_score);
}

static int mem_cgroup_app_score_write(struct cgroup_subsys_state *css,
				      struct cftype *cft, u64 val)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);

	if (val > MAX_APP_SCORE)
		return -EINVAL;

	if (atomic64_read(&memcg->memcg_reclaimed.app_score) != val) {
		atomic64_set(&memcg->memcg_reclaimed.app_score, val);
		memcg_app_score_update(memcg);
	}

	return 0;
}

static unsigned long move_pages_to_page_list(struct lruvec *lruvec, enum lru_list lru,
					     struct list_head *page_list)
{
	struct list_head *src = &lruvec->lists[lru];
	unsigned long nr_isolated = 0;
	struct page *page;

	while (!list_empty(src)) {
		page = lru_to_page(src);

		if (PageUnevictable(page))
			continue;

		if (likely(get_page_unless_zero(page))) {
			if (isolate_lru_page(page)) {
				put_page(page);
				continue;
			}
			put_page(page);
		} else {
			continue;
		}

		if (PageUnevictable(page)) {
			putback_lru_page(page);
			continue;
		}

		/* lazy-free anon pages cannot be swapped out, skip them */
		if (PageAnon(page) && !PageSwapBacked(page)) {
			putback_lru_page(page);
			continue;
		}

		list_add(&page->lru, page_list);
		nr_isolated++;
	}

	return nr_isolated;
}

unsigned long reclaim_all_anon_memcg(struct pglist_data *pgdat, struct mem_cgroup *memcg)
{
	struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
	unsigned long nr_reclaimed;
	LIST_HEAD(page_list);
	struct page *page;
	struct reclaim_stat stat = {};
	struct scan_control sc = {
		.gfp_mask = GFP_KERNEL,
		.may_writepage = 1,
		.may_unmap = 1,
		.may_swap = 1,
	};

	count_vm_event(FREEZE_RECLAIME_COUNT);
	move_pages_to_page_list(lruvec, LRU_INACTIVE_ANON, &page_list);

	nr_reclaimed = shrink_page_list(&page_list, pgdat, &sc, &stat, true);
	count_vm_event(FREEZE_RECLAIMED);

	/* put back whatever shrink_page_list() could not reclaim */
	while (!list_empty(&page_list)) {
		page = lru_to_page(&page_list);
		list_del(&page->lru);
		putback_lru_page(page);
	}

	return nr_reclaimed;
}

static ssize_t memcg_force_shrink_anon(struct kernfs_open_file *of,
				       char *buf, size_t nbytes,
				       loff_t off)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
	struct pglist_data *pgdat;
	int nid;

	for_each_online_node(nid) {
		pgdat = NODE_DATA(nid);
		reclaim_all_anon_memcg(pgdat, memcg);
	}

	return nbytes;
}

static int memcg_name_show(struct seq_file *m, void *v)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));

	seq_printf(m, "%s\n", memcg->name);
	return 0;
}

static ssize_t memcg_name_write(struct kernfs_open_file *of, char *buf,
				size_t nbytes, loff_t off)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));

	buf = strstrip(buf);
	if (nbytes >= MEM_CGROUP_NAME_MAX_LEN)
		return -EINVAL;

	mutex_lock(&reclaim_para_lock);
	if (memcg)
		strcpy(memcg->name, buf);
	mutex_unlock(&reclaim_para_lock);

	return nbytes;
}

static int memcg_total_info_per_app_show(struct seq_file *m, void *v)
{
	struct mem_cgroup *memcg = NULL;
	struct mem_cgroup_per_node *mz = NULL;
	struct lruvec *lruvec = NULL;
	unsigned long anon_size;
	unsigned long zram_compress_size;
	unsigned long eswap_compress_size;

	while ((memcg = get_next_memcg(memcg))) {
		mz = mem_cgroup_nodeinfo(memcg, 0);
		if (!mz) {
			get_next_memcg_break(memcg);
			return 0;
		}

		lruvec = &mz->lruvec;
		if (!lruvec) {
			get_next_memcg_break(memcg);
			return 0;
		}

		/* report all sizes in KiB */
		anon_size = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, MAX_NR_ZONES) +
			lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, MAX_NR_ZONES);
		zram_compress_size = memcg_data_size(memcg, CACHE_SIZE);
		eswap_compress_size = memcg_data_size(memcg, SWAP_SIZE);
		anon_size *= PAGE_SIZE / SZ_1K;
		zram_compress_size /= SZ_1K;
		eswap_compress_size /= SZ_1K;

		if (!strlen(memcg->name))
			continue;

		seq_printf(m, "%s %lu %lu %lu\n", memcg->name, anon_size,
			   zram_compress_size, eswap_compress_size);
	}

	return 0;
}

static int memcg_ub_ufs2zram_ratio_write(struct cgroup_subsys_state *css,
					 struct cftype *cft, u64 val)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
	const unsigned int ratio = 100;

	if (val > ratio)
		return -EINVAL;

	atomic64_set(&memcg->memcg_reclaimed.ub_ufs2zram_ratio, val);

	return 0;
}

static u64 memcg_ub_ufs2zram_ratio_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);

	return atomic64_read(&memcg->memcg_reclaimed.ub_ufs2zram_ratio);
}

static int memcg_force_swapin_write(struct cgroup_subsys_state *css, struct cftype *cft, u64 val)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
	u64 size;
	const unsigned int ratio = 100;

	/* swap in ub_ufs2zram_ratio percent of this memcg's eswap data */
	size = memcg_data_size(memcg, SWAP_SIZE);
	size = div_u64(atomic64_read(&memcg->memcg_reclaimed.ub_ufs2zram_ratio) * size, ratio);

	swapin_memcg(memcg, size);

	return 0;
}

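/*
 * Worked example for memcg_force_swapin_write() (numbers are purely
 * illustrative): with memory.ub_ufs2zram_ratio set to 60 and
 * memcg_data_size(memcg, SWAP_SIZE) reporting 100 MiB of eswap data,
 * size = div_u64(60 * 100 MiB, 100) = 60 MiB, so swapin_memcg() is
 * asked to bring roughly 60 MiB of this memcg's eswap data back in.
 */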
static struct cftype memcg_policy_files[] = {
	{
		.name = "name",
		.write = memcg_name_write,
		.seq_show = memcg_name_show,
	},
	{
		.name = "ub_ufs2zram_ratio",
		.write_u64 = memcg_ub_ufs2zram_ratio_write,
		.read_u64 = memcg_ub_ufs2zram_ratio_read,
	},
	{
		.name = "total_info_per_app",
		.seq_show = memcg_total_info_per_app_show,
	},
	{
		.name = "app_score",
		.write_u64 = mem_cgroup_app_score_write,
		.read_u64 = mem_cgroup_app_score_read,
	},
	{
		.name = "force_shrink_anon",
		.write = memcg_force_shrink_anon,
	},
	{
		.name = "force_swapin",
		.write_u64 = memcg_force_swapin_write,
	},
	{ }, /* terminate */
};

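/*
 * The entries above are registered on the legacy memory controller, so
 * each memcg directory gains memory.name, memory.ub_ufs2zram_ratio,
 * memory.total_info_per_app, memory.app_score, memory.force_shrink_anon
 * and memory.force_swapin. Example shell usage (the /dev/memcg mount
 * point and the values are assumptions, not mandated by this file):
 *
 *	echo example_app > /dev/memcg/app/memory.name
 *	echo 300 > /dev/memcg/app/memory.app_score
 *	echo 60 > /dev/memcg/app/memory.ub_ufs2zram_ratio
 *	cat /dev/memcg/memory.total_info_per_app
 */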
static int __init memcg_policy_init(void)
{
	if (!mem_cgroup_disabled())
		WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys,
						  memcg_policy_files));

	return 0;
}
subsys_initcall(memcg_policy_init);
#else
struct mem_cgroup *get_next_memcg(struct mem_cgroup *prev)
{
	return NULL;
}

void get_next_memcg_break(struct mem_cgroup *memcg)
{
}

struct mem_cgroup *get_prev_memcg(struct mem_cgroup *next)
{
	return NULL;
}

void get_prev_memcg_break(struct mem_cgroup *memcg)
{
}

static u64 mem_cgroup_app_score_read(struct cgroup_subsys_state *css,
				     struct cftype *cft)
{
	return 0;
}

static int mem_cgroup_app_score_write(struct cgroup_subsys_state *css,
				      struct cftype *cft, u64 val)
{
	return 0;
}

void memcg_app_score_update(struct mem_cgroup *target)
{
}
#endif