1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (c) 2017 Cyril Hrubis <chrubis@suse.cz>
4 */
5
6 /*
7 * Check that memory marked with MADV_FREE is freed on memory pressure.
8 *
9 * o Fork a child and move it into a memory cgroup
10 *
11 * o Allocate pages and fill them with a pattern
12 *
13 * o Madvise pages with MADV_FREE
14 *
15 * o Check that madvised pages were not freed immediately
16 *
17 * o Write to some of the madvised pages again, these must not be freed
18 *
19 * o Set memory limits
20 * - limit_in_bytes = 8MB
21 * - memsw.limit_in_bytes = 16MB
22 *
 * The reason for doubling the limit_in_bytes is to have a safe margin
 * for forking the memory hungry child etc. And the reason for setting
 * memsw.limit_in_bytes to twice of that is to give the system a chance
 * to try to free some memory before cgroup OOM kicks in and kills
 * the memory hungry child.
28 *
29 * o Run a memory hungry child that allocates memory in loop until it's
30 * killed by cgroup OOM
31 *
 * o Once the child is killed the MADV_FREE pages that were not written to
 *   should be freed, the test passes if at least one of them was freed
34 */
35
36 #include <stdlib.h>
37 #include <sys/wait.h>
38 #include <fcntl.h>
39 #include <unistd.h>
40 #include <signal.h>
41 #include <errno.h>
42 #include <stdio.h>
43 #include <ctype.h>
44
45 #include "tst_test.h"
46 #include "lapi/mmap.h"
47
/* Directory of the memory cgroup hierarchy the test creates its group in. */
#define MEMCG_PATH "/sys/fs/cgroup/memory/"

/* Paths of the per-test cgroup and its control files, see setup_cgroup_paths(). */
static char cgroup_path[PATH_MAX];
static char tasks_path[PATH_MAX];
static char limit_in_bytes_path[PATH_MAX];
static char memsw_limit_in_bytes_path[PATH_MAX];

/* Cached getpagesize() value, set in setup(). */
static size_t page_size;
/* Delay passed to usleep() between page faults in memory_pressure_child(). */
static int sleep_between_faults;

/* Non-zero when memory.memsw.limit_in_bytes exists under MEMCG_PATH. */
static int swap_accounting_enabled;

/* Number of pages that are mapped and marked with MADV_FREE. */
#define PAGES 128
/* Indexes of the two pages that are written to again after madvise(). */
#define TOUCHED_PAGE1 0
#define TOUCHED_PAGE2 10
63
memory_pressure_child(void)64 static void memory_pressure_child(void)
65 {
66 size_t i, page_size = getpagesize();
67 char *ptr;
68
69 for (;;) {
70 ptr = mmap(NULL, 500 * page_size, PROT_READ | PROT_WRITE,
71 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
72
73 for (i = 0; i < 500; i++) {
74 ptr[i * page_size] = i % 100;
75 usleep(sleep_between_faults);
76 }
77
78 /* If swap accounting is disabled exit after process swapped out 100MB */
79 if (!swap_accounting_enabled) {
80 int swapped;
81
82 SAFE_FILE_LINES_SCANF("/proc/self/status", "VmSwap: %d", &swapped);
83
84 if (swapped > 100 * 1024)
85 exit(0);
86 }
87
88 }
89
90 abort();
91 }
92
setup_cgroup_paths(int pid)93 static void setup_cgroup_paths(int pid)
94 {
95 snprintf(cgroup_path, sizeof(cgroup_path),
96 MEMCG_PATH "ltp_madvise09_%i/", pid);
97 snprintf(tasks_path, sizeof(tasks_path), "%s/tasks", cgroup_path);
98 snprintf(limit_in_bytes_path, sizeof(limit_in_bytes_path),
99 "%s/memory.limit_in_bytes", cgroup_path);
100 snprintf(memsw_limit_in_bytes_path, sizeof(memsw_limit_in_bytes_path),
101 "%s/memory.memsw.limit_in_bytes", cgroup_path);
102 }
103
count_freed(char * ptr)104 static int count_freed(char *ptr)
105 {
106 int i, ret = 0;
107
108 for (i = 0; i < PAGES; i++) {
109 if (!ptr[i * page_size])
110 ret++;
111 }
112
113 return ret;
114 }
115
check_page_baaa(char * ptr)116 static int check_page_baaa(char *ptr)
117 {
118 unsigned int i;
119
120 if (ptr[0] != 'b') {
121 tst_res(TINFO, "%p unexpected %c (%i) at 0 expected 'b'",
122 ptr, isprint(ptr[0]) ? ptr[0] : ' ', ptr[0]);
123 return 1;
124 }
125
126 for (i = 1; i < page_size; i++) {
127 if (ptr[i] != 'a') {
128 tst_res(TINFO,
129 "%p unexpected %c (%i) at %i expected 'a'",
130 ptr, isprint(ptr[i]) ? ptr[i] : ' ',
131 ptr[i], i);
132 return 1;
133 }
134 }
135
136 return 0;
137 }
138
check_page(char * ptr,char val)139 static int check_page(char *ptr, char val)
140 {
141 unsigned int i;
142
143 for (i = 0; i < page_size; i++) {
144 if (ptr[i] != val) {
145 tst_res(TINFO,
146 "%p unexpected %c (%i) at %i expected %c (%i)",
147 ptr, isprint(ptr[i]) ? ptr[i] : ' ', ptr[i], i,
148 isprint(val) ? val : ' ', val);
149 return 1;
150 }
151 }
152
153 return 0;
154 }
155
child(void)156 static void child(void)
157 {
158 size_t i;
159 char *ptr;
160 unsigned int usage, old_limit, old_memsw_limit;
161 int status, pid, retries = 0;
162
163 SAFE_MKDIR(cgroup_path, 0777);
164 SAFE_FILE_PRINTF(tasks_path, "%i", getpid());
165
166 ptr = SAFE_MMAP(NULL, PAGES * page_size, PROT_READ | PROT_WRITE,
167 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
168
169 for (i = 0; i < PAGES * page_size; i++)
170 ptr[i] = 'a';
171
172 if (madvise(ptr, PAGES * page_size, MADV_FREE)) {
173 if (errno == EINVAL)
174 tst_brk(TCONF | TERRNO, "MADV_FREE is not supported");
175
176 tst_brk(TBROK | TERRNO, "MADV_FREE failed");
177 }
178
179 if (ptr[page_size] != 'a')
180 tst_res(TFAIL, "MADV_FREE pages were freed immediately");
181 else
182 tst_res(TPASS, "MADV_FREE pages were not freed immediately");
183
184 ptr[TOUCHED_PAGE1 * page_size] = 'b';
185 ptr[TOUCHED_PAGE2 * page_size] = 'b';
186
187 usage = 8 * 1024 * 1024;
188 tst_res(TINFO, "Setting memory limits to %u %u", usage, 2 * usage);
189
190 SAFE_FILE_SCANF(limit_in_bytes_path, "%u", &old_limit);
191
192 if (swap_accounting_enabled)
193 SAFE_FILE_SCANF(memsw_limit_in_bytes_path, "%u", &old_memsw_limit);
194
195 SAFE_FILE_PRINTF(limit_in_bytes_path, "%u", usage);
196
197 if (swap_accounting_enabled)
198 SAFE_FILE_PRINTF(memsw_limit_in_bytes_path, "%u", 2 * usage);
199
200 do {
201 sleep_between_faults++;
202
203 pid = SAFE_FORK();
204 if (!pid)
205 memory_pressure_child();
206
207 tst_res(TINFO, "Memory hungry child %i started, try %i", pid, retries);
208
209 SAFE_WAIT(&status);
210 } while (retries++ < 10 && count_freed(ptr) == 0);
211
212 char map[PAGES+1];
213 unsigned int freed = 0;
214 unsigned int corrupted = 0;
215
216 for (i = 0; i < PAGES; i++) {
217 char exp_val;
218
219 if (ptr[i * page_size]) {
220 exp_val = 'a';
221 map[i] = 'p';
222 } else {
223 exp_val = 0;
224 map[i] = '_';
225 freed++;
226 }
227
228 if (i != TOUCHED_PAGE1 && i != TOUCHED_PAGE2) {
229 if (check_page(ptr + i * page_size, exp_val)) {
230 map[i] = '?';
231 corrupted++;
232 }
233 } else {
234 if (check_page_baaa(ptr + i * page_size)) {
235 map[i] = '?';
236 corrupted++;
237 }
238 }
239 }
240 map[PAGES] = '\0';
241
242 tst_res(TINFO, "Memory map: %s", map);
243
244 if (freed)
245 tst_res(TPASS, "Pages MADV_FREE were freed on low memory");
246 else
247 tst_res(TFAIL, "No MADV_FREE page was freed on low memory");
248
249 if (corrupted)
250 tst_res(TFAIL, "Found corrupted page");
251 else
252 tst_res(TPASS, "All pages have expected content");
253
254 if (swap_accounting_enabled)
255 SAFE_FILE_PRINTF(memsw_limit_in_bytes_path, "%u", old_memsw_limit);
256
257 SAFE_FILE_PRINTF(limit_in_bytes_path, "%u", old_limit);
258
259 SAFE_MUNMAP(ptr, PAGES);
260
261 exit(0);
262 }
263
cleanup(void)264 static void cleanup(void)
265 {
266 if (cgroup_path[0] && !access(cgroup_path, F_OK))
267 rmdir(cgroup_path);
268 }
269
run(void)270 static void run(void)
271 {
272 pid_t pid;
273 int status;
274
275 retry:
276 pid = SAFE_FORK();
277
278 if (!pid) {
279 setup_cgroup_paths(getpid());
280 child();
281 }
282
283 setup_cgroup_paths(pid);
284 SAFE_WAIT(&status);
285 cleanup();
286
287 /*
288 * Rarely cgroup OOM kills both children not only the one that allocates
289 * memory in loop, hence we retry here if that happens.
290 */
291 if (WIFSIGNALED(status)) {
292 tst_res(TINFO, "Both children killed, retrying...");
293 goto retry;
294 }
295
296 if (WIFEXITED(status) && WEXITSTATUS(status) == TCONF)
297 tst_brk(TCONF, "MADV_FREE is not supported");
298
299 if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
300 tst_brk(TBROK, "Child %s", tst_strstatus(status));
301 }
302
setup(void)303 static void setup(void)
304 {
305 long int swap_total;
306
307 if (access(MEMCG_PATH, F_OK)) {
308 tst_brk(TCONF, "'" MEMCG_PATH
309 "' not present, CONFIG_MEMCG missing?");
310 }
311
312 if (!access(MEMCG_PATH "memory.memsw.limit_in_bytes", F_OK))
313 swap_accounting_enabled = 1;
314 else
315 tst_res(TINFO, "Swap accounting is disabled");
316
317 SAFE_FILE_LINES_SCANF("/proc/meminfo", "SwapTotal: %ld", &swap_total);
318 if (swap_total <= 0)
319 tst_brk(TCONF, "MADV_FREE does not work without swap");
320
321 page_size = getpagesize();
322 }
323
/* Test description picked up by the LTP test library. */
static struct tst_test test = {
	.setup = setup,
	.cleanup = cleanup,
	.test_all = run,
	/* The test creates a cgroup and writes to its control files. */
	.needs_root = 1,
	/* run() and child() call SAFE_FORK(). */
	.forks_child = 1,
};
331