1 /*
2 * Copyright (C) 2012-2017 Red Hat, Inc.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details.
13 *
14 * Description:
15 *
16 * The program is designed to test max_map_count tunable file
17 *
 * The kernel documentation says that:
19 * /proc/sys/vm/max_map_count contains the maximum number of memory map
20 * areas a process may have. Memory map areas are used as a side-effect
21 * of calling malloc, directly by mmap and mprotect, and also when
22 * loading shared libraries.
23 *
 * Each process has its own maps file, /proc/[pid]/maps, in which each line
 * describes one map entry, so the number of maps can be calculated by
 * counting the lines of that file in order to check the tunable's behavior.
27 *
 * The program invokes mmap() repeatedly until it returns MAP_FAILED, then
 * reads the process's maps file /proc/[pid]/maps, stores the number of lines
 * in the map_count variable, and compares it with /proc/sys/vm/max_map_count;
 * map_count should be greater than max_map_count by 1.
32 *
33 * Note: On some architectures there is a special vma VSYSCALL, which
34 * is allocated without incrementing mm->map_count variable. On these
35 * architectures each /proc/<pid>/maps has at the end:
36 * ...
37 * ...
38 * ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]
39 *
40 * so we ignore this line during /proc/[pid]/maps reading.
41 */
42
43 #define _GNU_SOURCE
44 #include <sys/wait.h>
45 #include <errno.h>
46 #include <fcntl.h>
47 #include <stdbool.h>
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <sys/utsname.h>
51 #include "mem.h"
52
53 #define MAP_COUNT_DEFAULT 1024
54 #define MAX_MAP_COUNT 65536L
55
56 static long old_max_map_count = -1;
57 static long old_overcommit = -1;
58 static struct utsname un;
59
setup(void)60 static void setup(void)
61 {
62 if (access(PATH_SYSVM "max_map_count", F_OK) != 0)
63 tst_brk(TBROK | TERRNO,
64 "Can't support to test max_map_count");
65
66 old_max_map_count = get_sys_tune("max_map_count");
67 old_overcommit = get_sys_tune("overcommit_memory");
68 set_sys_tune("overcommit_memory", 2, 1);
69
70 if (uname(&un) != 0)
71 tst_brk(TBROK | TERRNO, "uname error");
72 }
73
cleanup(void)74 static void cleanup(void)
75 {
76 if (old_overcommit != -1)
77 set_sys_tune("overcommit_memory", old_overcommit, 0);
78 if (old_max_map_count != -1)
79 set_sys_tune("max_map_count", old_max_map_count, 0);
80 }
81
/*
 * Filter for map entries which aren't accounted for in the mm's map_count
 * and therefore must not be counted when reading /proc/[pid]/maps.
 *
 * line: one line of a /proc/[pid]/maps file.
 *
 * Returns true when the entry has to be skipped, false when it has to be
 * counted. Lines that do not parse, or that carry no name in the last
 * column, are reported as false.
 */
static bool filter_map(const char *line)
{
	/*
	 * Only short bracketed names are compared below, so a small buffer
	 * paired with an explicit field width is sufficient. A longer name is
	 * truncated to 63 characters, which can never equal one of the short
	 * names, instead of overrunning the buffer as an unbounded %s would.
	 */
	char name[64];

	if (sscanf(line, "%*p-%*p %*4s %*p %*2d:%*2d %*d %63s", name) != 1)
		return false;

	/*
	 * NOTE(review): __x86__ does not look like a macro that GCC or Clang
	 * predefine (32-bit x86 is normally __i386__) - confirm the intent.
	 */
#if defined(__x86_64__) || defined(__x86__)
	/* On x86, there's an old compat vsyscall page */
	if (!strcmp(name, "[vsyscall]"))
		return true;
#elif defined(__ia64__)
	/* On ia64, the vdso is not a proper mapping */
	if (!strcmp(name, "[vdso]"))
		return true;
#elif defined(__arm__)
	/* Nothing to skip when a 32-bit arm build runs on aarch64 */
	if ((!strcmp(un.machine, "aarch64"))
	     || (!strcmp(un.machine, "aarch64_be")))
		return false;

	/*
	 * Older arm kernels didn't label their vdso maps.
	 * NOTE(review): an entry without any label already returned false
	 * above because no name could be parsed, so this address check only
	 * sees labeled lines - confirm whether it should run before the parse.
	 */
	if (!strncmp(line, "ffff0000-ffff1000", 17))
		return true;
#endif

	return false;
}
115
count_maps(pid_t pid)116 static long count_maps(pid_t pid)
117 {
118 FILE *fp;
119 size_t len;
120 char *line = NULL;
121 char buf[BUFSIZ];
122 long map_count = 0;
123
124 snprintf(buf, BUFSIZ, "/proc/%d/maps", pid);
125 fp = fopen(buf, "r");
126 if (fp == NULL)
127 tst_brk(TBROK | TERRNO, "fopen %s", buf);
128 while (getline(&line, &len, fp) != -1) {
129 /* exclude vdso and vsyscall */
130 if (filter_map(line))
131 continue;
132 map_count++;
133 }
134 fclose(fp);
135
136 return map_count;
137 }
138
max_map_count_test(void)139 static void max_map_count_test(void)
140 {
141 int status;
142 pid_t pid;
143 long max_maps;
144 long map_count;
145 long max_iters;
146 long memfree;
147
148 /*
149 * XXX Due to a possible kernel bug, oom-killer can be easily
150 * triggered when doing small piece mmaps in huge amount even if
151 * enough free memory available. Also it has been observed that
152 * oom-killer often kill wrong victims in this situation, we
153 * decided to do following steps to make sure no oom happen:
154 * 1) use a safe maximum max_map_count value as upper-bound,
155 * we set it 65536 in this case, i.e., we don't test too big
156 * value;
157 * 2) make sure total mapping isn't larger tha
158 * CommitLimit - Committed_AS
159 * and set overcommit_memory to 2, this could help mapping
160 * returns ENOMEM instead of triggering oom-killer when
161 * memory is tight. (When there are enough free memory,
162 * step 1) will be used first.
163 * Hope OOM-killer can be more stable oneday.
164 */
165 memfree = SAFE_READ_MEMINFO("CommitLimit:") - SAFE_READ_MEMINFO("Committed_AS:");
166 /* 64 used as a bias to make sure no overflow happen */
167 max_iters = memfree / sysconf(_SC_PAGESIZE) * 1024 - 64;
168 if (max_iters > MAX_MAP_COUNT)
169 max_iters = MAX_MAP_COUNT;
170
171 max_maps = MAP_COUNT_DEFAULT;
172 while (max_maps <= max_iters) {
173 set_sys_tune("max_map_count", max_maps, 1);
174
175 switch (pid = SAFE_FORK()) {
176 case 0:
177 while (mmap(NULL, 1, PROT_READ,
178 MAP_SHARED | MAP_ANONYMOUS, -1, 0)
179 != MAP_FAILED) ;
180 if (raise(SIGSTOP) != 0)
181 tst_brk(TBROK | TERRNO, "raise");
182 exit(0);
183 default:
184 break;
185 }
186 /* wait child done mmap and stop */
187 SAFE_WAITPID(pid, &status, WUNTRACED);
188 if (!WIFSTOPPED(status))
189 tst_brk(TBROK, "child did not stopped");
190
191 map_count = count_maps(pid);
192 /* Note max_maps will be exceeded by one for
193 * the sysctl setting of max_map_count. This
194 * is the mm failure point at the time of
195 * writing this COMMENT!
196 */
197 if (map_count == (max_maps + 1))
198 tst_res(TPASS, "%ld map entries in total "
199 "as expected.", max_maps);
200 else
201 tst_res(TFAIL, "%ld map entries in total, but "
202 "expected %ld entries", map_count, max_maps);
203
204 /* make child continue to exit */
205 SAFE_KILL(pid, SIGCONT);
206 SAFE_WAITPID(pid, &status, 0);
207
208 max_maps = max_maps << 1;
209 }
210 }
211
212 static struct tst_test test = {
213 .needs_root = 1,
214 .forks_child = 1,
215 .setup = setup,
216 .cleanup = cleanup,
217 .test_all = max_map_count_test,
218 };
219