1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (c) 2016-2019 FUJITSU LIMITED. All rights reserved.
4 * Author: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
5 * Ported: Guangwen Feng <fenggw-fnst@cn.fujitsu.com>
6 * Ported: Xiao Yang <yangx.jy@cn.fujitsu.com>
7 * Ported: Yang Xu <xuyang2018.jy@cn.fujitsu.com>
8 */
9
10 /*\
11 * [Description]
12 *
13 * *Test 1*
14 *
15 * This is a regression test for the race condition between move_pages()
16 * and freeing hugepages, where move_pages() calls follow_page(FOLL_GET)
17 * for hugepages internally and tries to get its refcount without
18 * preventing concurrent freeing.
19 *
20 * This test can crash the buggy kernel, and the bug was fixed in:
21 *
22 * commit e66f17ff71772b209eed39de35aaa99ba819c93d
23 * Author: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
24 * Date: Wed Feb 11 15:25:22 2015 -0800
25 *
26 * mm/hugetlb: take page table lock in follow_huge_pmd()
27 *
28 * *Test 2.1*
29 *
30 * This is a regression test for the race condition, where move_pages()
31 * and soft offline are called on a single hugetlb page concurrently.
32 *
33 * This test can crash the buggy kernel, and was fixed by:
34 *
35 * commit c9d398fa237882ea07167e23bcfc5e6847066518
36 * Author: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
37 * Date: Fri Mar 31 15:11:55 2017 -0700
38 *
39 * mm, hugetlb: use pte_present() instead of pmd_present() in follow_huge_pmd()
40 *
41 * *Test 2.2*
42 *
43 * This is also a regression test for a race condition causing SIGBUS
44 * in hugepage migration/fault.
45 *
46 * This bug was fixed by:
47 *
48 * commit 4643d67e8cb0b3536ef0ab5cddd1cedc73fa14ad
49 * Author: Mike Kravetz <mike.kravetz@oracle.com>
50 * Date: Tue Aug 13 15:38:00 2019 -0700
51 *
52 * hugetlbfs: fix hugetlb page migration/fault race causing SIGBUS
53 *
54 * *Test 2.3*
55 *
56 * The madvise() in do_soft_offline() was also triggering cases where soft
57 * offline returned EIO when page migration failed, which was fixed in:
58 *
59 * commit 3f4b815a439adfb8f238335612c4b28bc10084d8
60 * Author: Oscar Salvador <osalvador@suse.de>
61 * Date: Mon Dec 14 19:11:51 2020 -0800
62 *
63 * mm,hwpoison: return -EBUSY when migration fails
64 */
65
66 #include <errno.h>
67 #include <unistd.h>
68 #include <string.h>
69 #include <stdio.h>
70 #include <sys/types.h>
71 #include <sys/wait.h>
72
73 #include "tst_test.h"
74 #include "move_pages_support.h"
75 #include "lapi/mmap.h"
76
77 #ifdef HAVE_NUMA_V2
78
79 #define LOOPS 1000
80 #define PATH_MEMINFO "/proc/meminfo"
81 #define PATH_NR_HUGEPAGES "/proc/sys/vm/nr_hugepages"
82 #define PATH_HUGEPAGES "/sys/kernel/mm/hugepages/"
83 #define TEST_NODES 2
84
85 static struct tcase {
86 int tpages;
87 int offline;
88 } tcases[] = {
89 {2, 0},
90 {2, 1},
91 };
92
93 static int pgsz, hpsz;
94 static long orig_hugepages = -1;
95 static char path_hugepages_node1[PATH_MAX];
96 static char path_hugepages_node2[PATH_MAX];
97 static long orig_hugepages_node1 = -1;
98 static long orig_hugepages_node2 = -1;
99 static unsigned int node1, node2;
100 static void *addr;
101
do_soft_offline(int tpgs)102 static int do_soft_offline(int tpgs)
103 {
104 if (madvise(addr, tpgs * hpsz, MADV_SOFT_OFFLINE) == -1) {
105 if (errno != EINVAL && errno != EBUSY)
106 tst_res(TFAIL | TERRNO, "madvise failed");
107 return errno;
108 }
109 return 0;
110 }
111
do_child(int tpgs)112 static void do_child(int tpgs)
113 {
114 int test_pages = tpgs * hpsz / pgsz;
115 int i, j;
116 int *nodes, *status;
117 void **pages;
118 pid_t ppid = getppid();
119
120 pages = SAFE_MALLOC(sizeof(char *) * test_pages);
121 nodes = SAFE_MALLOC(sizeof(int) * test_pages);
122 status = SAFE_MALLOC(sizeof(int) * test_pages);
123
124 for (i = 0; i < test_pages; i++)
125 pages[i] = addr + i * pgsz;
126
127 for (i = 0; ; i++) {
128 for (j = 0; j < test_pages; j++) {
129 if (i % 2 == 0)
130 nodes[j] = node1;
131 else
132 nodes[j] = node2;
133 status[j] = 0;
134 }
135
136 TEST(numa_move_pages(ppid, test_pages,
137 pages, nodes, status, MPOL_MF_MOVE_ALL));
138 if (TST_RET < 0) {
139 if (errno == ENOMEM)
140 continue;
141
142 tst_res(TFAIL | TTERRNO, "move_pages failed");
143 break;
144 }
145 }
146
147 exit(0);
148 }
149
do_test(unsigned int n)150 static void do_test(unsigned int n)
151 {
152 int i, ret;
153 void *ptr;
154 pid_t cpid = -1;
155 int status;
156
157 addr = SAFE_MMAP(NULL, tcases[n].tpages * hpsz, PROT_READ | PROT_WRITE,
158 MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
159
160 SAFE_MUNMAP(addr, tcases[n].tpages * hpsz);
161
162 cpid = SAFE_FORK();
163 if (cpid == 0)
164 do_child(tcases[n].tpages);
165
166 for (i = 0; i < LOOPS; i++) {
167 ptr = mmap(NULL, tcases[n].tpages * hpsz,
168 PROT_READ | PROT_WRITE,
169 MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
170 if (ptr == MAP_FAILED) {
171 if (i == 0)
172 tst_brk(TBROK | TERRNO, "Cannot allocate hugepage");
173
174 if (errno == ENOMEM) {
175 usleep(1000);
176 continue;
177 }
178 }
179
180 if (ptr != addr)
181 tst_brk(TBROK, "Failed to mmap at desired addr");
182
183 memset(addr, 0, tcases[n].tpages * hpsz);
184
185 if (tcases[n].offline) {
186 ret = do_soft_offline(tcases[n].tpages);
187
188 if (ret == EINVAL) {
189 SAFE_KILL(cpid, SIGKILL);
190 SAFE_WAITPID(cpid, &status, 0);
191 SAFE_MUNMAP(addr, tcases[n].tpages * hpsz);
192 tst_res(TCONF,
193 "madvise() didn't support MADV_SOFT_OFFLINE");
194 return;
195 }
196 }
197
198 SAFE_MUNMAP(addr, tcases[n].tpages * hpsz);
199
200 if (!tst_remaining_runtime())
201 break;
202 }
203
204 SAFE_KILL(cpid, SIGKILL);
205 SAFE_WAITPID(cpid, &status, 0);
206 if (!WIFEXITED(status))
207 tst_res(TPASS, "Bug not reproduced");
208 }
209
alloc_free_huge_on_node(unsigned int node,size_t size)210 static void alloc_free_huge_on_node(unsigned int node, size_t size)
211 {
212 char *mem;
213 long ret;
214 struct bitmask *bm;
215
216 tst_res(TINFO, "Allocating and freeing %zu hugepages on node %u",
217 size / hpsz, node);
218
219 mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
220 MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
221 if (mem == MAP_FAILED) {
222 if (errno == ENOMEM)
223 tst_brk(TCONF, "Cannot allocate huge pages");
224
225 tst_brk(TBROK | TERRNO, "mmap(..., MAP_HUGETLB, ...) failed");
226 }
227
228 bm = numa_bitmask_alloc(numa_max_possible_node() + 1);
229 if (!bm)
230 tst_brk(TBROK | TERRNO, "numa_bitmask_alloc() failed");
231
232 numa_bitmask_setbit(bm, node);
233
234 ret = mbind(mem, size, MPOL_BIND, bm->maskp, bm->size + 1, 0);
235 if (ret) {
236 if (errno == ENOMEM)
237 tst_brk(TCONF, "Cannot mbind huge pages");
238
239 tst_brk(TBROK | TERRNO, "mbind() failed");
240 }
241
242 TEST(mlock(mem, size));
243 if (TST_RET) {
244 SAFE_MUNMAP(mem, size);
245 if (TST_ERR == ENOMEM || TST_ERR == EAGAIN)
246 tst_brk(TCONF, "Cannot lock huge pages");
247 tst_brk(TBROK | TTERRNO, "mlock failed");
248 }
249
250 numa_bitmask_free(bm);
251
252 SAFE_MUNMAP(mem, size);
253 }
254
setup(void)255 static void setup(void)
256 {
257 int ret;
258 long memfree;
259
260 check_config(TEST_NODES);
261
262 if (access(PATH_HUGEPAGES, F_OK))
263 tst_brk(TCONF, "Huge page not supported");
264
265 ret = get_allowed_nodes(NH_MEMS, TEST_NODES, &node1, &node2);
266 if (ret < 0)
267 tst_brk(TBROK | TERRNO, "get_allowed_nodes: %d", ret);
268
269 pgsz = (int)get_page_size();
270 SAFE_FILE_LINES_SCANF(PATH_MEMINFO, "Hugepagesize: %d", &hpsz);
271
272 SAFE_FILE_LINES_SCANF(PATH_MEMINFO, "MemFree: %ld", &memfree);
273 tst_res(TINFO, "Free RAM %ld kB", memfree);
274
275 if (4 * hpsz > memfree)
276 tst_brk(TBROK, "Not enough free RAM");
277
278 snprintf(path_hugepages_node1, sizeof(path_hugepages_node1),
279 "/sys/devices/system/node/node%u/hugepages/hugepages-%dkB/nr_hugepages",
280 node1, hpsz);
281
282 snprintf(path_hugepages_node2, sizeof(path_hugepages_node2),
283 "/sys/devices/system/node/node%u/hugepages/hugepages-%dkB/nr_hugepages",
284 node2, hpsz);
285
286 if (!access(path_hugepages_node1, F_OK)) {
287 SAFE_FILE_SCANF(path_hugepages_node1,
288 "%ld", &orig_hugepages_node1);
289 tst_res(TINFO,
290 "Increasing %dkB hugepages pool on node %u to %ld",
291 hpsz, node1, orig_hugepages_node1 + 4);
292 SAFE_FILE_PRINTF(path_hugepages_node1,
293 "%ld", orig_hugepages_node1 + 4);
294 }
295
296 if (!access(path_hugepages_node2, F_OK)) {
297 SAFE_FILE_SCANF(path_hugepages_node2,
298 "%ld", &orig_hugepages_node2);
299 tst_res(TINFO,
300 "Increasing %dkB hugepages pool on node %u to %ld",
301 hpsz, node2, orig_hugepages_node2 + 4);
302 SAFE_FILE_PRINTF(path_hugepages_node2,
303 "%ld", orig_hugepages_node2 + 4);
304 }
305
306 hpsz *= 1024;
307
308 if (orig_hugepages_node1 == -1 || orig_hugepages_node2 == -1) {
309 SAFE_FILE_SCANF(PATH_NR_HUGEPAGES, "%ld", &orig_hugepages);
310 tst_res(TINFO, "Increasing global hugepages pool to %ld",
311 orig_hugepages + 8);
312 SAFE_FILE_PRINTF(PATH_NR_HUGEPAGES, "%ld", orig_hugepages + 8);
313 }
314
315 alloc_free_huge_on_node(node1, 4L * hpsz);
316 alloc_free_huge_on_node(node2, 4L * hpsz);
317 }
318
cleanup(void)319 static void cleanup(void)
320 {
321 if (orig_hugepages != -1)
322 SAFE_FILE_PRINTF(PATH_NR_HUGEPAGES, "%ld", orig_hugepages);
323
324 if (orig_hugepages_node1 != -1) {
325 SAFE_FILE_PRINTF(path_hugepages_node1,
326 "%ld", orig_hugepages_node1);
327 }
328
329 if (orig_hugepages_node2 != -1) {
330 SAFE_FILE_PRINTF(path_hugepages_node2,
331 "%ld", orig_hugepages_node2);
332 }
333 }
334
335 static struct tst_test test = {
336 .needs_root = 1,
337 .forks_child = 1,
338 .setup = setup,
339 .cleanup = cleanup,
340 .test = do_test,
341 .tcnt = ARRAY_SIZE(tcases),
342 .max_runtime = 240,
343 .tags = (const struct tst_tag[]) {
344 {"linux-git", "e66f17ff7177"},
345 {"linux-git", "c9d398fa2378"},
346 {"linux-git", "4643d67e8cb0"},
347 {"linux-git", "3f4b815a439a"},
348 {}
349 }
350 };
351
352 #else
353 TST_TEST_TCONF(NUMA_ERROR_MSG);
354 #endif
355