// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  Copyright (c) 2016-2019 FUJITSU LIMITED. All rights reserved.
 *  Author: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
 *  Ported: Guangwen Feng <fenggw-fnst@cn.fujitsu.com>
 *  Ported: Xiao Yang <yangx.jy@cn.fujitsu.com>
 *  Ported: Yang Xu <xuyang2018.jy@cn.fujitsu.com>
 */

/*\
 * [Description]
 *
 * *Test 1*
 *
 * This is a regression test for the race condition between move_pages()
 * and freeing hugepages, where move_pages() calls follow_page(FOLL_GET)
 * for hugepages internally and tries to get its refcount without
 * preventing concurrent freeing.
 *
 * This test can crash the buggy kernel, and the bug was fixed in:
 *
 *   commit e66f17ff71772b209eed39de35aaa99ba819c93d
 *   Author: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
 *   Date:   Wed Feb 11 15:25:22 2015 -0800
 *
 *   mm/hugetlb: take page table lock in follow_huge_pmd()
 *
 * *Test 2.1*
 *
 * This is a regression test for the race condition where move_pages()
 * and soft offline are called on a single hugetlb page concurrently.
 *
 * This test can crash the buggy kernel, and the bug was fixed by:
 *
 *   commit c9d398fa237882ea07167e23bcfc5e6847066518
 *   Author: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
 *   Date:   Fri Mar 31 15:11:55 2017 -0700
 *
 *   mm, hugetlb: use pte_present() instead of pmd_present() in follow_huge_pmd()
 *
 * *Test 2.2*
 *
 * This is also a regression test for a race condition causing SIGBUS
 * in hugepage migration/fault.
 *
 * This bug was fixed by:
 *
 *   commit 4643d67e8cb0b3536ef0ab5cddd1cedc73fa14ad
 *   Author: Mike Kravetz <mike.kravetz@oracle.com>
 *   Date:   Tue Aug 13 15:38:00 2019 -0700
 *
 *   hugetlbfs: fix hugetlb page migration/fault race causing SIGBUS
 *
 * *Test 2.3*
 *
 * The madvise() in do_soft_offline() was also triggering cases where soft
 * offline returned EIO when page migration failed, which was fixed in:
 *
 *    commit 3f4b815a439adfb8f238335612c4b28bc10084d8
 *    Author: Oscar Salvador <osalvador@suse.de>
 *    Date:   Mon Dec 14 19:11:51 2020 -0800
 *
 *    mm,hwpoison: return -EBUSY when migration fails
 */
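
/*
 * Test strategy, as implemented below: the parent repeatedly maps, touches,
 * optionally soft-offlines and unmaps a range of hugepages, while a forked
 * child keeps calling move_pages() on the same range to migrate the pages
 * back and forth between two NUMA nodes, widening the race window.
 */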

#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>

#include "tst_test.h"
#include "move_pages_support.h"
#include "lapi/mmap.h"

#ifdef HAVE_NUMA_V2

#define LOOPS	1000
#define PATH_MEMINFO	"/proc/meminfo"
#define PATH_NR_HUGEPAGES	"/proc/sys/vm/nr_hugepages"
#define PATH_HUGEPAGES	"/sys/kernel/mm/hugepages/"
#define TEST_NODES	2

static struct tcase {
	int tpages;
	int offline;
} tcases[] = {
	{2, 0},
	{2, 1},
};

static int pgsz, hpsz;
static long orig_hugepages = -1;
static char path_hugepages_node1[PATH_MAX];
static char path_hugepages_node2[PATH_MAX];
static long orig_hugepages_node1 = -1;
static long orig_hugepages_node2 = -1;
static unsigned int node1, node2;
static void *addr;

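/*
 * Soft-offline the whole test range. EINVAL (MADV_SOFT_OFFLINE not
 * supported) and EBUSY (migration failed, see commit 3f4b815a439a) are left
 * for the caller to handle; any other madvise() error is reported as TFAIL.
 * Returns the madvise() errno on failure, 0 on success.
 */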
static int do_soft_offline(int tpgs)
{
	if (madvise(addr, tpgs * hpsz, MADV_SOFT_OFFLINE) == -1) {
		if (errno != EINVAL && errno != EBUSY)
			tst_res(TFAIL | TERRNO, "madvise failed");
		return errno;
	}
	return 0;
}

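/*
 * Child process: endlessly call move_pages() on the parent's hugepage range,
 * alternating the target between node1 and node2. ENOMEM is expected while
 * the parent has the range unmapped and is simply retried.
 */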
static void do_child(int tpgs)
{
	int test_pages = tpgs * hpsz / pgsz;
	int i, j;
	int *nodes, *status;
	void **pages;
	pid_t ppid = getppid();

	pages = SAFE_MALLOC(sizeof(char *) * test_pages);
	nodes = SAFE_MALLOC(sizeof(int) * test_pages);
	status = SAFE_MALLOC(sizeof(int) * test_pages);

	for (i = 0; i < test_pages; i++)
		pages[i] = addr + i * pgsz;

	for (i = 0; ; i++) {
		for (j = 0; j < test_pages; j++) {
			if (i % 2 == 0)
				nodes[j] = node1;
			else
				nodes[j] = node2;
			status[j] = 0;
		}

		TEST(numa_move_pages(ppid, test_pages,
			pages, nodes, status, MPOL_MF_MOVE_ALL));
		if (TST_RET < 0) {
			if (errno == ENOMEM)
				continue;

			tst_res(TFAIL | TTERRNO, "move_pages failed");
			break;
		}
	}

	exit(0);
}

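/*
 * Parent side of the race: map the hugepage range, fork the migrating child,
 * then repeatedly re-map, touch, (optionally) soft-offline and unmap the
 * range. The loop ends early once about 80% of the test timeout is used up.
 */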
static void do_test(unsigned int n)
{
	int i, ret;
	void *ptr;
	pid_t cpid = -1;
	int status;
	unsigned int twenty_percent = (tst_timeout_remaining() / 5);

	addr = SAFE_MMAP(NULL, tcases[n].tpages * hpsz, PROT_READ | PROT_WRITE,
		MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

	SAFE_MUNMAP(addr, tcases[n].tpages * hpsz);

	cpid = SAFE_FORK();
	if (cpid == 0)
		do_child(tcases[n].tpages);

	for (i = 0; i < LOOPS; i++) {
		ptr = mmap(NULL, tcases[n].tpages * hpsz,
				PROT_READ | PROT_WRITE,
				MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
		if (ptr == MAP_FAILED) {
			if (i == 0)
				tst_brk(TBROK | TERRNO, "Cannot allocate hugepage");

			if (errno == ENOMEM) {
				usleep(1000);
				continue;
			}
		}

		if (ptr != addr)
			tst_brk(TBROK, "Failed to mmap at desired addr");

		memset(addr, 0, tcases[n].tpages * hpsz);

		if (tcases[n].offline) {
			ret = do_soft_offline(tcases[n].tpages);

			if (ret == EINVAL) {
				SAFE_KILL(cpid, SIGKILL);
				SAFE_WAITPID(cpid, &status, 0);
				SAFE_MUNMAP(addr, tcases[n].tpages * hpsz);
				tst_res(TCONF,
					"madvise() didn't support MADV_SOFT_OFFLINE");
				return;
			}
		}

		SAFE_MUNMAP(addr, tcases[n].tpages * hpsz);

		if (tst_timeout_remaining() < twenty_percent)
			break;
	}

	SAFE_KILL(cpid, SIGKILL);
	SAFE_WAITPID(cpid, &status, 0);
	if (!WIFEXITED(status))
		tst_res(TPASS, "Bug not reproduced");
}

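/*
 * Sanity check used by setup(): allocate, bind, lock and free 'size' bytes
 * worth of hugepages on the given node, so the test ends with TCONF when the
 * node cannot actually provide them.
 */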
static void alloc_free_huge_on_node(unsigned int node, size_t size)
{
	char *mem;
	long ret;
	struct bitmask *bm;

	tst_res(TINFO, "Allocating and freeing %zu hugepages on node %u",
		size / hpsz, node);

	mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (mem == MAP_FAILED) {
		if (errno == ENOMEM)
			tst_brk(TCONF, "Cannot allocate huge pages");

		tst_brk(TBROK | TERRNO, "mmap(..., MAP_HUGETLB, ...) failed");
	}

	bm = numa_bitmask_alloc(numa_max_possible_node() + 1);
	if (!bm)
		tst_brk(TBROK | TERRNO, "numa_bitmask_alloc() failed");

	numa_bitmask_setbit(bm, node);

	ret = mbind(mem, size, MPOL_BIND, bm->maskp, bm->size + 1, 0);
	if (ret) {
		if (errno == ENOMEM)
			tst_brk(TCONF, "Cannot mbind huge pages");

		tst_brk(TBROK | TERRNO, "mbind() failed");
	}

	TEST(mlock(mem, size));
	if (TST_RET) {
		SAFE_MUNMAP(mem, size);
		if (TST_ERR == ENOMEM || TST_ERR == EAGAIN)
			tst_brk(TCONF, "Cannot lock huge pages");
		tst_brk(TBROK | TTERRNO, "mlock failed");
	}

	numa_bitmask_free(bm);

	SAFE_MUNMAP(mem, size);
}

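/*
 * Reserve four hugepages on each of the two test nodes (via the per-node
 * sysfs pools when available, otherwise via the global pool) and verify that
 * they can really be allocated there.
 */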
static void setup(void)
{
	int ret;
	long memfree;

	check_config(TEST_NODES);

	if (access(PATH_HUGEPAGES, F_OK))
		tst_brk(TCONF, "Huge page not supported");

	ret = get_allowed_nodes(NH_MEMS, TEST_NODES, &node1, &node2);
	if (ret < 0)
		tst_brk(TBROK | TERRNO, "get_allowed_nodes: %d", ret);

	pgsz = (int)get_page_size();
	SAFE_FILE_LINES_SCANF(PATH_MEMINFO, "Hugepagesize: %d", &hpsz);

	SAFE_FILE_LINES_SCANF(PATH_MEMINFO, "MemFree: %ld", &memfree);
	tst_res(TINFO, "Free RAM %ld kB", memfree);

	if (4 * hpsz > memfree)
		tst_brk(TBROK, "Not enough free RAM");

	snprintf(path_hugepages_node1, sizeof(path_hugepages_node1),
		 "/sys/devices/system/node/node%u/hugepages/hugepages-%dkB/nr_hugepages",
		 node1, hpsz);

	snprintf(path_hugepages_node2, sizeof(path_hugepages_node2),
		 "/sys/devices/system/node/node%u/hugepages/hugepages-%dkB/nr_hugepages",
		 node2, hpsz);

	if (!access(path_hugepages_node1, F_OK)) {
		SAFE_FILE_SCANF(path_hugepages_node1,
				"%ld", &orig_hugepages_node1);
		tst_res(TINFO,
			"Increasing %dkB hugepages pool on node %u to %ld",
			hpsz, node1, orig_hugepages_node1 + 4);
		SAFE_FILE_PRINTF(path_hugepages_node1,
				 "%ld", orig_hugepages_node1 + 4);
	}

	if (!access(path_hugepages_node2, F_OK)) {
		SAFE_FILE_SCANF(path_hugepages_node2,
				"%ld", &orig_hugepages_node2);
		tst_res(TINFO,
			"Increasing %dkB hugepages pool on node %u to %ld",
			hpsz, node2, orig_hugepages_node2 + 4);
		SAFE_FILE_PRINTF(path_hugepages_node2,
				 "%ld", orig_hugepages_node2 + 4);
	}

	hpsz *= 1024;

	if (orig_hugepages_node1 == -1 || orig_hugepages_node2 == -1) {
		SAFE_FILE_SCANF(PATH_NR_HUGEPAGES, "%ld", &orig_hugepages);
		tst_res(TINFO, "Increasing global hugepages pool to %ld",
			orig_hugepages + 8);
		SAFE_FILE_PRINTF(PATH_NR_HUGEPAGES, "%ld", orig_hugepages + 8);
	}

	alloc_free_huge_on_node(node1, 4L * hpsz);
	alloc_free_huge_on_node(node2, 4L * hpsz);
}

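/* Restore the original hugepage pool sizes. */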
static void cleanup(void)
{
	if (orig_hugepages != -1)
		SAFE_FILE_PRINTF(PATH_NR_HUGEPAGES, "%ld", orig_hugepages);

	if (orig_hugepages_node1 != -1) {
		SAFE_FILE_PRINTF(path_hugepages_node1,
				 "%ld", orig_hugepages_node1);
	}

	if (orig_hugepages_node2 != -1) {
		SAFE_FILE_PRINTF(path_hugepages_node2,
				 "%ld", orig_hugepages_node2);
	}
}

static struct tst_test test = {
	.min_kver = "2.6.32",
	.needs_root = 1,
	.forks_child = 1,
	.setup = setup,
	.cleanup = cleanup,
	.test = do_test,
	.tcnt = ARRAY_SIZE(tcases),
	.tags = (const struct tst_tag[]) {
		{"linux-git", "e66f17ff7177"},
		{"linux-git", "c9d398fa2378"},
		{"linux-git", "4643d67e8cb0"},
		{"linux-git", "3f4b815a439a"},
		{}
	}
};

#else
	TST_TEST_TCONF(NUMA_ERROR_MSG);
#endif