• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2012 Linux Test Project, Inc.
4  */
5 
6 /*
7  * use migrate_pages() and check that address is on correct node
8  * 1. process A can migrate its non-shared mem with CAP_SYS_NICE
9  * 2. process A can migrate its non-shared mem without CAP_SYS_NICE
10  * 3. process A can migrate shared mem only with CAP_SYS_NICE
11  * 4. process A can migrate non-shared mem in process B with same effective uid
12  * 5. process A can migrate non-shared mem in process B with CAP_SYS_NICE
13  */
14 #include <sys/types.h>
15 #include <sys/syscall.h>
16 #include <sys/wait.h>
17 #include <sys/mman.h>
18 #include <sys/prctl.h>
19 #include <errno.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <unistd.h>
23 #include <pwd.h>
24 
25 #include "tst_test.h"
26 #include "lapi/syscalls.h"
27 #include "numa_helper.h"
28 #include "migrate_pages_common.h"
29 
30 /*
31  * This is an estimated minimum of free mem required to migrate this
32  * process to another node as migrate_pages will fail if there is not
33  * enough free space on node. While running this test on x86_64
34  * it used ~2048 pages (total VM, not just RSS). Considering ia64 as
35  * architecture with largest (non-huge) page size (16k), this limit
36  * is set to 2048*16k == 32M.
37  */
38 #define NODE_MIN_FREEMEM (32*1024*1024)
39 
40 #ifdef HAVE_NUMA_V2
41 
/* name of the unprivileged account the children switch to */
static const char nobody_uid[] = "nobody";
/* passwd entry of nobody_uid, looked up in setup() */
static struct passwd *ltpuser;

/* nodes returned by get_allowed_nodes_arr() in setup() and their count */
static int *nodes;
static int num_nodes;
/* the two nodes selected in setup() that the tests migrate between */
static int nodeA;
static int nodeB;

/* setup() overwrites numa_balancing, so it is listed for save/restore */
static const char *const save_restore[] = {
	"?/proc/sys/kernel/numa_balancing",
	NULL,
};
51 
print_mem_stats(pid_t pid,int node)52 static void print_mem_stats(pid_t pid, int node)
53 {
54 	char s[64];
55 	long long node_size, freep;
56 
57 	if (pid == 0)
58 		pid = getpid();
59 
60 	tst_res(TINFO, "mem_stats pid: %d, node: %d", pid, node);
61 
62 	/* dump pid's VM info */
63 	sprintf(s, "cat /proc/%d/status", pid);
64 	system(s);
65 	sprintf(s, "cat /proc/%d/numa_maps", pid);
66 	system(s);
67 
68 	/* dump node free mem */
69 	node_size = numa_node_size64(node, &freep);
70 	tst_res(TINFO, "Node id: %d, size: %lld, free: %lld",
71 		 node, node_size, freep);
72 }
73 
migrate_to_node(pid_t pid,int node)74 static int migrate_to_node(pid_t pid, int node)
75 {
76 	unsigned long nodemask_size, max_node;
77 	unsigned long *old_nodes, *new_nodes;
78 	int i;
79 
80 	tst_res(TINFO, "pid(%d) migrate pid %d to node -> %d",
81 		 getpid(), pid, node);
82 	max_node = LTP_ALIGN(get_max_node(), sizeof(unsigned long)*8);
83 	nodemask_size = max_node / 8;
84 	old_nodes = SAFE_MALLOC(nodemask_size);
85 	new_nodes = SAFE_MALLOC(nodemask_size);
86 
87 	memset(old_nodes, 0, nodemask_size);
88 	memset(new_nodes, 0, nodemask_size);
89 	for (i = 0; i < num_nodes; i++)
90 		set_bit(old_nodes, nodes[i], 1);
91 	set_bit(new_nodes, node, 1);
92 
93 	TEST(tst_syscall(__NR_migrate_pages, pid, max_node, old_nodes,
94 		new_nodes));
95 	if (TST_RET != 0) {
96 		if (TST_RET < 0) {
97 			tst_res(TFAIL | TERRNO, "migrate_pages failed "
98 				 "ret: %ld, ", TST_RET);
99 			print_mem_stats(pid, node);
100 		} else {
101 			tst_res(TINFO, "migrate_pages could not migrate all "
102 				 "pages, not migrated: %ld", TST_RET);
103 		}
104 	}
105 	free(old_nodes);
106 	free(new_nodes);
107 	return TST_RET;
108 }
109 
addr_on_node(void * addr)110 static int addr_on_node(void *addr)
111 {
112 	int node;
113 	int ret;
114 
115 	ret = tst_syscall(__NR_get_mempolicy, &node, NULL, (unsigned long)0,
116 		      (unsigned long)addr, MPOL_F_NODE | MPOL_F_ADDR);
117 	if (ret == -1) {
118 		tst_res(TFAIL | TERRNO,
119 				"error getting memory policy for page %p", addr);
120 	}
121 	return node;
122 }
123 
check_addr_on_node(void * addr,int exp_node)124 static int check_addr_on_node(void *addr, int exp_node)
125 {
126 	int node;
127 
128 	node = addr_on_node(addr);
129 	if (node == exp_node) {
130 		tst_res(TPASS, "pid(%d) addr %p is on expected node: %d",
131 			 getpid(), addr, exp_node);
132 		return TPASS;
133 	} else {
134 		tst_res(TFAIL, "pid(%d) addr %p not on expected node: %d "
135 			 ", expected %d", getpid(), addr, node, exp_node);
136 		print_mem_stats(0, exp_node);
137 		return TFAIL;
138 	}
139 }
140 
test_migrate_current_process(int node1,int node2,int cap_sys_nice)141 static void test_migrate_current_process(int node1, int node2, int cap_sys_nice)
142 {
143 	char *private, *shared;
144 	int ret;
145 	pid_t child;
146 
147 	/* parent can migrate its non-shared memory */
148 	tst_res(TINFO, "current_process, cap_sys_nice: %d", cap_sys_nice);
149 	private =  SAFE_MMAP(NULL, getpagesize(), PROT_READ | PROT_WRITE,
150 		MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
151 	private[0] = 0;
152 	tst_res(TINFO, "private anonymous: %p", private);
153 
154 	migrate_to_node(0, node2);
155 	check_addr_on_node(private, node2);
156 	migrate_to_node(0, node1);
157 	check_addr_on_node(private, node1);
158 	SAFE_MUNMAP(private, getpagesize());
159 
160 	/* parent can migrate shared memory with CAP_SYS_NICE */
161 	shared = SAFE_MMAP(NULL, getpagesize(), PROT_READ | PROT_WRITE,
162 		      MAP_ANONYMOUS | MAP_SHARED, 0, 0);
163 	shared[0] = 1;
164 	tst_res(TINFO, "shared anonymous: %p", shared);
165 	migrate_to_node(0, node2);
166 	check_addr_on_node(shared, node2);
167 
168 	/* shared mem is on node2, try to migrate in child to node1 */
169 	fflush(stdout);
170 	child = SAFE_FORK();
171 	if (child == 0) {
172 		tst_res(TINFO, "child shared anonymous, cap_sys_nice: %d",
173 			 cap_sys_nice);
174 		private =  SAFE_MMAP(NULL, getpagesize(),
175 			PROT_READ | PROT_WRITE,
176 			MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
177 		private[0] = 1;
178 		shared[0] = 1;
179 		if (!cap_sys_nice)
180 			SAFE_SETEUID(ltpuser->pw_uid);
181 
182 		migrate_to_node(0, node1);
183 		/* child can migrate non-shared memory */
184 		ret = check_addr_on_node(private, node1);
185 
186 		exit(ret);
187 	}
188 
189 	SAFE_WAITPID(child, NULL, 0);
190 	if (cap_sys_nice)
191 		/* child can migrate shared memory only
192 		 * with CAP_SYS_NICE */
193 		check_addr_on_node(shared, node1);
194 	else
195 		check_addr_on_node(shared, node2);
196 	SAFE_MUNMAP(shared, getpagesize());
197 }
198 
test_migrate_other_process(int node1,int node2,int cap_sys_nice)199 static void test_migrate_other_process(int node1, int node2, int cap_sys_nice)
200 {
201 	char *private;
202 	int ret;
203 	pid_t child1, child2;
204 
205 	tst_res(TINFO, "other_process, cap_sys_nice: %d", cap_sys_nice);
206 
207 	fflush(stdout);
208 	child1 = SAFE_FORK();
209 	if (child1 == 0) {
210 		private =  SAFE_MMAP(NULL, getpagesize(),
211 			PROT_READ | PROT_WRITE,
212 			MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
213 		private[0] = 0;
214 
215 		/* make sure we are on node1 */
216 		migrate_to_node(0, node1);
217 		check_addr_on_node(private, node1);
218 
219 		SAFE_SETUID(ltpuser->pw_uid);
220 
221 		/* commit_creds() will clear dumpable, restore it */
222 		if (prctl(PR_SET_DUMPABLE, 1))
223 			tst_brk(TBROK | TERRNO, "prctl");
224 
225 		/* signal child2 it's OK to migrate child1 and wait */
226 		TST_CHECKPOINT_WAKE(0);
227 		TST_CHECKPOINT_WAIT(1);
228 
229 		/* child2 can migrate child1 process if it's privileged */
230 		/* child2 can migrate child1 process if it has same uid */
231 		ret = check_addr_on_node(private, node2);
232 
233 		exit(ret);
234 	}
235 
236 	fflush(stdout);
237 	child2 = SAFE_FORK();
238 	if (child2 == 0) {
239 		if (!cap_sys_nice)
240 			SAFE_SETUID(ltpuser->pw_uid);
241 
242 		/* wait until child1 is ready on node1, then migrate and
243 		 * signal to check current node */
244 		TST_CHECKPOINT_WAIT(0);
245 		migrate_to_node(child1, node2);
246 		TST_CHECKPOINT_WAKE(1);
247 
248 		exit(TPASS);
249 	}
250 
251 	SAFE_WAITPID(child1, NULL, 0);
252 	SAFE_WAITPID(child2, NULL, 0);
253 }
254 
run(void)255 static void run(void)
256 {
257 	test_migrate_current_process(nodeA, nodeB, 1);
258 	test_migrate_current_process(nodeA, nodeB, 0);
259 	test_migrate_other_process(nodeA, nodeB, 1);
260 	test_migrate_other_process(nodeA, nodeB, 0);
261 }
262 
setup(void)263 static void setup(void)
264 {
265 	int ret, i, j;
266 	int pagesize = getpagesize();
267 	void *p;
268 
269 	tst_syscall(__NR_migrate_pages, 0, 0, NULL, NULL);
270 
271 	if (numa_available() == -1)
272 		tst_brk(TCONF, "NUMA not available");
273 
274 	ret = get_allowed_nodes_arr(NH_MEMS, &num_nodes, &nodes);
275 	if (ret < 0)
276 		tst_brk(TBROK | TERRNO, "get_allowed_nodes(): %d", ret);
277 
278 	if (num_nodes < 2)
279 		tst_brk(TCONF, "at least 2 allowed NUMA nodes"
280 			 " are required");
281 	else if (tst_kvercmp(2, 6, 18) < 0)
282 		tst_brk(TCONF, "2.6.18 or greater kernel required");
283 
284 	FILE_PRINTF("/proc/sys/kernel/numa_balancing", "0");
285 	/*
286 	 * find 2 nodes, which can hold NODE_MIN_FREEMEM bytes
287 	 * The reason is that:
288 	 * 1. migrate_pages() is expected to succeed
289 	 * 2. this test avoids hitting:
290 	 *    Bug 870326 - migrate_pages() reports success, but pages are
291 	 *                 not moved to desired node
292 	 *    https://bugzilla.redhat.com/show_bug.cgi?id=870326
293 	 */
294 	nodeA = nodeB = -1;
295 	for (i = 0; i < num_nodes; i++) {
296 		p = numa_alloc_onnode(NODE_MIN_FREEMEM, nodes[i]);
297 		if (p == NULL)
298 			break;
299 		memset(p, 0xff, NODE_MIN_FREEMEM);
300 
301 		j = 0;
302 		while (j < NODE_MIN_FREEMEM) {
303 			if (addr_on_node(p + j) != nodes[i])
304 				break;
305 			j += pagesize;
306 		}
307 		numa_free(p, NODE_MIN_FREEMEM);
308 
309 		if (j >= NODE_MIN_FREEMEM) {
310 			if (nodeA == -1)
311 				nodeA = nodes[i];
312 			else if (nodeB == -1)
313 				nodeB = nodes[i];
314 			else
315 				break;
316 		}
317 	}
318 
319 	if (nodeA == -1 || nodeB == -1)
320 		tst_brk(TCONF, "at least 2 NUMA nodes with "
321 			 "free mem > %d are needed", NODE_MIN_FREEMEM);
322 	tst_res(TINFO, "Using nodes: %d %d", nodeA, nodeB);
323 
324 	ltpuser = getpwnam(nobody_uid);
325 	if (ltpuser == NULL)
326 		tst_brk(TBROK | TERRNO, "getpwnam failed");
327 }
328 
329 static struct tst_test test = {
330 	.needs_root = 1,
331 	.needs_checkpoints = 1,
332 	.forks_child = 1,
333 	.test_all = run,
334 	.setup = setup,
335 	.save_restore = save_restore,
336 };
337 #else
338 TST_TEST_TCONF(NUMA_ERROR_MSG);
339 #endif
340