1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (C) 2012 Linux Test Project, Inc.
4 */
5
6 /*
7 * use migrate_pages() and check that address is on correct node
8 * 1. process A can migrate its non-shared mem with CAP_SYS_NICE
9 * 2. process A can migrate its non-shared mem without CAP_SYS_NICE
10 * 3. process A can migrate shared mem only with CAP_SYS_NICE
11 * 4. process A can migrate non-shared mem in process B with same effective uid
12 * 5. process A can migrate non-shared mem in process B with CAP_SYS_NICE
13 */
14 #include <sys/types.h>
15 #include <sys/syscall.h>
16 #include <sys/wait.h>
17 #include <sys/mman.h>
18 #include <sys/prctl.h>
19 #include <errno.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <unistd.h>
23 #include <pwd.h>
24
25 #include "tst_test.h"
26 #include "lapi/syscalls.h"
27 #include "numa_helper.h"
28 #include "migrate_pages_common.h"
29
/*
 * Estimated lower bound on the free memory a node needs so that this
 * process can be migrated onto it; migrate_pages() fails when the target
 * node lacks free space.
 *
 * On x86_64 the test was observed to use ~2048 pages (total VM, not just
 * RSS). Taking 16k (ia64) as the largest non-huge page size, the limit is
 * 2048 pages * 16k = 32M.
 */
#define NODE_MIN_FREEMEM (2048 * 16 * 1024)
39
40 #ifdef HAVE_NUMA_V2
41
/* Account name looked up in setup(); its uid is what the children switch to. */
static const char nobody_uid[] = "nobody";

/* passwd entry for nobody_uid, resolved once in setup() via getpwnam(). */
static struct passwd *ltpuser;

/* Allowed NUMA nodes, filled in by get_allowed_nodes_arr() in setup(). */
static int num_nodes;
static int *nodes;

/* The two nodes picked by setup() that pages are migrated between. */
static int nodeA;
static int nodeB;

/*
 * Handed to the test library through tst_test.save_restore; setup() writes
 * "0" to this file.
 * NOTE(review): the leading '?' presumably marks the path as optional --
 * confirm against the LTP library documentation.
 */
static const char * const save_restore[] = {
	"?/proc/sys/kernel/numa_balancing",
	NULL,
};
51
print_mem_stats(pid_t pid,int node)52 static void print_mem_stats(pid_t pid, int node)
53 {
54 char s[64];
55 long long node_size, freep;
56
57 if (pid == 0)
58 pid = getpid();
59
60 tst_res(TINFO, "mem_stats pid: %d, node: %d", pid, node);
61
62 /* dump pid's VM info */
63 sprintf(s, "cat /proc/%d/status", pid);
64 system(s);
65 sprintf(s, "cat /proc/%d/numa_maps", pid);
66 system(s);
67
68 /* dump node free mem */
69 node_size = numa_node_size64(node, &freep);
70 tst_res(TINFO, "Node id: %d, size: %lld, free: %lld",
71 node, node_size, freep);
72 }
73
migrate_to_node(pid_t pid,int node)74 static int migrate_to_node(pid_t pid, int node)
75 {
76 unsigned long nodemask_size, max_node;
77 unsigned long *old_nodes, *new_nodes;
78 int i;
79
80 tst_res(TINFO, "pid(%d) migrate pid %d to node -> %d",
81 getpid(), pid, node);
82 max_node = LTP_ALIGN(get_max_node(), sizeof(unsigned long)*8);
83 nodemask_size = max_node / 8;
84 old_nodes = SAFE_MALLOC(nodemask_size);
85 new_nodes = SAFE_MALLOC(nodemask_size);
86
87 memset(old_nodes, 0, nodemask_size);
88 memset(new_nodes, 0, nodemask_size);
89 for (i = 0; i < num_nodes; i++)
90 set_bit(old_nodes, nodes[i], 1);
91 set_bit(new_nodes, node, 1);
92
93 TEST(tst_syscall(__NR_migrate_pages, pid, max_node, old_nodes,
94 new_nodes));
95 if (TST_RET != 0) {
96 if (TST_RET < 0) {
97 tst_res(TFAIL | TERRNO, "migrate_pages failed "
98 "ret: %ld, ", TST_RET);
99 print_mem_stats(pid, node);
100 } else {
101 tst_res(TINFO, "migrate_pages could not migrate all "
102 "pages, not migrated: %ld", TST_RET);
103 }
104 }
105 free(old_nodes);
106 free(new_nodes);
107 return TST_RET;
108 }
109
addr_on_node(void * addr)110 static int addr_on_node(void *addr)
111 {
112 int node;
113 int ret;
114
115 ret = tst_syscall(__NR_get_mempolicy, &node, NULL, (unsigned long)0,
116 (unsigned long)addr, MPOL_F_NODE | MPOL_F_ADDR);
117 if (ret == -1) {
118 tst_res(TFAIL | TERRNO,
119 "error getting memory policy for page %p", addr);
120 }
121 return node;
122 }
123
check_addr_on_node(void * addr,int exp_node)124 static int check_addr_on_node(void *addr, int exp_node)
125 {
126 int node;
127
128 node = addr_on_node(addr);
129 if (node == exp_node) {
130 tst_res(TPASS, "pid(%d) addr %p is on expected node: %d",
131 getpid(), addr, exp_node);
132 return TPASS;
133 } else {
134 tst_res(TFAIL, "pid(%d) addr %p not on expected node: %d "
135 ", expected %d", getpid(), addr, node, exp_node);
136 print_mem_stats(0, exp_node);
137 return TFAIL;
138 }
139 }
140
test_migrate_current_process(int node1,int node2,int cap_sys_nice)141 static void test_migrate_current_process(int node1, int node2, int cap_sys_nice)
142 {
143 char *private, *shared;
144 int ret;
145 pid_t child;
146
147 /* parent can migrate its non-shared memory */
148 tst_res(TINFO, "current_process, cap_sys_nice: %d", cap_sys_nice);
149 private = SAFE_MMAP(NULL, getpagesize(), PROT_READ | PROT_WRITE,
150 MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
151 private[0] = 0;
152 tst_res(TINFO, "private anonymous: %p", private);
153
154 migrate_to_node(0, node2);
155 check_addr_on_node(private, node2);
156 migrate_to_node(0, node1);
157 check_addr_on_node(private, node1);
158 SAFE_MUNMAP(private, getpagesize());
159
160 /* parent can migrate shared memory with CAP_SYS_NICE */
161 shared = SAFE_MMAP(NULL, getpagesize(), PROT_READ | PROT_WRITE,
162 MAP_ANONYMOUS | MAP_SHARED, 0, 0);
163 shared[0] = 1;
164 tst_res(TINFO, "shared anonymous: %p", shared);
165 migrate_to_node(0, node2);
166 check_addr_on_node(shared, node2);
167
168 /* shared mem is on node2, try to migrate in child to node1 */
169 fflush(stdout);
170 child = SAFE_FORK();
171 if (child == 0) {
172 tst_res(TINFO, "child shared anonymous, cap_sys_nice: %d",
173 cap_sys_nice);
174 private = SAFE_MMAP(NULL, getpagesize(),
175 PROT_READ | PROT_WRITE,
176 MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
177 private[0] = 1;
178 shared[0] = 1;
179 if (!cap_sys_nice)
180 SAFE_SETEUID(ltpuser->pw_uid);
181
182 migrate_to_node(0, node1);
183 /* child can migrate non-shared memory */
184 ret = check_addr_on_node(private, node1);
185
186 exit(ret);
187 }
188
189 SAFE_WAITPID(child, NULL, 0);
190 if (cap_sys_nice)
191 /* child can migrate shared memory only
192 * with CAP_SYS_NICE */
193 check_addr_on_node(shared, node1);
194 else
195 check_addr_on_node(shared, node2);
196 SAFE_MUNMAP(shared, getpagesize());
197 }
198
test_migrate_other_process(int node1,int node2,int cap_sys_nice)199 static void test_migrate_other_process(int node1, int node2, int cap_sys_nice)
200 {
201 char *private;
202 int ret;
203 pid_t child1, child2;
204
205 tst_res(TINFO, "other_process, cap_sys_nice: %d", cap_sys_nice);
206
207 fflush(stdout);
208 child1 = SAFE_FORK();
209 if (child1 == 0) {
210 private = SAFE_MMAP(NULL, getpagesize(),
211 PROT_READ | PROT_WRITE,
212 MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
213 private[0] = 0;
214
215 /* make sure we are on node1 */
216 migrate_to_node(0, node1);
217 check_addr_on_node(private, node1);
218
219 SAFE_SETUID(ltpuser->pw_uid);
220
221 /* commit_creds() will clear dumpable, restore it */
222 if (prctl(PR_SET_DUMPABLE, 1))
223 tst_brk(TBROK | TERRNO, "prctl");
224
225 /* signal child2 it's OK to migrate child1 and wait */
226 TST_CHECKPOINT_WAKE(0);
227 TST_CHECKPOINT_WAIT(1);
228
229 /* child2 can migrate child1 process if it's privileged */
230 /* child2 can migrate child1 process if it has same uid */
231 ret = check_addr_on_node(private, node2);
232
233 exit(ret);
234 }
235
236 fflush(stdout);
237 child2 = SAFE_FORK();
238 if (child2 == 0) {
239 if (!cap_sys_nice)
240 SAFE_SETUID(ltpuser->pw_uid);
241
242 /* wait until child1 is ready on node1, then migrate and
243 * signal to check current node */
244 TST_CHECKPOINT_WAIT(0);
245 migrate_to_node(child1, node2);
246 TST_CHECKPOINT_WAKE(1);
247
248 exit(TPASS);
249 }
250
251 SAFE_WAITPID(child1, NULL, 0);
252 SAFE_WAITPID(child2, NULL, 0);
253 }
254
run(void)255 static void run(void)
256 {
257 test_migrate_current_process(nodeA, nodeB, 1);
258 test_migrate_current_process(nodeA, nodeB, 0);
259 test_migrate_other_process(nodeA, nodeB, 1);
260 test_migrate_other_process(nodeA, nodeB, 0);
261 }
262
setup(void)263 static void setup(void)
264 {
265 int ret, i, j;
266 int pagesize = getpagesize();
267 void *p;
268
269 tst_syscall(__NR_migrate_pages, 0, 0, NULL, NULL);
270
271 if (numa_available() == -1)
272 tst_brk(TCONF, "NUMA not available");
273
274 ret = get_allowed_nodes_arr(NH_MEMS, &num_nodes, &nodes);
275 if (ret < 0)
276 tst_brk(TBROK | TERRNO, "get_allowed_nodes(): %d", ret);
277
278 if (num_nodes < 2)
279 tst_brk(TCONF, "at least 2 allowed NUMA nodes"
280 " are required");
281 else if (tst_kvercmp(2, 6, 18) < 0)
282 tst_brk(TCONF, "2.6.18 or greater kernel required");
283
284 FILE_PRINTF("/proc/sys/kernel/numa_balancing", "0");
285 /*
286 * find 2 nodes, which can hold NODE_MIN_FREEMEM bytes
287 * The reason is that:
288 * 1. migrate_pages() is expected to succeed
289 * 2. this test avoids hitting:
290 * Bug 870326 - migrate_pages() reports success, but pages are
291 * not moved to desired node
292 * https://bugzilla.redhat.com/show_bug.cgi?id=870326
293 */
294 nodeA = nodeB = -1;
295 for (i = 0; i < num_nodes; i++) {
296 p = numa_alloc_onnode(NODE_MIN_FREEMEM, nodes[i]);
297 if (p == NULL)
298 break;
299 memset(p, 0xff, NODE_MIN_FREEMEM);
300
301 j = 0;
302 while (j < NODE_MIN_FREEMEM) {
303 if (addr_on_node(p + j) != nodes[i])
304 break;
305 j += pagesize;
306 }
307 numa_free(p, NODE_MIN_FREEMEM);
308
309 if (j >= NODE_MIN_FREEMEM) {
310 if (nodeA == -1)
311 nodeA = nodes[i];
312 else if (nodeB == -1)
313 nodeB = nodes[i];
314 else
315 break;
316 }
317 }
318
319 if (nodeA == -1 || nodeB == -1)
320 tst_brk(TCONF, "at least 2 NUMA nodes with "
321 "free mem > %d are needed", NODE_MIN_FREEMEM);
322 tst_res(TINFO, "Using nodes: %d %d", nodeA, nodeB);
323
324 ltpuser = getpwnam(nobody_uid);
325 if (ltpuser == NULL)
326 tst_brk(TBROK | TERRNO, "getpwnam failed");
327 }
328
329 static struct tst_test test = {
330 .needs_root = 1,
331 .needs_checkpoints = 1,
332 .forks_child = 1,
333 .test_all = run,
334 .setup = setup,
335 .save_restore = save_restore,
336 };
337 #else
338 TST_TEST_TCONF(NUMA_ERROR_MSG);
339 #endif
340