1 /*
2 * Copyright (C) 2012 Linux Test Project, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it
13 * is free of the rightful claim of any third person regarding
14 * infringement or the like. Any license provided herein, whether
15 * implied or otherwise, applies only to this software file. Patent
16 * licenses, if any, provided herein do not apply to combinations of
17 * this program with other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 * 02110-1301, USA.
23 */
24
25 /*
26 * use migrate_pages() and check that address is on correct node
27 * 1. process A can migrate its non-shared mem with CAP_SYS_NICE
28 * 2. process A can migrate its non-shared mem without CAP_SYS_NICE
29 * 3. process A can migrate shared mem only with CAP_SYS_NICE
30 * 4. process A can migrate non-shared mem in process B with same effective uid
31 * 5. process A can migrate non-shared mem in process B with CAP_SYS_NICE
32 */
#include "config.h"
#include <sys/types.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <errno.h>
#if HAVE_NUMA_H
#include <numa.h>
#endif
#if HAVE_NUMAIF_H
#include <numaif.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <pwd.h>
#include "test.h"
#include "safe_macros.h"
#include "linux_syscall_numbers.h"
#include "numa_helper.h"
#include "migrate_pages_common.h"
54
/*
 * Estimated lower bound on the free memory a node needs before this
 * process can be migrated onto it; migrate_pages will fail if the node
 * does not have enough free space. When this test was run on x86_64 it
 * used ~2048 pages (total VM, not just RSS). Taking ia64 as the
 * architecture with the largest (non-huge) page size (16k), the bound
 * works out to 2048 * 16k == 32M.
 */
#define NODE_MIN_FREEMEM (32 * 1024 * 1024)

/*
 * Test identification globals.
 * NOTE(review): presumably read by the LTP test library - confirm in test.h.
 */
char *TCID = "migrate_pages02";
int TST_TOTAL = 1;
67
68 #if defined(__NR_migrate_pages) && HAVE_NUMA_H && HAVE_NUMAIF_H
69 static const char nobody_uid[] = "nobody";
70 static struct passwd *ltpuser;
71 static int *nodes, nodeA, nodeB;
72 static int num_nodes;
73
74 static void setup(void);
75 static void cleanup(void);
76
77 option_t options[] = {
78 {NULL, NULL, NULL}
79 };
80
print_mem_stats(pid_t pid,int node)81 static void print_mem_stats(pid_t pid, int node)
82 {
83 char s[64];
84 long long node_size, freep;
85
86 if (pid == 0)
87 pid = getpid();
88
89 tst_resm(TINFO, "mem_stats pid: %d, node: %d", pid, node);
90
91 /* dump pid's VM info */
92 sprintf(s, "cat /proc/%d/status", pid);
93 system(s);
94 sprintf(s, "cat /proc/%d/numa_maps", pid);
95 system(s);
96
97 /* dump node free mem */
98 node_size = numa_node_size64(node, &freep);
99 tst_resm(TINFO, "Node id: %d, size: %lld, free: %lld",
100 node, node_size, freep);
101 }
102
migrate_to_node(pid_t pid,int node)103 static int migrate_to_node(pid_t pid, int node)
104 {
105 unsigned long nodemask_size, max_node;
106 unsigned long *old_nodes, *new_nodes;
107 int i;
108
109 tst_resm(TINFO, "pid(%d) migrate pid %d to node -> %d",
110 getpid(), pid, node);
111 max_node = LTP_ALIGN(get_max_node(), sizeof(unsigned long)*8);
112 nodemask_size = max_node / 8;
113 old_nodes = SAFE_MALLOC(NULL, nodemask_size);
114 new_nodes = SAFE_MALLOC(NULL, nodemask_size);
115
116 memset(old_nodes, 0, nodemask_size);
117 memset(new_nodes, 0, nodemask_size);
118 for (i = 0; i < num_nodes; i++)
119 set_bit(old_nodes, nodes[i], 1);
120 set_bit(new_nodes, node, 1);
121
122 TEST(ltp_syscall(__NR_migrate_pages, pid, max_node, old_nodes,
123 new_nodes));
124 if (TEST_RETURN != 0) {
125 if (TEST_RETURN < 0)
126 tst_resm(TFAIL | TERRNO, "migrate_pages failed "
127 "ret: %ld, ", TEST_RETURN);
128 else
129 tst_resm(TWARN, "migrate_pages could not migrate all "
130 "pages, not migrated: %ld", TEST_RETURN);
131 print_mem_stats(pid, node);
132 }
133 free(old_nodes);
134 free(new_nodes);
135 return TEST_RETURN;
136 }
137
addr_on_node(void * addr)138 static int addr_on_node(void *addr)
139 {
140 int node;
141 int ret;
142
143 ret = ltp_syscall(__NR_get_mempolicy, &node, NULL, (unsigned long)0,
144 (unsigned long)addr, MPOL_F_NODE | MPOL_F_ADDR);
145 if (ret == -1) {
146 tst_resm(TBROK | TERRNO, "error getting memory policy "
147 "for page %p", addr);
148 }
149 return node;
150 }
151
check_addr_on_node(void * addr,int exp_node)152 static int check_addr_on_node(void *addr, int exp_node)
153 {
154 int node;
155
156 node = addr_on_node(addr);
157 if (node == exp_node) {
158 tst_resm(TPASS, "pid(%d) addr %p is on expected node: %d",
159 getpid(), addr, exp_node);
160 return 0;
161 } else {
162 tst_resm(TFAIL, "pid(%d) addr %p not on expected node: %d "
163 ", expected %d", getpid(), addr, node, exp_node);
164 print_mem_stats(0, exp_node);
165 return 1;
166 }
167 }
168
test_migrate_current_process(int node1,int node2,int cap_sys_nice)169 static void test_migrate_current_process(int node1, int node2, int cap_sys_nice)
170 {
171 char *testp, *testp2;
172 int ret, status;
173 pid_t child;
174
175 /* parent can migrate its non-shared memory */
176 tst_resm(TINFO, "current_process, cap_sys_nice: %d", cap_sys_nice);
177 testp = SAFE_MALLOC(NULL, getpagesize());
178 testp[0] = 0;
179 tst_resm(TINFO, "private anonymous: %p", testp);
180 migrate_to_node(0, node2);
181 check_addr_on_node(testp, node2);
182 migrate_to_node(0, node1);
183 check_addr_on_node(testp, node1);
184 free(testp);
185
186 /* parent can migrate shared memory with CAP_SYS_NICE */
187 testp2 = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
188 MAP_ANONYMOUS | MAP_SHARED, 0, 0);
189 if (testp2 == MAP_FAILED)
190 tst_brkm(TBROK | TERRNO, cleanup, "mmap failed");
191 testp2[0] = 1;
192 tst_resm(TINFO, "shared anonymous: %p", testp2);
193 migrate_to_node(0, node2);
194 check_addr_on_node(testp2, node2);
195
196 /* shared mem is on node2, try to migrate in child to node1 */
197 fflush(stdout);
198 child = fork();
199 switch (child) {
200 case -1:
201 tst_brkm(TBROK | TERRNO, cleanup, "fork");
202 break;
203 case 0:
204 tst_resm(TINFO, "child shared anonymous, cap_sys_nice: %d",
205 cap_sys_nice);
206 testp = SAFE_MALLOC(NULL, getpagesize());
207 testp[0] = 1;
208 testp2[0] = 1;
209 if (!cap_sys_nice)
210 if (seteuid(ltpuser->pw_uid) == -1)
211 tst_brkm(TBROK | TERRNO, NULL,
212 "seteuid failed");
213
214 migrate_to_node(0, node1);
215 /* child can migrate non-shared memory */
216 ret = check_addr_on_node(testp, node1);
217
218 free(testp);
219 munmap(testp2, getpagesize());
220 exit(ret);
221 default:
222 if (waitpid(child, &status, 0) == -1)
223 tst_brkm(TBROK | TERRNO, cleanup, "waitpid");
224 if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
225 tst_resm(TFAIL, "child returns %d", status);
226 if (cap_sys_nice)
227 /* child can migrate shared memory only
228 * with CAP_SYS_NICE */
229 check_addr_on_node(testp2, node1);
230 else
231 check_addr_on_node(testp2, node2);
232 munmap(testp2, getpagesize());
233 }
234 }
235
test_migrate_other_process(int node1,int node2,int cap_sys_nice)236 static void test_migrate_other_process(int node1, int node2, int cap_sys_nice)
237 {
238 char *testp;
239 int status, ret, tmp;
240 pid_t child;
241 int child_ready[2];
242 int pages_migrated[2];
243
244 /* setup pipes to synchronize child/parent */
245 if (pipe(child_ready) == -1)
246 tst_resm(TBROK | TERRNO, "pipe #1 failed");
247 if (pipe(pages_migrated) == -1)
248 tst_resm(TBROK | TERRNO, "pipe #2 failed");
249
250 tst_resm(TINFO, "other_process, cap_sys_nice: %d", cap_sys_nice);
251
252 fflush(stdout);
253 child = fork();
254 switch (child) {
255 case -1:
256 tst_brkm(TBROK | TERRNO, cleanup, "fork");
257 break;
258 case 0:
259 close(child_ready[0]);
260 close(pages_migrated[1]);
261
262 testp = SAFE_MALLOC(NULL, getpagesize());
263 testp[0] = 0;
264
265 /* make sure we are on node1 */
266 migrate_to_node(0, node1);
267 check_addr_on_node(testp, node1);
268
269 if (seteuid(ltpuser->pw_uid) == -1)
270 tst_brkm(TBROK | TERRNO, NULL, "seteuid failed");
271
272 /* signal parent it's OK to migrate child and wait */
273 if (write(child_ready[1], &tmp, 1) != 1)
274 tst_brkm(TBROK | TERRNO, NULL, "write #1 failed");
275 if (read(pages_migrated[0], &tmp, 1) != 1)
276 tst_brkm(TBROK | TERRNO, NULL, "read #1 failed");
277
278 /* parent can migrate child process with same euid */
279 /* parent can migrate child process with CAP_SYS_NICE */
280 ret = check_addr_on_node(testp, node2);
281
282 free(testp);
283 close(child_ready[1]);
284 close(pages_migrated[0]);
285 exit(ret);
286 default:
287 close(child_ready[1]);
288 close(pages_migrated[0]);
289
290 if (!cap_sys_nice)
291 if (seteuid(ltpuser->pw_uid) == -1)
292 tst_brkm(TBROK | TERRNO, NULL,
293 "seteuid failed");
294
295 /* wait until child is ready on node1, then migrate and
296 * signal to check current node */
297 if (read(child_ready[0], &tmp, 1) != 1)
298 tst_brkm(TBROK | TERRNO, NULL, "read #2 failed");
299 migrate_to_node(child, node2);
300 if (write(pages_migrated[1], &tmp, 1) != 1)
301 tst_brkm(TBROK | TERRNO, NULL, "write #2 failed");
302
303 if (waitpid(child, &status, 0) == -1)
304 tst_brkm(TBROK | TERRNO, cleanup, "waitpid");
305 if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
306 tst_resm(TFAIL, "child returns %d", status);
307 close(child_ready[0]);
308 close(pages_migrated[1]);
309
310 /* reset euid, so this testcase can be used in loop */
311 if (!cap_sys_nice)
312 if (seteuid(0) == -1)
313 tst_brkm(TBROK | TERRNO, NULL,
314 "seteuid failed");
315 }
316 }
317
main(int argc,char * argv[])318 int main(int argc, char *argv[])
319 {
320 int lc;
321
322 tst_parse_opts(argc, argv, options, NULL);
323
324 setup();
325 for (lc = 0; TEST_LOOPING(lc); lc++) {
326 tst_count = 0;
327 test_migrate_current_process(nodeA, nodeB, 1);
328 test_migrate_current_process(nodeA, nodeB, 0);
329 test_migrate_other_process(nodeA, nodeB, 1);
330 test_migrate_other_process(nodeA, nodeB, 0);
331 }
332 cleanup();
333 tst_exit();
334 }
335
setup(void)336 static void setup(void)
337 {
338 int ret, i, j;
339 int pagesize = getpagesize();
340 void *p;
341
342 tst_require_root();
343 TEST(ltp_syscall(__NR_migrate_pages, 0, 0, NULL, NULL));
344
345 if (numa_available() == -1)
346 tst_brkm(TCONF, NULL, "NUMA not available");
347
348 ret = get_allowed_nodes_arr(NH_MEMS, &num_nodes, &nodes);
349 if (ret < 0)
350 tst_brkm(TBROK | TERRNO, NULL, "get_allowed_nodes(): %d", ret);
351
352 if (num_nodes < 2)
353 tst_brkm(TCONF, NULL, "at least 2 allowed NUMA nodes"
354 " are required");
355 else if (tst_kvercmp(2, 6, 18) < 0)
356 tst_brkm(TCONF, NULL, "2.6.18 or greater kernel required");
357
358 /*
359 * find 2 nodes, which can hold NODE_MIN_FREEMEM bytes
360 * The reason is that:
361 * 1. migrate_pages() is expected to succeed
362 * 2. this test avoids hitting:
363 * Bug 870326 - migrate_pages() reports success, but pages are
364 * not moved to desired node
365 * https://bugzilla.redhat.com/show_bug.cgi?id=870326
366 */
367 nodeA = nodeB = -1;
368 for (i = 0; i < num_nodes; i++) {
369 p = numa_alloc_onnode(NODE_MIN_FREEMEM, nodes[i]);
370 if (p == NULL)
371 break;
372 memset(p, 0xff, NODE_MIN_FREEMEM);
373
374 j = 0;
375 while (j < NODE_MIN_FREEMEM) {
376 if (addr_on_node(p + j) != nodes[i])
377 break;
378 j += pagesize;
379 }
380 numa_free(p, NODE_MIN_FREEMEM);
381
382 if (j >= NODE_MIN_FREEMEM) {
383 if (nodeA == -1)
384 nodeA = nodes[i];
385 else if (nodeB == -1)
386 nodeB = nodes[i];
387 else
388 break;
389 }
390 }
391
392 if (nodeA == -1 || nodeB == -1)
393 tst_brkm(TCONF, NULL, "at least 2 NUMA nodes with "
394 "free mem > %d are needed", NODE_MIN_FREEMEM);
395 tst_resm(TINFO, "Using nodes: %d %d", nodeA, nodeB);
396
397 ltpuser = getpwnam(nobody_uid);
398 if (ltpuser == NULL)
399 tst_brkm(TBROK | TERRNO, NULL, "getpwnam failed");
400
401 TEST_PAUSE;
402 }
403
cleanup(void)404 static void cleanup(void)
405 {
406 free(nodes);
407 }
408
409 #else /* __NR_migrate_pages */
main(void)410 int main(void)
411 {
412 tst_brkm(TCONF, NULL, "System doesn't support __NR_migrate_pages"
413 " or libnuma is not available");
414 }
415 #endif
416