#define TST_NO_DEFAULT_MAIN

#include "config.h"
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/param.h>
#include <errno.h>
#include <fcntl.h>
#if HAVE_NUMA_H
#include <numa.h>
#endif
#if HAVE_NUMAIF_H
#include <numaif.h>
#endif
#include <pthread.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>

#include "mem.h"
#include "numa_helper.h"

/* OOM */

static int alloc_mem(long int length, int testcase)
{
        char *s;
        long i, pagesz = getpagesize();
        int loop = 10;

        tst_res(TINFO, "thread (%lx), allocating %ld bytes.",
                (unsigned long) pthread_self(), length);

        s = mmap(NULL, length, PROT_READ | PROT_WRITE,
                 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
        if (s == MAP_FAILED)
                return errno;

        if (testcase == MLOCK) {
                while (mlock(s, length) == -1 && loop > 0) {
                        if (EAGAIN != errno)
                                return errno;
                        usleep(300000);
                        loop--;
                }
        }

#ifdef HAVE_MADV_MERGEABLE
        if (testcase == KSM && madvise(s, length, MADV_MERGEABLE) == -1)
                return errno;
#endif
        for (i = 0; i < length; i += pagesz)
                s[i] = '\a';

        return 0;
}

static void *child_alloc_thread(void *args)
{
        int ret = 0;

        /* keep allocating until there's an error */
        while (!ret)
                ret = alloc_mem(LENGTH, (long)args);
        exit(ret);
}

static void child_alloc(int testcase, int lite, int threads)
{
        int i;
        pthread_t *th;

        if (lite) {
                int ret = alloc_mem(TESTMEM + MB, testcase);
                exit(ret);
        }

        th = malloc(sizeof(pthread_t) * threads);
        if (!th) {
                tst_res(TINFO | TERRNO, "malloc");
                goto out;
        }

        for (i = 0; i < threads; i++) {
                TEST(pthread_create(&th[i], NULL, child_alloc_thread,
                        (void *)((long)testcase)));
                if (TEST_RETURN) {
                        tst_res(TINFO | TRERRNO, "pthread_create");
                        /*
                         * Keep going if a thread other than the first fails
                         * to spawn due to lack of resources.
                         */
                        if (i == 0 || TEST_RETURN != EAGAIN)
                                goto out;
                }
        }

        /* wait for one of the threads to exit the whole process */
        while (1)
                sleep(1);
out:
        exit(1);
}

/*
 * oom - allocate memory according to the specified testcase and check
 * the desired outcome (e.g. child killed, operation failed with ENOMEM)
 * @testcase: selects how the child allocates memory;
 *            valid choices are NORMAL, MLOCK and KSM
 * @lite: if non-zero, the child makes only a single TESTMEM+MB allocation;
 *        if zero, the child keeps allocating memory until it gets killed
 *        or some operation fails
 * @retcode: expected return code of the child process;
 *           if it matches the child's return code, this function reports
 *           PASS, otherwise it reports FAIL
 * @allow_sigkill: if zero and the child is killed, this function reports
 *                 FAIL; if non-zero and the child is killed by SIGKILL,
 *                 it is considered a PASS
 */
void oom(int testcase, int lite, int retcode, int allow_sigkill)
{
        pid_t pid;
        int status, threads;

        switch (pid = SAFE_FORK()) {
        case 0:
                threads = MAX(1, tst_ncpus() - 1);
                child_alloc(testcase, lite, threads);
        default:
                break;
        }

        tst_res(TINFO, "expected victim is %d.", pid);
        SAFE_WAITPID(-1, &status, 0);

        if (WIFSIGNALED(status)) {
                if (allow_sigkill && WTERMSIG(status) == SIGKILL) {
                        tst_res(TPASS, "victim signalled: (%d) %s",
                                SIGKILL, tst_strsig(SIGKILL));
                } else {
                        tst_res(TFAIL, "victim signalled: (%d) %s",
                                WTERMSIG(status),
                                tst_strsig(WTERMSIG(status)));
                }
        } else if (WIFEXITED(status)) {
                if (WEXITSTATUS(status) == retcode) {
                        tst_res(TPASS, "victim retcode: (%d) %s",
                                retcode, strerror(retcode));
                } else {
                        tst_res(TFAIL, "victim unexpectedly ended with "
                                "retcode: %d, expected: %d",
                                WEXITSTATUS(status), retcode);
                }
        } else {
                tst_res(TFAIL, "victim unexpectedly ended");
        }
}
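
/*
 * Illustrative usage (an example sketch, not taken from any particular
 * test): expect the allocating child either to exit with ENOMEM or to be
 * OOM-killed (SIGKILL is accepted because allow_sigkill is non-zero):
 *
 *      oom(NORMAL, 0, ENOMEM, 1);
 */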

#ifdef HAVE_NUMA_V2
static void set_global_mempolicy(int mempolicy)
{
        unsigned long nmask[MAXNODES / BITS_PER_LONG] = { 0 };
        int num_nodes, *nodes;
        int ret;

        if (mempolicy) {
                ret = get_allowed_nodes_arr(NH_MEMS|NH_CPUS, &num_nodes, &nodes);
                if (ret != 0)
                        tst_brk(TBROK|TERRNO, "get_allowed_nodes_arr");
                if (num_nodes < 2) {
                        tst_res(TINFO, "mempolicy requires a NUMA system");
                        free(nodes);
                        return;
                }
                switch (mempolicy) {
                case MPOL_BIND:
                        /* bind to the second node */
                        set_node(nmask, nodes[1]);
                        break;
                case MPOL_INTERLEAVE:
                case MPOL_PREFERRED:
                        if (num_nodes == 2) {
                                tst_res(TINFO, "this mempolicy requires "
                                        "more than 2 NUMA nodes");
                                free(nodes);
                                return;
                        } else {
                                /* use the 2nd and 3rd node */
                                set_node(nmask, nodes[1]);
                                set_node(nmask, nodes[2]);
                        }
                        break;
                default:
                        tst_brk(TBROK|TERRNO, "Bad mempolicy mode");
                }
                if (set_mempolicy(mempolicy, nmask, MAXNODES) == -1)
                        tst_brk(TBROK|TERRNO, "set_mempolicy");
        }
}
#else
static void set_global_mempolicy(int mempolicy LTP_ATTRIBUTE_UNUSED) { }
#endif

void testoom(int mempolicy, int lite, int retcode, int allow_sigkill)
{
        int ksm_run_orig;

        set_global_mempolicy(mempolicy);

        tst_res(TINFO, "start normal OOM testing.");
        oom(NORMAL, lite, retcode, allow_sigkill);

        tst_res(TINFO, "start OOM testing for mlocked pages.");
        oom(MLOCK, lite, retcode, allow_sigkill);

        /*
         * Skip oom(KSM) if lite == 1, since limit_in_bytes may vary from
         * run to run, which isn't reliable for the oom03 cgroup test.
         */
        if (access(PATH_KSM, F_OK) == -1 || lite == 1) {
                tst_res(TINFO, "KSM is not configured or lite == 1, "
                        "skipping OOM test for KSM pages");
        } else {
                tst_res(TINFO, "start OOM testing for KSM pages.");
                SAFE_FILE_SCANF(PATH_KSM "run", "%d", &ksm_run_orig);
                SAFE_FILE_PRINTF(PATH_KSM "run", "1");
                oom(KSM, lite, retcode, allow_sigkill);
                SAFE_FILE_PRINTF(PATH_KSM "run", "%d", ksm_run_orig);
        }
}
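
/*
 * Illustrative usage (values are examples only): run the NORMAL, MLOCK
 * and KSM rounds with the default memory policy, accepting either an
 * ENOMEM exit or an OOM kill:
 *
 *      testoom(0, 0, ENOMEM, 1);
 */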

/* KSM */

static int max_page_sharing;

void save_max_page_sharing(void)
{
        if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
                SAFE_FILE_SCANF(PATH_KSM "max_page_sharing",
                                "%d", &max_page_sharing);
}

void restore_max_page_sharing(void)
{
        if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
                FILE_PRINTF(PATH_KSM "max_page_sharing",
                            "%d", max_page_sharing);
}

static void check(char *path, long int value)
{
        char fullpath[BUFSIZ];
        long actual_val;

        snprintf(fullpath, BUFSIZ, PATH_KSM "%s", path);
        SAFE_FILE_SCANF(fullpath, "%ld", &actual_val);

        if (actual_val != value)
                tst_res(TFAIL, "%s is not %ld but %ld.", path, value,
                        actual_val);
        else
                tst_res(TPASS, "%s is %ld.", path, actual_val);
}

static void wait_ksmd_full_scan(void)
{
        unsigned long full_scans, at_least_one_full_scan;
        int count = 0;

        SAFE_FILE_SCANF(PATH_KSM "full_scans", "%lu", &full_scans);
        /*
         * The current scan is already in progress so we can't guarantee that
         * the get_user_pages() is called on every existing rmap_item if we
         * only waited for the remaining part of the scan.
         *
         * The actual merging happens after the unstable tree has been built so
         * we need to wait at least two full scans to guarantee merging, hence
         * wait full_scans to increment by 3 so that at least two full scans
         * will run.
         */
        at_least_one_full_scan = full_scans + 3;
        while (full_scans < at_least_one_full_scan) {
                sleep(1);
                count++;
                SAFE_FILE_SCANF(PATH_KSM "full_scans", "%lu", &full_scans);
        }

        tst_res(TINFO, "ksm daemon takes %ds to run two full scans",
                count);
}

static void final_group_check(int run, int pages_shared, int pages_sharing,
                              int pages_volatile, int pages_unshared,
                              int sleep_millisecs, int pages_to_scan)
{
        tst_res(TINFO, "check!");
        check("run", run);
        check("pages_shared", pages_shared);
        check("pages_sharing", pages_sharing);
        check("pages_volatile", pages_volatile);
        check("pages_unshared", pages_unshared);
        check("sleep_millisecs", sleep_millisecs);
        check("pages_to_scan", pages_to_scan);
}

static void group_check(int run, int pages_shared, int pages_sharing,
                        int pages_volatile, int pages_unshared,
                        int sleep_millisecs, int pages_to_scan)
{
        if (run != 1) {
                tst_res(TFAIL, "group_check run is not 1, %d.", run);
        } else {
                /* wait for ksm daemon to scan all mergeable pages. */
                wait_ksmd_full_scan();
        }

        final_group_check(run, pages_shared, pages_sharing,
                          pages_volatile, pages_unshared,
                          sleep_millisecs, pages_to_scan);
}

static void verify(char **memory, char value, int proc,
                   int start, int end, int start2, int end2)
{
        int i, j;
        void *s = NULL;

        s = SAFE_MALLOC((end - start) * (end2 - start2));

        tst_res(TINFO, "child %d verifies memory content.", proc);
        memset(s, value, (end - start) * (end2 - start2));
        if (memcmp(memory[start], s, (end - start) * (end2 - start2)) != 0)
                for (j = start; j < end; j++)
                        for (i = start2; i < end2; i++)
                                if (memory[j][i] != value)
                                        tst_res(TFAIL, "child %d has %c at "
                                                "%d,%d,%d.",
                                                proc, memory[j][i], proc,
                                                j, i);
        free(s);
}

void check_hugepage(void)
{
        if (access(PATH_HUGEPAGES, F_OK))
                tst_brk(TCONF, "Huge page is not supported.");
}

void write_memcg(void)
{
        SAFE_FILE_PRINTF(MEMCG_LIMIT, "%ld", TESTMEM);

        SAFE_FILE_PRINTF(MEMCG_PATH_NEW "/tasks", "%d", getpid());
}

struct ksm_merge_data {
        char data;
        unsigned int mergeable_size;
};

static void ksm_child_memset(int child_num, int size, int total_unit,
                             struct ksm_merge_data ksm_merge_data, char **memory)
{
        int i = 0, j;
        int unit = size / total_unit;

        tst_res(TINFO, "child %d continues...", child_num);

        if (ksm_merge_data.mergeable_size == size * MB) {
                tst_res(TINFO, "child %d allocates %d MB filled with '%c'",
                        child_num, size, ksm_merge_data.data);
        } else {
                tst_res(TINFO, "child %d allocates %d MB filled with '%c'"
                        " except one page with 'e'",
                        child_num, size, ksm_merge_data.data);
        }

        for (j = 0; j < total_unit; j++) {
                for (i = 0; (unsigned int)i < unit * MB; i++)
                        memory[j][i] = ksm_merge_data.data;
        }

        /*
         * If the region contains an unshared page, set the character 'e'
         * at the end of the last page.
         */
        if (ksm_merge_data.mergeable_size < size * MB)
                memory[j-1][i-1] = 'e';
}

static void create_ksm_child(int child_num, int size, int unit,
                             struct ksm_merge_data *ksm_merge_data)
{
        int j, total_unit;
        char **memory;

        /* total number of allocation units */
        total_unit = size / unit;

        /* allocate the array holding one pointer per unit */
        memory = SAFE_MALLOC(total_unit * sizeof(char *));
        for (j = 0; j < total_unit; j++) {
                memory[j] = SAFE_MMAP(NULL, unit * MB, PROT_READ|PROT_WRITE,
                                      MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
#ifdef HAVE_MADV_MERGEABLE
                if (madvise(memory[j], unit * MB, MADV_MERGEABLE) == -1)
                        tst_brk(TBROK|TERRNO, "madvise");
#endif
        }

        tst_res(TINFO, "child %d stops.", child_num);
        if (raise(SIGSTOP) == -1)
                tst_brk(TBROK|TERRNO, "raise");
        fflush(stdout);

        for (j = 0; j < 4; j++) {

                ksm_child_memset(child_num, size, total_unit,
                                 ksm_merge_data[j], memory);

                fflush(stdout);

                tst_res(TINFO, "child %d stops.", child_num);
                if (raise(SIGSTOP) == -1)
                        tst_brk(TBROK|TERRNO, "raise");

                if (ksm_merge_data[j].mergeable_size < size * MB) {
                        verify(memory, 'e', child_num, total_unit - 1,
                               total_unit, unit * MB - 1, unit * MB);
                        verify(memory, ksm_merge_data[j].data, child_num,
                               0, total_unit, 0, unit * MB - 1);
                } else {
                        verify(memory, ksm_merge_data[j].data, child_num,
                               0, total_unit, 0, unit * MB);
                }
        }

        tst_res(TINFO, "child %d finished.", child_num);
}

static void stop_ksm_children(int *child, int num)
{
        int k, status;

        tst_res(TINFO, "wait for all children to stop.");
        for (k = 0; k < num; k++) {
                SAFE_WAITPID(child[k], &status, WUNTRACED);
                if (!WIFSTOPPED(status))
                        tst_brk(TBROK, "child %d was not stopped", k);
        }
}

static void resume_ksm_children(int *child, int num)
{
        int k;

        tst_res(TINFO, "resume all children.");
        for (k = 0; k < num; k++)
                SAFE_KILL(child[k], SIGCONT);

        fflush(stdout);
}

void create_same_memory(int size, int num, int unit)
{
        int i, j, status, *child;
        unsigned long ps, pages;
        struct ksm_merge_data **ksm_data;

        struct ksm_merge_data ksm_data0[] = {
                {'c', size*MB}, {'c', size*MB}, {'d', size*MB}, {'d', size*MB},
        };
        struct ksm_merge_data ksm_data1[] = {
                {'a', size*MB}, {'b', size*MB}, {'d', size*MB}, {'d', size*MB-1},
        };
        struct ksm_merge_data ksm_data2[] = {
                {'a', size*MB}, {'a', size*MB}, {'d', size*MB}, {'d', size*MB},
        };

        ps = sysconf(_SC_PAGE_SIZE);
        pages = MB / ps;

        ksm_data = malloc((num - 3) * sizeof(struct ksm_merge_data *));
        /* from the third child onwards, the data is the same as the first child's */
        for (i = 0; i < num - 3; i++) {
                ksm_data[i] = malloc(4 * sizeof(struct ksm_merge_data));
                for (j = 0; j < 4; j++) {
                        ksm_data[i][j].data = ksm_data0[j].data;
                        ksm_data[i][j].mergeable_size =
                                ksm_data0[j].mergeable_size;
                }
        }

        child = SAFE_MALLOC(num * sizeof(int));

        for (i = 0; i < num; i++) {
                fflush(stdout);
                switch (child[i] = SAFE_FORK()) {
                case 0:
                        if (i == 0) {
                                create_ksm_child(i, size, unit, ksm_data0);
                                exit(0);
                        } else if (i == 1) {
                                create_ksm_child(i, size, unit, ksm_data1);
                                exit(0);
                        } else if (i == 2) {
                                create_ksm_child(i, size, unit, ksm_data2);
                                exit(0);
                        } else {
                                create_ksm_child(i, size, unit, ksm_data[i-3]);
                                exit(0);
                        }
                }
        }

        stop_ksm_children(child, num);

        tst_res(TINFO, "KSM merging...");
        if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
                SAFE_FILE_PRINTF(PATH_KSM "max_page_sharing",
                                 "%ld", size * pages * num);
        SAFE_FILE_PRINTF(PATH_KSM "run", "1");
        SAFE_FILE_PRINTF(PATH_KSM "pages_to_scan", "%ld", size * pages * num);
        SAFE_FILE_PRINTF(PATH_KSM "sleep_millisecs", "0");

        resume_ksm_children(child, num);
        stop_ksm_children(child, num);
        group_check(1, 2, size * num * pages - 2, 0, 0, 0, size * pages * num);

        resume_ksm_children(child, num);
        stop_ksm_children(child, num);
        group_check(1, 3, size * num * pages - 3, 0, 0, 0, size * pages * num);

        resume_ksm_children(child, num);
        stop_ksm_children(child, num);
        group_check(1, 1, size * num * pages - 1, 0, 0, 0, size * pages * num);

        resume_ksm_children(child, num);
        stop_ksm_children(child, num);
        group_check(1, 1, size * num * pages - 2, 0, 1, 0, size * pages * num);

        tst_res(TINFO, "KSM unmerging...");
        SAFE_FILE_PRINTF(PATH_KSM "run", "2");

        resume_ksm_children(child, num);
        final_group_check(2, 0, 0, 0, 0, 0, size * pages * num);

        tst_res(TINFO, "stop KSM.");
        SAFE_FILE_PRINTF(PATH_KSM "run", "0");
        final_group_check(0, 0, 0, 0, 0, 0, size * pages * num);

        while (waitpid(-1, &status, 0) > 0)
                if (WEXITSTATUS(status) != 0)
                        tst_res(TFAIL, "child exit status is %d",
                                WEXITSTATUS(status));
}
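
/*
 * Illustrative usage (sizes are examples only, not mandated by this
 * helper): three children, each allocating 128 MB in 1 MB units, driven
 * through the merge/unmerge cycle above:
 *
 *      create_same_memory(128, 3, 1);
 */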

void test_ksm_merge_across_nodes(unsigned long nr_pages)
{
        char **memory;
        int i, ret;
        int num_nodes, *nodes;
        unsigned long length;
        unsigned long pagesize;

#ifdef HAVE_NUMA_V2
        unsigned long nmask[MAXNODES / BITS_PER_LONG] = { 0 };
#endif

        ret = get_allowed_nodes_arr(NH_MEMS|NH_CPUS, &num_nodes, &nodes);
        if (ret != 0)
                tst_brk(TBROK|TERRNO, "get_allowed_nodes_arr");
        if (num_nodes < 2) {
                tst_res(TINFO, "requires a NUMA system with at least 2 nodes");
                free(nodes);
                return;
        }

        pagesize = sysconf(_SC_PAGE_SIZE);
        length = nr_pages * pagesize;

        memory = SAFE_MALLOC(num_nodes * sizeof(char *));
        for (i = 0; i < num_nodes; i++) {
                memory[i] = SAFE_MMAP(NULL, length, PROT_READ|PROT_WRITE,
                                      MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
#ifdef HAVE_MADV_MERGEABLE
                if (madvise(memory[i], length, MADV_MERGEABLE) == -1)
                        tst_brk(TBROK|TERRNO, "madvise");
#endif

#ifdef HAVE_NUMA_V2
                clean_node(nmask);
                set_node(nmask, nodes[i]);
                /*
                 * Use mbind() to make sure each node contains
                 * 'length' bytes of memory.
                 */
                ret = mbind(memory[i], length, MPOL_BIND, nmask, MAXNODES, 0);
                if (ret == -1)
                        tst_brk(TBROK|TERRNO, "mbind");
#endif

                memset(memory[i], 10, length);
        }

        SAFE_FILE_PRINTF(PATH_KSM "sleep_millisecs", "0");
        SAFE_FILE_PRINTF(PATH_KSM "pages_to_scan", "%ld",
                         nr_pages * num_nodes);
        if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
                SAFE_FILE_PRINTF(PATH_KSM "max_page_sharing",
                                 "%ld", nr_pages * num_nodes);
        /*
         * The merge_across_nodes setting can be changed only when there
         * are no KSM shared pages in the system, so first set run to 2
         * to unmerge pages, then set it back to 1 after changing
         * merge_across_nodes, to remerge according to the new setting.
         */
        SAFE_FILE_PRINTF(PATH_KSM "run", "2");
        tst_res(TINFO, "Start to test KSM with merge_across_nodes=1");
        SAFE_FILE_PRINTF(PATH_KSM "merge_across_nodes", "1");
        SAFE_FILE_PRINTF(PATH_KSM "run", "1");
        group_check(1, 1, nr_pages * num_nodes - 1, 0, 0, 0,
                    nr_pages * num_nodes);

        SAFE_FILE_PRINTF(PATH_KSM "run", "2");
        tst_res(TINFO, "Start to test KSM with merge_across_nodes=0");
        SAFE_FILE_PRINTF(PATH_KSM "merge_across_nodes", "0");
        SAFE_FILE_PRINTF(PATH_KSM "run", "1");
        group_check(1, num_nodes, nr_pages * num_nodes - num_nodes,
                    0, 0, 0, nr_pages * num_nodes);

        SAFE_FILE_PRINTF(PATH_KSM "run", "2");
}

/* THP */

/* cpuset/memcg */

static void gather_node_cpus(char *cpus, long nd)
{
        int ncpus = 0;
        int i;
        long online;
        char buf[BUFSIZ];
        char path[BUFSIZ], path1[BUFSIZ];

        while (path_exist(PATH_SYS_SYSTEM "/cpu/cpu%d", ncpus))
                ncpus++;

        for (i = 0; i < ncpus; i++) {
                snprintf(path, BUFSIZ,
                         PATH_SYS_SYSTEM "/node/node%ld/cpu%d", nd, i);
                if (path_exist(path)) {
                        snprintf(path1, BUFSIZ, "%s/online", path);
                        /*
                         * if there is no online knob, then the cpu cannot
                         * be taken offline
                         */
                        if (path_exist(path1)) {
                                SAFE_FILE_SCANF(path1, "%ld", &online);
                                if (online == 0)
                                        continue;
                        }
                        sprintf(buf, "%d,", i);
                        strcat(cpus, buf);
                }
        }
        /* Remove the trailing comma. */
        cpus[strlen(cpus) - 1] = '\0';
}

void read_cpuset_files(char *prefix, char *filename, char *retbuf)
{
        int fd;
        char path[BUFSIZ];

        /*
         * Try either '/dev/cpuset/XXXX' or '/dev/cpuset/cpuset.XXXX';
         * see Documentation/cgroups/cpusets.txt in the kernel source
         * for details.
         */
        snprintf(path, BUFSIZ, "%s/%s", prefix, filename);
        fd = open(path, O_RDONLY);
        if (fd == -1) {
                if (errno == ENOENT) {
                        snprintf(path, BUFSIZ, "%s/cpuset.%s",
                                 prefix, filename);
                        fd = SAFE_OPEN(path, O_RDONLY);
                } else
                        tst_brk(TBROK | TERRNO, "open %s", path);
        }
        if (read(fd, retbuf, BUFSIZ) < 0)
                tst_brk(TBROK | TERRNO, "read %s", path);
        close(fd);
}

void write_cpuset_files(char *prefix, char *filename, char *buf)
{
        int fd;
        char path[BUFSIZ];

        /*
         * Try either '/dev/cpuset/XXXX' or '/dev/cpuset/cpuset.XXXX';
         * see Documentation/cgroups/cpusets.txt in the kernel source
         * for details.
         */
        snprintf(path, BUFSIZ, "%s/%s", prefix, filename);
        fd = open(path, O_WRONLY);
        if (fd == -1) {
                if (errno == ENOENT) {
                        snprintf(path, BUFSIZ, "%s/cpuset.%s",
                                 prefix, filename);
                        fd = SAFE_OPEN(path, O_WRONLY);
                } else
                        tst_brk(TBROK | TERRNO, "open %s", path);
        }
        SAFE_WRITE(1, fd, buf, strlen(buf));
        close(fd);
}

void write_cpusets(long nd)
{
        char buf[BUFSIZ];
        char cpus[BUFSIZ] = "";

        snprintf(buf, BUFSIZ, "%ld", nd);
        write_cpuset_files(CPATH_NEW, "mems", buf);

        gather_node_cpus(cpus, nd);
        /*
         * If node 'nd' doesn't contain any CPUs, fall back to CPU 0
         * as the value of cpuset.cpus.
         */
        if (strlen(cpus) != 0) {
                write_cpuset_files(CPATH_NEW, "cpus", cpus);
        } else {
                tst_res(TINFO, "No CPUs in node%ld; using only CPU0", nd);
                write_cpuset_files(CPATH_NEW, "cpus", "0");
        }

        SAFE_FILE_PRINTF(CPATH_NEW "/tasks", "%d", getpid());
}

void umount_mem(char *path, char *path_new)
{
        FILE *fp;
        int fd;
        char s_new[BUFSIZ], s[BUFSIZ], value[BUFSIZ];

        /* Move all processes from the child group's tasks file back to the parent. */
        sprintf(s, "%s/tasks", path);
        fd = open(s, O_WRONLY);
        if (fd == -1)
                tst_res(TWARN | TERRNO, "open %s", s);

        snprintf(s_new, BUFSIZ, "%s/tasks", path_new);
        fp = fopen(s_new, "r");
        if (fp == NULL)
                tst_res(TWARN | TERRNO, "fopen %s", s_new);
        if ((fd != -1) && (fp != NULL)) {
                while (fgets(value, BUFSIZ, fp) != NULL)
                        if (write(fd, value, strlen(value) - 1)
                            != (ssize_t)strlen(value) - 1)
                                tst_res(TWARN | TERRNO, "write %s", s);
        }
        if (fd != -1)
                close(fd);
        if (fp != NULL)
                fclose(fp);
        if (rmdir(path_new) == -1)
                tst_res(TWARN | TERRNO, "rmdir %s", path_new);
        if (umount(path) == -1)
                tst_res(TWARN | TERRNO, "umount %s", path);
        if (rmdir(path) == -1)
                tst_res(TWARN | TERRNO, "rmdir %s", path);
}

void mount_mem(char *name, char *fs, char *options, char *path, char *path_new)
{
        SAFE_MKDIR(path, 0777);
        if (mount(name, path, fs, 0, options) == -1) {
                if (errno == ENODEV) {
                        if (rmdir(path) == -1)
                                tst_res(TWARN | TERRNO, "rmdir %s failed",
                                        path);
                        tst_brk(TCONF,
                                "file system %s is not configured in kernel",
                                fs);
                }
                tst_brk(TBROK | TERRNO, "mount %s", path);
        }
        SAFE_MKDIR(path_new, 0777);
}

/* shared */

/* Warning: *DO NOT* use this function in a child process */
unsigned int get_a_numa_node(void)
{
        unsigned int nd1, nd2;
        int ret;

        ret = get_allowed_nodes(0, 2, &nd1, &nd2);
        switch (ret) {
        case 0:
                break;
        case -3:
                tst_brk(TCONF, "requires a NUMA system.");
        default:
                tst_brk(TBROK | TERRNO, "1st get_allowed_nodes");
        }

        ret = get_allowed_nodes(NH_MEMS | NH_CPUS, 1, &nd1);
        switch (ret) {
        case 0:
                tst_res(TINFO, "get node%u.", nd1);
                return nd1;
        case -3:
                tst_brk(TCONF, "requires a NUMA system that has "
                        "at least one node with both memory and CPU "
                        "available.");
        default:
                tst_brk(TBROK | TERRNO, "2nd get_allowed_nodes");
        }

        /* not reached */
        abort();
}
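
/*
 * Illustrative usage (a sketch, assuming the cpuset hierarchy has
 * already been mounted via mount_mem()): pick a node that has both
 * memory and CPUs and bind the current process to it:
 *
 *      unsigned int node = get_a_numa_node();
 *
 *      write_cpusets(node);
 */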

int path_exist(const char *path, ...)
{
        va_list ap;
        char pathbuf[PATH_MAX];

        va_start(ap, path);
        vsnprintf(pathbuf, sizeof(pathbuf), path, ap);
        va_end(ap);

        return access(pathbuf, F_OK) == 0;
}

void set_sys_tune(char *sys_file, long tune, int check)
{
        long val;
        char path[BUFSIZ];

        tst_res(TINFO, "set %s to %ld", sys_file, tune);

        snprintf(path, BUFSIZ, PATH_SYSVM "%s", sys_file);
        SAFE_FILE_PRINTF(path, "%ld", tune);

        if (check) {
                val = get_sys_tune(sys_file);
                if (val != tune)
                        tst_brk(TBROK, "%s = %ld, but expect %ld",
                                sys_file, val, tune);
        }
}

long get_sys_tune(char *sys_file)
{
        char path[BUFSIZ];
        long tune;

        snprintf(path, BUFSIZ, PATH_SYSVM "%s", sys_file);
        SAFE_FILE_SCANF(path, "%ld", &tune);

        return tune;
}
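
/*
 * Illustrative usage (a sketch; the knob name is just an example): save,
 * change and later restore a /proc/sys/vm tunable, verifying that the
 * new value took effect:
 *
 *      long orig = get_sys_tune("overcommit_memory");
 *
 *      set_sys_tune("overcommit_memory", 1, 1);
 *      ...
 *      set_sys_tune("overcommit_memory", orig, 0);
 */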

void update_shm_size(size_t *shm_size)
{
        size_t shmmax;

        SAFE_FILE_SCANF(PATH_SHMMAX, "%zu", &shmmax);
        if (*shm_size > shmmax) {
                tst_res(TINFO, "Set shm_size to shmmax: %zu", shmmax);
                *shm_size = shmmax;
        }
}

int range_is_mapped(unsigned long low, unsigned long high)
{
        FILE *fp;

        fp = fopen("/proc/self/maps", "r");
        if (fp == NULL)
                tst_brk(TBROK | TERRNO, "Failed to open /proc/self/maps.");

        while (!feof(fp)) {
                unsigned long start, end;
                int ret;

                ret = fscanf(fp, "%lx-%lx %*[^\n]\n", &start, &end);
                if (ret != 2) {
                        fclose(fp);
                        tst_brk(TBROK | TERRNO, "Couldn't parse /proc/self/maps line.");
                }

                if ((start >= low) && (start < high)) {
                        fclose(fp);
                        return 1;
                }
                if ((end >= low) && (end < high)) {
                        fclose(fp);
                        return 1;
                }
        }

        fclose(fp);
        return 0;
}
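
/*
 * Illustrative usage (a sketch): verify that an address range no longer
 * shows up in /proc/self/maps, e.g. after munmap():
 *
 *      if (range_is_mapped(addr, addr + size))
 *              tst_res(TFAIL, "range is still mapped");
 */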