#include "config.h"
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <errno.h>
#include <fcntl.h>
#if HAVE_NUMA_H
#include <numa.h>
#endif
#if HAVE_NUMAIF_H
#include <numaif.h>
#endif
#include <pthread.h>
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "test.h"
#include "safe_macros.h"
#include "mem.h"
#include "numa_helper.h"

/* OOM */

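/*
 * Anonymously mmap() `length' bytes, optionally mlock() them (MLOCK case,
 * retrying a few times on EAGAIN) or mark them MADV_MERGEABLE (KSM case),
 * then dirty one byte per page. Returns 0 on success or an errno value.
 */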
static int alloc_mem(long int length, int testcase)
{
	char *s;
	long i, pagesz = getpagesize();
	int loop = 10;

	tst_resm(TINFO, "thread (%lx), allocating %ld bytes.",
		 (unsigned long) pthread_self(), length);

	s = mmap(NULL, length, PROT_READ | PROT_WRITE,
		 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (s == MAP_FAILED)
		return errno;

	if (testcase == MLOCK) {
		while (mlock(s, length) == -1 && loop > 0) {
			if (EAGAIN != errno)
				return errno;
			usleep(300000);
			loop--;
		}
	}

#ifdef HAVE_MADV_MERGEABLE
	if (testcase == KSM && madvise(s, length, MADV_MERGEABLE) == -1)
		return errno;
#endif
	for (i = 0; i < length; i += pagesz)
		s[i] = '\a';

	return 0;
}

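/*
 * Allocator thread body: keep calling alloc_mem() with LENGTH bytes until it
 * fails, then terminate the whole child process with that errno as exit code.
 */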
static void *child_alloc_thread(void *args)
{
	int ret = 0;

	/* keep allocating until there's an error */
	while (!ret)
		ret = alloc_mem(LENGTH, (long)args);
	exit(ret);
}

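/*
 * Child process entry: in lite mode do a single TESTMEM + MB allocation and
 * exit with its result; otherwise spawn `threads' allocator threads and sleep
 * until one of them exits the whole process.
 */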
static void child_alloc(int testcase, int lite, int threads)
{
	int i;
	pthread_t *th;

	if (lite) {
		int ret = alloc_mem(TESTMEM + MB, testcase);
		exit(ret);
	}

	th = malloc(sizeof(pthread_t) * threads);
	if (!th) {
		tst_resm(TINFO | TERRNO, "malloc");
		goto out;
	}

	for (i = 0; i < threads; i++) {
		TEST(pthread_create(&th[i], NULL, child_alloc_thread,
				    (void *)((long)testcase)));
		if (TEST_RETURN) {
			tst_resm(TINFO | TRERRNO, "pthread_create");
			/*
			 * Keep going if a thread other than the first fails
			 * to spawn due to lack of resources.
			 */
			if (i == 0 || TEST_RETURN != EAGAIN)
				goto out;
		}
	}

	/* wait for one of the threads to exit the whole process */
	while (1)
		sleep(1);
out:
	exit(1);
}

/*
 * oom - allocate memory according to the specified testcase and check the
 *       desired outcome (e.g. child killed, operation failed with ENOMEM)
 * @testcase: selects how the child allocates memory,
 *            valid choices are: NORMAL, MLOCK and KSM
 * @lite: if non-zero, the child makes only a single TESTMEM + MB allocation;
 *        if zero, the child keeps allocating memory until it gets killed
 *        or some operation fails
 * @retcode: expected return code of the child process;
 *           if it matches the child's return code, this function reports
 *           PASS, otherwise it reports FAIL
 * @allow_sigkill: if zero and the child is killed, this function reports
 *                 FAIL; if non-zero and the child is killed by SIGKILL,
 *                 it is considered a PASS
 */
void oom(int testcase, int lite, int retcode, int allow_sigkill)
{
	pid_t pid;
	int status, threads;

	switch (pid = fork()) {
	case -1:
		if (errno == retcode) {
			tst_resm(TPASS | TERRNO, "fork");
			return;
		}
		tst_brkm(TBROK | TERRNO, cleanup, "fork");
	case 0:
		threads = MAX(1, tst_ncpus() - 1);
		child_alloc(testcase, lite, threads);
	default:
		break;
	}

	tst_resm(TINFO, "expected victim is %d.", pid);
	if (waitpid(-1, &status, 0) == -1)
		tst_brkm(TBROK | TERRNO, cleanup, "waitpid");

	if (WIFSIGNALED(status)) {
		if (allow_sigkill && WTERMSIG(status) == SIGKILL) {
			tst_resm(TPASS, "victim signalled: (%d) %s",
				 SIGKILL,
				 tst_strsig(SIGKILL));
		} else {
			tst_resm(TFAIL, "victim signalled: (%d) %s",
				 WTERMSIG(status),
				 tst_strsig(WTERMSIG(status)));
		}
	} else if (WIFEXITED(status)) {
		if (WEXITSTATUS(status) == retcode) {
			tst_resm(TPASS, "victim retcode: (%d) %s",
				 retcode, strerror(retcode));
		} else {
			tst_resm(TFAIL, "victim unexpectedly ended with "
				 "retcode: %d, expected: %d",
				 WEXITSTATUS(status), retcode);
		}
	} else {
		tst_resm(TFAIL, "victim unexpectedly ended");
	}
}

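/*
 * Apply a global NUMA memory policy (MPOL_BIND, MPOL_INTERLEAVE or
 * MPOL_PREFERRED) built from the allowed nodes; quietly skipped when the
 * system does not have enough NUMA nodes or NUMA support is not compiled in.
 */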
static void set_global_mempolicy(int mempolicy)
{
#if HAVE_NUMA_H && HAVE_LINUX_MEMPOLICY_H && HAVE_NUMAIF_H \
	&& HAVE_MPOL_CONSTANTS
	unsigned long nmask[MAXNODES / BITS_PER_LONG] = { 0 };
	int num_nodes, *nodes;
	int ret;

	if (mempolicy) {
		ret = get_allowed_nodes_arr(NH_MEMS|NH_CPUS, &num_nodes, &nodes);
		if (ret != 0)
			tst_brkm(TBROK|TERRNO, cleanup,
				 "get_allowed_nodes_arr");
		if (num_nodes < 2) {
			tst_resm(TINFO, "mempolicy needs NUMA system support");
			free(nodes);
			return;
		}
		switch (mempolicy) {
		case MPOL_BIND:
			/* bind to the second node */
			set_node(nmask, nodes[1]);
			break;
		case MPOL_INTERLEAVE:
		case MPOL_PREFERRED:
			if (num_nodes == 2) {
				tst_resm(TINFO, "this mempolicy needs "
					 "more than 2 NUMA nodes");
				free(nodes);
				return;
			} else {
				/* use the 2nd and 3rd nodes */
				set_node(nmask, nodes[1]);
				set_node(nmask, nodes[2]);
			}
			break;
		default:
			tst_brkm(TBROK|TERRNO, cleanup, "Bad mempolicy mode");
		}
		if (set_mempolicy(mempolicy, nmask, MAXNODES) == -1)
			tst_brkm(TBROK|TERRNO, cleanup, "set_mempolicy");
	}
#endif
}

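/*
 * Run the OOM scenario for normal anonymous pages, mlocked pages and, when
 * KSM is available and not in lite mode, KSM pages (temporarily enabling the
 * KSM daemon and restoring its previous "run" setting afterwards).
 */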
void testoom(int mempolicy, int lite, int retcode, int allow_sigkill)
{
	int ksm_run_orig;

	set_global_mempolicy(mempolicy);

	tst_resm(TINFO, "start normal OOM testing.");
	oom(NORMAL, lite, retcode, allow_sigkill);

	tst_resm(TINFO, "start OOM testing for mlocked pages.");
	oom(MLOCK, lite, retcode, allow_sigkill);

	/*
	 * Skip oom(KSM) if lite == 1, since limit_in_bytes may vary from
	 * run to run, which isn't reliable for the oom03 cgroup test.
	 */
	if (access(PATH_KSM, F_OK) == -1 || lite == 1) {
		tst_resm(TINFO, "KSM is not configured or lite == 1, "
			 "skip OOM test for KSM pages");
	} else {
		tst_resm(TINFO, "start OOM testing for KSM pages.");
		SAFE_FILE_SCANF(cleanup, PATH_KSM "run", "%d", &ksm_run_orig);
		SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "1");
		oom(KSM, lite, retcode, allow_sigkill);
		SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "%d", ksm_run_orig);
	}
}

/* KSM */

static int max_page_sharing;

void save_max_page_sharing(void)
{
	if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
		SAFE_FILE_SCANF(NULL, PATH_KSM "max_page_sharing",
				"%d", &max_page_sharing);
}

void restore_max_page_sharing(void)
{
	if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
		FILE_PRINTF(PATH_KSM "max_page_sharing",
			    "%d", max_page_sharing);
}

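/*
 * Read the KSM sysfs file `path' and report TFAIL when its current value
 * differs from the expected `value'.
 */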
static void check(char *path, long int value)
{
	char fullpath[BUFSIZ];
	long actual_val;

	snprintf(fullpath, BUFSIZ, PATH_KSM "%s", path);
	SAFE_FILE_SCANF(cleanup, fullpath, "%ld", &actual_val);

	tst_resm(TINFO, "%s is %ld.", path, actual_val);
	if (actual_val != value)
		tst_resm(TFAIL, "%s is not %ld.", path, value);
}

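/*
 * Poll the KSM page counters every 10 seconds until none of them changes
 * between two consecutive samples, i.e. until ksmd has scanned all
 * mergeable pages.
 */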
static void wait_ksmd_done(void)
{
	long pages_shared, pages_sharing, pages_volatile, pages_unshared;
	long old_pages_shared = 0, old_pages_sharing = 0;
	long old_pages_volatile = 0, old_pages_unshared = 0;
	int changing = 1, count = 0;

	while (changing) {
		sleep(10);
		count++;

		SAFE_FILE_SCANF(cleanup, PATH_KSM "pages_shared",
				"%ld", &pages_shared);

		SAFE_FILE_SCANF(cleanup, PATH_KSM "pages_sharing",
				"%ld", &pages_sharing);

		SAFE_FILE_SCANF(cleanup, PATH_KSM "pages_volatile",
				"%ld", &pages_volatile);

		SAFE_FILE_SCANF(cleanup, PATH_KSM "pages_unshared",
				"%ld", &pages_unshared);

		if (pages_shared != old_pages_shared ||
		    pages_sharing != old_pages_sharing ||
		    pages_volatile != old_pages_volatile ||
		    pages_unshared != old_pages_unshared) {
			old_pages_shared = pages_shared;
			old_pages_sharing = pages_sharing;
			old_pages_volatile = pages_volatile;
			old_pages_unshared = pages_unshared;
		} else {
			changing = 0;
		}
	}

	tst_resm(TINFO, "ksm daemon takes %ds to scan all mergeable pages",
		 count * 10);
}

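/*
 * Wait for ksmd to settle, then compare every KSM sysfs counter and tunable
 * against the values expected at this stage of the test.
 */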
static void group_check(int run, int pages_shared, int pages_sharing,
			int pages_volatile, int pages_unshared,
			int sleep_millisecs, int pages_to_scan)
{
	/* wait for ksm daemon to scan all mergeable pages. */
	wait_ksmd_done();

	tst_resm(TINFO, "check!");
	check("run", run);
	check("pages_shared", pages_shared);
	check("pages_sharing", pages_sharing);
	check("pages_volatile", pages_volatile);
	check("pages_unshared", pages_unshared);
	check("sleep_millisecs", sleep_millisecs);
	check("pages_to_scan", pages_to_scan);
}

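/*
 * Verify that memory[start..end)[start2..end2) is filled with `value';
 * on a mismatch, report every offending byte with TFAIL.
 */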
static void verify(char **memory, char value, int proc,
		   int start, int end, int start2, int end2)
{
	int i, j;
	void *s = NULL;

	s = malloc((end - start) * (end2 - start2));
	if (s == NULL)
		tst_brkm(TBROK | TERRNO, tst_exit, "malloc");

	tst_resm(TINFO, "child %d verifies memory content.", proc);
	memset(s, value, (end - start) * (end2 - start2));
	if (memcmp(memory[start], s, (end - start) * (end2 - start2))
	    != 0)
		for (j = start; j < end; j++)
			for (i = start2; i < end2; i++)
				if (memory[j][i] != value)
					tst_resm(TFAIL, "child %d has %c at "
						 "%d,%d,%d.",
						 proc, memory[j][i], proc,
						 j, i);
	free(s);
}

void write_memcg(void)
{
	SAFE_FILE_PRINTF(NULL, MEMCG_LIMIT, "%ld", TESTMEM);

	SAFE_FILE_PRINTF(NULL, MEMCG_PATH_NEW "/tasks", "%d", getpid());
}

struct ksm_merge_data {
	char data;
	unsigned int mergeable_size;
};

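/*
 * Fill all of the child's mapped units with the byte from `ksm_merge_data';
 * when mergeable_size is smaller than the full allocation, overwrite the
 * very last byte with 'e' so that one page stays unshared.
 */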
static void ksm_child_memset(int child_num, int size, int total_unit,
			     struct ksm_merge_data ksm_merge_data, char **memory)
{
	int i = 0, j;
	int unit = size / total_unit;

	tst_resm(TINFO, "child %d continues...", child_num);

	if (ksm_merge_data.mergeable_size == size * MB) {
		tst_resm(TINFO, "child %d allocates %d MB filled with '%c'",
			 child_num, size, ksm_merge_data.data);

	} else {
		tst_resm(TINFO, "child %d allocates %d MB filled with '%c'"
			 " except one page with 'e'",
			 child_num, size, ksm_merge_data.data);
	}

	for (j = 0; j < total_unit; j++) {
		for (i = 0; (unsigned int)i < unit * MB; i++)
			memory[j][i] = ksm_merge_data.data;
	}

	/*
	 * If the region contains an unshared page, set an 'e' char
	 * at the end of the last page.
	 */
	if (ksm_merge_data.mergeable_size < size * MB)
		memory[j-1][i-1] = 'e';
}

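/*
 * Map `size' MB of mergeable anonymous memory in `unit'-MB chunks, then run
 * four memset/verify rounds taken from `ksm_merge_data'. The child raises
 * SIGSTOP after the initial mapping and after each memset so that the parent
 * can inspect the KSM counters while the child is stopped.
 */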
static void create_ksm_child(int child_num, int size, int unit,
			     struct ksm_merge_data *ksm_merge_data)
{
	int j, total_unit;
	char **memory;

	/* total number of units to allocate */
	total_unit = size / unit;

	/* allocate the array of per-unit memory pointers */
	memory = malloc(total_unit * sizeof(char *));
	if (memory == NULL)
		tst_brkm(TBROK | TERRNO, tst_exit, "malloc");
	for (j = 0; j < total_unit; j++) {
		memory[j] = mmap(NULL, unit * MB, PROT_READ|PROT_WRITE,
				 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
		if (memory[j] == MAP_FAILED)
			tst_brkm(TBROK|TERRNO, tst_exit, "mmap");
#ifdef HAVE_MADV_MERGEABLE
		if (madvise(memory[j], unit * MB, MADV_MERGEABLE) == -1)
			tst_brkm(TBROK|TERRNO, tst_exit, "madvise");
#endif
	}

	tst_resm(TINFO, "child %d stops.", child_num);
	if (raise(SIGSTOP) == -1)
		tst_brkm(TBROK|TERRNO, tst_exit, "kill");
	fflush(stdout);

	for (j = 0; j < 4; j++) {

		ksm_child_memset(child_num, size, total_unit,
				 ksm_merge_data[j], memory);

		fflush(stdout);

		tst_resm(TINFO, "child %d stops.", child_num);
		if (raise(SIGSTOP) == -1)
			tst_brkm(TBROK|TERRNO, tst_exit, "kill");

		if (ksm_merge_data[j].mergeable_size < size * MB) {
			verify(memory, 'e', child_num, total_unit - 1,
			       total_unit, unit * MB - 1, unit * MB);
			verify(memory, ksm_merge_data[j].data, child_num,
			       0, total_unit, 0, unit * MB - 1);
		} else {
			verify(memory, ksm_merge_data[j].data, child_num,
			       0, total_unit, 0, unit * MB);
		}
	}

	tst_resm(TINFO, "child %d finished.", child_num);
}

static void stop_ksm_children(int *child, int num)
{
	int k, status;

	tst_resm(TINFO, "wait for all children to stop.");
	for (k = 0; k < num; k++) {
		if (waitpid(child[k], &status, WUNTRACED) == -1)
			tst_brkm(TBROK|TERRNO, cleanup, "waitpid");
		if (!WIFSTOPPED(status))
			tst_brkm(TBROK, cleanup, "child %d was not stopped", k);
	}
}

static void resume_ksm_children(int *child, int num)
{
	int k;

	tst_resm(TINFO, "resume all children.");
	for (k = 0; k < num; k++) {
		if (kill(child[k], SIGCONT) == -1)
			tst_brkm(TBROK|TERRNO, cleanup, "kill child[%d]", k);
	}
	fflush(stdout);
}

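/*
 * Fork `num' children that populate identical mergeable memory, drive them
 * through several merge/unmerge rounds by toggling the KSM "run" knob, and
 * verify the expected pages_shared/pages_sharing/pages_unshared counters
 * after every round.
 */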
void create_same_memory(int size, int num, int unit)
{
	int i, j, status, *child;
	unsigned long ps, pages;
	struct ksm_merge_data **ksm_data;

	struct ksm_merge_data ksm_data0[] = {
		{'c', size*MB}, {'c', size*MB}, {'d', size*MB}, {'d', size*MB},
	};
	struct ksm_merge_data ksm_data1[] = {
		{'a', size*MB}, {'b', size*MB}, {'d', size*MB}, {'d', size*MB-1},
	};
	struct ksm_merge_data ksm_data2[] = {
		{'a', size*MB}, {'a', size*MB}, {'d', size*MB}, {'d', size*MB},
	};

	ps = sysconf(_SC_PAGE_SIZE);
	pages = MB / ps;

	ksm_data = malloc((num - 3) * sizeof(struct ksm_merge_data *));
	/* from the fourth child on, the data is the same as the first child's */
	for (i = 0; i < num - 3; i++) {
		ksm_data[i] = malloc(4 * sizeof(struct ksm_merge_data));
		for (j = 0; j < 4; j++) {
			ksm_data[i][j].data = ksm_data0[j].data;
			ksm_data[i][j].mergeable_size =
				ksm_data0[j].mergeable_size;
		}
	}

	child = malloc(num * sizeof(int));
	if (child == NULL)
		tst_brkm(TBROK | TERRNO, cleanup, "malloc");

	for (i = 0; i < num; i++) {
		fflush(stdout);
		switch (child[i] = fork()) {
		case -1:
			tst_brkm(TBROK|TERRNO, cleanup, "fork");
		case 0:
			if (i == 0) {
				create_ksm_child(i, size, unit, ksm_data0);
				exit(0);
			} else if (i == 1) {
				create_ksm_child(i, size, unit, ksm_data1);
				exit(0);
			} else if (i == 2) {
				create_ksm_child(i, size, unit, ksm_data2);
				exit(0);
			} else {
				create_ksm_child(i, size, unit, ksm_data[i-3]);
				exit(0);
			}
		}
	}

	stop_ksm_children(child, num);

	tst_resm(TINFO, "KSM merging...");
	if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
		SAFE_FILE_PRINTF(cleanup, PATH_KSM "max_page_sharing",
				 "%ld", size * pages * num);
	SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "1");
	SAFE_FILE_PRINTF(cleanup, PATH_KSM "pages_to_scan", "%ld",
			 size * pages * num);
	SAFE_FILE_PRINTF(cleanup, PATH_KSM "sleep_millisecs", "0");

	resume_ksm_children(child, num);
	group_check(1, 2, size * num * pages - 2, 0, 0, 0, size * pages * num);

	stop_ksm_children(child, num);
	resume_ksm_children(child, num);
	group_check(1, 3, size * num * pages - 3, 0, 0, 0, size * pages * num);

	stop_ksm_children(child, num);
	resume_ksm_children(child, num);
	group_check(1, 1, size * num * pages - 1, 0, 0, 0, size * pages * num);

	stop_ksm_children(child, num);
	resume_ksm_children(child, num);
	group_check(1, 1, size * num * pages - 2, 0, 1, 0, size * pages * num);

	stop_ksm_children(child, num);

	tst_resm(TINFO, "KSM unmerging...");
	SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "2");

	resume_ksm_children(child, num);
	group_check(2, 0, 0, 0, 0, 0, size * pages * num);

	tst_resm(TINFO, "stop KSM.");
	SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "0");
	group_check(0, 0, 0, 0, 0, 0, size * pages * num);

	while (waitpid(-1, &status, WUNTRACED | WCONTINUED) > 0)
		if (WEXITSTATUS(status) != 0)
			tst_resm(TFAIL, "child exit status is %d",
				 WEXITSTATUS(status));
}

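/*
 * Map one mergeable, node-bound region of `nr_pages' pages per NUMA node,
 * fill them with identical data, and check the KSM counters with
 * merge_across_nodes set to 1 (everything merges into one shared page) and
 * then to 0 (one shared page per node).
 */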
void test_ksm_merge_across_nodes(unsigned long nr_pages)
{
	char **memory;
	int i, ret;
	int num_nodes, *nodes;
	unsigned long length;
	unsigned long pagesize;

#if HAVE_NUMA_H && HAVE_LINUX_MEMPOLICY_H && HAVE_NUMAIF_H \
	&& HAVE_MPOL_CONSTANTS
	unsigned long nmask[MAXNODES / BITS_PER_LONG] = { 0 };
#endif

	ret = get_allowed_nodes_arr(NH_MEMS|NH_CPUS, &num_nodes, &nodes);
	if (ret != 0)
		tst_brkm(TBROK|TERRNO, cleanup, "get_allowed_nodes_arr");
	if (num_nodes < 2) {
		tst_resm(TINFO, "needs NUMA system support");
		free(nodes);
		return;
	}

	pagesize = sysconf(_SC_PAGE_SIZE);
	length = nr_pages * pagesize;

	memory = malloc(num_nodes * sizeof(char *));
	if (memory == NULL)
		tst_brkm(TBROK | TERRNO, cleanup, "malloc");
	for (i = 0; i < num_nodes; i++) {
		memory[i] = mmap(NULL, length, PROT_READ|PROT_WRITE,
				 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
		if (memory[i] == MAP_FAILED)
			tst_brkm(TBROK|TERRNO, tst_exit, "mmap");
#ifdef HAVE_MADV_MERGEABLE
		if (madvise(memory[i], length, MADV_MERGEABLE) == -1)
			tst_brkm(TBROK|TERRNO, tst_exit, "madvise");
#endif

#if HAVE_NUMA_H && HAVE_LINUX_MEMPOLICY_H && HAVE_NUMAIF_H \
	&& HAVE_MPOL_CONSTANTS
		clean_node(nmask);
		set_node(nmask, nodes[i]);
		/*
		 * Use mbind() to make sure each node contains
		 * a `length'-sized chunk of memory.
		 */
		ret = mbind(memory[i], length, MPOL_BIND, nmask, MAXNODES, 0);
		if (ret == -1)
			tst_brkm(TBROK|TERRNO, tst_exit, "mbind");
#endif

		memset(memory[i], 10, length);
	}

	SAFE_FILE_PRINTF(cleanup, PATH_KSM "sleep_millisecs", "0");
	SAFE_FILE_PRINTF(cleanup, PATH_KSM "pages_to_scan", "%ld",
			 nr_pages * num_nodes);
	if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
		SAFE_FILE_PRINTF(cleanup, PATH_KSM "max_page_sharing",
				 "%ld", nr_pages * num_nodes);
	/*
	 * The merge_across_nodes setting can be changed only when there
	 * are no KSM shared pages in the system, so set run to 2 to
	 * unmerge pages first, then to 1 after changing merge_across_nodes,
	 * to remerge according to the new setting.
	 */
	SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "2");
	wait_ksmd_done();
	tst_resm(TINFO, "Start to test KSM with merge_across_nodes=1");
	SAFE_FILE_PRINTF(cleanup, PATH_KSM "merge_across_nodes", "1");
	SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "1");
	group_check(1, 1, nr_pages * num_nodes - 1, 0, 0, 0,
		    nr_pages * num_nodes);

	SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "2");
	wait_ksmd_done();
	tst_resm(TINFO, "Start to test KSM with merge_across_nodes=0");
	SAFE_FILE_PRINTF(cleanup, PATH_KSM "merge_across_nodes", "0");
	SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "1");
	group_check(1, num_nodes, nr_pages * num_nodes - num_nodes,
		    0, 0, 0, nr_pages * num_nodes);

	SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "2");
	wait_ksmd_done();
}

void check_ksm_options(int *size, int *num, int *unit)
{
	if (opt_size) {
		*size = atoi(opt_sizestr);
		if (*size < 1)
			tst_brkm(TBROK, cleanup, "size cannot be less than 1.");
	}
	if (opt_unit) {
		*unit = atoi(opt_unitstr);
		if (*unit > *size)
			tst_brkm(TBROK, cleanup,
				 "unit cannot be greater than size.");
		if (*size % *unit != 0)
			tst_brkm(TBROK, cleanup,
				 "size must be a multiple of unit.");
	}
	if (opt_num) {
		*num = atoi(opt_numstr);
		if (*num < 3)
			tst_brkm(TBROK, cleanup,
				 "process number cannot be less than 3.");
	}
}

void ksm_usage(void)
{
	printf(" -n Number of processes\n");
	printf(" -s Memory allocation size in MB\n");
	printf(" -u Memory allocation unit in MB\n");
}

/* THP */

/* cpuset/memcg */

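/*
 * Build a comma-separated list of the online CPUs that belong to NUMA node
 * `nd' and store it in `cpus'.
 */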
static void gather_node_cpus(char *cpus, long nd)
{
	int ncpus = 0;
	int i;
	long online;
	char buf[BUFSIZ];
	char path[BUFSIZ], path1[BUFSIZ];

	while (path_exist(PATH_SYS_SYSTEM "/cpu/cpu%d", ncpus))
		ncpus++;

	for (i = 0; i < ncpus; i++) {
		snprintf(path, BUFSIZ,
			 PATH_SYS_SYSTEM "/node/node%ld/cpu%d", nd, i);
		if (path_exist(path)) {
			snprintf(path1, BUFSIZ, "%s/online", path);
			/*
			 * if there is no online knob, then the cpu cannot
			 * be taken offline
			 */
			if (path_exist(path1)) {
				SAFE_FILE_SCANF(cleanup, path1, "%ld", &online);
				if (online == 0)
					continue;
			}
			sprintf(buf, "%d,", i);
			strcat(cpus, buf);
		}
	}
	/* Remove the trailing comma. */
	cpus[strlen(cpus) - 1] = '\0';
}

void read_cpuset_files(char *prefix, char *filename, char *retbuf)
{
	int fd;
	char path[BUFSIZ];

	/*
	 * try either '/dev/cpuset/XXXX' or '/dev/cpuset/cpuset.XXXX'
	 * please see Documentation/cgroups/cpusets.txt from kernel src
	 * for details
	 */
	snprintf(path, BUFSIZ, "%s/%s", prefix, filename);
	fd = open(path, O_RDONLY);
	if (fd == -1) {
		if (errno == ENOENT) {
			snprintf(path, BUFSIZ, "%s/cpuset.%s",
				 prefix, filename);
			fd = open(path, O_RDONLY);
			if (fd == -1)
				tst_brkm(TBROK | TERRNO, cleanup,
					 "open %s", path);
		} else
			tst_brkm(TBROK | TERRNO, cleanup, "open %s", path);
	}
	if (read(fd, retbuf, BUFSIZ) < 0)
		tst_brkm(TBROK | TERRNO, cleanup, "read %s", path);
	close(fd);
}

void write_cpuset_files(char *prefix, char *filename, char *buf)
{
	int fd;
	char path[BUFSIZ];

	/*
	 * try either '/dev/cpuset/XXXX' or '/dev/cpuset/cpuset.XXXX'
	 * please see Documentation/cgroups/cpusets.txt from kernel src
	 * for details
	 */
	snprintf(path, BUFSIZ, "%s/%s", prefix, filename);
	fd = open(path, O_WRONLY);
	if (fd == -1) {
		if (errno == ENOENT) {
			snprintf(path, BUFSIZ, "%s/cpuset.%s",
				 prefix, filename);
			fd = open(path, O_WRONLY);
			if (fd == -1)
				tst_brkm(TBROK | TERRNO, cleanup,
					 "open %s", path);
		} else
			tst_brkm(TBROK | TERRNO, cleanup, "open %s", path);
	}
	if (write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf))
		tst_brkm(TBROK | TERRNO, cleanup, "write %s", path);
	close(fd);
}

void write_cpusets(long nd)
{
	char buf[BUFSIZ];
	char cpus[BUFSIZ] = "";

	snprintf(buf, BUFSIZ, "%ld", nd);
	write_cpuset_files(CPATH_NEW, "mems", buf);

	gather_node_cpus(cpus, nd);
	/*
	 * If node `nd' doesn't contain any CPUs, fall back to
	 * CPU 0 as the value of cpuset.cpus.
	 */
	if (strlen(cpus) != 0) {
		write_cpuset_files(CPATH_NEW, "cpus", cpus);
	} else {
		tst_resm(TINFO, "No CPUs in node%ld; using only CPU0", nd);
		write_cpuset_files(CPATH_NEW, "cpus", "0");
	}

	SAFE_FILE_PRINTF(NULL, CPATH_NEW "/tasks", "%d", getpid());
}

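/*
 * Tear down a cgroup/cpuset mount: move every task from the child group back
 * to the top-level group, remove the child directory, then unmount and
 * remove the mount point.
 */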
void umount_mem(char *path, char *path_new)
{
	FILE *fp;
	int fd;
	char s_new[BUFSIZ], s[BUFSIZ], value[BUFSIZ];

	/* Move all processes in task to its parent node. */
	sprintf(s, "%s/tasks", path);
	fd = open(s, O_WRONLY);
	if (fd == -1)
		tst_resm(TWARN | TERRNO, "open %s", s);

	snprintf(s_new, BUFSIZ, "%s/tasks", path_new);
	fp = fopen(s_new, "r");
	if (fp == NULL)
		tst_resm(TWARN | TERRNO, "fopen %s", s_new);
	if ((fd != -1) && (fp != NULL)) {
		while (fgets(value, BUFSIZ, fp) != NULL)
			if (write(fd, value, strlen(value) - 1)
			    != (ssize_t)strlen(value) - 1)
				tst_resm(TWARN | TERRNO, "write %s", s);
	}
	if (fd != -1)
		close(fd);
	if (fp != NULL)
		fclose(fp);
	if (rmdir(path_new) == -1)
		tst_resm(TWARN | TERRNO, "rmdir %s", path_new);
	if (umount(path) == -1)
		tst_resm(TWARN | TERRNO, "umount %s", path);
	if (rmdir(path) == -1)
		tst_resm(TWARN | TERRNO, "rmdir %s", path);
}

void mount_mem(char *name, char *fs, char *options, char *path, char *path_new)
{
	if (mkdir(path, 0777) == -1)
		tst_brkm(TBROK | TERRNO, cleanup, "mkdir %s", path);
	if (mount(name, path, fs, 0, options) == -1) {
		if (errno == ENODEV) {
			if (rmdir(path) == -1)
				tst_resm(TWARN | TERRNO, "rmdir %s failed",
					 path);
			tst_brkm(TCONF, NULL,
				 "file system %s is not configured in kernel",
				 fs);
		}
		tst_brkm(TBROK | TERRNO, cleanup, "mount %s", path);
	}
	if (mkdir(path_new, 0777) == -1)
		tst_brkm(TBROK | TERRNO, cleanup, "mkdir %s", path_new);
}

/* shared */

/* Warning: *DO NOT* use this function in child */
unsigned int get_a_numa_node(void (*cleanup_fn) (void))
{
	unsigned int nd1, nd2;
	int ret;

	ret = get_allowed_nodes(0, 2, &nd1, &nd2);
	switch (ret) {
	case 0:
		break;
	case -3:
		tst_brkm(TCONF, cleanup_fn, "requires a NUMA system.");
	default:
		tst_brkm(TBROK | TERRNO, cleanup_fn, "1st get_allowed_nodes");
	}

	ret = get_allowed_nodes(NH_MEMS | NH_CPUS, 1, &nd1);
	switch (ret) {
	case 0:
		tst_resm(TINFO, "get node%u.", nd1);
		return nd1;
	case -3:
		tst_brkm(TCONF, cleanup_fn, "requires a NUMA system that has "
			 "at least one node with both memory and CPU "
			 "available.");
	default:
		break;
	}
	tst_brkm(TBROK | TERRNO, cleanup_fn, "2nd get_allowed_nodes");
}

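/*
 * printf()-style helper: build a path from the format string and return
 * non-zero when it exists.
 */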
int path_exist(const char *path, ...)
{
	va_list ap;
	char pathbuf[PATH_MAX];

	va_start(ap, path);
	vsnprintf(pathbuf, sizeof(pathbuf), path, ap);
	va_end(ap);

	return access(pathbuf, F_OK) == 0;
}

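/*
 * Return the value of `item' (e.g. "MemFree:") from /proc/meminfo, or break
 * the test if the item cannot be found.
 */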
long read_meminfo(char *item)
{
	FILE *fp;
	char line[BUFSIZ], buf[BUFSIZ];
	long val;

	fp = fopen(PATH_MEMINFO, "r");
	if (fp == NULL)
		tst_brkm(TBROK | TERRNO, cleanup, "fopen %s", PATH_MEMINFO);

	while (fgets(line, BUFSIZ, fp) != NULL) {
		if (sscanf(line, "%64s %ld", buf, &val) == 2)
			if (strcmp(buf, item) == 0) {
				fclose(fp);
				return val;
			}
		continue;
	}
	fclose(fp);

	tst_brkm(TBROK, cleanup, "cannot find \"%s\" in %s",
		 item, PATH_MEMINFO);
}

void set_sys_tune(char *sys_file, long tune, int check)
{
	long val;
	char path[BUFSIZ];

	tst_resm(TINFO, "set %s to %ld", sys_file, tune);

	snprintf(path, BUFSIZ, PATH_SYSVM "%s", sys_file);
	SAFE_FILE_PRINTF(NULL, path, "%ld", tune);

	if (check) {
		val = get_sys_tune(sys_file);
		if (val != tune)
			tst_brkm(TBROK, cleanup, "%s = %ld, but expect %ld",
				 sys_file, val, tune);
	}
}

long get_sys_tune(char *sys_file)
{
	char path[BUFSIZ];
	long tune;

	snprintf(path, BUFSIZ, PATH_SYSVM "%s", sys_file);
	SAFE_FILE_SCANF(NULL, path, "%ld", &tune);

	return tune;
}

void update_shm_size(size_t *shm_size)
{
	size_t shmmax;

	SAFE_FILE_SCANF(cleanup, PATH_SHMMAX, "%zu", &shmmax);
	if (*shm_size > shmmax) {
		tst_resm(TINFO, "Set shm_size to shmmax: %zu", shmmax);
		*shm_size = shmmax;
	}
}

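/*
 * Return 1 if any mapping listed in /proc/self/maps starts or ends inside
 * [low, high), 0 otherwise.
 */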
int range_is_mapped(void (*cleanup_fn) (void), unsigned long low, unsigned long high)
{
	FILE *fp;

	fp = fopen("/proc/self/maps", "r");
	if (fp == NULL)
		tst_brkm(TBROK | TERRNO, cleanup_fn, "Failed to open /proc/self/maps.");

	while (!feof(fp)) {
		unsigned long start, end;
		int ret;

		ret = fscanf(fp, "%lx-%lx %*[^\n]\n", &start, &end);
		if (ret != 2) {
			fclose(fp);
			tst_brkm(TBROK | TERRNO, cleanup_fn, "Couldn't parse /proc/self/maps line.");
		}

		if ((start >= low) && (start < high)) {
			fclose(fp);
			return 1;
		}
		if ((end >= low) && (end < high)) {
			fclose(fp);
			return 1;
		}
	}

	fclose(fp);
	return 0;
}