/* LTP memory test support library (OOM / KSM / cpuset / memcg helpers). */
1 #define TST_NO_DEFAULT_MAIN
2 
3 #include "config.h"
4 #include <sys/types.h>
5 #include <sys/mman.h>
6 #include <sys/mount.h>
7 #include <sys/stat.h>
8 #include <sys/wait.h>
9 #include <sys/param.h>
10 #include <errno.h>
11 #include <fcntl.h>
12 #if HAVE_NUMA_H
13 #include <numa.h>
14 #endif
15 #if HAVE_NUMAIF_H
16 #include <numaif.h>
17 #endif
18 #include <pthread.h>
19 #include <stdarg.h>
20 #include <stdio.h>
21 #include <string.h>
22 #include <stdlib.h>
23 #include <unistd.h>
24 
25 #include "mem.h"
26 #include "numa_helper.h"
27 
28 /* OOM */
29 
alloc_mem(long int length,int testcase)30 static int alloc_mem(long int length, int testcase)
31 {
32 	char *s;
33 	long i, pagesz = getpagesize();
34 	int loop = 10;
35 
36 	tst_res(TINFO, "thread (%lx), allocating %ld bytes.",
37 		(unsigned long) pthread_self(), length);
38 
39 	s = mmap(NULL, length, PROT_READ | PROT_WRITE,
40 		 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
41 	if (s == MAP_FAILED)
42 		return errno;
43 
44 	if (testcase == MLOCK) {
45 		while (mlock(s, length) == -1 && loop > 0) {
46 			if (EAGAIN != errno)
47 				return errno;
48 			usleep(300000);
49 			loop--;
50 		}
51 	}
52 
53 #ifdef HAVE_MADV_MERGEABLE
54 	if (testcase == KSM && madvise(s, length, MADV_MERGEABLE) == -1)
55 		return errno;
56 #endif
57 	for (i = 0; i < length; i += pagesz)
58 		s[i] = '\a';
59 
60 	return 0;
61 }
62 
child_alloc_thread(void * args)63 static void *child_alloc_thread(void *args)
64 {
65 	int ret = 0;
66 
67 	/* keep allocating until there's an error */
68 	while (!ret)
69 		ret = alloc_mem(LENGTH, (long)args);
70 	exit(ret);
71 }
72 
child_alloc(int testcase,int lite,int threads)73 static void child_alloc(int testcase, int lite, int threads)
74 {
75 	int i;
76 	pthread_t *th;
77 
78 	if (lite) {
79 		int ret = alloc_mem(TESTMEM + MB, testcase);
80 		exit(ret);
81 	}
82 
83 	th = malloc(sizeof(pthread_t) * threads);
84 	if (!th) {
85 		tst_res(TINFO | TERRNO, "malloc");
86 		goto out;
87 	}
88 
89 	for (i = 0; i < threads; i++) {
90 		TEST(pthread_create(&th[i], NULL, child_alloc_thread,
91 			(void *)((long)testcase)));
92 		if (TEST_RETURN) {
93 			tst_res(TINFO | TRERRNO, "pthread_create");
94 			/*
95 			 * Keep going if thread other than first fails to
96 			 * spawn due to lack of resources.
97 			 */
98 			if (i == 0 || TEST_RETURN != EAGAIN)
99 				goto out;
100 		}
101 	}
102 
103 	/* wait for one of threads to exit whole process */
104 	while (1)
105 		sleep(1);
106 out:
107 	exit(1);
108 }
109 
110 /*
111  * oom - allocates memory according to specified testcase and checks
112  *       desired outcome (e.g. child killed, operation failed with ENOMEM)
113  * @testcase: selects how child allocates memory
114  *            valid choices are: NORMAL, MLOCK and KSM
115  * @lite: if non-zero, child makes only single TESTMEM+MB allocation
116  *        if zero, child keeps allocating memory until it gets killed
117  *        or some operation fails
118  * @retcode: expected return code of child process
119  *           if matches child ret code, this function reports PASS,
120  *           otherwise it reports FAIL
121  * @allow_sigkill: if zero and child is killed, this function reports FAIL
122  *                 if non-zero, then if child is killed by SIGKILL
123  *                 it is considered as PASS
124  */
oom(int testcase,int lite,int retcode,int allow_sigkill)125 void oom(int testcase, int lite, int retcode, int allow_sigkill)
126 {
127 	pid_t pid;
128 	int status, threads;
129 
130 	switch (pid = SAFE_FORK()) {
131 	case 0:
132 		threads = MAX(1, tst_ncpus() - 1);
133 		child_alloc(testcase, lite, threads);
134 	default:
135 		break;
136 	}
137 
138 	tst_res(TINFO, "expected victim is %d.", pid);
139 	SAFE_WAITPID(-1, &status, 0);
140 
141 	if (WIFSIGNALED(status)) {
142 		if (allow_sigkill && WTERMSIG(status) == SIGKILL) {
143 			tst_res(TPASS, "victim signalled: (%d) %s",
144 				SIGKILL,
145 				tst_strsig(SIGKILL));
146 		} else {
147 			tst_res(TFAIL, "victim signalled: (%d) %s",
148 				WTERMSIG(status),
149 				tst_strsig(WTERMSIG(status)));
150 		}
151 	} else if (WIFEXITED(status)) {
152 		if (WEXITSTATUS(status) == retcode) {
153 			tst_res(TPASS, "victim retcode: (%d) %s",
154 				retcode, strerror(retcode));
155 		} else {
156 			tst_res(TFAIL, "victim unexpectedly ended with "
157 				"retcode: %d, expected: %d",
158 				WEXITSTATUS(status), retcode);
159 		}
160 	} else {
161 		tst_res(TFAIL, "victim unexpectedly ended");
162 	}
163 }
164 
165 #ifdef HAVE_NUMA_V2
set_global_mempolicy(int mempolicy)166 static void set_global_mempolicy(int mempolicy)
167 {
168 	unsigned long nmask[MAXNODES / BITS_PER_LONG] = { 0 };
169 	int num_nodes, *nodes;
170 	int ret;
171 
172 	if (mempolicy) {
173 		ret = get_allowed_nodes_arr(NH_MEMS|NH_CPUS, &num_nodes, &nodes);
174 		if (ret != 0)
175 			tst_brk(TBROK|TERRNO, "get_allowed_nodes_arr");
176 		if (num_nodes < 2) {
177 			tst_res(TINFO, "mempolicy need NUMA system support");
178 			free(nodes);
179 			return;
180 		}
181 		switch(mempolicy) {
182 		case MPOL_BIND:
183 			/* bind the second node */
184 			set_node(nmask, nodes[1]);
185 			break;
186 		case MPOL_INTERLEAVE:
187 		case MPOL_PREFERRED:
188 			if (num_nodes == 2) {
189 				tst_res(TINFO, "The mempolicy need "
190 					 "more than 2 numa nodes");
191 				free(nodes);
192 				return;
193 			} else {
194 				/* Using the 2nd,3rd node */
195 				set_node(nmask, nodes[1]);
196 				set_node(nmask, nodes[2]);
197 			}
198 			break;
199 		default:
200 			tst_brk(TBROK|TERRNO, "Bad mempolicy mode");
201 		}
202 		if (set_mempolicy(mempolicy, nmask, MAXNODES) == -1)
203 			tst_brk(TBROK|TERRNO, "set_mempolicy");
204 	}
205 }
206 #else
set_global_mempolicy(int mempolicy LTP_ATTRIBUTE_UNUSED)207 static void set_global_mempolicy(int mempolicy LTP_ATTRIBUTE_UNUSED) { }
208 #endif
209 
testoom(int mempolicy,int lite,int retcode,int allow_sigkill)210 void testoom(int mempolicy, int lite, int retcode, int allow_sigkill)
211 {
212 	int ksm_run_orig;
213 
214 	set_global_mempolicy(mempolicy);
215 
216 	tst_res(TINFO, "start normal OOM testing.");
217 	oom(NORMAL, lite, retcode, allow_sigkill);
218 
219 	tst_res(TINFO, "start OOM testing for mlocked pages.");
220 	oom(MLOCK, lite, retcode, allow_sigkill);
221 
222 	/*
223 	 * Skip oom(KSM) if lite == 1, since limit_in_bytes may vary from
224 	 * run to run, which isn't reliable for oom03 cgroup test.
225 	 */
226 	if (access(PATH_KSM, F_OK) == -1 || lite == 1) {
227 		tst_res(TINFO, "KSM is not configed or lite == 1, "
228 			 "skip OOM test for KSM pags");
229 	} else {
230 		tst_res(TINFO, "start OOM testing for KSM pages.");
231 		SAFE_FILE_SCANF(PATH_KSM "run", "%d", &ksm_run_orig);
232 		SAFE_FILE_PRINTF(PATH_KSM "run", "1");
233 		oom(KSM, lite, retcode, allow_sigkill);
234 		SAFE_FILE_PRINTF(PATH_KSM "run", "%d", ksm_run_orig);
235 	}
236 }
237 
238 /* KSM */
239 
240 static int max_page_sharing;
241 
save_max_page_sharing(void)242 void save_max_page_sharing(void)
243 {
244 	if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
245 		SAFE_FILE_SCANF(PATH_KSM "max_page_sharing",
246 				"%d", &max_page_sharing);
247 }
248 
restore_max_page_sharing(void)249 void restore_max_page_sharing(void)
250 {
251 	if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
252 	        FILE_PRINTF(PATH_KSM "max_page_sharing",
253 	                         "%d", max_page_sharing);
254 }
255 
check(char * path,long int value)256 static void check(char *path, long int value)
257 {
258 	char fullpath[BUFSIZ];
259 	long actual_val;
260 
261 	snprintf(fullpath, BUFSIZ, PATH_KSM "%s", path);
262 	SAFE_FILE_SCANF(fullpath, "%ld", &actual_val);
263 
264 	if (actual_val != value)
265 		tst_res(TFAIL, "%s is not %ld but %ld.", path, value,
266 			actual_val);
267 	else
268 		tst_res(TPASS, "%s is %ld.", path, actual_val);
269 }
270 
wait_ksmd_full_scan(void)271 static void wait_ksmd_full_scan(void)
272 {
273 	unsigned long full_scans, at_least_one_full_scan;
274 	int count = 0;
275 
276 	SAFE_FILE_SCANF(PATH_KSM "full_scans", "%lu", &full_scans);
277 	/*
278 	 * The current scan is already in progress so we can't guarantee that
279 	 * the get_user_pages() is called on every existing rmap_item if we
280 	 * only waited for the remaining part of the scan.
281 	 *
282 	 * The actual merging happens after the unstable tree has been built so
283 	 * we need to wait at least two full scans to guarantee merging, hence
284 	 * wait full_scans to increment by 3 so that at least two full scans
285 	 * will run.
286 	 */
287 	at_least_one_full_scan = full_scans + 3;
288 	while (full_scans < at_least_one_full_scan) {
289 		sleep(1);
290 		count++;
291 		SAFE_FILE_SCANF(PATH_KSM "full_scans", "%lu", &full_scans);
292 	}
293 
294 	tst_res(TINFO, "ksm daemon takes %ds to run two full scans",
295 		count);
296 }
297 
final_group_check(int run,int pages_shared,int pages_sharing,int pages_volatile,int pages_unshared,int sleep_millisecs,int pages_to_scan)298 static void final_group_check(int run, int pages_shared, int pages_sharing,
299 			  int pages_volatile, int pages_unshared,
300 			  int sleep_millisecs, int pages_to_scan)
301 {
302 	tst_res(TINFO, "check!");
303 	check("run", run);
304 	check("pages_shared", pages_shared);
305 	check("pages_sharing", pages_sharing);
306 	check("pages_volatile", pages_volatile);
307 	check("pages_unshared", pages_unshared);
308 	check("sleep_millisecs", sleep_millisecs);
309 	check("pages_to_scan", pages_to_scan);
310 }
311 
group_check(int run,int pages_shared,int pages_sharing,int pages_volatile,int pages_unshared,int sleep_millisecs,int pages_to_scan)312 static void group_check(int run, int pages_shared, int pages_sharing,
313 			int pages_volatile, int pages_unshared,
314 			int sleep_millisecs, int pages_to_scan)
315 {
316 	if (run != 1) {
317 		tst_res(TFAIL, "group_check run is not 1, %d.", run);
318 	} else {
319 		/* wait for ksm daemon to scan all mergeable pages. */
320 		wait_ksmd_full_scan();
321 	}
322 
323 	final_group_check(run, pages_shared, pages_sharing,
324 			  pages_volatile, pages_unshared,
325 			  sleep_millisecs, pages_to_scan);
326 }
327 
verify(char ** memory,char value,int proc,int start,int end,int start2,int end2)328 static void verify(char **memory, char value, int proc,
329 		    int start, int end, int start2, int end2)
330 {
331 	int i, j;
332 	void *s = NULL;
333 
334 	s = SAFE_MALLOC((end - start) * (end2 - start2));
335 
336 	tst_res(TINFO, "child %d verifies memory content.", proc);
337 	memset(s, value, (end - start) * (end2 - start2));
338 	if (memcmp(memory[start], s, (end - start) * (end2 - start2))
339 	    != 0)
340 		for (j = start; j < end; j++)
341 			for (i = start2; i < end2; i++)
342 				if (memory[j][i] != value)
343 					tst_res(TFAIL, "child %d has %c at "
344 						 "%d,%d,%d.",
345 						 proc, memory[j][i], proc,
346 						 j, i);
347 	free(s);
348 }
349 
check_hugepage(void)350 void check_hugepage(void)
351 {
352 	if (access(PATH_HUGEPAGES, F_OK))
353 		tst_brk(TCONF, "Huge page is not supported.");
354 }
355 
write_memcg(void)356 void write_memcg(void)
357 {
358 	SAFE_FILE_PRINTF(MEMCG_LIMIT, "%ld", TESTMEM);
359 
360 	SAFE_FILE_PRINTF(MEMCG_PATH_NEW "/tasks", "%d", getpid());
361 }
362 
/* Describes one memset round performed by a KSM test child. */
struct ksm_merge_data {
	char data;			/* fill byte for the allocated area */
	unsigned int mergeable_size;	/* bytes filled with 'data'; when smaller
					 * than the whole area, ksm_child_memset()
					 * puts an 'e' in the very last byte */
};
367 
/*
 * Fill a child's whole allocation (@size MB split into @total_unit
 * chunks of memory[]) with ksm_merge_data.data.  When mergeable_size
 * is smaller than the full area, the very last byte is overwritten
 * with 'e' so the final page differs from the rest.
 */
static void ksm_child_memset(int child_num, int size, int total_unit,
		 struct ksm_merge_data ksm_merge_data, char **memory)
{
	int i = 0, j;
	int unit = size / total_unit;

	tst_res(TINFO, "child %d continues...", child_num);

	if (ksm_merge_data.mergeable_size == size * MB) {
		tst_res(TINFO, "child %d allocates %d MB filled with '%c'",
			child_num, size, ksm_merge_data.data);

	} else {
		tst_res(TINFO, "child %d allocates %d MB filled with '%c'"
				" except one page with 'e'",
				child_num, size, ksm_merge_data.data);
	}

	for (j = 0; j < total_unit; j++) {
		for (i = 0; (unsigned int)i < unit * MB; i++)
			memory[j][i] = ksm_merge_data.data;
	}

	/* if it contains unshared page, then set 'e' char
	 * at the end of the last page
	 * (relies on i == unit * MB and j == total_unit after the loops)
	 */
	if (ksm_merge_data.mergeable_size < size * MB)
		memory[j-1][i-1] = 'e';
}
397 
/*
 * Body of one KSM test child: allocate @size MB of mergeable anonymous
 * memory in (@size / @unit) chunks of @unit MB, then run four rounds of
 * "fill with ksm_merge_data[j], stop, verify".  The parent drives the
 * rounds via the SIGSTOP/SIGCONT handshake implemented by
 * stop_ksm_children()/resume_ksm_children().
 */
static void create_ksm_child(int child_num, int size, int unit,
		       struct ksm_merge_data *ksm_merge_data)
{
	int j, total_unit;
	char **memory;

	/* The total units in all */
	total_unit = size / unit;

	/* Apply for the space for memory */
	memory = SAFE_MALLOC(total_unit * sizeof(char *));
	for (j = 0; j < total_unit; j++) {
		memory[j] = SAFE_MMAP(NULL, unit * MB, PROT_READ|PROT_WRITE,
			MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
#ifdef HAVE_MADV_MERGEABLE
		if (madvise(memory[j], unit * MB, MADV_MERGEABLE) == -1)
			tst_brk(TBROK|TERRNO, "madvise");
#endif
	}

	/* hand control back to the parent until it sends SIGCONT */
	tst_res(TINFO, "child %d stops.", child_num);
	if (raise(SIGSTOP) == -1)
		tst_brk(TBROK|TERRNO, "kill");
	fflush(stdout);

	for (j = 0; j < 4; j++) {

		ksm_child_memset(child_num, size, total_unit,
				  ksm_merge_data[j], memory);

		fflush(stdout);

		tst_res(TINFO, "child %d stops.", child_num);
		if (raise(SIGSTOP) == -1)
			tst_brk(TBROK|TERRNO, "kill");

		if (ksm_merge_data[j].mergeable_size < size * MB) {
			/* last byte of the last page carries the 'e' marker */
			verify(memory, 'e', child_num, total_unit - 1,
				total_unit, unit * MB - 1, unit * MB);
			verify(memory, ksm_merge_data[j].data, child_num,
				0, total_unit, 0, unit * MB - 1);
		} else {
			verify(memory, ksm_merge_data[j].data, child_num,
				0, total_unit, 0, unit * MB);
		}
	}

	tst_res(TINFO, "child %d finished.", child_num);
}
447 
stop_ksm_children(int * child,int num)448 static void stop_ksm_children(int *child, int num)
449 {
450 	int k, status;
451 
452 	tst_res(TINFO, "wait for all children to stop.");
453 	for (k = 0; k < num; k++) {
454 		SAFE_WAITPID(child[k], &status, WUNTRACED);
455 		if (!WIFSTOPPED(status))
456 			tst_brk(TBROK, "child %d was not stopped", k);
457 	}
458 }
459 
resume_ksm_children(int * child,int num)460 static void resume_ksm_children(int *child, int num)
461 {
462 	int k;
463 
464 	tst_res(TINFO, "resume all children.");
465 	for (k = 0; k < num; k++)
466 		SAFE_KILL(child[k], SIGCONT);
467 
468 	fflush(stdout);
469 }
470 
create_same_memory(int size,int num,int unit)471 void create_same_memory(int size, int num, int unit)
472 {
473 	int i, j, status, *child;
474 	unsigned long ps, pages;
475 	struct ksm_merge_data **ksm_data;
476 
477 	struct ksm_merge_data ksm_data0[] = {
478 	       {'c', size*MB}, {'c', size*MB}, {'d', size*MB}, {'d', size*MB},
479 	};
480 	struct ksm_merge_data ksm_data1[] = {
481 	       {'a', size*MB}, {'b', size*MB}, {'d', size*MB}, {'d', size*MB-1},
482 	};
483 	struct ksm_merge_data ksm_data2[] = {
484 	       {'a', size*MB}, {'a', size*MB}, {'d', size*MB}, {'d', size*MB},
485 	};
486 
487 	ps = sysconf(_SC_PAGE_SIZE);
488 	pages = MB / ps;
489 
490 	ksm_data = malloc((num - 3) * sizeof(struct ksm_merge_data *));
491 	/* Since from third child, the data is same with the first child's */
492 	for (i = 0; i < num - 3; i++) {
493 		ksm_data[i] = malloc(4 * sizeof(struct ksm_merge_data));
494 		for (j = 0; j < 4; j++) {
495 			ksm_data[i][j].data = ksm_data0[j].data;
496 			ksm_data[i][j].mergeable_size =
497 				ksm_data0[j].mergeable_size;
498 		}
499 	}
500 
501 	child = SAFE_MALLOC(num * sizeof(int));
502 
503 	for (i = 0; i < num; i++) {
504 		fflush(stdout);
505 		switch (child[i] = SAFE_FORK()) {
506 		case 0:
507 			if (i == 0) {
508 				create_ksm_child(i, size, unit, ksm_data0);
509 				exit(0);
510 			} else if (i == 1) {
511 				create_ksm_child(i, size, unit, ksm_data1);
512 				exit(0);
513 			} else if (i == 2) {
514 				create_ksm_child(i, size, unit, ksm_data2);
515 				exit(0);
516 			} else {
517 				create_ksm_child(i, size, unit, ksm_data[i-3]);
518 				exit(0);
519 			}
520 		}
521 	}
522 
523 	stop_ksm_children(child, num);
524 
525 	tst_res(TINFO, "KSM merging...");
526 	if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
527 		SAFE_FILE_PRINTF(PATH_KSM "max_page_sharing", "%ld", size * pages * num);
528 	SAFE_FILE_PRINTF(PATH_KSM "run", "1");
529 	SAFE_FILE_PRINTF(PATH_KSM "pages_to_scan", "%ld", size * pages * num);
530 	SAFE_FILE_PRINTF(PATH_KSM "sleep_millisecs", "0");
531 
532 	resume_ksm_children(child, num);
533 	stop_ksm_children(child, num);
534 	group_check(1, 2, size * num * pages - 2, 0, 0, 0, size * pages * num);
535 
536 	resume_ksm_children(child, num);
537 	stop_ksm_children(child, num);
538 	group_check(1, 3, size * num * pages - 3, 0, 0, 0, size * pages * num);
539 
540 	resume_ksm_children(child, num);
541 	stop_ksm_children(child, num);
542 	group_check(1, 1, size * num * pages - 1, 0, 0, 0, size * pages * num);
543 
544 	resume_ksm_children(child, num);
545 	stop_ksm_children(child, num);
546 	group_check(1, 1, size * num * pages - 2, 0, 1, 0, size * pages * num);
547 
548 	tst_res(TINFO, "KSM unmerging...");
549 	SAFE_FILE_PRINTF(PATH_KSM "run", "2");
550 
551 	resume_ksm_children(child, num);
552 	final_group_check(2, 0, 0, 0, 0, 0, size * pages * num);
553 
554 	tst_res(TINFO, "stop KSM.");
555 	SAFE_FILE_PRINTF(PATH_KSM "run", "0");
556 	final_group_check(0, 0, 0, 0, 0, 0, size * pages * num);
557 
558 	while (waitpid(-1, &status, 0) > 0)
559 		if (WEXITSTATUS(status) != 0)
560 			tst_res(TFAIL, "child exit status is %d",
561 				 WEXITSTATUS(status));
562 }
563 
test_ksm_merge_across_nodes(unsigned long nr_pages)564 void test_ksm_merge_across_nodes(unsigned long nr_pages)
565 {
566 	char **memory;
567 	int i, ret;
568 	int num_nodes, *nodes;
569 	unsigned long length;
570 	unsigned long pagesize;
571 
572 #ifdef HAVE_NUMA_V2
573 	unsigned long nmask[MAXNODES / BITS_PER_LONG] = { 0 };
574 #endif
575 
576 	ret = get_allowed_nodes_arr(NH_MEMS|NH_CPUS, &num_nodes, &nodes);
577 	if (ret != 0)
578 		tst_brk(TBROK|TERRNO, "get_allowed_nodes_arr");
579 	if (num_nodes < 2) {
580 		tst_res(TINFO, "need NUMA system support");
581 		free(nodes);
582 		return;
583 	}
584 
585 	pagesize = sysconf(_SC_PAGE_SIZE);
586 	length = nr_pages * pagesize;
587 
588 	memory = SAFE_MALLOC(num_nodes * sizeof(char *));
589 	for (i = 0; i < num_nodes; i++) {
590 		memory[i] = SAFE_MMAP(NULL, length, PROT_READ|PROT_WRITE,
591 			    MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
592 #ifdef HAVE_MADV_MERGEABLE
593 		if (madvise(memory[i], length, MADV_MERGEABLE) == -1)
594 			tst_brk(TBROK|TERRNO, "madvise");
595 #endif
596 
597 #ifdef HAVE_NUMA_V2
598 		clean_node(nmask);
599 		set_node(nmask, nodes[i]);
600 		/*
601 		 * Use mbind() to make sure each node contains
602 		 * length size memory.
603 		 */
604 		ret = mbind(memory[i], length, MPOL_BIND, nmask, MAXNODES, 0);
605 		if (ret == -1)
606 			tst_brk(TBROK|TERRNO, "mbind");
607 #endif
608 
609 		memset(memory[i], 10, length);
610 	}
611 
612 	SAFE_FILE_PRINTF(PATH_KSM "sleep_millisecs", "0");
613 	SAFE_FILE_PRINTF(PATH_KSM "pages_to_scan", "%ld",
614 			 nr_pages * num_nodes);
615 	if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
616 		SAFE_FILE_PRINTF(PATH_KSM "max_page_sharing",
617 			"%ld", nr_pages * num_nodes);
618 	/*
619 	 * merge_across_nodes setting can be changed only when there
620 	 * are no ksm shared pages in system, so set run 2 to unmerge
621 	 * pages first, then to 1 after changing merge_across_nodes,
622 	 * to remerge according to the new setting.
623 	 */
624 	SAFE_FILE_PRINTF(PATH_KSM "run", "2");
625 	tst_res(TINFO, "Start to test KSM with merge_across_nodes=1");
626 	SAFE_FILE_PRINTF(PATH_KSM "merge_across_nodes", "1");
627 	SAFE_FILE_PRINTF(PATH_KSM "run", "1");
628 	group_check(1, 1, nr_pages * num_nodes - 1, 0, 0, 0,
629 		    nr_pages * num_nodes);
630 
631 	SAFE_FILE_PRINTF(PATH_KSM "run", "2");
632 	tst_res(TINFO, "Start to test KSM with merge_across_nodes=0");
633 	SAFE_FILE_PRINTF(PATH_KSM "merge_across_nodes", "0");
634 	SAFE_FILE_PRINTF(PATH_KSM "run", "1");
635 	group_check(1, num_nodes, nr_pages * num_nodes - num_nodes,
636 		    0, 0, 0, nr_pages * num_nodes);
637 
638 	SAFE_FILE_PRINTF(PATH_KSM "run", "2");
639 }
640 
641 /* THP */
642 
643 /* cpuset/memcg */
644 
gather_node_cpus(char * cpus,long nd)645 static void gather_node_cpus(char *cpus, long nd)
646 {
647 	int ncpus = 0;
648 	int i;
649 	long online;
650 	char buf[BUFSIZ];
651 	char path[BUFSIZ], path1[BUFSIZ];
652 
653 	while (path_exist(PATH_SYS_SYSTEM "/cpu/cpu%d", ncpus))
654 		ncpus++;
655 
656 	for (i = 0; i < ncpus; i++) {
657 		snprintf(path, BUFSIZ,
658 			 PATH_SYS_SYSTEM "/node/node%ld/cpu%d", nd, i);
659 		if (path_exist(path)) {
660 			snprintf(path1, BUFSIZ, "%s/online", path);
661 			/*
662 			 * if there is no online knob, then the cpu cannot
663 			 * be taken offline
664 			 */
665 			if (path_exist(path1)) {
666 				SAFE_FILE_SCANF(path1, "%ld", &online);
667 				if (online == 0)
668 					continue;
669 			}
670 			sprintf(buf, "%d,", i);
671 			strcat(cpus, buf);
672 		}
673 	}
674 	/* Remove the trailing comma. */
675 	cpus[strlen(cpus) - 1] = '\0';
676 }
677 
/*
 * Read up to BUFSIZ bytes of the cpuset file @prefix/@filename into
 * @retbuf, falling back to the 'cpuset.' prefixed name when the plain
 * one does not exist.
 *
 * NOTE(review): @retbuf is not explicitly NUL-terminated here;
 * presumably callers rely on the file content being shorter than
 * BUFSIZ - verify against the call sites.
 */
void read_cpuset_files(char *prefix, char *filename, char *retbuf)
{
	int fd;
	char path[BUFSIZ];

	/*
	 * try either '/dev/cpuset/XXXX' or '/dev/cpuset/cpuset.XXXX'
	 * please see Documentation/cgroups/cpusets.txt from kernel src
	 * for details
	 */
	snprintf(path, BUFSIZ, "%s/%s", prefix, filename);
	fd = open(path, O_RDONLY);
	if (fd == -1) {
		if (errno == ENOENT) {
			/* fall back to the 'cpuset.' prefixed knob name */
			snprintf(path, BUFSIZ, "%s/cpuset.%s",
				 prefix, filename);
			fd = SAFE_OPEN(path, O_RDONLY);
		} else
			tst_brk(TBROK | TERRNO, "open %s", path);
	}
	if (read(fd, retbuf, BUFSIZ) < 0)
		tst_brk(TBROK | TERRNO, "read %s", path);
	close(fd);
}
702 
write_cpuset_files(char * prefix,char * filename,char * buf)703 void write_cpuset_files(char *prefix, char *filename, char *buf)
704 {
705 	int fd;
706 	char path[BUFSIZ];
707 
708 	/*
709 	 * try either '/dev/cpuset/XXXX' or '/dev/cpuset/cpuset.XXXX'
710 	 * please see Documentation/cgroups/cpusets.txt from kernel src
711 	 * for details
712 	 */
713 	snprintf(path, BUFSIZ, "%s/%s", prefix, filename);
714 	fd = open(path, O_WRONLY);
715 	if (fd == -1) {
716 		if (errno == ENOENT) {
717 			snprintf(path, BUFSIZ, "%s/cpuset.%s",
718 				 prefix, filename);
719 			fd = SAFE_OPEN(path, O_WRONLY);
720 		} else
721 			tst_brk(TBROK | TERRNO, "open %s", path);
722 	}
723 	SAFE_WRITE(1, fd, buf, strlen(buf));
724 	close(fd);
725 }
726 
write_cpusets(long nd)727 void write_cpusets(long nd)
728 {
729 	char buf[BUFSIZ];
730 	char cpus[BUFSIZ] = "";
731 
732 	snprintf(buf, BUFSIZ, "%ld", nd);
733 	write_cpuset_files(CPATH_NEW, "mems", buf);
734 
735 	gather_node_cpus(cpus, nd);
736 	/*
737 	 * If the 'nd' node doesn't contain any CPUs,
738 	 * the first ID of CPU '0' will be used as
739 	 * the value of cpuset.cpus.
740 	 */
741 	if (strlen(cpus) != 0) {
742 		write_cpuset_files(CPATH_NEW, "cpus", cpus);
743 	} else {
744 		tst_res(TINFO, "No CPUs in the node%ld; "
745 				"using only CPU0", nd);
746 		write_cpuset_files(CPATH_NEW, "cpus", "0");
747 	}
748 
749 	SAFE_FILE_PRINTF(CPATH_NEW "/tasks", "%d", getpid());
750 }
751 
/*
 * Tear down a cgroup/cpuset hierarchy created by mount_mem(): migrate
 * every task listed in @path_new/tasks back into @path/tasks, remove
 * @path_new, then unmount and remove @path.  All failures are reported
 * as TWARN so the cleanup proceeds as far as possible.
 */
void umount_mem(char *path, char *path_new)
{
	FILE *fp;
	int fd;
	char s_new[BUFSIZ], s[BUFSIZ], value[BUFSIZ];

	/* Move all processes in task to its parent node. */
	sprintf(s, "%s/tasks", path);
	fd = open(s, O_WRONLY);
	if (fd == -1)
		tst_res(TWARN | TERRNO, "open %s", s);

	snprintf(s_new, BUFSIZ, "%s/tasks", path_new);
	fp = fopen(s_new, "r");
	if (fp == NULL)
		tst_res(TWARN | TERRNO, "fopen %s", s_new);
	if ((fd != -1) && (fp != NULL)) {
		/* each line is one pid; write it back minus the trailing '\n' */
		while (fgets(value, BUFSIZ, fp) != NULL)
			if (write(fd, value, strlen(value) - 1)
			    != (ssize_t)strlen(value) - 1)
				tst_res(TWARN | TERRNO, "write %s", s);
	}
	if (fd != -1)
		close(fd);
	if (fp != NULL)
		fclose(fp);
	if (rmdir(path_new) == -1)
		tst_res(TWARN | TERRNO, "rmdir %s", path_new);
	if (umount(path) == -1)
		tst_res(TWARN | TERRNO, "umount %s", path);
	if (rmdir(path) == -1)
		tst_res(TWARN | TERRNO, "rmdir %s", path);
}
785 
mount_mem(char * name,char * fs,char * options,char * path,char * path_new)786 void mount_mem(char *name, char *fs, char *options, char *path, char *path_new)
787 {
788 	SAFE_MKDIR(path, 0777);
789 	if (mount(name, path, fs, 0, options) == -1) {
790 		if (errno == ENODEV) {
791 			if (rmdir(path) == -1)
792 				tst_res(TWARN | TERRNO, "rmdir %s failed",
793 					 path);
794 			tst_brk(TCONF,
795 				 "file system %s is not configured in kernel",
796 				 fs);
797 		}
798 		tst_brk(TBROK | TERRNO, "mount %s", path);
799 	}
800 	SAFE_MKDIR(path_new, 0777);
801 }
802 
803 /* shared */
804 
805 /* Warning: *DO NOT* use this function in child */
get_a_numa_node(void)806 unsigned int get_a_numa_node(void)
807 {
808 	unsigned int nd1, nd2;
809 	int ret;
810 
811 	ret = get_allowed_nodes(0, 2, &nd1, &nd2);
812 	switch (ret) {
813 	case 0:
814 		break;
815 	case -3:
816 		tst_brk(TCONF, "requires a NUMA system.");
817 	default:
818 		tst_brk(TBROK | TERRNO, "1st get_allowed_nodes");
819 	}
820 
821 	ret = get_allowed_nodes(NH_MEMS | NH_CPUS, 1, &nd1);
822 	switch (ret) {
823 	case 0:
824 		tst_res(TINFO, "get node%u.", nd1);
825 		return nd1;
826 	case -3:
827 		tst_brk(TCONF, "requires a NUMA system that has "
828 			 "at least one node with both memory and CPU "
829 			 "available.");
830 	default:
831 		tst_brk(TBROK | TERRNO, "2nd get_allowed_nodes");
832 	}
833 
834 	/* not reached */
835 	abort();
836 }
837 
path_exist(const char * path,...)838 int path_exist(const char *path, ...)
839 {
840 	va_list ap;
841 	char pathbuf[PATH_MAX];
842 
843 	va_start(ap, path);
844 	vsnprintf(pathbuf, sizeof(pathbuf), path, ap);
845 	va_end(ap);
846 
847 	return access(pathbuf, F_OK) == 0;
848 }
849 
set_sys_tune(char * sys_file,long tune,int check)850 void set_sys_tune(char *sys_file, long tune, int check)
851 {
852 	long val;
853 	char path[BUFSIZ];
854 
855 	tst_res(TINFO, "set %s to %ld", sys_file, tune);
856 
857 	snprintf(path, BUFSIZ, PATH_SYSVM "%s", sys_file);
858 	SAFE_FILE_PRINTF(path, "%ld", tune);
859 
860 	if (check) {
861 		val = get_sys_tune(sys_file);
862 		if (val != tune)
863 			tst_brk(TBROK, "%s = %ld, but expect %ld",
864 				 sys_file, val, tune);
865 	}
866 }
867 
get_sys_tune(char * sys_file)868 long get_sys_tune(char *sys_file)
869 {
870 	char path[BUFSIZ];
871 	long tune;
872 
873 	snprintf(path, BUFSIZ, PATH_SYSVM "%s", sys_file);
874 	SAFE_FILE_SCANF(path, "%ld", &tune);
875 
876 	return tune;
877 }
878 
update_shm_size(size_t * shm_size)879 void update_shm_size(size_t * shm_size)
880 {
881 	size_t shmmax;
882 
883 	SAFE_FILE_SCANF(PATH_SHMMAX, "%zu", &shmmax);
884 	if (*shm_size > shmmax) {
885 		tst_res(TINFO, "Set shm_size to shmmax: %zu", shmmax);
886 		*shm_size = shmmax;
887 	}
888 }
889 
/*
 * Return 1 when any mapping in /proc/self/maps starts or ends inside
 * [low, high), 0 otherwise.  TBROKs if the maps file cannot be opened
 * or a line cannot be parsed.
 */
int range_is_mapped(unsigned long low, unsigned long high)
{
	FILE *fp;

	fp = fopen("/proc/self/maps", "r");
	if (fp == NULL)
		tst_brk(TBROK | TERRNO, "Failed to open /proc/self/maps.");

	/*
	 * The trailing "\n" directive skips whitespace past the newline,
	 * which sets EOF on the stream after the last line and terminates
	 * the loop cleanly.
	 */
	while (!feof(fp)) {
		unsigned long start, end;
		int ret;

		/* lines look like "START-END perms offset dev inode path" */
		ret = fscanf(fp, "%lx-%lx %*[^\n]\n", &start, &end);
		if (ret != 2) {
			fclose(fp);
			tst_brk(TBROK | TERRNO, "Couldn't parse /proc/self/maps line.");
		}

		if ((start >= low) && (start < high)) {
			fclose(fp);
			return 1;
		}
		if ((end >= low) && (end < high)) {
			fclose(fp);
			return 1;
		}
	}

	fclose(fp);
	return 0;
}
921