• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #define TST_NO_DEFAULT_MAIN
2 
3 #include "config.h"
4 #include <sys/types.h>
5 #include <sys/mman.h>
6 #include <sys/mount.h>
7 #include <sys/stat.h>
8 #include <sys/wait.h>
9 #include <sys/param.h>
10 #include <errno.h>
11 #include <fcntl.h>
12 #if HAVE_NUMA_H
13 #include <numa.h>
14 #endif
15 #if HAVE_NUMAIF_H
16 #include <numaif.h>
17 #endif
18 #include <pthread.h>
19 #include <stdarg.h>
20 #include <stdio.h>
21 #include <string.h>
22 #include <stdlib.h>
23 #include <unistd.h>
24 
25 #include "mem.h"
26 #include "numa_helper.h"
27 
28 /* OOM */
29 
/*
 * Non-static global, initialised to -1.  Nothing in the part of the file
 * visible here reads or writes it; presumably individual tests use it to
 * remember an overcommit setting (TODO confirm against mem.h and callers).
 */
long overcommit = -1;
31 
alloc_mem(long int length,int testcase)32 static int alloc_mem(long int length, int testcase)
33 {
34 	char *s;
35 	long i, pagesz = getpagesize();
36 	int loop = 10;
37 
38 	tst_res(TINFO, "thread (%lx), allocating %ld bytes.",
39 		(unsigned long) pthread_self(), length);
40 
41 	s = mmap(NULL, length, PROT_READ | PROT_WRITE,
42 		 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
43 	if (s == MAP_FAILED)
44 		return errno;
45 
46 	if (testcase == MLOCK) {
47 		while (mlock(s, length) == -1 && loop > 0) {
48 			if (EAGAIN != errno)
49 				return errno;
50 			usleep(300000);
51 			loop--;
52 		}
53 	}
54 
55 #ifdef HAVE_MADV_MERGEABLE
56 	if (testcase == KSM && madvise(s, length, MADV_MERGEABLE) == -1)
57 		return errno;
58 #endif
59 	for (i = 0; i < length; i += pagesz)
60 		s[i] = '\a';
61 
62 	return 0;
63 }
64 
child_alloc_thread(void * args)65 static void *child_alloc_thread(void *args)
66 {
67 	int ret = 0;
68 
69 	/* keep allocating until there's an error */
70 	while (!ret)
71 		ret = alloc_mem(LENGTH, (long)args);
72 	exit(ret);
73 }
74 
child_alloc(int testcase,int lite,int threads)75 static void child_alloc(int testcase, int lite, int threads)
76 {
77 	int i;
78 	pthread_t *th;
79 
80 	if (lite) {
81 		int ret = alloc_mem(TESTMEM + MB, testcase);
82 		exit(ret);
83 	}
84 
85 	th = malloc(sizeof(pthread_t) * threads);
86 	if (!th) {
87 		tst_res(TINFO | TERRNO, "malloc");
88 		goto out;
89 	}
90 
91 	for (i = 0; i < threads; i++) {
92 		TEST(pthread_create(&th[i], NULL, child_alloc_thread,
93 			(void *)((long)testcase)));
94 		if (TST_RET) {
95 			tst_res(TINFO | TRERRNO, "pthread_create");
96 			/*
97 			 * Keep going if thread other than first fails to
98 			 * spawn due to lack of resources.
99 			 */
100 			if (i == 0 || TST_RET != EAGAIN)
101 				goto out;
102 		}
103 	}
104 
105 	/* wait for one of threads to exit whole process */
106 	while (1)
107 		sleep(1);
108 out:
109 	exit(1);
110 }
111 
112 /*
113  * oom - allocates memory according to specified testcase and checks
114  *       desired outcome (e.g. child killed, operation failed with ENOMEM)
115  * @testcase: selects how child allocates memory
116  *            valid choices are: NORMAL, MLOCK and KSM
117  * @lite: if non-zero, child makes only single TESTMEM+MB allocation
118  *        if zero, child keeps allocating memory until it gets killed
119  *        or some operation fails
120  * @retcode: expected return code of child process
121  *           if matches child ret code, this function reports PASS,
122  *           otherwise it reports FAIL
123  * @allow_sigkill: if zero and child is killed, this function reports FAIL
124  *                 if non-zero, then if child is killed by SIGKILL
125  *                 it is considered as PASS
126  */
oom(int testcase,int lite,int retcode,int allow_sigkill)127 void oom(int testcase, int lite, int retcode, int allow_sigkill)
128 {
129 	pid_t pid;
130 	int status, threads;
131 
132 	switch (pid = SAFE_FORK()) {
133 	case 0:
134 		threads = MAX(1, tst_ncpus() - 1);
135 		child_alloc(testcase, lite, threads);
136 	default:
137 		break;
138 	}
139 
140 	tst_res(TINFO, "expected victim is %d.", pid);
141 	SAFE_WAITPID(-1, &status, 0);
142 
143 	if (WIFSIGNALED(status)) {
144 		if (allow_sigkill && WTERMSIG(status) == SIGKILL) {
145 			tst_res(TPASS, "victim signalled: (%d) %s",
146 				SIGKILL,
147 				tst_strsig(SIGKILL));
148 		} else {
149 			tst_res(TFAIL, "victim signalled: (%d) %s",
150 				WTERMSIG(status),
151 				tst_strsig(WTERMSIG(status)));
152 		}
153 	} else if (WIFEXITED(status)) {
154 		if (WEXITSTATUS(status) == retcode) {
155 			tst_res(TPASS, "victim retcode: (%d) %s",
156 				retcode, strerror(retcode));
157 		} else {
158 			tst_res(TFAIL, "victim unexpectedly ended with "
159 				"retcode: %d, expected: %d",
160 				WEXITSTATUS(status), retcode);
161 		}
162 	} else {
163 		tst_res(TFAIL, "victim unexpectedly ended");
164 	}
165 }
166 
167 #ifdef HAVE_NUMA_V2
set_global_mempolicy(int mempolicy)168 static void set_global_mempolicy(int mempolicy)
169 {
170 	unsigned long nmask[MAXNODES / BITS_PER_LONG] = { 0 };
171 	int num_nodes, *nodes;
172 	int ret;
173 
174 	if (mempolicy) {
175 		ret = get_allowed_nodes_arr(NH_MEMS|NH_CPUS, &num_nodes, &nodes);
176 		if (ret != 0)
177 			tst_brk(TBROK|TERRNO, "get_allowed_nodes_arr");
178 		if (num_nodes < 2) {
179 			tst_res(TINFO, "mempolicy need NUMA system support");
180 			free(nodes);
181 			return;
182 		}
183 		switch(mempolicy) {
184 		case MPOL_BIND:
185 			/* bind the second node */
186 			set_node(nmask, nodes[1]);
187 			break;
188 		case MPOL_INTERLEAVE:
189 		case MPOL_PREFERRED:
190 			if (num_nodes == 2) {
191 				tst_res(TINFO, "The mempolicy need "
192 					 "more than 2 numa nodes");
193 				free(nodes);
194 				return;
195 			} else {
196 				/* Using the 2nd,3rd node */
197 				set_node(nmask, nodes[1]);
198 				set_node(nmask, nodes[2]);
199 			}
200 			break;
201 		default:
202 			tst_brk(TBROK|TERRNO, "Bad mempolicy mode");
203 		}
204 		if (set_mempolicy(mempolicy, nmask, MAXNODES) == -1)
205 			tst_brk(TBROK|TERRNO, "set_mempolicy");
206 	}
207 }
208 #else
set_global_mempolicy(int mempolicy LTP_ATTRIBUTE_UNUSED)209 static void set_global_mempolicy(int mempolicy LTP_ATTRIBUTE_UNUSED) { }
210 #endif
211 
testoom(int mempolicy,int lite,int retcode,int allow_sigkill)212 void testoom(int mempolicy, int lite, int retcode, int allow_sigkill)
213 {
214 	int ksm_run_orig;
215 
216 	set_global_mempolicy(mempolicy);
217 
218 	tst_res(TINFO, "start normal OOM testing.");
219 	oom(NORMAL, lite, retcode, allow_sigkill);
220 
221 	tst_res(TINFO, "start OOM testing for mlocked pages.");
222 	oom(MLOCK, lite, retcode, allow_sigkill);
223 
224 	/*
225 	 * Skip oom(KSM) if lite == 1, since limit_in_bytes may vary from
226 	 * run to run, which isn't reliable for oom03 cgroup test.
227 	 */
228 	if (access(PATH_KSM, F_OK) == -1 || lite == 1) {
229 		tst_res(TINFO, "KSM is not configed or lite == 1, "
230 			 "skip OOM test for KSM pags");
231 	} else {
232 		tst_res(TINFO, "start OOM testing for KSM pages.");
233 		SAFE_FILE_SCANF(PATH_KSM "run", "%d", &ksm_run_orig);
234 		SAFE_FILE_PRINTF(PATH_KSM "run", "1");
235 		oom(KSM, lite, retcode, allow_sigkill);
236 		SAFE_FILE_PRINTF(PATH_KSM "run", "%d", ksm_run_orig);
237 	}
238 }
239 
240 /* KSM */
241 
check(char * path,long int value)242 static void check(char *path, long int value)
243 {
244 	char fullpath[BUFSIZ];
245 	long actual_val;
246 
247 	snprintf(fullpath, BUFSIZ, PATH_KSM "%s", path);
248 	SAFE_FILE_SCANF(fullpath, "%ld", &actual_val);
249 
250 	if (actual_val != value)
251 		tst_res(TFAIL, "%s is not %ld but %ld.", path, value,
252 			actual_val);
253 	else
254 		tst_res(TPASS, "%s is %ld.", path, actual_val);
255 }
256 
final_group_check(int run,int pages_shared,int pages_sharing,int pages_volatile,int pages_unshared,int sleep_millisecs,int pages_to_scan)257 static void final_group_check(int run, int pages_shared, int pages_sharing,
258 			  int pages_volatile, int pages_unshared,
259 			  int sleep_millisecs, int pages_to_scan)
260 {
261 	tst_res(TINFO, "check!");
262 	check("run", run);
263 	check("pages_shared", pages_shared);
264 	check("pages_sharing", pages_sharing);
265 	check("pages_volatile", pages_volatile);
266 	check("pages_unshared", pages_unshared);
267 	check("sleep_millisecs", sleep_millisecs);
268 	check("pages_to_scan", pages_to_scan);
269 }
270 
group_check(int run,int pages_shared,int pages_sharing,int pages_volatile,int pages_unshared,int sleep_millisecs,int pages_to_scan)271 static void group_check(int run, int pages_shared, int pages_sharing,
272 			int pages_volatile, int pages_unshared,
273 			int sleep_millisecs, int pages_to_scan)
274 {
275 	if (run != 1) {
276 		tst_res(TFAIL, "group_check run is not 1, %d.", run);
277 	} else {
278 		/* wait for ksm daemon to scan all mergeable pages. */
279 		wait_ksmd_full_scan();
280 	}
281 
282 	final_group_check(run, pages_shared, pages_sharing,
283 			  pages_volatile, pages_unshared,
284 			  sleep_millisecs, pages_to_scan);
285 }
286 
verify(char ** memory,char value,int proc,int start,int end,int start2,int end2)287 static void verify(char **memory, char value, int proc,
288 		    int start, int end, int start2, int end2)
289 {
290 	int i, j;
291 	void *s = NULL;
292 
293 	s = SAFE_MALLOC((end - start) * (end2 - start2));
294 
295 	tst_res(TINFO, "child %d verifies memory content.", proc);
296 	memset(s, value, (end - start) * (end2 - start2));
297 	if (memcmp(memory[start], s, (end - start) * (end2 - start2))
298 	    != 0)
299 		for (j = start; j < end; j++)
300 			for (i = start2; i < end2; i++)
301 				if (memory[j][i] != value)
302 					tst_res(TFAIL, "child %d has %c at "
303 						 "%d,%d,%d.",
304 						 proc, memory[j][i], proc,
305 						 j, i);
306 	free(s);
307 }
308 
check_hugepage(void)309 void check_hugepage(void)
310 {
311 	if (access(PATH_HUGEPAGES, F_OK))
312 		tst_brk(TCONF, "Huge page is not supported.");
313 }
314 
write_memcg(void)315 void write_memcg(void)
316 {
317 	SAFE_FILE_PRINTF(MEMCG_LIMIT, "%ld", TESTMEM);
318 
319 	SAFE_FILE_PRINTF(MEMCG_PATH_NEW "/tasks", "%d", getpid());
320 }
321 
/*
 * One fill step of a KSM child.  The field order matters: the tables in
 * create_same_memory() initialise this struct positionally.
 */
struct ksm_merge_data {
	/* byte value the child fills its memory with */
	char data;
	/*
	 * Compared against size * MB by the child: when it is smaller, the
	 * very last byte of the child's memory is set to 'e' instead.
	 */
	unsigned int mergeable_size;
};
326 
ksm_child_memset(int child_num,int size,int total_unit,struct ksm_merge_data ksm_merge_data,char ** memory)327 static void ksm_child_memset(int child_num, int size, int total_unit,
328 		 struct ksm_merge_data ksm_merge_data, char **memory)
329 {
330 	int i = 0, j;
331 	int unit = size / total_unit;
332 
333 	tst_res(TINFO, "child %d continues...", child_num);
334 
335 	if (ksm_merge_data.mergeable_size == size * MB) {
336 		tst_res(TINFO, "child %d allocates %d MB filled with '%c'",
337 			child_num, size, ksm_merge_data.data);
338 
339 	} else {
340 		tst_res(TINFO, "child %d allocates %d MB filled with '%c'"
341 				" except one page with 'e'",
342 				child_num, size, ksm_merge_data.data);
343 	}
344 
345 	for (j = 0; j < total_unit; j++) {
346 		for (i = 0; (unsigned int)i < unit * MB; i++)
347 			memory[j][i] = ksm_merge_data.data;
348 	}
349 
350 	/* if it contains unshared page, then set 'e' char
351 	 * at the end of the last page
352 	 */
353 	if (ksm_merge_data.mergeable_size < size * MB)
354 		memory[j-1][i-1] = 'e';
355 }
356 
create_ksm_child(int child_num,int size,int unit,struct ksm_merge_data * ksm_merge_data)357 static void create_ksm_child(int child_num, int size, int unit,
358 		       struct ksm_merge_data *ksm_merge_data)
359 {
360 	int j, total_unit;
361 	char **memory;
362 
363 	/* The total units in all */
364 	total_unit = size / unit;
365 
366 	/* Apply for the space for memory */
367 	memory = SAFE_MALLOC(total_unit * sizeof(char *));
368 	for (j = 0; j < total_unit; j++) {
369 		memory[j] = SAFE_MMAP(NULL, unit * MB, PROT_READ|PROT_WRITE,
370 			MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
371 #ifdef HAVE_MADV_MERGEABLE
372 		if (madvise(memory[j], unit * MB, MADV_MERGEABLE) == -1)
373 			tst_brk(TBROK|TERRNO, "madvise");
374 #endif
375 	}
376 
377 	tst_res(TINFO, "child %d stops.", child_num);
378 	if (raise(SIGSTOP) == -1)
379 		tst_brk(TBROK|TERRNO, "kill");
380 	fflush(stdout);
381 
382 	for (j = 0; j < 4; j++) {
383 
384 		ksm_child_memset(child_num, size, total_unit,
385 				  ksm_merge_data[j], memory);
386 
387 		fflush(stdout);
388 
389 		tst_res(TINFO, "child %d stops.", child_num);
390 		if (raise(SIGSTOP) == -1)
391 			tst_brk(TBROK|TERRNO, "kill");
392 
393 		if (ksm_merge_data[j].mergeable_size < size * MB) {
394 			verify(memory, 'e', child_num, total_unit - 1,
395 				total_unit, unit * MB - 1, unit * MB);
396 			verify(memory, ksm_merge_data[j].data, child_num,
397 				0, total_unit, 0, unit * MB - 1);
398 		} else {
399 			verify(memory, ksm_merge_data[j].data, child_num,
400 				0, total_unit, 0, unit * MB);
401 		}
402 	}
403 
404 	tst_res(TINFO, "child %d finished.", child_num);
405 }
406 
stop_ksm_children(int * child,int num)407 static void stop_ksm_children(int *child, int num)
408 {
409 	int k, status;
410 
411 	tst_res(TINFO, "wait for all children to stop.");
412 	for (k = 0; k < num; k++) {
413 		SAFE_WAITPID(child[k], &status, WUNTRACED);
414 		if (!WIFSTOPPED(status))
415 			tst_brk(TBROK, "child %d was not stopped", k);
416 	}
417 }
418 
resume_ksm_children(int * child,int num)419 static void resume_ksm_children(int *child, int num)
420 {
421 	int k;
422 
423 	tst_res(TINFO, "resume all children.");
424 	for (k = 0; k < num; k++)
425 		SAFE_KILL(child[k], SIGCONT);
426 
427 	fflush(stdout);
428 }
429 
create_same_memory(int size,int num,int unit)430 void create_same_memory(int size, int num, int unit)
431 {
432 	int i, j, status, *child;
433 	unsigned long ps, pages;
434 	struct ksm_merge_data **ksm_data;
435 
436 	struct ksm_merge_data ksm_data0[] = {
437 	       {'c', size*MB}, {'c', size*MB}, {'d', size*MB}, {'d', size*MB},
438 	};
439 	struct ksm_merge_data ksm_data1[] = {
440 	       {'a', size*MB}, {'b', size*MB}, {'d', size*MB}, {'d', size*MB-1},
441 	};
442 	struct ksm_merge_data ksm_data2[] = {
443 	       {'a', size*MB}, {'a', size*MB}, {'d', size*MB}, {'d', size*MB},
444 	};
445 
446 	ps = sysconf(_SC_PAGE_SIZE);
447 	pages = MB / ps;
448 
449 	ksm_data = malloc((num - 3) * sizeof(struct ksm_merge_data *));
450 	/* Since from third child, the data is same with the first child's */
451 	for (i = 0; i < num - 3; i++) {
452 		ksm_data[i] = malloc(4 * sizeof(struct ksm_merge_data));
453 		for (j = 0; j < 4; j++) {
454 			ksm_data[i][j].data = ksm_data0[j].data;
455 			ksm_data[i][j].mergeable_size =
456 				ksm_data0[j].mergeable_size;
457 		}
458 	}
459 
460 	child = SAFE_MALLOC(num * sizeof(int));
461 
462 	for (i = 0; i < num; i++) {
463 		fflush(stdout);
464 		switch (child[i] = SAFE_FORK()) {
465 		case 0:
466 			if (i == 0) {
467 				create_ksm_child(i, size, unit, ksm_data0);
468 				exit(0);
469 			} else if (i == 1) {
470 				create_ksm_child(i, size, unit, ksm_data1);
471 				exit(0);
472 			} else if (i == 2) {
473 				create_ksm_child(i, size, unit, ksm_data2);
474 				exit(0);
475 			} else {
476 				create_ksm_child(i, size, unit, ksm_data[i-3]);
477 				exit(0);
478 			}
479 		}
480 	}
481 
482 	stop_ksm_children(child, num);
483 
484 	tst_res(TINFO, "KSM merging...");
485 	if (access(PATH_KSM "max_page_sharing", F_OK) == 0) {
486 		SAFE_FILE_PRINTF(PATH_KSM "run", "2");
487 		SAFE_FILE_PRINTF(PATH_KSM "max_page_sharing", "%ld", size * pages * num);
488 	}
489 
490 	SAFE_FILE_PRINTF(PATH_KSM "run", "1");
491 	SAFE_FILE_PRINTF(PATH_KSM "pages_to_scan", "%ld", size * pages * num);
492 	SAFE_FILE_PRINTF(PATH_KSM "sleep_millisecs", "0");
493 
494 	resume_ksm_children(child, num);
495 	stop_ksm_children(child, num);
496 	group_check(1, 2, size * num * pages - 2, 0, 0, 0, size * pages * num);
497 
498 	resume_ksm_children(child, num);
499 	stop_ksm_children(child, num);
500 	group_check(1, 3, size * num * pages - 3, 0, 0, 0, size * pages * num);
501 
502 	resume_ksm_children(child, num);
503 	stop_ksm_children(child, num);
504 	group_check(1, 1, size * num * pages - 1, 0, 0, 0, size * pages * num);
505 
506 	resume_ksm_children(child, num);
507 	stop_ksm_children(child, num);
508 	group_check(1, 1, size * num * pages - 2, 0, 1, 0, size * pages * num);
509 
510 	tst_res(TINFO, "KSM unmerging...");
511 	SAFE_FILE_PRINTF(PATH_KSM "run", "2");
512 
513 	resume_ksm_children(child, num);
514 	final_group_check(2, 0, 0, 0, 0, 0, size * pages * num);
515 
516 	tst_res(TINFO, "stop KSM.");
517 	SAFE_FILE_PRINTF(PATH_KSM "run", "0");
518 	final_group_check(0, 0, 0, 0, 0, 0, size * pages * num);
519 
520 	while (waitpid(-1, &status, 0) > 0)
521 		if (WEXITSTATUS(status) != 0)
522 			tst_res(TFAIL, "child exit status is %d",
523 				 WEXITSTATUS(status));
524 }
525 
test_ksm_merge_across_nodes(unsigned long nr_pages)526 void test_ksm_merge_across_nodes(unsigned long nr_pages)
527 {
528 	char **memory;
529 	int i, ret;
530 	int num_nodes, *nodes;
531 	unsigned long length;
532 	unsigned long pagesize;
533 
534 #ifdef HAVE_NUMA_V2
535 	unsigned long nmask[MAXNODES / BITS_PER_LONG] = { 0 };
536 #endif
537 
538 	ret = get_allowed_nodes_arr(NH_MEMS, &num_nodes, &nodes);
539 	if (ret != 0)
540 		tst_brk(TBROK|TERRNO, "get_allowed_nodes_arr");
541 	if (num_nodes < 2) {
542 		tst_res(TINFO, "need NUMA system support");
543 		free(nodes);
544 		return;
545 	}
546 
547 	pagesize = sysconf(_SC_PAGE_SIZE);
548 	length = nr_pages * pagesize;
549 
550 	memory = SAFE_MALLOC(num_nodes * sizeof(char *));
551 	for (i = 0; i < num_nodes; i++) {
552 		memory[i] = SAFE_MMAP(NULL, length, PROT_READ|PROT_WRITE,
553 			    MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
554 #ifdef HAVE_MADV_MERGEABLE
555 		if (madvise(memory[i], length, MADV_MERGEABLE) == -1)
556 			tst_brk(TBROK|TERRNO, "madvise");
557 #endif
558 
559 #ifdef HAVE_NUMA_V2
560 		clean_node(nmask);
561 		set_node(nmask, nodes[i]);
562 		/*
563 		 * Use mbind() to make sure each node contains
564 		 * length size memory.
565 		 */
566 		ret = mbind(memory[i], length, MPOL_BIND, nmask, MAXNODES, 0);
567 		if (ret == -1)
568 			tst_brk(TBROK|TERRNO, "mbind");
569 #endif
570 
571 		memset(memory[i], 10, length);
572 	}
573 
574 	SAFE_FILE_PRINTF(PATH_KSM "sleep_millisecs", "0");
575 	SAFE_FILE_PRINTF(PATH_KSM "pages_to_scan", "%ld",
576 			 nr_pages * num_nodes);
577 	/*
578 	 * merge_across_nodes and max_page_sharing setting can be changed
579 	 * only when there are no ksm shared pages in system, so set run 2
580 	 * to unmerge pages first, then to 1 after changing merge_across_nodes,
581 	 * to remerge according to the new setting.
582 	 */
583 	SAFE_FILE_PRINTF(PATH_KSM "run", "2");
584 	if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
585 		SAFE_FILE_PRINTF(PATH_KSM "max_page_sharing",
586 			"%ld", nr_pages * num_nodes);
587 	tst_res(TINFO, "Start to test KSM with merge_across_nodes=1");
588 	SAFE_FILE_PRINTF(PATH_KSM "merge_across_nodes", "1");
589 	SAFE_FILE_PRINTF(PATH_KSM "run", "1");
590 	group_check(1, 1, nr_pages * num_nodes - 1, 0, 0, 0,
591 		    nr_pages * num_nodes);
592 
593 	SAFE_FILE_PRINTF(PATH_KSM "run", "2");
594 	tst_res(TINFO, "Start to test KSM with merge_across_nodes=0");
595 	SAFE_FILE_PRINTF(PATH_KSM "merge_across_nodes", "0");
596 	SAFE_FILE_PRINTF(PATH_KSM "run", "1");
597 	group_check(1, num_nodes, nr_pages * num_nodes - num_nodes,
598 		    0, 0, 0, nr_pages * num_nodes);
599 
600 	SAFE_FILE_PRINTF(PATH_KSM "run", "2");
601 }
602 
603 /* THP */
604 
605 /* cpuset/memcg */
606 
gather_node_cpus(char * cpus,long nd)607 static void gather_node_cpus(char *cpus, long nd)
608 {
609 	int ncpus = 0;
610 	int i;
611 	long online;
612 	char buf[BUFSIZ];
613 	char path[BUFSIZ], path1[BUFSIZ];
614 
615 	while (path_exist(PATH_SYS_SYSTEM "/cpu/cpu%d", ncpus))
616 		ncpus++;
617 
618 	for (i = 0; i < ncpus; i++) {
619 		snprintf(path, BUFSIZ,
620 			 PATH_SYS_SYSTEM "/node/node%ld/cpu%d", nd, i);
621 		if (path_exist(path)) {
622 			snprintf(path1, BUFSIZ, "%s/online", path);
623 			/*
624 			 * if there is no online knob, then the cpu cannot
625 			 * be taken offline
626 			 */
627 			if (path_exist(path1)) {
628 				SAFE_FILE_SCANF(path1, "%ld", &online);
629 				if (online == 0)
630 					continue;
631 			}
632 			sprintf(buf, "%d,", i);
633 			strcat(cpus, buf);
634 		}
635 	}
636 	/* Remove the trailing comma. */
637 	cpus[strlen(cpus) - 1] = '\0';
638 }
639 
read_cpuset_files(char * prefix,char * filename,char * retbuf)640 void read_cpuset_files(char *prefix, char *filename, char *retbuf)
641 {
642 	int fd;
643 	char path[BUFSIZ];
644 
645 	/*
646 	 * try either '/dev/cpuset/XXXX' or '/dev/cpuset/cpuset.XXXX'
647 	 * please see Documentation/cgroups/cpusets.txt from kernel src
648 	 * for details
649 	 */
650 	snprintf(path, BUFSIZ, "%s/%s", prefix, filename);
651 	fd = open(path, O_RDONLY);
652 	if (fd == -1) {
653 		if (errno == ENOENT) {
654 			snprintf(path, BUFSIZ, "%s/cpuset.%s",
655 				 prefix, filename);
656 			fd = SAFE_OPEN(path, O_RDONLY);
657 		} else
658 			tst_brk(TBROK | TERRNO, "open %s", path);
659 	}
660 	if (read(fd, retbuf, BUFSIZ) < 0)
661 		tst_brk(TBROK | TERRNO, "read %s", path);
662 	close(fd);
663 }
664 
write_cpuset_files(char * prefix,char * filename,char * buf)665 void write_cpuset_files(char *prefix, char *filename, char *buf)
666 {
667 	int fd;
668 	char path[BUFSIZ];
669 
670 	/*
671 	 * try either '/dev/cpuset/XXXX' or '/dev/cpuset/cpuset.XXXX'
672 	 * please see Documentation/cgroups/cpusets.txt from kernel src
673 	 * for details
674 	 */
675 	snprintf(path, BUFSIZ, "%s/%s", prefix, filename);
676 	fd = open(path, O_WRONLY);
677 	if (fd == -1) {
678 		if (errno == ENOENT) {
679 			snprintf(path, BUFSIZ, "%s/cpuset.%s",
680 				 prefix, filename);
681 			fd = SAFE_OPEN(path, O_WRONLY);
682 		} else
683 			tst_brk(TBROK | TERRNO, "open %s", path);
684 	}
685 	SAFE_WRITE(1, fd, buf, strlen(buf));
686 	close(fd);
687 }
688 
write_cpusets(long nd)689 void write_cpusets(long nd)
690 {
691 	char buf[BUFSIZ];
692 	char cpus[BUFSIZ] = "";
693 
694 	snprintf(buf, BUFSIZ, "%ld", nd);
695 	write_cpuset_files(CPATH_NEW, "mems", buf);
696 
697 	gather_node_cpus(cpus, nd);
698 	/*
699 	 * If the 'nd' node doesn't contain any CPUs,
700 	 * the first ID of CPU '0' will be used as
701 	 * the value of cpuset.cpus.
702 	 */
703 	if (strlen(cpus) != 0) {
704 		write_cpuset_files(CPATH_NEW, "cpus", cpus);
705 	} else {
706 		tst_res(TINFO, "No CPUs in the node%ld; "
707 				"using only CPU0", nd);
708 		write_cpuset_files(CPATH_NEW, "cpus", "0");
709 	}
710 
711 	SAFE_FILE_PRINTF(CPATH_NEW "/tasks", "%d", getpid());
712 }
713 
umount_mem(char * path,char * path_new)714 void umount_mem(char *path, char *path_new)
715 {
716 	FILE *fp;
717 	int fd;
718 	char s_new[BUFSIZ], s[BUFSIZ], value[BUFSIZ];
719 
720 	/* Move all processes in task to its parent node. */
721 	sprintf(s, "%s/tasks", path);
722 	fd = open(s, O_WRONLY);
723 	if (fd == -1)
724 		tst_res(TWARN | TERRNO, "open %s", s);
725 
726 	snprintf(s_new, BUFSIZ, "%s/tasks", path_new);
727 	fp = fopen(s_new, "r");
728 	if (fp == NULL)
729 		tst_res(TWARN | TERRNO, "fopen %s", s_new);
730 	if ((fd != -1) && (fp != NULL)) {
731 		while (fgets(value, BUFSIZ, fp) != NULL)
732 			if (write(fd, value, strlen(value) - 1)
733 			    != (ssize_t)strlen(value) - 1)
734 				tst_res(TWARN | TERRNO, "write %s", s);
735 	}
736 	if (fd != -1)
737 		close(fd);
738 	if (fp != NULL)
739 		fclose(fp);
740 	if (rmdir(path_new) == -1)
741 		tst_res(TWARN | TERRNO, "rmdir %s", path_new);
742 	if (umount(path) == -1)
743 		tst_res(TWARN | TERRNO, "umount %s", path);
744 	if (rmdir(path) == -1)
745 		tst_res(TWARN | TERRNO, "rmdir %s", path);
746 }
747 
mount_mem(char * name,char * fs,char * options,char * path,char * path_new)748 void mount_mem(char *name, char *fs, char *options, char *path, char *path_new)
749 {
750 	SAFE_MKDIR(path, 0777);
751 	if (mount(name, path, fs, 0, options) == -1) {
752 		if (errno == ENODEV) {
753 			if (rmdir(path) == -1)
754 				tst_res(TWARN | TERRNO, "rmdir %s failed",
755 					 path);
756 			tst_brk(TCONF,
757 				 "file system %s is not configured in kernel",
758 				 fs);
759 		}
760 		tst_brk(TBROK | TERRNO, "mount %s", path);
761 	}
762 	SAFE_MKDIR(path_new, 0777);
763 }
764 
765 /* shared */
766 
767 /* Warning: *DO NOT* use this function in child */
get_a_numa_node(void)768 unsigned int get_a_numa_node(void)
769 {
770 	unsigned int nd1, nd2;
771 	int ret;
772 
773 	ret = get_allowed_nodes(0, 2, &nd1, &nd2);
774 	switch (ret) {
775 	case 0:
776 		break;
777 	case -3:
778 		tst_brk(TCONF, "requires a NUMA system.");
779 	default:
780 		tst_brk(TBROK | TERRNO, "1st get_allowed_nodes");
781 	}
782 
783 	ret = get_allowed_nodes(NH_MEMS | NH_CPUS, 1, &nd1);
784 	switch (ret) {
785 	case 0:
786 		tst_res(TINFO, "get node%u.", nd1);
787 		return nd1;
788 	case -3:
789 		tst_brk(TCONF, "requires a NUMA system that has "
790 			 "at least one node with both memory and CPU "
791 			 "available.");
792 	default:
793 		tst_brk(TBROK | TERRNO, "2nd get_allowed_nodes");
794 	}
795 
796 	/* not reached */
797 	abort();
798 }
799 
/*
 * Build a path from the printf-style format @path and its arguments and
 * test whether it exists.  The formatted path is limited to PATH_MAX - 1
 * characters.
 *
 * Returns 1 when access(F_OK) succeeds on the path, 0 otherwise.
 */
int path_exist(const char *path, ...)
{
	char resolved[PATH_MAX];
	va_list args;

	va_start(args, path);
	vsnprintf(resolved, sizeof(resolved), path, args);
	va_end(args);

	if (access(resolved, F_OK) != 0)
		return 0;

	return 1;
}
811 
set_sys_tune(char * sys_file,long tune,int check)812 void set_sys_tune(char *sys_file, long tune, int check)
813 {
814 	long val;
815 	char path[BUFSIZ];
816 
817 	tst_res(TINFO, "set %s to %ld", sys_file, tune);
818 
819 	snprintf(path, BUFSIZ, PATH_SYSVM "%s", sys_file);
820 	SAFE_FILE_PRINTF(path, "%ld", tune);
821 
822 	if (check) {
823 		val = get_sys_tune(sys_file);
824 		if (val != tune)
825 			tst_brk(TBROK, "%s = %ld, but expect %ld",
826 				 sys_file, val, tune);
827 	}
828 }
829 
get_sys_tune(char * sys_file)830 long get_sys_tune(char *sys_file)
831 {
832 	char path[BUFSIZ];
833 	long tune;
834 
835 	snprintf(path, BUFSIZ, PATH_SYSVM "%s", sys_file);
836 	SAFE_FILE_SCANF(path, "%ld", &tune);
837 
838 	return tune;
839 }
840 
update_shm_size(size_t * shm_size)841 void update_shm_size(size_t * shm_size)
842 {
843 	size_t shmmax;
844 
845 	SAFE_FILE_SCANF(PATH_SHMMAX, "%zu", &shmmax);
846 	if (*shm_size > shmmax) {
847 		tst_res(TINFO, "Set shm_size to shmmax: %zu", shmmax);
848 		*shm_size = shmmax;
849 	}
850 }
851 
range_is_mapped(unsigned long low,unsigned long high)852 int range_is_mapped(unsigned long low, unsigned long high)
853 {
854 	FILE *fp;
855 
856 	fp = fopen("/proc/self/maps", "r");
857 	if (fp == NULL)
858 		tst_brk(TBROK | TERRNO, "Failed to open /proc/self/maps.");
859 
860 	while (!feof(fp)) {
861 		unsigned long start, end;
862 		int ret;
863 
864 		ret = fscanf(fp, "%lx-%lx %*[^\n]\n", &start, &end);
865 		if (ret != 2) {
866 			fclose(fp);
867 			tst_brk(TBROK | TERRNO, "Couldn't parse /proc/self/maps line.");
868 		}
869 
870 		if ((start >= low) && (start < high)) {
871 			fclose(fp);
872 			return 1;
873 		}
874 		if ((end >= low) && (end < high)) {
875 			fclose(fp);
876 			return 1;
877 		}
878 	}
879 
880 	fclose(fp);
881 	return 0;
882 }
883