/*
 * Copyright (c) 2016 Red Hat, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * DESCRIPTION
 *
 *   A page fault occurs even though madvise(MADV_WILLNEED) was called to
 *   prefetch the page. The issue is reproduced by running a program that
 *   sequentially accesses shared memory and, on each page fault, calls
 *   madvise(MADV_WILLNEED) on the next page.
 *
 *   The bug is present in all RHEL7 versions. It appears to have been fixed
 *   in mainline kernels after v3.15 by the following patch:
 *
 *   commit 55231e5c898c5c03c14194001e349f40f59bd300
 *   Author: Johannes Weiner <hannes@cmpxchg.org>
 *   Date:   Thu May 22 11:54:17 2014 -0700
 *
 *       mm: madvise: fix MADV_WILLNEED on shmem swapouts
 */

#include <errno.h>
#include <stdio.h>
#include <unistd.h>	/* getpagesize(), usleep() */
#include <sys/mman.h>	/* madvise(), MADV_WILLNEED, mmap flags */
#include <sys/mount.h>
#include <sys/sysinfo.h>
#include "tst_test.h"

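/*
 * Dirty a 400 MB shared anonymous chunk inside a memory cgroup limited to
 * PASS_THRESHOLD (100 MB), so most of the chunk is pushed out to swap. The
 * test then expects madvise(MADV_WILLNEED) to bring at least PASS_THRESHOLD
 * worth of pages back into the swap cache.
 */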
#define CHUNK_SZ (400*1024*1024L)
#define CHUNK_PAGES (CHUNK_SZ / pg_sz)
#define PASS_THRESHOLD (CHUNK_SZ / 4)

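/* Mount point and name of the dedicated memory cgroup used by the test */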
#define MNT_NAME "memory"
#define GROUP_NAME "madvise06"

static const char drop_caches_fname[] = "/proc/sys/vm/drop_caches";
static int pg_sz;

static void check_path(const char *path)
{
	if (access(path, R_OK | W_OK))
		tst_brk(TCONF, "file needed: %s", path);
}

static void setup(void)
{
	struct sysinfo sys_buf_start;

	pg_sz = getpagesize();

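	/*
	 * Write back dirty data and drop the caches so the free-memory
	 * checks below start from a stable baseline.
	 */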
	check_path(drop_caches_fname);
	tst_res(TINFO, "dropping caches");
	sync();
	SAFE_FILE_PRINTF(drop_caches_fname, "3");

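	/*
	 * The chunk is first dirtied in RAM and then pushed out to swap, so
	 * require 2 * CHUNK_SZ of both free RAM and free swap as headroom.
	 */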
	sysinfo(&sys_buf_start);
	if (sys_buf_start.freeram < 2 * CHUNK_SZ) {
		tst_brk(TCONF, "System RAM is too small (%li bytes needed)",
			2 * CHUNK_SZ);
	}
	if (sys_buf_start.freeswap < 2 * CHUNK_SZ) {
		tst_brk(TCONF, "System swap is too small (%li bytes needed)",
			2 * CHUNK_SZ);
	}

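	/* Mount the memory cgroup controller and create a group for this test */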
	SAFE_MKDIR(MNT_NAME, 0700);
	if (mount("memory", MNT_NAME, "cgroup", 0, "memory") == -1) {
		if (errno == ENODEV || errno == ENOENT)
			tst_brk(TCONF, "memory cgroup needed");
	}
	SAFE_MKDIR(MNT_NAME"/"GROUP_NAME, 0700);

	check_path("/proc/self/oom_score_adj");
	check_path(MNT_NAME"/"GROUP_NAME"/memory.limit_in_bytes");
	check_path(MNT_NAME"/"GROUP_NAME"/memory.swappiness");
	check_path(MNT_NAME"/"GROUP_NAME"/tasks");

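	/*
	 * Shield the test from the OOM killer, cap the group's memory at
	 * PASS_THRESHOLD so that dirtying CHUNK_SZ forces most pages out to
	 * swap, and move this process into the group.
	 */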
	SAFE_FILE_PRINTF("/proc/self/oom_score_adj", "%d", -1000);
	SAFE_FILE_PRINTF(MNT_NAME"/"GROUP_NAME"/memory.limit_in_bytes", "%ld\n",
		PASS_THRESHOLD);
	SAFE_FILE_PRINTF(MNT_NAME"/"GROUP_NAME"/memory.swappiness", "60");
	SAFE_FILE_PRINTF(MNT_NAME"/"GROUP_NAME"/tasks", "%d\n", getpid());
}

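/*
 * Move the process back to the root cgroup before removing the test group
 * and unmounting the controller.
 */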
static void cleanup(void)
{
	if (!access(MNT_NAME"/tasks", F_OK)) {
		SAFE_FILE_PRINTF(MNT_NAME"/tasks", "%d\n", getpid());
		SAFE_RMDIR(MNT_NAME"/"GROUP_NAME);
		SAFE_UMOUNT(MNT_NAME);
	}
}

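/* Write one byte into each page so the whole range becomes dirty */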
static void dirty_pages(char *ptr, long size)
{
	long i;
	long pages = size / pg_sz;

	for (i = 0; i < pages; i++)
		ptr[i * pg_sz] = 'x';
}

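/* Field 12 of /proc/self/stat is majflt, the number of major page faults */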
static int get_page_fault_num(void)
{
	int pg;

	SAFE_FILE_SCANF("/proc/self/stat",
			"%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %d",
			&pg);
	return pg;
}

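/*
 * Dirty the whole chunk so the cgroup limit pushes most of it out to swap,
 * then call madvise(MADV_WILLNEED) on the chunk. On a fixed kernel the
 * swapped-out pages are read back into the swap cache, so SwapCached grows
 * by at least PASS_THRESHOLD and touching the first page causes no extra
 * major fault.
 */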
static void test_advice_willneed(void)
{
	int loops = 50;
	char *target;
	long swapcached_start, swapcached;
	int page_fault_num_1, page_fault_num_2;

	target = SAFE_MMAP(NULL, CHUNK_SZ, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_ANONYMOUS,
			-1, 0);
	dirty_pages(target, CHUNK_SZ);

	SAFE_FILE_LINES_SCANF("/proc/meminfo", "SwapCached: %ld",
		&swapcached_start);
	tst_res(TINFO, "SwapCached (before madvise): %ld", swapcached_start);

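	/* Ask the kernel to read the swapped-out pages back in */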
	TEST(madvise(target, CHUNK_SZ, MADV_WILLNEED));
	if (TST_RET == -1)
		tst_brk(TBROK | TERRNO, "madvise failed");

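	/*
	 * Poll SwapCached for up to 5 seconds (50 * 100 ms). /proc/meminfo
	 * reports the value in kB, hence PASS_THRESHOLD / 1024.
	 */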
	do {
		loops--;
		usleep(100000);
		SAFE_FILE_LINES_SCANF("/proc/meminfo", "SwapCached: %ld",
			&swapcached);
	} while (swapcached < swapcached_start + PASS_THRESHOLD / 1024
		&& loops > 0);

	tst_res(TINFO, "SwapCached (after madvise): %ld", swapcached);
	if (swapcached > swapcached_start + PASS_THRESHOLD / 1024) {
		tst_res(TPASS, "Regression test passed");
		SAFE_MUNMAP(target, CHUNK_SZ);
		return;
	}

	/*
	 * We may have hit the bug, or the I/O may simply be slow. Touch the
	 * first page and check whether that triggers an extra major fault.
	 */
	page_fault_num_1 = get_page_fault_num();
	tst_res(TINFO, "PageFault(madvise / no mem access): %d",
			page_fault_num_1);
	target[0] = 'a';
	page_fault_num_2 = get_page_fault_num();
	tst_res(TINFO, "PageFault(madvise / mem access): %d",
			page_fault_num_2);

	if (page_fault_num_1 != page_fault_num_2)
		tst_res(TFAIL, "Bug has been reproduced");
	else
		tst_res(TPASS, "Regression test passed");

	SAFE_MUNMAP(target, CHUNK_SZ);
}

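/*
 * Needs root to set up the memory cgroup and a tmpdir for the mount point;
 * min_kver 3.10.0 matches the RHEL7 base kernel mentioned above.
 */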
static struct tst_test test = {
	.test_all = test_advice_willneed,
	.setup = setup,
	.cleanup = cleanup,
	.min_kver = "3.10.0",
	.needs_tmpdir = 1,
	.needs_root = 1,
};