• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2012 Linux Test Project, Inc.
4  */
5 
/*
 * Functional test for the readahead() syscall.
 *
 * This test measures the effects of the readahead() syscall.
 * It mmaps/reads a test file with and without a prior call to readahead().
 *
 * The overlay part of the test is a regression test for:
 *  b833a3660394
 *  ("ovl: add ovl_fadvise()")
 * Introduced by:
 *  5b910bd615ba
 *  ("ovl: fix GPF in swapfile_activate of file from overlayfs over xfs")
 */
19 #define _GNU_SOURCE
#include <sys/types.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "config.h"
#include "tst_test.h"
#include "tst_timer.h"
#include "lapi/syscalls.h"
36 
37 static char testfile[PATH_MAX] = "testfile";
38 #define DROP_CACHES_FNAME "/proc/sys/vm/drop_caches"
39 #define MEMINFO_FNAME "/proc/meminfo"
40 #define PROC_IO_FNAME "/proc/self/io"
41 static size_t testfile_size = 64 * 1024 * 1024;
42 static char *opt_fsizestr;
43 static int pagesize;
44 static unsigned long cached_max;
45 static int ovl_mounted;
46 static int readahead_length  = 4096;
47 static char sys_bdi_ra_path[PATH_MAX];
48 static int orig_bdi_limit;
49 
50 static const char mntpoint[] = OVL_BASE_MNTPOINT;
51 
52 static struct tst_option options[] = {
53 	{"s:", &opt_fsizestr, "-s    testfile size (default 64MB)"},
54 	{NULL, NULL, NULL}
55 };
56 
/*
 * Thin adapter around the libc readahead() wrapper so that it matches the
 * int (*)(int, off_t, size_t) callback type stored in the test case table.
 * The readahead() return value is handed back converted to int.
 */
static int libc_readahead(int fd, off_t offset, size_t len)
{
	int rc = readahead(fd, offset, len);

	return rc;
}
61 
/*
 * Issue POSIX_FADV_WILLNEED for the given range, which should have the
 * same effect as the readahead() syscall.
 *
 * posix_fadvise() reports failure through its return value rather than
 * errno, so the result is copied into errno (0 on success) and the
 * function returns -1 on failure, 0 on success.
 */
static int fadvise_willneed(int fd, off_t offset, size_t len)
{
	int err = posix_fadvise(fd, offset, len, POSIX_FADV_WILLNEED);

	errno = err;
	if (err)
		return -1;

	return 0;
}
69 
70 static struct tcase {
71 	const char *tname;
72 	int use_overlay;
73 	int use_fadvise;
74 	/* Use either readahead() syscall or POSIX_FADV_WILLNEED */
75 	int (*readahead)(int, off_t, size_t);
76 } tcases[] = {
77 	{ "readahead on file", 0, 0, libc_readahead },
78 	{ "readahead on overlayfs file", 1, 0, libc_readahead },
79 	{ "POSIX_FADV_WILLNEED on file", 0, 1, fadvise_willneed },
80 	{ "POSIX_FADV_WILLNEED on overlayfs file", 1, 1, fadvise_willneed },
81 };
82 
83 static int readahead_supported = 1;
84 static int fadvise_supported = 1;
85 
has_file(const char * fname,int required)86 static int has_file(const char *fname, int required)
87 {
88 	struct stat buf;
89 
90 	if (stat(fname, &buf) == -1) {
91 		if (errno != ENOENT)
92 			tst_brk(TBROK | TERRNO, "stat %s", fname);
93 		if (required)
94 			tst_brk(TCONF, "%s not available", fname);
95 		return 0;
96 	}
97 	return 1;
98 }
99 
drop_caches(void)100 static void drop_caches(void)
101 {
102 	SAFE_FILE_PRINTF(DROP_CACHES_FNAME, "1");
103 }
104 
get_bytes_read(void)105 static unsigned long get_bytes_read(void)
106 {
107 	unsigned long ret;
108 
109 	SAFE_FILE_LINES_SCANF(PROC_IO_FNAME, "read_bytes: %lu", &ret);
110 
111 	return ret;
112 }
113 
get_cached_size(void)114 static unsigned long get_cached_size(void)
115 {
116 	unsigned long ret;
117 
118 	SAFE_FILE_LINES_SCANF(MEMINFO_FNAME, "Cached: %lu", &ret);
119 
120 	return ret;
121 }
122 
create_testfile(int use_overlay)123 static void create_testfile(int use_overlay)
124 {
125 	int fd;
126 	char *tmp;
127 	size_t i;
128 
129 	sprintf(testfile, "%s/testfile",
130 		use_overlay ? OVL_MNT : OVL_BASE_MNTPOINT);
131 	tst_res(TINFO, "creating test file of size: %zu", testfile_size);
132 	tmp = SAFE_MALLOC(pagesize);
133 
134 	/* round to page size */
135 	testfile_size = testfile_size & ~((long)pagesize - 1);
136 
137 	fd = SAFE_CREAT(testfile, 0644);
138 	for (i = 0; i < testfile_size; i += pagesize)
139 		SAFE_WRITE(1, fd, tmp, pagesize);
140 	SAFE_FSYNC(fd);
141 	SAFE_CLOSE(fd);
142 	free(tmp);
143 }
144 
145 /* read_testfile - mmap testfile and read every page.
146  * This functions measures how many I/O and time it takes to fully
147  * read contents of test file.
148  *
149  * @do_readahead: call readahead prior to reading file content?
150  * @fname: name of file to test
151  * @fsize: how many bytes to read/mmap
152  * @read_bytes: returns difference of bytes read, parsed from /proc/<pid>/io
153  * @usec: returns how many microsecond it took to go over fsize bytes
154  * @cached: returns cached kB from /proc/meminfo
155  */
read_testfile(struct tcase * tc,int do_readahead,const char * fname,size_t fsize,unsigned long * read_bytes,long long * usec,unsigned long * cached)156 static int read_testfile(struct tcase *tc, int do_readahead,
157 			 const char *fname, size_t fsize,
158 			 unsigned long *read_bytes, long long *usec,
159 			 unsigned long *cached)
160 {
161 	int fd;
162 	size_t i = 0;
163 	long read_bytes_start;
164 	unsigned char *p, tmp;
165 	off_t offset = 0;
166 
167 	fd = SAFE_OPEN(fname, O_RDONLY);
168 
169 	if (do_readahead) {
170 		do {
171 			TEST(tc->readahead(fd, offset, fsize - offset));
172 			if (TST_RET != 0) {
173 				SAFE_CLOSE(fd);
174 				return TST_ERR;
175 			}
176 
177 			i++;
178 			offset += readahead_length;
179 		} while ((size_t)offset < fsize);
180 		tst_res(TINFO, "readahead calls made: %zu", i);
181 		*cached = get_cached_size();
182 
183 		/* offset of file shouldn't change after readahead */
184 		offset = SAFE_LSEEK(fd, 0, SEEK_CUR);
185 		if (offset == 0)
186 			tst_res(TPASS, "offset is still at 0 as expected");
187 		else
188 			tst_res(TFAIL, "offset has changed to: %lu", offset);
189 	}
190 
191 	tst_timer_start(CLOCK_MONOTONIC);
192 	read_bytes_start = get_bytes_read();
193 
194 	p = SAFE_MMAP(NULL, fsize, PROT_READ, MAP_SHARED | MAP_POPULATE, fd, 0);
195 
196 	/* for old kernels, where MAP_POPULATE doesn't work, touch each page */
197 	tmp = 0;
198 	for (i = 0; i < fsize; i += pagesize)
199 		tmp = tmp ^ p[i];
200 	/* prevent gcc from optimizing out loop above */
201 	if (tmp != 0)
202 		tst_brk(TBROK, "This line should not be reached");
203 
204 	if (!do_readahead)
205 		*cached = get_cached_size();
206 
207 	SAFE_MUNMAP(p, fsize);
208 
209 	*read_bytes = get_bytes_read() - read_bytes_start;
210 
211 	tst_timer_stop();
212 	*usec = tst_timer_elapsed_us();
213 
214 	SAFE_CLOSE(fd);
215 	return 0;
216 }
217 
test_readahead(unsigned int n)218 static void test_readahead(unsigned int n)
219 {
220 	unsigned long read_bytes, read_bytes_ra;
221 	long long usec, usec_ra;
222 	unsigned long cached_high, cached_low, cached, cached_ra;
223 	int ret;
224 	struct tcase *tc = &tcases[n];
225 
226 	tst_res(TINFO, "Test #%d: %s", n, tc->tname);
227 
228 	if (tc->use_overlay && !ovl_mounted) {
229 		tst_res(TCONF,
230 		        "overlayfs is not configured in this kernel.");
231 		return;
232 	}
233 
234 	create_testfile(tc->use_overlay);
235 
236 	/* find out how much can cache hold if we read whole file */
237 	read_testfile(tc, 0, testfile, testfile_size, &read_bytes, &usec,
238 		      &cached);
239 	cached_high = get_cached_size();
240 	sync();
241 	drop_caches();
242 	cached_low = get_cached_size();
243 	cached_max = MAX(cached_max, cached_high - cached_low);
244 
245 	tst_res(TINFO, "read_testfile(0)");
246 	read_testfile(tc, 0, testfile, testfile_size, &read_bytes, &usec,
247 		      &cached);
248 	if (cached > cached_low)
249 		cached = cached - cached_low;
250 	else
251 		cached = 0;
252 
253 	sync();
254 	drop_caches();
255 	cached_low = get_cached_size();
256 	tst_res(TINFO, "read_testfile(1)");
257 	ret = read_testfile(tc, 1, testfile, testfile_size, &read_bytes_ra,
258 		            &usec_ra, &cached_ra);
259 
260 	if (ret == EINVAL) {
261 		if (tc->use_fadvise &&
262 		    (!tc->use_overlay || !fadvise_supported)) {
263 			fadvise_supported = 0;
264 			tst_res(TCONF, "CONFIG_ADVISE_SYSCALLS not configured "
265 				"in kernel?");
266 			return;
267 		}
268 
269 		if (!tc->use_overlay || !readahead_supported) {
270 			readahead_supported = 0;
271 			tst_res(TCONF, "readahead not supported on %s",
272 				tst_device->fs_type);
273 			return;
274 		}
275 	}
276 
277 	if (ret) {
278 		tst_res(TFAIL | TTERRNO, "%s failed on %s",
279 			tc->use_fadvise ? "fadvise" : "readahead",
280 			tc->use_overlay ? "overlayfs" :
281 			tst_device->fs_type);
282 		return;
283 	}
284 
285 	if (cached_ra > cached_low)
286 		cached_ra = cached_ra - cached_low;
287 	else
288 		cached_ra = 0;
289 
290 	tst_res(TINFO, "read_testfile(0) took: %lli usec", usec);
291 	tst_res(TINFO, "read_testfile(1) took: %lli usec", usec_ra);
292 	if (has_file(PROC_IO_FNAME, 0)) {
293 		tst_res(TINFO, "read_testfile(0) read: %ld bytes", read_bytes);
294 		tst_res(TINFO, "read_testfile(1) read: %ld bytes",
295 			read_bytes_ra);
296 		/* actual number of read bytes depends on total RAM */
297 		if (read_bytes_ra < read_bytes)
298 			tst_res(TPASS, "readahead saved some I/O");
299 		else
300 			tst_res(TFAIL, "readahead failed to save any I/O");
301 	} else {
302 		tst_res(TCONF, "Your system doesn't have /proc/self/io,"
303 			" unable to determine read bytes during test");
304 	}
305 
306 	tst_res(TINFO, "cache can hold at least: %ld kB", cached_max);
307 	tst_res(TINFO, "read_testfile(0) used cache: %ld kB", cached);
308 	tst_res(TINFO, "read_testfile(1) used cache: %ld kB", cached_ra);
309 
310 	if (cached_max * 1024 >= testfile_size) {
311 		/*
312 		 * if cache can hold ~testfile_size then cache increase
313 		 * for readahead should be at least testfile_size/2
314 		 */
315 		if (cached_ra * 1024 > testfile_size / 2)
316 			tst_res(TPASS, "using cache as expected");
317 		else if (!cached_ra)
318 			tst_res(TFAIL, "readahead failed to use any cache");
319 		else
320 			tst_res(TWARN, "using less cache than expected");
321 	} else {
322 		tst_res(TCONF, "Page cache on your system is too small "
323 			"to hold whole testfile.");
324 	}
325 }
326 
327 
328 /*
329  * We try raising bdi readahead limit as much as we can. We write
330  * and read back "read_ahead_kb" sysfs value, starting with filesize.
331  * If that fails, we try again with lower value.
332  * readahead_length used in the test is then set to MIN(bdi limit, 2M),
333  * to respect kernels prior to commit 600e19afc5f8a6c.
334  */
setup_readahead_length(void)335 static void setup_readahead_length(void)
336 {
337 	struct stat sbuf;
338 	char tmp[PATH_MAX], *backing_dev;
339 	int ra_new_limit, ra_limit;
340 
341 	/* Find out backing device name */
342 	SAFE_LSTAT(tst_device->dev, &sbuf);
343 	if (S_ISLNK(sbuf.st_mode))
344 		SAFE_READLINK(tst_device->dev, tmp, PATH_MAX);
345 	else
346 		strcpy(tmp, tst_device->dev);
347 
348 	backing_dev = basename(tmp);
349 	sprintf(sys_bdi_ra_path, "/sys/class/block/%s/bdi/read_ahead_kb",
350 		backing_dev);
351 	if (access(sys_bdi_ra_path, F_OK))
352 		return;
353 
354 	SAFE_FILE_SCANF(sys_bdi_ra_path, "%d", &orig_bdi_limit);
355 
356 	/* raise bdi limit as much as kernel allows */
357 	ra_new_limit = testfile_size / 1024;
358 	while (ra_new_limit > pagesize / 1024) {
359 		FILE_PRINTF(sys_bdi_ra_path, "%d", ra_new_limit);
360 		SAFE_FILE_SCANF(sys_bdi_ra_path, "%d", &ra_limit);
361 
362 		if (ra_limit == ra_new_limit) {
363 			readahead_length = MIN(ra_new_limit * 1024,
364 				2 * 1024 * 1024);
365 			break;
366 		}
367 		ra_new_limit = ra_new_limit / 2;
368 	}
369 }
370 
setup(void)371 static void setup(void)
372 {
373 	if (opt_fsizestr)
374 		testfile_size = SAFE_STRTOL(opt_fsizestr, 1, INT_MAX);
375 
376 	if (access(PROC_IO_FNAME, F_OK))
377 		tst_brk(TCONF, "Requires " PROC_IO_FNAME);
378 
379 	has_file(DROP_CACHES_FNAME, 1);
380 	has_file(MEMINFO_FNAME, 1);
381 
382 	/* check if readahead is supported */
383 	tst_syscall(__NR_readahead, 0, 0, 0);
384 
385 	pagesize = getpagesize();
386 
387 	setup_readahead_length();
388 	tst_res(TINFO, "readahead length: %d", readahead_length);
389 
390 	ovl_mounted = TST_MOUNT_OVERLAY();
391 }
392 
cleanup(void)393 static void cleanup(void)
394 {
395 	if (ovl_mounted)
396 		SAFE_UMOUNT(OVL_MNT);
397 
398 	if (orig_bdi_limit)
399 		SAFE_FILE_PRINTF(sys_bdi_ra_path, "%d", orig_bdi_limit);
400 }
401 
402 static struct tst_test test = {
403 	.needs_root = 1,
404 	.mount_device = 1,
405 	.mntpoint = mntpoint,
406 	.setup = setup,
407 	.cleanup = cleanup,
408 	.options = options,
409 	.test = test_readahead,
410 	.tcnt = ARRAY_SIZE(tcases),
411 	.tags = (const struct tst_tag[]) {
412 		{"linux-git", "b833a3660394"},
413 		{"linux-git", "5b910bd615ba"},
414 		{}
415 	}
416 };
417