1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2012 Linux Test Project, Inc.
4 */
5
6 /*
7 * functional test for readahead() syscall
8 *
 * This test measures the effects of the readahead() syscall.
 * It mmaps/reads a test file with and without a prior call to readahead().
 *
 * The overlay part of the test is a regression test for:
13 * b833a3660394
14 * ("ovl: add ovl_fadvise()")
15 * Introduced by:
16 * 5b910bd615ba
17 * ("ovl: fix GPF in swapfile_activate of file from overlayfs over xfs")
18 */
19 #define _GNU_SOURCE
#include <sys/types.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include "config.h"
#include "tst_test.h"
#include "tst_timer.h"
#include "lapi/syscalls.h"
36
37 static char testfile[PATH_MAX] = "testfile";
38 #define DROP_CACHES_FNAME "/proc/sys/vm/drop_caches"
39 #define MEMINFO_FNAME "/proc/meminfo"
40 #define PROC_IO_FNAME "/proc/self/io"
41 static size_t testfile_size = 64 * 1024 * 1024;
42 static char *opt_fsizestr;
43 static int pagesize;
44 static unsigned long cached_max;
45 static int ovl_mounted;
46 static int readahead_length = 4096;
47 static char sys_bdi_ra_path[PATH_MAX];
48 static int orig_bdi_limit;
49
50 static const char mntpoint[] = OVL_BASE_MNTPOINT;
51
52 static struct tst_option options[] = {
53 {"s:", &opt_fsizestr, "-s testfile size (default 64MB)"},
54 {NULL, NULL, NULL}
55 };
56
/*
 * Adapter that gives the readahead() library call the signature stored in
 * struct tcase. The result of readahead() is passed through unchanged.
 */
static int libc_readahead(int fd, off_t offset, size_t len)
{
	int rc = readahead(fd, offset, len);

	return rc;
}
61
fadvise_willneed(int fd,off_t offset,size_t len)62 static int fadvise_willneed(int fd, off_t offset, size_t len)
63 {
64 /* Should have the same effect as readahead() syscall */
65 errno = posix_fadvise(fd, offset, len, POSIX_FADV_WILLNEED);
66 /* posix_fadvise returns error number (not in errno) */
67 return errno ? -1 : 0;
68 }
69
70 static struct tcase {
71 const char *tname;
72 int use_overlay;
73 int use_fadvise;
74 /* Use either readahead() syscall or POSIX_FADV_WILLNEED */
75 int (*readahead)(int, off_t, size_t);
76 } tcases[] = {
77 { "readahead on file", 0, 0, libc_readahead },
78 { "readahead on overlayfs file", 1, 0, libc_readahead },
79 { "POSIX_FADV_WILLNEED on file", 0, 1, fadvise_willneed },
80 { "POSIX_FADV_WILLNEED on overlayfs file", 1, 1, fadvise_willneed },
81 };
82
83 static int readahead_supported = 1;
84 static int fadvise_supported = 1;
85
has_file(const char * fname,int required)86 static int has_file(const char *fname, int required)
87 {
88 struct stat buf;
89
90 if (stat(fname, &buf) == -1) {
91 if (errno != ENOENT)
92 tst_brk(TBROK | TERRNO, "stat %s", fname);
93 if (required)
94 tst_brk(TCONF, "%s not available", fname);
95 return 0;
96 }
97 return 1;
98 }
99
/* Write "1" to DROP_CACHES_FNAME so that cached file data is discarded. */
static void drop_caches(void)
{
	SAFE_FILE_PRINTF(DROP_CACHES_FNAME, "1");
}
104
get_bytes_read(void)105 static unsigned long get_bytes_read(void)
106 {
107 unsigned long ret;
108
109 SAFE_FILE_LINES_SCANF(PROC_IO_FNAME, "read_bytes: %lu", &ret);
110
111 return ret;
112 }
113
get_cached_size(void)114 static unsigned long get_cached_size(void)
115 {
116 unsigned long ret;
117
118 SAFE_FILE_LINES_SCANF(MEMINFO_FNAME, "Cached: %lu", &ret);
119
120 return ret;
121 }
122
create_testfile(int use_overlay)123 static void create_testfile(int use_overlay)
124 {
125 int fd;
126 char *tmp;
127 size_t i;
128
129 sprintf(testfile, "%s/testfile",
130 use_overlay ? OVL_MNT : OVL_BASE_MNTPOINT);
131 tst_res(TINFO, "creating test file of size: %zu", testfile_size);
132 tmp = SAFE_MALLOC(pagesize);
133
134 /* round to page size */
135 testfile_size = testfile_size & ~((long)pagesize - 1);
136
137 fd = SAFE_CREAT(testfile, 0644);
138 for (i = 0; i < testfile_size; i += pagesize)
139 SAFE_WRITE(1, fd, tmp, pagesize);
140 SAFE_FSYNC(fd);
141 SAFE_CLOSE(fd);
142 free(tmp);
143 }
144
145 /* read_testfile - mmap testfile and read every page.
146 * This functions measures how many I/O and time it takes to fully
147 * read contents of test file.
148 *
149 * @do_readahead: call readahead prior to reading file content?
150 * @fname: name of file to test
151 * @fsize: how many bytes to read/mmap
152 * @read_bytes: returns difference of bytes read, parsed from /proc/<pid>/io
153 * @usec: returns how many microsecond it took to go over fsize bytes
154 * @cached: returns cached kB from /proc/meminfo
155 */
read_testfile(struct tcase * tc,int do_readahead,const char * fname,size_t fsize,unsigned long * read_bytes,long long * usec,unsigned long * cached)156 static int read_testfile(struct tcase *tc, int do_readahead,
157 const char *fname, size_t fsize,
158 unsigned long *read_bytes, long long *usec,
159 unsigned long *cached)
160 {
161 int fd;
162 size_t i = 0;
163 long read_bytes_start;
164 unsigned char *p, tmp;
165 off_t offset = 0;
166
167 fd = SAFE_OPEN(fname, O_RDONLY);
168
169 if (do_readahead) {
170 do {
171 TEST(tc->readahead(fd, offset, fsize - offset));
172 if (TST_RET != 0) {
173 SAFE_CLOSE(fd);
174 return TST_ERR;
175 }
176
177 i++;
178 offset += readahead_length;
179 } while ((size_t)offset < fsize);
180 tst_res(TINFO, "readahead calls made: %zu", i);
181 *cached = get_cached_size();
182
183 /* offset of file shouldn't change after readahead */
184 offset = SAFE_LSEEK(fd, 0, SEEK_CUR);
185 if (offset == 0)
186 tst_res(TPASS, "offset is still at 0 as expected");
187 else
188 tst_res(TFAIL, "offset has changed to: %lu", offset);
189 }
190
191 tst_timer_start(CLOCK_MONOTONIC);
192 read_bytes_start = get_bytes_read();
193
194 p = SAFE_MMAP(NULL, fsize, PROT_READ, MAP_SHARED | MAP_POPULATE, fd, 0);
195
196 /* for old kernels, where MAP_POPULATE doesn't work, touch each page */
197 tmp = 0;
198 for (i = 0; i < fsize; i += pagesize)
199 tmp = tmp ^ p[i];
200 /* prevent gcc from optimizing out loop above */
201 if (tmp != 0)
202 tst_brk(TBROK, "This line should not be reached");
203
204 if (!do_readahead)
205 *cached = get_cached_size();
206
207 SAFE_MUNMAP(p, fsize);
208
209 *read_bytes = get_bytes_read() - read_bytes_start;
210
211 tst_timer_stop();
212 *usec = tst_timer_elapsed_us();
213
214 SAFE_CLOSE(fd);
215 return 0;
216 }
217
test_readahead(unsigned int n)218 static void test_readahead(unsigned int n)
219 {
220 unsigned long read_bytes, read_bytes_ra;
221 long long usec, usec_ra;
222 unsigned long cached_high, cached_low, cached, cached_ra;
223 int ret;
224 struct tcase *tc = &tcases[n];
225
226 tst_res(TINFO, "Test #%d: %s", n, tc->tname);
227
228 if (tc->use_overlay && !ovl_mounted) {
229 tst_res(TCONF,
230 "overlayfs is not configured in this kernel.");
231 return;
232 }
233
234 create_testfile(tc->use_overlay);
235
236 /* find out how much can cache hold if we read whole file */
237 read_testfile(tc, 0, testfile, testfile_size, &read_bytes, &usec,
238 &cached);
239 cached_high = get_cached_size();
240 sync();
241 drop_caches();
242 cached_low = get_cached_size();
243 cached_max = MAX(cached_max, cached_high - cached_low);
244
245 tst_res(TINFO, "read_testfile(0)");
246 read_testfile(tc, 0, testfile, testfile_size, &read_bytes, &usec,
247 &cached);
248 if (cached > cached_low)
249 cached = cached - cached_low;
250 else
251 cached = 0;
252
253 sync();
254 drop_caches();
255 cached_low = get_cached_size();
256 tst_res(TINFO, "read_testfile(1)");
257 ret = read_testfile(tc, 1, testfile, testfile_size, &read_bytes_ra,
258 &usec_ra, &cached_ra);
259
260 if (ret == EINVAL) {
261 if (tc->use_fadvise &&
262 (!tc->use_overlay || !fadvise_supported)) {
263 fadvise_supported = 0;
264 tst_res(TCONF, "CONFIG_ADVISE_SYSCALLS not configured "
265 "in kernel?");
266 return;
267 }
268
269 if (!tc->use_overlay || !readahead_supported) {
270 readahead_supported = 0;
271 tst_res(TCONF, "readahead not supported on %s",
272 tst_device->fs_type);
273 return;
274 }
275 }
276
277 if (ret) {
278 tst_res(TFAIL | TTERRNO, "%s failed on %s",
279 tc->use_fadvise ? "fadvise" : "readahead",
280 tc->use_overlay ? "overlayfs" :
281 tst_device->fs_type);
282 return;
283 }
284
285 if (cached_ra > cached_low)
286 cached_ra = cached_ra - cached_low;
287 else
288 cached_ra = 0;
289
290 tst_res(TINFO, "read_testfile(0) took: %lli usec", usec);
291 tst_res(TINFO, "read_testfile(1) took: %lli usec", usec_ra);
292 if (has_file(PROC_IO_FNAME, 0)) {
293 tst_res(TINFO, "read_testfile(0) read: %ld bytes", read_bytes);
294 tst_res(TINFO, "read_testfile(1) read: %ld bytes",
295 read_bytes_ra);
296 /* actual number of read bytes depends on total RAM */
297 if (read_bytes_ra < read_bytes)
298 tst_res(TPASS, "readahead saved some I/O");
299 else
300 tst_res(TFAIL, "readahead failed to save any I/O");
301 } else {
302 tst_res(TCONF, "Your system doesn't have /proc/self/io,"
303 " unable to determine read bytes during test");
304 }
305
306 tst_res(TINFO, "cache can hold at least: %ld kB", cached_max);
307 tst_res(TINFO, "read_testfile(0) used cache: %ld kB", cached);
308 tst_res(TINFO, "read_testfile(1) used cache: %ld kB", cached_ra);
309
310 if (cached_max * 1024 >= testfile_size) {
311 /*
312 * if cache can hold ~testfile_size then cache increase
313 * for readahead should be at least testfile_size/2
314 */
315 if (cached_ra * 1024 > testfile_size / 2)
316 tst_res(TPASS, "using cache as expected");
317 else if (!cached_ra)
318 tst_res(TFAIL, "readahead failed to use any cache");
319 else
320 tst_res(TWARN, "using less cache than expected");
321 } else {
322 tst_res(TCONF, "Page cache on your system is too small "
323 "to hold whole testfile.");
324 }
325 }
326
327
328 /*
329 * We try raising bdi readahead limit as much as we can. We write
330 * and read back "read_ahead_kb" sysfs value, starting with filesize.
331 * If that fails, we try again with lower value.
332 * readahead_length used in the test is then set to MIN(bdi limit, 2M),
333 * to respect kernels prior to commit 600e19afc5f8a6c.
334 */
setup_readahead_length(void)335 static void setup_readahead_length(void)
336 {
337 struct stat sbuf;
338 char tmp[PATH_MAX], *backing_dev;
339 int ra_new_limit, ra_limit;
340
341 /* Find out backing device name */
342 SAFE_LSTAT(tst_device->dev, &sbuf);
343 if (S_ISLNK(sbuf.st_mode))
344 SAFE_READLINK(tst_device->dev, tmp, PATH_MAX);
345 else
346 strcpy(tmp, tst_device->dev);
347
348 backing_dev = basename(tmp);
349 sprintf(sys_bdi_ra_path, "/sys/class/block/%s/bdi/read_ahead_kb",
350 backing_dev);
351 if (access(sys_bdi_ra_path, F_OK))
352 return;
353
354 SAFE_FILE_SCANF(sys_bdi_ra_path, "%d", &orig_bdi_limit);
355
356 /* raise bdi limit as much as kernel allows */
357 ra_new_limit = testfile_size / 1024;
358 while (ra_new_limit > pagesize / 1024) {
359 FILE_PRINTF(sys_bdi_ra_path, "%d", ra_new_limit);
360 SAFE_FILE_SCANF(sys_bdi_ra_path, "%d", &ra_limit);
361
362 if (ra_limit == ra_new_limit) {
363 readahead_length = MIN(ra_new_limit * 1024,
364 2 * 1024 * 1024);
365 break;
366 }
367 ra_new_limit = ra_new_limit / 2;
368 }
369 }
370
setup(void)371 static void setup(void)
372 {
373 if (opt_fsizestr)
374 testfile_size = SAFE_STRTOL(opt_fsizestr, 1, INT_MAX);
375
376 if (access(PROC_IO_FNAME, F_OK))
377 tst_brk(TCONF, "Requires " PROC_IO_FNAME);
378
379 has_file(DROP_CACHES_FNAME, 1);
380 has_file(MEMINFO_FNAME, 1);
381
382 /* check if readahead is supported */
383 tst_syscall(__NR_readahead, 0, 0, 0);
384
385 pagesize = getpagesize();
386
387 setup_readahead_length();
388 tst_res(TINFO, "readahead length: %d", readahead_length);
389
390 ovl_mounted = TST_MOUNT_OVERLAY();
391 }
392
cleanup(void)393 static void cleanup(void)
394 {
395 if (ovl_mounted)
396 SAFE_UMOUNT(OVL_MNT);
397
398 if (orig_bdi_limit)
399 SAFE_FILE_PRINTF(sys_bdi_ra_path, "%d", orig_bdi_limit);
400 }
401
402 static struct tst_test test = {
403 .needs_root = 1,
404 .mount_device = 1,
405 .mntpoint = mntpoint,
406 .setup = setup,
407 .cleanup = cleanup,
408 .options = options,
409 .test = test_readahead,
410 .tcnt = ARRAY_SIZE(tcases),
411 .tags = (const struct tst_tag[]) {
412 {"linux-git", "b833a3660394"},
413 {"linux-git", "5b910bd615ba"},
414 {}
415 }
416 };
417