1 /******************************************************************************/
2 /* */
3 /* Copyright (s) Ying Han <yinghan@google.com>, 2009 */
4 /* */
5 /* This program is free software; you can redistribute it and/or modify */
6 /* it under the terms of the GNU General Public License as published by */
7 /* the Free Software Foundation; either version 2 of the License, or */
8 /* (at your option) any later version. */
9 /* */
10 /* This program is distributed in the hope that it will be useful, */
11 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
12 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See */
13 /* the GNU General Public License for more details. */
14 /* */
15 /* You should have received a copy of the GNU General Public License */
16 /* along with this program; if not, write to the Free Software */
17 /* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
18 /* */
19 /******************************************************************************/
/*
 ftruncate-mmap: pages are lost after writing to an mmaped file.

 We triggered the failure during an internal experiment with a
 ftruncate/mmap/write/read sequence, and found that some pages are
 "lost" after writing to the mmaped file. The following test case
 detects this condition (count > 0).

 First we deployed the test case onto a group of machines and saw a
 >20% failure rate on average. Then I did a couple of experiments to try
 to reproduce it on a single machine. What I found is that:
 1. With an fsync added after writing the file, I cannot reproduce the issue.
 2. With memory pressure added (mmap/mlock) while running the test in an
    infinite loop, the failure is reproduced quickly. (background flushing?)

 The "bad pages" count differs each time, from one digit to 4 or 5 digits
 for a 128M ftruncated file. I also found that the bad page numbers are
 contiguous within each segment, and that the total set of bad pages
 contains several segments, e.g. "1-4, 9-20, 48-50". (batch flushing?)

 (The failure was reproduced on 2.6.29-rc8 and also happened on a
 2.6.18 kernel. Here is the simple test case to reproduce it with
 memory pressure.)
*/
44
#include <sys/mman.h>
#include <sys/types.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "test.h"
55
/* Extern Global Variables */
extern int tst_count;

/* Global Variables */
char *TCID = "mmap-corruption01";	/* test program identifier. */
int TST_TOTAL = 1;		/* total number of tests in this file. */

/* Size in bytes of the file that is truncated, mapped and filled (128 MiB). */
long kMemSize = 128 << 20;
/* Stride in bytes used when scanning the mapping for zeroed ("lost") pages. */
int kPageSize = 4096;

/* Option summary printed after the program name in the usage message. */
char *usage = "-h hours -m minutes -s secs\n";
67
anyfail(void)68 int anyfail(void)
69 {
70 tst_brkm(TFAIL, tst_rmdir, "Test failed\n");
71 }
72
main(int argc,char ** argv)73 int main(int argc, char **argv)
74 {
75 char *progname;
76 int count = 0;
77 int i, c;
78 char *fname = "test.mmap-corruption";
79 char *mem;
80 unsigned long alarmtime = 0;
81 struct sigaction sa;
82 void finish(int sig);
83
84 progname = *argv;
85 while ((c = getopt(argc, argv, ":h:m:s:")) != -1) {
86 switch (c) {
87 case 'h':
88 alarmtime += atoi(optarg) * 60 * 60;
89 break;
90 case 'm':
91 alarmtime += atoi(optarg) * 60;
92 break;
93 case 's':
94 alarmtime += atoi(optarg);
95 break;
96 default:
97 (void)fprintf(stderr, "usage: %s %s\n", progname,
98 usage);
99 anyfail();
100 }
101 }
102
103 /*
104 * Plan for death by signal. User may have specified
105 * a time limit, in which case set an alarm and catch SIGALRM.
106 * Also catch and cleanup with SIGINT, SIGQUIT, and SIGTERM.
107 */
108 sa.sa_handler = finish;
109 sa.sa_flags = 0;
110 if (sigemptyset(&sa.sa_mask)) {
111 perror("sigempty error");
112 exit(1);
113 }
114
115 if (sigaction(SIGINT, &sa, 0) == -1) {
116 perror("sigaction error SIGINT");
117 exit(1);
118 }
119 if (alarmtime) {
120 if (sigaction(SIGALRM, &sa, 0) == -1) {
121 perror("sigaction error");
122 exit(1);
123 }
124 (void)alarm(alarmtime);
125 printf("mmap-corruption will run for=> %ld, seconds\n",
126 alarmtime);
127 } else { //Run for 5 secs only
128 if (sigaction(SIGALRM, &sa, 0) == -1) {
129 perror("sigaction error");
130 exit(1);
131 }
132 (void)alarm(5);
133 printf("mmap-corruption will run for=> 5, seconds\n");
134 }
135 /* If we get a SIGQUIT or SIGTERM, clean up and exit immediately. */
136 sa.sa_handler = finish;
137 if (sigaction(SIGQUIT, &sa, 0) == -1) {
138 perror("sigaction error SIGQUIT");
139 exit(1);
140 }
141 if (sigaction(SIGTERM, &sa, 0) == -1) {
142 perror("sigaction error SIGTERM");
143 exit(1);
144 }
145
146 tst_tmpdir();
147 while (1) {
148 unlink(fname);
149 int fd = open(fname, O_CREAT | O_EXCL | O_RDWR, 0600);
150 ftruncate(fd, kMemSize);
151
152 mem =
153 mmap(0, kMemSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
154 0);
155 // Fill the memory with 1s.
156 memset(mem, 1, kMemSize);
157
158 for (i = 0; i < kMemSize; i++) {
159 int byte_good = mem[i] != 0;
160 if (!byte_good && ((i % kPageSize) == 0)) {
161 //printf("%d ", i / kPageSize);
162 count++;
163 }
164 }
165 munmap(mem, kMemSize);
166 close(fd);
167 unlink(fname);
168 if (count > 0) {
169 printf("Running %d bad page\n", count);
170 return 1;
171 }
172 count = 0;
173 }
174 return 0;
175 }
176
/*
 * finish - signal handler that ends the test with a passing result.
 *
 * sig: number of the delivered signal; unused, every signal is treated
 *      the same way.
 *
 * Prints the PASSED line and exits with status 0; it does not return.
 * NOTE(review): printf() and exit() are not async-signal-safe; kept as is
 * so that exit() still flushes any buffered stdout output.
 */
void finish(int sig)
{
	(void)sig;

	printf("mmap-corruption PASSED\n");
	exit(0);
}
182