1 /*
2 * Copyright © 2012 NetCommWireless
3 * Iwo Mergler <Iwo.Mergler@netcommwireless.com.au>
4 *
5 * Test for multi-bit error recovery on a NAND page. This mostly tests the
6 * ECC controller / driver.
7 *
8 * There are two test modes:
9 *
10 * 0 - artificially inserting bit errors until the ECC fails
11 * This is the default method and fairly quick. It should
12 * be independent of the quality of the FLASH.
13 *
14 * 1 - re-writing the same pattern repeatedly until the ECC fails.
15 * This method relies on the physics of NAND FLASH to eventually
16 * generate '0' bits if '1' has been written sufficient times.
17 * Depending on the NAND, the first bit errors will appear after
18 * 1000 or more writes and then will usually snowball, reaching the
19 * limits of the ECC quickly.
20 *
21 * The test stops after 10000 cycles, should your FLASH be
22 * exceptionally good and not generate bit errors before that. Try
23 * a different page in that case.
24 *
25 * Please note that neither of these tests will significantly 'use up' any
26 * FLASH endurance. Only a maximum of two erase operations will be performed.
27 *
28 *
29 * This program is free software; you can redistribute it and/or modify it
30 * under the terms of the GNU General Public License version 2 as published by
31 * the Free Software Foundation.
32 *
33 * This program is distributed in the hope that it will be useful, but WITHOUT
34 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
35 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
36 * more details.
37 *
38 * You should have received a copy of the GNU General Public License along with
39 * this program; see the file COPYING. If not, write to the Free Software
40 * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
41 */
42
43 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
44
45 #include <linux/init.h>
46 #include <linux/module.h>
47 #include <linux/moduleparam.h>
48 #include <linux/mtd/mtd.h>
49 #include <linux/err.h>
50 #include <linux/mtd/nand.h>
51 #include <linux/slab.h>
52 #include "mtd_test.h"
53
54 static int dev;
55 module_param(dev, int, S_IRUGO);
56 MODULE_PARM_DESC(dev, "MTD device number to use");
57
58 static unsigned page_offset;
59 module_param(page_offset, uint, S_IRUGO);
60 MODULE_PARM_DESC(page_offset, "Page number relative to dev start");
61
62 static unsigned seed;
63 module_param(seed, uint, S_IRUGO);
64 MODULE_PARM_DESC(seed, "Random seed");
65
66 static int mode;
67 module_param(mode, int, S_IRUGO);
68 MODULE_PARM_DESC(mode, "0=incremental errors, 1=overwrite test");
69
70 static unsigned max_overwrite = 10000;
71
72 static loff_t offset; /* Offset of the page we're using. */
73 static unsigned eraseblock; /* Eraseblock number for our page. */
74
75 /* We assume that the ECC can correct up to a certain number
76 * of biterrors per subpage. */
77 static unsigned subsize; /* Size of subpages */
78 static unsigned subcount; /* Number of subpages per page */
79
80 static struct mtd_info *mtd; /* MTD device */
81
82 static uint8_t *wbuffer; /* One page write / compare buffer */
83 static uint8_t *rbuffer; /* One page read buffer */
84
/*
 * Deterministic pseudo-random byte for a given offset.
 *
 * The offset is XORed with a fixed constant, folded with three
 * xor-shift steps, and the low byte of the result is returned with its
 * bit order reversed (bit 0 becomes bit 7 and so on).
 */
static uint8_t hash(unsigned offset)
{
	unsigned mix = offset ^ 0x7f7edfd3;
	uint8_t low;
	uint8_t reversed = 0;
	int bit;

	mix ^= mix >> 3;
	mix ^= mix >> 5;
	mix ^= mix >> 13;

	/* Reverse the bits of the low byte, one bit per iteration. */
	low = mix & 0xFF;
	for (bit = 0; bit < 8; bit++) {
		reversed = (reversed << 1) | (low & 1);
		low >>= 1;
	}

	return reversed;
}
101
102 /* Writes wbuffer to page */
write_page(int log)103 static int write_page(int log)
104 {
105 if (log)
106 pr_info("write_page\n");
107
108 return mtdtest_write(mtd, offset, mtd->writesize, wbuffer);
109 }
110
111 /* Re-writes the data area while leaving the OOB alone. */
rewrite_page(int log)112 static int rewrite_page(int log)
113 {
114 int err = 0;
115 struct mtd_oob_ops ops;
116
117 if (log)
118 pr_info("rewrite page\n");
119
120 ops.mode = MTD_OPS_RAW; /* No ECC */
121 ops.len = mtd->writesize;
122 ops.retlen = 0;
123 ops.ooblen = 0;
124 ops.oobretlen = 0;
125 ops.ooboffs = 0;
126 ops.datbuf = wbuffer;
127 ops.oobbuf = NULL;
128
129 err = mtd_write_oob(mtd, offset, &ops);
130 if (err || ops.retlen != mtd->writesize) {
131 pr_err("error: write_oob failed (%d)\n", err);
132 if (!err)
133 err = -EIO;
134 }
135
136 return err;
137 }
138
139 /* Reads page into rbuffer. Returns number of corrected bit errors (>=0)
140 * or error (<0) */
read_page(int log)141 static int read_page(int log)
142 {
143 int err = 0;
144 size_t read;
145 struct mtd_ecc_stats oldstats;
146
147 if (log)
148 pr_info("read_page\n");
149
150 /* Saving last mtd stats */
151 memcpy(&oldstats, &mtd->ecc_stats, sizeof(oldstats));
152
153 err = mtd_read(mtd, offset, mtd->writesize, &read, rbuffer);
154 if (err == -EUCLEAN)
155 err = mtd->ecc_stats.corrected - oldstats.corrected;
156
157 if (err < 0 || read != mtd->writesize) {
158 pr_err("error: read failed at %#llx\n", (long long)offset);
159 if (err >= 0)
160 err = -EIO;
161 }
162
163 return err;
164 }
165
166 /* Verifies rbuffer against random sequence */
verify_page(int log)167 static int verify_page(int log)
168 {
169 unsigned i, errs = 0;
170
171 if (log)
172 pr_info("verify_page\n");
173
174 for (i = 0; i < mtd->writesize; i++) {
175 if (rbuffer[i] != hash(i+seed)) {
176 pr_err("Error: page offset %u, expected %02x, got %02x\n",
177 i, hash(i+seed), rbuffer[i]);
178 errs++;
179 }
180 }
181
182 if (errs)
183 return -EIO;
184 else
185 return 0;
186 }
187
188 #define CBIT(v, n) ((v) & (1 << (n)))
189 #define BCLR(v, n) ((v) = (v) & ~(1 << (n)))
190
191 /* Finds the first '1' bit in wbuffer starting at offset 'byte'
192 * and sets it to '0'. */
insert_biterror(unsigned byte)193 static int insert_biterror(unsigned byte)
194 {
195 int bit;
196
197 while (byte < mtd->writesize) {
198 for (bit = 7; bit >= 0; bit--) {
199 if (CBIT(wbuffer[byte], bit)) {
200 BCLR(wbuffer[byte], bit);
201 pr_info("Inserted biterror @ %u/%u\n", byte, bit);
202 return 0;
203 }
204 }
205 byte++;
206 }
207 pr_err("biterror: Failed to find a '1' bit\n");
208 return -EIO;
209 }
210
211 /* Writes 'random' data to page and then introduces deliberate bit
212 * errors into the page, while verifying each step. */
incremental_errors_test(void)213 static int incremental_errors_test(void)
214 {
215 int err = 0;
216 unsigned i;
217 unsigned errs_per_subpage = 0;
218
219 pr_info("incremental biterrors test\n");
220
221 for (i = 0; i < mtd->writesize; i++)
222 wbuffer[i] = hash(i+seed);
223
224 err = write_page(1);
225 if (err)
226 goto exit;
227
228 while (1) {
229
230 err = rewrite_page(1);
231 if (err)
232 goto exit;
233
234 err = read_page(1);
235 if (err > 0)
236 pr_info("Read reported %d corrected bit errors\n", err);
237 if (err < 0) {
238 pr_err("After %d biterrors per subpage, read reported error %d\n",
239 errs_per_subpage, err);
240 err = 0;
241 goto exit;
242 }
243
244 err = verify_page(1);
245 if (err) {
246 pr_err("ECC failure, read data is incorrect despite read success\n");
247 goto exit;
248 }
249
250 pr_info("Successfully corrected %d bit errors per subpage\n",
251 errs_per_subpage);
252
253 for (i = 0; i < subcount; i++) {
254 err = insert_biterror(i * subsize);
255 if (err < 0)
256 goto exit;
257 }
258 errs_per_subpage++;
259 }
260
261 exit:
262 return err;
263 }
264
265
266 /* Writes 'random' data to page and then re-writes that same data repeatedly.
267 This eventually develops bit errors (bits written as '1' will slowly become
268 '0'), which are corrected as far as the ECC is capable of. */
overwrite_test(void)269 static int overwrite_test(void)
270 {
271 int err = 0;
272 unsigned i;
273 unsigned max_corrected = 0;
274 unsigned opno = 0;
275 /* We don't expect more than this many correctable bit errors per
276 * page. */
277 #define MAXBITS 512
278 static unsigned bitstats[MAXBITS]; /* bit error histogram. */
279
280 memset(bitstats, 0, sizeof(bitstats));
281
282 pr_info("overwrite biterrors test\n");
283
284 for (i = 0; i < mtd->writesize; i++)
285 wbuffer[i] = hash(i+seed);
286
287 err = write_page(1);
288 if (err)
289 goto exit;
290
291 while (opno < max_overwrite) {
292
293 err = rewrite_page(0);
294 if (err)
295 break;
296
297 err = read_page(0);
298 if (err >= 0) {
299 if (err >= MAXBITS) {
300 pr_info("Implausible number of bit errors corrected\n");
301 err = -EIO;
302 break;
303 }
304 bitstats[err]++;
305 if (err > max_corrected) {
306 max_corrected = err;
307 pr_info("Read reported %d corrected bit errors\n",
308 err);
309 }
310 } else { /* err < 0 */
311 pr_info("Read reported error %d\n", err);
312 err = 0;
313 break;
314 }
315
316 err = verify_page(0);
317 if (err) {
318 bitstats[max_corrected] = opno;
319 pr_info("ECC failure, read data is incorrect despite read success\n");
320 break;
321 }
322
323 opno++;
324 }
325
326 /* At this point bitstats[0] contains the number of ops with no bit
327 * errors, bitstats[1] the number of ops with 1 bit error, etc. */
328 pr_info("Bit error histogram (%d operations total):\n", opno);
329 for (i = 0; i < max_corrected; i++)
330 pr_info("Page reads with %3d corrected bit errors: %d\n",
331 i, bitstats[i]);
332
333 exit:
334 return err;
335 }
336
mtd_nandbiterrs_init(void)337 static int __init mtd_nandbiterrs_init(void)
338 {
339 int err = 0;
340
341 printk("\n");
342 printk(KERN_INFO "==================================================\n");
343 pr_info("MTD device: %d\n", dev);
344
345 mtd = get_mtd_device(NULL, dev);
346 if (IS_ERR(mtd)) {
347 err = PTR_ERR(mtd);
348 pr_err("error: cannot get MTD device\n");
349 goto exit_mtddev;
350 }
351
352 if (!mtd_type_is_nand(mtd)) {
353 pr_info("this test requires NAND flash\n");
354 err = -ENODEV;
355 goto exit_nand;
356 }
357
358 pr_info("MTD device size %llu, eraseblock=%u, page=%u, oob=%u\n",
359 (unsigned long long)mtd->size, mtd->erasesize,
360 mtd->writesize, mtd->oobsize);
361
362 subsize = mtd->writesize >> mtd->subpage_sft;
363 subcount = mtd->writesize / subsize;
364
365 pr_info("Device uses %d subpages of %d bytes\n", subcount, subsize);
366
367 offset = (loff_t)page_offset * mtd->writesize;
368 eraseblock = mtd_div_by_eb(offset, mtd);
369
370 pr_info("Using page=%u, offset=%llu, eraseblock=%u\n",
371 page_offset, offset, eraseblock);
372
373 wbuffer = kmalloc(mtd->writesize, GFP_KERNEL);
374 if (!wbuffer) {
375 err = -ENOMEM;
376 goto exit_wbuffer;
377 }
378
379 rbuffer = kmalloc(mtd->writesize, GFP_KERNEL);
380 if (!rbuffer) {
381 err = -ENOMEM;
382 goto exit_rbuffer;
383 }
384
385 err = mtdtest_erase_eraseblock(mtd, eraseblock);
386 if (err)
387 goto exit_error;
388
389 if (mode == 0)
390 err = incremental_errors_test();
391 else
392 err = overwrite_test();
393
394 if (err)
395 goto exit_error;
396
397 /* We leave the block un-erased in case of test failure. */
398 err = mtdtest_erase_eraseblock(mtd, eraseblock);
399 if (err)
400 goto exit_error;
401
402 err = -EIO;
403 pr_info("finished successfully.\n");
404 printk(KERN_INFO "==================================================\n");
405
406 exit_error:
407 kfree(rbuffer);
408 exit_rbuffer:
409 kfree(wbuffer);
410 exit_wbuffer:
411 /* Nothing */
412 exit_nand:
413 put_mtd_device(mtd);
414 exit_mtddev:
415 return err;
416 }
417
mtd_nandbiterrs_exit(void)418 static void __exit mtd_nandbiterrs_exit(void)
419 {
420 return;
421 }
422
423 module_init(mtd_nandbiterrs_init);
424 module_exit(mtd_nandbiterrs_exit);
425
426 MODULE_DESCRIPTION("NAND bit error recovery test");
427 MODULE_AUTHOR("Iwo Mergler");
428 MODULE_LICENSE("GPL");
429