1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright © 2012 NetCommWireless
4 * Iwo Mergler <Iwo.Mergler@netcommwireless.com.au>
5 *
 * Test for multi-bit error recovery on a NAND page. This mostly tests the
 * ECC controller / driver.
8 *
9 * There are two test modes:
10 *
11 * 0 - artificially inserting bit errors until the ECC fails
12 * This is the default method and fairly quick. It should
13 * be independent of the quality of the FLASH.
14 *
15 * 1 - re-writing the same pattern repeatedly until the ECC fails.
16 * This method relies on the physics of NAND FLASH to eventually
17 * generate '0' bits if '1' has been written sufficient times.
18 * Depending on the NAND, the first bit errors will appear after
19 * 1000 or more writes and then will usually snowball, reaching the
20 * limits of the ECC quickly.
21 *
22 * The test stops after 10000 cycles, should your FLASH be
23 * exceptionally good and not generate bit errors before that. Try
24 * a different page in that case.
25 *
26 * Please note that neither of these tests will significantly 'use up' any
27 * FLASH endurance. Only a maximum of two erase operations will be performed.
28 */
29
30 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
31
32 #include <linux/init.h>
33 #include <linux/module.h>
34 #include <linux/moduleparam.h>
35 #include <linux/mtd/mtd.h>
36 #include <linux/err.h>
37 #include <linux/mtd/rawnand.h>
38 #include <linux/slab.h>
39 #include "mtd_test.h"
40
41 static int dev;
42 module_param(dev, int, S_IRUGO);
43 MODULE_PARM_DESC(dev, "MTD device number to use");
44
45 static unsigned page_offset;
46 module_param(page_offset, uint, S_IRUGO);
47 MODULE_PARM_DESC(page_offset, "Page number relative to dev start");
48
49 static unsigned seed;
50 module_param(seed, uint, S_IRUGO);
51 MODULE_PARM_DESC(seed, "Random seed");
52
53 static int mode;
54 module_param(mode, int, S_IRUGO);
55 MODULE_PARM_DESC(mode, "0=incremental errors, 1=overwrite test");
56
57 static unsigned max_overwrite = 10000;
58
59 static loff_t offset; /* Offset of the page we're using. */
60 static unsigned eraseblock; /* Eraseblock number for our page. */
61
62 /* We assume that the ECC can correct up to a certain number
63 * of biterrors per subpage. */
64 static unsigned subsize; /* Size of subpages */
65 static unsigned subcount; /* Number of subpages per page */
66
67 static struct mtd_info *mtd; /* MTD device */
68
69 static uint8_t *wbuffer; /* One page write / compare buffer */
70 static uint8_t *rbuffer; /* One page read buffer */
71
/*
 * Derives a deterministic 'random' byte from a known offset.
 *
 * The same offset always produces the same byte, which lets callers
 * regenerate the expected pattern instead of storing it.
 */
static uint8_t hash(unsigned offset)
{
	unsigned int mixed = offset ^ 0x7f7edfd3;
	unsigned char out;

	/* Fold the higher bits down so they influence the low byte. */
	mixed ^= mixed >> 3;
	mixed ^= mixed >> 5;
	mixed ^= mixed >> 13;
	out = mixed & 0xFF;

	/* Reverse the bit order: swap nibbles, then bit pairs, then bits. */
	out = (unsigned char)((out << 4) | (out >> 4));
	out = (unsigned char)(((out & 0x33) << 2) | ((out >> 2) & 0x33));
	out = (unsigned char)(((out & 0x55) << 1) | ((out >> 1) & 0x55));

	return out;
}
88
89 /* Writes wbuffer to page */
write_page(int log)90 static int write_page(int log)
91 {
92 if (log)
93 pr_info("write_page\n");
94
95 return mtdtest_write(mtd, offset, mtd->writesize, wbuffer);
96 }
97
98 /* Re-writes the data area while leaving the OOB alone. */
rewrite_page(int log)99 static int rewrite_page(int log)
100 {
101 int err = 0;
102 struct mtd_oob_ops ops = { };
103
104 if (log)
105 pr_info("rewrite page\n");
106
107 ops.mode = MTD_OPS_RAW; /* No ECC */
108 ops.len = mtd->writesize;
109 ops.retlen = 0;
110 ops.ooblen = 0;
111 ops.oobretlen = 0;
112 ops.ooboffs = 0;
113 ops.datbuf = wbuffer;
114 ops.oobbuf = NULL;
115
116 err = mtd_write_oob(mtd, offset, &ops);
117 if (err || ops.retlen != mtd->writesize) {
118 pr_err("error: write_oob failed (%d)\n", err);
119 if (!err)
120 err = -EIO;
121 }
122
123 return err;
124 }
125
126 /* Reads page into rbuffer. Returns number of corrected bit errors (>=0)
127 * or error (<0) */
read_page(int log)128 static int read_page(int log)
129 {
130 int err = 0;
131 size_t read;
132 struct mtd_ecc_stats oldstats;
133
134 if (log)
135 pr_info("read_page\n");
136
137 /* Saving last mtd stats */
138 memcpy(&oldstats, &mtd->ecc_stats, sizeof(oldstats));
139
140 err = mtd_read(mtd, offset, mtd->writesize, &read, rbuffer);
141 if (!err || err == -EUCLEAN)
142 err = mtd->ecc_stats.corrected - oldstats.corrected;
143
144 if (err < 0 || read != mtd->writesize) {
145 pr_err("error: read failed at %#llx\n", (long long)offset);
146 if (err >= 0)
147 err = -EIO;
148 }
149
150 return err;
151 }
152
153 /* Verifies rbuffer against random sequence */
verify_page(int log)154 static int verify_page(int log)
155 {
156 unsigned i, errs = 0;
157
158 if (log)
159 pr_info("verify_page\n");
160
161 for (i = 0; i < mtd->writesize; i++) {
162 if (rbuffer[i] != hash(i+seed)) {
163 pr_err("Error: page offset %u, expected %02x, got %02x\n",
164 i, hash(i+seed), rbuffer[i]);
165 errs++;
166 }
167 }
168
169 if (errs)
170 return -EIO;
171 else
172 return 0;
173 }
174
175 #define CBIT(v, n) ((v) & (1 << (n)))
176 #define BCLR(v, n) ((v) = (v) & ~(1 << (n)))
177
178 /* Finds the first '1' bit in wbuffer starting at offset 'byte'
179 * and sets it to '0'. */
insert_biterror(unsigned byte)180 static int insert_biterror(unsigned byte)
181 {
182 int bit;
183
184 while (byte < mtd->writesize) {
185 for (bit = 7; bit >= 0; bit--) {
186 if (CBIT(wbuffer[byte], bit)) {
187 BCLR(wbuffer[byte], bit);
188 pr_info("Inserted biterror @ %u/%u\n", byte, bit);
189 return 0;
190 }
191 }
192 byte++;
193 }
194 pr_err("biterror: Failed to find a '1' bit\n");
195 return -EIO;
196 }
197
198 /* Writes 'random' data to page and then introduces deliberate bit
199 * errors into the page, while verifying each step. */
incremental_errors_test(void)200 static int incremental_errors_test(void)
201 {
202 int err = 0;
203 unsigned i;
204 unsigned errs_per_subpage = 0;
205
206 pr_info("incremental biterrors test\n");
207
208 for (i = 0; i < mtd->writesize; i++)
209 wbuffer[i] = hash(i+seed);
210
211 err = write_page(1);
212 if (err)
213 goto exit;
214
215 while (1) {
216
217 err = rewrite_page(1);
218 if (err)
219 goto exit;
220
221 err = read_page(1);
222 if (err > 0)
223 pr_info("Read reported %d corrected bit errors\n", err);
224 if (err < 0) {
225 pr_err("After %d biterrors per subpage, read reported error %d\n",
226 errs_per_subpage, err);
227 err = 0;
228 goto exit;
229 }
230
231 err = verify_page(1);
232 if (err) {
233 pr_err("ECC failure, read data is incorrect despite read success\n");
234 goto exit;
235 }
236
237 pr_info("Successfully corrected %d bit errors per subpage\n",
238 errs_per_subpage);
239
240 for (i = 0; i < subcount; i++) {
241 err = insert_biterror(i * subsize);
242 if (err < 0)
243 goto exit;
244 }
245 errs_per_subpage++;
246 }
247
248 exit:
249 return err;
250 }
251
252
253 /* Writes 'random' data to page and then re-writes that same data repeatedly.
254 This eventually develops bit errors (bits written as '1' will slowly become
255 '0'), which are corrected as far as the ECC is capable of. */
overwrite_test(void)256 static int overwrite_test(void)
257 {
258 int err = 0;
259 unsigned i;
260 unsigned max_corrected = 0;
261 unsigned opno = 0;
262 /* We don't expect more than this many correctable bit errors per
263 * page. */
264 #define MAXBITS 512
265 static unsigned bitstats[MAXBITS]; /* bit error histogram. */
266
267 memset(bitstats, 0, sizeof(bitstats));
268
269 pr_info("overwrite biterrors test\n");
270
271 for (i = 0; i < mtd->writesize; i++)
272 wbuffer[i] = hash(i+seed);
273
274 err = write_page(1);
275 if (err)
276 goto exit;
277
278 while (opno < max_overwrite) {
279
280 err = write_page(0);
281 if (err)
282 break;
283
284 err = read_page(0);
285 if (err >= 0) {
286 if (err >= MAXBITS) {
287 pr_info("Implausible number of bit errors corrected\n");
288 err = -EIO;
289 break;
290 }
291 bitstats[err]++;
292 if (err > max_corrected) {
293 max_corrected = err;
294 pr_info("Read reported %d corrected bit errors\n",
295 err);
296 }
297 } else { /* err < 0 */
298 pr_info("Read reported error %d\n", err);
299 err = 0;
300 break;
301 }
302
303 err = verify_page(0);
304 if (err) {
305 bitstats[max_corrected] = opno;
306 pr_info("ECC failure, read data is incorrect despite read success\n");
307 break;
308 }
309
310 err = mtdtest_relax();
311 if (err)
312 break;
313
314 opno++;
315 }
316
317 /* At this point bitstats[0] contains the number of ops with no bit
318 * errors, bitstats[1] the number of ops with 1 bit error, etc. */
319 pr_info("Bit error histogram (%d operations total):\n", opno);
320 for (i = 0; i < max_corrected; i++)
321 pr_info("Page reads with %3d corrected bit errors: %d\n",
322 i, bitstats[i]);
323
324 exit:
325 return err;
326 }
327
mtd_nandbiterrs_init(void)328 static int __init mtd_nandbiterrs_init(void)
329 {
330 int err = 0;
331
332 printk("\n");
333 printk(KERN_INFO "==================================================\n");
334 pr_info("MTD device: %d\n", dev);
335
336 mtd = get_mtd_device(NULL, dev);
337 if (IS_ERR(mtd)) {
338 err = PTR_ERR(mtd);
339 pr_err("error: cannot get MTD device\n");
340 goto exit_mtddev;
341 }
342
343 if (!mtd_type_is_nand(mtd)) {
344 pr_info("this test requires NAND flash\n");
345 err = -ENODEV;
346 goto exit_nand;
347 }
348
349 pr_info("MTD device size %llu, eraseblock=%u, page=%u, oob=%u\n",
350 (unsigned long long)mtd->size, mtd->erasesize,
351 mtd->writesize, mtd->oobsize);
352
353 subsize = mtd->writesize >> mtd->subpage_sft;
354 subcount = mtd->writesize / subsize;
355
356 pr_info("Device uses %d subpages of %d bytes\n", subcount, subsize);
357
358 offset = (loff_t)page_offset * mtd->writesize;
359 eraseblock = mtd_div_by_eb(offset, mtd);
360
361 pr_info("Using page=%u, offset=%llu, eraseblock=%u\n",
362 page_offset, offset, eraseblock);
363
364 wbuffer = kmalloc(mtd->writesize, GFP_KERNEL);
365 if (!wbuffer) {
366 err = -ENOMEM;
367 goto exit_wbuffer;
368 }
369
370 rbuffer = kmalloc(mtd->writesize, GFP_KERNEL);
371 if (!rbuffer) {
372 err = -ENOMEM;
373 goto exit_rbuffer;
374 }
375
376 err = mtdtest_erase_eraseblock(mtd, eraseblock);
377 if (err)
378 goto exit_error;
379
380 if (mode == 0)
381 err = incremental_errors_test();
382 else
383 err = overwrite_test();
384
385 if (err)
386 goto exit_error;
387
388 /* We leave the block un-erased in case of test failure. */
389 err = mtdtest_erase_eraseblock(mtd, eraseblock);
390 if (err)
391 goto exit_error;
392
393 err = -EIO;
394 pr_info("finished successfully.\n");
395 printk(KERN_INFO "==================================================\n");
396
397 exit_error:
398 kfree(rbuffer);
399 exit_rbuffer:
400 kfree(wbuffer);
401 exit_wbuffer:
402 /* Nothing */
403 exit_nand:
404 put_mtd_device(mtd);
405 exit_mtddev:
406 return err;
407 }
408
mtd_nandbiterrs_exit(void)409 static void __exit mtd_nandbiterrs_exit(void)
410 {
411 return;
412 }
413
414 module_init(mtd_nandbiterrs_init);
415 module_exit(mtd_nandbiterrs_exit);
416
417 MODULE_DESCRIPTION("NAND bit error recovery test");
418 MODULE_AUTHOR("Iwo Mergler");
419 MODULE_LICENSE("GPL");
420