• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * This file is part of UBIFS.
4  *
5  * Copyright (C) 2006-2008 Nokia Corporation
6  *
7  * Authors: Adrian Hunter
8  *          Artem Bityutskiy (Битюцкий Артём)
9  */
10 
11 /*
12  * This file implements functions needed to recover from unclean un-mounts.
13  * When UBIFS is mounted, it checks a flag on the master node to determine if
14  * an un-mount was completed successfully. If not, the process of mounting
15  * incorporates additional checking and fixing of on-flash data structures.
16  * UBIFS always cleans away all remnants of an unclean un-mount, so that
17  * errors do not accumulate. However UBIFS defers recovery if it is mounted
18  * read-only, and the flash is not modified in that case.
19  *
20  * The general UBIFS approach to the recovery is that it recovers from
21  * corruptions which could be caused by power cuts, but it refuses to recover
22  * from corruption caused by other reasons. And UBIFS tries to distinguish
23  * between these 2 reasons of corruptions and silently recover in the former
24  * case and loudly complain in the latter case.
25  *
26  * UBIFS writes only to erased LEBs, so it writes only to the flash space
27  * containing only 0xFFs. UBIFS also always writes strictly from the beginning
28  * of the LEB to the end. And UBIFS assumes that the underlying flash media
29  * writes in @c->max_write_size bytes at a time.
30  *
31  * Hence, if UBIFS finds a corrupted node at offset X, it expects only the min.
32  * I/O unit corresponding to offset X to contain corrupted data, all the
33  * following min. I/O units have to contain empty space (all 0xFFs). If this is
34  * not true, the corruption cannot be the result of a power cut, and UBIFS
35  * refuses to mount.
36  */
37 
38 #include <linux/crc32.h>
39 #include <linux/slab.h>
40 #include "ubifs.h"
41 
42 #define RECOVERY_TWO 2
43 #define RECOVERY_SEVEN 7
44 #define RECOVERY_EIGHT 8
45 
46 /**
47  * is_empty - determine whether a buffer is empty (contains all 0xff).
48  * @buf: buffer to clean
49  * @len: length of buffer
50  *
51  * This function returns %1 if the buffer is empty (contains all 0xff) otherwise
52  * %0 is returned.
53  */
is_empty(void * buf,int len)54 static int is_empty(void *buf, int len)
55 {
56     uint8_t *p = buf;
57     int i;
58 
59     for (i = 0; i < len; i++) {
60         if (*p++ != 0xff) {
61             return 0;
62         }
63     }
64     return 1;
65 }
66 
67 /**
68  * first_non_ff - find offset of the first non-0xff byte.
69  * @buf: buffer to search in
70  * @len: length of buffer
71  *
72  * This function returns offset of the first non-0xff byte in @buf or %-1 if
73  * the buffer contains only 0xff bytes.
74  */
first_non_ff(void * buf,int len)75 static int first_non_ff(void *buf, int len)
76 {
77     uint8_t *p = buf;
78     int i;
79 
80     for (i = 0; i < len; i++) {
81         if (*p++ != 0xff) {
82             return i;
83         }
84     }
85     return -1;
86 }
87 
88 /**
89  * get_master_node - get the last valid master node allowing for corruption.
90  * @c: UBIFS file-system description object
91  * @lnum: LEB number
92  * @pbuf: buffer containing the LEB read, is returned here
93  * @mst: master node, if found, is returned here
94  * @cor: corruption, if found, is returned here
95  *
96  * This function allocates a buffer, reads the LEB into it, and finds and
97  * returns the last valid master node allowing for one area of corruption.
98  * The corrupt area, if there is one, must be consistent with the assumption
99  * that it is the result of an unclean unmount while the master node was being
100  * written. Under those circumstances, it is valid to use the previously written
101  * master node.
102  *
103  * This function returns %0 on success and a negative error code on failure.
104  */
get_master_node(const struct ubifs_info * c,int lnum,void ** pbuf,struct ubifs_mst_node ** mst,void ** cor)105 static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf, struct ubifs_mst_node **mst, void **cor)
106 {
107     const int sz = c->mst_node_alsz;
108     int err, offs, len;
109     void *sbuf, *buf;
110 
111     sbuf = vmalloc(c->leb_size);
112     if (!sbuf) {
113         return -ENOMEM;
114     }
115 
116     err = ubifs_leb_read(c, lnum, sbuf, 0, c->leb_size, 0);
117     if (err && err != -EBADMSG) {
118         goto out_free;
119     }
120 
121     /* Find the first position that is definitely not a node */
122     offs = 0;
123     buf = sbuf;
124     len = c->leb_size;
125     while (offs + UBIFS_MST_NODE_SZ <= c->leb_size) {
126         struct ubifs_ch *ch = buf;
127 
128         if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) {
129             break;
130         }
131         offs += sz;
132         buf += sz;
133         len -= sz;
134     }
135     /* See if there was a valid master node before that */
136     if (offs) {
137         int ret;
138 
139         offs -= sz;
140         buf -= sz;
141         len += sz;
142         ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
143         if (ret != SCANNED_A_NODE && offs) {
144             /* Could have been corruption so check one place back */
145             offs -= sz;
146             buf -= sz;
147             len += sz;
148             ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
149             if (ret != SCANNED_A_NODE) {
150                 /*
151                  * We accept only one area of corruption because
152                  * we are assuming that it was caused while
153                  * trying to write a master node.
154                  */
155                 goto out_err;
156             }
157         }
158         if (ret == SCANNED_A_NODE) {
159             struct ubifs_ch *ch = buf;
160 
161             if (ch->node_type != UBIFS_MST_NODE) {
162                 goto out_err;
163             }
164             dbg_rcvry("found a master node at %d:%d", lnum, offs);
165             *mst = buf;
166             offs += sz;
167             buf += sz;
168             len -= sz;
169         }
170     }
171     /* Check for corruption */
172     if (offs < c->leb_size) {
173         if (!is_empty(buf, min_t(int, len, sz))) {
174             *cor = buf;
175             dbg_rcvry("found corruption at %d:%d", lnum, offs);
176         }
177         offs += sz;
178         buf += sz;
179         len -= sz;
180     }
181     /* Check remaining empty space */
182     if (offs < c->leb_size) {
183         if (!is_empty(buf, len)) {
184             goto out_err;
185         }
186     }
187     *pbuf = sbuf;
188     return 0;
189 
190 out_err:
191     err = -EINVAL;
192 out_free:
193     vfree(sbuf);
194     *mst = NULL;
195     *cor = NULL;
196     return err;
197 }
198 
199 /**
200  * write_rcvrd_mst_node - write recovered master node.
201  * @c: UBIFS file-system description object
202  * @mst: master node
203  *
204  * This function returns %0 on success and a negative error code on failure.
205  */
write_rcvrd_mst_node(struct ubifs_info * c,struct ubifs_mst_node * mst)206 static int write_rcvrd_mst_node(struct ubifs_info *c, struct ubifs_mst_node *mst)
207 {
208     int err = 0, lnum = UBIFS_MST_LNUM, sz = c->mst_node_alsz;
209     __le32 save_flags;
210 
211     dbg_rcvry("recovery");
212 
213     save_flags = mst->flags;
214     mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY);
215 
216     err = ubifs_prepare_node_hmac(c, mst, UBIFS_MST_NODE_SZ, offsetof(struct ubifs_mst_node, hmac), 1);
217     if (err) {
218         goto out;
219     }
220     err = ubifs_leb_change(c, lnum, mst, sz);
221     if (err) {
222         goto out;
223     }
224     err = ubifs_leb_change(c, lnum + 1, mst, sz);
225     if (err) {
226         goto out;
227     }
228 out:
229     mst->flags = save_flags;
230     return err;
231 }
232 
233 /**
234  * ubifs_recover_master_node - recover the master node.
235  * @c: UBIFS file-system description object
236  *
237  * This function recovers the master node from corruption that may occur due to
238  * an unclean unmount.
239  *
240  * This function returns %0 on success and a negative error code on failure.
241  */
ubifs_recover_master_node(struct ubifs_info * c)242 int ubifs_recover_master_node(struct ubifs_info *c)
243 {
244     void *buf1 = NULL, *buf2 = NULL, *cor1 = NULL, *cor2 = NULL;
245     struct ubifs_mst_node *mst1 = NULL, *mst2 = NULL, *mst;
246     const int sz = c->mst_node_alsz;
247     int err, offs1, offs2;
248 
249     dbg_rcvry("recovery");
250 
251     err = get_master_node(c, UBIFS_MST_LNUM, &buf1, &mst1, &cor1);
252     if (err) {
253         goto out_free;
254     }
255 
256     err = get_master_node(c, UBIFS_MST_LNUM + 1, &buf2, &mst2, &cor2);
257     if (err) {
258         goto out_free;
259     }
260 
261     if (mst1) {
262         offs1 = (void *)mst1 - buf1;
263         if ((le32_to_cpu(mst1->flags) & UBIFS_MST_RCVRY) && (offs1 == 0 && !cor1)) {
264             /*
265              * mst1 was written by recovery at offset 0 with no
266              * corruption.
267              */
268             dbg_rcvry("recovery recovery");
269             mst = mst1;
270         } else if (mst2) {
271             offs2 = (void *)mst2 - buf2;
272             if (offs1 == offs2) {
273                 /* Same offset, so must be the same */
274                 if (ubifs_compare_master_node(c, mst1, mst2)) {
275                     goto out_err;
276                 }
277                 mst = mst1;
278             } else if (offs2 + sz == offs1) {
279                 /* 1st LEB was written, 2nd was not */
280                 if (cor1) {
281                     goto out_err;
282                 }
283                 mst = mst1;
284             } else if (offs1 == 0 && c->leb_size - offs2 - sz < sz) {
285                 /* 1st LEB was unmapped and written, 2nd not */
286                 if (cor1) {
287                     goto out_err;
288                 }
289                 mst = mst1;
290             } else {
291                 goto out_err;
292             }
293         } else {
294             /*
295              * 2nd LEB was unmapped and about to be written, so
296              * there must be only one master node in the first LEB
297              * and no corruption.
298              */
299             if (offs1 != 0 || cor1) {
300                 goto out_err;
301             }
302             mst = mst1;
303         }
304     } else {
305         if (!mst2) {
306             goto out_err;
307         }
308         /*
309          * 1st LEB was unmapped and about to be written, so there must
310          * be no room left in 2nd LEB.
311          */
312         offs2 = (void *)mst2 - buf2;
313         if (offs2 + sz + sz <= c->leb_size) {
314             goto out_err;
315         }
316         mst = mst2;
317     }
318 
319     ubifs_msg(c, "recovered master node from LEB %d", (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1));
320 
321     memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ);
322 
323     if (c->ro_mount) {
324         /* Read-only mode. Keep a copy for switching to rw mode */
325         c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL);
326         if (!c->rcvrd_mst_node) {
327             err = -ENOMEM;
328             goto out_free;
329         }
330         memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ);
331 
332         /*
333          * We had to recover the master node, which means there was an
334          * unclean reboot. However, it is possible that the master node
335          * is clean at this point, i.e., %UBIFS_MST_DIRTY is not set.
336          * E.g., consider the following chain of events:
337          *
338          * 1. UBIFS was cleanly unmounted, so the master node is clean
339          * 2. UBIFS is being mounted R/W and starts changing the master
340          *    node in the first (%UBIFS_MST_LNUM). A power cut happens,
341          *    so this LEB ends up with some amount of garbage at the
342          *    end.
343          * 3. UBIFS is being mounted R/O. We reach this place and
344          *    recover the master node from the second LEB
345          *    (%UBIFS_MST_LNUM + 1). But we cannot update the media
346          *    because we are being mounted R/O. We have to defer the
347          *    operation.
348          * 4. However, this master node (@c->mst_node) is marked as
349          *    clean (since the step 1). And if we just return, the
350          *    mount code will be confused and won't recover the master
351          *    node when it is re-mounter R/W later.
352          *
353          *    Thus, to force the recovery by marking the master node as
354          *    dirty.
355          */
356         c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
357     } else {
358         /* Write the recovered master node */
359         c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1;
360         err = write_rcvrd_mst_node(c, c->mst_node);
361         if (err) {
362             goto out_free;
363         }
364     }
365 
366     vfree(buf2);
367     vfree(buf1);
368 
369     return 0;
370 
371 out_err:
372     err = -EINVAL;
373 out_free:
374     ubifs_err(c, "failed to recover master node");
375     if (mst1) {
376         ubifs_err(c, "dumping first master node");
377         ubifs_dump_node(c, mst1, c->leb_size - ((void *)mst1 - buf1));
378     }
379     if (mst2) {
380         ubifs_err(c, "dumping second master node");
381         ubifs_dump_node(c, mst2, c->leb_size - ((void *)mst2 - buf2));
382     }
383     vfree(buf2);
384     vfree(buf1);
385     return err;
386 }
387 
388 /**
389  * ubifs_write_rcvrd_mst_node - write the recovered master node.
390  * @c: UBIFS file-system description object
391  *
392  * This function writes the master node that was recovered during mounting in
393  * read-only mode and must now be written because we are remounting rw.
394  *
395  * This function returns %0 on success and a negative error code on failure.
396  */
ubifs_write_rcvrd_mst_node(struct ubifs_info * c)397 int ubifs_write_rcvrd_mst_node(struct ubifs_info *c)
398 {
399     int err;
400 
401     if (!c->rcvrd_mst_node) {
402         return 0;
403     }
404     c->rcvrd_mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
405     c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
406     err = write_rcvrd_mst_node(c, c->rcvrd_mst_node);
407     if (err) {
408         return err;
409     }
410     kfree(c->rcvrd_mst_node);
411     c->rcvrd_mst_node = NULL;
412     return 0;
413 }
414 
415 /**
416  * is_last_write - determine if an offset was in the last write to a LEB.
417  * @c: UBIFS file-system description object
418  * @buf: buffer to check
419  * @offs: offset to check
420  *
421  * This function returns %1 if @offs was in the last write to the LEB whose data
422  * is in @buf, otherwise %0 is returned. The determination is made by checking
423  * for subsequent empty space starting from the next @c->max_write_size
424  * boundary.
425  */
is_last_write(const struct ubifs_info * c,void * buf,int offs)426 static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
427 {
428     int empty_offs, check_len;
429     uint8_t *p;
430 
431     /*
432      * Round up to the next @c->max_write_size boundary i.e. @offs is in
433      * the last wbuf written. After that should be empty space.
434      */
435     empty_offs = ALIGN(offs + 1, c->max_write_size);
436     check_len = c->leb_size - empty_offs;
437     p = buf + empty_offs - offs;
438     return is_empty(p, check_len);
439 }
440 
441 /**
442  * clean_buf - clean the data from an LEB sitting in a buffer.
443  * @c: UBIFS file-system description object
444  * @buf: buffer to clean
445  * @lnum: LEB number to clean
446  * @offs: offset from which to clean
447  * @len: length of buffer
448  *
449  * This function pads up to the next min_io_size boundary (if there is one) and
450  * sets empty space to all 0xff. @buf, @offs and @len are updated to the next
451  * @c->min_io_size boundary.
452  */
clean_buf(const struct ubifs_info * c,void ** buf,int lnum,int * offs,int * len)453 static void clean_buf(const struct ubifs_info *c, void **buf, int lnum, int *offs, int *len)
454 {
455     int empty_offs, pad_len;
456 
457     dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs);
458 
459     ubifs_assert(c, !(*offs & RECOVERY_SEVEN));
460     empty_offs = ALIGN(*offs, c->min_io_size);
461     pad_len = empty_offs - *offs;
462     ubifs_pad(c, *buf, pad_len);
463     *offs += pad_len;
464     *buf += pad_len;
465     *len -= pad_len;
466     memset(*buf, 0xff, c->leb_size - empty_offs);
467 }
468 
469 /**
470  * no_more_nodes - determine if there are no more nodes in a buffer.
471  * @c: UBIFS file-system description object
472  * @buf: buffer to check
473  * @len: length of buffer
474  * @lnum: LEB number of the LEB from which @buf was read
475  * @offs: offset from which @buf was read
476  *
477  * This function ensures that the corrupted node at @offs is the last thing
478  * written to a LEB. This function returns %1 if more data is not found and
479  * %0 if more data is found.
480  */
no_more_nodes(const struct ubifs_info * c,void * buf,int len,int lnum,int offs)481 static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, int lnum, int offs)
482 {
483     struct ubifs_ch *ch = buf;
484     int skip, dlen = le32_to_cpu(ch->len);
485 
486     /* Check for empty space after the corrupt node's common header */
487     skip = ALIGN(offs + UBIFS_CH_SZ, c->max_write_size) - offs;
488     if (is_empty(buf + skip, len - skip)) {
489         return 1;
490     }
491     /*
492      * The area after the common header size is not empty, so the common
493      * header must be intact. Check it.
494      */
495     if (ubifs_check_node(c, buf, len, lnum, offs, 1, 0) != -EUCLEAN) {
496         dbg_rcvry("unexpected bad common header at %d:%d", lnum, offs);
497         return 0;
498     }
499     /* Now we know the corrupt node's length we can skip over it */
500     skip = ALIGN(offs + dlen, c->max_write_size) - offs;
501     /* After which there should be empty space */
502     if (is_empty(buf + skip, len - skip)) {
503         return 1;
504     }
505     dbg_rcvry("unexpected data at %d:%d", lnum, offs + skip);
506     return 0;
507 }
508 
509 /**
510  * fix_unclean_leb - fix an unclean LEB.
511  * @c: UBIFS file-system description object
512  * @sleb: scanned LEB information
513  * @start: offset where scan started
514  */
fix_unclean_leb(struct ubifs_info * c,struct ubifs_scan_leb * sleb,int start)515 static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, int start)
516 {
517     int lnum = sleb->lnum, endpt = start;
518 
519     /* Get the end offset of the last node we are keeping */
520     if (!list_empty(&sleb->nodes)) {
521         struct ubifs_scan_node *snod;
522 
523         snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list);
524         endpt = snod->offs + snod->len;
525     }
526 
527     if (c->ro_mount && !c->remounting_rw) {
528         /* Add to recovery list */
529         struct ubifs_unclean_leb *ucleb;
530 
531         dbg_rcvry("need to fix LEB %d start %d endpt %d", lnum, start, sleb->endpt);
532         ucleb = kzalloc(sizeof(struct ubifs_unclean_leb), GFP_NOFS);
533         if (!ucleb) {
534             return -ENOMEM;
535         }
536         ucleb->lnum = lnum;
537         ucleb->endpt = endpt;
538         list_add_tail(&ucleb->list, &c->unclean_leb_list);
539     } else {
540         /* Write the fixed LEB back to flash */
541         int err;
542 
543         dbg_rcvry("fixing LEB %d start %d endpt %d", lnum, start, sleb->endpt);
544         if (endpt == 0) {
545             err = ubifs_leb_unmap(c, lnum);
546             if (err) {
547                 return err;
548             }
549         } else {
550             int len = ALIGN(endpt, c->min_io_size);
551 
552             if (start) {
553                 err = ubifs_leb_read(c, lnum, sleb->buf, 0, start, 1);
554                 if (err) {
555                     return err;
556                 }
557             }
558             /* Pad to min_io_size */
559             if (len > endpt) {
560                 int pad_len = len - ALIGN(endpt, RECOVERY_EIGHT);
561                 if (pad_len > 0) {
562                     void *buf = sleb->buf + len - pad_len;
563 
564                     ubifs_pad(c, buf, pad_len);
565                 }
566             }
567             err = ubifs_leb_change(c, lnum, sleb->buf, len);
568             if (err) {
569                 return err;
570             }
571         }
572     }
573     return 0;
574 }
575 
576 /**
577  * drop_last_group - drop the last group of nodes.
578  * @sleb: scanned LEB information
579  * @offs: offset of dropped nodes is returned here
580  *
581  * This is a helper function for 'ubifs_recover_leb()' which drops the last
582  * group of nodes of the scanned LEB.
583  */
drop_last_group(struct ubifs_scan_leb * sleb,int * offs)584 static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs)
585 {
586     while (!list_empty(&sleb->nodes)) {
587         struct ubifs_scan_node *snod;
588         struct ubifs_ch *ch;
589 
590         snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list);
591         ch = snod->node;
592         if (ch->group_type != UBIFS_IN_NODE_GROUP) {
593             break;
594         }
595 
596         dbg_rcvry("dropping grouped node at %d:%d", sleb->lnum, snod->offs);
597         *offs = snod->offs;
598         list_del(&snod->list);
599         kfree(snod);
600         sleb->nodes_cnt -= 1;
601     }
602 }
603 
604 /**
605  * drop_last_node - drop the last node.
606  * @sleb: scanned LEB information
607  * @offs: offset of dropped nodes is returned here
608  *
609  * This is a helper function for 'ubifs_recover_leb()' which drops the last
610  * node of the scanned LEB.
611  */
drop_last_node(struct ubifs_scan_leb * sleb,int * offs)612 static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs)
613 {
614     struct ubifs_scan_node *snod;
615 
616     if (!list_empty(&sleb->nodes)) {
617         snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list);
618 
619         dbg_rcvry("dropping last node at %d:%d", sleb->lnum, snod->offs);
620         *offs = snod->offs;
621         list_del(&snod->list);
622         kfree(snod);
623         sleb->nodes_cnt -= 1;
624     }
625 }
626 
627 /**
628  * ubifs_recover_leb - scan and recover a LEB.
629  * @c: UBIFS file-system description object
630  * @lnum: LEB number
631  * @offs: offset
632  * @sbuf: LEB-sized buffer to use
633  * @jhead: journal head number this LEB belongs to (%-1 if the LEB does not
634  *         belong to any journal head)
635  *
636  * This function does a scan of a LEB, but caters for errors that might have
637  * been caused by the unclean unmount from which we are attempting to recover.
638  * Returns the scanned information on success and a negative error code on
639  * failure.
640  */
ubifs_recover_leb(struct ubifs_info * c,int lnum,int offs,void * sbuf,int jhead)641 struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf, int jhead)
642 {
643     int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit;
644     int grouped = jhead == -1 ? 0 : c->jheads[jhead].grouped;
645     struct ubifs_scan_leb *sleb;
646     void *buf = sbuf + offs;
647 
648     dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped);
649 
650     sleb = ubifs_start_scan(c, lnum, offs, sbuf);
651     if (IS_ERR(sleb)) {
652         return sleb;
653     }
654 
655     ubifs_assert(c, len >= RECOVERY_EIGHT);
656     while (len >= RECOVERY_EIGHT) {
657         dbg_scan("look at LEB %d:%d (%d bytes left)", lnum, offs, len);
658 
659         cond_resched();
660 
661         /*
662          * Scan quietly until there is an error from which we cannot
663          * recover
664          */
665         ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
666         if (ret == SCANNED_A_NODE) {
667             /* A valid node, and not a padding node */
668             struct ubifs_ch *ch = buf;
669             int node_len;
670 
671             err = ubifs_add_snod(c, sleb, buf, offs);
672             if (err) {
673                 goto error;
674             }
675             node_len = ALIGN(le32_to_cpu(ch->len), RECOVERY_EIGHT);
676             offs += node_len;
677             buf += node_len;
678             len -= node_len;
679         } else if (ret > 0) {
680             /* Padding bytes or a valid padding node */
681             offs += ret;
682             buf += ret;
683             len -= ret;
684         } else if (ret == SCANNED_A_CORRUPT_NODE) {
685             dbg_rcvry("found corruption (%d) at %d:%d", ret, lnum, offs);
686             if (ubifs_check_node(c, buf, len, lnum, offs, 1, 1) == -EUCLEAN &&
687                 !no_more_nodes(c, buf, len, lnum, offs)) {
688                 int skip;
689                 struct ubifs_ch *ch = buf;
690 
691                 /*
692                  * If the flash voltage power down suddenly in the programming
693                  * process, it may lead to abnormal data written by the flash
694                  * in the low-voltage operation process, and the last data
695                  * should be discarded.
696                  */
697                 ubifs_msg(c, "recovery corrupt node\n");
698                 skip = ALIGN(offs + le32_to_cpu(ch->len), c->max_write_size) - offs;
699                 memset(buf + skip, 0xff, len - skip);
700             }
701 
702             break;
703         } else if (ret == SCANNED_EMPTY_SPACE) {
704             dbg_rcvry("found corruption (%d) at %d:%d", ret, lnum, offs);
705             if (!is_empty(buf, len) && !is_last_write(c, buf, offs)) {
706                 /*
707                  * If the flash voltage power down suddenly in the programming
708                  * process, it may lead to the data was programmed to the wroge
709                  * page written by the flash in the low-voltage operation process,
710                  * and the data should be discarded.
711                  */
712                 ubifs_msg(c, "recovery empty space\n");
713                 memset(buf, 0xff, len);
714             }
715 
716             break;
717         } else if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) {
718             dbg_rcvry("found corruption (%d) at %d:%d", ret, lnum, offs);
719             break;
720         } else {
721             ubifs_err(c, "unexpected return value %d", ret);
722             err = -EINVAL;
723             goto error;
724         }
725     }
726 
727     if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) {
728         if (!is_last_write(c, buf, offs)) {
729             goto corrupted_rescan;
730         }
731     } else if (ret == SCANNED_A_CORRUPT_NODE) {
732         if (!no_more_nodes(c, buf, len, lnum, offs)) {
733             goto corrupted_rescan;
734         }
735     } else if (!is_empty(buf, len)) {
736         if (!is_last_write(c, buf, offs)) {
737             int corruption = first_non_ff(buf, len);
738 
739             /*
740              * See header comment for this file for more
741              * explanations about the reasons we have this check.
742              */
743             ubifs_err(c, "corrupt empty space LEB %d:%d, corruption starts at %d", lnum, offs, corruption);
744             /* Make sure we dump interesting non-0xFF data */
745             offs += corruption;
746             buf += corruption;
747             goto corrupted;
748         }
749     }
750 
751     min_io_unit = round_down(offs, c->min_io_size);
752     if (grouped) {
753         /*
754          * If nodes are grouped, always drop the incomplete group at
755          * the end.
756          */
757         drop_last_group(sleb, &offs);
758     }
759 
760     if (jhead == GCHD) {
761         /*
762          * If this LEB belongs to the GC head then while we are in the
763          * middle of the same min. I/O unit keep dropping nodes. So
764          * basically, what we want is to make sure that the last min.
765          * I/O unit where we saw the corruption is dropped completely
766          * with all the uncorrupted nodes which may possibly sit there.
767          *
768          * In other words, let's name the min. I/O unit where the
769          * corruption starts B, and the previous min. I/O unit A. The
770          * below code tries to deal with a situation when half of B
771          * contains valid nodes or the end of a valid node, and the
772          * second half of B contains corrupted data or garbage. This
773          * means that UBIFS had been writing to B just before the power
774          * cut happened. I do not know how realistic is this scenario
775          * that half of the min. I/O unit had been written successfully
776          * and the other half not, but this is possible in our 'failure
777          * mode emulation' infrastructure at least.
778          *
779          * So what is the problem, why we need to drop those nodes? Why
780          * can't we just clean-up the second half of B by putting a
781          * padding node there? We can, and this works fine with one
782          * exception which was reproduced with power cut emulation
783          * testing and happens extremely rarely.
784          *
785          * Imagine the file-system is full, we run GC which starts
786          * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is
787          * the current GC head LEB). The @c->gc_lnum is -1, which means
788          * that GC will retain LEB X and will try to continue. Imagine
789          * that LEB X is currently the dirtiest LEB, and the amount of
790          * used space in LEB Y is exactly the same as amount of free
791          * space in LEB X.
792          *
793          * And a power cut happens when nodes are moved from LEB X to
794          * LEB Y. We are here trying to recover LEB Y which is the GC
795          * head LEB. We find the min. I/O unit B as described above.
796          * Then we clean-up LEB Y by padding min. I/O unit. And later
797          * 'ubifs_rcvry_gc_commit()' function fails, because it cannot
798          * find a dirty LEB which could be GC'd into LEB Y! Even LEB X
799          * does not match because the amount of valid nodes there does
800          * not fit the free space in LEB Y any more! And this is
801          * because of the padding node which we added to LEB Y. The
802          * user-visible effect of this which I once observed and
803          * analysed is that we cannot mount the file-system with
804          * -ENOSPC error.
805          *
806          * So obviously, to make sure that situation does not happen we
807          * should free min. I/O unit B in LEB Y completely and the last
808          * used min. I/O unit in LEB Y should be A. This is basically
809          * what the below code tries to do.
810          */
811         while (offs > min_io_unit) {
812             drop_last_node(sleb, &offs);
813         }
814     }
815 
816     buf = sbuf + offs;
817     len = c->leb_size - offs;
818 
819     clean_buf(c, &buf, lnum, &offs, &len);
820     ubifs_end_scan(c, sleb, lnum, offs);
821 
822     err = fix_unclean_leb(c, sleb, start);
823     if (err) {
824         goto error;
825     }
826 
827     return sleb;
828 
829 corrupted_rescan:
830     /* Re-scan the corrupted data with verbose messages */
831     ubifs_err(c, "corruption %d", ret);
832     ubifs_scan_a_node(c, buf, len, lnum, offs, 0);
833 corrupted:
834     ubifs_scanned_corruption(c, lnum, offs, buf);
835     err = -EUCLEAN;
836 error:
837     ubifs_err(c, "LEB %d scanning failed", lnum);
838     ubifs_scan_destroy(sleb);
839     return ERR_PTR(err);
840 }
841 
842 /**
843  * get_cs_sqnum - get commit start sequence number.
844  * @c: UBIFS file-system description object
845  * @lnum: LEB number of commit start node
846  * @offs: offset of commit start node
847  * @cs_sqnum: commit start sequence number is returned here
848  *
849  * This function returns %0 on success and a negative error code on failure.
850  */
get_cs_sqnum(struct ubifs_info * c,int lnum,int offs,unsigned long long * cs_sqnum)851 static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs, unsigned long long *cs_sqnum)
852 {
853     struct ubifs_cs_node *cs_node = NULL;
854     int err, ret;
855 
856     dbg_rcvry("at %d:%d", lnum, offs);
857     cs_node = kmalloc(UBIFS_CS_NODE_SZ, GFP_KERNEL);
858     if (!cs_node) {
859         return -ENOMEM;
860     }
861     if (c->leb_size - offs < UBIFS_CS_NODE_SZ) {
862         goto out_err;
863     }
864     err = ubifs_leb_read(c, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ, 0);
865     if (err && err != -EBADMSG) {
866         goto out_free;
867     }
868     ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0);
869     if (ret != SCANNED_A_NODE) {
870         ubifs_err(c, "Not a valid node");
871         goto out_err;
872     }
873     if (cs_node->ch.node_type != UBIFS_CS_NODE) {
874         ubifs_err(c, "Not a CS node, type is %d", cs_node->ch.node_type);
875         goto out_err;
876     }
877     if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) {
878         ubifs_err(c, "CS node cmt_no %llu != current cmt_no %llu", (unsigned long long)le64_to_cpu(cs_node->cmt_no),
879                   c->cmt_no);
880         goto out_err;
881     }
882     *cs_sqnum = le64_to_cpu(cs_node->ch.sqnum);
883     dbg_rcvry("commit start sqnum %llu", *cs_sqnum);
884     kfree(cs_node);
885     return 0;
886 
887 out_err:
888     err = -EINVAL;
889 out_free:
890     ubifs_err(c, "failed to get CS sqnum");
891     kfree(cs_node);
892     return err;
893 }
894 
895 /**
896  * ubifs_recover_log_leb - scan and recover a log LEB.
897  * @c: UBIFS file-system description object
898  * @lnum: LEB number
899  * @offs: offset
900  * @sbuf: LEB-sized buffer to use
901  *
902  * This function does a scan of a LEB, but caters for errors that might have
903  * been caused by unclean reboots from which we are attempting to recover
904  * (assume that only the last log LEB can be corrupted by an unclean reboot).
905  *
906  * This function returns %0 on success and a negative error code on failure.
907  */
ubifs_recover_log_leb(struct ubifs_info * c,int lnum,int offs,void * sbuf)908 struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
909 {
910     struct ubifs_scan_leb *sleb;
911     int next_lnum;
912 
913     dbg_rcvry("LEB %d", lnum);
914     next_lnum = lnum + 1;
915     if (next_lnum >= UBIFS_LOG_LNUM + c->log_lebs) {
916         next_lnum = UBIFS_LOG_LNUM;
917     }
918     if (next_lnum != c->ltail_lnum) {
919         /*
920          * We can only recover at the end of the log, so check that the
921          * next log LEB is empty or out of date.
922          */
923         sleb = ubifs_scan(c, next_lnum, 0, sbuf, 0);
924         if (IS_ERR(sleb)) {
925             return sleb;
926         }
927         if (sleb->nodes_cnt) {
928             struct ubifs_scan_node *snod;
929             unsigned long long cs_sqnum = c->cs_sqnum;
930 
931             snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
932             if (cs_sqnum == 0) {
933                 int err;
934 
935                 err = get_cs_sqnum(c, lnum, offs, &cs_sqnum);
936                 if (err) {
937                     ubifs_scan_destroy(sleb);
938                     return ERR_PTR(err);
939                 }
940             }
941             if (snod->sqnum > cs_sqnum) {
942                 ubifs_err(c, "unrecoverable log corruption in LEB %d", lnum);
943                 ubifs_scan_destroy(sleb);
944                 return ERR_PTR(-EUCLEAN);
945             }
946         }
947         ubifs_scan_destroy(sleb);
948     }
949     return ubifs_recover_leb(c, lnum, offs, sbuf, -1);
950 }
951 
952 /**
953  * recover_head - recover a head.
954  * @c: UBIFS file-system description object
955  * @lnum: LEB number of head to recover
956  * @offs: offset of head to recover
957  * @sbuf: LEB-sized buffer to use
958  *
959  * This function ensures that there is no data on the flash at a head location.
960  *
961  * This function returns %0 on success and a negative error code on failure.
962  */
recover_head(struct ubifs_info * c,int lnum,int offs,void * sbuf)963 static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf)
964 {
965     int len = c->max_write_size, err;
966 
967     if (offs + len > c->leb_size) {
968         len = c->leb_size - offs;
969     }
970 
971     if (!len) {
972         return 0;
973     }
974 
975     /* Read at the head location and check it is empty flash */
976     err = ubifs_leb_read(c, lnum, sbuf, offs, len, 1);
977     if (err || !is_empty(sbuf, len)) {
978         dbg_rcvry("cleaning head at %d:%d", lnum, offs);
979         if (offs == 0) {
980             return ubifs_leb_unmap(c, lnum);
981         }
982         err = ubifs_leb_read(c, lnum, sbuf, 0, offs, 1);
983         if (err) {
984             return err;
985         }
986         return ubifs_leb_change(c, lnum, sbuf, offs);
987     }
988 
989     return 0;
990 }
991 
992 /**
993  * ubifs_recover_inl_heads - recover index and LPT heads.
994  * @c: UBIFS file-system description object
995  * @sbuf: LEB-sized buffer to use
996  *
997  * This function ensures that there is no data on the flash at the index and
998  * LPT head locations.
999  *
1000  * This deals with the recovery of a half-completed journal commit. UBIFS is
1001  * careful never to overwrite the last version of the index or the LPT. Because
1002  * the index and LPT are wandering trees, data from a half-completed commit will
1003  * not be referenced anywhere in UBIFS. The data will be either in LEBs that are
1004  * assumed to be empty and will be unmapped anyway before use, or in the index
1005  * and LPT heads.
1006  *
1007  * This function returns %0 on success and a negative error code on failure.
1008  */
ubifs_recover_inl_heads(struct ubifs_info * c,void * sbuf)1009 int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf)
1010 {
1011     int err;
1012 
1013     ubifs_assert(c, !c->ro_mount || c->remounting_rw);
1014 
1015     dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs);
1016     err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf);
1017     if (err) {
1018         return err;
1019     }
1020 
1021     dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum, c->nhead_offs);
1022 
1023     return recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf);
1024 }
1025 
1026 /**
1027  * clean_an_unclean_leb - read and write a LEB to remove corruption.
1028  * @c: UBIFS file-system description object
1029  * @ucleb: unclean LEB information
1030  * @sbuf: LEB-sized buffer to use
1031  *
1032  * This function reads a LEB up to a point pre-determined by the mount recovery,
1033  * checks the nodes, and writes the result back to the flash, thereby cleaning
1034  * off any following corruption, or non-fatal ECC errors.
1035  *
1036  * This function returns %0 on success and a negative error code on failure.
1037  */
clean_an_unclean_leb(struct ubifs_info * c,struct ubifs_unclean_leb * ucleb,void * sbuf)1038 static int clean_an_unclean_leb(struct ubifs_info *c, struct ubifs_unclean_leb *ucleb, void *sbuf)
1039 {
1040     int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1;
1041     void *buf = sbuf;
1042 
1043     dbg_rcvry("LEB %d len %d", lnum, len);
1044 
1045     if (len == 0) {
1046         /* Nothing to read, just unmap it */
1047         return ubifs_leb_unmap(c, lnum);
1048     }
1049 
1050     err = ubifs_leb_read(c, lnum, buf, offs, len, 0);
1051     if (err && err != -EBADMSG) {
1052         return err;
1053     }
1054 
1055     while (len >= RECOVERY_EIGHT) {
1056         int ret;
1057 
1058         cond_resched();
1059 
1060         /* Scan quietly until there is an error */
1061         ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
1062         if (ret == SCANNED_A_NODE) {
1063             /* A valid node, and not a padding node */
1064             struct ubifs_ch *ch = buf;
1065             int node_len;
1066 
1067             node_len = ALIGN(le32_to_cpu(ch->len), RECOVERY_EIGHT);
1068             offs += node_len;
1069             buf += node_len;
1070             len -= node_len;
1071             continue;
1072         }
1073 
1074         if (ret > 0) {
1075             /* Padding bytes or a valid padding node */
1076             offs += ret;
1077             buf += ret;
1078             len -= ret;
1079             continue;
1080         }
1081 
1082         if (ret == SCANNED_EMPTY_SPACE) {
1083             ubifs_err(c, "unexpected empty space at %d:%d", lnum, offs);
1084             return -EUCLEAN;
1085         }
1086 
1087         if (quiet) {
1088             /* Redo the last scan but noisily */
1089             quiet = 0;
1090             continue;
1091         }
1092 
1093         ubifs_scanned_corruption(c, lnum, offs, buf);
1094         return -EUCLEAN;
1095     }
1096 
1097     /* Pad to min_io_size */
1098     len = ALIGN(ucleb->endpt, c->min_io_size);
1099     if (len > ucleb->endpt) {
1100         int pad_len = len - ALIGN(ucleb->endpt, RECOVERY_EIGHT);
1101         if (pad_len > 0) {
1102             buf = c->sbuf + len - pad_len;
1103             ubifs_pad(c, buf, pad_len);
1104         }
1105     }
1106 
1107     /* Write back the LEB atomically */
1108     err = ubifs_leb_change(c, lnum, sbuf, len);
1109     if (err) {
1110         return err;
1111     }
1112 
1113     dbg_rcvry("cleaned LEB %d", lnum);
1114 
1115     return 0;
1116 }
1117 
1118 /**
1119  * ubifs_clean_lebs - clean LEBs recovered during read-only mount.
1120  * @c: UBIFS file-system description object
1121  * @sbuf: LEB-sized buffer to use
1122  *
1123  * This function cleans a LEB identified during recovery that needs to be
1124  * written but was not because UBIFS was mounted read-only. This happens when
1125  * remounting to read-write mode.
1126  *
1127  * This function returns %0 on success and a negative error code on failure.
1128  */
ubifs_clean_lebs(struct ubifs_info * c,void * sbuf)1129 int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf)
1130 {
1131     dbg_rcvry("recovery");
1132     while (!list_empty(&c->unclean_leb_list)) {
1133         struct ubifs_unclean_leb *ucleb;
1134         int err;
1135 
1136         ucleb = list_entry(c->unclean_leb_list.next, struct ubifs_unclean_leb, list);
1137         err = clean_an_unclean_leb(c, ucleb, sbuf);
1138         if (err) {
1139             return err;
1140         }
1141         list_del(&ucleb->list);
1142         kfree(ucleb);
1143     }
1144     return 0;
1145 }
1146 
1147 /**
1148  * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit.
1149  * @c: UBIFS file-system description object
1150  *
1151  * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty
1152  * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns
1153  * zero in case of success and a negative error code in case of failure.
1154  */
grab_empty_leb(struct ubifs_info * c)1155 static int grab_empty_leb(struct ubifs_info *c)
1156 {
1157     int lnum, err;
1158 
1159     /*
1160      * Note, it is very important to first search for an empty LEB and then
1161      * run the commit, not vice-versa. The reason is that there might be
1162      * only one empty LEB at the moment, the one which has been the
1163      * @c->gc_lnum just before the power cut happened. During the regular
1164      * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no
1165      * one but GC can grab it. But at this moment this single empty LEB is
1166      * not marked as taken, so if we run commit - what happens? Right, the
1167      * commit will grab it and write the index there. Remember that the
1168      * index always expands as long as there is free space, and it only
1169      * starts consolidating when we run out of space.
1170      *
1171      * IOW, if we run commit now, we might not be able to find a free LEB
1172      * after this.
1173      */
1174     lnum = ubifs_find_free_leb_for_idx(c);
1175     if (lnum < 0) {
1176         ubifs_err(c, "could not find an empty LEB");
1177         ubifs_dump_lprops(c);
1178         ubifs_dump_budg(c, &c->bi);
1179         return lnum;
1180     }
1181 
1182     /* Reset the index flag */
1183     err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, LPROPS_INDEX, 0);
1184     if (err) {
1185         return err;
1186     }
1187 
1188     c->gc_lnum = lnum;
1189     dbg_rcvry("found empty LEB %d, run commit", lnum);
1190 
1191     return ubifs_run_commit(c);
1192 }
1193 
1194 /**
1195  * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit.
1196  * @c: UBIFS file-system description object
1197  *
1198  * Out-of-place garbage collection requires always one empty LEB with which to
1199  * start garbage collection. The LEB number is recorded in c->gc_lnum and is
1200  * written to the master node on unmounting. In the case of an unclean unmount
1201  * the value of gc_lnum recorded in the master node is out of date and cannot
1202  * be used. Instead, recovery must allocate an empty LEB for this purpose.
1203  * However, there may not be enough empty space, in which case it must be
1204  * possible to GC the dirtiest LEB into the GC head LEB.
1205  *
1206  * This function also runs the commit which causes the TNC updates from
1207  * size-recovery and orphans to be written to the flash. That is important to
1208  * ensure correct replay order for subsequent mounts.
1209  *
1210  * This function returns %0 on success and a negative error code on failure.
1211  */
ubifs_rcvry_gc_commit(struct ubifs_info * c)1212 int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1213 {
1214     struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
1215     struct ubifs_lprops lp;
1216     int err;
1217 
1218     dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs);
1219 
1220     c->gc_lnum = -1;
1221     if (wbuf->lnum == -1 || wbuf->offs == c->leb_size) {
1222         return grab_empty_leb(c);
1223     }
1224 
1225     err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, RECOVERY_TWO);
1226     if (err) {
1227         if (err != -ENOSPC) {
1228             return err;
1229         }
1230 
1231         dbg_rcvry("could not find a dirty LEB");
1232         return grab_empty_leb(c);
1233     }
1234 
1235     ubifs_assert(c, !(lp.flags & LPROPS_INDEX));
1236     ubifs_assert(c, lp.free + lp.dirty >= wbuf->offs);
1237 
1238     /*
1239      * We run the commit before garbage collection otherwise subsequent
1240      * mounts will see the GC and orphan deletion in a different order.
1241      */
1242     dbg_rcvry("committing");
1243     err = ubifs_run_commit(c);
1244     if (err) {
1245         return err;
1246     }
1247 
1248     dbg_rcvry("GC'ing LEB %d", lp.lnum);
1249     mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
1250     err = ubifs_garbage_collect_leb(c, &lp);
1251     if (err >= 0) {
1252         int err2 = ubifs_wbuf_sync_nolock(wbuf);
1253         if (err2) {
1254             err = err2;
1255         }
1256     }
1257     mutex_unlock(&wbuf->io_mutex);
1258     if (err < 0) {
1259         ubifs_err(c, "GC failed, error %d", err);
1260         if (err == -EAGAIN) {
1261             err = -EINVAL;
1262         }
1263         return err;
1264     }
1265 
1266     ubifs_assert(c, err == LEB_RETAINED);
1267     if (err != LEB_RETAINED) {
1268         return -EINVAL;
1269     }
1270 
1271     err = ubifs_leb_unmap(c, c->gc_lnum);
1272     if (err) {
1273         return err;
1274     }
1275 
1276     dbg_rcvry("allocated LEB %d for GC", lp.lnum);
1277     return 0;
1278 }
1279 
1280 /**
1281  * struct size_entry - inode size information for recovery.
1282  * @rb: link in the RB-tree of sizes
1283  * @inum: inode number
1284  * @i_size: size on inode
1285  * @d_size: maximum size based on data nodes
1286  * @exists: indicates whether the inode exists
1287  * @inode: inode if pinned in memory awaiting rw mode to fix it
1288  */
1289 struct size_entry {
1290     struct rb_node rb;
1291     ino_t inum;
1292     loff_t i_size;
1293     loff_t d_size;
1294     int exists;
1295     struct inode *inode;
1296 };
1297 
1298 /**
1299  * add_ino - add an entry to the size tree.
1300  * @c: UBIFS file-system description object
1301  * @inum: inode number
1302  * @i_size: size on inode
1303  * @d_size: maximum size based on data nodes
1304  * @exists: indicates whether the inode exists
1305  */
add_ino(struct ubifs_info * c,ino_t inum,loff_t i_size,loff_t d_size,int exists)1306 static int add_ino(struct ubifs_info *c, ino_t inum, loff_t i_size, loff_t d_size, int exists)
1307 {
1308     struct rb_node **p = &c->size_tree.rb_node, *parent = NULL;
1309     struct size_entry *e;
1310 
1311     while (*p) {
1312         parent = *p;
1313         e = rb_entry(parent, struct size_entry, rb);
1314         if (inum < e->inum) {
1315             p = &(*p)->rb_left;
1316         } else {
1317             p = &(*p)->rb_right;
1318         }
1319     }
1320 
1321     e = kzalloc(sizeof(struct size_entry), GFP_KERNEL);
1322     if (!e) {
1323         return -ENOMEM;
1324     }
1325 
1326     e->inum = inum;
1327     e->i_size = i_size;
1328     e->d_size = d_size;
1329     e->exists = exists;
1330 
1331     rb_link_node(&e->rb, parent, p);
1332     rb_insert_color(&e->rb, &c->size_tree);
1333 
1334     return 0;
1335 }
1336 
1337 /**
1338  * find_ino - find an entry on the size tree.
1339  * @c: UBIFS file-system description object
1340  * @inum: inode number
1341  */
find_ino(struct ubifs_info * c,ino_t inum)1342 static struct size_entry *find_ino(struct ubifs_info *c, ino_t inum)
1343 {
1344     struct rb_node *p = c->size_tree.rb_node;
1345     struct size_entry *e;
1346 
1347     while (p) {
1348         e = rb_entry(p, struct size_entry, rb);
1349         if (inum < e->inum) {
1350             p = p->rb_left;
1351         } else if (inum > e->inum) {
1352             p = p->rb_right;
1353         } else {
1354             return e;
1355         }
1356     }
1357     return NULL;
1358 }
1359 
1360 /**
1361  * remove_ino - remove an entry from the size tree.
1362  * @c: UBIFS file-system description object
1363  * @inum: inode number
1364  */
remove_ino(struct ubifs_info * c,ino_t inum)1365 static void remove_ino(struct ubifs_info *c, ino_t inum)
1366 {
1367     struct size_entry *e = find_ino(c, inum);
1368 
1369     if (!e) {
1370         return;
1371     }
1372     rb_erase(&e->rb, &c->size_tree);
1373     kfree(e);
1374 }
1375 
1376 /**
1377  * ubifs_destroy_size_tree - free resources related to the size tree.
1378  * @c: UBIFS file-system description object
1379  */
ubifs_destroy_size_tree(struct ubifs_info * c)1380 void ubifs_destroy_size_tree(struct ubifs_info *c)
1381 {
1382     struct size_entry *e, *n;
1383 
1384     rbtree_postorder_for_each_entry_safe(e, n, &c->size_tree, rb)
1385     {
1386         iput(e->inode);
1387         kfree(e);
1388     }
1389 
1390     c->size_tree = RB_ROOT;
1391 }
1392 
1393 /**
1394  * ubifs_recover_size_accum - accumulate inode sizes for recovery.
1395  * @c: UBIFS file-system description object
1396  * @key: node key
1397  * @deletion: node is for a deletion
1398  * @new_size: inode size
1399  *
1400  * This function has two purposes:
1401  *     1) to ensure there are no data nodes that fall outside the inode size
1402  *     2) to ensure there are no data nodes for inodes that do not exist
1403  * To accomplish those purposes, a rb-tree is constructed containing an entry
1404  * for each inode number in the journal that has not been deleted, and recording
1405  * the size from the inode node, the maximum size of any data node (also altered
1406  * by truncations) and a flag indicating a inode number for which no inode node
1407  * was present in the journal.
1408  *
1409  * Note that there is still the possibility that there are data nodes that have
1410  * been committed that are beyond the inode size, however the only way to find
1411  * them would be to scan the entire index. Alternatively, some provision could
1412  * be made to record the size of inodes at the start of commit, which would seem
1413  * very cumbersome for a scenario that is quite unlikely and the only negative
1414  * consequence of which is wasted space.
1415  *
1416  * This functions returns %0 on success and a negative error code on failure.
1417  */
ubifs_recover_size_accum(struct ubifs_info * c,union ubifs_key * key,int deletion,loff_t new_size)1418 int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, int deletion, loff_t new_size)
1419 {
1420     ino_t inum = key_inum(c, key);
1421     struct size_entry *e;
1422     int err;
1423 
1424     switch (key_type(c, key)) {
1425         case UBIFS_INO_KEY:
1426             if (deletion) {
1427                 remove_ino(c, inum);
1428             } else {
1429                 e = find_ino(c, inum);
1430                 if (e) {
1431                     e->i_size = new_size;
1432                     e->exists = 1;
1433                 } else {
1434                     err = add_ino(c, inum, new_size, 0, 1);
1435                     if (err) {
1436                         return err;
1437                     }
1438                 }
1439             }
1440             break;
1441         case UBIFS_DATA_KEY:
1442             e = find_ino(c, inum);
1443             if (e) {
1444                 if (new_size > e->d_size) {
1445                     e->d_size = new_size;
1446                 }
1447             } else {
1448                 err = add_ino(c, inum, 0, new_size, 0);
1449                 if (err) {
1450                     return err;
1451                 }
1452             }
1453             break;
1454         case UBIFS_TRUN_KEY:
1455             e = find_ino(c, inum);
1456             if (e) {
1457                 e->d_size = new_size;
1458             }
1459             break;
1460         default:
1461             break;
1462     }
1463     return 0;
1464 }
1465 
1466 /**
1467  * fix_size_in_place - fix inode size in place on flash.
1468  * @c: UBIFS file-system description object
1469  * @e: inode size information for recovery
1470  */
fix_size_in_place(struct ubifs_info * c,struct size_entry * e)1471 static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
1472 {
1473     struct ubifs_ino_node *ino = c->sbuf;
1474     unsigned char *p;
1475     union ubifs_key key;
1476     int err, lnum, offs, len;
1477     loff_t i_size;
1478     uint32_t crc;
1479 
1480     /* Locate the inode node LEB number and offset */
1481     ino_key_init(c, &key, e->inum);
1482     err = ubifs_tnc_locate(c, &key, ino, &lnum, &offs);
1483     if (err) {
1484         goto out;
1485     }
1486     /*
1487      * If the size recorded on the inode node is greater than the size that
1488      * was calculated from nodes in the journal then don't change the inode.
1489      */
1490     i_size = le64_to_cpu(ino->size);
1491     if (i_size >= e->d_size) {
1492         return 0;
1493     }
1494     /* Read the LEB */
1495     err = ubifs_leb_read(c, lnum, c->sbuf, 0, c->leb_size, 1);
1496     if (err) {
1497         goto out;
1498     }
1499     /* Change the size field and recalculate the CRC */
1500     ino = c->sbuf + offs;
1501     ino->size = cpu_to_le64(e->d_size);
1502     len = le32_to_cpu(ino->ch.len);
1503     crc = crc32(UBIFS_CRC32_INIT, (void *)ino + RECOVERY_EIGHT, len - RECOVERY_EIGHT);
1504     ino->ch.crc = cpu_to_le32(crc);
1505     /* Work out where data in the LEB ends and free space begins */
1506     p = c->sbuf;
1507     len = c->leb_size - 1;
1508     while (p[len] == 0xff) {
1509         len -= 1;
1510     }
1511     len = ALIGN(len + 1, c->min_io_size);
1512     /* Atomically write the fixed LEB back again */
1513     err = ubifs_leb_change(c, lnum, c->sbuf, len);
1514     if (err) {
1515         goto out;
1516     }
1517     dbg_rcvry("inode %lu at %d:%d size %lld -> %lld", (unsigned long)e->inum, lnum, offs, i_size, e->d_size);
1518     return 0;
1519 
1520 out:
1521     ubifs_warn(c, "inode %lu failed to fix size %lld -> %lld error %d", (unsigned long)e->inum, e->i_size, e->d_size,
1522                err);
1523     return err;
1524 }
1525 
1526 /**
1527  * inode_fix_size - fix inode size
1528  * @c: UBIFS file-system description object
1529  * @e: inode size information for recovery
1530  */
inode_fix_size(struct ubifs_info * c,struct size_entry * e)1531 static int inode_fix_size(struct ubifs_info *c, struct size_entry *e)
1532 {
1533     struct inode *inode;
1534     struct ubifs_inode *ui;
1535     int err;
1536 
1537     if (c->ro_mount) {
1538         ubifs_assert(c, !e->inode);
1539     }
1540 
1541     if (e->inode) {
1542         /* Remounting rw, pick up inode we stored earlier */
1543         inode = e->inode;
1544     } else {
1545         inode = ubifs_iget(c->vfs_sb, e->inum);
1546         if (IS_ERR(inode)) {
1547             return PTR_ERR(inode);
1548         }
1549 
1550         if (inode->i_size >= e->d_size) {
1551             /*
1552              * The original inode in the index already has a size
1553              * big enough, nothing to do
1554              */
1555             iput(inode);
1556             return 0;
1557         }
1558 
1559         dbg_rcvry("ino %lu size %lld -> %lld", (unsigned long)e->inum, inode->i_size, e->d_size);
1560 
1561         ui = ubifs_inode(inode);
1562 
1563         inode->i_size = e->d_size;
1564         ui->ui_size = e->d_size;
1565         ui->synced_i_size = e->d_size;
1566 
1567         e->inode = inode;
1568     }
1569 
1570     /*
1571      * In readonly mode just keep the inode pinned in memory until we go
1572      * readwrite. In readwrite mode write the inode to the journal with the
1573      * fixed size.
1574      */
1575     if (c->ro_mount) {
1576         return 0;
1577     }
1578 
1579     err = ubifs_jnl_write_inode(c, inode);
1580 
1581     iput(inode);
1582 
1583     if (err) {
1584         return err;
1585     }
1586 
1587     rb_erase(&e->rb, &c->size_tree);
1588     kfree(e);
1589 
1590     return 0;
1591 }
1592 
1593 /**
1594  * ubifs_recover_size - recover inode size.
1595  * @c: UBIFS file-system description object
1596  * @in_place: If true, do a in-place size fixup
1597  *
1598  * This function attempts to fix inode size discrepancies identified by the
1599  * 'ubifs_recover_size_accum()' function.
1600  *
1601  * This functions returns %0 on success and a negative error code on failure.
1602  */
ubifs_recover_size(struct ubifs_info * c,bool in_place)1603 int ubifs_recover_size(struct ubifs_info *c, bool in_place)
1604 {
1605     struct rb_node *this = rb_first(&c->size_tree);
1606 
1607     while (this) {
1608         struct size_entry *e;
1609         int err;
1610 
1611         e = rb_entry(this, struct size_entry, rb);
1612 
1613         this = rb_next(this);
1614 
1615         if (!e->exists) {
1616             union ubifs_key key;
1617 
1618             ino_key_init(c, &key, e->inum);
1619             err = ubifs_tnc_lookup(c, &key, c->sbuf);
1620             if (err && err != -ENOENT) {
1621                 return err;
1622             }
1623             if (err == -ENOENT) {
1624                 /* Remove data nodes that have no inode */
1625                 dbg_rcvry("removing ino %lu", (unsigned long)e->inum);
1626                 err = ubifs_tnc_remove_ino(c, e->inum);
1627                 if (err) {
1628                     return err;
1629                 }
1630             } else {
1631                 struct ubifs_ino_node *ino = c->sbuf;
1632 
1633                 e->exists = 1;
1634                 e->i_size = le64_to_cpu(ino->size);
1635             }
1636         }
1637 
1638         if (e->exists && e->i_size < e->d_size) {
1639             ubifs_assert(c, !(c->ro_mount && in_place));
1640 
1641             /*
1642              * We found data that is outside the found inode size,
1643              * fixup the inode size
1644              */
1645 
1646             if (in_place) {
1647                 err = fix_size_in_place(c, e);
1648                 if (err) {
1649                     return err;
1650                 }
1651                 iput(e->inode);
1652             } else {
1653                 err = inode_fix_size(c, e);
1654                 if (err) {
1655                     return err;
1656                 }
1657                 continue;
1658             }
1659         }
1660 
1661         rb_erase(&e->rb, &c->size_tree);
1662         kfree(e);
1663     }
1664 
1665     return 0;
1666 }
1667