• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015-2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <assert.h>
18 #include <inttypes.h>
19 #include <lk/compiler.h>
20 #include <stdbool.h>
21 #include <stdint.h>
22 #include <stdio.h>
23 #include <string.h>
24 
25 #ifndef LOCAL_TRACE
26 #define LOCAL_TRACE TRACE_LEVEL_INIT
27 #endif
28 #ifndef LOCAL_TRACE_ERR
29 #define LOCAL_TRACE_ERR TRACE_LEVEL_INIT
30 #endif
31 
32 #include "array.h"
33 #include "block_allocator.h"
34 #include "block_cache.h"
35 #include "block_set.h"
36 #include "checkpoint.h"
37 #include "debug.h"
38 #include "error_reporting.h"
39 #include "file.h"
40 #include "fs.h"
41 #include "transaction.h"
42 
43 #define SUPER_BLOCK_MAGIC (0x0073797473757274ULL) /* trustys */
44 #define SUPER_BLOCK_FLAGS_VERSION_MASK (0x3U)
45 #define SUPER_BLOCK_FLAGS_BLOCK_INDEX_MASK (0x1U)
46 #define SUPER_BLOCK_FLAGS_EMPTY (0x4U)
47 #define SUPER_BLOCK_FLAGS_ALTERNATE (0x8U)
48 #define SUPER_BLOCK_FLAGS_SUPPORTED_MASK (0xfU)
49 #define SUPER_BLOCK_FS_VERSION (0U)
50 
51 /**
52  * typedef super_block_opt_flags8_t - Optional flags, can be ORed together
53  *
54  * %SUPER_BLOCK_OPT_FLAGS_HAS_FLAGS3
55  *   Indicates that the superblock has additional data after flags2 and that
56  *   flags3 should be set to the same value as flags
57  * %SUPER_BLOCK_OPT_FLAGS_HAS_CHECKPOINT
58  *   Indicates that the superblock contains the @checkpoint field
59  * %SUPER_BLOCK_OPT_FLAGS_NEEDS_FULL_SCAN
60  *   An error was detected in this file system, a full scan and possibly repair
61  *   should be initiated on the next mount. Reset after scanning.
62  */
63 typedef uint8_t super_block_opt_flags8_t;
64 #define SUPER_BLOCK_OPT_FLAGS_HAS_FLAGS3 (0x1U)
65 #define SUPER_BLOCK_OPT_FLAGS_HAS_CHECKPOINT (0x2U)
66 #define SUPER_BLOCK_OPT_FLAGS_NEEDS_FULL_SCAN (0x4U)
67 
68 /**
69  * typedef super_block_required_flags16_t - Required FS flags, can be ORed
70  *                                          together
71  *
72  * These flags are required to be supported by the current implementation; if
73  * any unrecognized flag bits are set the file system must not be mounted.
74  * Versions of the storage service prior to the addition of the @required_flags
75  * field will interpret non-zero flags as a high @fs_version and will refuse to
76  * mount the file-system.
77  *
78  * %SUPER_BLOCK_REQUIRED_FLAGS_MAIN_REPAIRED
79  *   Indicates that the main (i.e. flags does not contain
80  *   %SUPER_BLOCK_FLAGS_ALTERNATE) file system has been repaired in a manner
81  *   that effectively resulted in rollback to a previous state since it was last
82  *   cleared. This flag is required to be supported, if set, so that we do not
83  *   discard a repaired state by running an older version of the storage
84  *   service. This flag is cleared when the main file system is cleared, and
85  *   therefore only tracks repairs since the file system was last cleared.
86  * %SUPER_BLOCK_REQUIRED_FLAGS_MASK
87  *   Mask of bits that are understood by the current storage implementation. If
88  *   any bits of this field are set outside of this mask, do not mount the file
89  *   system.
90  */
91 typedef uint16_t super_block_required_flags16_t;
92 #define SUPER_BLOCK_REQUIRED_FLAGS_MAIN_REPAIRED (0x1U)
93 #define SUPER_BLOCK_REQUIRED_FLAGS_MASK \
94     (SUPER_BLOCK_REQUIRED_FLAGS_MAIN_REPAIRED)
95 
96 /**
97  * struct super_block - On-disk root block for file system state
98  * @iv:             Initial value used for encrypt/decrypt.
99  * @magic:          SUPER_BLOCK_MAGIC.
100  * @flags:          Version in bottom two bits, other bits are reserved.
101  * @fs_version:     Required file system version. If greater than
102  *                  %SUPER_BLOCK_FS_VERSION, do not mount or overwrite
103  *                  filesystem.
104  * @required_flags: Required file system flags. To mount this file system, any
105  *                  non-zero flag bits set must be supported by the storage
106  *                  implementation.
107  * @block_size:     Block size of file system.
108  * @block_num_size: Number of bytes used to store block numbers.
109  * @mac_size:       number of bytes used to store mac values.
110  * @opt_flags:      Optional flags, any of &typedef super_block_opt_flags8_t
111  *                  ORed together.
112  * @res2:           Reserved for future use. Write 0, read ignore.
113  * @block_count:    Size of file system.
114  * @free:           Block and mac of free set root node.
115  * @free_count:     Currently unused.
116  * @files:          Block and mac of files tree root node.
117  * @res3:           Reserved for future use. Write 0, read ignore.
118  * @flags2:         Copy of @flags. Allows storing the super-block in a device
119  *                  that does not support an atomic write of the entire
120  *                  super-block.
121  * @backup:         Backup of previous super-block, used to support an alternate
122  *                  backing store. 0 if no backup has ever been written. Once a
123  *                  backup exists, it will only ever be swapped, not cleared.
124  * @checkpoint:     Block and mac of checkpoint metadata block. 0 if a
125  *                  checkpoint does not exist.
126  * @res4:           Reserved for future use. Write 0, read ignore.
127  * @flags3:         Copy of @flags. Allows storing the super-block in a device
128  *                  that does not support an atomic write of the entire
129  *                  super-block. If SUPER_BLOCK_OPT_FLAGS_HAS_FLAGS3 is not set,
130  *                  @flags3 is not checked and fields after @flags2 are ignored.
131  *
132  * Block numbers and macs in @free and @files are packed as indicated by
133  * @block_num_size and @mac_size, but unlike other on-disk data, the size of the
134  * whole field is always the full 24 bytes needed for a 8 byte block number and
135  * 16 byte mac. This allows the @flags2 and @flags3 to be validated before
136  * knowing @block_num_size and @mac_size.
137  */
138 struct super_block {
139     struct iv iv;
140     uint64_t magic;
141     uint32_t flags;
142     uint16_t fs_version;
143     super_block_required_flags16_t required_flags;
144     uint32_t block_size;
145     uint8_t block_num_size;
146     uint8_t mac_size;
147     super_block_opt_flags8_t opt_flags;
148     uint8_t res2;
149     data_block_t block_count;
150     struct block_mac free;
151     data_block_t free_count;
152     struct block_mac files;
153     uint32_t res3[5];
154     uint32_t flags2;
155     struct super_block_backup backup;
156     struct block_mac checkpoint;
157     uint32_t res4[6];
158     uint32_t flags3;
159 };
160 STATIC_ASSERT(offsetof(struct super_block, flags2) == 124);
161 STATIC_ASSERT(offsetof(struct super_block, flags3) == 252);
162 STATIC_ASSERT(sizeof(struct super_block) == 256);
163 
164 /*
165  * We rely on these offsets in future_fs_version_test and
166  * unknown_required_flags_test in the storage_block_test to test that we will
167  * not mount or modify a super block with unknown version or fs flags.
168  */
169 STATIC_ASSERT(offsetof(struct super_block, fs_version) == 28);
170 STATIC_ASSERT(offsetof(struct super_block, required_flags) == 30);
171 
172 /* block_device_tipc.c ensures that we have at least 256 bytes in RPMB blocks */
173 STATIC_ASSERT(sizeof(struct super_block) <= 256);
174 
175 static struct list_node fs_list = LIST_INITIAL_VALUE(fs_list);
176 
177 /**
178  * update_super_block_internal - Generate and write superblock
179  * @tr:         Transaction object.
180  * @free:       New free root.
181  * @files:      New files root.
182  * @checkpoint: New checkpoint metadata block.
183  * @pinned:     New block should not be reused in the block cache until
184  *              it is successfully written.
185  *
186  * Return: %true if super block was updated (in cache), %false if transaction
187  * failed before super block was updated.
188  */
update_super_block_internal(struct transaction * tr,const struct block_mac * free,const struct block_mac * files,const struct block_mac * checkpoint,bool pinned)189 static bool update_super_block_internal(struct transaction* tr,
190                                         const struct block_mac* free,
191                                         const struct block_mac* files,
192                                         const struct block_mac* checkpoint,
193                                         bool pinned) {
194     struct super_block* super_rw;
195     struct obj_ref super_ref = OBJ_REF_INITIAL_VALUE(super_ref);
196     unsigned int ver;
197     unsigned int index;
198     super_block_required_flags16_t required_flags = 0;
199     uint32_t flags;
200     uint32_t block_size = tr->fs->super_dev->block_size;
201     super_block_opt_flags8_t opt_flags = SUPER_BLOCK_OPT_FLAGS_HAS_FLAGS3 |
202                                          SUPER_BLOCK_OPT_FLAGS_HAS_CHECKPOINT;
203 
204     if (!tr->fs->writable) {
205         pr_err("Attempting to write superblock for read-only filesystem\n");
206         if (!tr->failed) {
207             transaction_fail(tr);
208         }
209         return false;
210     }
211 
212     assert(block_size >= sizeof(struct super_block));
213     assert(tr->fs->initial_super_block_tr == NULL ||
214            tr->fs->initial_super_block_tr == tr);
215 
216     ver = (tr->fs->super_block_version + 1) & SUPER_BLOCK_FLAGS_VERSION_MASK;
217     index = ver & SUPER_BLOCK_FLAGS_BLOCK_INDEX_MASK;
218     flags = ver;
219     if (!free && !files) {
220         /*
221          * If the free and files trees are not provided, the filesystem is in
222          * the initial empty state.
223          */
224         flags |= SUPER_BLOCK_FLAGS_EMPTY;
225     } else {
226         /* Non-empty filesystems must have both trees (with root node blocks) */
227         assert(free);
228         assert(files);
229     }
230     if (tr->fs->alternate_data) {
231         flags |= SUPER_BLOCK_FLAGS_ALTERNATE;
232     }
233     if (tr->repaired || tr->fs->main_repaired) {
234         /*
235          * We don't track repairs in alternate data mode, so we shouldn't do
236          * them - ensure the transaction does not include a repair if we are in
237          * alternate state. The FS flag is used to persist the state for the
238          * main FS.
239          */
240         assert(!tr->repaired || !tr->fs->alternate_data);
241         required_flags |= SUPER_BLOCK_REQUIRED_FLAGS_MAIN_REPAIRED;
242         /*
243          * TODO: We would like to track the number of repairs in addition to the
244          * current repair state. This may be up to three different counters: 1)
245          * the number of times this fs has been repaired over the device
246          * lifetime to report in metrics, 2) the number of repairs since last
247          * clear, and 3) the overall fs generation count (number of device
248          * lifetime repairs+clears). 2) and 3) would primarily be useful if we
249          * expose them to clients via a new query API, while 1) would mostly be
250          * for device metrics. We can implement some or all of these counters
251          * when we add an API that consumes them.
252          */
253     }
254     if (tr->fs->needs_full_scan) {
255         opt_flags |= SUPER_BLOCK_OPT_FLAGS_NEEDS_FULL_SCAN;
256     }
257 
258     pr_write("write super block %" PRIu64 ", ver %d\n",
259              tr->fs->super_block[index], ver);
260 
261     super_rw = block_get_cleared_super(tr, tr->fs->super_block[index],
262                                        &super_ref, pinned);
263     if (tr->failed) {
264         block_put_dirty_discard(super_rw, &super_ref);
265         return false;
266     }
267     super_rw->magic = SUPER_BLOCK_MAGIC;
268     super_rw->flags = flags;
269     /* TODO: keep existing fs version when possible */
270     super_rw->fs_version = SUPER_BLOCK_FS_VERSION;
271     super_rw->required_flags = required_flags;
272     super_rw->block_size = tr->fs->dev->block_size;
273     super_rw->block_num_size = tr->fs->block_num_size;
274     super_rw->mac_size = tr->fs->mac_size;
275     super_rw->opt_flags = opt_flags;
276     super_rw->block_count = tr->fs->dev->block_count;
277     if (free) {
278         super_rw->free = *free;
279     }
280     super_rw->free_count = 0; /* TODO: remove or update */
281     if (files) {
282         super_rw->files = *files;
283     }
284     if (checkpoint) {
285         super_rw->checkpoint = *checkpoint;
286     }
287     super_rw->flags2 = flags;
288     super_rw->backup = tr->fs->backup;
289     super_rw->flags3 = flags;
290     tr->fs->written_super_block_version = ver;
291 
292     block_put_dirty_no_mac(super_rw, &super_ref, tr->fs->allow_tampering);
293 
294     return true;
295 }
296 
297 /**
298  * update_super_block - Generate and write superblock
299  * @tr:         Transaction object.
300  * @free:       New free root.
301  * @files:      New files root.
302  * @checkpoint: New checkpoint metadata block.
303  *
304  * Return: %true if super block was updated (in cache), %false if transaction
305  * failed before super block was updated.
306  */
bool update_super_block(struct transaction* tr,
                        const struct block_mac* free,
                        const struct block_mac* files,
                        const struct block_mac* checkpoint) {
    /* The public entry point always requests an unpinned superblock. */
    const bool pinned = false;

    return update_super_block_internal(tr, free, files, checkpoint, pinned);
}
313 
314 /**
315  * write_initial_super_block - Write initial superblock to internal transaction
316  * @fs:         File system state object.
317  *
318  * When needed, this must be called before creating any other transactions on
319  * this filesystem so we don't fill up the cache with entries that can't be
320  * flushed to make room for this block.
321  *
322  * Return: %true if the initial empty superblock was successfully written to the
323  * cache, or %false otherwise.
324  */
write_initial_super_block(struct fs * fs)325 static bool write_initial_super_block(struct fs* fs) {
326     struct transaction* tr;
327     tr = calloc(1, sizeof(*tr));
328     if (!tr) {
329         return false;
330     }
331     fs->initial_super_block_tr = tr;
332 
333     transaction_init(tr, fs, true);
334     return update_super_block_internal(tr, NULL, NULL, NULL, true);
335 }
336 
337 /**
338  * write_current_super_block - Write current superblock to internal transaction
339  * @fs:           File system state object.
340  * @reinitialize: Allow the special transaction to be reinitialized if it has
341  *                failed
342  *
343  * Write the current state of the super block to an internal transaction that
344  * will be written before any other block. This can be used to re-sync the
345  * in-memory fs-state with the on-disk state after detecting a write failure
346  * where we no longer know the on-disk super block state.
347  *
348  * @fs must be writable when calling this function.
349  */
write_current_super_block(struct fs * fs,bool reinitialize)350 void write_current_super_block(struct fs* fs, bool reinitialize) {
351     bool super_block_updated;
352     struct transaction* tr;
353 
354     assert(fs->writable);
355 
356     if (fs->initial_super_block_tr) {
357         /*
358          * If initial_super_block_tr is already pending and not failed there is
359          * no need to allocate a new one so return early.
360          *
361          * If the special transaction has failed, we need to re-initialize it so
362          * that we can attempt to recover to a good state.
363          *
364          * We are only allowed to reinitialze if the @reinitialize parameter is
365          * true. We don't want to allow reinitialization while cleaning blocks
366          * (i.e. via fs_unknown_super_block_state_all()), as this would reset
367          * the special transaction to non-failed state and create a situation
368          * where transaction_initial_super_block_complete() cannot know if it
369          * successfully flushed the special transaction to disk. Therefore we
370          * only allow transaction_initial_super_block_complete() to reinitialize
371          * a failed special transaction after it attempts and fails to write the
372          * block to disk.
373          *
374          * Since we pin special superblock entries in the block cache and
375          * therefore cannot evict them with normal transactions,
376          * transaction_initial_super_block_complete() is the only place we can
377          * attempt a special transaction write, and if it fails the transaction
378          * is immediately reinitialized. Therefore we should only ever be in a
379          * failed state if reinitialize is true (i.e. we are being called from
380          * transaction_initial_super_block_complete()).
381          */
382 
383         assert(reinitialize || !fs->initial_super_block_tr->failed);
384         if (!fs->initial_super_block_tr->failed || !reinitialize) {
385             return;
386         }
387 
388         tr = fs->initial_super_block_tr;
389         transaction_activate(tr);
390     } else {
391         tr = calloc(1, sizeof(*tr));
392         if (!tr) {
393             /* Not safe to proceed. TODO: add flag to defer this allocation? */
394             abort();
395         }
396         transaction_init(tr, fs, true);
397         fs->initial_super_block_tr = tr;
398     }
399 
400     /*
401      * Until the filesystem contains committed data, fs->free.block_tree.root
402      * will be zero, i.e. an invalid block mac. fs->free.block_tree.root is only
403      * updated in transaction_complete() after successfully writing a new
404      * superblock. If the filesystem is empty, we need to emit a cleared
405      * superblock with a special flag to prevent the superblock state from
406      * getting out of sync with the filesystem data if a reboot occurrs before
407      * committing a superblock with data.
408      *
409      * We can't use fs->files.root here because it may be invalid if there are
410      * no files in the filesystem. If the free node is zero, then the files node
411      * must be as well, so we assert this.
412      */
413     bool fs_is_cleared = !block_mac_valid(tr, &fs->free.block_tree.root);
414     if (fs_is_cleared) {
415         assert(!block_mac_valid(tr, &fs->files.root));
416         super_block_updated =
417                 update_super_block_internal(tr, NULL, NULL, NULL, true);
418     } else {
419         super_block_updated = update_super_block_internal(
420                 tr, &fs->free.block_tree.root, &fs->files.root, &fs->checkpoint,
421                 true);
422     }
423     if (!super_block_updated) {
424         /* Not safe to proceed. TODO: add flag to try again? */
425         fprintf(stderr,
426                 "Could not create pending write for current superblock state. "
427                 "Not safe to proceed.\n");
428         abort();
429     }
430 }
431 
432 /**
433  * fs_mark_scan_required - Require a full scan for invalid blocks the next time
434  *                         this FS is mounted
435  * @fs:             File system object
436  *
437  * Marks the file system to require a full scan (and possibly repair) on the
438  * next mount. If @fs is writable, this function immediately writes a new copy
439  * of the current super block, so the flag will persist even with no further
440  * writes to the file system.
441  */
fs_mark_scan_required(struct fs * fs)442 void fs_mark_scan_required(struct fs* fs) {
443     fs->needs_full_scan = true;
444     if (!fs->writable) {
445         /* We can't write back the superblock until this FS is writable. */
446         return;
447     }
448     write_current_super_block(fs, false);
449     assert(fs->initial_super_block_tr);
450     transaction_initial_super_block_complete(fs->initial_super_block_tr);
451 }
452 
453 /**
454  * super_block_valid - Check if superblock is valid
455  * @dev:        Block device that super block was read from.
456  * @super:      Super block data.
457  *
458  * Return: %true if @super is valid for @dev, %false otherwise.
459  */
super_block_valid(const struct block_device * dev,const struct super_block * super)460 static bool super_block_valid(const struct block_device* dev,
461                               const struct super_block* super) {
462     if (super->magic != SUPER_BLOCK_MAGIC) {
463         pr_init("bad magic, 0x%" PRIx64 "\n", super->magic);
464         return false;
465     }
466     if (super->flags != super->flags2) {
467         pr_warn("flags, 0x%x, does not match flags2, 0x%x\n", super->flags,
468                 super->flags2);
469         return false;
470     }
471     if ((super->opt_flags & SUPER_BLOCK_OPT_FLAGS_HAS_FLAGS3) &&
472         super->flags != super->flags3) {
473         pr_warn("flags, 0x%x, does not match flags3, 0x%x\n", super->flags,
474                 super->flags3);
475         return false;
476     }
477     if (super->fs_version > SUPER_BLOCK_FS_VERSION) {
478         pr_warn("super block is from the future: 0x%x\n", super->fs_version);
479         return true;
480     }
481     if (super->flags & ~SUPER_BLOCK_FLAGS_SUPPORTED_MASK) {
482         pr_warn("unknown flags set, 0x%x\n", super->flags);
483         return false;
484     }
485     if (super->block_size != dev->block_size) {
486         pr_warn("bad block size 0x%x, expected 0x%zx\n", super->block_size,
487                 dev->block_size);
488         return false;
489     }
490     if (super->block_num_size != dev->block_num_size) {
491         pr_warn("invalid block_num_size %d, expected %zd\n",
492                 super->block_num_size, dev->block_num_size);
493         return false;
494     }
495     if (super->mac_size != dev->mac_size) {
496         pr_warn("invalid mac_size %d, expected %zd\n", super->mac_size,
497                 dev->mac_size);
498         return false;
499     }
500     if (!dev->tamper_detecting && super->mac_size != sizeof(struct mac)) {
501         pr_warn("invalid mac_size %d != %zd\n", super->mac_size,
502                 sizeof(data_block_t));
503         return false;
504     }
505 
506     return true;
507 }
508 
509 /**
510  * super_version_delta - Find the version delta between two superblocks
511  * @new_super: Candidate new superblock
512  * @old_super: Old superblock
513  *
514  * The overflow in this function is intentional as a way to use a wrapping
515  * two-bit counter.
516  *
517  * Return: Wrapped difference between the two bit version numbers in the two
518  * superblocks. This will be 1 when new is newer than old, 3 when old is
519  * newer than new, and any other number indicates an invalid/corrupt version.
520  */
521 __attribute__((no_sanitize("unsigned-integer-overflow"))) static inline uint8_t
super_version_delta(const struct super_block * new_super,const struct super_block * old_super)522 super_version_delta(const struct super_block* new_super,
523                     const struct super_block* old_super) {
524     return (new_super->flags - old_super->flags) &
525            SUPER_BLOCK_FLAGS_VERSION_MASK;
526 }
527 
528 /**
529  * use_new_super - Check if new superblock is valid and more recent than old
530  * @dev:                Block device that super block was read from.
531  * @new_super:          New super block data.
532  * @new_super_index:    Index that @new_super was read from.
533  * @old_super:          Old super block data, or %NULL.
534  *
535  * Return: %true if @new_super is valid for @dev, and more recent than
536  * @old_super (or @old_super is %NULL), %false otherwise.
537  */
use_new_super(const struct block_device * dev,const struct super_block * new_super,unsigned int new_super_index,const struct super_block * old_super)538 static bool use_new_super(const struct block_device* dev,
539                           const struct super_block* new_super,
540                           unsigned int new_super_index,
541                           const struct super_block* old_super) {
542     uint8_t dv;
543     if (!super_block_valid(dev, new_super)) {
544         return false;
545     }
546     if ((new_super->flags & SUPER_BLOCK_FLAGS_BLOCK_INDEX_MASK) !=
547         new_super_index) {
548         pr_warn("block index, 0x%x, does not match flags, 0x%x\n",
549                 new_super_index, new_super->flags);
550         return false;
551     }
552     if (!old_super) {
553         return true;
554     }
555     dv = super_version_delta(new_super, old_super);
556     pr_read("version delta, %d (new flags 0x%x, old flags 0x%x)\n", dv,
557             new_super->flags, old_super->flags);
558     if (dv == 1) {
559         return true;
560     }
561     if (dv == 3) {
562         return false;
563     }
564     pr_warn("bad version delta, %d (new flags 0x%x, old flags 0x%x)\n", dv,
565             new_super->flags, old_super->flags);
566     return false;
567 }
568 
569 static void fs_init_free_set(struct fs* fs, struct block_set* set);
570 
571 /**
572  * fs_set_roots - Initialize fs state from super block roots
573  * @fs:                File system state object
574  * @free:              Free set root node
575  * @files:             Files tree root node
576  * @checkpoint:        Checkpoint metadata block. May be NULL.
577  * @restore_checkpoint: If %true, restore files and free roots from @checkpoint
578  *                      (which must not be NULL).
579  *
580  * Unconditionally sets the filesystem roots to @free and @files respectively,
581  * then attempts to restore the checkpoint roots if @restore_checkpoint is
582  * %true. When attempting to restore from a checkpoint that exists but is not
583  * readable, return %false, leaving the filesystem roots initialized to @free
584  * and @files. If attempting to restore from checkpoint but no checkpoint was
585  * previously set, the restore is refused and this function returns %false.
586  *
587  * Returns %true if fs roots were correctly initialized as requested, %false if
588  * a requested checkpoint restore failed (but roots were still initialized to
589  * the provided blocks).
590  */
fs_set_roots(struct fs * fs,const struct block_mac * free,const struct block_mac * files,const struct block_mac * checkpoint,bool restore_checkpoint)591 static bool fs_set_roots(struct fs* fs,
592                          const struct block_mac* free,
593                          const struct block_mac* files,
594                          const struct block_mac* checkpoint,
595                          bool restore_checkpoint) {
596     bool success = true;
597     struct transaction tr;
598     struct block_tree checkpoint_files =
599             BLOCK_TREE_INITIAL_VALUE(checkpoint_files);
600 
601     assert(!restore_checkpoint || checkpoint);
602 
603     fs->free.block_tree.root = *free;
604     fs->files.root = *files;
605 
606     if (checkpoint) {
607         fs->checkpoint = *checkpoint;
608         transaction_init(&tr, fs, true);
609 
610         /*
611          * fs->checkpoint_free is initialized to contain all blocks, so we
612          * don't have to initialize it if there is no checkpoint on disk
613          */
614         assert(!block_range_empty(fs->checkpoint_free.initial_range));
615 
616         if (block_mac_valid(&tr, &fs->checkpoint)) {
617             success = checkpoint_read(&tr, &fs->checkpoint, &checkpoint_files,
618                                       &fs->checkpoint_free);
619         } else if (restore_checkpoint) {
620             /* We do not want to restore a non-existent checkpoint */
621             success = false;
622         }
623         if (success && restore_checkpoint) {
624             /*
625              * Checkpoint restore counts as a repair which must set the repaired
626              * flag. We disallow checkpoint restore in alternate mode in
627              * fs_init().
628              */
629             fs->main_repaired = true;
630             fs->files.root = checkpoint_files.root;
631             block_set_copy_ro(&tr, &fs->free, &fs->checkpoint_free);
632             /*
633              * block_set_copy_ro() clears the copy_on_write flag for the free
634              * set, so we have to reset it to allow modification.
635              */
636             fs->free.block_tree.copy_on_write = true;
637         }
638         if (!tr.failed) {
639             /* temporary transaction is only for reading, drop it */
640             transaction_fail(&tr);
641         }
642         transaction_free(&tr);
643     }
644 
645     return success;
646 }
647 
648 /**
649  * fs_init_free_set - Initialize an initial free set for a file system
650  * @fs:         File system state object.
651  * @set:        Block set to initialize
652  *
653  * Initializes @set to the entire range of @fs, i.e. all blocks are free.
654  */
fs_init_free_set(struct fs * fs,struct block_set * set)655 static void fs_init_free_set(struct fs* fs, struct block_set* set) {
656     struct block_range range = {
657             .start = fs->min_block_num,
658             .end = fs->dev->block_count,
659     };
660     block_set_add_initial_range(set, range);
661 }
662 
663 /**
664  * fs_init_from_super - Initialize file system from super block
665  * @fs:         File system state object.
666  * @super:      Superblock data, or %NULL.
667  * @flags:      Any of &typedef fs_init_flags32_t, ORed together.
668  *
669  * Return: 0 if super block was usable, -1 if a fatal error was encountered and
670  * initialization should not continue. The file system may not be readable, even
671  * if this function returns 0. Check @fs->readable before attempting to read
672  * from this file system.
673  */
fs_init_from_super(struct fs * fs,const struct super_block * super,fs_init_flags32_t flags)674 static int fs_init_from_super(struct fs* fs,
675                               const struct super_block* super,
676                               fs_init_flags32_t flags) {
677     bool is_clear = false;
678     bool do_clear = flags & FS_INIT_FLAGS_DO_CLEAR;
679     bool do_swap = false; /* Does the active superblock alternate mode match the
680                              current mode? */
681     bool do_clear_backup = false;
682     bool has_backup_field =
683             super && (super->opt_flags & SUPER_BLOCK_OPT_FLAGS_HAS_FLAGS3);
684     bool has_checkpoint_field =
685             has_backup_field && super &&
686             (super->opt_flags & SUPER_BLOCK_OPT_FLAGS_HAS_CHECKPOINT);
687     bool recovery_allowed = flags & FS_INIT_FLAGS_RECOVERY_CLEAR_ALLOWED;
688     bool read_only = false;
689     const struct block_mac* new_files_root;
690     const struct block_mac* new_free_root;
691     const struct block_mac* new_checkpoint = NULL;
692 
693     block_set_init(fs, &fs->free);
694     fs->free.block_tree.copy_on_write = true;
695     fs_file_tree_init(fs, &fs->files);
696     fs->files.copy_on_write = true;
697     fs->files.allow_copy_on_write = true;
698     fs->main_repaired = false;
699 
700     memset(&fs->checkpoint, 0, sizeof(fs->checkpoint));
701     block_set_init(fs, &fs->checkpoint_free);
702     /*
703      * checkpoint_init() will clear the checkpoint initial range if a valid
704      * checkpoint exists.
705      */
706     fs_init_free_set(fs, &fs->checkpoint_free);
707 
708     /* Reserve 1/4 for tmp blocks plus half of the remaining space */
709     fs->reserved_count = fs->dev->block_count / 8 * 5;
710 
711     fs->alternate_data = flags & FS_INIT_FLAGS_ALTERNATE_DATA;
712 
713     /*
714      * Check version and flags after initializing an empty FS, so that we can
715      * disallow writing and continue initializing other file systems. If we exit
716      * early here this file system will be inaccessible, but its fields are
717      * safely initialized.
718      */
719     if (super && super->fs_version > SUPER_BLOCK_FS_VERSION) {
720         pr_err("ERROR: super block is from the future 0x%x\n",
721                super->fs_version);
722         error_report_superblock_invalid(fs->name);
723         assert(!fs->readable);
724         assert(!fs->writable);
725         return 0;
726     }
727 
728     if (super && (super->required_flags & ~SUPER_BLOCK_REQUIRED_FLAGS_MASK)) {
729         pr_err("ERROR: super block requires unrecognized fs features: 0x%x\n",
730                super->required_flags);
731         error_report_superblock_invalid(fs->name);
732         assert(!fs->readable);
733         assert(!fs->writable);
734         return 0;
735     }
736 
737     if (super) {
738         fs->super_block_version = super->flags & SUPER_BLOCK_FLAGS_VERSION_MASK;
739         fs->needs_full_scan =
740                 super->opt_flags & SUPER_BLOCK_OPT_FLAGS_NEEDS_FULL_SCAN;
741         fs->main_repaired = super->required_flags &
742                             SUPER_BLOCK_REQUIRED_FLAGS_MAIN_REPAIRED;
743 
744         do_swap = !(super->flags & SUPER_BLOCK_FLAGS_ALTERNATE) !=
745                   !(flags & FS_INIT_FLAGS_ALTERNATE_DATA);
746 
747         if (do_swap) {
748             pr_init("Swapping super-block with alternate\n");
749 
750             fs->backup.flags = super->flags & (SUPER_BLOCK_FLAGS_EMPTY |
751                                                SUPER_BLOCK_FLAGS_ALTERNATE);
752             fs->backup.free = super->free;
753             fs->backup.files = super->files;
754             fs->backup.checkpoint = super->checkpoint;
755 
756             if (!has_backup_field ||
757                 super->backup.flags & SUPER_BLOCK_FLAGS_EMPTY) {
758                 is_clear = true;
759             } else if (has_backup_field) {
760                 new_files_root = &super->backup.files;
761                 new_free_root = &super->backup.free;
762                 if (has_checkpoint_field) {
763                     new_checkpoint = &super->backup.checkpoint;
764                 }
765             }
766         } else {
767             if (has_backup_field) {
768                 fs->backup = super->backup;
769             }
770 
771             if (super->flags & SUPER_BLOCK_FLAGS_EMPTY) {
772                 is_clear = true;
773             } else {
774                 new_files_root = &super->files;
775                 new_free_root = &super->free;
776                 if (has_checkpoint_field) {
777                     new_checkpoint = &super->checkpoint;
778                 }
779             }
780         }
781 
782         if (!is_clear && !do_clear &&
783             (!block_probe(fs, new_files_root, true) ||
784              !block_probe(fs, new_free_root, false))) {
785             pr_init("Backing file probe failed, fs is corrupted.\n");
786             if (recovery_allowed) {
787                 pr_init("Attempting to clear corrupted fs.\n");
788                 do_clear = true;
789             }
790         }
791 
792         /*
793          * Check that the block device has not shrunk. Shrinking is only allowed
794          * in limited circumstances if we are also clearing the filesystem.
795          */
796         if (super->block_count > fs->dev->block_count) {
797             if ((!do_clear) && (!is_clear)) {
798                 /*
799                  * If block device is smaller than super and we're not clearing
800                  * the fs, we want to prevent write access to avoid losing data.
801                  * Read-only access is still allowed, although blocks may be
802                  * missing.
803                  */
804                 pr_err("bad block count 0x%" PRIx64 ", expected <= 0x%" PRIx64
805                        "\n",
806                        super->block_count, fs->dev->block_count);
807                 read_only = true;
808             } else if (flags & FS_INIT_FLAGS_ALTERNATE_DATA) {
809                 /*
810                  * Either we are on main filesystem and switching to alternate
811                  * or we are on alternate. Either case is an error. If we get
812                  * here, then the alternate FS is not backed by a temp file,
813                  * which should never happen. We want to error loudly in this
814                  * case, but continue mounting other file systems.
815                  */
816                 pr_err("Can't clear fs if FS_INIT_FLAGS_ALTERNATE_DATA is"
817                        " set .\n");
818                 assert(!fs->readable);
819                 assert(!fs->writable);
820                 return 0;
821             } else {
822                 /*
823                  * If we are are on main filesystem and the backup is an
824                  * alternate, clear the backup also.
825                  */
826                 do_clear_backup = true;
827             }
828         }
829     }
830 
831     if (!fs->alternate_data && (flags & FS_INIT_FLAGS_RESTORE_CHECKPOINT)) {
832         fs->needs_full_scan = false;
833     }
834 
835     /*
836      * If any of the following are true:
837      * - we are initializing a new fs
838      * - we are not swapping but detect an old superblock without the backup
839      * - filesystem device has shrunk and FS_INIT_FLAGS_DO_CLEAR is set
840      * then ensure that the backup slot is a valid empty filesystem in case we
841      * later switch filesystems without an explicit clear flag.
842      */
843     if (!super || (!do_swap && !has_backup_field) || do_clear_backup) {
844         fs->backup = (struct super_block_backup){
845                 .flags = SUPER_BLOCK_FLAGS_EMPTY,
846                 .files = {0},
847                 .free = {0},
848                 .checkpoint = {0},
849         };
850     }
851 
852     if (super && !is_clear && !do_clear) {
853         if (!fs_set_roots(fs, new_free_root, new_files_root, new_checkpoint,
854                           flags & FS_INIT_FLAGS_RESTORE_CHECKPOINT)) {
855             /*
856              * fs_set_roots() returns false if the checkpoint restore failed,
857              * but leaves the roots in a valid state to allow read-only access.
858              */
859             pr_err("fs %s: failed to initialize filesystem roots\n", fs->name);
860             read_only = true;
861         } else {
862             pr_init("fs %s: loaded super block version %d, checkpoint exists: %d\n",
863                     fs->name, fs->super_block_version,
864                     block_range_empty(fs->checkpoint_free.initial_range));
865         }
866     } else {
867         if (is_clear) {
868             pr_init("fs %s: superblock, version %d, is empty fs\n", fs->name,
869                     fs->super_block_version);
870         } else if (do_clear) {
871             pr_init("fs %s: clear requested, create empty, version %d\n",
872                     fs->name, fs->super_block_version);
873             if (!fs->alternate_data) {
874                 fs->main_repaired = false;
875                 fs->needs_full_scan = false;
876             }
877         } else {
878             pr_init("fs %s: no valid super-block found, create empty\n",
879                     fs->name);
880         }
881         fs_init_free_set(fs, &fs->free);
882     }
883     assert(fs->block_num_size >= fs->dev->block_num_size);
884     assert(fs->block_num_size <= sizeof(data_block_t));
885     assert(fs->mac_size >= fs->dev->mac_size);
886     assert(fs->mac_size <= sizeof(struct mac));
887     assert(fs->mac_size == sizeof(struct mac) || fs->dev->tamper_detecting);
888 
889     /*
890      * fs_set_roots() unconditionally set the files and free roots. If it fails,
891      * it failed to read the checkpoint block but that should only block
892      * modification, not reading.
893      */
894     fs->readable = true;
895 
896     if (read_only) {
897         assert(!fs->writable);
898         return 0;
899     }
900 
901     fs->writable = true;
902     if (do_clear && !is_clear) {
903         if (!write_initial_super_block(fs)) {
904             return -1;
905         }
906     } else if (flags & FS_INIT_FLAGS_RESTORE_CHECKPOINT) {
907         /*
908          * Flush the new restored checkpoint to superblock before overwriting
909          * any data blocks. We know that we can't already have a pending
910          * initial_super_block_tr yet because we just made the filesystem
911          * writable, and write_current_super_block() requires a writable
912          * filesystem.
913          */
914         assert(!fs->initial_super_block_tr);
915         write_current_super_block(fs, false);
916     }
917 
918     return 0;
919 }
920 
921 /**
922  * load_super_block - Find and load superblock and initialize file system state
923  * @fs:         File system state object.
924  * @flags:      Any of &typedef fs_init_flags32_t, ORed together.
925  *
926  * Return: 0 if super block was readable and not from a future file system
927  * version (regardless of its other content), -1 if not.
928  */
load_super_block(struct fs * fs,fs_init_flags32_t flags)929 static int load_super_block(struct fs* fs, fs_init_flags32_t flags) {
930     unsigned int i;
931     int ret;
932     const struct super_block* new_super;
933     struct obj_ref new_super_ref = OBJ_REF_INITIAL_VALUE(new_super_ref);
934     const struct super_block* old_super = NULL;
935     struct obj_ref old_super_ref = OBJ_REF_INITIAL_VALUE(old_super_ref);
936 
937     assert(fs->super_dev->block_size >= sizeof(struct super_block));
938 
939     for (i = 0; i < countof(fs->super_block); i++) {
940         new_super = block_get_super(fs, fs->super_block[i], &new_super_ref);
941         if (!new_super) {
942             if (fs->allow_tampering) {
943                 /*
944                  * Superblock may not exist yet in non-secure storage, proceed
945                  * anyway
946                  */
947                 continue;
948             }
949             pr_err("failed to read super-block\n");
950             ret = -1;  // -EIO ? ERR_IO?;
951             goto err;
952         }
953         if (use_new_super(fs->dev, new_super, i, old_super)) {
954             if (old_super) {
955                 block_put(old_super, &old_super_ref);
956             }
957             old_super = new_super;
958             obj_ref_transfer(&old_super_ref, &new_super_ref);
959         } else {
960             block_put(new_super, &new_super_ref);
961         }
962     }
963 
964     ret = fs_init_from_super(fs, old_super, flags);
965 err:
966     if (old_super) {
967         block_put(old_super, &old_super_ref);
968     }
969     return ret;
970 }
971 
972 struct fs_check_state {
973     struct file_iterate_state iter;
974     bool delete_invalid_files;
975 
976     bool internal_state_valid;
977     bool invalid_block_found;
978 };
979 
fs_check_file(struct file_iterate_state * iter,struct transaction * tr,const struct block_mac * block_mac,bool added,bool removed)980 static bool fs_check_file(struct file_iterate_state* iter,
981                           struct transaction* tr,
982                           const struct block_mac* block_mac,
983                           bool added,
984                           bool removed) {
985     struct fs_check_state* fs_check_state =
986             containerof(iter, struct fs_check_state, iter);
987     struct obj_ref info_ref = OBJ_REF_INITIAL_VALUE(info_ref);
988     struct storage_file_handle file;
989     char path[FS_PATH_MAX];
990 
991     assert(!tr->failed);
992     assert(!tr->invalid_block_found);
993 
994     const struct file_info* info = file_get_info(tr, block_mac, &info_ref);
995     if (!info) {
996         pr_err("could not get file info at block %" PRIu64 "\n",
997                block_mac_to_block(tr, block_mac));
998         fs_check_state->internal_state_valid = false;
999         goto err_file_info;
1000     }
1001     strncpy(path, info->path, sizeof(path));
1002     path[sizeof(path) - 1] = '\0';
1003     file_info_put(info, &info_ref);
1004 
1005     enum file_op_result result =
1006             file_open(tr, path, &file, FILE_OPEN_NO_CREATE, true);
1007     if (result != FILE_OP_SUCCESS) {
1008         /* TODO: is it ok to leak the filename here? we do it elsewhere */
1009         pr_err("could not open file %s\n", path);
1010         fs_check_state->internal_state_valid = false;
1011         goto err_file_open;
1012     }
1013 
1014     if (!file_check(tr, &file)) {
1015         fs_check_state->internal_state_valid = false;
1016     }
1017 
1018     file_close(&file);
1019 
1020 err_file_open:
1021 err_file_info:
1022     if (tr->invalid_block_found) {
1023         fs_check_state->invalid_block_found = true;
1024         /* We have noted the invalid block, reset for the next file. */
1025         tr->invalid_block_found = false;
1026     }
1027     if (tr->failed) {
1028         transaction_activate(tr);
1029     }
1030 
1031     /* Continue iterating unconditionally */
1032     return false;
1033 }
1034 
fs_check_full(struct fs * fs)1035 enum fs_check_result fs_check_full(struct fs* fs) {
1036     bool free_set_valid, file_tree_valid;
1037     enum fs_check_result res = FS_CHECK_NO_ERROR;
1038     struct transaction iterate_tr;
1039     struct fs_check_state state = {
1040             .iter.file = fs_check_file,
1041             .internal_state_valid = true,
1042             .invalid_block_found = false,
1043     };
1044 
1045     transaction_init(&iterate_tr, fs, true);
1046 
1047     /* Check the free list for consistency */
1048     free_set_valid = block_set_check(&iterate_tr, &fs->free);
1049     if (!free_set_valid || iterate_tr.invalid_block_found) {
1050         pr_err("free block set is invalid\n");
1051         res = FS_CHECK_INVALID_FREE_SET;
1052         /*
1053          * We can recover the free set non-destructively by rebuilding from the
1054          * file tree, so we don't need to report the invalid block.
1055          */
1056         iterate_tr.invalid_block_found = false;
1057     }
1058     if (iterate_tr.failed) {
1059         pr_err("free set tree not fully readable\n");
1060         state.internal_state_valid = false;
1061         transaction_activate(&iterate_tr);
1062     }
1063 
1064     /* Check the file tree for consistency */
1065     file_tree_valid = block_tree_check(&iterate_tr, &fs->files);
1066     if (!file_tree_valid) {
1067         pr_err("file tree is invalid\n");
1068         res = FS_CHECK_INVALID_FILE_TREE;
1069     }
1070     if (iterate_tr.invalid_block_found) {
1071         pr_err("invalid block encountered in file tree\n");
1072         state.invalid_block_found = true;
1073         iterate_tr.invalid_block_found = false;
1074     }
1075     if (iterate_tr.failed) {
1076         pr_err("file tree not fully readable\n");
1077         state.internal_state_valid = false;
1078         transaction_activate(&iterate_tr);
1079     }
1080 
1081     file_iterate(&iterate_tr, NULL, false, &state.iter, true);
1082 
1083     /* Invalid blocks take precedence over internal consistency errors. */
1084     if (state.invalid_block_found) {
1085         res = FS_CHECK_INVALID_BLOCK;
1086     } else if (res == FS_CHECK_NO_ERROR && !state.internal_state_valid) {
1087         res = FS_CHECK_UNKNOWN;
1088     }
1089     if (!iterate_tr.failed) {
1090         transaction_fail(&iterate_tr);
1091     }
1092     transaction_free(&iterate_tr);
1093 
1094     return res;
1095 }
1096 
fs_check_quick(struct fs * fs)1097 enum fs_check_result fs_check_quick(struct fs* fs) {
1098     bool fs_is_clear = !block_range_empty(fs->free.initial_range);
1099     if (fs_is_clear || (block_probe(fs, &fs->files.root, true) &&
1100                         block_probe(fs, &fs->free.block_tree.root, false))) {
1101         return FS_CHECK_NO_ERROR;
1102     } else {
1103         return FS_CHECK_INVALID_BLOCK;
1104     }
1105 }
1106 
fs_check(struct fs * fs)1107 enum fs_check_result fs_check(struct fs* fs) {
1108     if (fs->needs_full_scan) {
1109         pr_warn("%s filesystem requires full scan on mount\n", fs->name);
1110         return fs_check_full(fs);
1111     } else {
1112         return fs_check_quick(fs);
1113     }
1114 }
1115 
1116 /**
1117  * fs_file_tree_init - Initialize an empty file tree for a file system
1118  * @fs:        File system state object.
1119  * @tree:      Block tree to initialize as a file tree.
1120  */
fs_file_tree_init(const struct fs * fs,struct block_tree * tree)1121 void fs_file_tree_init(const struct fs* fs, struct block_tree* tree) {
1122     size_t block_num_size;
1123     size_t block_mac_size;
1124 
1125     block_num_size = fs->block_num_size;
1126     block_mac_size = block_num_size + fs->mac_size;
1127     block_tree_init(tree, fs->dev->block_size, block_num_size, block_mac_size,
1128                     block_mac_size);
1129 }
1130 
1131 /**
1132  * fs_init - Initialize file system state
1133  * @fs:         File system state object.
1134  * @name:       File system name for error reporting. Must be a static string.
1135  * @key:        Key pointer. Must not be freed while @fs is in use.
1136  * @dev:        Main block device.
1137  * @super_dev:  Block device for super block.
1138  * @flags:      Any of &typedef fs_init_flags32_t, ORed together.
1139  */
fs_init(struct fs * fs,const char * name,const struct key * key,struct block_device * dev,struct block_device * super_dev,fs_init_flags32_t flags)1140 int fs_init(struct fs* fs,
1141             const char* name,
1142             const struct key* key,
1143             struct block_device* dev,
1144             struct block_device* super_dev,
1145             fs_init_flags32_t flags) {
1146     int ret;
1147 
1148     if (super_dev->block_size < sizeof(struct super_block)) {
1149         pr_err("unsupported block size for super_dev, %zd < %zd\n",
1150                super_dev->block_size, sizeof(struct super_block));
1151         return -1;  // ERR_NOT_VALID?
1152     }
1153 
1154     if (super_dev->block_count < 2) {
1155         pr_err("unsupported block count for super_dev, %" PRIu64 "\n",
1156                super_dev->block_count);
1157         return -1;  // ERR_NOT_VALID?
1158     }
1159 
1160     if ((flags & FS_INIT_FLAGS_ALTERNATE_DATA) &&
1161         (flags & FS_INIT_FLAGS_RESTORE_CHECKPOINT)) {
1162         pr_err("Alternate file system cannot restore to a checkpoint\n");
1163         return -1;
1164     }
1165 
1166     fs->name = name;
1167     fs->key = key;
1168     fs->dev = dev;
1169     fs->super_dev = super_dev;
1170     fs->readable = false;
1171     fs->writable = false;
1172     fs->allow_tampering = flags & FS_INIT_FLAGS_ALLOW_TAMPERING;
1173     fs->checkpoint_required = false;
1174     list_initialize(&fs->transactions);
1175     list_initialize(&fs->allocated);
1176     fs->initial_super_block_tr = NULL;
1177     list_add_tail(&fs_list, &fs->node);
1178 
1179     /*
1180      * We check that the super-block matches these block device params in
1181      * super_block_valid(). If these params change, the filesystem (and
1182      * alternate backup) will be wiped and reset with the new params.
1183      */
1184     fs->block_num_size = fs->dev->block_num_size;
1185     fs->mac_size = fs->dev->mac_size;
1186 
1187     if (dev == super_dev) {
1188         fs->min_block_num = 2;
1189     } else {
1190         /* TODO: use 0 when btree code allows it */
1191         fs->min_block_num = 1;
1192     }
1193     fs->super_block[0] = 0;
1194     fs->super_block[1] = 1;
1195     ret = load_super_block(fs, flags);
1196     if (ret) {
1197         fs_destroy(fs);
1198         fs->dev = NULL;
1199         fs->super_dev = NULL;
1200         return ret;
1201     }
1202 
1203     if ((flags & FS_INIT_FLAGS_AUTO_CHECKPOINT) &&
1204         !block_mac_valid_fs(fs, &fs->checkpoint)) {
1205         if (fs_check_full(fs) == FS_CHECK_NO_ERROR) {
1206             fs->checkpoint_required = true;
1207         } else {
1208             pr_err("Not automatically creating a checkpoint; "
1209                    "an error was found in filesystem %s\n",
1210                    fs->name);
1211         }
1212     }
1213 
1214     return 0;
1215 }
1216 
1217 /**
1218  * fs_destroy - Destroy file system state
1219  * @fs:         File system state object.
1220  *
1221  * Free any dynamically allocated state and check that @fs is not referenced by
1222  * any transactions.
1223  */
fs_destroy(struct fs * fs)1224 void fs_destroy(struct fs* fs) {
1225     if (fs->initial_super_block_tr) {
1226         if (!fs->initial_super_block_tr->failed) {
1227             transaction_fail(fs->initial_super_block_tr);
1228         }
1229         transaction_free(fs->initial_super_block_tr);
1230         free(fs->initial_super_block_tr);
1231         fs->initial_super_block_tr = NULL;
1232     }
1233     assert(list_is_empty(&fs->transactions));
1234     assert(list_is_empty(&fs->allocated));
1235     list_delete(&fs->node);
1236     fs->readable = false;
1237     fs->writable = false;
1238 }
1239 
1240 /**
1241  * fs_unknown_super_block_state_all - Notify filesystems of unknown disk state
1242  *
1243  * Call from other layers when detecting write failues that can cause the
1244  * in-memory state of super blocks (or other block that we don't care about) to
1245  * be different from the on-disk state. Write in-memory state to disk before
1246  * writing any other block.
1247  */
fs_unknown_super_block_state_all(void)1248 void fs_unknown_super_block_state_all(void) {
1249     struct fs* fs;
1250     list_for_every_entry(&fs_list, fs, struct fs, node) {
1251         /* TODO: filter out filesystems that are not affected? */
1252         /*
1253          * We can't reinitialize an existing, failed special transaction here.
1254          * If a initial superblock write failed and triggered
1255          * fs_unknown_super_block_state_all() we need to leave that superblock
1256          * transaction in a failed state so that the transaction that that
1257          * triggered the failing write can also be failed further up the call
1258          * chain. If a special transaction already exists we are guaranteed that
1259          * it will be reinitialized and flushed to disk before any new writes to
1260          * that FS, so we don't need to reinitialize it here.
1261          *
1262          * If this file system is not writable, we should not try to re-write
1263          * the current super block state. A read-only file system cannot have
1264          * any modifications that we are allowed to save, and it does not need
1265          * to be re-synced here as we cannot have previously failed to write its
1266          * superblock.
1267          */
1268         if (fs->writable) {
1269             write_current_super_block(fs, false /* reinitialize */);
1270         }
1271     }
1272 }
1273 
fs_fail_all_transactions(void)1274 void fs_fail_all_transactions(void) {
1275     struct transaction* tmp_tr;
1276     struct transaction* tr;
1277     struct fs* fs;
1278     list_for_every_entry(&fs_list, fs, struct fs, node) {
1279         list_for_every_entry_safe(&fs->transactions, tr, tmp_tr,
1280                                   struct transaction, node) {
1281             if (transaction_is_active(tr) && !tr->failed) {
1282                 transaction_fail(tr);
1283             }
1284         }
1285     }
1286 }
1287