/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <assert.h>
#include <inttypes.h>
#include <limits.h>
#include <lk/reflist.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <openssl/crypto.h>
#include <openssl/rand.h>

#include "block_cache.h"
#include "block_cache_priv.h"
#include "crypt.h"
#include "debug.h"
#include "debug_stats.h"
#include "error_reporting.h"
#include "transaction.h"

static bool print_cache_lookup = false;
static bool print_cache_lookup_verbose = false;
static bool print_block_ops = false;
static bool print_block_load = false;
static bool print_block_store = false;
static bool print_block_move = false;
static bool print_block_decrypt_encrypt = false;
static bool print_clean_transaction = false;
static bool print_mac_update = false;
static bool print_cache_get_ref_block_count = true;

#define BLOCK_CACHE_GUARD_1 (0xdead0001dead0003)
#define BLOCK_CACHE_GUARD_2 (0xdead0005dead0007)

static struct list_node block_cache_lru = LIST_INITIAL_VALUE(block_cache_lru);
static struct block_cache_entry block_cache_entries[BLOCK_CACHE_SIZE];
static bool block_cache_init_called = false;

static bool block_cache_entry_data_is_valid(
        const struct block_cache_entry* entry) {
    return entry->state == BLOCK_ENTRY_DATA_CLEAN_DECRYPTED ||
           entry->state == BLOCK_ENTRY_DATA_CLEAN_ENCRYPTED ||
           entry->state == BLOCK_ENTRY_DATA_DIRTY_DECRYPTED ||
           entry->state == BLOCK_ENTRY_DATA_DIRTY_ENCRYPTED;
}

static bool block_cache_entry_data_is_dirty(
        const struct block_cache_entry* entry) {
    return entry->state == BLOCK_ENTRY_DATA_DIRTY_DECRYPTED ||
           entry->state == BLOCK_ENTRY_DATA_DIRTY_ENCRYPTED;
}

static bool block_cache_entry_data_is_encrypted(
        const struct block_cache_entry* entry) {
    return entry->state == BLOCK_ENTRY_DATA_CLEAN_ENCRYPTED ||
           entry->state == BLOCK_ENTRY_DATA_DIRTY_ENCRYPTED;
}

static bool block_cache_entry_data_is_decrypted(
        const struct block_cache_entry* entry) {
    return entry->state == BLOCK_ENTRY_DATA_CLEAN_DECRYPTED ||
           entry->state == BLOCK_ENTRY_DATA_DIRTY_DECRYPTED;
}

static const char* block_cache_entry_data_state_name(
        enum block_cache_entry_data_state state) {
    switch (state) {
    case BLOCK_ENTRY_DATA_INVALID:
        return "BLOCK_ENTRY_DATA_INVALID";
    case BLOCK_ENTRY_DATA_LOADING:
        return "BLOCK_ENTRY_DATA_LOADING";
    case BLOCK_ENTRY_DATA_LOAD_FAILED:
        return "BLOCK_ENTRY_DATA_LOAD_FAILED";
    case BLOCK_ENTRY_DATA_NOT_FOUND:
        return "BLOCK_ENTRY_DATA_NOT_FOUND";
    case BLOCK_ENTRY_DATA_CLEAN_DECRYPTED:
        return "BLOCK_ENTRY_DATA_CLEAN_DECRYPTED";
    case BLOCK_ENTRY_DATA_CLEAN_ENCRYPTED:
        return "BLOCK_ENTRY_DATA_CLEAN_ENCRYPTED";
    case BLOCK_ENTRY_DATA_DIRTY_DECRYPTED:
        return "BLOCK_ENTRY_DATA_DIRTY_DECRYPTED";
    case BLOCK_ENTRY_DATA_DIRTY_ENCRYPTED:
        return "BLOCK_ENTRY_DATA_DIRTY_ENCRYPTED";
    }
    /*
     * All enum values are handled above; avoid falling off the end of a
     * non-void function if state is ever out of range.
     */
    return "BLOCK_ENTRY_DATA_UNKNOWN";
}
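
/*
 * A sketch of the entry data state machine implemented by the functions in
 * this file (arrows name the function performing each transition):
 *
 *   INVALID ---------[block_cache_queue_read]------> LOADING
 *   LOADING ---------[block_cache_complete_read]---> CLEAN_ENCRYPTED,
 *                                                    LOAD_FAILED or NOT_FOUND
 *   CLEAN_ENCRYPTED --[block_cache_entry_decrypt]--> CLEAN_DECRYPTED
 *   CLEAN_DECRYPTED --[block_dirty]----------------> DIRTY_DECRYPTED
 *   DIRTY_DECRYPTED --[block_cache_entry_encrypt]--> DIRTY_ENCRYPTED
 *   DIRTY_ENCRYPTED --[block_cache_entry_clean]----> CLEAN_ENCRYPTED
 *   DIRTY_ENCRYPTED --[block_cache_entry_decrypt]--> DIRTY_DECRYPTED
 *                     (re-read of a dirty block before write back)
 */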

/**
 * block_cache_queue_io_op - Helper function to start a read or write operation
 * @entry:      Cache entry.
 * @io_op:      BLOCK_CACHE_IO_OP_READ or BLOCK_CACHE_IO_OP_WRITE.
 *
 * Set io_op for cache entry and add it to the tail of the io_ops for the
 * block device that the cache entry belongs to.
 */
static void block_cache_queue_io_op(struct block_cache_entry* entry,
                                    int io_op) {
    assert(io_op == BLOCK_CACHE_IO_OP_READ || io_op == BLOCK_CACHE_IO_OP_WRITE);
    assert(entry->io_op == BLOCK_CACHE_IO_OP_NONE);
    assert(entry->dev);
    assert(!list_in_list(&entry->io_op_node));

    entry->io_op = io_op;
    list_add_tail(&entry->dev->io_ops, &entry->io_op_node);
}

/**
 * block_cache_queue_read - Start a read operation
 * @entry:      Cache entry.
 */
static void block_cache_queue_read(struct block_cache_entry* entry) {
    assert(!block_cache_entry_data_is_dirty(entry));
    entry->state = BLOCK_ENTRY_DATA_LOADING;
    block_cache_queue_io_op(entry, BLOCK_CACHE_IO_OP_READ);
    stats_timer_start(STATS_CACHE_START_READ);
    entry->dev->start_read(entry->dev, entry->block);
    stats_timer_stop(STATS_CACHE_START_READ);
}

/**
 * block_cache_queue_write - Start a write operation
 * @entry:          Cache entry.
 * @encrypted_data: Encrypted block data to write.
 */
static void block_cache_queue_write(struct block_cache_entry* entry,
                                    const void* encrypted_data) {
    block_cache_queue_io_op(entry, BLOCK_CACHE_IO_OP_WRITE);
    stats_timer_start(STATS_CACHE_START_WRITE);
    entry->dev->start_write(entry->dev, entry->block, encrypted_data,
                            entry->block_size, entry->is_superblock);
    stats_timer_stop(STATS_CACHE_START_WRITE);
}

/**
 * block_cache_complete_io - Wait for io operations on block device to complete
 * @dev:        Block device to wait for
 */
static void block_cache_complete_io(struct block_device* dev) {
    while (!list_is_empty(&dev->io_ops)) {
        assert(dev->wait_for_io);
        dev->wait_for_io(dev);
    }
}

/**
 * block_cache_pop_io_op - Get cache entry for completed read or write operation
 * @dev:        Block device
 * @block:      Block number
 * @io_op:      BLOCK_CACHE_IO_OP_READ or BLOCK_CACHE_IO_OP_WRITE.
 *
 * Find the block cache entry that matches @dev and @block and remove it from
 * the io_ops queue of the block device.
 *
 * This is a helper function for block_cache_complete_read and
 * block_cache_complete_write.
 *
 * Return: Matching block cache entry.
 */
static struct block_cache_entry* block_cache_pop_io_op(struct block_device* dev,
                                                       data_block_t block,
                                                       unsigned int io_op) {
    struct block_cache_entry* entry;

    list_for_every_entry(&dev->io_ops, entry, struct block_cache_entry,
                         io_op_node) {
        if (entry->block == block) {
            assert(entry->dev == dev);
            assert(entry->io_op == io_op);
            entry->io_op = BLOCK_CACHE_IO_OP_NONE;
            list_delete(&entry->io_op_node);
            return entry;
        }
        assert(false); /* Out of order completion not expected */
    }
    assert(false); /* No matching entry found */

    return NULL;
}

/**
 * block_cache_complete_read - Read complete callback from block device
 * @dev:        Block device
 * @block:      Block number
 * @data:       Pointer to encrypted data, only valid if @res is
 *              &block_read_error.BLOCK_READ_SUCCESS
 * @data_size:  Data size, must match block size of device.
 * @res:        &block_read_error.BLOCK_READ_SUCCESS if read operation was
 *              successful, otherwise describes the error.
 *
 * Copies the encrypted data into the cache entry and calculates its mac. Does
 * not validate the mac or decrypt the data; the entry is left in the
 * BLOCK_ENTRY_DATA_CLEAN_ENCRYPTED state and decrypted later by
 * block_cache_entry_decrypt.
 */
void block_cache_complete_read(struct block_device* dev,
                               data_block_t block,
                               const void* data,
                               size_t data_size,
                               enum block_read_error res) {
    int ret;
    struct block_cache_entry* entry;

    assert(data_size <= sizeof(entry->data));
    assert(data_size == dev->block_size);

    entry = block_cache_pop_io_op(dev, block, BLOCK_CACHE_IO_OP_READ);
    assert(entry->state == BLOCK_ENTRY_DATA_LOADING);
    switch (res) {
    case BLOCK_READ_SUCCESS:
        /* handled below */
        break;
    case BLOCK_READ_IO_ERROR:
        printf("%s: load block %" PRIu64 " failed\n", __func__, entry->block);
        entry->state = BLOCK_ENTRY_DATA_LOAD_FAILED;
        return;
    case BLOCK_READ_NO_DATA:
        printf("%s: load block %" PRIu64 " failed, no data\n", __func__,
               entry->block);
        entry->state = BLOCK_ENTRY_DATA_NOT_FOUND;
        return;
    }
    assert(res == BLOCK_READ_SUCCESS);

    entry->block_size = data_size;
    /* TODO: change decrypt function to take separate in/out buffers */
    memcpy(entry->data, data, data_size);

    stats_timer_start(STATS_FS_READ_BLOCK_CALC_MAC);
    ret = calculate_mac(entry->key, &entry->mac, entry->data,
                        entry->block_size);
    stats_timer_stop(STATS_FS_READ_BLOCK_CALC_MAC);
    assert(!ret);

    /* TODO: check mac here instead of when getting data from the cache? */
    if (print_block_load) {
        printf("%s: load block %" PRIu64 " complete\n", __func__,
               entry->block);
    }

    entry->state = BLOCK_ENTRY_DATA_CLEAN_ENCRYPTED;
}

/**
 * block_cache_complete_write - Write complete callback from block device
 * @dev:        Block device
 * @block:      Block number
 * @res:        BLOCK_WRITE_SUCCESS if the write operation succeeded and the
 *              data is on disk, otherwise describes the error. If the block
 *              device has tamper detection, e.g. rpmb, passing
 *              BLOCK_WRITE_SUCCESS here means that the secure side block
 *              device code has verified that the data was written to disk.
 */
void block_cache_complete_write(struct block_device* dev,
                                data_block_t block,
                                enum block_write_error res) {
    struct block_cache_entry* entry;

    entry = block_cache_pop_io_op(dev, block, BLOCK_CACHE_IO_OP_WRITE);
    if (print_block_store) {
        printf("%s: write block %" PRIu64 " complete\n", __func__,
               entry->block);
    }
    assert(entry->dirty_tr);
    if (res == BLOCK_WRITE_SUCCESS) {
        entry->dirty_tr = NULL;
        entry->pinned = false;
    } else {
        pr_err("write block %" PRIu64 " failed, fail transaction\n",
               entry->block);
        transaction_fail(entry->dirty_tr);

        if (res == BLOCK_WRITE_SYNC_FAILED) {
            /*
             * We have to fail ALL pending transactions here because an fsync
             * failed and we don't know which write caused that failure.
             *
             * TODO: Should we fail only transactions that write to non-secure
             * devices? I.e. not fail TP transactions?
             *
             * TODO: storageproxy could track which file failed to sync and
             * communicate this back so we only have to fail transactions that
             * touched that backing file.
             */
            pr_err("An fsync failed, fail all pending transactions\n");
            fs_fail_all_transactions();
        }

        /*
         * Failing the transaction must not clear the block number, as we rely
         * on the block number + pinned flag to reserve and reuse the block
         * cache entry when reinitializing a special transaction.
         */
        assert(block == entry->block);

        if (res == BLOCK_WRITE_FAILED_UNKNOWN_STATE) {
            /*
             * We don't know what was written, force superblock to be rewritten.
             * This must be done after we have failed the transaction in case we
             * need to reuse a block that was part of this transaction.
             */
            fs_unknown_super_block_state_all();
        }
    }
}

/**
 * block_cache_entry_has_refs - Check if cache entry is referenced
 * @entry:      Cache entry
 *
 * Return: true if there are references to @entry.
 */
static bool block_cache_entry_has_refs(struct block_cache_entry* entry) {
    return !list_is_empty(&entry->obj.ref_list);
}

/**
 * block_cache_entry_has_one_ref - Check if cache entry is referenced once
 * @entry:      Cache entry
 *
 * Return: true if there is a single reference to @entry.
 */
static bool block_cache_entry_has_one_ref(struct block_cache_entry* entry) {
    return list_length(&entry->obj.ref_list) == 1;
}

/**
 * block_cache_entry_decrypt - Decrypt cache entry
 * @entry:          Cache entry
 */
static void block_cache_entry_decrypt(struct block_cache_entry* entry) {
    int ret;
    const struct iv* iv = NULL; /* TODO: support external iv */
    void* decrypt_data;
    size_t decrypt_size;

    assert(block_cache_entry_data_is_encrypted(entry));

    decrypt_data = entry->data;
    decrypt_size = entry->block_size;
    if (!iv) {
        iv = (void*)entry->data;
        assert(decrypt_size > sizeof(*iv));
        decrypt_data += sizeof(*iv);
        decrypt_size -= sizeof(*iv);
    }
    stats_timer_start(STATS_FS_READ_BLOCK_DECRYPT);
    ret = storage_decrypt(entry->key, decrypt_data, decrypt_size, iv);
    stats_timer_stop(STATS_FS_READ_BLOCK_DECRYPT);
    assert(!ret);

    if (print_block_decrypt_encrypt) {
        printf("%s: decrypt block %" PRIu64 " complete\n", __func__,
               entry->block);
    }

    if (entry->state == BLOCK_ENTRY_DATA_CLEAN_ENCRYPTED) {
        entry->state = BLOCK_ENTRY_DATA_CLEAN_DECRYPTED;
    } else if (entry->state == BLOCK_ENTRY_DATA_DIRTY_ENCRYPTED) {
        /*
         * We leave blocks in DIRTY_ENCRYPTED state after computing a MAC but
         * before flushing the block from the cache. We may decrypt a block
         * again to read it before write back, which is fine as it will be
         * re-encrypted (with the same IV) when flushed for write back.
         */
        entry->state = BLOCK_ENTRY_DATA_DIRTY_DECRYPTED;
    } else {
        /* Covered by assert that the entry was encrypted above. */
        assert(false);
    }
}

/**
 * block_cache_entry_encrypt - Encrypt cache entry and update mac
 * @entry:          Cache entry
 */
static void block_cache_entry_encrypt(struct block_cache_entry* entry) {
    int ret;
    void* encrypt_data;
    size_t encrypt_size;
    struct mac mac;
    struct iv* iv = NULL; /* TODO: support external iv */

    assert(entry->state == BLOCK_ENTRY_DATA_DIRTY_DECRYPTED);
    assert(!block_cache_entry_has_refs(entry));

    encrypt_data = entry->data;
    encrypt_size = entry->block_size;
    if (!iv) {
        iv = (void*)entry->data;
        assert(encrypt_size > sizeof(*iv));
        encrypt_data += sizeof(*iv);
        encrypt_size -= sizeof(*iv);
    }

    stats_timer_start(STATS_FS_WRITE_BLOCK_ENCRYPT);
    ret = storage_encrypt(entry->key, encrypt_data, encrypt_size, iv);
    stats_timer_stop(STATS_FS_WRITE_BLOCK_ENCRYPT);
    assert(!ret);
    entry->state = BLOCK_ENTRY_DATA_DIRTY_ENCRYPTED;
    if (print_block_decrypt_encrypt) {
        printf("%s: encrypt block %" PRIu64 " complete\n", __func__,
               entry->block);
    }

    /*
     * If the mac is not dirty, re-encrypting with the same IV must reproduce
     * the exact same mac. Save the old mac so it can be checked below.
     */
    if (!entry->dirty_mac) {
        mac = entry->mac;
    }

    stats_timer_start(STATS_FS_WRITE_BLOCK_CALC_MAC);
    ret = calculate_mac(entry->key, &entry->mac, entry->data,
                        entry->block_size);
    stats_timer_stop(STATS_FS_WRITE_BLOCK_CALC_MAC);
    assert(!ret);

    if (!entry->dirty_mac) {
        assert(!CRYPTO_memcmp(&mac, &entry->mac, sizeof(mac)));
    }
    entry->dirty_mac = false;
    // assert(!entry->parent || entry->parent->ref_count);
    // assert(!entry->parent || entry->parent->dirty_ref);
}

/**
 * block_cache_entry_clean - Write dirty cache entry to disc
 * @entry:          Cache entry
 *
 * Does not wait for write to complete.
 */
static void block_cache_entry_clean(struct block_cache_entry* entry) {
    if (!block_cache_entry_data_is_dirty(entry)) {
        return;
    }

    if (print_block_store) {
        printf("%s: encrypt block %" PRIu64 "\n", __func__, entry->block);
    }

    assert(entry->block_size <= sizeof(entry->data));
    if (entry->state == BLOCK_ENTRY_DATA_DIRTY_DECRYPTED) {
        block_cache_entry_encrypt(entry);
    }
    assert(entry->state == BLOCK_ENTRY_DATA_DIRTY_ENCRYPTED);
    /* TODO: release ref to parent */

    assert(entry->dirty_tr);
    /*
     * We have to save the current transaction for this entry because we need it
     * to check for transaction failure after queueing the write. Transactions
     * are managed by the storage client layer, and thus will outlive this
     * function, which is internal to the block cache.
     */
    struct transaction* tr = entry->dirty_tr;

    assert(entry->dirty_tr->fs);
    struct transaction* itr = entry->dirty_tr->fs->initial_super_block_tr;
    /*
     * Block(s) in fs->initial_super_block_tr must be written before any other
     * blocks to the same filesystem.
     */
    if (itr && itr != entry->dirty_tr) {
        printf("%s: write initial superblock before block %" PRIu64 "\n",
               __func__, entry->block);
        transaction_initial_super_block_complete(itr);

        /*
         * Check that initial_super_block_tr was cleared. If it was not, it must
         * have failed to write the initial super block and the transaction
         * that entry belongs to must also fail.
         */
        if (entry->dirty_tr->fs->initial_super_block_tr) {
            /*
             * transaction_initial_super_block_complete() always reinitializes
             * initial_super_block_tr if the write failed.
             */
            assert(!entry->dirty_tr->fs->initial_super_block_tr->failed);
            transaction_fail(entry->dirty_tr);
            assert(entry->state == BLOCK_ENTRY_DATA_INVALID);
            return;
        }
    }

    block_cache_queue_write(entry, entry->data);

    /*
     * If we fail the transaction in block_cache_complete_write(), which is
     * currently called during block_cache_queue_write(), we will clear the
     * dirty flag on all cache entries associated with the transaction,
     * including the one we're currently trying to clean.
     *
     * We can't redundantly clear the flag again here if the transaction has
     * failed, because the write failure may have forced us to trigger
     * fs_unknown_super_block_state_all(). Triggering this function creates
     * writes for the current superblock state of each filesystem, and this may
     * have reused the (now) clean entry we are trying to clean. If so,
     * entry->dirty must stay set.
     */
    if (!tr->failed) {
        assert(entry->state == BLOCK_ENTRY_DATA_DIRTY_ENCRYPTED);
        entry->state = BLOCK_ENTRY_DATA_CLEAN_ENCRYPTED;
    }
}

/**
 * block_cache_entry_score - Get a keep score
 * @entry:      Block cache entry to check
 * @index:      Number of available entries before @entry in lru.
 *
 * Return: A score value indicating in what order entries that are close in the
 * lru should be replaced.
 */
static unsigned int block_cache_entry_score(struct block_cache_entry* entry,
                                            unsigned int index) {
    if (!entry->dev) {
        return UINT_MAX;
    }
    return index * (block_cache_entry_data_is_dirty(entry)
                            ? (entry->dirty_tmp ? 1 : 2)
                            : 4);
}
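
/*
 * A worked example (sketch) of the scoring above; block_cache_lookup() below
 * keeps the entry with the highest score as its replacement candidate. With a
 * clean entry at available-index 1, a dirty tmp entry at index 2 and a dirty
 * non-tmp entry at index 3, the scores are 1 * 4 = 4, 2 * 1 = 2 and
 * 3 * 2 = 6, so the dirty non-tmp entry at index 3 is replaced first. Clean
 * entries (* 4) and entries deeper in the lru (higher index) are preferred
 * for replacement over dirty tmp (* 1) entries; entries with no device score
 * UINT_MAX and are always picked.
 */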

/**
 * block_cache_entry_discard_dirty - Discard cache entry (can be dirty).
 * @entry:      Block cache entry to discard
 */
static void block_cache_entry_discard_dirty(struct block_cache_entry* entry) {
    assert(!entry->dirty_ref);
    assert(!list_in_list(&entry->io_op_node));
    entry->state = BLOCK_ENTRY_DATA_INVALID;
    entry->dev = NULL;
    entry->block = DATA_BLOCK_INVALID;
    entry->dirty_tr = NULL;
    /* We have to unpin here because we're clearing the block number */
    entry->pinned = false;
    entry->is_superblock = false;

    entry->dirty_mac = false;
}

/**
 * block_cache_entry_discard - Discard cache entry (must be clean and unused).
 * @entry:      Block cache entry to discard
 */
static void block_cache_entry_discard(struct block_cache_entry* entry) {
    assert(!block_cache_entry_has_refs(entry));
    assert(!entry->dirty_ref);
    assert(!entry->dirty_tr);
    assert(!list_in_list(&entry->io_op_node));
    block_cache_entry_discard_dirty(entry);
}

/**
 * block_cache_lookup - Get cache entry for a specific block
 * @fs:         File system state object, or %NULL if @allocate is %false.
 * @dev:        Block device object.
 * @block:      Block number
 * @allocate:   If true, assign an unused entry to the specified @dev,@block
 *              if no matching entry is found.
 *
 * Return: cache entry matching @dev and @block. If no matching entry is found,
 * and @allocate is true, pick an unused entry and update it to match. If no
 * entry can be used, return NULL.
 */
static struct block_cache_entry* block_cache_lookup(struct fs* fs,
                                                    struct block_device* dev,
                                                    data_block_t block,
                                                    bool allocate) {
    struct block_cache_entry* entry;
    struct block_cache_entry* unused_entry = NULL;
    unsigned int unused_entry_score = 0;
    unsigned int score;
    unsigned int available = 0;
    unsigned int in_use = 0;

    assert(dev);
    assert(fs || !allocate);

    stats_timer_start(STATS_CACHE_LOOKUP);
    /*
     * We may need to attempt to find and flush a cache entry multiple times
     * before finding one that we could successfully use that was not reused
     * during the clean. This relies on the block cache being large enough to
     * hold a super block for each filesystem plus all currently referenced
     * blocks (which is less than the maximum block path length). We cap the
     * number of retries here to avoid an infinite loop, but we should only need
     * one retry attempt since the block cache is LRU and the fresh super block
     * will be the most recently used entry.
     */
    for (int retry = 0; retry < BLOCK_CACHE_SIZE; ++retry) {
        unused_entry = NULL;
        unused_entry_score = 0;
        available = 0;
        in_use = 0;

        list_for_every_entry(&block_cache_lru, entry, struct block_cache_entry,
                             lru_node) {
            assert(entry->guard1 == BLOCK_CACHE_GUARD_1);
            assert(entry->guard2 == BLOCK_CACHE_GUARD_2);
            if (entry->dev == dev && entry->block == block) {
                if (print_cache_lookup) {
                    printf("%s: block %" PRIu64
                           ", found cache entry %zd, state %s\n",
                           __func__, block, entry - block_cache_entries,
                           block_cache_entry_data_state_name(entry->state));
                }
                stats_timer_start(STATS_CACHE_LOOKUP_FOUND);
                stats_timer_stop(STATS_CACHE_LOOKUP_FOUND);
                goto done;
            }
            /*
             * Do not select any cache entries that have active references as
             * they aren't ready to flush, and do not select any pinned entries.
             * Pinned entries can only be flushed by
             * transaction_initial_super_block_complete() and may not be flushed
             * by another transaction. We need to keep special superblock writes
             * pinned in the cache because otherwise we might fill the cache up
             * with other data, flushing the special superblock, which might
             * fail to write. In this case we would leave no room to recreate
             * the write later, since the cache is full of data which can't be
             * flushed until the initial superblock write is completed.
             */
            if (!block_cache_entry_has_refs(entry) && !entry->pinned) {
                score = block_cache_entry_score(entry, available);
                available++;
                if (score >= unused_entry_score) {
                    unused_entry = entry;
                    unused_entry_score = score;
                }
                if (print_cache_lookup_verbose) {
                    printf("%s: block %" PRIu64
                           ", cache entry %zd available last used for %" PRIu64
                           "\n",
                           __func__, block, entry - block_cache_entries,
                           entry->block);
                }
            } else {
                /*
                 * Pinned entries must have a valid block number so they can be
                 * reused.
                 */
                if (entry->pinned) {
                    assert(entry->block != DATA_BLOCK_INVALID);
                }
                if (print_cache_lookup_verbose) {
                    printf("%s: block %" PRIu64
                           ", cache entry %zd in use for %" PRIu64 "\n",
                           __func__, block, entry - block_cache_entries,
                           entry->block);
                }
                in_use++;
            }
        }
        entry = unused_entry;

        if (!entry || !allocate) {
            if (print_cache_lookup) {
                printf("%s: block %" PRIu64
                       ", no available entries, %u in use, allocate %d\n",
                       __func__, block, in_use, allocate);
            }
            entry = NULL;
            goto done;
        }

        if (print_cache_lookup) {
            printf("%s: block %" PRIu64
                   ", use cache entry %zd, state %s, %u available, %u in_use\n",
                   __func__, block, entry - block_cache_entries,
                   block_cache_entry_data_state_name(entry->state), available,
                   in_use);
        }

        assert(!entry->dirty_ref);

        if (block_cache_entry_data_is_dirty(entry)) {
            stats_timer_start(STATS_CACHE_LOOKUP_CLEAN);
            block_cache_entry_clean(entry);
            block_cache_complete_io(entry->dev);
            stats_timer_stop(STATS_CACHE_LOOKUP_CLEAN);
        }

        /*
         * The chosen entry we are flushing can't have been a special superblock
         * write because we do not select pinned entries, however, any RPMB data
         * write may create a new pinned superblock entry if the RPMB write
         * failed but the write counter was incremented. In this case
         * block_cache_entry_clean() will create a new superblock write by
         * calling fs_unknown_super_block_state_all(). This new write may reuse
         * the block cache entry we just chose and cleaned, resulting in our
         * chosen entry now being pinned for a different transaction. In this
         * case we restart the search for a cache entry and try to pick (and if
         * needed clean) a new entry.
         */

        if (!entry->pinned) {
            /* We found a clean entry to use */
            break;
        }

        pr_warn("%s: Retrying attempt to lookup and (if needed) free a block cache entry. "
                "Entry block %" PRIu64 " was reused during cleaning.\n",
                __func__, entry->block);
    }
    assert(!block_cache_entry_data_is_dirty(entry));
    assert(!entry->dirty_mac);
    assert(!entry->dirty_tr);

    entry->dev = dev;
    entry->block = block;
    assert(dev->block_size <= sizeof(entry->data));
    entry->block_size = dev->block_size;
    entry->key = fs->key;
    entry->state = BLOCK_ENTRY_DATA_INVALID;
    entry->is_superblock = false;

done:
    stats_timer_stop(STATS_CACHE_LOOKUP);

    return entry;
}
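
/**
 * enum cache_load_result - Result of loading a block into the cache
 * @CACHE_LOAD_SUCCESS:      Block data is in the cache and decrypted.
 * @CACHE_LOAD_IO_FAILED:    The block device reported a read error.
 * @CACHE_LOAD_NO_DATA:      The block device has no data for this block.
 * @CACHE_LOAD_MAC_MISMATCH: Loaded data does not match the expected mac.
 */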
enum cache_load_result {
    CACHE_LOAD_SUCCESS = 0,
    CACHE_LOAD_IO_FAILED,
    CACHE_LOAD_NO_DATA,
    CACHE_LOAD_MAC_MISMATCH,
};

/**
 * block_cache_load_entry - Get cache entry for a specific block
 * @entry:      Block cache entry to load.
 * @mac:        Optional mac.
 * @mac_size:   Size of @mac.
 *
 * If entry is not already loaded, attempt to load the block and optionally
 * compare with the expected @mac, if provided.
 *
 * Return: &cache_load_result.CACHE_LOAD_SUCCESS if the block (matching @mac, if
 * provided) was already in cache or was loaded successfully. Otherwise return a
 * relevant error.
 */
static enum cache_load_result block_cache_load_entry(
        struct block_cache_entry* entry,
        const void* mac,
        size_t mac_size) {
    if (!block_cache_entry_data_is_valid(entry)) {
        assert(!block_cache_entry_has_refs(entry));
        if (print_block_load) {
            printf("%s: request load block %" PRIu64 "\n", __func__,
                   entry->block);
        }
        block_cache_queue_read(entry);
        block_cache_complete_io(entry->dev);
    }
    if (!block_cache_entry_data_is_valid(entry)) {
        printf("%s: failed to load block %" PRIu64 ", state: %d\n", __func__,
               entry->block, entry->state);
        switch (entry->state) {
        case BLOCK_ENTRY_DATA_LOAD_FAILED:
            return CACHE_LOAD_IO_FAILED;
        case BLOCK_ENTRY_DATA_NOT_FOUND:
            return CACHE_LOAD_NO_DATA;
        default:
            assert(false && "Unexpected entry state");
        }
    }
    if (mac) {
        if (CRYPTO_memcmp(&entry->mac, mac, mac_size)) {
            printf("%s: block %" PRIu64 ", mac mismatch\n", __func__,
                   entry->block);
            return CACHE_LOAD_MAC_MISMATCH;
        }
    }
    /*
     * We eagerly encrypt a block when releasing it so that we can compute the
     * block's mac. If we re-load the same block before flushing it from the
     * cache, we may end up decrypting a dirty block here, so we want to allow
     * decryption of both clean and dirty blocks.
     */
    if (block_cache_entry_data_is_encrypted(entry)) {
        block_cache_entry_decrypt(entry);
    }
    assert(block_cache_entry_data_is_decrypted(entry));

    return CACHE_LOAD_SUCCESS;
}

/**
 * block_cache_get - Get cache entry for a specific block and add a reference
 * @fs:         File system state object.
 * @dev:        Block device object.
 * @block:      Block number.
 * @load:       If true, load data if needed.
 * @mac:        Optional mac. Unused if @load is false.
 * @mac_size:   Size of @mac.
 * @ref:        Pointer to store reference in.
 * @load_result: Optional output pointer to store load result in. May be %NULL.
 *               If not %NULL, @load must be %true.
 *
 * Find cache entry, optionally load it, then add a reference to it.
 *
 * Return: cache entry matching @dev and @block. Can return NULL if @load is
 * true and the entry could not be loaded or does not match the provided mac.
 */
static struct block_cache_entry* block_cache_get(
        struct fs* fs,
        struct block_device* dev,
        data_block_t block,
        bool load,
        const void* mac,
        size_t mac_size,
        struct obj_ref* ref,
        enum cache_load_result* load_result) {
    enum cache_load_result res;
    struct block_cache_entry* entry;

    assert(dev);
    assert(!load_result || load);

    if (block >= dev->block_count) {
        printf("%s: bad block num %" PRIu64 " >= %" PRIu64 "\n", __func__,
               block, dev->block_count);
        if (load_result) {
            *load_result = CACHE_LOAD_NO_DATA;
        }
        return NULL;
    }
    assert(block < dev->block_count);

    entry = block_cache_lookup(fs, dev, block, true);
    assert(entry);

    if (load) {
        res = block_cache_load_entry(entry, mac, mac_size);
        if (res == CACHE_LOAD_MAC_MISMATCH) {
            error_report_block_mac_mismatch(fs->name, TRUSTY_BLOCKTYPE_UNKNOWN);
        }
        if (load_result) {
            *load_result = res;
        }
        if (res != CACHE_LOAD_SUCCESS) {
            return NULL;
        }
    }

    assert(!entry->dirty_ref);
    obj_add_ref_allow_unreferenced_obj(&entry->obj, ref);
    if (print_block_ops) {
        printf("%s: block %" PRIu64 ", cache entry %zd, state %s\n", __func__,
               block, entry - block_cache_entries,
               block_cache_entry_data_state_name(entry->state));
    }
    return entry;
}

/**
 * block_cache_get_data - Call block_cache_get and return data pointer
 * @fs:         File system state object.
 * @dev:        Block device object.
 * @block:      Block number.
 * @load:       If true, load data if needed.
 * @mac:        Optional mac. Unused if @load is false.
 * @mac_size:   Size of @mac.
 * @ref:        Pointer to store reference in.
 * @load_result: Optional output pointer to store load result in. May be %NULL.
 *               Only updated if @load is %true.
 *
 * Return: block data pointer, or NULL if block_cache_get returned NULL.
 */
static void* block_cache_get_data(struct fs* fs,
                                  struct block_device* dev,
                                  data_block_t block,
                                  bool load,
                                  const void* mac,
                                  size_t mac_size,
                                  struct obj_ref* ref,
                                  enum cache_load_result* load_result) {
    struct block_cache_entry* entry;
    entry = block_cache_get(fs, dev, block, load, mac, mac_size, ref,
                            load_result);
    if (!entry) {
        return NULL;
    }
    return entry->data;
}

/**
 * data_to_block_cache_entry - Get cache entry from data pointer
 * @data:       Pointer to data member of cache entry.
 *
 * Return: cache entry matching @data.
 */
static struct block_cache_entry* data_to_block_cache_entry(const void* data) {
    struct block_cache_entry* entry;

    assert(data);
    entry = containerof(data, struct block_cache_entry, data);
    assert(entry >= block_cache_entries);
    assert(entry < &block_cache_entries[BLOCK_CACHE_SIZE]);
    assert(((uintptr_t)entry - (uintptr_t)block_cache_entries) %
                   sizeof(entry[0]) ==
           0);
    return entry;
}
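
/*
 * Note on the asserts above: they reject pointers that are not the data
 * member of an element of block_cache_entries[]; for any data pointer p
 * handed out by block_cache_get_data(),
 * data_to_block_cache_entry(p)->data == p.
 */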

/**
 * data_to_block_cache_entry_or_null - Get cache entry or NULL from data pointer
 * @data:       Pointer to data member of cache entry or NULL.
 *
 * Return: cache entry matching @data, or NULL if @data is NULL.
 */
static struct block_cache_entry* data_to_block_cache_entry_or_null(
        const void* data) {
    return data ? data_to_block_cache_entry(data) : NULL;
}

/**
 * block_cache_entry_destroy - Callback function for obj_del_ref
 * @obj:        Pointer to obj member of cache entry.
 *
 * Callback called by reference tracking code when the last reference to a
 * cache entry has been released. Since this is a cache, and not a normal heap
 * allocated object, the cache entry is not destroyed here. It is instead left
 * in a state where block_cache_lookup can reuse it.
 */
static void block_cache_entry_destroy(struct obj* obj) {
    struct block_cache_entry* entry =
            containerof(obj, struct block_cache_entry, obj);

    list_delete(&entry->lru_node);
    list_add_head(&block_cache_lru, &entry->lru_node);

    if (entry->dirty_mac) {
        block_cache_entry_encrypt(entry);
    }
}

/**
 * block_cache_init - Allocate and initialize block cache
 */
void block_cache_init(void) {
    int i;
    struct obj_ref ref;

    assert(!block_cache_init_called);

    block_cache_init_called = true;

    full_assert(memset(block_cache_entries, 1, sizeof(block_cache_entries)));

    for (i = 0; i < BLOCK_CACHE_SIZE; i++) {
        block_cache_entries[i].guard1 = BLOCK_CACHE_GUARD_1;
        block_cache_entries[i].guard2 = BLOCK_CACHE_GUARD_2;
        block_cache_entries[i].dev = NULL;
        block_cache_entries[i].block = DATA_BLOCK_INVALID;
        block_cache_entries[i].state = BLOCK_ENTRY_DATA_INVALID;
        block_cache_entries[i].dirty_ref = false;
        block_cache_entries[i].dirty_mac = false;
        block_cache_entries[i].pinned = false;
        block_cache_entries[i].is_superblock = false;
        block_cache_entries[i].dirty_tr = NULL;
        block_cache_entries[i].io_op = BLOCK_CACHE_IO_OP_NONE;
        obj_init(&block_cache_entries[i].obj, &ref);
        list_clear_node(&block_cache_entries[i].io_op_node);
        list_add_head(&block_cache_lru, &block_cache_entries[i].lru_node);
        obj_del_ref(&block_cache_entries[i].obj, &ref,
                    block_cache_entry_destroy);
    }
}
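
/*
 * Note on the init loop above: obj_init() takes a temporary reference that
 * obj_del_ref() drops immediately, so block_cache_entry_destroy() runs once
 * per entry and leaves every entry unreferenced on the lru, ready for
 * block_cache_lookup() to hand out.
 */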

/**
 * block_cache_dev_destroy - Discard all blocks associated with device
 * @dev:        Block device to remove
 */
void block_cache_dev_destroy(struct block_device* dev) {
    int i;
    for (i = 0; i < BLOCK_CACHE_SIZE; i++) {
        if (block_cache_entries[i].dev == dev) {
            block_cache_entry_discard(&block_cache_entries[i]);
        }
    }
}

/**
 * block_cache_clean_transaction - Clean blocks modified by transaction
 * @tr:         Transaction
 */
void block_cache_clean_transaction(struct transaction* tr) {
    struct block_cache_entry* entry;
    struct block_device* dev = NULL;

    stats_timer_start(STATS_CACHE_CLEAN_TRANSACTION);

    list_for_every_entry(&block_cache_lru, entry, struct block_cache_entry,
                         lru_node) {
        assert(entry->guard1 == BLOCK_CACHE_GUARD_1);
        assert(entry->guard2 == BLOCK_CACHE_GUARD_2);
        if (entry->dirty_tr != tr) {
            continue;
        }

        assert(block_cache_entry_data_is_dirty(entry));

        assert(!entry->dirty_ref);

        if (entry->dirty_tmp) {
            continue;
        }

        if (!dev) {
            dev = entry->dev;
            assert(dev == tr->fs->dev || dev == tr->fs->super_dev);
        }

        assert(entry->dev == dev);

        if (print_clean_transaction) {
#if TLOG_LVL >= TLOG_LVL_DEBUG
            printf("%s: tr %p, block %" PRIu64 "\n", __func__, tr,
                   entry->block);
#else
            printf("%s: transaction block %" PRIu64 "\n", __func__,
                   entry->block);
#endif
        }

        assert(!block_cache_entry_has_refs(entry));
        stats_timer_start(STATS_CACHE_CLEAN_TRANSACTION_ENT_CLN);
        block_cache_entry_clean(entry);
        stats_timer_stop(STATS_CACHE_CLEAN_TRANSACTION_ENT_CLN);
        assert(entry->dirty_tr != tr);
        if (!tr->failed) {
            /*
             * If the write failed we may have reused this block cache entry for
             * a super block write and it therefore might not be clean.
             */
            assert(!block_cache_entry_data_is_dirty(entry));
            assert(!entry->dirty_tr);
        }
    }

    if (dev) {
        stats_timer_start(STATS_CACHE_CLEAN_TRANSACTION_WAIT_IO);
        block_cache_complete_io(dev);
        stats_timer_stop(STATS_CACHE_CLEAN_TRANSACTION_WAIT_IO);
    }
    stats_timer_stop(STATS_CACHE_CLEAN_TRANSACTION);
}

/**
 * block_cache_discard_transaction - Discard blocks modified by transaction
 * @tr:             Transaction
 * @discard_all:    If true, discard all dirty blocks modified by @tr. If false,
 *                  discard tmp dirty blocks modified by @tr.
 *
 * If @discard_all is %false, only tmp blocks should be dirty. @discard_all
 * therefore only affects error checks.
 */
void block_cache_discard_transaction(struct transaction* tr, bool discard_all) {
    struct block_cache_entry* entry;
    struct block_device* dev = NULL;

    list_for_every_entry(&block_cache_lru, entry, struct block_cache_entry,
                         lru_node) {
        assert(entry->guard1 == BLOCK_CACHE_GUARD_1);
        assert(entry->guard2 == BLOCK_CACHE_GUARD_2);
        if (entry->dirty_tr != tr) {
            continue;
        }

        if (entry->dirty_tmp) {
            /* tmp blocks should never be on the superblock device */
            assert(entry->dev == tr->fs->dev);
        } else {
            /*
             * A transaction should never have dirty non-tmp blocks on both
             * devices at the same time.
             */
            if (!dev) {
                dev = entry->dev;
                assert(dev == tr->fs->dev || dev == tr->fs->super_dev);
            }
            assert(entry->dev == dev);
        }
        assert(block_cache_entry_data_is_dirty(entry));

        if (print_clean_transaction) {
#if TLOG_LVL >= TLOG_LVL_DEBUG
            printf("%s: tr %p, block %" PRIu64 ", tmp %d\n", __func__, tr,
                   entry->block, entry->dirty_tmp);
#else
            printf("%s: transaction block %" PRIu64 ", tmp %d\n", __func__,
                   entry->block, entry->dirty_tmp);
#endif
        }

        if (block_cache_entry_has_refs(entry)) {
#if TLOG_LVL >= TLOG_LVL_DEBUG
            pr_warn("tr %p, block %" PRIu64 " has ref (dirty_ref %d)\n", tr,
                    entry->block, entry->dirty_ref);
#else
            pr_warn("transaction block %" PRIu64 " has ref (dirty_ref %d)\n",
                    entry->block, entry->dirty_ref);
#endif
        } else {
            assert(!entry->dirty_ref);
        }
        if (!discard_all) {
            assert(!block_cache_entry_has_refs(entry));
            assert(entry->dirty_tmp);
        }
        entry->dirty_tr = NULL;
        entry->state = BLOCK_ENTRY_DATA_INVALID;
        assert(!entry->dirty_tr);
    }
}

/**
 * block_get_no_read - Get block data without read
 * @tr:         Transaction to get device from
 * @block:      Block number
 * @ref:        Pointer to store reference in.
 *
 * Return: Const block data pointer.
 *
 * This is only useful if followed by block_dirty.
 */
const void* block_get_no_read(struct transaction* tr,
                              data_block_t block,
                              struct obj_ref* ref) {
    assert(tr);
    assert(tr->fs);

    return block_cache_get_data(tr->fs, tr->fs->dev, block, false, NULL, 0, ref,
                                NULL);
}

/**
 * block_get_super - Get super block data without checking mac
 * @fs:         File system state object.
 * @block:      Block number.
 * @ref:        Pointer to store reference in.
 *
 * Return: Const block data pointer.
 */
const void* block_get_super(struct fs* fs,
                            data_block_t block,
                            struct obj_ref* ref) {
    assert(fs);
    assert(fs->super_dev);
    assert((fs->allow_tampering && !fs->super_dev->tamper_detecting) ||
           (!fs->allow_tampering && fs->super_dev->tamper_detecting));

    return block_cache_get_data(fs, fs->super_dev, block, true, NULL, 0, ref,
                                NULL);
}

/**
 * block_get_super_with_mac - Get super block data and check the mac
 * @fs:         File system state object.
 * @block_mac:  Block number and mac.
 * @ref:        Pointer to store reference in.
 *
 * Return: Const block data pointer.
 */
const void* block_get_super_with_mac(struct fs* fs,
                                     const struct block_mac* block_mac,
                                     struct obj_ref* ref) {
    assert(fs);
    assert(fs->super_dev);

    return block_cache_get_data(
            fs, fs->super_dev, block_mac_to_block_fs(fs, block_mac), true,
            block_mac_to_mac_fs(fs, block_mac), fs->mac_size, ref, NULL);
}

/**
 * block_get_no_tr_fail - Get block data
 * @tr:         Transaction to get device from
 * @block_mac:  Block number and mac
 * @iv:         Initial vector used to decrypt block, or NULL. If NULL, the
 *              start of the loaded block data is used as the iv.
 *              Only NULL is currently supported.
 * @ref:        Pointer to store reference in.
 *
 * Return: Const block data pointer, or NULL if the mac of the loaded data does
 * not match the mac in @block_mac or a read error was reported by the block
 * device when loading the data.
 */
const void* block_get_no_tr_fail(struct transaction* tr,
                                 const struct block_mac* block_mac,
                                 const struct iv* iv,
                                 struct obj_ref* ref) {
    data_block_t block;
    void* data;
    enum cache_load_result load_result = CACHE_LOAD_NO_DATA;

    assert(tr);
    assert(tr->fs);
    assert(block_mac);

    block = block_mac_to_block(tr, block_mac);
    assert(block);

    data = block_cache_get_data(tr->fs, tr->fs->dev, block, true,
                                block_mac_to_mac(tr, block_mac),
                                tr->fs->mac_size, ref, &load_result);
    if (load_result == CACHE_LOAD_MAC_MISMATCH ||
        load_result == CACHE_LOAD_NO_DATA) {
        tr->invalid_block_found = true;
    }
    return data;
}

/**
 * block_get - Get block data
 * @tr:         Transaction to get device from
 * @block_mac:  Block number and mac
 * @iv:         Initial vector used to decrypt block, or NULL. If NULL, the
 *              start of the loaded block data is used as the iv.
 *              Only NULL is currently supported.
 * @ref:        Pointer to store reference in.
 *
 * Return: Const block data pointer, or NULL if the transaction has failed. A
 * transaction failure is triggered if the mac of the loaded data does not
 * match the mac in @block_mac or a read error was reported by the block device
 * when loading the data.
 */
const void* block_get(struct transaction* tr,
                      const struct block_mac* block_mac,
                      const struct iv* iv,
                      struct obj_ref* ref) {
    const void* data;

    assert(tr);

    if (tr->failed) {
        pr_warn("transaction already failed\n");
        return NULL;
    }

    data = block_get_no_tr_fail(tr, block_mac, iv, ref);
    if (!data && !tr->failed) {
        pr_warn("transaction failed\n");
        transaction_fail(tr);
        if (tr->invalid_block_found) {
            fs_mark_scan_required(tr->fs);
        }
    }
    return data;
}
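
/*
 * Example (sketch) of a typical read path; tr, bm and the payload access are
 * hypothetical caller code:
 *
 *     struct obj_ref ref;
 *     const void* data = block_get(tr, &bm, NULL, &ref);
 *     if (!data) {
 *         return;  // transaction failed, block_get already handled it
 *     }
 *     // ... read block contents through data ...
 *     block_put(data, &ref);
 */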

/**
 * block_dirty - Mark cache entry dirty and return non-const block data pointer.
 * @tr:         Transaction
 * @data:       Const block data pointer
 * @is_tmp:     If true, data is only needed until @tr is committed.
 *
 * Return: Non-const block data pointer.
 */
void* block_dirty(struct transaction* tr, const void* data, bool is_tmp) {
    struct block_cache_entry* entry = data_to_block_cache_entry(data);

    assert(tr);
    assert(list_in_list(&tr->node)); /* transaction must be active */
    assert(!entry->dirty_tr || entry->dirty_tr == tr);
    assert(!entry->dirty_ref);
    assert(fs_is_writable(tr->fs));

    if (block_cache_entry_data_is_encrypted(entry)) {
        if (print_block_ops) {
            printf("%s: skip decrypt block %" PRIu64 "\n", __func__,
                   entry->block);
        }
    } else if (entry->state != BLOCK_ENTRY_DATA_CLEAN_DECRYPTED) {
        if (print_block_ops) {
            printf("%s: Dirtying block %" PRIu64
                   " that was not loaded. Previous state: %s\n",
                   __func__, entry->block,
                   block_cache_entry_data_state_name(entry->state));
        }
    }
    assert(block_cache_entry_has_one_ref(entry));
    entry->state = BLOCK_ENTRY_DATA_DIRTY_DECRYPTED;
    entry->dirty_ref = true;
    entry->dirty_tmp = is_tmp;
    entry->dirty_tr = tr;
    return (void*)data;
}
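
/*
 * Example (sketch) of a typical write path, combining block_get_no_read(),
 * block_dirty() and block_put_dirty(); tr, block and bm are hypothetical
 * caller state:
 *
 *     struct obj_ref ref;
 *     const void* ro = block_get_no_read(tr, block, &ref);
 *     void* rw = block_dirty(tr, ro, false);
 *     // ... write block contents through rw ...
 *     block_put_dirty(tr, rw, &ref, &bm, NULL);
 */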

/**
 * block_is_clean - Check if block is clean
 * @dev:        Block device
 * @block:      Block number
 *
 * Return: %true if there is no matching dirty cache entry, %false if the cache
 * contains a dirty block matching @dev and @block.
 */
bool block_is_clean(struct block_device* dev, data_block_t block) {
    struct block_cache_entry* entry;

    entry = block_cache_lookup(NULL, dev, block, false);
    return !entry || !block_cache_entry_data_is_dirty(entry);
}

/**
 * block_discard_dirty - Discard dirty cache data.
 * @data:       Block data pointer
 */
void block_discard_dirty(const void* data) {
    struct block_cache_entry* entry = data_to_block_cache_entry(data);

    if (block_cache_entry_data_is_dirty(entry)) {
        assert(entry->dev);
        block_cache_entry_discard_dirty(entry);
    }
}

/**
 * block_discard_dirty_by_block - Discard cache entry if dirty.
 * @dev:        Block device
 * @block:      Block number
 */
void block_discard_dirty_by_block(struct block_device* dev,
                                  data_block_t block) {
    struct block_cache_entry* entry;

    entry = block_cache_lookup(NULL, dev, block, false);
    if (!entry) {
        return;
    }
    assert(!entry->dirty_ref);
    assert(!block_cache_entry_has_refs(entry));
    if (!block_cache_entry_data_is_dirty(entry)) {
        return;
    }
    block_discard_dirty(entry->data);
}

/**
 * block_put_dirty_etc - Release reference to dirty block.
 * @tr:             Transaction
 * @data:           Block data pointer
 * @data_ref:       Reference pointer to release
 * @block_mac:      block_mac pointer to update after encrypting block
 * @block_mac_ref:  Block data pointer that @block_mac belongs to, or NULL if
 *                  @block_mac points to a memory-only location.
 *
 * Helper function for block_put_dirty, block_put_dirty_no_mac and
 * block_put_dirty_discard.
 */
static void block_put_dirty_etc(struct transaction* tr,
                                void* data,
                                struct obj_ref* data_ref,
                                struct block_mac* block_mac,
                                void* block_mac_ref) {
    int ret;
    struct block_cache_entry* entry = data_to_block_cache_entry(data);
    struct block_cache_entry* parent =
            data_to_block_cache_entry_or_null(block_mac_ref);
    struct iv* iv = (void*)entry->data; /* TODO: support external iv */

    if (tr) {
        assert(block_mac);
        assert(entry->state == BLOCK_ENTRY_DATA_DIRTY_DECRYPTED);
        assert(entry->dirty_ref);
    } else {
        assert(!block_mac);
    }
    assert(entry->guard1 == BLOCK_CACHE_GUARD_1);
    assert(entry->guard2 == BLOCK_CACHE_GUARD_2);

    entry->dirty_ref = false;
    if (block_cache_entry_data_is_dirty(entry)) {
        entry->dirty_mac = true;
        ret = generate_iv(iv);
        assert(!ret);
    } else {
        pr_warn("block %" PRIu64 ", not dirty\n", entry->block);
        assert(entry->dirty_tr == NULL);
        assert(!tr);
    }

    block_put(data, data_ref);
    /* TODO: fix clients to support lazy write */
    assert(block_cache_entry_data_is_encrypted(entry) || !tr);
    assert(!entry->dirty_mac);
    if (block_mac) {
        assert(block_mac_to_block(tr, block_mac) == entry->block);
        block_mac_set_mac(tr, block_mac, &entry->mac);
    }
#if TLOG_LVL >= TLOG_LVL_DEBUG
    if (print_mac_update) {
        printf("%s: block %" PRIu64 ", update parent mac, %p, block %" PRIu64
               "\n",
               __func__, entry->block, block_mac, parent ? parent->block : 0);
    }
#endif
}

/**
 * block_put_dirty - Release reference to dirty block.
 * @tr:             Transaction
 * @data:           Block data pointer
 * @data_ref:       Reference pointer to release
 * @block_mac:      block_mac pointer to update after encrypting block
 * @block_mac_ref:  Block data pointer that @block_mac belongs to, or NULL if
 *                  @block_mac points to a memory-only location.
 */
void block_put_dirty(struct transaction* tr,
                     void* data,
                     struct obj_ref* data_ref,
                     struct block_mac* block_mac,
                     void* block_mac_ref) {
    assert(tr);
    assert(block_mac);
    block_put_dirty_etc(tr, data, data_ref, block_mac, block_mac_ref);
}
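
/*
 * Example (hypothetical sketch): when @block_mac lives inside another cached
 * block (the parent), pass that parent's data pointer as @block_mac_ref so
 * the mac update is attributed to it. node, child_mac and parent_data are
 * assumptions for illustration.
 *
 *      void* child = block_get_write(tr, &node->child_mac, NULL, false, &ref);
 *      ... modify child ...
 *      block_put_dirty(tr, child, &ref, &node->child_mac, parent_data);
 */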

/**
 * block_put_dirty_no_mac - Release reference to dirty super block.
 * @data:           Block data pointer
 * @data_ref:       Reference pointer to release
 * @allow_tampering: %true if this file system does not require tamper-proof
 *                   super block storage, %false if tamper detection is
 *                   required.
 *
 * Similar to block_put_dirty except no transaction or block_mac is needed.
 */
void block_put_dirty_no_mac(void* data,
                            struct obj_ref* data_ref,
                            bool allow_tampering) {
    struct block_cache_entry* entry = data_to_block_cache_entry(data);

    assert(entry->dev);
    /* @allow_tampering must be the inverse of the device's tamper_detecting flag */
    assert((allow_tampering && !entry->dev->tamper_detecting) ||
           (!allow_tampering && entry->dev->tamper_detecting));
    block_put_dirty_etc(NULL, data, data_ref, NULL, NULL);
}

/**
 * block_put_dirty_discard - Release reference to dirty block.
 * @data:           Block data pointer
 * @data_ref:       Reference pointer to release
 *
 * Similar to block_put_dirty except data can be discarded.
 */
void block_put_dirty_discard(void* data, struct obj_ref* data_ref) {
    block_put_dirty_etc(NULL, data, data_ref, NULL, NULL);
    block_discard_dirty(data);
}
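
/*
 * Example (hypothetical sketch): abandon an update when a later step fails,
 * releasing the reference and throwing away the dirty data. do_update() is
 * an assumed caller-side helper.
 *
 *      void* rw = block_get_write_no_read(tr, block, false, &ref);
 *      if (!do_update(rw)) {
 *          block_put_dirty_discard(rw, &ref);
 *          return;
 *      }
 */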

/**
 * block_get_write_no_read - Get block data without read for write
 * @tr:         Transaction
 * @block:      Block number
 * @is_tmp:     If true, data is only needed until @tr is committed.
 * @ref:        Pointer to store reference in.
 *
 * Same as block_get_no_read followed by block_dirty.
 *
 * Return: Block data pointer.
 */
void* block_get_write_no_read(struct transaction* tr,
                              data_block_t block,
                              bool is_tmp,
                              struct obj_ref* ref) {
    const void* data_ro = block_get_no_read(tr, block, ref);
    return block_dirty(tr, data_ro, is_tmp);
}

/**
 * block_get_write - Get block data for write
 * @tr:         Transaction
 * @block_mac:  Block number and mac
 * @iv:         Initialization vector used to decrypt block, or NULL. If NULL,
 *              the start of the loaded block data is used as the iv.
 *              Only NULL is currently supported.
 * @is_tmp:     If true, data is only needed until @tr is committed.
 * @ref:        Pointer to store reference in.
 *
 * Same as block_get followed by block_dirty.
 *
 * Return: Block data pointer, or NULL if the block could not be loaded.
 */
void* block_get_write(struct transaction* tr,
                      const struct block_mac* block_mac,
                      const struct iv* iv,
                      bool is_tmp,
                      struct obj_ref* ref) {
    const void* data_ro = block_get(tr, block_mac, iv, ref);
    if (!data_ro) {
        return NULL;
    }
    return block_dirty(tr, data_ro, is_tmp);
}

/**
 * block_get_cleared - Get block with cleared data for write
 * @tr:         Transaction
 * @block:      Block number
 * @is_tmp:     If true, data is only needed until @tr is committed.
 * @ref:        Pointer to store reference in.
 *
 * Return: Block data pointer.
 */
void* block_get_cleared(struct transaction* tr,
                        data_block_t block,
                        bool is_tmp,
                        struct obj_ref* ref) {
    void* data = block_get_write_no_read(tr, block, is_tmp, ref);
    memset(data, 0, MAX_BLOCK_SIZE);
    return data;
}
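
/*
 * Example (hypothetical sketch): zero-fill a newly allocated block before
 * use. block_allocate() is assumed to come from the block allocator and is
 * not defined in this file; new_block_mac is a caller-managed block_mac.
 *
 *      struct obj_ref ref = OBJ_REF_INITIAL_VALUE(ref);
 *      data_block_t new_block = block_allocate(tr);
 *      void* data = block_get_cleared(tr, new_block, false, &ref);
 *      ... initialize data ...
 *      block_put_dirty(tr, data, &ref, &new_block_mac, NULL);
 */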

/**
 * block_get_cleared_super - Get block with cleared data for write on super_dev
 * @tr:         Transaction
 * @block:      Block number
 * @ref:        Pointer to store reference in.
 * @pinned:     Pin this block in the cache until it is successfully written
 *
 * Return: Block data pointer.
 */
void* block_get_cleared_super(struct transaction* tr,
                              data_block_t block,
                              struct obj_ref* ref,
                              bool pinned) {
    void* data_rw;
    const void* data_ro = block_cache_get_data(tr->fs, tr->fs->super_dev, block,
                                               false, NULL, 0, ref, NULL);

    /*
     * We should never end up in a situation where there is a dirty copy of a
     * super block in the cache while we are trying to rewrite that super block.
     * If a super block entry was created via write_current_super_block(), it
     * must be flushed before the necessary data writes go through to write new
     * root nodes. If we are trying to commit an empty transaction (i.e. no data
     * blocks changed), we skip the super block update in
     * transaction_complete(). The only other way to write a new super block,
     * write_current_super_block(), will be a no-op if there is already a
     * pending super block rewrite.
     */
    assert(data_ro);
    struct block_cache_entry* entry = data_to_block_cache_entry(data_ro);
    assert(!block_cache_entry_data_is_dirty(entry));
    entry->pinned = pinned;
    entry->is_superblock = true;

    data_rw = block_dirty(tr, data_ro, false);
    assert(tr->fs->super_dev->block_size <= MAX_BLOCK_SIZE);
    memset(data_rw, 0, tr->fs->super_dev->block_size);
    return data_rw;
}
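
/*
 * Example (hypothetical sketch): prepare a new super block image that stays
 * pinned in the cache until it is written out. super_block_no is an assumed
 * caller-provided block number on the super device.
 *
 *      void* super = block_get_cleared_super(tr, super_block_no, &ref, true);
 *      ... fill in super block fields ...
 */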

/**
 * block_get_copy - Get block for write with data copied from another block.
 * @tr:         Transaction
 * @data:       Block data pointer
 * @block:      New block number
 * @is_tmp:     If true, data is only needed until @tr is committed.
 * @new_ref:    Pointer to store reference to new block in.
 *
 * Return: Block data pointer.
 */
void* block_get_copy(struct transaction* tr,
                     const void* data,
                     data_block_t block,
                     bool is_tmp,
                     struct obj_ref* new_ref) {
    void* dst_data;
    struct block_cache_entry* src_entry = data_to_block_cache_entry(data);

    assert(block);
    assert(block < tr->fs->dev->block_count);

    dst_data = block_get_write_no_read(tr, block, is_tmp, new_ref);
    memcpy(dst_data, data, src_entry->block_size);
    return dst_data;
}

/**
 * block_move - Get block for write and move to new location
 * @tr:         Transaction
 * @data:       Block data pointer
 * @block:      New block number
 * @is_tmp:     If true, data is only needed until @tr is committed.
 *
 * Change the block number of the cache entry and mark the new block dirty.
 * Useful for copy-on-write.
 *
 * Return: Non-const block data pointer.
 */
void* block_move(struct transaction* tr,
                 const void* data,
                 data_block_t block,
                 bool is_tmp) {
    struct block_cache_entry* dest_entry;
    struct block_cache_entry* entry = data_to_block_cache_entry(data);

    assert(block_cache_entry_has_one_ref(entry));
    assert(!block_cache_entry_data_is_dirty(entry));
    assert(entry->dev == tr->fs->dev);

    if (print_block_move) {
        printf("%s: move cache entry %zd, from block %" PRIu64 " to %" PRIu64
               "\n",
               __func__, entry - block_cache_entries, entry->block, block);
    }

    dest_entry = block_cache_lookup(NULL, tr->fs->dev, block, false);
    if (dest_entry) {
        assert(!block_cache_entry_has_refs(dest_entry));
        assert(!dest_entry->dirty_ref);
        assert(!dest_entry->dirty_tr || dest_entry->dirty_tr == tr);
        assert(!list_in_list(&dest_entry->io_op_node));
        assert(dest_entry->block == block);
        if (print_block_move) {
            printf("%s: clear old cache entry for block %" PRIu64 ", %zd\n",
                   __func__, block, dest_entry - block_cache_entries);
        }
        /* TODO: Use block_cache_entry_discard instead? */
        block_cache_entry_discard_dirty(dest_entry);
    }

    entry->block = block;
    return block_dirty(tr, data, is_tmp);
}
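
/*
 * Example (hypothetical sketch): copy-on-write relocation of a clean block.
 * old_mac, new_mac and new_block are assumed to be managed by the caller.
 *
 *      struct obj_ref ref = OBJ_REF_INITIAL_VALUE(ref);
 *      const void* ro = block_get(tr, &old_mac, NULL, &ref);
 *      void* rw = block_move(tr, ro, new_block, false);
 *      ... modify rw; the data at old_mac on disk is left untouched ...
 *      block_put_dirty(tr, rw, &ref, &new_mac, NULL);
 */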

/**
 * block_put - Release reference to block.
 * @data:       Block data pointer
 * @ref:        Reference pointer to release
 */
void block_put(const void* data, struct obj_ref* ref) {
    struct block_cache_entry* entry = data_to_block_cache_entry(data);

    if (print_block_ops) {
        printf("%s: block %" PRIu64 ", cache entry %zd, state %s\n", __func__,
               entry->block, entry - block_cache_entries,
               block_cache_entry_data_state_name(entry->state));
    }

    assert(!entry->dirty_ref);

    obj_del_ref(&entry->obj, ref, block_cache_entry_destroy);
}

/**
 * block_probe - Verify that the given block is loadable and its mac is correct
 * @fs:          Filesystem containing the block to probe
 * @block_mac:   Block to probe
 * @allow_invalid: If %true, an invalid (i.e. zero) @block_mac will not be
 *                 probed and this function will return %true
 *
 * Return: %false if the block is not valid or does not match the expected mac.
 * Returns %true if the block was readable, valid and matched the expected mac.
 * If @allow_invalid is %true, also return %true if @block_mac is invalid. Also
 * returns %true if an I/O error was encountered, since an I/O error does not
 * positively confirm a corrupted block.
 */
bool block_probe(struct fs* fs,
                 const struct block_mac* block_mac,
                 bool allow_invalid) {
    struct transaction probe_tr;
    struct obj_ref probe_ref = OBJ_REF_INITIAL_VALUE(probe_ref);
    const void* probe_block;
    /*
     * Assume the block is valid unless we get positive confirmation of an
     * invalid block.
     */
    bool valid = true;

    transaction_init(&probe_tr, fs, true);
    if (block_mac_valid(&probe_tr, block_mac)) {
        probe_block =
                block_get_no_tr_fail(&probe_tr, block_mac, NULL, &probe_ref);
        if (probe_block) {
            block_put(probe_block, &probe_ref);
        } else if (probe_tr.invalid_block_found) {
            valid = false;
        }
    } else if (!allow_invalid) {
        /* A zero @block_mac was not expected here; treat it as invalid. */
        valid = false;
    }
    transaction_fail(&probe_tr);
    transaction_free(&probe_tr);

    return valid;
}
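
/*
 * Example (hypothetical sketch): verify a root block before relying on it,
 * treating an all-zero block_mac as acceptable (e.g. an empty tree).
 * root_block_mac and the recovery path are assumptions.
 *
 *      if (!block_probe(fs, &root_block_mac, true)) {
 *          ... report corruption and trigger recovery ...
 *      }
 */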

/**
 * data_to_block_num - Get block number from block data pointer
 * @data:       Block data pointer
 *
 * Only used for debug code.
 *
 * Return: block number.
 */
data_block_t data_to_block_num(const void* data) {
    struct block_cache_entry* entry = data_to_block_cache_entry(data);

    return entry->block;
}

/**
 * block_cache_debug_get_ref_block_count - Get number of blocks that have
 * references
 *
 * Only used for debug code.
 *
 * Return: number of blocks in cache that have references.
 */
unsigned int block_cache_debug_get_ref_block_count(void) {
    unsigned int count = 0;
    struct block_cache_entry* entry;

    list_for_every_entry(&block_cache_lru, entry, struct block_cache_entry,
                         lru_node) {
        assert(entry->guard1 == BLOCK_CACHE_GUARD_1);
        assert(entry->guard2 == BLOCK_CACHE_GUARD_2);
        if (block_cache_entry_has_refs(entry)) {
            if (print_cache_get_ref_block_count) {
#if TLOG_LVL >= TLOG_LVL_DEBUG
                printf("%s: cache entry %zd in use for %" PRIu64 ", dev %p\n",
                       __func__, entry - block_cache_entries, entry->block,
                       entry->dev);
#else
                printf("%s: cache entry %zd in use for %" PRIu64 "\n",
                       __func__, entry - block_cache_entries, entry->block);
#endif
            }
            count++;
        }
    }
    return count;
}
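
/*
 * Example (hypothetical sketch): leak check after all block references
 * should have been released, e.g. at the end of a test.
 *
 *      assert(block_cache_debug_get_ref_block_count() == 0);
 */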