/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <assert.h>
#include <inttypes.h>
#include <limits.h>
#include <lk/reflist.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <openssl/crypto.h>
#include <openssl/rand.h>

#include "block_cache.h"
#include "block_cache_priv.h"
#include "crypt.h"
#include "debug.h"
#include "debug_stats.h"
#include "error_reporting.h"
#include "transaction.h"

static bool print_cache_lookup = false;
static bool print_cache_lookup_verbose = false;
static bool print_block_ops = false;
static bool print_block_load = false;
static bool print_block_store = false;
static bool print_block_move = false;
static bool print_block_decrypt_encrypt = false;
static bool print_clean_transaction = false;
static bool print_mac_update = false;
static bool print_cache_get_ref_block_count = true;

#define BLOCK_CACHE_GUARD_1 (0xdead0001dead0003)
#define BLOCK_CACHE_GUARD_2 (0xdead0005dead0007)

static struct list_node block_cache_lru = LIST_INITIAL_VALUE(block_cache_lru);
static struct block_cache_entry block_cache_entries[BLOCK_CACHE_SIZE];
static bool block_cache_init_called = false;

static bool block_cache_entry_data_is_valid(
        const struct block_cache_entry* entry) {
    return entry->state == BLOCK_ENTRY_DATA_CLEAN_DECRYPTED ||
           entry->state == BLOCK_ENTRY_DATA_CLEAN_ENCRYPTED ||
           entry->state == BLOCK_ENTRY_DATA_DIRTY_DECRYPTED ||
           entry->state == BLOCK_ENTRY_DATA_DIRTY_ENCRYPTED;
}

static bool block_cache_entry_data_is_dirty(
        const struct block_cache_entry* entry) {
    return entry->state == BLOCK_ENTRY_DATA_DIRTY_DECRYPTED ||
           entry->state == BLOCK_ENTRY_DATA_DIRTY_ENCRYPTED;
}

static bool block_cache_entry_data_is_encrypted(
        const struct block_cache_entry* entry) {
    return entry->state == BLOCK_ENTRY_DATA_CLEAN_ENCRYPTED ||
           entry->state == BLOCK_ENTRY_DATA_DIRTY_ENCRYPTED;
}

static bool block_cache_entry_data_is_decrypted(
        const struct block_cache_entry* entry) {
    return entry->state == BLOCK_ENTRY_DATA_CLEAN_DECRYPTED ||
           entry->state == BLOCK_ENTRY_DATA_DIRTY_DECRYPTED;
}

static const char* block_cache_entry_data_state_name(
        enum block_cache_entry_data_state state) {
    switch (state) {
    case BLOCK_ENTRY_DATA_INVALID:
        return "BLOCK_ENTRY_DATA_INVALID";
    case BLOCK_ENTRY_DATA_LOADING:
        return "BLOCK_ENTRY_DATA_LOADING";
    case BLOCK_ENTRY_DATA_LOAD_FAILED:
        return "BLOCK_ENTRY_DATA_LOAD_FAILED";
    case BLOCK_ENTRY_DATA_NOT_FOUND:
        return "BLOCK_ENTRY_DATA_NOT_FOUND";
    case BLOCK_ENTRY_DATA_CLEAN_DECRYPTED:
        return "BLOCK_ENTRY_DATA_CLEAN_DECRYPTED";
    case BLOCK_ENTRY_DATA_CLEAN_ENCRYPTED:
        return "BLOCK_ENTRY_DATA_CLEAN_ENCRYPTED";
    case BLOCK_ENTRY_DATA_DIRTY_DECRYPTED:
        return "BLOCK_ENTRY_DATA_DIRTY_DECRYPTED";
    case BLOCK_ENTRY_DATA_DIRTY_ENCRYPTED:
        return "BLOCK_ENTRY_DATA_DIRTY_ENCRYPTED";
    }
}
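
/*
 * Illustrative sketch of how entry->state moves between the values above,
 * derived from the load, dirty and flush paths later in this file (a summary,
 * not authoritative):
 *
 *   INVALID -> LOADING                 (block_cache_queue_read)
 *   LOADING -> CLEAN_ENCRYPTED         (block_cache_complete_read, success)
 *   LOADING -> LOAD_FAILED | NOT_FOUND (block_cache_complete_read, errors)
 *   CLEAN_ENCRYPTED -> CLEAN_DECRYPTED (block_cache_entry_decrypt)
 *   CLEAN_DECRYPTED -> DIRTY_DECRYPTED (block_dirty)
 *   DIRTY_DECRYPTED -> DIRTY_ENCRYPTED (block_cache_entry_encrypt)
 *   DIRTY_ENCRYPTED -> DIRTY_DECRYPTED (re-read of a dirty block before flush)
 *   DIRTY_ENCRYPTED -> CLEAN_ENCRYPTED (block_cache_entry_clean, write queued)
 */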

/**
 * block_cache_queue_io_op - Helper function to start a read or write operation
 * @entry: Cache entry.
 * @io_op: BLOCK_CACHE_IO_OP_READ or BLOCK_CACHE_IO_OP_WRITE.
 *
 * Set io_op for cache entry and add it to the tail of the io_ops for the
 * block device that the cache entry belongs to.
 */
static void block_cache_queue_io_op(struct block_cache_entry* entry,
                                    int io_op) {
    assert(io_op == BLOCK_CACHE_IO_OP_READ || io_op == BLOCK_CACHE_IO_OP_WRITE);
    assert(entry->io_op == BLOCK_CACHE_IO_OP_NONE);
    assert(entry->dev);
    assert(!list_in_list(&entry->io_op_node));

    entry->io_op = io_op;
    list_add_tail(&entry->dev->io_ops, &entry->io_op_node);
}

/**
 * block_cache_queue_read - Start a read operation
 * @entry: Cache entry.
 */
static void block_cache_queue_read(struct block_cache_entry* entry) {
    assert(!block_cache_entry_data_is_dirty(entry));
    entry->state = BLOCK_ENTRY_DATA_LOADING;
    block_cache_queue_io_op(entry, BLOCK_CACHE_IO_OP_READ);
    stats_timer_start(STATS_CACHE_START_READ);
    entry->dev->start_read(entry->dev, entry->block);
    stats_timer_stop(STATS_CACHE_START_READ);
}

/**
 * block_cache_queue_write - Start a write operation
 * @entry: Cache entry.
 */
static void block_cache_queue_write(struct block_cache_entry* entry,
                                    const void* encrypted_data) {
    block_cache_queue_io_op(entry, BLOCK_CACHE_IO_OP_WRITE);
    stats_timer_start(STATS_CACHE_START_WRITE);
    entry->dev->start_write(entry->dev, entry->block, encrypted_data,
                            entry->block_size, entry->is_superblock);
    stats_timer_stop(STATS_CACHE_START_WRITE);
}

/**
 * block_cache_complete_io - Wait for io operation on block device to complete
 * @dev: Block device to wait for
 */
static void block_cache_complete_io(struct block_device* dev) {
    while (!list_is_empty(&dev->io_ops)) {
        assert(dev->wait_for_io);
        dev->wait_for_io(dev);
    }
}

/**
 * block_cache_pop_io_op - Get cache entry for completed read or write operation
 * @dev: Block device
 * @block: Block number
 * @io_op: BLOCK_CACHE_IO_OP_READ or BLOCK_CACHE_IO_OP_WRITE.
 *
 * Finds the block cache entry that matches @dev and @block and removes it from
 * the io_ops queue of the block device.
 *
 * This is a helper function for block_cache_complete_read and
 * block_cache_complete_write.
 *
 * Return: Matching block cache entry.
 */
static struct block_cache_entry* block_cache_pop_io_op(struct block_device* dev,
                                                       data_block_t block,
                                                       unsigned int io_op) {
    struct block_cache_entry* entry;

    list_for_every_entry(&dev->io_ops, entry, struct block_cache_entry,
                         io_op_node) {
        if (entry->block == block) {
            assert(entry->dev == dev);
            assert(entry->io_op == io_op);
            entry->io_op = BLOCK_CACHE_IO_OP_NONE;
            list_delete(&entry->io_op_node);
            return entry;
        }
        assert(false); /* Out of order completion not expected */
    }
    assert(false); /* No matching entry found */

    return NULL;
}

/**
 * block_cache_complete_read - Read complete callback from block device
 * @dev: Block device
 * @block: Block number
 * @data: Pointer to encrypted data, only valid if @res is
 *        &block_read_error.BLOCK_READ_SUCCESS
 * @data_size: Data size, must match block size of device.
 * @res: &block_read_error.BLOCK_READ_SUCCESS if read operation was
 *       successful, otherwise describes the error.
 *
 * Copies data into the cache entry and calculates its mac. Does not validate
 * the mac and does not decrypt; the entry is left in
 * BLOCK_ENTRY_DATA_CLEAN_ENCRYPTED state and is decrypted lazily when it is
 * read from the cache.
 */
void block_cache_complete_read(struct block_device* dev,
                               data_block_t block,
                               const void* data,
                               size_t data_size,
                               enum block_read_error res) {
    int ret;
    struct block_cache_entry* entry;

    assert(data_size <= sizeof(entry->data));
    assert(data_size == dev->block_size);

    entry = block_cache_pop_io_op(dev, block, BLOCK_CACHE_IO_OP_READ);
    assert(entry->state == BLOCK_ENTRY_DATA_LOADING);
    switch (res) {
    case BLOCK_READ_SUCCESS:
        /* handled below */
        break;
    case BLOCK_READ_IO_ERROR:
        printf("%s: load block %" PRIu64 " failed\n", __func__, entry->block);
        entry->state = BLOCK_ENTRY_DATA_LOAD_FAILED;
        return;
    case BLOCK_READ_NO_DATA:
        printf("%s: load block %" PRIu64 " failed, no data\n", __func__,
               entry->block);
        entry->state = BLOCK_ENTRY_DATA_NOT_FOUND;
        return;
    }
    assert(res == BLOCK_READ_SUCCESS);

    entry->block_size = data_size;
    /* TODO: change decrypt function to take separate in/out buffers */
    memcpy(entry->data, data, data_size);

    stats_timer_start(STATS_FS_READ_BLOCK_CALC_MAC);
    ret = calculate_mac(entry->key, &entry->mac, entry->data,
                        entry->block_size);
    stats_timer_stop(STATS_FS_READ_BLOCK_CALC_MAC);
    assert(!ret);

    /* TODO: check mac here instead of when getting data from the cache? */
    if (print_block_load) {
        printf("%s: load/decrypt block %" PRIu64 " complete\n", __func__,
               entry->block);
    }

    entry->state = BLOCK_ENTRY_DATA_CLEAN_ENCRYPTED;
}

/**
 * block_cache_complete_write - Write complete callback from block device
 * @dev: Block device
 * @block: Block number
 * @res: BLOCK_WRITE_SUCCESS if the write operation succeeded, otherwise a
 *       &enum block_write_error value describing the failure. If the block
 *       device has tamper detection, e.g. rpmb, passing BLOCK_WRITE_SUCCESS
 *       here means that the secure side block device code has verified that
 *       the data was written to disk.
 */
void block_cache_complete_write(struct block_device* dev,
                                data_block_t block,
                                enum block_write_error res) {
    struct block_cache_entry* entry;

    entry = block_cache_pop_io_op(dev, block, BLOCK_CACHE_IO_OP_WRITE);
    if (print_block_store) {
        printf("%s: write block %" PRIu64 " complete\n", __func__,
               entry->block);
    }
    assert(entry->dirty_tr);
    if (res == BLOCK_WRITE_SUCCESS) {
        entry->dirty_tr = NULL;
        entry->pinned = false;
    } else {
        pr_err("write block %" PRIu64 " failed, fail transaction\n",
               entry->block);
        transaction_fail(entry->dirty_tr);

        if (res == BLOCK_WRITE_SYNC_FAILED) {
            /*
             * We have to fail ALL pending transactions here because an fsync
             * failed and we don't know which write caused that failure.
             *
             * TODO: Should we fail only transactions that write to non-secure
             * devices? I.e. not fail TP transactions?
             *
             * TODO: storageproxy could track which file failed to sync and
             * communicate this back so we only have to fail transactions that
             * touched that backing file.
             */
            pr_err("An fsync failed, fail all pending transactions\n");
            fs_fail_all_transactions();
        }

        /*
         * Failing the transaction must not clear the block number, as we rely
         * on the block number + pinned flag to reserve and reuse the block
         * cache entry when reinitializing a special transaction.
         */
        assert(block == entry->block);

        if (res == BLOCK_WRITE_FAILED_UNKNOWN_STATE) {
            /*
             * We don't know what was written, force superblock to be rewritten.
             * This must be done after we have failed the transaction in case we
             * need to reuse a block that was part of this transaction.
             */
            fs_unknown_super_block_state_all();
        }
    }
}

/**
 * block_cache_entry_has_refs - Check if cache entry is referenced
 * @entry: Cache entry
 *
 * Return: true if there are one or more references to @entry.
 */
static bool block_cache_entry_has_refs(struct block_cache_entry* entry) {
    return !list_is_empty(&entry->obj.ref_list);
}

/**
 * block_cache_entry_has_one_ref - Check if cache entry is referenced once
 * @entry: Cache entry
 *
 * Return: true if there is a single reference to @entry.
 */
static bool block_cache_entry_has_one_ref(struct block_cache_entry* entry) {
    return list_length(&entry->obj.ref_list) == 1;
}

/**
 * block_cache_entry_decrypt - Decrypt cache entry
 * @entry: Cache entry
 */
static void block_cache_entry_decrypt(struct block_cache_entry* entry) {
    int ret;
    const struct iv* iv = NULL; /* TODO: support external iv */
    void* decrypt_data;
    size_t decrypt_size;

    assert(block_cache_entry_data_is_encrypted(entry));

    decrypt_data = entry->data;
    decrypt_size = entry->block_size;
    if (!iv) {
        iv = (void*)entry->data;
        assert(decrypt_size > sizeof(*iv));
        decrypt_data += sizeof(*iv);
        decrypt_size -= sizeof(*iv);
    }
    stats_timer_start(STATS_FS_READ_BLOCK_DECRYPT);
    ret = storage_decrypt(entry->key, decrypt_data, decrypt_size, iv);
    stats_timer_stop(STATS_FS_READ_BLOCK_DECRYPT);
    assert(!ret);

    if (print_block_decrypt_encrypt) {
        printf("%s: decrypt block %" PRIu64 " complete\n", __func__,
               entry->block);
    }

    if (entry->state == BLOCK_ENTRY_DATA_CLEAN_ENCRYPTED) {
        entry->state = BLOCK_ENTRY_DATA_CLEAN_DECRYPTED;
    } else if (entry->state == BLOCK_ENTRY_DATA_DIRTY_ENCRYPTED) {
        /*
         * We leave blocks in DIRTY_ENCRYPTED state after computing a MAC but
         * before flushing the block from the cache. We may decrypt a block
         * again to read it before write back, which is fine as it will be
         * re-encrypted (with the same IV) when flushed for write back.
         */
        entry->state = BLOCK_ENTRY_DATA_DIRTY_DECRYPTED;
    } else {
        /* Covered by assert that the entry was encrypted above. */
        assert(false);
    }
}

/**
 * block_cache_entry_encrypt - Encrypt cache entry and update mac
 * @entry: Cache entry
 */
static void block_cache_entry_encrypt(struct block_cache_entry* entry) {
    int ret;
    void* encrypt_data;
    size_t encrypt_size;
    struct mac mac;
    struct iv* iv = NULL; /* TODO: support external iv */

    assert(entry->state == BLOCK_ENTRY_DATA_DIRTY_DECRYPTED);
    assert(!block_cache_entry_has_refs(entry));

    encrypt_data = entry->data;
    encrypt_size = entry->block_size;
    if (!iv) {
        iv = (void*)entry->data;
        assert(encrypt_size > sizeof(*iv));
        encrypt_data += sizeof(*iv);
        encrypt_size -= sizeof(*iv);
    }

    stats_timer_start(STATS_FS_WRITE_BLOCK_ENCRYPT);
    ret = storage_encrypt(entry->key, encrypt_data, encrypt_size, iv);
    stats_timer_stop(STATS_FS_WRITE_BLOCK_ENCRYPT);
    assert(!ret);
    entry->state = BLOCK_ENTRY_DATA_DIRTY_ENCRYPTED;
    if (print_block_decrypt_encrypt) {
        printf("%s: encrypt block %" PRIu64 " complete\n", __func__,
               entry->block);
    }

    if (!entry->dirty_mac) {
        mac = entry->mac;
    }

    stats_timer_start(STATS_FS_WRITE_BLOCK_CALC_MAC);
    ret = calculate_mac(entry->key, &entry->mac, entry->data,
                        entry->block_size);
    stats_timer_stop(STATS_FS_WRITE_BLOCK_CALC_MAC);
    assert(!ret);

    if (!entry->dirty_mac) {
        assert(!CRYPTO_memcmp(&mac, &entry->mac, sizeof(mac)));
    }
    entry->dirty_mac = false;
    // assert(!entry->parent || entry->parent->ref_count);
    // assert(!entry->parent || entry->parent->dirty_ref);
}

/**
 * block_cache_entry_clean - Write dirty cache entry to disc
 * @entry: Cache entry
 *
 * Does not wait for write to complete.
 */
static void block_cache_entry_clean(struct block_cache_entry* entry) {
    if (!block_cache_entry_data_is_dirty(entry)) {
        return;
    }

    if (print_block_store) {
        printf("%s: encrypt block %" PRIu64 "\n", __func__, entry->block);
    }

    assert(entry->block_size <= sizeof(entry->data));
    if (entry->state == BLOCK_ENTRY_DATA_DIRTY_DECRYPTED) {
        block_cache_entry_encrypt(entry);
    }
    assert(entry->state == BLOCK_ENTRY_DATA_DIRTY_ENCRYPTED);
    /* TODO: release ref to parent */

    assert(entry->dirty_tr);
    /*
     * We have to save the current transaction for this entry because we need it
     * to check for transaction failure after queueing the write. Transactions
     * are managed by the storage client layer, and thus will outlive this
     * function, which is internal to the block cache.
     */
    struct transaction* tr = entry->dirty_tr;

    assert(entry->dirty_tr->fs);
    struct transaction* itr = entry->dirty_tr->fs->initial_super_block_tr;
    /*
     * Block(s) in fs->initial_super_block_tr must be written before any other
     * blocks to the same filesystem.
     */
    if (itr && itr != entry->dirty_tr) {
        printf("%s: write initial superblock before block %" PRIu64 "\n",
               __func__, entry->block);
        transaction_initial_super_block_complete(itr);

        /*
         * Check that initial_super_block_tr was cleared. If it was not, it must
         * have failed to write the initial super block and the transaction
         * that entry belongs to must also fail.
         */
        if (entry->dirty_tr->fs->initial_super_block_tr) {
            /*
             * transaction_initial_super_block_complete() always reinitializes
             * initial_super_block_tr if the write failed.
             */
            assert(!entry->dirty_tr->fs->initial_super_block_tr->failed);
            transaction_fail(entry->dirty_tr);
            assert(entry->state == BLOCK_ENTRY_DATA_INVALID);
            return;
        }
    }

    block_cache_queue_write(entry, entry->data);

    /*
     * If we fail the transaction in block_cache_complete_write(), which is
     * currently called during block_cache_queue_write(), we will clear the
     * dirty flag on all cache entries associated with the transaction,
     * including the one we're currently trying to clean.
     *
     * We can't redundantly clear the flag again here if the transaction has
     * failed, because the write failure may have forced us to trigger
     * fs_unknown_super_block_state_all(). Triggering this function creates
     * writes for the current superblock state of each filesystem, and this may
     * have reused the (now) clean entry we are trying to clean. If so,
     * entry->dirty must stay set.
     */
    if (!tr->failed) {
        assert(entry->state == BLOCK_ENTRY_DATA_DIRTY_ENCRYPTED);
        entry->state = BLOCK_ENTRY_DATA_CLEAN_ENCRYPTED;
    }
}

/**
 * block_cache_entry_score - Get a keep score
 * @entry: Block cache entry to check
 * @index: Number of available entries before @entry in lru.
 *
 * Return: A score value indicating in what order entries that are close in the
 * lru should be replaced.
 */
static unsigned int block_cache_entry_score(struct block_cache_entry* entry,
                                            unsigned int index) {
    if (!entry->dev) {
        return UINT_MAX;
    }
    return index * (block_cache_entry_data_is_dirty(entry)
                            ? (entry->dirty_tmp ? 1 : 2)
                            : 4);
}
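
/*
 * Illustrative note (a summary, not from the original comments): higher
 * scores mark better replacement candidates. Unbound entries (no dev) score
 * UINT_MAX and are always picked first. Otherwise the score scales with the
 * entry's position among the available entries in the lru list, weighted by
 * how cheap the entry is to evict: clean entries (weight 4) go before dirty
 * entries (weight 2), which go before tmp-dirty entries (weight 1). For
 * example, a clean entry at available index 2 scores 2 * 4 = 8 and is
 * replaced before a dirty entry at the same position, which scores 2 * 2 = 4.
 */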

/**
 * block_cache_entry_discard_dirty - Discard cache entry (can be dirty).
 * @entry: Block cache entry to discard
 */
static void block_cache_entry_discard_dirty(struct block_cache_entry* entry) {
    assert(!entry->dirty_ref);
    assert(!list_in_list(&entry->io_op_node));
    entry->state = BLOCK_ENTRY_DATA_INVALID;
    entry->dev = NULL;
    entry->block = DATA_BLOCK_INVALID;
    entry->dirty_tr = NULL;
    /* We have to unpin here because we're clearing the block number */
    entry->pinned = false;
    entry->is_superblock = false;

    entry->dirty_mac = false;
}

/**
 * block_cache_entry_discard - Discard cache entry (must be clean and unused).
 * @entry: Block cache entry to discard
 */
static void block_cache_entry_discard(struct block_cache_entry* entry) {
    assert(!block_cache_entry_has_refs(entry));
    assert(!entry->dirty_ref);
    assert(!entry->dirty_tr);
    assert(!list_in_list(&entry->io_op_node));
    block_cache_entry_discard_dirty(entry);
}

/**
 * block_cache_lookup - Get cache entry for a specific block
 * @fs: File system state object, or %NULL if @allocate is %false.
 * @dev: Block device object.
 * @block: Block number
 * @allocate: If true, assign an unused entry to the specified @dev,@block
 *            if no matching entry is found.
 *
 * Return: cache entry matching @dev and @block. If no matching entry is found,
 * and @allocate is true, pick an unused entry and update it to match. If no
 * entry can be used, return NULL.
 */
static struct block_cache_entry* block_cache_lookup(struct fs* fs,
                                                    struct block_device* dev,
                                                    data_block_t block,
                                                    bool allocate) {
    struct block_cache_entry* entry;
    struct block_cache_entry* unused_entry = NULL;
    unsigned int unused_entry_score = 0;
    unsigned int score;
    unsigned int available = 0;
    unsigned int in_use = 0;

    assert(dev);
    assert(fs || !allocate);

    stats_timer_start(STATS_CACHE_LOOKUP);
    /*
     * We may need to attempt to find and flush a cache entry multiple times
     * before finding one that we could successfully use that was not reused
     * during the clean. This relies on the block cache being large enough to
     * hold a super block for each filesystem plus all currently referenced
     * blocks (which is less than the maximum block path length). We cap the
     * number of retries here to avoid an infinite loop, but we should only need
     * one retry attempt since the block cache is LRU and the fresh super block
     * will be the most recently used entry.
     */
    for (int retry = 0; retry < BLOCK_CACHE_SIZE; ++retry) {
        unused_entry = NULL;
        unused_entry_score = 0;
        available = 0;
        in_use = 0;

        list_for_every_entry(&block_cache_lru, entry, struct block_cache_entry,
                             lru_node) {
            assert(entry->guard1 == BLOCK_CACHE_GUARD_1);
            assert(entry->guard2 == BLOCK_CACHE_GUARD_2);
            if (entry->dev == dev && entry->block == block) {
                if (print_cache_lookup) {
                    printf("%s: block %" PRIu64
                           ", found cache entry %zd, state %s\n",
                           __func__, block, entry - block_cache_entries,
                           block_cache_entry_data_state_name(entry->state));
                }
                stats_timer_start(STATS_CACHE_LOOKUP_FOUND);
                stats_timer_stop(STATS_CACHE_LOOKUP_FOUND);
                goto done;
            }
            /*
             * Do not select any cache entries that have active references as
             * they aren't ready to flush, and do not select any pinned entries.
             * Pinned entries can only be flushed by
             * transaction_initial_super_block_complete() and may not be flushed
             * by another transaction. We need to keep special superblock writes
             * pinned in the cache because otherwise we might fill the cache up
             * with other data, flushing the special superblock, which might
             * fail to write. In this case we would leave no room to recreate
             * the write later, since the cache is full of data which can't be
             * flushed until the initial superblock write is completed.
             */
            if (!block_cache_entry_has_refs(entry) && !entry->pinned) {
                score = block_cache_entry_score(entry, available);
                available++;
                if (score >= unused_entry_score) {
                    unused_entry = entry;
                    unused_entry_score = score;
                }
                if (print_cache_lookup_verbose) {
                    printf("%s: block %" PRIu64
                           ", cache entry %zd available last used for %" PRIu64
                           "\n",
                           __func__, block, entry - block_cache_entries,
                           entry->block);
                }
            } else {
                /*
                 * Pinned entries must have a valid block number so they can be
                 * reused.
                 */
                if (entry->pinned) {
                    assert(entry->block != DATA_BLOCK_INVALID);
                }
                if (print_cache_lookup_verbose) {
                    printf("%s: block %" PRIu64
                           ", cache entry %zd in use for %" PRIu64 "\n",
                           __func__, block, entry - block_cache_entries,
                           entry->block);
                }
                in_use++;
            }
        }
        entry = unused_entry;

        if (!entry || !allocate) {
            if (print_cache_lookup) {
                printf("%s: block %" PRIu64
                       ", no available entries, %u in use, allocate %d\n",
                       __func__, block, in_use, allocate);
            }
            entry = NULL;
            goto done;
        }

        if (print_cache_lookup) {
            printf("%s: block %" PRIu64
                   ", use cache entry %zd, state %s, %u available, %u in_use\n",
                   __func__, block, entry - block_cache_entries,
                   block_cache_entry_data_state_name(entry->state), available,
                   in_use);
        }

        assert(!entry->dirty_ref);

        if (block_cache_entry_data_is_dirty(entry)) {
            stats_timer_start(STATS_CACHE_LOOKUP_CLEAN);
            block_cache_entry_clean(entry);
            block_cache_complete_io(entry->dev);
            stats_timer_stop(STATS_CACHE_LOOKUP_CLEAN);
        }

        /*
         * The chosen entry we are flushing can't have been a special superblock
         * write because we do not select pinned entries, however, any RPMB data
         * write may create a new pinned superblock entry if the RPMB write
         * failed but the write counter was incremented. In this case
         * block_cache_entry_clean() will create a new superblock write by
         * calling fs_unknown_super_block_state_all(). This new write may reuse
         * the block cache entry we just chose and cleaned, resulting in our
         * chosen entry now being pinned for a different transaction. In this
         * case we restart the search for a cache entry and try to pick (and if
         * needed clean) a new entry.
         */

        if (!entry->pinned) {
            /* We found a clean entry to use */
            break;
        }

        pr_warn("%s: Retrying attempt to lookup and (if needed) free a block cache entry. "
                "Entry block %" PRIu64 " was reused during cleaning.\n",
                __func__, entry->block);
    }
    assert(!block_cache_entry_data_is_dirty(entry));
    assert(!entry->dirty_mac);
    assert(!entry->dirty_tr);

    entry->dev = dev;
    entry->block = block;
    assert(dev->block_size <= sizeof(entry->data));
    entry->block_size = dev->block_size;
    entry->key = fs->key;
    entry->state = BLOCK_ENTRY_DATA_INVALID;
    entry->is_superblock = false;

done:
    stats_timer_stop(STATS_CACHE_LOOKUP);

    return entry;
}

enum cache_load_result {
    CACHE_LOAD_SUCCESS = 0,
    CACHE_LOAD_IO_FAILED,
    CACHE_LOAD_NO_DATA,
    CACHE_LOAD_MAC_MISMATCH,
};

/**
 * block_cache_load_entry - Get cache entry for a specific block
 * @entry: Block cache entry to load.
 * @mac: Optional mac.
 * @mac_size: Size of @mac.
 *
 * If entry is not already loaded, attempt to load the block and optionally
 * compare with the expected @mac, if provided.
 *
 * Return: &cache_load_result.CACHE_LOAD_SUCCESS if the block (matching @mac, if
 * provided) was already in cache or was loaded successfully. Otherwise return a
 * relevant error.
 */
static enum cache_load_result block_cache_load_entry(
        struct block_cache_entry* entry,
        const void* mac,
        size_t mac_size) {
    if (!block_cache_entry_data_is_valid(entry)) {
        assert(!block_cache_entry_has_refs(entry));
        if (print_block_load) {
            printf("%s: request load block %" PRIu64 "\n", __func__,
                   entry->block);
        }
        block_cache_queue_read(entry);
        block_cache_complete_io(entry->dev);
    }
    if (!block_cache_entry_data_is_valid(entry)) {
        printf("%s: failed to load block %" PRIu64 ", state: %d\n", __func__,
               entry->block, entry->state);
        switch (entry->state) {
        case BLOCK_ENTRY_DATA_LOAD_FAILED:
            return CACHE_LOAD_IO_FAILED;
        case BLOCK_ENTRY_DATA_NOT_FOUND:
            return CACHE_LOAD_NO_DATA;
        default:
            assert(false && "Unexpected entry state");
        }
    }
    if (mac) {
        if (CRYPTO_memcmp(&entry->mac, mac, mac_size)) {
            printf("%s: block %" PRIu64 ", mac mismatch\n", __func__,
                   entry->block);
            return CACHE_LOAD_MAC_MISMATCH;
        }
    }
    /*
     * We eagerly encrypt a block when releasing it so that we can compute the
     * block's mac. If we re-load the same block before flushing it from the
     * cache, we may end up decrypting a dirty block here, so we want to allow
     * decryption of both clean and dirty blocks.
     */
    if (block_cache_entry_data_is_encrypted(entry)) {
        block_cache_entry_decrypt(entry);
    }
    assert(block_cache_entry_data_is_decrypted(entry));

    return CACHE_LOAD_SUCCESS;
}

/**
 * block_cache_get - Get cache entry for a specific block and add a reference
 * @fs: File system state object.
 * @dev: Block device object.
 * @block: Block number.
 * @load: If true, load data if needed.
 * @mac: Optional mac. Unused if @load is false.
 * @mac_size: Size of @mac.
 * @ref: Pointer to store reference in.
 * @load_result: Optional output pointer to store load result in. May be %NULL.
 *               If not %NULL, @load must be %true.
 *
 * Find cache entry, optionally load it, then add a reference to it.
 *
 * Return: cache entry matching @dev and @block. Can return NULL if @load
 * is true and entry could not be loaded or does not match provided mac.
 */
static struct block_cache_entry* block_cache_get(
        struct fs* fs,
        struct block_device* dev,
        data_block_t block,
        bool load,
        const void* mac,
        size_t mac_size,
        struct obj_ref* ref,
        enum cache_load_result* load_result) {
    enum cache_load_result res;
    struct block_cache_entry* entry;

    assert(dev);
    assert(!load_result || load);

    if (block >= dev->block_count) {
        printf("%s: bad block num %" PRIu64 " >= %" PRIu64 "\n", __func__,
               block, dev->block_count);
        if (load_result) {
            *load_result = CACHE_LOAD_NO_DATA;
        }
        return NULL;
    }
    assert(block < dev->block_count);

    entry = block_cache_lookup(fs, dev, block, true);
    assert(entry);

    if (load) {
        res = block_cache_load_entry(entry, mac, mac_size);
        if (res == CACHE_LOAD_MAC_MISMATCH) {
            error_report_block_mac_mismatch(fs->name, TRUSTY_BLOCKTYPE_UNKNOWN);
        }
        if (load_result) {
            *load_result = res;
        }
        if (res != CACHE_LOAD_SUCCESS) {
            return NULL;
        }
    }

    assert(!entry->dirty_ref);
    obj_add_ref_allow_unreferenced_obj(&entry->obj, ref);
    if (print_block_ops) {
        printf("%s: block %" PRIu64 ", cache entry %zd, state %s\n", __func__,
               block, entry - block_cache_entries,
               block_cache_entry_data_state_name(entry->state));
    }
    return entry;
}

/**
 * block_cache_get_data - Call block_cache_get and return data pointer
 * @fs: File system state object.
 * @dev: Block device object.
 * @block: Block number.
 * @load: If true, load data if needed.
 * @mac: Optional mac. Unused if @load is false.
 * @mac_size: Size of @mac.
 * @ref: Pointer to store reference in.
 * @load_result: Optional output pointer to store load result in. May be %NULL.
 *               Only updated if @load is %true.
 *
 * Return: block data pointer, or NULL if block_cache_get returned NULL.
 */
static void* block_cache_get_data(struct fs* fs,
                                  struct block_device* dev,
                                  data_block_t block,
                                  bool load,
                                  const void* mac,
                                  size_t mac_size,
                                  struct obj_ref* ref,
                                  enum cache_load_result* load_result) {
    struct block_cache_entry* entry;
    entry = block_cache_get(fs, dev, block, load, mac, mac_size, ref,
                            load_result);
    if (!entry) {
        return NULL;
    }
    return entry->data;
}

/**
 * data_to_block_cache_entry - Get cache entry from data pointer
 * @data: Pointer to data member of cache entry.
 *
 * Return: cache entry matching @data.
 */
static struct block_cache_entry* data_to_block_cache_entry(const void* data) {
    struct block_cache_entry* entry;

    assert(data);
    entry = containerof(data, struct block_cache_entry, data);
    assert(entry >= block_cache_entries);
    assert(entry < &block_cache_entries[BLOCK_CACHE_SIZE]);
    assert(((uintptr_t)entry - (uintptr_t)block_cache_entries) %
                   sizeof(entry[0]) ==
           0);
    return entry;
}

/**
 * data_to_block_cache_entry_or_null - Get cache entry or NULL from data pointer
 * @data: Pointer to data member of cache entry or NULL.
 *
 * Return: cache entry matching @data, or NULL if @data is NULL.
 */
static struct block_cache_entry* data_to_block_cache_entry_or_null(
        const void* data) {
    return data ? data_to_block_cache_entry(data) : NULL;
}

/**
 * block_cache_entry_destroy - Callback function for obj_del_ref
 * @obj: Pointer to obj member of cache entry.
 *
 * Callback called by reference tracking code when the last reference to a
 * cache entry has been released. Since this is a cache, and not a normal heap
 * allocated object, the cache entry is not destroyed here. It is instead left
 * in a state where block_cache_lookup can reuse it.
 */
static void block_cache_entry_destroy(struct obj* obj) {
    struct block_cache_entry* entry =
            containerof(obj, struct block_cache_entry, obj);

    list_delete(&entry->lru_node);
    list_add_head(&block_cache_lru, &entry->lru_node);

    if (entry->dirty_mac) {
        block_cache_entry_encrypt(entry);
    }
}

/**
 * block_cache_init - Allocate and initialize block cache
 */
void block_cache_init(void) {
    int i;
    struct obj_ref ref;

    assert(!block_cache_init_called);

    block_cache_init_called = true;

    full_assert(memset(block_cache_entries, 1, sizeof(block_cache_entries)));

    for (i = 0; i < BLOCK_CACHE_SIZE; i++) {
        block_cache_entries[i].guard1 = BLOCK_CACHE_GUARD_1;
        block_cache_entries[i].guard2 = BLOCK_CACHE_GUARD_2;
        block_cache_entries[i].dev = NULL;
        block_cache_entries[i].block = DATA_BLOCK_INVALID;
        block_cache_entries[i].state = BLOCK_ENTRY_DATA_INVALID;
        block_cache_entries[i].dirty_ref = false;
        block_cache_entries[i].dirty_mac = false;
        block_cache_entries[i].pinned = false;
        block_cache_entries[i].is_superblock = false;
        block_cache_entries[i].dirty_tr = NULL;
        block_cache_entries[i].io_op = BLOCK_CACHE_IO_OP_NONE;
        obj_init(&block_cache_entries[i].obj, &ref);
        list_clear_node(&block_cache_entries[i].io_op_node);
        list_add_head(&block_cache_lru, &block_cache_entries[i].lru_node);
        obj_del_ref(&block_cache_entries[i].obj, &ref,
                    block_cache_entry_destroy);
    }
}

/**
 * block_cache_dev_destroy - Discard all blocks associated with device
 * @dev: Block device to remove
 */
void block_cache_dev_destroy(struct block_device* dev) {
    int i;
    for (i = 0; i < BLOCK_CACHE_SIZE; i++) {
        if (block_cache_entries[i].dev == dev) {
            block_cache_entry_discard(&block_cache_entries[i]);
        }
    }
}

/**
 * block_cache_clean_transaction - Clean blocks modified by transaction
 * @tr: Transaction
 */
void block_cache_clean_transaction(struct transaction* tr) {
    struct block_cache_entry* entry;
    struct block_device* dev = NULL;

    stats_timer_start(STATS_CACHE_CLEAN_TRANSACTION);

    list_for_every_entry(&block_cache_lru, entry, struct block_cache_entry,
                         lru_node) {
        assert(entry->guard1 == BLOCK_CACHE_GUARD_1);
        assert(entry->guard2 == BLOCK_CACHE_GUARD_2);
        if (entry->dirty_tr != tr) {
            continue;
        }

        assert(block_cache_entry_data_is_dirty(entry));

        assert(!entry->dirty_ref);

        if (entry->dirty_tmp) {
            continue;
        }

        if (!dev) {
            dev = entry->dev;
            assert(dev == tr->fs->dev || dev == tr->fs->super_dev);
        }

        assert(entry->dev == dev);

        if (print_clean_transaction) {
#if TLOG_LVL >= TLOG_LVL_DEBUG
            printf("%s: tr %p, block %" PRIu64 "\n", __func__, tr,
                   entry->block);
#else
            printf("%s: transaction block %" PRIu64 "\n", __func__,
                   entry->block);
#endif
        }

        assert(!block_cache_entry_has_refs(entry));
        stats_timer_start(STATS_CACHE_CLEAN_TRANSACTION_ENT_CLN);
        block_cache_entry_clean(entry);
        stats_timer_stop(STATS_CACHE_CLEAN_TRANSACTION_ENT_CLN);
        assert(entry->dirty_tr != tr);
        if (!tr->failed) {
            /*
             * If the write failed we may have reused this block cache entry for
             * a super block write and it therefore might not be clean.
             */
            assert(!block_cache_entry_data_is_dirty(entry));
            assert(!entry->dirty_tr);
        }
    }

    if (dev) {
        stats_timer_start(STATS_CACHE_CLEAN_TRANSACTION_WAIT_IO);
        block_cache_complete_io(dev);
        stats_timer_stop(STATS_CACHE_CLEAN_TRANSACTION_WAIT_IO);
    }
    stats_timer_stop(STATS_CACHE_CLEAN_TRANSACTION);
}

/**
 * block_cache_discard_transaction - Discard blocks modified by transaction
 * @tr: Transaction
 * @discard_all: If true, discard all dirty blocks modified by @tr. If false,
 *               discard tmp dirty blocks modified by @tr.
 *
 * If @discard_all is %false, only tmp blocks should be dirty. @discard_all
 * therefore only affects error checks.
 */
void block_cache_discard_transaction(struct transaction* tr, bool discard_all) {
    struct block_cache_entry* entry;
    struct block_device* dev = NULL;

    list_for_every_entry(&block_cache_lru, entry, struct block_cache_entry,
                         lru_node) {
        assert(entry->guard1 == BLOCK_CACHE_GUARD_1);
        assert(entry->guard2 == BLOCK_CACHE_GUARD_2);
        if (entry->dirty_tr != tr) {
            continue;
        }

        if (entry->dirty_tmp) {
            /* tmp blocks should never be on the superblock device */
            assert(entry->dev == tr->fs->dev);
        } else {
            /*
             * A transaction should never have dirty non-tmp blocks on both
             * devices at the same time.
             */
            if (!dev) {
                dev = entry->dev;
                assert(dev == tr->fs->dev || dev == tr->fs->super_dev);
            }
            assert(entry->dev == dev);
        }
        assert(block_cache_entry_data_is_dirty(entry));

        if (print_clean_transaction) {
#if TLOG_LVL >= TLOG_LVL_DEBUG
            printf("%s: tr %p, block %" PRIu64 ", tmp %d\n", __func__, tr,
                   entry->block, entry->dirty_tmp);
#else
            printf("%s: transaction block %" PRIu64 ", tmp %d\n", __func__,
                   entry->block, entry->dirty_tmp);
#endif
        }

        if (block_cache_entry_has_refs(entry)) {
#if TLOG_LVL >= TLOG_LVL_DEBUG
            pr_warn("tr %p, block %" PRIu64 " has ref (dirty_ref %d)\n", tr,
                    entry->block, entry->dirty_ref);
#else
            pr_warn("transaction block %" PRIu64 " has ref (dirty_ref %d)\n",
                    entry->block, entry->dirty_ref);
#endif
        } else {
            assert(!entry->dirty_ref);
        }
        if (!discard_all) {
            assert(!block_cache_entry_has_refs(entry));
            assert(entry->dirty_tmp);
        }
        entry->dirty_tr = NULL;
        entry->state = BLOCK_ENTRY_DATA_INVALID;
        assert(!entry->dirty_tr);
    }
}

/**
 * block_get_no_read - Get block data without read
 * @tr: Transaction to get device from
 * @block: Block number
 * @ref: Pointer to store reference in.
 *
 * Return: Const block data pointer.
 *
 * This is only useful if followed by block_dirty.
 */
const void* block_get_no_read(struct transaction* tr,
                              data_block_t block,
                              struct obj_ref* ref) {
    assert(tr);
    assert(tr->fs);

    return block_cache_get_data(tr->fs, tr->fs->dev, block, false, NULL, 0, ref,
                                NULL);
}

/**
 * block_get_super - Get super block data without checking mac
 * @fs: File system state object.
 * @block: Block number.
 * @ref: Pointer to store reference in.
 *
 * Return: Const block data pointer.
 */
const void* block_get_super(struct fs* fs,
                            data_block_t block,
                            struct obj_ref* ref) {
    assert(fs);
    assert(fs->super_dev);
    assert((fs->allow_tampering && !fs->super_dev->tamper_detecting) ||
           (!fs->allow_tampering && fs->super_dev->tamper_detecting));

    return block_cache_get_data(fs, fs->super_dev, block, true, NULL, 0, ref,
                                NULL);
}

/**
 * block_get_super_with_mac - Get super block data and check the mac
 * @fs: File system state object.
 * @block_mac: Block number and mac.
 * @ref: Pointer to store reference in.
 *
 * Return: Const block data pointer.
 */
const void* block_get_super_with_mac(struct fs* fs,
                                     const struct block_mac* block_mac,
                                     struct obj_ref* ref) {
    assert(fs);
    assert(fs->super_dev);

    return block_cache_get_data(
            fs, fs->super_dev, block_mac_to_block_fs(fs, block_mac), true,
            block_mac_to_mac_fs(fs, block_mac), fs->mac_size, ref, NULL);
}

/**
 * block_get_no_tr_fail - Get block data
 * @tr: Transaction to get device from
 * @block_mac: Block number and mac
 * @iv: Initial vector used to decrypt block, or NULL. If NULL, the
 *      start of the loaded block data is used as the iv.
 *      Only NULL is currently supported.
 * @ref: Pointer to store reference in.
 *
 * Return: Const block data pointer, or NULL if the mac of the loaded data does
 * not match the mac in @block_mac or a read error was reported by the block
 * device when loading the data.
 */
const void* block_get_no_tr_fail(struct transaction* tr,
                                 const struct block_mac* block_mac,
                                 const struct iv* iv,
                                 struct obj_ref* ref) {
    data_block_t block;
    void* data;
    enum cache_load_result load_result = CACHE_LOAD_NO_DATA;

    assert(tr);
    assert(tr->fs);
    assert(block_mac);

    block = block_mac_to_block(tr, block_mac);
    assert(block);

    data = block_cache_get_data(tr->fs, tr->fs->dev, block, true,
                                block_mac_to_mac(tr, block_mac),
                                tr->fs->mac_size, ref, &load_result);
    if (load_result == CACHE_LOAD_MAC_MISMATCH ||
        load_result == CACHE_LOAD_NO_DATA) {
        tr->invalid_block_found = true;
    }
    return data;
}

/**
 * block_get - Get block data
 * @tr: Transaction to get device from
 * @block_mac: Block number and mac
 * @iv: Initial vector used to decrypt block, or NULL. If NULL, the
 *      start of the loaded block data is used as the iv.
 *      Only NULL is currently supported.
 * @ref: Pointer to store reference in.
 *
 * Return: Const block data pointer, or NULL if the transaction has failed. A
 * transaction failure is triggered if the mac of the loaded data does not
 * match the mac in @block_mac or a read error was reported by the block device
 * when loading the data.
 */
const void* block_get(struct transaction* tr,
                      const struct block_mac* block_mac,
                      const struct iv* iv,
                      struct obj_ref* ref) {
    const void* data;

    assert(tr);

    if (tr->failed) {
        pr_warn("transaction already failed\n");
        return NULL;
    }

    data = block_get_no_tr_fail(tr, block_mac, iv, ref);
    if (!data && !tr->failed) {
        pr_warn("transaction failed\n");
        transaction_fail(tr);
        if (tr->invalid_block_found) {
            fs_mark_scan_required(tr->fs);
        }
    }
    return data;
}
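
/*
 * Typical read path (illustrative sketch; "tr" and "block_mac" are
 * hypothetical caller state, e.g. a block_mac read from a parent node):
 *
 *     struct obj_ref ref = OBJ_REF_INITIAL_VALUE(ref);
 *     const void* data = block_get(tr, block_mac, NULL, &ref);
 *     if (!data) {
 *         return; // transaction has failed, caller should unwind
 *     }
 *     // ... read the decrypted block contents ...
 *     block_put(data, &ref);
 */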

/**
 * block_dirty - Mark cache entry dirty and return non-const block data pointer.
 * @tr: Transaction
 * @data: Const block data pointer
 * @is_tmp: If true, data is only needed until @tr is committed.
 *
 * Return: Non-const block data pointer.
 */
void* block_dirty(struct transaction* tr, const void* data, bool is_tmp) {
    struct block_cache_entry* entry = data_to_block_cache_entry(data);

    assert(tr);
    assert(list_in_list(&tr->node)); /* transaction must be active */
    assert(!entry->dirty_tr || entry->dirty_tr == tr);
    assert(!entry->dirty_ref);
    assert(fs_is_writable(tr->fs));

    if (block_cache_entry_data_is_encrypted(entry)) {
        if (print_block_ops) {
            printf("%s: skip decrypt block %" PRIu64 "\n", __func__,
                   entry->block);
        }
    } else if (entry->state != BLOCK_ENTRY_DATA_CLEAN_DECRYPTED) {
        if (print_block_ops) {
            printf("%s: Dirtying block %" PRIu64
                   " that was not loaded. Previous state: %s\n",
                   __func__, entry->block,
                   block_cache_entry_data_state_name(entry->state));
        }
    }
    assert(block_cache_entry_has_one_ref(entry));
    entry->state = BLOCK_ENTRY_DATA_DIRTY_DECRYPTED;
    entry->dirty_ref = true;
    entry->dirty_tmp = is_tmp;
    entry->dirty_tr = tr;
    return (void*)data;
}
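
/*
 * Typical read-modify-write path (illustrative sketch; "tr", "block_mac" and
 * "parent_data" are hypothetical caller state; block_get_write() below wraps
 * block_get() followed by block_dirty()):
 *
 *     struct obj_ref ref = OBJ_REF_INITIAL_VALUE(ref);
 *     void* data = block_get_write(tr, block_mac, NULL, false, &ref);
 *     if (!data) {
 *         return; // transaction has failed
 *     }
 *     // ... modify the decrypted block contents ...
 *     block_put_dirty(tr, data, &ref, block_mac, parent_data);
 */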

/**
 * block_is_clean - Check if block is clean
 * @dev: Block device
 * @block: Block number
 *
 * Return: %true if there is no matching dirty cache entry, %false if the cache
 * contains a dirty block matching @dev and @block.
 */
bool block_is_clean(struct block_device* dev, data_block_t block) {
    struct block_cache_entry* entry;

    entry = block_cache_lookup(NULL, dev, block, false);
    return !entry || !block_cache_entry_data_is_dirty(entry);
}

/**
 * block_discard_dirty - Discard dirty cache data.
 * @data: Block data pointer
 */
void block_discard_dirty(const void* data) {
    struct block_cache_entry* entry = data_to_block_cache_entry(data);

    if (block_cache_entry_data_is_dirty(entry)) {
        assert(entry->dev);
        block_cache_entry_discard_dirty(entry);
    }
}

/**
 * block_discard_dirty_by_block - Discard cache entry if dirty.
 * @dev: Block device
 * @block: Block number
 */
void block_discard_dirty_by_block(struct block_device* dev,
                                  data_block_t block) {
    struct block_cache_entry* entry;

    entry = block_cache_lookup(NULL, dev, block, false);
    if (!entry) {
        return;
    }
    assert(!entry->dirty_ref);
    assert(!block_cache_entry_has_refs(entry));
    if (!block_cache_entry_data_is_dirty(entry)) {
        return;
    }
    block_discard_dirty(entry->data);
}

/**
 * block_put_dirty_etc - Release reference to dirty block.
 * @tr: Transaction
 * @data: Block data pointer
 * @data_ref: Reference pointer to release
 * @block_mac: block_mac pointer to update after encrypting block
 * @block_mac_ref: Block data pointer that @block_mac belongs to, or NULL if
 *                 @block_mac points to a memory only location.
 *
 * Helper function for block_put_dirty, block_put_dirty_no_mac and
 * block_put_dirty_discard.
 */
static void block_put_dirty_etc(struct transaction* tr,
                                void* data,
                                struct obj_ref* data_ref,
                                struct block_mac* block_mac,
                                void* block_mac_ref) {
    int ret;
    struct block_cache_entry* entry = data_to_block_cache_entry(data);
    struct block_cache_entry* parent =
            data_to_block_cache_entry_or_null(block_mac_ref);
    struct iv* iv = (void*)entry->data; /* TODO: support external iv */

    if (tr) {
        assert(block_mac);
        assert(entry->state == BLOCK_ENTRY_DATA_DIRTY_DECRYPTED);
        assert(entry->dirty_ref);
    } else {
        assert(!block_mac);
    }
    assert(entry->guard1 == BLOCK_CACHE_GUARD_1);
    assert(entry->guard2 == BLOCK_CACHE_GUARD_2);

    entry->dirty_ref = false;
    if (block_cache_entry_data_is_dirty(entry)) {
        entry->dirty_mac = true;
        ret = generate_iv(iv);
        assert(!ret);
    } else {
        pr_warn("block %" PRIu64 ", not dirty\n", entry->block);
        assert(entry->dirty_tr == NULL);
        assert(!tr);
    }

    block_put(data, data_ref);
    /* TODO: fix clients to support lazy write */
    assert(block_cache_entry_data_is_encrypted(entry) || !tr);
    assert(!entry->dirty_mac);
    if (block_mac) {
        assert(block_mac_to_block(tr, block_mac) == entry->block);
        block_mac_set_mac(tr, block_mac, &entry->mac);
    }
#if TLOG_LVL >= TLOG_LVL_DEBUG
    if (print_mac_update) {
        printf("%s: block %" PRIu64 ", update parent mac, %p, block %" PRIu64
               "\n",
               __func__, entry->block, block_mac, parent ? parent->block : 0);
    }
#endif
}

/**
 * block_put_dirty - Release reference to dirty block.
 * @tr: Transaction
 * @data: Block data pointer
 * @data_ref: Reference pointer to release
 * @block_mac: block_mac pointer to update after encrypting block
 * @block_mac_ref: Block data pointer that @block_mac belongs to, or NULL if
 *                 @block_mac points to a memory only location.
 */
void block_put_dirty(struct transaction* tr,
                     void* data,
                     struct obj_ref* data_ref,
                     struct block_mac* block_mac,
                     void* block_mac_ref) {
    assert(tr);
    assert(block_mac);
    block_put_dirty_etc(tr, data, data_ref, block_mac, block_mac_ref);
}

/**
 * block_put_dirty_no_mac - Release reference to dirty super block.
 * @data: Block data pointer
 * @data_ref: Reference pointer to release
 * @allow_tampering: %true if this file system does not require tamper-proof
 *                   super block storage, %false if tamper detection is
 *                   required.
 *
 * Similar to block_put_dirty except no transaction or block_mac is needed.
 */
void block_put_dirty_no_mac(void* data,
                            struct obj_ref* data_ref,
                            bool allow_tampering) {
    struct block_cache_entry* entry = data_to_block_cache_entry(data);

    assert(entry->dev);
    assert((allow_tampering && !entry->dev->tamper_detecting) ||
           (!allow_tampering && entry->dev->tamper_detecting));
    block_put_dirty_etc(NULL, data, data_ref, NULL, NULL);
}

/**
 * block_put_dirty_discard - Release reference to dirty block.
 * @data: Block data pointer
 * @data_ref: Reference pointer to release
 *
 * Similar to block_put_dirty except data can be discarded.
 */
void block_put_dirty_discard(void* data, struct obj_ref* data_ref) {
    block_put_dirty_etc(NULL, data, data_ref, NULL, NULL);
    block_discard_dirty(data);
}

/**
 * block_get_write_no_read - Get block data without read for write
 * @tr: Transaction
 * @block: Block number
 * @is_tmp: If true, data is only needed until @tr is committed.
 * @ref: Pointer to store reference in.
 *
 * Same as block_get_no_read followed by block_dirty.
 *
 * Return: Block data pointer.
 */
void* block_get_write_no_read(struct transaction* tr,
                              data_block_t block,
                              bool is_tmp,
                              struct obj_ref* ref) {
    const void* data_ro = block_get_no_read(tr, block, ref);
    return block_dirty(tr, data_ro, is_tmp);
}

/**
 * block_get_write - Get block data for write
 * @tr: Transaction
 * @block_mac: Block number and mac
 * @iv: Initial vector used to decrypt block, or NULL. If NULL, the
 *      start of the loaded block data is used as the iv.
 *      Only NULL is currently supported.
 * @is_tmp: If true, data is only needed until @tr is committed.
 * @ref: Pointer to store reference in.
 *
 * Same as block_get followed by block_dirty.
 *
 * Return: Block data pointer, or NULL if block_get returned NULL.
 */
void* block_get_write(struct transaction* tr,
                      const struct block_mac* block_mac,
                      const struct iv* iv,
                      bool is_tmp,
                      struct obj_ref* ref) {
    const void* data_ro = block_get(tr, block_mac, iv, ref);
    if (!data_ro) {
        return NULL;
    }
    return block_dirty(tr, data_ro, is_tmp);
}

/**
 * block_get_cleared - Get block cleared data for write
 * @tr: Transaction
 * @block: Block number
 * @is_tmp: If true, data is only needed until @tr is committed.
 * @ref: Pointer to store reference in.
 *
 * Return: Block data pointer.
 */
void* block_get_cleared(struct transaction* tr,
                        data_block_t block,
                        bool is_tmp,
                        struct obj_ref* ref) {
    void* data = block_get_write_no_read(tr, block, is_tmp, ref);
    memset(data, 0, MAX_BLOCK_SIZE);
    return data;
}

/**
 * block_get_cleared_super - Get block with cleared data for write on super_dev
 * @tr: Transaction
 * @block: Block number
 * @ref: Pointer to store reference in.
 * @pinned: Pin this block in the cache until it is successfully written
 *
 * Return: Block data pointer.
 */
void* block_get_cleared_super(struct transaction* tr,
                              data_block_t block,
                              struct obj_ref* ref,
                              bool pinned) {
    void* data_rw;
    const void* data_ro = block_cache_get_data(tr->fs, tr->fs->super_dev, block,
                                               false, NULL, 0, ref, NULL);

    /*
     * We should never end up in a situation where there is a dirty copy of a
     * super block in the cache while we are trying to rewrite that super block.
     * If a super block entry was created via write_current_super_block(), it
     * must be flushed before the necessary data writes go through to write new
     * root nodes. If we are trying to commit an empty transaction (i.e. no data
     * blocks changed), we skip the super block update in
     * transaction_complete(). The only other way to write a new super block,
     * write_current_super_block(), will be a no-op if there is already a
     * pending super block rewrite.
     */
    assert(data_ro);
    struct block_cache_entry* entry = data_to_block_cache_entry(data_ro);
    assert(!block_cache_entry_data_is_dirty(entry));
    entry->pinned = pinned;
    entry->is_superblock = true;

    data_rw = block_dirty(tr, data_ro, false);
    assert(tr->fs->super_dev->block_size <= MAX_BLOCK_SIZE);
    memset(data_rw, 0, tr->fs->super_dev->block_size);
    return data_rw;
}

/**
 * block_get_copy - Get block for write with data copied from another block.
 * @tr: Transaction
 * @data: Block data pointer
 * @block: New block number
 * @is_tmp: If true, data is only needed until @tr is committed.
 * @new_ref: Pointer to store reference to new block in.
 *
 * Return: Block data pointer.
 */
void* block_get_copy(struct transaction* tr,
                     const void* data,
                     data_block_t block,
                     bool is_tmp,
                     struct obj_ref* new_ref) {
    void* dst_data;
    struct block_cache_entry* src_entry = data_to_block_cache_entry(data);

    assert(block);
    assert(block < tr->fs->dev->block_count);

    dst_data = block_get_write_no_read(tr, block, is_tmp, new_ref);
    memcpy(dst_data, data, src_entry->block_size);
    return dst_data;
}

/**
 * block_move - Get block for write and move to new location
 * @tr: Transaction
 * @data: Block data pointer
 * @block: New block number
 * @is_tmp: If true, data is only needed until @tr is committed.
 *
 * Change the block number of the cache entry and mark the new block dirty.
 * Useful for copy-on-write.
 *
 * Return: Non-const block data pointer.
 */
void* block_move(struct transaction* tr,
                 const void* data,
                 data_block_t block,
                 bool is_tmp) {
    struct block_cache_entry* dest_entry;
    struct block_cache_entry* entry = data_to_block_cache_entry(data);

    assert(block_cache_entry_has_one_ref(entry));
    assert(!block_cache_entry_data_is_dirty(entry));
    assert(entry->dev == tr->fs->dev);

    if (print_block_move) {
        printf("%s: move cache entry %zd, from block %" PRIu64 " to %" PRIu64
               "\n",
               __func__, entry - block_cache_entries, entry->block, block);
    }

    dest_entry = block_cache_lookup(NULL, tr->fs->dev, block, false);
    if (dest_entry) {
        assert(!block_cache_entry_has_refs(dest_entry));
        assert(!dest_entry->dirty_ref);
        assert(!dest_entry->dirty_tr || dest_entry->dirty_tr == tr);
        assert(!list_in_list(&dest_entry->io_op_node));
        assert(dest_entry->block == block);
        if (print_block_move) {
            printf("%s: clear old cache entry for block %" PRIu64 ", %zd\n",
                   __func__, block, dest_entry - block_cache_entries);
        }
        /* TODO: Use block_cache_entry_discard instead? */
        block_cache_entry_discard_dirty(dest_entry);
    }

    entry->block = block;
    return block_dirty(tr, data, is_tmp);
}

/**
 * block_put - Release reference to block.
 * @data: Block data pointer
 * @ref: Reference pointer to release
 */
void block_put(const void* data, struct obj_ref* ref) {
    struct block_cache_entry* entry = data_to_block_cache_entry(data);

    if (print_block_ops) {
        printf("%s: block %" PRIu64 ", cache entry %zd, state %s\n", __func__,
               entry->block, entry - block_cache_entries,
               block_cache_entry_data_state_name(entry->state));
    }

    assert(!entry->dirty_ref);

    obj_del_ref(&entry->obj, ref, block_cache_entry_destroy);
}

/**
 * block_probe - Verify that the given block is loadable and its mac is correct
 * @fs: Filesystem containing the block to probe
 * @block_mac: Block to probe
 * @allow_invalid: If %true, an invalid (i.e. zero) @block_mac will not be
 *                 probed and this function will return true
 *
 * Return: %false if the block is not valid or does not match the expected mac.
 * Returns %true if the block was readable, valid and matched the expected mac.
 * If @allow_invalid is %true, also return %true if @block_mac is invalid. Also
 * returns %true if an I/O error was encountered which does not positively
 * confirm a corrupted block.
 */
bool block_probe(struct fs* fs,
                 const struct block_mac* block_mac,
                 bool allow_invalid) {
    struct transaction probe_tr;
    struct obj_ref probe_ref = OBJ_REF_INITIAL_VALUE(probe_ref);
    const void* probe_block;
    /*
     * Assume the block is valid unless we get positive confirmation of an
     * invalid block.
     */
    bool valid = true;

    transaction_init(&probe_tr, fs, true);
    if (block_mac_valid(&probe_tr, block_mac)) {
        probe_block =
                block_get_no_tr_fail(&probe_tr, block_mac, NULL, &probe_ref);
        if (probe_block) {
            block_put(probe_block, &probe_ref);
        } else if (probe_tr.invalid_block_found) {
            valid = false;
        }
    } else if (allow_invalid) {
        valid = true;
    }
    transaction_fail(&probe_tr);
    transaction_free(&probe_tr);

    return valid;
}
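
/*
 * Illustrative use of block_probe() (hypothetical caller code;
 * "root_block_mac" is assumed to come from a superblock field):
 *
 *     if (!block_probe(fs, root_block_mac, true)) {
 *         // The block is positively corrupt (bad mac or missing data,
 *         // not just an ambiguous I/O error); trigger scan/recovery.
 *     }
 */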

/**
 * data_to_block_num - Get block number from block data pointer
 * @data: Block data pointer
 *
 * Only used for debug code.
 *
 * Return: block number.
 */
data_block_t data_to_block_num(const void* data) {
    struct block_cache_entry* entry = data_to_block_cache_entry(data);

    return entry->block;
}

/**
 * block_cache_debug_get_ref_block_count - Get number of blocks that have
 *                                         references
 *
 * Only used for debug code.
 *
 * Return: number of blocks in cache that have references.
 */
unsigned int block_cache_debug_get_ref_block_count(void) {
    unsigned int count = 0;
    struct block_cache_entry* entry;

    list_for_every_entry(&block_cache_lru, entry, struct block_cache_entry,
                         lru_node) {
        assert(entry->guard1 == BLOCK_CACHE_GUARD_1);
        assert(entry->guard2 == BLOCK_CACHE_GUARD_2);
        if (block_cache_entry_has_refs(entry)) {
            if (print_cache_get_ref_block_count) {
#if TLOG_LVL >= TLOG_LVL_DEBUG
                printf("%s: cache entry %zd in use for %" PRIu64 ", dev %p\n",
                       __func__, entry - block_cache_entries, entry->block,
                       entry->dev);
#else
                printf("%s: cache entry %zd in use for %" PRIu64 "\n",
                       __func__, entry - block_cache_entries, entry->block);
#endif
            }
            count++;
        }
    }
    return count;
}