// SPDX-License-Identifier: GPL-2.0
/*
 * Data verification functions, i.e. hooks for ->readahead()
 *
 * Copyright 2019 Google LLC
 */

#include "fsverity_private.h"

#include <crypto/hash.h>
#include <linux/bio.h>

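/*
 * A data block that is queued for hashing and verification.  @data is the
 * kmapped address of the block, @pos is the byte offset of the block within
 * the file, and @real_hash receives the hash computed for the block.
 */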
struct fsverity_pending_block {
        const void *data;
        u64 pos;
        u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE];
};

struct fsverity_verification_context {
        struct inode *inode;
        struct fsverity_info *vi;
        unsigned long max_ra_pages;

        /*
         * This is the queue of data blocks that are pending verification.  We
         * allow multiple blocks to be queued up in order to support multibuffer
         * hashing, i.e. interleaving the hashing of multiple messages.  On many
         * CPUs this improves performance significantly.
         */
        int num_pending;
        struct fsverity_pending_block pending_blocks[FS_VERITY_MAX_PENDING_DATA_BLOCKS];
};

static struct workqueue_struct *fsverity_read_workqueue;

/*
 * Returns true if the hash block with index @hblock_idx in the tree, located in
 * @hpage, has already been verified.
 */
static bool is_hash_block_verified(struct fsverity_info *vi, struct page *hpage,
                                   unsigned long hblock_idx)
{
        unsigned int blocks_per_page;
        unsigned int i;

        /*
         * When the Merkle tree block size and page size are the same, then the
         * ->hash_block_verified bitmap isn't allocated, and we use PG_checked
         * to directly indicate whether the page's block has been verified.
         *
         * Using PG_checked also guarantees that we re-verify hash pages that
         * get evicted and re-instantiated from the backing storage, as new
         * pages always start out with PG_checked cleared.
         */
        if (!vi->hash_block_verified)
                return PageChecked(hpage);

        /*
         * When the Merkle tree block size and page size differ, we use a bitmap
         * to indicate whether each hash block has been verified.
         *
         * However, we still need to ensure that hash pages that get evicted and
         * re-instantiated from the backing storage are re-verified.  To do
         * this, we use PG_checked again, but now it doesn't really mean
         * "checked".  Instead, now it just serves as an indicator for whether
         * the hash page is newly instantiated or not.  If the page is new, as
         * indicated by PG_checked=0, we clear the bitmap bits for the page's
         * blocks since they are untrustworthy, then set PG_checked=1.
         * Otherwise we return the bitmap bit for the requested block.
         *
         * Multiple threads may execute this code concurrently on the same page.
         * This is safe because we use memory barriers to ensure that if a
         * thread sees PG_checked=1, then it also sees the associated bitmap
         * clearing to have occurred.  Also, all writes and their corresponding
         * reads are atomic, and all writes are safe to repeat in the event that
         * multiple threads get into the PG_checked=0 section.  (Clearing a
         * bitmap bit again at worst causes a hash block to be verified
         * redundantly.  That event should be very rare, so it's not worth using
         * a lock to avoid.  Setting PG_checked again has no effect.)
         */
        if (PageChecked(hpage)) {
                /*
                 * A read memory barrier is needed here to give ACQUIRE
                 * semantics to the above PageChecked() test.
                 */
                smp_rmb();
                return test_bit(hblock_idx, vi->hash_block_verified);
        }
        blocks_per_page = vi->tree_params.blocks_per_page;
        hblock_idx = round_down(hblock_idx, blocks_per_page);
        for (i = 0; i < blocks_per_page; i++)
                clear_bit(hblock_idx + i, vi->hash_block_verified);
        /*
         * A write memory barrier is needed here to give RELEASE semantics to
         * the below SetPageChecked() operation.
         */
        smp_wmb();
        SetPageChecked(hpage);
        return false;
}

/*
 * Verify the hash of a single data block against the file's Merkle tree.
 *
 * In principle, we need to verify the entire path to the root node.  However,
 * for efficiency the filesystem may cache the hash blocks.  Therefore we need
 * only ascend the tree until an already-verified hash block is seen, and then
 * verify the path to that block.
 *
 * Return: %true if the data block is valid, else %false.
 */
static bool
verify_data_block(struct inode *inode, struct fsverity_info *vi,
                  const struct fsverity_pending_block *dblock,
                  unsigned long max_ra_pages)
{
        const u64 data_pos = dblock->pos;
        const struct merkle_tree_params *params = &vi->tree_params;
        const unsigned int hsize = params->digest_size;
        int level;
        u8 _want_hash[FS_VERITY_MAX_DIGEST_SIZE];
        const u8 *want_hash;
        u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE];
        /* The hash blocks that are traversed, indexed by level */
        struct {
                /* Page containing the hash block */
                struct page *page;
                /* Mapped address of the hash block (will be within @page) */
                const void *addr;
                /* Index of the hash block in the tree overall */
                unsigned long index;
                /* Byte offset of the wanted hash relative to @addr */
                unsigned int hoffset;
        } hblocks[FS_VERITY_MAX_LEVELS];
        /*
         * The index of the previous level's block within that level; also the
         * index of that block's hash within the current level.
         */
        u64 hidx = data_pos >> params->log_blocksize;

        /*
         * Up to FS_VERITY_MAX_PENDING_DATA_BLOCKS + FS_VERITY_MAX_LEVELS pages
         * may be mapped at once.
         */
        BUILD_BUG_ON(FS_VERITY_MAX_PENDING_DATA_BLOCKS +
                     FS_VERITY_MAX_LEVELS > KM_MAX_IDX);

        if (unlikely(data_pos >= inode->i_size)) {
                /*
                 * This can happen in the data page spanning EOF when the Merkle
                 * tree block size is less than the page size.  The Merkle tree
                 * doesn't cover data blocks fully past EOF.  But the entire
                 * page spanning EOF can be visible to userspace via a mmap, and
                 * any part past EOF should be all zeroes.  Therefore, we need
                 * to verify that any data blocks fully past EOF are all zeroes.
                 */
                if (memchr_inv(dblock->data, 0, params->block_size)) {
                        fsverity_err(inode,
                                     "FILE CORRUPTED! Data past EOF is not zeroed");
                        return false;
                }
                return true;
        }

        /*
         * Starting at the leaf level, ascend the tree saving hash blocks along
         * the way until we find a hash block that has already been verified, or
         * until we reach the root.
         */
        for (level = 0; level < params->num_levels; level++) {
                unsigned long next_hidx;
                unsigned long hblock_idx;
                pgoff_t hpage_idx;
                unsigned int hblock_offset_in_page;
                unsigned int hoffset;
                struct page *hpage;
                const void *haddr;

                /*
                 * The index of the block in the current level; also the index
                 * of that block's hash within the next level.
                 */
                next_hidx = hidx >> params->log_arity;

                /* Index of the hash block in the tree overall */
                hblock_idx = params->level_start[level] + next_hidx;

                /* Index of the hash page in the tree overall */
                hpage_idx = hblock_idx >> params->log_blocks_per_page;

                /* Byte offset of the hash block within the page */
                hblock_offset_in_page =
                        (hblock_idx << params->log_blocksize) & ~PAGE_MASK;

                /* Byte offset of the hash within the block */
                hoffset = (hidx << params->log_digestsize) &
                          (params->block_size - 1);

                hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode,
                                hpage_idx, level == 0 ? min(max_ra_pages,
                                        params->tree_pages - hpage_idx) : 0);
                if (IS_ERR(hpage)) {
                        fsverity_err(inode,
                                     "Error %ld reading Merkle tree page %lu",
                                     PTR_ERR(hpage), hpage_idx);
                        goto error;
                }
                haddr = kmap_local_page(hpage) + hblock_offset_in_page;
                if (is_hash_block_verified(vi, hpage, hblock_idx)) {
                        memcpy(_want_hash, haddr + hoffset, hsize);
                        want_hash = _want_hash;
                        kunmap_local(haddr);
                        put_page(hpage);
                        goto descend;
                }
                hblocks[level].page = hpage;
                hblocks[level].addr = haddr;
                hblocks[level].index = hblock_idx;
                hblocks[level].hoffset = hoffset;
                hidx = next_hidx;
        }

        want_hash = vi->root_hash;
descend:
        /* Descend the tree verifying hash blocks. */
        for (; level > 0; level--) {
                struct page *hpage = hblocks[level - 1].page;
                const void *haddr = hblocks[level - 1].addr;
                unsigned long hblock_idx = hblocks[level - 1].index;
                unsigned int hoffset = hblocks[level - 1].hoffset;

                if (fsverity_hash_block(params, inode, haddr, real_hash) != 0)
                        goto error;
                if (memcmp(want_hash, real_hash, hsize) != 0)
                        goto corrupted;
                /*
                 * Mark the hash block as verified.  This must be atomic and
                 * idempotent, as the same hash block might be verified by
                 * multiple threads concurrently.
                 */
                if (vi->hash_block_verified)
                        set_bit(hblock_idx, vi->hash_block_verified);
                else
                        SetPageChecked(hpage);
                memcpy(_want_hash, haddr + hoffset, hsize);
                want_hash = _want_hash;
                kunmap_local(haddr);
                put_page(hpage);
        }

        /* Finally, verify the hash of the data block. */
        if (memcmp(want_hash, dblock->real_hash, hsize) != 0)
                goto corrupted;
        return true;

corrupted:
        fsverity_err(inode,
                     "FILE CORRUPTED! pos=%llu, level=%d, want_hash=%s:%*phN, real_hash=%s:%*phN",
                     data_pos, level - 1,
                     params->hash_alg->name, hsize, want_hash,
                     params->hash_alg->name, hsize,
                     level == 0 ? dblock->real_hash : real_hash);
error:
        for (; level > 0; level--) {
                kunmap_local(hblocks[level - 1].addr);
                put_page(hblocks[level - 1].page);
        }
        return false;
}

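/*
 * Initialize a verification context for the given inode, with an initially
 * empty queue of pending data blocks.
 */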
static void
fsverity_init_verification_context(struct fsverity_verification_context *ctx,
                                    struct inode *inode,
                                    unsigned long max_ra_pages)
{
        ctx->inode = inode;
        ctx->vi = inode->i_verity_info;
        ctx->max_ra_pages = max_ra_pages;
        ctx->num_pending = 0;
}

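/* Unmap and drop any data blocks that are still queued in the context. */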
static void
fsverity_clear_pending_blocks(struct fsverity_verification_context *ctx)
{
        int i;

        for (i = ctx->num_pending - 1; i >= 0; i--) {
                kunmap_local(ctx->pending_blocks[i].data);
                ctx->pending_blocks[i].data = NULL;
        }
        ctx->num_pending = 0;
}

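/*
 * Hash all queued data blocks with one call to crypto_shash_finup_mb(), which
 * may interleave the hashing of the blocks, then verify each block against
 * the Merkle tree.  On success the queue is emptied; on failure the caller is
 * responsible for calling fsverity_clear_pending_blocks().
 */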
static bool
fsverity_verify_pending_blocks(struct fsverity_verification_context *ctx)
{
        struct inode *inode = ctx->inode;
        struct fsverity_info *vi = ctx->vi;
        const struct merkle_tree_params *params = &vi->tree_params;
        SHASH_DESC_ON_STACK(desc, params->hash_alg->tfm);
        const u8 *data[FS_VERITY_MAX_PENDING_DATA_BLOCKS];
        u8 *real_hashes[FS_VERITY_MAX_PENDING_DATA_BLOCKS];
        int i;
        int err;

        if (ctx->num_pending == 0)
                return true;

        for (i = 0; i < ctx->num_pending; i++) {
                data[i] = ctx->pending_blocks[i].data;
                real_hashes[i] = ctx->pending_blocks[i].real_hash;
        }

        desc->tfm = params->hash_alg->tfm;
        if (params->hashstate)
                err = crypto_shash_import(desc, params->hashstate);
        else
                err = crypto_shash_init(desc);
        if (err) {
                fsverity_err(inode, "Error %d importing hash state", err);
                return false;
        }
        err = crypto_shash_finup_mb(desc, data, params->block_size, real_hashes,
                                    ctx->num_pending);
        if (err) {
                fsverity_err(inode, "Error %d computing block hashes", err);
                return false;
        }

        for (i = 0; i < ctx->num_pending; i++) {
                if (!verify_data_block(inode, vi, &ctx->pending_blocks[i],
                                       ctx->max_ra_pages))
                        return false;
        }

        fsverity_clear_pending_blocks(ctx);
        return true;
}

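/*
 * Map each Merkle-tree-block-sized chunk of the given folio region and add it
 * to the queue of pending data blocks, flushing the queue via
 * fsverity_verify_pending_blocks() whenever it fills up.
 */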
static bool
fsverity_add_data_blocks(struct fsverity_verification_context *ctx,
                         struct folio *data_folio, size_t len, size_t offset)
{
        struct fsverity_info *vi = ctx->vi;
        const struct merkle_tree_params *params = &vi->tree_params;
        const unsigned int block_size = params->block_size;
        const int mb_max_msgs = params->hash_alg->mb_max_msgs;
        u64 pos = (u64)data_folio->index << PAGE_SHIFT;

        if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offset, block_size)))
                return false;
        if (WARN_ON_ONCE(!folio_test_locked(data_folio) ||
                         folio_test_uptodate(data_folio)))
                return false;
        do {
                ctx->pending_blocks[ctx->num_pending].data =
                        kmap_local_folio(data_folio, offset);
                ctx->pending_blocks[ctx->num_pending].pos = pos + offset;
                if (++ctx->num_pending == mb_max_msgs &&
                    !fsverity_verify_pending_blocks(ctx))
                        return false;
                offset += block_size;
                len -= block_size;
        } while (len);
        return true;
}

/**
 * fsverity_verify_blocks() - verify data in a folio
 * @folio: the folio containing the data to verify
 * @len: the length of the data to verify in the folio
 * @offset: the offset of the data to verify in the folio
 *
 * Verify data that has just been read from a verity file.  The data must be
 * located in a pagecache folio that is still locked and not yet uptodate.  The
 * length and offset of the data must be Merkle tree block size aligned.
 *
 * Return: %true if the data is valid, else %false.
 */
bool fsverity_verify_blocks(struct folio *folio, size_t len, size_t offset)
{
        struct fsverity_verification_context ctx;

        fsverity_init_verification_context(&ctx, folio->mapping->host, 0);

        if (fsverity_add_data_blocks(&ctx, folio, len, offset) &&
            fsverity_verify_pending_blocks(&ctx))
                return true;
        fsverity_clear_pending_blocks(&ctx);
        return false;
}
EXPORT_SYMBOL_GPL(fsverity_verify_blocks);

#ifdef CONFIG_BLOCK
/**
 * fsverity_verify_bio() - verify a 'read' bio that has just completed
 * @bio: the bio to verify
 *
 * Verify the bio's data against the file's Merkle tree.  All bio data segments
 * must be aligned to the file's Merkle tree block size.  If any data fails
 * verification, then bio->bi_status is set to an error status.
 *
 * This is a helper function for use by the ->readahead() method of filesystems
 * that issue bios to read data directly into the page cache.  Filesystems that
 * populate the page cache without issuing bios (e.g. non block-based
 * filesystems) must instead call fsverity_verify_page() directly on each page.
 * All filesystems must also call fsverity_verify_page() on holes.
 */
void fsverity_verify_bio(struct bio *bio)
{
        struct inode *inode = bio_first_folio_all(bio)->mapping->host;
        struct fsverity_verification_context ctx;
        struct folio_iter fi;
        unsigned long max_ra_pages = 0;

        if (bio->bi_opf & REQ_RAHEAD) {
                /*
                 * If this bio is for data readahead, then we also do readahead
                 * of the first (largest) level of the Merkle tree.  Namely,
                 * when a Merkle tree page is read, we also try to piggy-back on
                 * some additional pages -- up to 1/4 the number of data pages.
                 *
                 * This improves sequential read performance, as it greatly
                 * reduces the number of I/O requests made to the Merkle tree.
                 */
                max_ra_pages = bio->bi_iter.bi_size >> (PAGE_SHIFT + 2);
        }

        fsverity_init_verification_context(&ctx, inode, max_ra_pages);

        bio_for_each_folio_all(fi, bio) {
                if (!fsverity_add_data_blocks(&ctx, fi.folio, fi.length,
                                              fi.offset))
                        goto ioerr;
        }

        if (!fsverity_verify_pending_blocks(&ctx))
                goto ioerr;
        return;

ioerr:
        fsverity_clear_pending_blocks(&ctx);
        bio->bi_status = BLK_STS_IOERR;
}
EXPORT_SYMBOL_GPL(fsverity_verify_bio);
#endif /* CONFIG_BLOCK */

/**
 * fsverity_enqueue_verify_work() - enqueue work on the fs-verity workqueue
 * @work: the work to enqueue
 *
 * Enqueue verification work for asynchronous processing.
 */
void fsverity_enqueue_verify_work(struct work_struct *work)
{
        queue_work(fsverity_read_workqueue, work);
}
EXPORT_SYMBOL_GPL(fsverity_enqueue_verify_work);

void __init fsverity_init_workqueue(void)
{
        /*
         * Use a high-priority workqueue to prioritize verification work, which
         * blocks reads from completing, over regular application tasks.
         *
         * For performance reasons, don't use an unbound workqueue.  Using an
         * unbound workqueue for crypto operations causes excessive scheduler
         * latency on ARM64.
         */
        fsverity_read_workqueue = alloc_workqueue("fsverity_read_queue",
                                                  WQ_HIGHPRI,
                                                  num_online_cpus());
        if (!fsverity_read_workqueue)
                panic("failed to allocate fsverity_read_queue");
}