// SPDX-License-Identifier: GPL-2.0
/*
 * Data verification functions, i.e. hooks for ->readahead()
 *
 * Copyright 2019 Google LLC
 */

#include "fsverity_private.h"

#include <crypto/hash.h>
#include <linux/bio.h>

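/*
 * A data block that is queued for verification: its mapped address, its byte
 * position in the file, and the hash that gets computed for it once the whole
 * pending queue is hashed.
 */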
struct fsverity_pending_block {
	const void *data;
	u64 pos;
	u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE];
};

struct fsverity_verification_context {
	struct inode *inode;
	struct fsverity_info *vi;
	unsigned long max_ra_pages;

	/*
	 * This is the queue of data blocks that are pending verification.  We
	 * allow multiple blocks to be queued up in order to support multibuffer
	 * hashing, i.e. interleaving the hashing of multiple messages.  On many
	 * CPUs this improves performance significantly.
	 */
	int num_pending;
	struct fsverity_pending_block pending_blocks[FS_VERITY_MAX_PENDING_DATA_BLOCKS];
};

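/* Workqueue used by fsverity_enqueue_verify_work() to run verification work */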
static struct workqueue_struct *fsverity_read_workqueue;

/*
 * Returns true if the hash block with index @hblock_idx in the tree, located in
 * @hpage, has already been verified.
 */
static bool is_hash_block_verified(struct fsverity_info *vi, struct page *hpage,
				   unsigned long hblock_idx)
{
	unsigned int blocks_per_page;
	unsigned int i;

	/*
	 * When the Merkle tree block size and page size are the same, then the
	 * ->hash_block_verified bitmap isn't allocated, and we use PG_checked
	 * to directly indicate whether the page's block has been verified.
	 *
	 * Using PG_checked also guarantees that we re-verify hash pages that
	 * get evicted and re-instantiated from the backing storage, as new
	 * pages always start out with PG_checked cleared.
	 */
	if (!vi->hash_block_verified)
		return PageChecked(hpage);

	/*
	 * When the Merkle tree block size and page size differ, we use a bitmap
	 * to indicate whether each hash block has been verified.
	 *
	 * However, we still need to ensure that hash pages that get evicted and
	 * re-instantiated from the backing storage are re-verified.  To do
	 * this, we use PG_checked again, but now it doesn't really mean
	 * "checked".  Instead, now it just serves as an indicator for whether
	 * the hash page is newly instantiated or not.  If the page is new, as
	 * indicated by PG_checked=0, we clear the bitmap bits for the page's
	 * blocks since they are untrustworthy, then set PG_checked=1.
	 * Otherwise we return the bitmap bit for the requested block.
	 *
	 * Multiple threads may execute this code concurrently on the same page.
	 * This is safe because we use memory barriers to ensure that if a
	 * thread sees PG_checked=1, then it also sees the associated bitmap
	 * clearing to have occurred.  Also, all writes and their corresponding
	 * reads are atomic, and all writes are safe to repeat in the event that
	 * multiple threads get into the PG_checked=0 section.  (Clearing a
	 * bitmap bit again at worst causes a hash block to be verified
	 * redundantly.  That event should be very rare, so it's not worth using
	 * a lock to avoid.  Setting PG_checked again has no effect.)
	 */
	if (PageChecked(hpage)) {
		/*
		 * A read memory barrier is needed here to give ACQUIRE
		 * semantics to the above PageChecked() test.
		 */
		smp_rmb();
		return test_bit(hblock_idx, vi->hash_block_verified);
	}
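	/*
	 * The page is newly instantiated, so its blocks can't be trusted yet:
	 * clear their bitmap bits before marking the page as seen.
	 */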
	blocks_per_page = vi->tree_params.blocks_per_page;
	hblock_idx = round_down(hblock_idx, blocks_per_page);
	for (i = 0; i < blocks_per_page; i++)
		clear_bit(hblock_idx + i, vi->hash_block_verified);
	/*
	 * A write memory barrier is needed here to give RELEASE semantics to
	 * the below SetPageChecked() operation.
	 */
	smp_wmb();
	SetPageChecked(hpage);
	return false;
}

/*
 * Verify the hash of a single data block against the file's Merkle tree.
 *
 * In principle, we need to verify the entire path to the root node.  However,
 * for efficiency the filesystem may cache the hash blocks.  Therefore we need
 * only ascend the tree until an already-verified hash block is seen, and then
 * verify the path to that block.
 *
 * Return: %true if the data block is valid, else %false.
 */
static bool
verify_data_block(struct inode *inode, struct fsverity_info *vi,
		  const struct fsverity_pending_block *dblock,
		  unsigned long max_ra_pages)
{
	const u64 data_pos = dblock->pos;
	const struct merkle_tree_params *params = &vi->tree_params;
	const unsigned int hsize = params->digest_size;
	int level;
	u8 _want_hash[FS_VERITY_MAX_DIGEST_SIZE];
	const u8 *want_hash;
	u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE];
	/* The hash blocks that are traversed, indexed by level */
	struct {
		/* Page containing the hash block */
		struct page *page;
		/* Mapped address of the hash block (will be within @page) */
		const void *addr;
		/* Index of the hash block in the tree overall */
		unsigned long index;
		/* Byte offset of the wanted hash relative to @addr */
		unsigned int hoffset;
	} hblocks[FS_VERITY_MAX_LEVELS];
	/*
	 * The index of the previous level's block within that level; also the
	 * index of that block's hash within the current level.
	 */
	u64 hidx = data_pos >> params->log_blocksize;

	/*
	 * Up to FS_VERITY_MAX_PENDING_DATA_BLOCKS + FS_VERITY_MAX_LEVELS pages
	 * may be mapped at once.
	 */
	BUILD_BUG_ON(FS_VERITY_MAX_PENDING_DATA_BLOCKS +
		     FS_VERITY_MAX_LEVELS > KM_MAX_IDX);

	if (unlikely(data_pos >= inode->i_size)) {
		/*
		 * This can happen in the data page spanning EOF when the Merkle
		 * tree block size is less than the page size.  The Merkle tree
		 * doesn't cover data blocks fully past EOF.  But the entire
		 * page spanning EOF can be visible to userspace via a mmap, and
		 * any part past EOF should be all zeroes.  Therefore, we need
		 * to verify that any data blocks fully past EOF are all zeroes.
		 */
		if (memchr_inv(dblock->data, 0, params->block_size)) {
			fsverity_err(inode,
				     "FILE CORRUPTED!  Data past EOF is not zeroed");
			return false;
		}
		return true;
	}

	/*
	 * Starting at the leaf level, ascend the tree saving hash blocks along
	 * the way until we find a hash block that has already been verified, or
	 * until we reach the root.
	 */
	for (level = 0; level < params->num_levels; level++) {
		unsigned long next_hidx;
		unsigned long hblock_idx;
		pgoff_t hpage_idx;
		unsigned int hblock_offset_in_page;
		unsigned int hoffset;
		struct page *hpage;
		const void *haddr;

		/*
		 * The index of the block in the current level; also the index
		 * of that block's hash within the next level.
		 */
		next_hidx = hidx >> params->log_arity;

		/* Index of the hash block in the tree overall */
		hblock_idx = params->level_start[level] + next_hidx;

		/* Index of the hash page in the tree overall */
		hpage_idx = hblock_idx >> params->log_blocks_per_page;

		/* Byte offset of the hash block within the page */
		hblock_offset_in_page =
			(hblock_idx << params->log_blocksize) & ~PAGE_MASK;

		/* Byte offset of the hash within the block */
		hoffset = (hidx << params->log_digestsize) &
			  (params->block_size - 1);

		hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode,
				hpage_idx, level == 0 ? min(max_ra_pages,
					params->tree_pages - hpage_idx) : 0);
		if (IS_ERR(hpage)) {
			fsverity_err(inode,
				     "Error %ld reading Merkle tree page %lu",
				     PTR_ERR(hpage), hpage_idx);
			goto error;
		}
		haddr = kmap_local_page(hpage) + hblock_offset_in_page;
		if (is_hash_block_verified(vi, hpage, hblock_idx)) {
			memcpy(_want_hash, haddr + hoffset, hsize);
			want_hash = _want_hash;
			kunmap_local(haddr);
			put_page(hpage);
			goto descend;
		}
		hblocks[level].page = hpage;
		hblocks[level].addr = haddr;
		hblocks[level].index = hblock_idx;
		hblocks[level].hoffset = hoffset;
		hidx = next_hidx;
	}

	want_hash = vi->root_hash;
descend:
	/* Descend the tree verifying hash blocks. */
	for (; level > 0; level--) {
		struct page *hpage = hblocks[level - 1].page;
		const void *haddr = hblocks[level - 1].addr;
		unsigned long hblock_idx = hblocks[level - 1].index;
		unsigned int hoffset = hblocks[level - 1].hoffset;

		if (fsverity_hash_block(params, inode, haddr, real_hash) != 0)
			goto error;
		if (memcmp(want_hash, real_hash, hsize) != 0)
			goto corrupted;
		/*
		 * Mark the hash block as verified.  This must be atomic and
		 * idempotent, as the same hash block might be verified by
		 * multiple threads concurrently.
		 */
		if (vi->hash_block_verified)
			set_bit(hblock_idx, vi->hash_block_verified);
		else
			SetPageChecked(hpage);
		memcpy(_want_hash, haddr + hoffset, hsize);
		want_hash = _want_hash;
		kunmap_local(haddr);
		put_page(hpage);
	}

	/* Finally, verify the hash of the data block. */
	if (memcmp(want_hash, dblock->real_hash, hsize) != 0)
		goto corrupted;
	return true;

corrupted:
	fsverity_err(inode,
		     "FILE CORRUPTED! pos=%llu, level=%d, want_hash=%s:%*phN, real_hash=%s:%*phN",
		     data_pos, level - 1,
		     params->hash_alg->name, hsize, want_hash,
		     params->hash_alg->name, hsize,
		     level == 0 ? dblock->real_hash : real_hash);
error:
	for (; level > 0; level--) {
		kunmap_local(hblocks[level - 1].addr);
		put_page(hblocks[level - 1].page);
	}
	return false;
}

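/*
 * Initialize a verification context for @inode.  @max_ra_pages bounds the
 * Merkle tree readahead done while verifying the queued data blocks.
 */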
static void
fsverity_init_verification_context(struct fsverity_verification_context *ctx,
				   struct inode *inode,
				   unsigned long max_ra_pages)
{
	ctx->inode = inode;
	ctx->vi = inode->i_verity_info;
	ctx->max_ra_pages = max_ra_pages;
	ctx->num_pending = 0;
}

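/* Drop any queued blocks without verifying them, unmapping their data. */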
static void
fsverity_clear_pending_blocks(struct fsverity_verification_context *ctx)
{
	int i;

	for (i = ctx->num_pending - 1; i >= 0; i--) {
		kunmap_local(ctx->pending_blocks[i].data);
		ctx->pending_blocks[i].data = NULL;
	}
	ctx->num_pending = 0;
}

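/*
 * Hash all queued data blocks, using multibuffer hashing when more than one
 * block is queued, then verify each block against the Merkle tree.
 *
 * Return: %true if all queued blocks are valid, else %false.
 */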
static bool
fsverity_verify_pending_blocks(struct fsverity_verification_context *ctx)
{
	struct inode *inode = ctx->inode;
	struct fsverity_info *vi = ctx->vi;
	const struct merkle_tree_params *params = &vi->tree_params;
	SHASH_DESC_ON_STACK(desc, params->hash_alg->tfm);
	const u8 *data[FS_VERITY_MAX_PENDING_DATA_BLOCKS];
	u8 *real_hashes[FS_VERITY_MAX_PENDING_DATA_BLOCKS];
	int i;
	int err;

	if (ctx->num_pending == 0)
		return true;

	for (i = 0; i < ctx->num_pending; i++) {
		data[i] = ctx->pending_blocks[i].data;
		real_hashes[i] = ctx->pending_blocks[i].real_hash;
	}

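	/*
	 * Initialize the hash: import the precomputed initial state if there
	 * is one (e.g. when a salt is in use), otherwise start from scratch.
	 */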
	desc->tfm = params->hash_alg->tfm;
	if (params->hashstate)
		err = crypto_shash_import(desc, params->hashstate);
	else
		err = crypto_shash_init(desc);
	if (err) {
		fsverity_err(inode, "Error %d importing hash state", err);
		return false;
	}
	err = crypto_shash_finup_mb(desc, data, params->block_size, real_hashes,
				    ctx->num_pending);
	if (err) {
		fsverity_err(inode, "Error %d computing block hashes", err);
		return false;
	}

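	/* The block hashes are computed; verify each block against the tree. */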
	for (i = 0; i < ctx->num_pending; i++) {
		if (!verify_data_block(inode, vi, &ctx->pending_blocks[i],
				       ctx->max_ra_pages))
			return false;
	}

	fsverity_clear_pending_blocks(ctx);
	return true;
}

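/*
 * Queue the data blocks in the given region of @data_folio for verification.
 * The region must be Merkle tree block size aligned, and the folio must still
 * be locked and not yet uptodate.
 */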
static bool
fsverity_add_data_blocks(struct fsverity_verification_context *ctx,
			 struct folio *data_folio, size_t len, size_t offset)
{
	struct fsverity_info *vi = ctx->vi;
	const struct merkle_tree_params *params = &vi->tree_params;
	const unsigned int block_size = params->block_size;
	const int mb_max_msgs = params->hash_alg->mb_max_msgs;
	u64 pos = (u64)data_folio->index << PAGE_SHIFT;

	if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offset, block_size)))
		return false;
	if (WARN_ON_ONCE(!folio_test_locked(data_folio) ||
			 folio_test_uptodate(data_folio)))
		return false;
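	/*
	 * Map each block in the region and queue it, flushing the queue
	 * whenever it reaches the hash algorithm's multibuffer limit.
	 */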
	do {
		ctx->pending_blocks[ctx->num_pending].data =
			kmap_local_folio(data_folio, offset);
		ctx->pending_blocks[ctx->num_pending].pos = pos + offset;
		if (++ctx->num_pending == mb_max_msgs &&
		    !fsverity_verify_pending_blocks(ctx))
			return false;
		offset += block_size;
		len -= block_size;
	} while (len);
	return true;
}

/**
 * fsverity_verify_blocks() - verify data in a folio
 * @folio: the folio containing the data to verify
 * @len: the length of the data to verify in the folio
 * @offset: the offset of the data to verify in the folio
 *
 * Verify data that has just been read from a verity file.  The data must be
 * located in a pagecache folio that is still locked and not yet uptodate.  The
 * length and offset of the data must be Merkle tree block size aligned.
 *
 * Return: %true if the data is valid, else %false.
 */
bool fsverity_verify_blocks(struct folio *folio, size_t len, size_t offset)
{
	struct fsverity_verification_context ctx;

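	/* Single-folio verification, so don't do any Merkle tree readahead. */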
	fsverity_init_verification_context(&ctx, folio->mapping->host, 0);

	if (fsverity_add_data_blocks(&ctx, folio, len, offset) &&
	    fsverity_verify_pending_blocks(&ctx))
		return true;
	fsverity_clear_pending_blocks(&ctx);
	return false;
}
EXPORT_SYMBOL_GPL(fsverity_verify_blocks);
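
/*
 * Hypothetical usage sketch (not part of this file): a filesystem that fills
 * the page cache without issuing bios might call fsverity_verify_blocks()
 * from its read completion path roughly like this, while the folio is still
 * locked and not yet uptodate:
 *
 *	if (fsverity_active(inode) &&
 *	    !fsverity_verify_blocks(folio, folio_size(folio), 0))
 *		return -EIO;
 *	folio_mark_uptodate(folio);
 */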

#ifdef CONFIG_BLOCK
/**
 * fsverity_verify_bio() - verify a 'read' bio that has just completed
 * @bio: the bio to verify
 *
 * Verify the bio's data against the file's Merkle tree.  All bio data segments
 * must be aligned to the file's Merkle tree block size.  If any data fails
 * verification, then bio->bi_status is set to an error status.
 *
 * This is a helper function for use by the ->readahead() method of filesystems
 * that issue bios to read data directly into the page cache.  Filesystems that
 * populate the page cache without issuing bios (e.g. non block-based
 * filesystems) must instead call fsverity_verify_page() directly on each page.
 * All filesystems must also call fsverity_verify_page() on holes.
 */
void fsverity_verify_bio(struct bio *bio)
{
	struct inode *inode = bio_first_folio_all(bio)->mapping->host;
	struct fsverity_verification_context ctx;
	struct folio_iter fi;
	unsigned long max_ra_pages = 0;

	if (bio->bi_opf & REQ_RAHEAD) {
		/*
		 * If this bio is for data readahead, then we also do readahead
		 * of the first (largest) level of the Merkle tree.  Namely,
		 * when a Merkle tree page is read, we also try to piggy-back on
		 * some additional pages -- up to 1/4 the number of data pages.
		 *
		 * This improves sequential read performance, as it greatly
		 * reduces the number of I/O requests made to the Merkle tree.
		 */
		max_ra_pages = bio->bi_iter.bi_size >> (PAGE_SHIFT + 2);
	}

	fsverity_init_verification_context(&ctx, inode, max_ra_pages);

	bio_for_each_folio_all(fi, bio) {
		if (!fsverity_add_data_blocks(&ctx, fi.folio, fi.length,
					      fi.offset))
			goto ioerr;
	}

	if (!fsverity_verify_pending_blocks(&ctx))
		goto ioerr;
	return;

ioerr:
	fsverity_clear_pending_blocks(&ctx);
	bio->bi_status = BLK_STS_IOERR;
}
EXPORT_SYMBOL_GPL(fsverity_verify_bio);
#endif /* CONFIG_BLOCK */

/**
 * fsverity_enqueue_verify_work() - enqueue work on the fs-verity workqueue
 * @work: the work to enqueue
 *
 * Enqueue verification work for asynchronous processing.
 */
void fsverity_enqueue_verify_work(struct work_struct *work)
{
	queue_work(fsverity_read_workqueue, work);
}
EXPORT_SYMBOL_GPL(fsverity_enqueue_verify_work);

void __init fsverity_init_workqueue(void)
{
	/*
	 * Use a high-priority workqueue to prioritize verification work, which
	 * blocks reads from completing, over regular application tasks.
	 *
	 * For performance reasons, don't use an unbound workqueue.  Using an
	 * unbound workqueue for crypto operations causes excessive scheduler
	 * latency on ARM64.
	 */
	fsverity_read_workqueue = alloc_workqueue("fsverity_read_queue",
						  WQ_HIGHPRI,
						  num_online_cpus());
	if (!fsverity_read_workqueue)
		panic("failed to allocate fsverity_read_queue");
}