• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * linux/fs/jbd2/recovery.c
4  *
5  * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
6  *
7  * Copyright 1999-2000 Red Hat Software --- All Rights Reserved
8  *
9  * Journal recovery routines for the generic filesystem journaling code;
10  * part of the ext2fs journaling system.
11  */
12 
13 #ifndef __KERNEL__
14 #include "jfs_user.h"
15 #else
16 #include <linux/time.h>
17 #include <linux/fs.h>
18 #include <linux/jbd2.h>
19 #include <linux/errno.h>
20 #include <linux/crc32.h>
21 #include <linux/blkdev.h>
22 #endif
23 
24 /*
25  * Maintain information about the progress of the recovery job, so that
26  * the different passes can carry information between them.
27  */
28 struct recovery_info
29 {
30 	tid_t		start_transaction;
31 	tid_t		end_transaction;
32 
33 	int		nr_replays;
34 	int		nr_revokes;
35 	int		nr_revoke_hits;
36 };
37 
38 enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY};
39 static int do_one_pass(journal_t *journal,
40 				struct recovery_info *info, enum passtype pass);
41 static int scan_revoke_records(journal_t *, struct buffer_head *,
42 				tid_t, struct recovery_info *);
43 
44 #ifdef __KERNEL__
45 
/*
 * Drop the reference held on each of the first @n buffers in the
 * readahead array @b, working from the last slot down to slot 0.
 */
static void journal_brelse_array(struct buffer_head *b[], int n)
{
	int i;

	for (i = n - 1; i >= 0; i--)
		brelse(b[i]);
}
52 
53 
54 /*
55  * When reading from the journal, we are going through the block device
56  * layer directly and so there is no readahead being done for us.  We
57  * need to implement any readahead ourselves if we want it to happen at
58  * all.  Recovery is basically one long sequential read, so make sure we
59  * do the IO in reasonably large chunks.
60  *
61  * This is not so critical that we need to be enormously clever about
62  * the readahead size, though.  128K is a purely arbitrary, good-enough
63  * fixed value.
64  */
65 
66 #define MAXBUF 8
do_readahead(journal_t * journal,unsigned int start)67 static int do_readahead(journal_t *journal, unsigned int start)
68 {
69 	int err;
70 	unsigned int max, nbufs, next;
71 	unsigned long long blocknr;
72 	struct buffer_head *bh;
73 
74 	struct buffer_head * bufs[MAXBUF];
75 
76 	/* Do up to 128K of readahead */
77 	max = start + (128 * 1024 / journal->j_blocksize);
78 	if (max > journal->j_maxlen)
79 		max = journal->j_maxlen;
80 
81 	/* Do the readahead itself.  We'll submit MAXBUF buffer_heads at
82 	 * a time to the block device IO layer. */
83 
84 	nbufs = 0;
85 
86 	for (next = start; next < max; next++) {
87 		err = jbd2_journal_bmap(journal, next, &blocknr);
88 
89 		if (err) {
90 			printk(KERN_ERR "JBD2: bad block at offset %u\n",
91 				next);
92 			goto failed;
93 		}
94 
95 		bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
96 		if (!bh) {
97 			err = -ENOMEM;
98 			goto failed;
99 		}
100 
101 		if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
102 			bufs[nbufs++] = bh;
103 			if (nbufs == MAXBUF) {
104 				ll_rw_block(REQ_OP_READ, 0, nbufs, bufs);
105 				journal_brelse_array(bufs, nbufs);
106 				nbufs = 0;
107 			}
108 		} else
109 			brelse(bh);
110 	}
111 
112 	if (nbufs)
113 		ll_rw_block(REQ_OP_READ, 0, nbufs, bufs);
114 	err = 0;
115 
116 failed:
117 	if (nbufs)
118 		journal_brelse_array(bufs, nbufs);
119 	return err;
120 }
121 
122 #endif /* __KERNEL__ */
123 
124 
125 /*
126  * Read a block from the journal
127  */
128 
jread(struct buffer_head ** bhp,journal_t * journal,unsigned int offset)129 static int jread(struct buffer_head **bhp, journal_t *journal,
130 		 unsigned int offset)
131 {
132 	int err;
133 	unsigned long long blocknr;
134 	struct buffer_head *bh;
135 
136 	*bhp = NULL;
137 
138 	if (offset >= journal->j_maxlen) {
139 		printk(KERN_ERR "JBD2: corrupted journal superblock\n");
140 		return -EFSCORRUPTED;
141 	}
142 
143 	err = jbd2_journal_bmap(journal, offset, &blocknr);
144 
145 	if (err) {
146 		printk(KERN_ERR "JBD2: bad block at offset %u\n",
147 			offset);
148 		return err;
149 	}
150 
151 	bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
152 	if (!bh)
153 		return -ENOMEM;
154 
155 	if (!buffer_uptodate(bh)) {
156 		/* If this is a brand new buffer, start readahead.
157                    Otherwise, we assume we are already reading it.  */
158 		if (!buffer_req(bh))
159 			do_readahead(journal, offset);
160 		wait_on_buffer(bh);
161 	}
162 
163 	if (!buffer_uptodate(bh)) {
164 		printk(KERN_ERR "JBD2: Failed to read block at offset %u\n",
165 			offset);
166 		brelse(bh);
167 		return -EIO;
168 	}
169 
170 	*bhp = bh;
171 	return 0;
172 }
173 
/*
 * Check the checksum stored in the jbd2_journal_block_tail that occupies
 * the last bytes of the journal block at @buf.
 *
 * The stored checksum field is zeroed while the checksum is recomputed
 * over the whole block and is restored afterwards.
 *
 * Returns 1 if the checksum matches or the journal does not use v2/v3
 * checksums, 0 on mismatch.
 */
static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf)
{
	struct jbd2_journal_block_tail *tail;
	__be32 stored;
	__u32 csum;

	if (!jbd2_journal_has_csum_v2or3(j))
		return 1;

	tail = (struct jbd2_journal_block_tail *)
		(buf + j->j_blocksize - sizeof(*tail));

	stored = tail->t_checksum;
	tail->t_checksum = 0;
	csum = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
	tail->t_checksum = stored;

	return cpu_to_be32(csum) == stored;
}
192 
193 /*
194  * Count the number of in-use tags in a journal descriptor block.
195  */
196 
count_tags(journal_t * journal,struct buffer_head * bh)197 static int count_tags(journal_t *journal, struct buffer_head *bh)
198 {
199 	char *			tagp;
200 	journal_block_tag_t *	tag;
201 	int			nr = 0, size = journal->j_blocksize;
202 	int			tag_bytes = journal_tag_bytes(journal);
203 
204 	if (jbd2_journal_has_csum_v2or3(journal))
205 		size -= sizeof(struct jbd2_journal_block_tail);
206 
207 	tagp = &bh->b_data[sizeof(journal_header_t)];
208 
209 	while ((tagp - bh->b_data + tag_bytes) <= size) {
210 		tag = (journal_block_tag_t *) tagp;
211 
212 		nr++;
213 		tagp += tag_bytes;
214 		if (!(tag->t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID)))
215 			tagp += 16;
216 
217 		if (tag->t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG))
218 			break;
219 	}
220 
221 	return nr;
222 }
223 
224 
/*
 * Make sure we wrap around the log correctly!
 *
 * Once @var has reached or passed (journal)->j_last it is pulled back by
 * the length of the log area, j_last - j_first.  Only one subtraction is
 * applied per use.
 *
 * @var must be a modifiable lvalue.  It is evaluated more than once, so
 * it must not have side effects.  Both arguments are parenthesised in
 * the expansion so that any lvalue expression can be passed safely.
 */
#define wrap(journal, var)						\
do {									\
	if ((var) >= (journal)->j_last)					\
		(var) -= ((journal)->j_last - (journal)->j_first);	\
} while (0)
231 
232 /**
233  * jbd2_journal_recover - recovers a on-disk journal
234  * @journal: the journal to recover
235  *
236  * The primary function for recovering the log contents when mounting a
237  * journaled device.
238  *
239  * Recovery is done in three passes.  In the first pass, we look for the
240  * end of the log.  In the second, we assemble the list of revoke
241  * blocks.  In the third and final pass, we replay any un-revoked blocks
242  * in the log.
243  */
jbd2_journal_recover(journal_t * journal)244 int jbd2_journal_recover(journal_t *journal)
245 {
246 	int			err, err2;
247 	journal_superblock_t *	sb;
248 
249 	struct recovery_info	info;
250 	errseq_t		wb_err;
251 	struct address_space	*mapping;
252 
253 	memset(&info, 0, sizeof(info));
254 	sb = journal->j_superblock;
255 
256 	/*
257 	 * The journal superblock's s_start field (the current log head)
258 	 * is always zero if, and only if, the journal was cleanly
259 	 * unmounted.
260 	 */
261 
262 	if (!sb->s_start) {
263 		jbd_debug(1, "No recovery required, last transaction %d\n",
264 			  be32_to_cpu(sb->s_sequence));
265 		journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
266 		return 0;
267 	}
268 
269 	wb_err = 0;
270 	mapping = journal->j_fs_dev->bd_inode->i_mapping;
271 	errseq_check_and_advance(&mapping->wb_err, &wb_err);
272 	err = do_one_pass(journal, &info, PASS_SCAN);
273 	if (!err)
274 		err = do_one_pass(journal, &info, PASS_REVOKE);
275 	if (!err)
276 		err = do_one_pass(journal, &info, PASS_REPLAY);
277 
278 	jbd_debug(1, "JBD2: recovery, exit status %d, "
279 		  "recovered transactions %u to %u\n",
280 		  err, info.start_transaction, info.end_transaction);
281 	jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
282 		  info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
283 
284 	/* Restart the log at the next transaction ID, thus invalidating
285 	 * any existing commit records in the log. */
286 	journal->j_transaction_sequence = ++info.end_transaction;
287 
288 	jbd2_journal_clear_revoke(journal);
289 	err2 = sync_blockdev(journal->j_fs_dev);
290 	if (!err)
291 		err = err2;
292 	err2 = errseq_check_and_advance(&mapping->wb_err, &wb_err);
293 	if (!err)
294 		err = err2;
295 	/* Make sure all replayed data is on permanent storage */
296 	if (journal->j_flags & JBD2_BARRIER) {
297 		err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
298 		if (!err)
299 			err = err2;
300 	}
301 	return err;
302 }
303 
304 /**
305  * jbd2_journal_skip_recovery - Start journal and wipe exiting records
306  * @journal: journal to startup
307  *
308  * Locate any valid recovery information from the journal and set up the
309  * journal structures in memory to ignore it (presumably because the
310  * caller has evidence that it is out of date).
311  * This function doesn't appear to be exported..
312  *
313  * We perform one pass over the journal to allow us to tell the user how
314  * much recovery information is being erased, and to let us initialise
315  * the journal transaction sequence numbers to the next unused ID.
316  */
jbd2_journal_skip_recovery(journal_t * journal)317 int jbd2_journal_skip_recovery(journal_t *journal)
318 {
319 	int			err;
320 
321 	struct recovery_info	info;
322 
323 	memset (&info, 0, sizeof(info));
324 
325 	err = do_one_pass(journal, &info, PASS_SCAN);
326 
327 	if (err) {
328 		printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
329 		++journal->j_transaction_sequence;
330 	} else {
331 #ifdef CONFIG_JBD2_DEBUG
332 		int dropped = info.end_transaction -
333 			be32_to_cpu(journal->j_superblock->s_sequence);
334 		jbd_debug(1,
335 			  "JBD2: ignoring %d transaction%s from the journal.\n",
336 			  dropped, (dropped == 1) ? "" : "s");
337 #endif
338 		journal->j_transaction_sequence = ++info.end_transaction;
339 	}
340 
341 	journal->j_tail = 0;
342 	return err;
343 }
344 
/*
 * Return the block number recorded in descriptor tag @tag.  The low 32
 * bits always come from t_blocknr; when the journal has the 64bit
 * feature, t_blocknr_high supplies the upper 32 bits.
 */
static inline unsigned long long read_tag_block(journal_t *journal,
						journal_block_tag_t *tag)
{
	unsigned long long blocknr = be32_to_cpu(tag->t_blocknr);

	if (!jbd2_has_feature_64bit(journal))
		return blocknr;

	return blocknr | ((u64)be32_to_cpu(tag->t_blocknr_high) << 32);
}
353 
354 /*
355  * calc_chksums calculates the checksums for the blocks described in the
356  * descriptor block.
357  */
calc_chksums(journal_t * journal,struct buffer_head * bh,unsigned long * next_log_block,__u32 * crc32_sum)358 static int calc_chksums(journal_t *journal, struct buffer_head *bh,
359 			unsigned long *next_log_block, __u32 *crc32_sum)
360 {
361 	int i, num_blks, err;
362 	unsigned long io_block;
363 	struct buffer_head *obh;
364 
365 	num_blks = count_tags(journal, bh);
366 	/* Calculate checksum of the descriptor block. */
367 	*crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
368 
369 	for (i = 0; i < num_blks; i++) {
370 		io_block = (*next_log_block)++;
371 		wrap(journal, *next_log_block);
372 		err = jread(&obh, journal, io_block);
373 		if (err) {
374 			printk(KERN_ERR "JBD2: IO error %d recovering block "
375 				"%lu in log\n", err, io_block);
376 			return 1;
377 		} else {
378 			*crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
379 				     obh->b_size);
380 		}
381 		put_bh(obh);
382 	}
383 	return 0;
384 }
385 
/*
 * Check the checksum kept in h_chksum[0] of the commit block at @buf.
 *
 * The stored value is zeroed while the checksum is recomputed over the
 * whole block and is restored afterwards.
 *
 * Returns 1 if the checksum matches or the journal does not use v2/v3
 * checksums, 0 on mismatch.
 */
static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
{
	struct commit_header *hdr = buf;
	__be32 stored;
	__u32 csum;

	if (!jbd2_journal_has_csum_v2or3(j))
		return 1;

	stored = hdr->h_chksum[0];
	hdr->h_chksum[0] = 0;
	csum = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
	hdr->h_chksum[0] = stored;

	return cpu_to_be32(csum) == stored;
}
403 
/*
 * Check the per-block checksum that descriptor tag @tag carries for the
 * logged data block at @buf.
 *
 * The checksum covers the big-endian transaction @sequence followed by
 * the block contents.  With the csum3 feature the full 32-bit value in
 * the tag3 layout is compared; otherwise the tag's 16-bit t_checksum is
 * compared against the checksum converted with cpu_to_be16().
 *
 * Returns 1 if the checksum matches or the journal does not use v2/v3
 * checksums, 0 on mismatch.
 */
static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
				      void *buf, __u32 sequence)
{
	__be32 seq_be;
	__u32 csum;

	if (!jbd2_journal_has_csum_v2or3(j))
		return 1;

	seq_be = cpu_to_be32(sequence);
	csum = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq_be, sizeof(seq_be));
	csum = jbd2_chksum(j, csum, buf, j->j_blocksize);

	if (jbd2_has_feature_csum3(j)) {
		journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;

		return tag3->t_checksum == cpu_to_be32(csum);
	}

	return tag->t_checksum == cpu_to_be16(csum);
}
423 
/*
 * Walk the log once on behalf of a single recovery pass.
 *
 * @journal: journal being recovered
 * @info:    progress record shared between the passes
 * @pass:    which pass to perform
 *
 * The walk starts at the transaction ID and log block recorded in the
 * journal superblock and follows the log block by block until it meets
 * a block without the journal magic, a block with an unexpected sequence
 * number, an unknown block type or, for the later passes, the end
 * transaction found by the scan pass.
 *
 *  - PASS_SCAN records info->start_transaction, verifies checksums where
 *    the journal has them and settles info->end_transaction.
 *  - PASS_REVOKE hands each revoke block to scan_revoke_records().
 *  - PASS_REPLAY copies every logged block that is neither revoked nor
 *    corrupt into a buffer on the filesystem device and marks it dirty.
 *
 * Returns 0 on success or a negative error.  Errors that abort the walk
 * are returned directly; errors on individual blocks during replay are
 * remembered and returned once the walk has finished.
 */
static int do_one_pass(journal_t *journal,
			struct recovery_info *info, enum passtype pass)
{
	unsigned int		first_commit_ID, next_commit_ID;
	unsigned long		next_log_block;
	int			err, success = 0;
	journal_superblock_t *	sb;
	journal_header_t *	tmp;
	struct buffer_head *	bh;
	unsigned int		sequence;
	int			blocktype;
	int			tag_bytes = journal_tag_bytes(journal);
	__u32			crc32_sum = ~0; /* Transactional Checksums */
	int			descr_csum_size = 0;
	int			block_error = 0;

	/*
	 * First thing is to establish what we expect to find in the log
	 * (in terms of transaction IDs), and where (in terms of log
	 * block offsets): query the superblock.
	 */

	sb = journal->j_superblock;
	next_commit_ID = be32_to_cpu(sb->s_sequence);
	next_log_block = be32_to_cpu(sb->s_start);

	first_commit_ID = next_commit_ID;
	if (pass == PASS_SCAN)
		info->start_transaction = first_commit_ID;

	jbd_debug(1, "Starting recovery pass %d\n", pass);

	/*
	 * Now we walk through the log, transaction by transaction,
	 * making sure that each transaction has a commit block in the
	 * expected place.  Each complete transaction gets replayed back
	 * into the main filesystem.
	 */

	while (1) {
		int			flags;
		char *			tagp;
		journal_block_tag_t *	tag;
		struct buffer_head *	obh;
		struct buffer_head *	nbh;

		cond_resched();

		/* If we already know where to stop the log traversal,
		 * check right now that we haven't gone past the end of
		 * the log. */

		if (pass != PASS_SCAN)
			if (tid_geq(next_commit_ID, info->end_transaction))
				break;

		jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
			  next_commit_ID, next_log_block, journal->j_last);

		/* Skip over each chunk of the transaction looking for
		 * either the next descriptor block or the final commit
		 * record. */

		/* next_log_block is unsigned long, hence %lu. */
		jbd_debug(3, "JBD2: checking block %lu\n", next_log_block);
		err = jread(&bh, journal, next_log_block);
		if (err)
			goto failed;

		next_log_block++;
		wrap(journal, next_log_block);

		/* What kind of buffer is it?
		 *
		 * If it is a descriptor block, check that it has the
		 * expected sequence number.  Otherwise, we're all done
		 * here. */

		tmp = (journal_header_t *)bh->b_data;

		if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) {
			brelse(bh);
			break;
		}

		blocktype = be32_to_cpu(tmp->h_blocktype);
		sequence = be32_to_cpu(tmp->h_sequence);
		/* sequence is unsigned int, hence %u. */
		jbd_debug(3, "Found magic %d, sequence %u\n",
			  blocktype, sequence);

		if (sequence != next_commit_ID) {
			brelse(bh);
			break;
		}

		/* OK, we have a valid descriptor block which matches
		 * all of the sequence number checks.  What are we going
		 * to do with it?  That depends on the pass... */

		switch(blocktype) {
		case JBD2_DESCRIPTOR_BLOCK:
			/* Verify checksum first */
			if (jbd2_journal_has_csum_v2or3(journal))
				descr_csum_size =
					sizeof(struct jbd2_journal_block_tail);
			if (descr_csum_size > 0 &&
			    !jbd2_descriptor_block_csum_verify(journal,
							       bh->b_data)) {
				/* Note: next_log_block has already been
				 * advanced past the descriptor block. */
				printk(KERN_ERR "JBD2: Invalid checksum "
				       "recovering block %lu in log\n",
				       next_log_block);
				err = -EFSBADCRC;
				brelse(bh);
				goto failed;
			}

			/* If it is a valid descriptor block, replay it
			 * in pass REPLAY; if journal_checksums enabled, then
			 * calculate checksums in PASS_SCAN, otherwise,
			 * just skip over the blocks it describes. */
			if (pass != PASS_REPLAY) {
				if (pass == PASS_SCAN &&
				    jbd2_has_feature_checksum(journal) &&
				    !info->end_transaction) {
					/* calc_chksums() advances
					 * next_log_block itself. */
					if (calc_chksums(journal, bh,
							&next_log_block,
							&crc32_sum)) {
						put_bh(bh);
						break;
					}
					put_bh(bh);
					continue;
				}
				next_log_block += count_tags(journal, bh);
				wrap(journal, next_log_block);
				put_bh(bh);
				continue;
			}

			/* A descriptor block: we can now write all of
			 * the data blocks.  Yay, useful work is finally
			 * getting done here! */

			tagp = &bh->b_data[sizeof(journal_header_t)];
			while ((tagp - bh->b_data + tag_bytes)
			       <= journal->j_blocksize - descr_csum_size) {
				unsigned long io_block;

				tag = (journal_block_tag_t *) tagp;
				flags = be16_to_cpu(tag->t_flags);

				io_block = next_log_block++;
				wrap(journal, next_log_block);
				err = jread(&obh, journal, io_block);
				if (err) {
					/* Recover what we can, but
					 * report failure at the end. */
					success = err;
					/* io_block is unsigned long,
					 * hence %lu. */
					printk(KERN_ERR
						"JBD2: IO error %d recovering "
						"block %lu in log\n",
						err, io_block);
				} else {
					unsigned long long blocknr;

					J_ASSERT(obh != NULL);
					blocknr = read_tag_block(journal,
								 tag);

					/* If the block has been
					 * revoked, then we're all done
					 * here. */
					if (jbd2_journal_test_revoke
					    (journal, blocknr,
					     next_commit_ID)) {
						brelse(obh);
						++info->nr_revoke_hits;
						goto skip_write;
					}

					/* Look for block corruption */
					if (!jbd2_block_tag_csum_verify(
						journal, tag, obh->b_data,
						be32_to_cpu(tmp->h_sequence))) {
						brelse(obh);
						success = -EFSBADCRC;
						printk(KERN_ERR "JBD2: Invalid "
						       "checksum recovering "
						       "data block %llu in "
						       "log\n", blocknr);
						block_error = 1;
						goto skip_write;
					}

					/* Find a buffer for the new
					 * data being restored */
					nbh = __getblk(journal->j_fs_dev,
							blocknr,
							journal->j_blocksize);
					if (nbh == NULL) {
						printk(KERN_ERR
						       "JBD2: Out of memory "
						       "during recovery.\n");
						err = -ENOMEM;
						brelse(bh);
						brelse(obh);
						goto failed;
					}

					lock_buffer(nbh);
					memcpy(nbh->b_data, obh->b_data,
							journal->j_blocksize);
					/* An escaped block has the journal
					 * magic put back into its first
					 * four bytes. */
					if (flags & JBD2_FLAG_ESCAPE) {
						*((__be32 *)nbh->b_data) =
						cpu_to_be32(JBD2_MAGIC_NUMBER);
					}

					BUFFER_TRACE(nbh, "marking dirty");
					set_buffer_uptodate(nbh);
					mark_buffer_dirty(nbh);
					BUFFER_TRACE(nbh, "marking uptodate");
					++info->nr_replays;
					/* ll_rw_block(WRITE, 1, &nbh); */
					unlock_buffer(nbh);
					brelse(obh);
					brelse(nbh);
				}

			skip_write:
				/* Step to the next tag; a tag without
				 * SAME_UUID is followed by 16 more bytes. */
				tagp += tag_bytes;
				if (!(flags & JBD2_FLAG_SAME_UUID))
					tagp += 16;

				if (flags & JBD2_FLAG_LAST_TAG)
					break;
			}

			brelse(bh);
			continue;

		case JBD2_COMMIT_BLOCK:
			/*     How to differentiate between interrupted commit
			 *               and journal corruption ?
			 *
			 * {nth transaction}
			 *        Checksum Verification Failed
			 *			 |
			 *		 ____________________
			 *		|		     |
			 *	async_commit             sync_commit
			 *		|                    |
			 *		| GO TO NEXT    "Journal Corruption"
			 *		| TRANSACTION
			 *		|
			 * {(n+1)th transaction}
			 *		|
			 *	 _______|______________
			 *	|		      |
			 * Commit block found	Commit block not found
			 *      |		      |
			 * "Journal Corruption"       |
			 *		 _____________|_________
			 *		|			|
			 *	nth trans corrupt	OR   nth trans
			 *	and (n+1)th interrupted     interrupted
			 *	before commit block
			 *      could reach the disk.
			 *	(Cannot find the difference in above
			 *	 mentioned conditions. Hence assume
			 *	 "Interrupted Commit".)
			 */

			/* Found an expected commit block: if checksums
			 * are present verify them in PASS_SCAN; else not
			 * much to do other than move on to the next sequence
			 * number. */
			if (pass == PASS_SCAN &&
			    jbd2_has_feature_checksum(journal)) {
				int chksum_err, chksum_seen;
				struct commit_header *cbh =
					(struct commit_header *)bh->b_data;
				unsigned found_chksum =
					be32_to_cpu(cbh->h_chksum[0]);

				chksum_err = chksum_seen = 0;

				/* A checksum failure was already recorded
				 * for an earlier transaction, yet a later
				 * commit block exists: stop here. */
				if (info->end_transaction) {
					journal->j_failed_commit =
						info->end_transaction;
					brelse(bh);
					break;
				}

				if (crc32_sum == found_chksum &&
				    cbh->h_chksum_type == JBD2_CRC32_CHKSUM &&
				    cbh->h_chksum_size ==
						JBD2_CRC32_CHKSUM_SIZE)
				       chksum_seen = 1;
				else if (!(cbh->h_chksum_type == 0 &&
					     cbh->h_chksum_size == 0 &&
					     found_chksum == 0 &&
					     !chksum_seen))
				/*
				 * If fs is mounted using an old kernel and then
				 * kernel with journal_chksum is used then we
				 * get a situation where the journal flag has
				 * checksum flag set but checksums are not
				 * present i.e chksum = 0, in the individual
				 * commit blocks.
				 * Hence to avoid checksum failures, in this
				 * situation, this extra check is added.
				 */
						chksum_err = 1;

				if (chksum_err) {
					info->end_transaction = next_commit_ID;

					if (!jbd2_has_feature_async_commit(journal)) {
						journal->j_failed_commit =
							next_commit_ID;
						brelse(bh);
						break;
					}
				}
				/* Start a fresh running checksum for the
				 * next transaction. */
				crc32_sum = ~0;
			}
			if (pass == PASS_SCAN &&
			    !jbd2_commit_block_csum_verify(journal,
							   bh->b_data)) {
				info->end_transaction = next_commit_ID;

				if (!jbd2_has_feature_async_commit(journal)) {
					journal->j_failed_commit =
						next_commit_ID;
					brelse(bh);
					break;
				}
			}
			brelse(bh);
			next_commit_ID++;
			continue;

		case JBD2_REVOKE_BLOCK:
			/* If we aren't in the REVOKE pass, then we can
			 * just skip over this block. */
			if (pass != PASS_REVOKE) {
				brelse(bh);
				continue;
			}

			err = scan_revoke_records(journal, bh,
						  next_commit_ID, info);
			brelse(bh);
			if (err)
				goto failed;
			continue;

		default:
			jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
				  blocktype);
			brelse(bh);
			goto done;
		}
	}

 done:
	/*
	 * We broke out of the log scan loop: either we came to the
	 * known end of the log or we found an unexpected block in the
	 * log.  If the latter happened, then we know that the "current"
	 * transaction marks the end of the valid log.
	 */

	if (pass == PASS_SCAN) {
		if (!info->end_transaction)
			info->end_transaction = next_commit_ID;
	} else {
		/* It's really bad news if different passes end up at
		 * different places (but possible due to IO errors). */
		if (info->end_transaction != next_commit_ID) {
			printk(KERN_ERR "JBD2: recovery pass %d ended at "
				"transaction %u, expected %u\n",
				pass, next_commit_ID, info->end_transaction);
			if (!success)
				success = -EIO;
		}
	}
	/* Backstop: a corrupt data block must never be reported as
	 * success, even though success is set wherever block_error is. */
	if (block_error && success == 0)
		success = -EIO;
	return success;

 failed:
	return err;
}
817 
818 /* Scan a revoke record, marking all blocks mentioned as revoked. */
819 
scan_revoke_records(journal_t * journal,struct buffer_head * bh,tid_t sequence,struct recovery_info * info)820 static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
821 			       tid_t sequence, struct recovery_info *info)
822 {
823 	jbd2_journal_revoke_header_t *header;
824 	int offset, max;
825 	int csum_size = 0;
826 	__u32 rcount;
827 	int record_len = 4;
828 
829 	header = (jbd2_journal_revoke_header_t *) bh->b_data;
830 	offset = sizeof(jbd2_journal_revoke_header_t);
831 	rcount = be32_to_cpu(header->r_count);
832 
833 	if (!jbd2_descriptor_block_csum_verify(journal, header))
834 		return -EFSBADCRC;
835 
836 	if (jbd2_journal_has_csum_v2or3(journal))
837 		csum_size = sizeof(struct jbd2_journal_block_tail);
838 	if (rcount > journal->j_blocksize - csum_size)
839 		return -EINVAL;
840 	max = rcount;
841 
842 	if (jbd2_has_feature_64bit(journal))
843 		record_len = 8;
844 
845 	while (offset + record_len <= max) {
846 		unsigned long long blocknr;
847 		int err;
848 
849 		if (record_len == 4)
850 			blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
851 		else
852 			blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset)));
853 		offset += record_len;
854 		err = jbd2_journal_set_revoke(journal, blocknr, sequence);
855 		if (err)
856 			return err;
857 		++info->nr_revokes;
858 	}
859 	return 0;
860 }
861