1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (C) 2017-2023 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_inode.h"
13 #include "xfs_btree.h"
14 #include "scrub/scrub.h"
15 #include "scrub/common.h"
16 #include "scrub/btree.h"
17 #include "scrub/trace.h"
18
19 /* btree scrubbing */
20
21 /*
22 * Check for btree operation errors. See the section about handling
23 * operational errors in common.c.
24 */
25 static bool
__xchk_btree_process_error(struct xfs_scrub * sc,struct xfs_btree_cur * cur,int level,int * error,__u32 errflag,void * ret_ip)26 __xchk_btree_process_error(
27 struct xfs_scrub *sc,
28 struct xfs_btree_cur *cur,
29 int level,
30 int *error,
31 __u32 errflag,
32 void *ret_ip)
33 {
34 if (*error == 0)
35 return true;
36
37 switch (*error) {
38 case -EDEADLOCK:
39 case -ECHRNG:
40 /* Used to restart an op with deadlock avoidance. */
41 trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
42 break;
43 case -EFSBADCRC:
44 case -EFSCORRUPTED:
45 /* Note the badness but don't abort. */
46 sc->sm->sm_flags |= errflag;
47 *error = 0;
48 fallthrough;
49 default:
50 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
51 trace_xchk_ifork_btree_op_error(sc, cur, level,
52 *error, ret_ip);
53 else
54 trace_xchk_btree_op_error(sc, cur, level,
55 *error, ret_ip);
56 break;
57 }
58 return false;
59 }
60
61 bool
xchk_btree_process_error(struct xfs_scrub * sc,struct xfs_btree_cur * cur,int level,int * error)62 xchk_btree_process_error(
63 struct xfs_scrub *sc,
64 struct xfs_btree_cur *cur,
65 int level,
66 int *error)
67 {
68 return __xchk_btree_process_error(sc, cur, level, error,
69 XFS_SCRUB_OFLAG_CORRUPT, __return_address);
70 }
71
72 bool
xchk_btree_xref_process_error(struct xfs_scrub * sc,struct xfs_btree_cur * cur,int level,int * error)73 xchk_btree_xref_process_error(
74 struct xfs_scrub *sc,
75 struct xfs_btree_cur *cur,
76 int level,
77 int *error)
78 {
79 return __xchk_btree_process_error(sc, cur, level, error,
80 XFS_SCRUB_OFLAG_XFAIL, __return_address);
81 }
82
83 /* Record btree block corruption. */
84 static void
__xchk_btree_set_corrupt(struct xfs_scrub * sc,struct xfs_btree_cur * cur,int level,__u32 errflag,void * ret_ip)85 __xchk_btree_set_corrupt(
86 struct xfs_scrub *sc,
87 struct xfs_btree_cur *cur,
88 int level,
89 __u32 errflag,
90 void *ret_ip)
91 {
92 sc->sm->sm_flags |= errflag;
93
94 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
95 trace_xchk_ifork_btree_error(sc, cur, level,
96 ret_ip);
97 else
98 trace_xchk_btree_error(sc, cur, level,
99 ret_ip);
100 }
101
102 void
xchk_btree_set_corrupt(struct xfs_scrub * sc,struct xfs_btree_cur * cur,int level)103 xchk_btree_set_corrupt(
104 struct xfs_scrub *sc,
105 struct xfs_btree_cur *cur,
106 int level)
107 {
108 __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
109 __return_address);
110 }
111
112 void
xchk_btree_xref_set_corrupt(struct xfs_scrub * sc,struct xfs_btree_cur * cur,int level)113 xchk_btree_xref_set_corrupt(
114 struct xfs_scrub *sc,
115 struct xfs_btree_cur *cur,
116 int level)
117 {
118 __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
119 __return_address);
120 }
121
122 void
xchk_btree_set_preen(struct xfs_scrub * sc,struct xfs_btree_cur * cur,int level)123 xchk_btree_set_preen(
124 struct xfs_scrub *sc,
125 struct xfs_btree_cur *cur,
126 int level)
127 {
128 __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_PREEN,
129 __return_address);
130 }
131
132 /*
133 * Make sure this record is in order and doesn't stray outside of the parent
134 * keys.
135 */
136 STATIC void
xchk_btree_rec(struct xchk_btree * bs)137 xchk_btree_rec(
138 struct xchk_btree *bs)
139 {
140 struct xfs_btree_cur *cur = bs->cur;
141 union xfs_btree_rec *rec;
142 union xfs_btree_key key;
143 union xfs_btree_key hkey;
144 union xfs_btree_key *keyp;
145 struct xfs_btree_block *block;
146 struct xfs_btree_block *keyblock;
147 struct xfs_buf *bp;
148
149 block = xfs_btree_get_block(cur, 0, &bp);
150 rec = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, block);
151
152 trace_xchk_btree_rec(bs->sc, cur, 0);
153
154 /* Are all records across all record blocks in order? */
155 if (bs->lastrec_valid &&
156 !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
157 xchk_btree_set_corrupt(bs->sc, cur, 0);
158 memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);
159 bs->lastrec_valid = true;
160
161 if (cur->bc_nlevels == 1)
162 return;
163
164 /* Is low_key(rec) at least as large as the parent low key? */
165 cur->bc_ops->init_key_from_rec(&key, rec);
166 keyblock = xfs_btree_get_block(cur, 1, &bp);
167 keyp = xfs_btree_key_addr(cur, cur->bc_levels[1].ptr, keyblock);
168 if (xfs_btree_keycmp_lt(cur, &key, keyp))
169 xchk_btree_set_corrupt(bs->sc, cur, 1);
170
171 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
172 return;
173
174 /* Is high_key(rec) no larger than the parent high key? */
175 cur->bc_ops->init_high_key_from_rec(&hkey, rec);
176 keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[1].ptr, keyblock);
177 if (xfs_btree_keycmp_lt(cur, keyp, &hkey))
178 xchk_btree_set_corrupt(bs->sc, cur, 1);
179 }
180
181 /*
182 * Make sure this key is in order and doesn't stray outside of the parent
183 * keys.
184 */
185 STATIC void
xchk_btree_key(struct xchk_btree * bs,int level)186 xchk_btree_key(
187 struct xchk_btree *bs,
188 int level)
189 {
190 struct xfs_btree_cur *cur = bs->cur;
191 union xfs_btree_key *key;
192 union xfs_btree_key *keyp;
193 struct xfs_btree_block *block;
194 struct xfs_btree_block *keyblock;
195 struct xfs_buf *bp;
196
197 block = xfs_btree_get_block(cur, level, &bp);
198 key = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block);
199
200 trace_xchk_btree_key(bs->sc, cur, level);
201
202 /* Are all low keys across all node blocks in order? */
203 if (bs->lastkey[level - 1].valid &&
204 !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level - 1].key, key))
205 xchk_btree_set_corrupt(bs->sc, cur, level);
206 memcpy(&bs->lastkey[level - 1].key, key, cur->bc_ops->key_len);
207 bs->lastkey[level - 1].valid = true;
208
209 if (level + 1 >= cur->bc_nlevels)
210 return;
211
212 /* Is this block's low key at least as large as the parent low key? */
213 keyblock = xfs_btree_get_block(cur, level + 1, &bp);
214 keyp = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr, keyblock);
215 if (xfs_btree_keycmp_lt(cur, key, keyp))
216 xchk_btree_set_corrupt(bs->sc, cur, level);
217
218 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
219 return;
220
221 /* Is this block's high key no larger than the parent high key? */
222 key = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr, block);
223 keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr,
224 keyblock);
225 if (xfs_btree_keycmp_lt(cur, keyp, key))
226 xchk_btree_set_corrupt(bs->sc, cur, level);
227 }
228
229 /*
230 * Check a btree pointer. Returns true if it's ok to use this pointer.
231 * Callers do not need to set the corrupt flag.
232 */
233 static bool
xchk_btree_ptr_ok(struct xchk_btree * bs,int level,union xfs_btree_ptr * ptr)234 xchk_btree_ptr_ok(
235 struct xchk_btree *bs,
236 int level,
237 union xfs_btree_ptr *ptr)
238 {
239 bool res;
240
241 /* A btree rooted in an inode has no block pointer to the root. */
242 if ((bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
243 level == bs->cur->bc_nlevels)
244 return true;
245
246 /* Otherwise, check the pointers. */
247 if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
248 res = xfs_btree_check_lptr(bs->cur, be64_to_cpu(ptr->l), level);
249 else
250 res = xfs_btree_check_sptr(bs->cur, be32_to_cpu(ptr->s), level);
251 if (!res)
252 xchk_btree_set_corrupt(bs->sc, bs->cur, level);
253
254 return res;
255 }
256
257 /* Check that a btree block's sibling matches what we expect it. */
258 STATIC int
xchk_btree_block_check_sibling(struct xchk_btree * bs,int level,int direction,union xfs_btree_ptr * sibling)259 xchk_btree_block_check_sibling(
260 struct xchk_btree *bs,
261 int level,
262 int direction,
263 union xfs_btree_ptr *sibling)
264 {
265 struct xfs_btree_cur *cur = bs->cur;
266 struct xfs_btree_block *pblock;
267 struct xfs_buf *pbp;
268 struct xfs_btree_cur *ncur = NULL;
269 union xfs_btree_ptr *pp;
270 int success;
271 int error;
272
273 error = xfs_btree_dup_cursor(cur, &ncur);
274 if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error) ||
275 !ncur)
276 return error;
277
278 /*
279 * If the pointer is null, we shouldn't be able to move the upper
280 * level pointer anywhere.
281 */
282 if (xfs_btree_ptr_is_null(cur, sibling)) {
283 if (direction > 0)
284 error = xfs_btree_increment(ncur, level + 1, &success);
285 else
286 error = xfs_btree_decrement(ncur, level + 1, &success);
287 if (error == 0 && success)
288 xchk_btree_set_corrupt(bs->sc, cur, level);
289 error = 0;
290 goto out;
291 }
292
293 /* Increment upper level pointer. */
294 if (direction > 0)
295 error = xfs_btree_increment(ncur, level + 1, &success);
296 else
297 error = xfs_btree_decrement(ncur, level + 1, &success);
298 if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error))
299 goto out;
300 if (!success) {
301 xchk_btree_set_corrupt(bs->sc, cur, level + 1);
302 goto out;
303 }
304
305 /* Compare upper level pointer to sibling pointer. */
306 pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
307 pp = xfs_btree_ptr_addr(ncur, ncur->bc_levels[level + 1].ptr, pblock);
308 if (!xchk_btree_ptr_ok(bs, level + 1, pp))
309 goto out;
310 if (pbp)
311 xchk_buffer_recheck(bs->sc, pbp);
312
313 if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
314 xchk_btree_set_corrupt(bs->sc, cur, level);
315 out:
316 xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
317 return error;
318 }
319
320 /* Check the siblings of a btree block. */
321 STATIC int
xchk_btree_block_check_siblings(struct xchk_btree * bs,struct xfs_btree_block * block)322 xchk_btree_block_check_siblings(
323 struct xchk_btree *bs,
324 struct xfs_btree_block *block)
325 {
326 struct xfs_btree_cur *cur = bs->cur;
327 union xfs_btree_ptr leftsib;
328 union xfs_btree_ptr rightsib;
329 int level;
330 int error = 0;
331
332 xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
333 xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
334 level = xfs_btree_get_level(block);
335
336 /* Root block should never have siblings. */
337 if (level == cur->bc_nlevels - 1) {
338 if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
339 !xfs_btree_ptr_is_null(cur, &rightsib))
340 xchk_btree_set_corrupt(bs->sc, cur, level);
341 goto out;
342 }
343
344 /*
345 * Does the left & right sibling pointers match the adjacent
346 * parent level pointers?
347 * (These function absorbs error codes for us.)
348 */
349 error = xchk_btree_block_check_sibling(bs, level, -1, &leftsib);
350 if (error)
351 return error;
352 error = xchk_btree_block_check_sibling(bs, level, 1, &rightsib);
353 if (error)
354 return error;
355 out:
356 return error;
357 }
358
359 struct check_owner {
360 struct list_head list;
361 xfs_daddr_t daddr;
362 int level;
363 };
364
365 /*
366 * Make sure this btree block isn't in the free list and that there's
367 * an rmap record for it.
368 */
369 STATIC int
xchk_btree_check_block_owner(struct xchk_btree * bs,int level,xfs_daddr_t daddr)370 xchk_btree_check_block_owner(
371 struct xchk_btree *bs,
372 int level,
373 xfs_daddr_t daddr)
374 {
375 xfs_agnumber_t agno;
376 xfs_agblock_t agbno;
377 xfs_btnum_t btnum;
378 bool init_sa;
379 int error = 0;
380
381 if (!bs->cur)
382 return 0;
383
384 btnum = bs->cur->bc_btnum;
385 agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr);
386 agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr);
387
388 /*
389 * If the btree being examined is not itself a per-AG btree, initialize
390 * sc->sa so that we can check for the presence of an ownership record
391 * in the rmap btree for the AG containing the block.
392 */
393 init_sa = bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE;
394 if (init_sa) {
395 error = xchk_ag_init_existing(bs->sc, agno, &bs->sc->sa);
396 if (!xchk_btree_xref_process_error(bs->sc, bs->cur,
397 level, &error))
398 goto out_free;
399 }
400
401 xchk_xref_is_used_space(bs->sc, agbno, 1);
402 /*
403 * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we
404 * have to nullify it (to shut down further block owner checks) if
405 * self-xref encounters problems.
406 */
407 if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO)
408 bs->cur = NULL;
409
410 xchk_xref_is_only_owned_by(bs->sc, agbno, 1, bs->oinfo);
411 if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP)
412 bs->cur = NULL;
413
414 out_free:
415 if (init_sa)
416 xchk_ag_free(bs->sc, &bs->sc->sa);
417
418 return error;
419 }
420
421 /* Check the owner of a btree block. */
422 STATIC int
xchk_btree_check_owner(struct xchk_btree * bs,int level,struct xfs_buf * bp)423 xchk_btree_check_owner(
424 struct xchk_btree *bs,
425 int level,
426 struct xfs_buf *bp)
427 {
428 struct xfs_btree_cur *cur = bs->cur;
429
430 /*
431 * In theory, xfs_btree_get_block should only give us a null buffer
432 * pointer for the root of a root-in-inode btree type, but we need
433 * to check defensively here in case the cursor state is also screwed
434 * up.
435 */
436 if (bp == NULL) {
437 if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE))
438 xchk_btree_set_corrupt(bs->sc, bs->cur, level);
439 return 0;
440 }
441
442 /*
443 * We want to cross-reference each btree block with the bnobt
444 * and the rmapbt. We cannot cross-reference the bnobt or
445 * rmapbt while scanning the bnobt or rmapbt, respectively,
446 * because we cannot alter the cursor and we'd prefer not to
447 * duplicate cursors. Therefore, save the buffer daddr for
448 * later scanning.
449 */
450 if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) {
451 struct check_owner *co;
452
453 co = kmalloc(sizeof(struct check_owner), XCHK_GFP_FLAGS);
454 if (!co)
455 return -ENOMEM;
456
457 INIT_LIST_HEAD(&co->list);
458 co->level = level;
459 co->daddr = xfs_buf_daddr(bp);
460 list_add_tail(&co->list, &bs->to_check);
461 return 0;
462 }
463
464 return xchk_btree_check_block_owner(bs, level, xfs_buf_daddr(bp));
465 }
466
467 /* Decide if we want to check minrecs of a btree block in the inode root. */
468 static inline bool
xchk_btree_check_iroot_minrecs(struct xchk_btree * bs)469 xchk_btree_check_iroot_minrecs(
470 struct xchk_btree *bs)
471 {
472 /*
473 * xfs_bmap_add_attrfork_btree had an implementation bug wherein it
474 * would miscalculate the space required for the data fork bmbt root
475 * when adding an attr fork, and promote the iroot contents to an
476 * external block unnecessarily. This went unnoticed for many years
477 * until scrub found filesystems in this state. Inode rooted btrees are
478 * not supposed to have immediate child blocks that are small enough
479 * that the contents could fit in the inode root, but we can't fail
480 * existing filesystems, so instead we disable the check for data fork
481 * bmap btrees when there's an attr fork.
482 */
483 if (bs->cur->bc_btnum == XFS_BTNUM_BMAP &&
484 bs->cur->bc_ino.whichfork == XFS_DATA_FORK &&
485 xfs_inode_has_attr_fork(bs->sc->ip))
486 return false;
487
488 return true;
489 }
490
491 /*
492 * Check that this btree block has at least minrecs records or is one of the
493 * special blocks that don't require that.
494 */
495 STATIC void
xchk_btree_check_minrecs(struct xchk_btree * bs,int level,struct xfs_btree_block * block)496 xchk_btree_check_minrecs(
497 struct xchk_btree *bs,
498 int level,
499 struct xfs_btree_block *block)
500 {
501 struct xfs_btree_cur *cur = bs->cur;
502 unsigned int root_level = cur->bc_nlevels - 1;
503 unsigned int numrecs = be16_to_cpu(block->bb_numrecs);
504
505 /* More records than minrecs means the block is ok. */
506 if (numrecs >= cur->bc_ops->get_minrecs(cur, level))
507 return;
508
509 /*
510 * For btrees rooted in the inode, it's possible that the root block
511 * contents spilled into a regular ondisk block because there wasn't
512 * enough space in the inode root. The number of records in that
513 * child block might be less than the standard minrecs, but that's ok
514 * provided that there's only one direct child of the root.
515 */
516 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
517 level == cur->bc_nlevels - 2) {
518 struct xfs_btree_block *root_block;
519 struct xfs_buf *root_bp;
520 int root_maxrecs;
521
522 root_block = xfs_btree_get_block(cur, root_level, &root_bp);
523 root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level);
524 if (xchk_btree_check_iroot_minrecs(bs) &&
525 (be16_to_cpu(root_block->bb_numrecs) != 1 ||
526 numrecs <= root_maxrecs))
527 xchk_btree_set_corrupt(bs->sc, cur, level);
528 return;
529 }
530
531 /*
532 * Otherwise, only the root level is allowed to have fewer than minrecs
533 * records or keyptrs.
534 */
535 if (level < root_level)
536 xchk_btree_set_corrupt(bs->sc, cur, level);
537 }
538
539 /*
540 * If this btree block has a parent, make sure that the parent's keys capture
541 * the keyspace contained in this block.
542 */
543 STATIC void
xchk_btree_block_check_keys(struct xchk_btree * bs,int level,struct xfs_btree_block * block)544 xchk_btree_block_check_keys(
545 struct xchk_btree *bs,
546 int level,
547 struct xfs_btree_block *block)
548 {
549 union xfs_btree_key block_key;
550 union xfs_btree_key *block_high_key;
551 union xfs_btree_key *parent_low_key, *parent_high_key;
552 struct xfs_btree_cur *cur = bs->cur;
553 struct xfs_btree_block *parent_block;
554 struct xfs_buf *bp;
555
556 if (level == cur->bc_nlevels - 1)
557 return;
558
559 xfs_btree_get_keys(cur, block, &block_key);
560
561 /* Make sure the low key of this block matches the parent. */
562 parent_block = xfs_btree_get_block(cur, level + 1, &bp);
563 parent_low_key = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr,
564 parent_block);
565 if (xfs_btree_keycmp_ne(cur, &block_key, parent_low_key)) {
566 xchk_btree_set_corrupt(bs->sc, bs->cur, level);
567 return;
568 }
569
570 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
571 return;
572
573 /* Make sure the high key of this block matches the parent. */
574 parent_high_key = xfs_btree_high_key_addr(cur,
575 cur->bc_levels[level + 1].ptr, parent_block);
576 block_high_key = xfs_btree_high_key_from_key(cur, &block_key);
577 if (xfs_btree_keycmp_ne(cur, block_high_key, parent_high_key))
578 xchk_btree_set_corrupt(bs->sc, bs->cur, level);
579 }
580
581 /*
582 * Grab and scrub a btree block given a btree pointer. Returns block
583 * and buffer pointers (if applicable) if they're ok to use.
584 */
585 STATIC int
xchk_btree_get_block(struct xchk_btree * bs,int level,union xfs_btree_ptr * pp,struct xfs_btree_block ** pblock,struct xfs_buf ** pbp)586 xchk_btree_get_block(
587 struct xchk_btree *bs,
588 int level,
589 union xfs_btree_ptr *pp,
590 struct xfs_btree_block **pblock,
591 struct xfs_buf **pbp)
592 {
593 xfs_failaddr_t failed_at;
594 int error;
595
596 *pblock = NULL;
597 *pbp = NULL;
598
599 error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
600 if (!xchk_btree_process_error(bs->sc, bs->cur, level, &error) ||
601 !*pblock)
602 return error;
603
604 xfs_btree_get_block(bs->cur, level, pbp);
605 if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
606 failed_at = __xfs_btree_check_lblock(bs->cur, *pblock,
607 level, *pbp);
608 else
609 failed_at = __xfs_btree_check_sblock(bs->cur, *pblock,
610 level, *pbp);
611 if (failed_at) {
612 xchk_btree_set_corrupt(bs->sc, bs->cur, level);
613 return 0;
614 }
615 if (*pbp)
616 xchk_buffer_recheck(bs->sc, *pbp);
617
618 xchk_btree_check_minrecs(bs, level, *pblock);
619
620 /*
621 * Check the block's owner; this function absorbs error codes
622 * for us.
623 */
624 error = xchk_btree_check_owner(bs, level, *pbp);
625 if (error)
626 return error;
627
628 /*
629 * Check the block's siblings; this function absorbs error codes
630 * for us.
631 */
632 error = xchk_btree_block_check_siblings(bs, *pblock);
633 if (error)
634 return error;
635
636 xchk_btree_block_check_keys(bs, level, *pblock);
637 return 0;
638 }
639
640 /*
641 * Check that the low and high keys of this block match the keys stored
642 * in the parent block.
643 */
644 STATIC void
xchk_btree_block_keys(struct xchk_btree * bs,int level,struct xfs_btree_block * block)645 xchk_btree_block_keys(
646 struct xchk_btree *bs,
647 int level,
648 struct xfs_btree_block *block)
649 {
650 union xfs_btree_key block_keys;
651 struct xfs_btree_cur *cur = bs->cur;
652 union xfs_btree_key *high_bk;
653 union xfs_btree_key *parent_keys;
654 union xfs_btree_key *high_pk;
655 struct xfs_btree_block *parent_block;
656 struct xfs_buf *bp;
657
658 if (level >= cur->bc_nlevels - 1)
659 return;
660
661 /* Calculate the keys for this block. */
662 xfs_btree_get_keys(cur, block, &block_keys);
663
664 /* Obtain the parent's copy of the keys for this block. */
665 parent_block = xfs_btree_get_block(cur, level + 1, &bp);
666 parent_keys = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr,
667 parent_block);
668
669 if (xfs_btree_keycmp_ne(cur, &block_keys, parent_keys))
670 xchk_btree_set_corrupt(bs->sc, cur, 1);
671
672 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
673 return;
674
675 /* Get high keys */
676 high_bk = xfs_btree_high_key_from_key(cur, &block_keys);
677 high_pk = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr,
678 parent_block);
679
680 if (xfs_btree_keycmp_ne(cur, high_bk, high_pk))
681 xchk_btree_set_corrupt(bs->sc, cur, 1);
682 }
683
684 /*
685 * Visit all nodes and leaves of a btree. Check that all pointers and
686 * records are in order, that the keys reflect the records, and use a callback
687 * so that the caller can verify individual records.
688 */
689 int
xchk_btree(struct xfs_scrub * sc,struct xfs_btree_cur * cur,xchk_btree_rec_fn scrub_fn,const struct xfs_owner_info * oinfo,void * private)690 xchk_btree(
691 struct xfs_scrub *sc,
692 struct xfs_btree_cur *cur,
693 xchk_btree_rec_fn scrub_fn,
694 const struct xfs_owner_info *oinfo,
695 void *private)
696 {
697 union xfs_btree_ptr ptr;
698 struct xchk_btree *bs;
699 union xfs_btree_ptr *pp;
700 union xfs_btree_rec *recp;
701 struct xfs_btree_block *block;
702 struct xfs_buf *bp;
703 struct check_owner *co;
704 struct check_owner *n;
705 size_t cur_sz;
706 int level;
707 int error = 0;
708
709 /*
710 * Allocate the btree scrub context from the heap, because this
711 * structure can get rather large. Don't let a caller feed us a
712 * totally absurd size.
713 */
714 cur_sz = xchk_btree_sizeof(cur->bc_nlevels);
715 if (cur_sz > PAGE_SIZE) {
716 xchk_btree_set_corrupt(sc, cur, 0);
717 return 0;
718 }
719 bs = kzalloc(cur_sz, XCHK_GFP_FLAGS);
720 if (!bs)
721 return -ENOMEM;
722 bs->cur = cur;
723 bs->scrub_rec = scrub_fn;
724 bs->oinfo = oinfo;
725 bs->private = private;
726 bs->sc = sc;
727
728 /* Initialize scrub state */
729 INIT_LIST_HEAD(&bs->to_check);
730
731 /*
732 * Load the root of the btree. The helper function absorbs
733 * error codes for us.
734 */
735 level = cur->bc_nlevels - 1;
736 cur->bc_ops->init_ptr_from_cur(cur, &ptr);
737 if (!xchk_btree_ptr_ok(bs, cur->bc_nlevels, &ptr))
738 goto out;
739 error = xchk_btree_get_block(bs, level, &ptr, &block, &bp);
740 if (error || !block)
741 goto out;
742
743 cur->bc_levels[level].ptr = 1;
744
745 while (level < cur->bc_nlevels) {
746 block = xfs_btree_get_block(cur, level, &bp);
747
748 if (level == 0) {
749 /* End of leaf, pop back towards the root. */
750 if (cur->bc_levels[level].ptr >
751 be16_to_cpu(block->bb_numrecs)) {
752 xchk_btree_block_keys(bs, level, block);
753 if (level < cur->bc_nlevels - 1)
754 cur->bc_levels[level + 1].ptr++;
755 level++;
756 continue;
757 }
758
759 /* Records in order for scrub? */
760 xchk_btree_rec(bs);
761
762 /* Call out to the record checker. */
763 recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr,
764 block);
765 error = bs->scrub_rec(bs, recp);
766 if (error)
767 break;
768 if (xchk_should_terminate(sc, &error) ||
769 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
770 break;
771
772 cur->bc_levels[level].ptr++;
773 continue;
774 }
775
776 /* End of node, pop back towards the root. */
777 if (cur->bc_levels[level].ptr >
778 be16_to_cpu(block->bb_numrecs)) {
779 xchk_btree_block_keys(bs, level, block);
780 if (level < cur->bc_nlevels - 1)
781 cur->bc_levels[level + 1].ptr++;
782 level++;
783 continue;
784 }
785
786 /* Keys in order for scrub? */
787 xchk_btree_key(bs, level);
788
789 /* Drill another level deeper. */
790 pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block);
791 if (!xchk_btree_ptr_ok(bs, level, pp)) {
792 cur->bc_levels[level].ptr++;
793 continue;
794 }
795 level--;
796 error = xchk_btree_get_block(bs, level, pp, &block, &bp);
797 if (error || !block)
798 goto out;
799
800 cur->bc_levels[level].ptr = 1;
801 }
802
803 out:
804 /* Process deferred owner checks on btree blocks. */
805 list_for_each_entry_safe(co, n, &bs->to_check, list) {
806 if (!error && bs->cur)
807 error = xchk_btree_check_block_owner(bs, co->level,
808 co->daddr);
809 list_del(&co->list);
810 kfree(co);
811 }
812 kfree(bs);
813
814 return error;
815 }
816