1 /*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_shared.h"
21 #include "xfs_format.h"
22 #include "xfs_log_format.h"
23 #include "xfs_trans_resv.h"
24 #include "xfs_bit.h"
25 #include "xfs_mount.h"
26 #include "xfs_defer.h"
27 #include "xfs_inode.h"
28 #include "xfs_trans.h"
29 #include "xfs_inode_item.h"
30 #include "xfs_buf_item.h"
31 #include "xfs_btree.h"
32 #include "xfs_error.h"
33 #include "xfs_trace.h"
34 #include "xfs_cksum.h"
35 #include "xfs_alloc.h"
36 #include "xfs_log.h"
37
38 /*
39 * Cursor allocation zone.
40 */
41 kmem_zone_t *xfs_btree_cur_zone;
42
43 /*
44 * Btree magic numbers.
45 */
46 static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
47 { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
48 XFS_FIBT_MAGIC, 0 },
49 { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC,
50 XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC,
51 XFS_REFC_CRC_MAGIC }
52 };
53 #define xfs_btree_magic(cur) \
54 xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
55
56 STATIC int /* error (0 or EFSCORRUPTED) */
xfs_btree_check_lblock(struct xfs_btree_cur * cur,struct xfs_btree_block * block,int level,struct xfs_buf * bp)57 xfs_btree_check_lblock(
58 struct xfs_btree_cur *cur, /* btree cursor */
59 struct xfs_btree_block *block, /* btree long form block pointer */
60 int level, /* level of the btree block */
61 struct xfs_buf *bp) /* buffer for block, if any */
62 {
63 int lblock_ok = 1; /* block passes checks */
64 struct xfs_mount *mp; /* file system mount point */
65
66 mp = cur->bc_mp;
67
68 if (xfs_sb_version_hascrc(&mp->m_sb)) {
69 lblock_ok = lblock_ok &&
70 uuid_equal(&block->bb_u.l.bb_uuid,
71 &mp->m_sb.sb_meta_uuid) &&
72 block->bb_u.l.bb_blkno == cpu_to_be64(
73 bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
74 }
75
76 lblock_ok = lblock_ok &&
77 be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
78 be16_to_cpu(block->bb_level) == level &&
79 be16_to_cpu(block->bb_numrecs) <=
80 cur->bc_ops->get_maxrecs(cur, level) &&
81 block->bb_u.l.bb_leftsib &&
82 (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK) ||
83 XFS_FSB_SANITY_CHECK(mp,
84 be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
85 block->bb_u.l.bb_rightsib &&
86 (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK) ||
87 XFS_FSB_SANITY_CHECK(mp,
88 be64_to_cpu(block->bb_u.l.bb_rightsib)));
89
90 if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
91 XFS_ERRTAG_BTREE_CHECK_LBLOCK,
92 XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
93 if (bp)
94 trace_xfs_btree_corrupt(bp, _RET_IP_);
95 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
96 return -EFSCORRUPTED;
97 }
98 return 0;
99 }
100
101 STATIC int /* error (0 or EFSCORRUPTED) */
xfs_btree_check_sblock(struct xfs_btree_cur * cur,struct xfs_btree_block * block,int level,struct xfs_buf * bp)102 xfs_btree_check_sblock(
103 struct xfs_btree_cur *cur, /* btree cursor */
104 struct xfs_btree_block *block, /* btree short form block pointer */
105 int level, /* level of the btree block */
106 struct xfs_buf *bp) /* buffer containing block */
107 {
108 struct xfs_mount *mp; /* file system mount point */
109 struct xfs_buf *agbp; /* buffer for ag. freespace struct */
110 struct xfs_agf *agf; /* ag. freespace structure */
111 xfs_agblock_t agflen; /* native ag. freespace length */
112 int sblock_ok = 1; /* block passes checks */
113
114 mp = cur->bc_mp;
115 agbp = cur->bc_private.a.agbp;
116 agf = XFS_BUF_TO_AGF(agbp);
117 agflen = be32_to_cpu(agf->agf_length);
118
119 if (xfs_sb_version_hascrc(&mp->m_sb)) {
120 sblock_ok = sblock_ok &&
121 uuid_equal(&block->bb_u.s.bb_uuid,
122 &mp->m_sb.sb_meta_uuid) &&
123 block->bb_u.s.bb_blkno == cpu_to_be64(
124 bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
125 }
126
127 sblock_ok = sblock_ok &&
128 be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
129 be16_to_cpu(block->bb_level) == level &&
130 be16_to_cpu(block->bb_numrecs) <=
131 cur->bc_ops->get_maxrecs(cur, level) &&
132 (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
133 be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
134 block->bb_u.s.bb_leftsib &&
135 (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
136 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
137 block->bb_u.s.bb_rightsib;
138
139 if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
140 XFS_ERRTAG_BTREE_CHECK_SBLOCK,
141 XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
142 if (bp)
143 trace_xfs_btree_corrupt(bp, _RET_IP_);
144 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
145 return -EFSCORRUPTED;
146 }
147 return 0;
148 }
149
150 /*
151 * Debug routine: check that block header is ok.
152 */
153 int
xfs_btree_check_block(struct xfs_btree_cur * cur,struct xfs_btree_block * block,int level,struct xfs_buf * bp)154 xfs_btree_check_block(
155 struct xfs_btree_cur *cur, /* btree cursor */
156 struct xfs_btree_block *block, /* generic btree block pointer */
157 int level, /* level of the btree block */
158 struct xfs_buf *bp) /* buffer containing block, if any */
159 {
160 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
161 return xfs_btree_check_lblock(cur, block, level, bp);
162 else
163 return xfs_btree_check_sblock(cur, block, level, bp);
164 }
165
166 /*
167 * Check that (long) pointer is ok.
168 */
169 int /* error (0 or EFSCORRUPTED) */
xfs_btree_check_lptr(struct xfs_btree_cur * cur,xfs_fsblock_t bno,int level)170 xfs_btree_check_lptr(
171 struct xfs_btree_cur *cur, /* btree cursor */
172 xfs_fsblock_t bno, /* btree block disk address */
173 int level) /* btree block level */
174 {
175 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
176 level > 0 &&
177 bno != NULLFSBLOCK &&
178 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
179 return 0;
180 }
181
182 #ifdef DEBUG
183 /*
184 * Check that (short) pointer is ok.
185 */
186 STATIC int /* error (0 or EFSCORRUPTED) */
xfs_btree_check_sptr(struct xfs_btree_cur * cur,xfs_agblock_t bno,int level)187 xfs_btree_check_sptr(
188 struct xfs_btree_cur *cur, /* btree cursor */
189 xfs_agblock_t bno, /* btree block disk address */
190 int level) /* btree block level */
191 {
192 xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks;
193
194 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
195 level > 0 &&
196 bno != NULLAGBLOCK &&
197 bno != 0 &&
198 bno < agblocks);
199 return 0;
200 }
201
202 /*
203 * Check that block ptr is ok.
204 */
205 STATIC int /* error (0 or EFSCORRUPTED) */
xfs_btree_check_ptr(struct xfs_btree_cur * cur,union xfs_btree_ptr * ptr,int index,int level)206 xfs_btree_check_ptr(
207 struct xfs_btree_cur *cur, /* btree cursor */
208 union xfs_btree_ptr *ptr, /* btree block disk address */
209 int index, /* offset from ptr to check */
210 int level) /* btree block level */
211 {
212 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
213 return xfs_btree_check_lptr(cur,
214 be64_to_cpu((&ptr->l)[index]), level);
215 } else {
216 return xfs_btree_check_sptr(cur,
217 be32_to_cpu((&ptr->s)[index]), level);
218 }
219 }
220 #endif
221
222 /*
223 * Calculate CRC on the whole btree block and stuff it into the
224 * long-form btree header.
225 *
226 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
227 * it into the buffer so recovery knows what the last modification was that made
228 * it to disk.
229 */
230 void
xfs_btree_lblock_calc_crc(struct xfs_buf * bp)231 xfs_btree_lblock_calc_crc(
232 struct xfs_buf *bp)
233 {
234 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
235 struct xfs_buf_log_item *bip = bp->b_fspriv;
236
237 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
238 return;
239 if (bip)
240 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
241 xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
242 }
243
244 bool
xfs_btree_lblock_verify_crc(struct xfs_buf * bp)245 xfs_btree_lblock_verify_crc(
246 struct xfs_buf *bp)
247 {
248 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
249 struct xfs_mount *mp = bp->b_target->bt_mount;
250
251 if (xfs_sb_version_hascrc(&mp->m_sb)) {
252 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
253 return false;
254 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
255 }
256
257 return true;
258 }
259
260 /*
261 * Calculate CRC on the whole btree block and stuff it into the
262 * short-form btree header.
263 *
264 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
265 * it into the buffer so recovery knows what the last modification was that made
266 * it to disk.
267 */
268 void
xfs_btree_sblock_calc_crc(struct xfs_buf * bp)269 xfs_btree_sblock_calc_crc(
270 struct xfs_buf *bp)
271 {
272 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
273 struct xfs_buf_log_item *bip = bp->b_fspriv;
274
275 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
276 return;
277 if (bip)
278 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
279 xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
280 }
281
282 bool
xfs_btree_sblock_verify_crc(struct xfs_buf * bp)283 xfs_btree_sblock_verify_crc(
284 struct xfs_buf *bp)
285 {
286 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
287 struct xfs_mount *mp = bp->b_target->bt_mount;
288
289 if (xfs_sb_version_hascrc(&mp->m_sb)) {
290 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
291 return false;
292 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
293 }
294
295 return true;
296 }
297
298 static int
xfs_btree_free_block(struct xfs_btree_cur * cur,struct xfs_buf * bp)299 xfs_btree_free_block(
300 struct xfs_btree_cur *cur,
301 struct xfs_buf *bp)
302 {
303 int error;
304
305 error = cur->bc_ops->free_block(cur, bp);
306 if (!error) {
307 xfs_trans_binval(cur->bc_tp, bp);
308 XFS_BTREE_STATS_INC(cur, free);
309 }
310 return error;
311 }
312
313 /*
314 * Delete the btree cursor.
315 */
316 void
xfs_btree_del_cursor(xfs_btree_cur_t * cur,int error)317 xfs_btree_del_cursor(
318 xfs_btree_cur_t *cur, /* btree cursor */
319 int error) /* del because of error */
320 {
321 int i; /* btree level */
322
323 /*
324 * Clear the buffer pointers, and release the buffers.
325 * If we're doing this in the face of an error, we
326 * need to make sure to inspect all of the entries
327 * in the bc_bufs array for buffers to be unlocked.
328 * This is because some of the btree code works from
329 * level n down to 0, and if we get an error along
330 * the way we won't have initialized all the entries
331 * down to 0.
332 */
333 for (i = 0; i < cur->bc_nlevels; i++) {
334 if (cur->bc_bufs[i])
335 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
336 else if (!error)
337 break;
338 }
339 /*
340 * Can't free a bmap cursor without having dealt with the
341 * allocated indirect blocks' accounting.
342 */
343 ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
344 cur->bc_private.b.allocated == 0);
345 /*
346 * Free the cursor.
347 */
348 kmem_zone_free(xfs_btree_cur_zone, cur);
349 }
350
351 /*
352 * Duplicate the btree cursor.
353 * Allocate a new one, copy the record, re-get the buffers.
354 */
355 int /* error */
xfs_btree_dup_cursor(xfs_btree_cur_t * cur,xfs_btree_cur_t ** ncur)356 xfs_btree_dup_cursor(
357 xfs_btree_cur_t *cur, /* input cursor */
358 xfs_btree_cur_t **ncur) /* output cursor */
359 {
360 xfs_buf_t *bp; /* btree block's buffer pointer */
361 int error; /* error return value */
362 int i; /* level number of btree block */
363 xfs_mount_t *mp; /* mount structure for filesystem */
364 xfs_btree_cur_t *new; /* new cursor value */
365 xfs_trans_t *tp; /* transaction pointer, can be NULL */
366
367 tp = cur->bc_tp;
368 mp = cur->bc_mp;
369
370 /*
371 * Allocate a new cursor like the old one.
372 */
373 new = cur->bc_ops->dup_cursor(cur);
374
375 /*
376 * Copy the record currently in the cursor.
377 */
378 new->bc_rec = cur->bc_rec;
379
380 /*
381 * For each level current, re-get the buffer and copy the ptr value.
382 */
383 for (i = 0; i < new->bc_nlevels; i++) {
384 new->bc_ptrs[i] = cur->bc_ptrs[i];
385 new->bc_ra[i] = cur->bc_ra[i];
386 bp = cur->bc_bufs[i];
387 if (bp) {
388 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
389 XFS_BUF_ADDR(bp), mp->m_bsize,
390 0, &bp,
391 cur->bc_ops->buf_ops);
392 if (error) {
393 xfs_btree_del_cursor(new, error);
394 *ncur = NULL;
395 return error;
396 }
397 }
398 new->bc_bufs[i] = bp;
399 }
400 *ncur = new;
401 return 0;
402 }
403
404 /*
405 * XFS btree block layout and addressing:
406 *
407 * There are two types of blocks in the btree: leaf and non-leaf blocks.
408 *
 * A leaf block starts with a header, followed by records containing
 * the values.  A non-leaf block also starts with the same header, and
411 * then first contains lookup keys followed by an equal number of pointers
412 * to the btree blocks at the previous level.
413 *
414 * +--------+-------+-------+-------+-------+-------+-------+
415 * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
416 * +--------+-------+-------+-------+-------+-------+-------+
417 *
418 * +--------+-------+-------+-------+-------+-------+-------+
419 * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
420 * +--------+-------+-------+-------+-------+-------+-------+
421 *
422 * The header is called struct xfs_btree_block for reasons better left unknown
423 * and comes in different versions for short (32bit) and long (64bit) block
424 * pointers. The record and key structures are defined by the btree instances
425 * and opaque to the btree core. The block pointers are simple disk endian
426 * integers, available in a short (32bit) and long (64bit) variant.
427 *
428 * The helpers below calculate the offset of a given record, key or pointer
429 * into a btree block (xfs_btree_*_offset) or return a pointer to the given
430 * record, key or pointer (xfs_btree_*_addr). Note that all addressing
431 * inside the btree block is done using indices starting at one, not zero!
432 *
433 * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing
434 * overlapping intervals. In such a tree, records are still sorted lowest to
435 * highest and indexed by the smallest key value that refers to the record.
436 * However, nodes are different: each pointer has two associated keys -- one
437 * indexing the lowest key available in the block(s) below (the same behavior
438 * as the key in a regular btree) and another indexing the highest key
439 * available in the block(s) below. Because records are /not/ sorted by the
440 * highest key, all leaf block updates require us to compute the highest key
441 * that matches any record in the leaf and to recursively update the high keys
442 * in the nodes going further up in the tree, if necessary. Nodes look like
443 * this:
444 *
445 * +--------+-----+-----+-----+-----+-----+-------+-------+-----+
446 * Non-Leaf: | header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... |
447 * +--------+-----+-----+-----+-----+-----+-------+-------+-----+
448 *
449 * To perform an interval query on an overlapped tree, perform the usual
450 * depth-first search and use the low and high keys to decide if we can skip
451 * that particular node. If a leaf node is reached, return the records that
452 * intersect the interval. Note that an interval query may return numerous
453 * entries. For a non-overlapped tree, simply search for the record associated
454 * with the lowest key and iterate forward until a non-matching record is
 * found.  Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by
 * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discusses this
 * in more detail.
458 *
459 * Why do we care about overlapping intervals? Let's say you have a bunch of
460 * reverse mapping records on a reflink filesystem:
461 *
462 * 1: +- file A startblock B offset C length D -----------+
463 * 2: +- file E startblock F offset G length H --------------+
464 * 3: +- file I startblock F offset J length K --+
465 * 4: +- file L... --+
466 *
467 * Now say we want to map block (B+D) into file A at offset (C+D). Ideally,
468 * we'd simply increment the length of record 1. But how do we find the record
469 * that ends at (B+D-1) (i.e. record 1)? A LE lookup of (B+D-1) would return
470 * record 3 because the keys are ordered first by startblock. An interval
471 * query would return records 1 and 2 because they both overlap (B+D-1), and
472 * from that we can pick out record 1 as the appropriate left neighbor.
473 *
474 * In the non-overlapped case you can do a LE lookup and decrement the cursor
475 * because a record's interval must end before the next record.
476 */
477
478 /*
479 * Return size of the btree block header for this btree instance.
480 */
xfs_btree_block_len(struct xfs_btree_cur * cur)481 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
482 {
483 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
484 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
485 return XFS_BTREE_LBLOCK_CRC_LEN;
486 return XFS_BTREE_LBLOCK_LEN;
487 }
488 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
489 return XFS_BTREE_SBLOCK_CRC_LEN;
490 return XFS_BTREE_SBLOCK_LEN;
491 }
492
493 /*
494 * Return size of btree block pointers for this btree instance.
495 */
xfs_btree_ptr_len(struct xfs_btree_cur * cur)496 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
497 {
498 return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
499 sizeof(__be64) : sizeof(__be32);
500 }
501
502 /*
503 * Calculate offset of the n-th record in a btree block.
504 */
505 STATIC size_t
xfs_btree_rec_offset(struct xfs_btree_cur * cur,int n)506 xfs_btree_rec_offset(
507 struct xfs_btree_cur *cur,
508 int n)
509 {
510 return xfs_btree_block_len(cur) +
511 (n - 1) * cur->bc_ops->rec_len;
512 }
513
514 /*
515 * Calculate offset of the n-th key in a btree block.
516 */
517 STATIC size_t
xfs_btree_key_offset(struct xfs_btree_cur * cur,int n)518 xfs_btree_key_offset(
519 struct xfs_btree_cur *cur,
520 int n)
521 {
522 return xfs_btree_block_len(cur) +
523 (n - 1) * cur->bc_ops->key_len;
524 }
525
526 /*
527 * Calculate offset of the n-th high key in a btree block.
528 */
529 STATIC size_t
xfs_btree_high_key_offset(struct xfs_btree_cur * cur,int n)530 xfs_btree_high_key_offset(
531 struct xfs_btree_cur *cur,
532 int n)
533 {
534 return xfs_btree_block_len(cur) +
535 (n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2);
536 }
537
538 /*
539 * Calculate offset of the n-th block pointer in a btree block.
540 */
541 STATIC size_t
xfs_btree_ptr_offset(struct xfs_btree_cur * cur,int n,int level)542 xfs_btree_ptr_offset(
543 struct xfs_btree_cur *cur,
544 int n,
545 int level)
546 {
547 return xfs_btree_block_len(cur) +
548 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
549 (n - 1) * xfs_btree_ptr_len(cur);
550 }
551
552 /*
553 * Return a pointer to the n-th record in the btree block.
554 */
555 STATIC union xfs_btree_rec *
xfs_btree_rec_addr(struct xfs_btree_cur * cur,int n,struct xfs_btree_block * block)556 xfs_btree_rec_addr(
557 struct xfs_btree_cur *cur,
558 int n,
559 struct xfs_btree_block *block)
560 {
561 return (union xfs_btree_rec *)
562 ((char *)block + xfs_btree_rec_offset(cur, n));
563 }
564
565 /*
566 * Return a pointer to the n-th key in the btree block.
567 */
568 STATIC union xfs_btree_key *
xfs_btree_key_addr(struct xfs_btree_cur * cur,int n,struct xfs_btree_block * block)569 xfs_btree_key_addr(
570 struct xfs_btree_cur *cur,
571 int n,
572 struct xfs_btree_block *block)
573 {
574 return (union xfs_btree_key *)
575 ((char *)block + xfs_btree_key_offset(cur, n));
576 }
577
578 /*
579 * Return a pointer to the n-th high key in the btree block.
580 */
581 STATIC union xfs_btree_key *
xfs_btree_high_key_addr(struct xfs_btree_cur * cur,int n,struct xfs_btree_block * block)582 xfs_btree_high_key_addr(
583 struct xfs_btree_cur *cur,
584 int n,
585 struct xfs_btree_block *block)
586 {
587 return (union xfs_btree_key *)
588 ((char *)block + xfs_btree_high_key_offset(cur, n));
589 }
590
591 /*
592 * Return a pointer to the n-th block pointer in the btree block.
593 */
594 STATIC union xfs_btree_ptr *
xfs_btree_ptr_addr(struct xfs_btree_cur * cur,int n,struct xfs_btree_block * block)595 xfs_btree_ptr_addr(
596 struct xfs_btree_cur *cur,
597 int n,
598 struct xfs_btree_block *block)
599 {
600 int level = xfs_btree_get_level(block);
601
602 ASSERT(block->bb_level != 0);
603
604 return (union xfs_btree_ptr *)
605 ((char *)block + xfs_btree_ptr_offset(cur, n, level));
606 }
607
608 /*
609 * Get the root block which is stored in the inode.
610 *
611 * For now this btree implementation assumes the btree root is always
612 * stored in the if_broot field of an inode fork.
613 */
614 STATIC struct xfs_btree_block *
xfs_btree_get_iroot(struct xfs_btree_cur * cur)615 xfs_btree_get_iroot(
616 struct xfs_btree_cur *cur)
617 {
618 struct xfs_ifork *ifp;
619
620 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
621 return (struct xfs_btree_block *)ifp->if_broot;
622 }
623
624 /*
625 * Retrieve the block pointer from the cursor at the given level.
626 * This may be an inode btree root or from a buffer.
627 */
628 STATIC struct xfs_btree_block * /* generic btree block pointer */
xfs_btree_get_block(struct xfs_btree_cur * cur,int level,struct xfs_buf ** bpp)629 xfs_btree_get_block(
630 struct xfs_btree_cur *cur, /* btree cursor */
631 int level, /* level in btree */
632 struct xfs_buf **bpp) /* buffer containing the block */
633 {
634 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
635 (level == cur->bc_nlevels - 1)) {
636 *bpp = NULL;
637 return xfs_btree_get_iroot(cur);
638 }
639
640 *bpp = cur->bc_bufs[level];
641 return XFS_BUF_TO_BLOCK(*bpp);
642 }
643
644 /*
645 * Get a buffer for the block, return it with no data read.
646 * Long-form addressing.
647 */
648 xfs_buf_t * /* buffer for fsbno */
xfs_btree_get_bufl(xfs_mount_t * mp,xfs_trans_t * tp,xfs_fsblock_t fsbno,uint lock)649 xfs_btree_get_bufl(
650 xfs_mount_t *mp, /* file system mount point */
651 xfs_trans_t *tp, /* transaction pointer */
652 xfs_fsblock_t fsbno, /* file system block number */
653 uint lock) /* lock flags for get_buf */
654 {
655 xfs_daddr_t d; /* real disk block address */
656
657 ASSERT(fsbno != NULLFSBLOCK);
658 d = XFS_FSB_TO_DADDR(mp, fsbno);
659 return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
660 }
661
662 /*
663 * Get a buffer for the block, return it with no data read.
664 * Short-form addressing.
665 */
666 xfs_buf_t * /* buffer for agno/agbno */
xfs_btree_get_bufs(xfs_mount_t * mp,xfs_trans_t * tp,xfs_agnumber_t agno,xfs_agblock_t agbno,uint lock)667 xfs_btree_get_bufs(
668 xfs_mount_t *mp, /* file system mount point */
669 xfs_trans_t *tp, /* transaction pointer */
670 xfs_agnumber_t agno, /* allocation group number */
671 xfs_agblock_t agbno, /* allocation group block number */
672 uint lock) /* lock flags for get_buf */
673 {
674 xfs_daddr_t d; /* real disk block address */
675
676 ASSERT(agno != NULLAGNUMBER);
677 ASSERT(agbno != NULLAGBLOCK);
678 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
679 return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
680 }
681
682 /*
683 * Check for the cursor referring to the last block at the given level.
684 */
685 int /* 1=is last block, 0=not last block */
xfs_btree_islastblock(xfs_btree_cur_t * cur,int level)686 xfs_btree_islastblock(
687 xfs_btree_cur_t *cur, /* btree cursor */
688 int level) /* level to check */
689 {
690 struct xfs_btree_block *block; /* generic btree block pointer */
691 xfs_buf_t *bp; /* buffer containing block */
692
693 block = xfs_btree_get_block(cur, level, &bp);
694 xfs_btree_check_block(cur, block, level, bp);
695 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
696 return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK);
697 else
698 return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
699 }
700
701 /*
702 * Change the cursor to point to the first record at the given level.
703 * Other levels are unaffected.
704 */
705 STATIC int /* success=1, failure=0 */
xfs_btree_firstrec(xfs_btree_cur_t * cur,int level)706 xfs_btree_firstrec(
707 xfs_btree_cur_t *cur, /* btree cursor */
708 int level) /* level to change */
709 {
710 struct xfs_btree_block *block; /* generic btree block pointer */
711 xfs_buf_t *bp; /* buffer containing block */
712
713 /*
714 * Get the block pointer for this level.
715 */
716 block = xfs_btree_get_block(cur, level, &bp);
717 if (xfs_btree_check_block(cur, block, level, bp))
718 return 0;
719 /*
720 * It's empty, there is no such record.
721 */
722 if (!block->bb_numrecs)
723 return 0;
724 /*
725 * Set the ptr value to 1, that's the first record/key.
726 */
727 cur->bc_ptrs[level] = 1;
728 return 1;
729 }
730
731 /*
732 * Change the cursor to point to the last record in the current block
733 * at the given level. Other levels are unaffected.
734 */
735 STATIC int /* success=1, failure=0 */
xfs_btree_lastrec(xfs_btree_cur_t * cur,int level)736 xfs_btree_lastrec(
737 xfs_btree_cur_t *cur, /* btree cursor */
738 int level) /* level to change */
739 {
740 struct xfs_btree_block *block; /* generic btree block pointer */
741 xfs_buf_t *bp; /* buffer containing block */
742
743 /*
744 * Get the block pointer for this level.
745 */
746 block = xfs_btree_get_block(cur, level, &bp);
747 if (xfs_btree_check_block(cur, block, level, bp))
748 return 0;
749 /*
750 * It's empty, there is no such record.
751 */
752 if (!block->bb_numrecs)
753 return 0;
754 /*
755 * Set the ptr value to numrecs, that's the last record/key.
756 */
757 cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
758 return 1;
759 }
760
761 /*
762 * Compute first and last byte offsets for the fields given.
763 * Interprets the offsets table, which contains struct field offsets.
764 */
765 void
xfs_btree_offsets(__int64_t fields,const short * offsets,int nbits,int * first,int * last)766 xfs_btree_offsets(
767 __int64_t fields, /* bitmask of fields */
768 const short *offsets, /* table of field offsets */
769 int nbits, /* number of bits to inspect */
770 int *first, /* output: first byte offset */
771 int *last) /* output: last byte offset */
772 {
773 int i; /* current bit number */
774 __int64_t imask; /* mask for current bit number */
775
776 ASSERT(fields != 0);
777 /*
778 * Find the lowest bit, so the first byte offset.
779 */
780 for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
781 if (imask & fields) {
782 *first = offsets[i];
783 break;
784 }
785 }
786 /*
787 * Find the highest bit, so the last byte offset.
788 */
789 for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
790 if (imask & fields) {
791 *last = offsets[i + 1] - 1;
792 break;
793 }
794 }
795 }
796
797 /*
798 * Get a buffer for the block, return it read in.
799 * Long-form addressing.
800 */
801 int
xfs_btree_read_bufl(struct xfs_mount * mp,struct xfs_trans * tp,xfs_fsblock_t fsbno,uint lock,struct xfs_buf ** bpp,int refval,const struct xfs_buf_ops * ops)802 xfs_btree_read_bufl(
803 struct xfs_mount *mp, /* file system mount point */
804 struct xfs_trans *tp, /* transaction pointer */
805 xfs_fsblock_t fsbno, /* file system block number */
806 uint lock, /* lock flags for read_buf */
807 struct xfs_buf **bpp, /* buffer for fsbno */
808 int refval, /* ref count value for buffer */
809 const struct xfs_buf_ops *ops)
810 {
811 struct xfs_buf *bp; /* return value */
812 xfs_daddr_t d; /* real disk block address */
813 int error;
814
815 if (!XFS_FSB_SANITY_CHECK(mp, fsbno))
816 return -EFSCORRUPTED;
817 d = XFS_FSB_TO_DADDR(mp, fsbno);
818 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
819 mp->m_bsize, lock, &bp, ops);
820 if (error)
821 return error;
822 if (bp)
823 xfs_buf_set_ref(bp, refval);
824 *bpp = bp;
825 return 0;
826 }
827
828 /*
829 * Read-ahead the block, don't wait for it, don't return a buffer.
830 * Long-form addressing.
831 */
832 /* ARGSUSED */
833 void
xfs_btree_reada_bufl(struct xfs_mount * mp,xfs_fsblock_t fsbno,xfs_extlen_t count,const struct xfs_buf_ops * ops)834 xfs_btree_reada_bufl(
835 struct xfs_mount *mp, /* file system mount point */
836 xfs_fsblock_t fsbno, /* file system block number */
837 xfs_extlen_t count, /* count of filesystem blocks */
838 const struct xfs_buf_ops *ops)
839 {
840 xfs_daddr_t d;
841
842 ASSERT(fsbno != NULLFSBLOCK);
843 d = XFS_FSB_TO_DADDR(mp, fsbno);
844 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
845 }
846
847 /*
848 * Read-ahead the block, don't wait for it, don't return a buffer.
849 * Short-form addressing.
850 */
851 /* ARGSUSED */
852 void
xfs_btree_reada_bufs(struct xfs_mount * mp,xfs_agnumber_t agno,xfs_agblock_t agbno,xfs_extlen_t count,const struct xfs_buf_ops * ops)853 xfs_btree_reada_bufs(
854 struct xfs_mount *mp, /* file system mount point */
855 xfs_agnumber_t agno, /* allocation group number */
856 xfs_agblock_t agbno, /* allocation group block number */
857 xfs_extlen_t count, /* count of filesystem blocks */
858 const struct xfs_buf_ops *ops)
859 {
860 xfs_daddr_t d;
861
862 ASSERT(agno != NULLAGNUMBER);
863 ASSERT(agbno != NULLAGBLOCK);
864 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
865 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
866 }
867
868 STATIC int
xfs_btree_readahead_lblock(struct xfs_btree_cur * cur,int lr,struct xfs_btree_block * block)869 xfs_btree_readahead_lblock(
870 struct xfs_btree_cur *cur,
871 int lr,
872 struct xfs_btree_block *block)
873 {
874 int rval = 0;
875 xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
876 xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
877
878 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
879 xfs_btree_reada_bufl(cur->bc_mp, left, 1,
880 cur->bc_ops->buf_ops);
881 rval++;
882 }
883
884 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
885 xfs_btree_reada_bufl(cur->bc_mp, right, 1,
886 cur->bc_ops->buf_ops);
887 rval++;
888 }
889
890 return rval;
891 }
892
893 STATIC int
xfs_btree_readahead_sblock(struct xfs_btree_cur * cur,int lr,struct xfs_btree_block * block)894 xfs_btree_readahead_sblock(
895 struct xfs_btree_cur *cur,
896 int lr,
897 struct xfs_btree_block *block)
898 {
899 int rval = 0;
900 xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib);
901 xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib);
902
903
904 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
905 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
906 left, 1, cur->bc_ops->buf_ops);
907 rval++;
908 }
909
910 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
911 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
912 right, 1, cur->bc_ops->buf_ops);
913 rval++;
914 }
915
916 return rval;
917 }
918
919 /*
920 * Read-ahead btree blocks, at the given level.
921 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
922 */
923 STATIC int
xfs_btree_readahead(struct xfs_btree_cur * cur,int lev,int lr)924 xfs_btree_readahead(
925 struct xfs_btree_cur *cur, /* btree cursor */
926 int lev, /* level in btree */
927 int lr) /* left/right bits */
928 {
929 struct xfs_btree_block *block;
930
931 /*
932 * No readahead needed if we are at the root level and the
933 * btree root is stored in the inode.
934 */
935 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
936 (lev == cur->bc_nlevels - 1))
937 return 0;
938
939 if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
940 return 0;
941
942 cur->bc_ra[lev] |= lr;
943 block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
944
945 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
946 return xfs_btree_readahead_lblock(cur, lr, block);
947 return xfs_btree_readahead_sblock(cur, lr, block);
948 }
949
950 STATIC xfs_daddr_t
xfs_btree_ptr_to_daddr(struct xfs_btree_cur * cur,union xfs_btree_ptr * ptr)951 xfs_btree_ptr_to_daddr(
952 struct xfs_btree_cur *cur,
953 union xfs_btree_ptr *ptr)
954 {
955 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
956 ASSERT(ptr->l != cpu_to_be64(NULLFSBLOCK));
957
958 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
959 } else {
960 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
961 ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
962
963 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
964 be32_to_cpu(ptr->s));
965 }
966 }
967
968 /*
969 * Readahead @count btree blocks at the given @ptr location.
970 *
971 * We don't need to care about long or short form btrees here as we have a
972 * method of converting the ptr directly to a daddr available to us.
973 */
974 STATIC void
xfs_btree_readahead_ptr(struct xfs_btree_cur * cur,union xfs_btree_ptr * ptr,xfs_extlen_t count)975 xfs_btree_readahead_ptr(
976 struct xfs_btree_cur *cur,
977 union xfs_btree_ptr *ptr,
978 xfs_extlen_t count)
979 {
980 xfs_buf_readahead(cur->bc_mp->m_ddev_targp,
981 xfs_btree_ptr_to_daddr(cur, ptr),
982 cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
983 }
984
985 /*
986 * Set the buffer for level "lev" in the cursor to bp, releasing
987 * any previous buffer.
988 */
989 STATIC void
xfs_btree_setbuf(xfs_btree_cur_t * cur,int lev,xfs_buf_t * bp)990 xfs_btree_setbuf(
991 xfs_btree_cur_t *cur, /* btree cursor */
992 int lev, /* level in btree */
993 xfs_buf_t *bp) /* new buffer to set */
994 {
995 struct xfs_btree_block *b; /* btree block */
996
997 if (cur->bc_bufs[lev])
998 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
999 cur->bc_bufs[lev] = bp;
1000 cur->bc_ra[lev] = 0;
1001
1002 b = XFS_BUF_TO_BLOCK(bp);
1003 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1004 if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
1005 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
1006 if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
1007 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
1008 } else {
1009 if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
1010 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
1011 if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
1012 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
1013 }
1014 }
1015
1016 STATIC int
xfs_btree_ptr_is_null(struct xfs_btree_cur * cur,union xfs_btree_ptr * ptr)1017 xfs_btree_ptr_is_null(
1018 struct xfs_btree_cur *cur,
1019 union xfs_btree_ptr *ptr)
1020 {
1021 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1022 return ptr->l == cpu_to_be64(NULLFSBLOCK);
1023 else
1024 return ptr->s == cpu_to_be32(NULLAGBLOCK);
1025 }
1026
1027 STATIC void
xfs_btree_set_ptr_null(struct xfs_btree_cur * cur,union xfs_btree_ptr * ptr)1028 xfs_btree_set_ptr_null(
1029 struct xfs_btree_cur *cur,
1030 union xfs_btree_ptr *ptr)
1031 {
1032 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1033 ptr->l = cpu_to_be64(NULLFSBLOCK);
1034 else
1035 ptr->s = cpu_to_be32(NULLAGBLOCK);
1036 }
1037
1038 /*
1039 * Get/set/init sibling pointers
1040 */
1041 STATIC void
xfs_btree_get_sibling(struct xfs_btree_cur * cur,struct xfs_btree_block * block,union xfs_btree_ptr * ptr,int lr)1042 xfs_btree_get_sibling(
1043 struct xfs_btree_cur *cur,
1044 struct xfs_btree_block *block,
1045 union xfs_btree_ptr *ptr,
1046 int lr)
1047 {
1048 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
1049
1050 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1051 if (lr == XFS_BB_RIGHTSIB)
1052 ptr->l = block->bb_u.l.bb_rightsib;
1053 else
1054 ptr->l = block->bb_u.l.bb_leftsib;
1055 } else {
1056 if (lr == XFS_BB_RIGHTSIB)
1057 ptr->s = block->bb_u.s.bb_rightsib;
1058 else
1059 ptr->s = block->bb_u.s.bb_leftsib;
1060 }
1061 }
1062
1063 STATIC void
xfs_btree_set_sibling(struct xfs_btree_cur * cur,struct xfs_btree_block * block,union xfs_btree_ptr * ptr,int lr)1064 xfs_btree_set_sibling(
1065 struct xfs_btree_cur *cur,
1066 struct xfs_btree_block *block,
1067 union xfs_btree_ptr *ptr,
1068 int lr)
1069 {
1070 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
1071
1072 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1073 if (lr == XFS_BB_RIGHTSIB)
1074 block->bb_u.l.bb_rightsib = ptr->l;
1075 else
1076 block->bb_u.l.bb_leftsib = ptr->l;
1077 } else {
1078 if (lr == XFS_BB_RIGHTSIB)
1079 block->bb_u.s.bb_rightsib = ptr->s;
1080 else
1081 block->bb_u.s.bb_leftsib = ptr->s;
1082 }
1083 }
1084
1085 void
xfs_btree_init_block_int(struct xfs_mount * mp,struct xfs_btree_block * buf,xfs_daddr_t blkno,__u32 magic,__u16 level,__u16 numrecs,__u64 owner,unsigned int flags)1086 xfs_btree_init_block_int(
1087 struct xfs_mount *mp,
1088 struct xfs_btree_block *buf,
1089 xfs_daddr_t blkno,
1090 __u32 magic,
1091 __u16 level,
1092 __u16 numrecs,
1093 __u64 owner,
1094 unsigned int flags)
1095 {
1096 buf->bb_magic = cpu_to_be32(magic);
1097 buf->bb_level = cpu_to_be16(level);
1098 buf->bb_numrecs = cpu_to_be16(numrecs);
1099
1100 if (flags & XFS_BTREE_LONG_PTRS) {
1101 buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
1102 buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
1103 if (flags & XFS_BTREE_CRC_BLOCKS) {
1104 buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
1105 buf->bb_u.l.bb_owner = cpu_to_be64(owner);
1106 uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid);
1107 buf->bb_u.l.bb_pad = 0;
1108 buf->bb_u.l.bb_lsn = 0;
1109 }
1110 } else {
1111 /* owner is a 32 bit value on short blocks */
1112 __u32 __owner = (__u32)owner;
1113
1114 buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1115 buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
1116 if (flags & XFS_BTREE_CRC_BLOCKS) {
1117 buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
1118 buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
1119 uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid);
1120 buf->bb_u.s.bb_lsn = 0;
1121 }
1122 }
1123 }
1124
/*
 * Initialise a new btree block header in the buffer @bp.  Thin wrapper
 * around xfs_btree_init_block_int() that derives the disk address from
 * the buffer itself.
 */
void
xfs_btree_init_block(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	__u32			magic,		/* on-disk magic for this btree type */
	__u16			level,		/* tree level (0 == leaf) */
	__u16			numrecs,	/* initial record count */
	__u64			owner,		/* owning inode or AG number */
	unsigned int		flags)		/* XFS_BTREE_* geometry flags */
{
	xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
				 magic, level, numrecs, owner, flags);
}
1138
1139 STATIC void
xfs_btree_init_block_cur(struct xfs_btree_cur * cur,struct xfs_buf * bp,int level,int numrecs)1140 xfs_btree_init_block_cur(
1141 struct xfs_btree_cur *cur,
1142 struct xfs_buf *bp,
1143 int level,
1144 int numrecs)
1145 {
1146 __u64 owner;
1147
1148 /*
1149 * we can pull the owner from the cursor right now as the different
1150 * owners align directly with the pointer size of the btree. This may
1151 * change in future, but is safe for current users of the generic btree
1152 * code.
1153 */
1154 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1155 owner = cur->bc_private.b.ip->i_ino;
1156 else
1157 owner = cur->bc_private.a.agno;
1158
1159 xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
1160 xfs_btree_magic(cur), level, numrecs,
1161 owner, cur->bc_flags);
1162 }
1163
1164 /*
1165 * Return true if ptr is the last record in the btree and
1166 * we need to track updates to this record. The decision
1167 * will be further refined in the update_lastrec method.
1168 */
1169 STATIC int
xfs_btree_is_lastrec(struct xfs_btree_cur * cur,struct xfs_btree_block * block,int level)1170 xfs_btree_is_lastrec(
1171 struct xfs_btree_cur *cur,
1172 struct xfs_btree_block *block,
1173 int level)
1174 {
1175 union xfs_btree_ptr ptr;
1176
1177 if (level > 0)
1178 return 0;
1179 if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
1180 return 0;
1181
1182 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1183 if (!xfs_btree_ptr_is_null(cur, &ptr))
1184 return 0;
1185 return 1;
1186 }
1187
1188 STATIC void
xfs_btree_buf_to_ptr(struct xfs_btree_cur * cur,struct xfs_buf * bp,union xfs_btree_ptr * ptr)1189 xfs_btree_buf_to_ptr(
1190 struct xfs_btree_cur *cur,
1191 struct xfs_buf *bp,
1192 union xfs_btree_ptr *ptr)
1193 {
1194 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1195 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
1196 XFS_BUF_ADDR(bp)));
1197 else {
1198 ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
1199 XFS_BUF_ADDR(bp)));
1200 }
1201 }
1202
1203 STATIC void
xfs_btree_set_refs(struct xfs_btree_cur * cur,struct xfs_buf * bp)1204 xfs_btree_set_refs(
1205 struct xfs_btree_cur *cur,
1206 struct xfs_buf *bp)
1207 {
1208 switch (cur->bc_btnum) {
1209 case XFS_BTNUM_BNO:
1210 case XFS_BTNUM_CNT:
1211 xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
1212 break;
1213 case XFS_BTNUM_INO:
1214 case XFS_BTNUM_FINO:
1215 xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
1216 break;
1217 case XFS_BTNUM_BMAP:
1218 xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
1219 break;
1220 case XFS_BTNUM_RMAP:
1221 xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF);
1222 break;
1223 case XFS_BTNUM_REFC:
1224 xfs_buf_set_ref(bp, XFS_REFC_BTREE_REF);
1225 break;
1226 default:
1227 ASSERT(0);
1228 }
1229 }
1230
1231 STATIC int
xfs_btree_get_buf_block(struct xfs_btree_cur * cur,union xfs_btree_ptr * ptr,int flags,struct xfs_btree_block ** block,struct xfs_buf ** bpp)1232 xfs_btree_get_buf_block(
1233 struct xfs_btree_cur *cur,
1234 union xfs_btree_ptr *ptr,
1235 int flags,
1236 struct xfs_btree_block **block,
1237 struct xfs_buf **bpp)
1238 {
1239 struct xfs_mount *mp = cur->bc_mp;
1240 xfs_daddr_t d;
1241
1242 /* need to sort out how callers deal with failures first */
1243 ASSERT(!(flags & XBF_TRYLOCK));
1244
1245 d = xfs_btree_ptr_to_daddr(cur, ptr);
1246 *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
1247 mp->m_bsize, flags);
1248
1249 if (!*bpp)
1250 return -ENOMEM;
1251
1252 (*bpp)->b_ops = cur->bc_ops->buf_ops;
1253 *block = XFS_BUF_TO_BLOCK(*bpp);
1254 return 0;
1255 }
1256
1257 /*
1258 * Read in the buffer at the given ptr and return the buffer and
1259 * the block pointer within the buffer.
1260 */
1261 STATIC int
xfs_btree_read_buf_block(struct xfs_btree_cur * cur,union xfs_btree_ptr * ptr,int flags,struct xfs_btree_block ** block,struct xfs_buf ** bpp)1262 xfs_btree_read_buf_block(
1263 struct xfs_btree_cur *cur,
1264 union xfs_btree_ptr *ptr,
1265 int flags,
1266 struct xfs_btree_block **block,
1267 struct xfs_buf **bpp)
1268 {
1269 struct xfs_mount *mp = cur->bc_mp;
1270 xfs_daddr_t d;
1271 int error;
1272
1273 /* need to sort out how callers deal with failures first */
1274 ASSERT(!(flags & XBF_TRYLOCK));
1275
1276 d = xfs_btree_ptr_to_daddr(cur, ptr);
1277 error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
1278 mp->m_bsize, flags, bpp,
1279 cur->bc_ops->buf_ops);
1280 if (error)
1281 return error;
1282
1283 xfs_btree_set_refs(cur, *bpp);
1284 *block = XFS_BUF_TO_BLOCK(*bpp);
1285 return 0;
1286 }
1287
1288 /*
1289 * Copy keys from one btree block to another.
1290 */
1291 STATIC void
xfs_btree_copy_keys(struct xfs_btree_cur * cur,union xfs_btree_key * dst_key,union xfs_btree_key * src_key,int numkeys)1292 xfs_btree_copy_keys(
1293 struct xfs_btree_cur *cur,
1294 union xfs_btree_key *dst_key,
1295 union xfs_btree_key *src_key,
1296 int numkeys)
1297 {
1298 ASSERT(numkeys >= 0);
1299 memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1300 }
1301
1302 /*
1303 * Copy records from one btree block to another.
1304 */
1305 STATIC void
xfs_btree_copy_recs(struct xfs_btree_cur * cur,union xfs_btree_rec * dst_rec,union xfs_btree_rec * src_rec,int numrecs)1306 xfs_btree_copy_recs(
1307 struct xfs_btree_cur *cur,
1308 union xfs_btree_rec *dst_rec,
1309 union xfs_btree_rec *src_rec,
1310 int numrecs)
1311 {
1312 ASSERT(numrecs >= 0);
1313 memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1314 }
1315
1316 /*
1317 * Copy block pointers from one btree block to another.
1318 */
1319 STATIC void
xfs_btree_copy_ptrs(struct xfs_btree_cur * cur,union xfs_btree_ptr * dst_ptr,union xfs_btree_ptr * src_ptr,int numptrs)1320 xfs_btree_copy_ptrs(
1321 struct xfs_btree_cur *cur,
1322 union xfs_btree_ptr *dst_ptr,
1323 union xfs_btree_ptr *src_ptr,
1324 int numptrs)
1325 {
1326 ASSERT(numptrs >= 0);
1327 memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1328 }
1329
1330 /*
1331 * Shift keys one index left/right inside a single btree block.
1332 */
1333 STATIC void
xfs_btree_shift_keys(struct xfs_btree_cur * cur,union xfs_btree_key * key,int dir,int numkeys)1334 xfs_btree_shift_keys(
1335 struct xfs_btree_cur *cur,
1336 union xfs_btree_key *key,
1337 int dir,
1338 int numkeys)
1339 {
1340 char *dst_key;
1341
1342 ASSERT(numkeys >= 0);
1343 ASSERT(dir == 1 || dir == -1);
1344
1345 dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1346 memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1347 }
1348
1349 /*
1350 * Shift records one index left/right inside a single btree block.
1351 */
1352 STATIC void
xfs_btree_shift_recs(struct xfs_btree_cur * cur,union xfs_btree_rec * rec,int dir,int numrecs)1353 xfs_btree_shift_recs(
1354 struct xfs_btree_cur *cur,
1355 union xfs_btree_rec *rec,
1356 int dir,
1357 int numrecs)
1358 {
1359 char *dst_rec;
1360
1361 ASSERT(numrecs >= 0);
1362 ASSERT(dir == 1 || dir == -1);
1363
1364 dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1365 memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1366 }
1367
1368 /*
1369 * Shift block pointers one index left/right inside a single btree block.
1370 */
1371 STATIC void
xfs_btree_shift_ptrs(struct xfs_btree_cur * cur,union xfs_btree_ptr * ptr,int dir,int numptrs)1372 xfs_btree_shift_ptrs(
1373 struct xfs_btree_cur *cur,
1374 union xfs_btree_ptr *ptr,
1375 int dir,
1376 int numptrs)
1377 {
1378 char *dst_ptr;
1379
1380 ASSERT(numptrs >= 0);
1381 ASSERT(dir == 1 || dir == -1);
1382
1383 dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1384 memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1385 }
1386
1387 /*
1388 * Log key values from the btree block.
1389 */
1390 STATIC void
xfs_btree_log_keys(struct xfs_btree_cur * cur,struct xfs_buf * bp,int first,int last)1391 xfs_btree_log_keys(
1392 struct xfs_btree_cur *cur,
1393 struct xfs_buf *bp,
1394 int first,
1395 int last)
1396 {
1397 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1398 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1399
1400 if (bp) {
1401 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1402 xfs_trans_log_buf(cur->bc_tp, bp,
1403 xfs_btree_key_offset(cur, first),
1404 xfs_btree_key_offset(cur, last + 1) - 1);
1405 } else {
1406 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1407 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1408 }
1409
1410 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1411 }
1412
/*
 * Log record values from the btree block.
 *
 * Note that bp is used unconditionally here — unlike xfs_btree_log_keys()
 * there is no inode-root (NULL bp) fallback in this path.
 */
void
xfs_btree_log_recs(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	struct xfs_buf		*bp,	/* buffer containing btree block */
	int			first,	/* index of first record to log */
	int			last)	/* index of last record to log */
{
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);

	xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
	/* Log the byte range covering records first..last inclusive. */
	xfs_trans_log_buf(cur->bc_tp, bp,
			  xfs_btree_rec_offset(cur, first),
			  xfs_btree_rec_offset(cur, last + 1) - 1);

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
}
1433
1434 /*
1435 * Log block pointer fields from a btree block (nonleaf).
1436 */
1437 STATIC void
xfs_btree_log_ptrs(struct xfs_btree_cur * cur,struct xfs_buf * bp,int first,int last)1438 xfs_btree_log_ptrs(
1439 struct xfs_btree_cur *cur, /* btree cursor */
1440 struct xfs_buf *bp, /* buffer containing btree block */
1441 int first, /* index of first pointer to log */
1442 int last) /* index of last pointer to log */
1443 {
1444 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1445 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1446
1447 if (bp) {
1448 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
1449 int level = xfs_btree_get_level(block);
1450
1451 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
1452 xfs_trans_log_buf(cur->bc_tp, bp,
1453 xfs_btree_ptr_offset(cur, first, level),
1454 xfs_btree_ptr_offset(cur, last + 1, level) - 1);
1455 } else {
1456 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1457 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1458 }
1459
1460 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1461 }
1462
/*
 * Log fields from a btree block header.
 *
 * @fields is a mask of XFS_BB_* bits selecting which header fields to log.
 * The offset tables below map each bit position to the byte offset of the
 * corresponding field for the short (per-AG) and long (inode-rooted) block
 * formats; the final entry is the total header length, used to compute the
 * end offset of the last field.  The table order must match the XFS_BB_*
 * bit numbering exactly.
 */
void
xfs_btree_log_block(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	struct xfs_buf		*bp,	/* buffer containing btree block */
	int			fields)	/* mask of fields: XFS_BB_... */
{
	int			first;	/* first byte offset logged */
	int			last;	/* last byte offset logged */
	static const short	soffsets[] = {	/* table of offsets (short) */
		offsetof(struct xfs_btree_block, bb_magic),
		offsetof(struct xfs_btree_block, bb_level),
		offsetof(struct xfs_btree_block, bb_numrecs),
		offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
		offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
		offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
		offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
		offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
		offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
		offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
		XFS_BTREE_SBLOCK_CRC_LEN
	};
	static const short	loffsets[] = {	/* table of offsets (long) */
		offsetof(struct xfs_btree_block, bb_magic),
		offsetof(struct xfs_btree_block, bb_level),
		offsetof(struct xfs_btree_block, bb_numrecs),
		offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
		offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
		offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
		offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
		offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
		offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
		offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
		offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
		XFS_BTREE_LBLOCK_CRC_LEN
	};

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGBI(cur, bp, fields);

	if (bp) {
		int nbits;

		if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
			/*
			 * We don't log the CRC when updating a btree
			 * block but instead recreate it during log
			 * recovery.  As the log buffers have checksums
			 * of their own this is safe and avoids logging a crc
			 * update in a lot of places.
			 */
			if (fields == XFS_BB_ALL_BITS)
				fields = XFS_BB_ALL_BITS_CRC;
			nbits = XFS_BB_NUM_BITS_CRC;
		} else {
			nbits = XFS_BB_NUM_BITS;
		}
		/* Translate the field mask into a [first, last] byte range. */
		xfs_btree_offsets(fields,
				  (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
					loffsets : soffsets,
				  nbits, &first, &last);
		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
		xfs_trans_log_buf(cur->bc_tp, bp, first, last);
	} else {
		/* Root lives in the inode fork: log the whole fork root. */
		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
			xfs_ilog_fbroot(cur->bc_private.b.whichfork));
	}

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
}
1535
/*
 * Increment cursor by one record at the level.
 * For nonzero levels the leaf-ward information is untouched.
 *
 * Returns 0 with *stat == 1 if the cursor moved to the next record,
 * *stat == 0 if we ran off the right edge of the tree, or a negative
 * error code (-EFSCORRUPTED) on corruption.
 */
int						/* error */
xfs_btree_increment(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	struct xfs_btree_block	*block;
	union xfs_btree_ptr	ptr;
	struct xfs_buf		*bp;
	int			error;		/* error return value */
	int			lev;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, level);

	ASSERT(level < cur->bc_nlevels);

	/* Read-ahead to the right at this level. */
	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);

	/* Get a pointer to the btree block. */
	block = xfs_btree_get_block(cur, level, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto error0;
#endif

	/* We're done if we remain in the block after the increment. */
	if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
		goto out1;

	/* Fail if we just went off the right edge of the tree. */
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
	if (xfs_btree_ptr_is_null(cur, &ptr))
		goto out0;

	XFS_BTREE_STATS_INC(cur, increment);

	/*
	 * March up the tree incrementing pointers.
	 * Stop when we don't go off the right edge of a block.
	 */
	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
		block = xfs_btree_get_block(cur, lev, &bp);

#ifdef DEBUG
		error = xfs_btree_check_block(cur, block, lev, bp);
		if (error)
			goto error0;
#endif

		if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
			break;

		/* Read-ahead the right block for the next loop. */
		xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
	}

	/*
	 * If we went off the root then we are either seriously
	 * confused or have the tree root in an inode.
	 */
	if (lev == cur->bc_nlevels) {
		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
			goto out0;
		ASSERT(0);
		error = -EFSCORRUPTED;
		goto error0;
	}
	ASSERT(lev < cur->bc_nlevels);

	/*
	 * Now walk back down the tree, fixing up the cursor's buffer
	 * pointers and key numbers.  Each descent follows the (already
	 * incremented) pointer in the parent and positions the cursor at
	 * the first entry of the child.
	 */
	for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
		union xfs_btree_ptr	*ptrp;

		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
		--lev;
		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
		if (error)
			goto error0;

		xfs_btree_setbuf(cur, lev, bp);
		cur->bc_ptrs[lev] = 1;
	}
out1:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;

out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
1643
/*
 * Decrement cursor by one record at the level.
 * For nonzero levels the leaf-ward information is untouched.
 *
 * Returns 0 with *stat == 1 if the cursor moved to the previous record,
 * *stat == 0 if we ran off the left edge of the tree, or a negative
 * error code (-EFSCORRUPTED) on corruption.
 */
int						/* error */
xfs_btree_decrement(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	struct xfs_btree_block	*block;
	xfs_buf_t		*bp;
	int			error;		/* error return value */
	int			lev;
	union xfs_btree_ptr	ptr;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, level);

	ASSERT(level < cur->bc_nlevels);

	/* Read-ahead to the left at this level. */
	xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);

	/* We're done if we remain in the block after the decrement. */
	if (--cur->bc_ptrs[level] > 0)
		goto out1;

	/* Get a pointer to the btree block. */
	block = xfs_btree_get_block(cur, level, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto error0;
#endif

	/* Fail if we just went off the left edge of the tree. */
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
	if (xfs_btree_ptr_is_null(cur, &ptr))
		goto out0;

	XFS_BTREE_STATS_INC(cur, decrement);

	/*
	 * March up the tree decrementing pointers.
	 * Stop when we don't go off the left edge of a block.
	 */
	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
		if (--cur->bc_ptrs[lev] > 0)
			break;
		/* Read-ahead the left block for the next loop. */
		xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
	}

	/*
	 * If we went off the root then we are either seriously confused
	 * or have the tree root in an inode.
	 */
	if (lev == cur->bc_nlevels) {
		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
			goto out0;
		ASSERT(0);
		error = -EFSCORRUPTED;
		goto error0;
	}
	ASSERT(lev < cur->bc_nlevels);

	/*
	 * Now walk back down the tree, fixing up the cursor's buffer
	 * pointers and key numbers.  Each descent follows the (already
	 * decremented) pointer in the parent and positions the cursor at
	 * the last entry of the child.
	 */
	for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
		union xfs_btree_ptr	*ptrp;

		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
		--lev;
		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
		if (error)
			goto error0;
		xfs_btree_setbuf(cur, lev, bp);
		cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
	}
out1:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;

out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
1741
/*
 * Return the btree block for @level, reading it from disk if the cursor
 * does not already hold a buffer for that disk address.
 *
 * Returns 0 with *blkp set on success; -EFSCORRUPTED if the block fails
 * sanity checks (wrong inode owner, wrong level, or an empty internal
 * node), in which case the buffer is released and *blkp is NULL.
 */
STATIC int
xfs_btree_lookup_get_block(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			level,	/* level in the btree */
	union xfs_btree_ptr	*pp,	/* ptr to btree block */
	struct xfs_btree_block	**blkp)	/* return btree block */
{
	struct xfs_buf		*bp;	/* buffer pointer for btree block */
	int			error = 0;

	/* special case the root block if in an inode */
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (level == cur->bc_nlevels - 1)) {
		*blkp = xfs_btree_get_iroot(cur);
		return 0;
	}

	/*
	 * If the old buffer at this level for the disk address we are
	 * looking for re-use it.
	 *
	 * Otherwise throw it away and get a new one.
	 */
	bp = cur->bc_bufs[level];
	if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
		*blkp = XFS_BUF_TO_BLOCK(bp);
		return 0;
	}

	error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
	if (error)
		return error;

	/* Check the inode owner since the verifiers don't. */
	if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) &&
	    !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_INVALID_OWNER) &&
	    (cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
	    be64_to_cpu((*blkp)->bb_u.l.bb_owner) !=
			cur->bc_private.b.ip->i_ino)
		goto out_bad;

	/* Did we get the level we were looking for? */
	if (be16_to_cpu((*blkp)->bb_level) != level)
		goto out_bad;

	/* Check that internal nodes have at least one record. */
	if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0)
		goto out_bad;

	/* Block looks sane: install it in the cursor at this level. */
	xfs_btree_setbuf(cur, level, bp);
	return 0;

out_bad:
	*blkp = NULL;
	xfs_trans_brelse(cur->bc_tp, bp);
	return -EFSCORRUPTED;
}
1799
1800 /*
1801 * Get current search key. For level 0 we don't actually have a key
1802 * structure so we make one up from the record. For all other levels
1803 * we just return the right key.
1804 */
1805 STATIC union xfs_btree_key *
xfs_lookup_get_search_key(struct xfs_btree_cur * cur,int level,int keyno,struct xfs_btree_block * block,union xfs_btree_key * kp)1806 xfs_lookup_get_search_key(
1807 struct xfs_btree_cur *cur,
1808 int level,
1809 int keyno,
1810 struct xfs_btree_block *block,
1811 union xfs_btree_key *kp)
1812 {
1813 if (level == 0) {
1814 cur->bc_ops->init_key_from_rec(kp,
1815 xfs_btree_rec_addr(cur, keyno, block));
1816 return kp;
1817 }
1818
1819 return xfs_btree_key_addr(cur, keyno, block);
1820 }
1821
/*
 * Lookup the record.  The cursor is made to point to it, based on dir.
 * stat is set to 0 if can't find any such record, 1 for success.
 *
 * Walks from the root to a leaf, binary-searching the keys at each level
 * and following the matching block pointer down.  On return
 * cur->bc_ptrs[0] is the candidate record index in the leaf (may be 0 or
 * one past the end for failed LE/GE lookups).
 */
int					/* error */
xfs_btree_lookup(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_lookup_t		dir,	/* <=, ==, or >= */
	int			*stat)	/* success/failure */
{
	struct xfs_btree_block	*block;	/* current btree block */
	__int64_t		diff;	/* difference for the current key */
	int			error;	/* error return value */
	int			keyno;	/* current key number */
	int			level;	/* level in the btree */
	union xfs_btree_ptr	*pp;	/* ptr to btree block */
	union xfs_btree_ptr	ptr;	/* ptr to btree block */

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, dir);

	XFS_BTREE_STATS_INC(cur, lookup);

	/* No such thing as a zero-level tree. */
	if (cur->bc_nlevels == 0)
		return -EFSCORRUPTED;

	block = NULL;
	keyno = 0;

	/* initialise start pointer from cursor */
	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
	pp = &ptr;

	/*
	 * Iterate over each level in the btree, starting at the root.
	 * For each level above the leaves, find the key we need, based
	 * on the lookup record, then follow the corresponding block
	 * pointer down to the next level.
	 */
	for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
		/* Get the block we need to do the lookup on. */
		error = xfs_btree_lookup_get_block(cur, level, pp, &block);
		if (error)
			goto error0;

		if (diff == 0) {
			/*
			 * If we already had a key match at a higher level, we
			 * know we need to use the first entry in this block.
			 */
			keyno = 1;
		} else {
			/* Otherwise search this block. Do a binary search. */

			int	high;	/* high entry number */
			int	low;	/* low entry number */

			/* Set low and high entry numbers, 1-based. */
			low = 1;
			high = xfs_btree_get_numrecs(block);
			if (!high) {
				/* Block is empty, must be an empty leaf. */
				ASSERT(level == 0 && cur->bc_nlevels == 1);

				/*
				 * LE lookups point "before" the first record
				 * (index 0); other lookups point at index 1.
				 */
				cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
				XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
				*stat = 0;
				return 0;
			}

			/* Binary search the block. */
			while (low <= high) {
				union xfs_btree_key	key;
				union xfs_btree_key	*kp;

				XFS_BTREE_STATS_INC(cur, compare);

				/* keyno is average of low and high. */
				keyno = (low + high) >> 1;

				/* Get current search key */
				kp = xfs_lookup_get_search_key(cur, level,
						keyno, block, &key);

				/*
				 * Compute difference to get next direction:
				 *  - less than, move right
				 *  - greater than, move left
				 *  - equal, we're done
				 */
				diff = cur->bc_ops->key_diff(cur, kp);
				if (diff < 0)
					low = keyno + 1;
				else if (diff > 0)
					high = keyno - 1;
				else
					break;
			}
		}

		/*
		 * If there are more levels, set up for the next level
		 * by getting the block number and filling in the cursor.
		 */
		if (level > 0) {
			/*
			 * If we moved left, need the previous key number,
			 * unless there isn't one.
			 */
			if (diff > 0 && --keyno < 1)
				keyno = 1;
			pp = xfs_btree_ptr_addr(cur, keyno, block);

#ifdef DEBUG
			error = xfs_btree_check_ptr(cur, pp, 0, level);
			if (error)
				goto error0;
#endif
			cur->bc_ptrs[level] = keyno;
		}
	}

	/* Done with the search. See if we need to adjust the results. */
	if (dir != XFS_LOOKUP_LE && diff < 0) {
		keyno++;
		/*
		 * If ge search and we went off the end of the block, but it's
		 * not the last block, we're in the wrong block.
		 */
		xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
		if (dir == XFS_LOOKUP_GE &&
		    keyno > xfs_btree_get_numrecs(block) &&
		    !xfs_btree_ptr_is_null(cur, &ptr)) {
			int	i;

			/* Step to the first record of the next leaf. */
			cur->bc_ptrs[0] = keyno;
			error = xfs_btree_increment(cur, 0, &i);
			if (error)
				goto error0;
			XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
			XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
			*stat = 1;
			return 0;
		}
	} else if (dir == XFS_LOOKUP_LE && diff > 0)
		keyno--;
	cur->bc_ptrs[0] = keyno;

	/* Return if we succeeded or not. */
	if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
		*stat = 0;
	else if (dir != XFS_LOOKUP_EQ || diff == 0)
		*stat = 1;
	else
		*stat = 0;
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
1985
1986 /* Find the high key storage area from a regular key. */
1987 STATIC union xfs_btree_key *
xfs_btree_high_key_from_key(struct xfs_btree_cur * cur,union xfs_btree_key * key)1988 xfs_btree_high_key_from_key(
1989 struct xfs_btree_cur *cur,
1990 union xfs_btree_key *key)
1991 {
1992 ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
1993 return (union xfs_btree_key *)((char *)key +
1994 (cur->bc_ops->key_len / 2));
1995 }
1996
1997 /* Determine the low (and high if overlapped) keys of a leaf block */
1998 STATIC void
xfs_btree_get_leaf_keys(struct xfs_btree_cur * cur,struct xfs_btree_block * block,union xfs_btree_key * key)1999 xfs_btree_get_leaf_keys(
2000 struct xfs_btree_cur *cur,
2001 struct xfs_btree_block *block,
2002 union xfs_btree_key *key)
2003 {
2004 union xfs_btree_key max_hkey;
2005 union xfs_btree_key hkey;
2006 union xfs_btree_rec *rec;
2007 union xfs_btree_key *high;
2008 int n;
2009
2010 rec = xfs_btree_rec_addr(cur, 1, block);
2011 cur->bc_ops->init_key_from_rec(key, rec);
2012
2013 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2014
2015 cur->bc_ops->init_high_key_from_rec(&max_hkey, rec);
2016 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
2017 rec = xfs_btree_rec_addr(cur, n, block);
2018 cur->bc_ops->init_high_key_from_rec(&hkey, rec);
2019 if (cur->bc_ops->diff_two_keys(cur, &hkey, &max_hkey)
2020 > 0)
2021 max_hkey = hkey;
2022 }
2023
2024 high = xfs_btree_high_key_from_key(cur, key);
2025 memcpy(high, &max_hkey, cur->bc_ops->key_len / 2);
2026 }
2027 }
2028
2029 /* Determine the low (and high if overlapped) keys of a node block */
2030 STATIC void
xfs_btree_get_node_keys(struct xfs_btree_cur * cur,struct xfs_btree_block * block,union xfs_btree_key * key)2031 xfs_btree_get_node_keys(
2032 struct xfs_btree_cur *cur,
2033 struct xfs_btree_block *block,
2034 union xfs_btree_key *key)
2035 {
2036 union xfs_btree_key *hkey;
2037 union xfs_btree_key *max_hkey;
2038 union xfs_btree_key *high;
2039 int n;
2040
2041 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2042 memcpy(key, xfs_btree_key_addr(cur, 1, block),
2043 cur->bc_ops->key_len / 2);
2044
2045 max_hkey = xfs_btree_high_key_addr(cur, 1, block);
2046 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
2047 hkey = xfs_btree_high_key_addr(cur, n, block);
2048 if (cur->bc_ops->diff_two_keys(cur, hkey, max_hkey) > 0)
2049 max_hkey = hkey;
2050 }
2051
2052 high = xfs_btree_high_key_from_key(cur, key);
2053 memcpy(high, max_hkey, cur->bc_ops->key_len / 2);
2054 } else {
2055 memcpy(key, xfs_btree_key_addr(cur, 1, block),
2056 cur->bc_ops->key_len);
2057 }
2058 }
2059
2060 /* Derive the keys for any btree block. */
2061 STATIC void
xfs_btree_get_keys(struct xfs_btree_cur * cur,struct xfs_btree_block * block,union xfs_btree_key * key)2062 xfs_btree_get_keys(
2063 struct xfs_btree_cur *cur,
2064 struct xfs_btree_block *block,
2065 union xfs_btree_key *key)
2066 {
2067 if (be16_to_cpu(block->bb_level) == 0)
2068 xfs_btree_get_leaf_keys(cur, block, key);
2069 else
2070 xfs_btree_get_node_keys(cur, block, key);
2071 }
2072
2073 /*
2074 * Decide if we need to update the parent keys of a btree block. For
2075 * a standard btree this is only necessary if we're updating the first
2076 * record/key. For an overlapping btree, we must always update the
2077 * keys because the highest key can be in any of the records or keys
2078 * in the block.
2079 */
2080 static inline bool
xfs_btree_needs_key_update(struct xfs_btree_cur * cur,int ptr)2081 xfs_btree_needs_key_update(
2082 struct xfs_btree_cur *cur,
2083 int ptr)
2084 {
2085 return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1;
2086 }
2087
/*
 * Update the low and high parent keys of the given level, progressing
 * towards the root. If force_all is false, stop if the keys for a given
 * level do not need updating.
 */
STATIC int
__xfs_btree_updkeys(
	struct xfs_btree_cur	*cur,
	int			level,
	struct xfs_btree_block	*block,
	struct xfs_buf		*bp0,
	bool			force_all)
{
	union xfs_btree_key	key;	/* keys from current level */
	union xfs_btree_key	*lkey;	/* keys from the next level up */
	union xfs_btree_key	*hkey;
	union xfs_btree_key	*nlkey;	/* keys from the next level up */
	union xfs_btree_key	*nhkey;
	struct xfs_buf		*bp;
	int			ptr;

	/* Only overlapping btrees carry a low/high key pair to propagate. */
	ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);

	/* Exit if there aren't any parent levels to update. */
	if (level + 1 >= cur->bc_nlevels)
		return 0;

	trace_xfs_btree_updkeys(cur, level, bp0);

	/*
	 * "key" holds the whole composite key; lkey aliases its low half
	 * and hkey its high half (via xfs_btree_high_key_from_key).
	 */
	lkey = &key;
	hkey = xfs_btree_high_key_from_key(cur, lkey);
	xfs_btree_get_keys(cur, block, lkey);
	for (level++; level < cur->bc_nlevels; level++) {
#ifdef DEBUG
		int		error;
#endif
		block = xfs_btree_get_block(cur, level, &bp);
		trace_xfs_btree_updkeys(cur, level, bp);
#ifdef DEBUG
		error = xfs_btree_check_block(cur, block, level, bp);
		if (error) {
			XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
			return error;
		}
#endif
		/* Parent slot that points down at the block just updated. */
		ptr = cur->bc_ptrs[level];
		nlkey = xfs_btree_key_addr(cur, ptr, block);
		nhkey = xfs_btree_high_key_addr(cur, ptr, block);
		/*
		 * Stop ascending once the parent's copies of both halves
		 * already match, unless the caller wants a full refresh.
		 */
		if (!force_all &&
		    !(cur->bc_ops->diff_two_keys(cur, nlkey, lkey) != 0 ||
		      cur->bc_ops->diff_two_keys(cur, nhkey, hkey) != 0))
			break;
		xfs_btree_copy_keys(cur, nlkey, lkey, 1);
		xfs_btree_log_keys(cur, bp, ptr, ptr);
		if (level + 1 >= cur->bc_nlevels)
			break;
		/* Recompute this node's keys to push to the next level up. */
		xfs_btree_get_node_keys(cur, block, lkey);
	}

	return 0;
}
2149
2150 /* Update all the keys from some level in cursor back to the root. */
2151 STATIC int
xfs_btree_updkeys_force(struct xfs_btree_cur * cur,int level)2152 xfs_btree_updkeys_force(
2153 struct xfs_btree_cur *cur,
2154 int level)
2155 {
2156 struct xfs_buf *bp;
2157 struct xfs_btree_block *block;
2158
2159 block = xfs_btree_get_block(cur, level, &bp);
2160 return __xfs_btree_updkeys(cur, level, block, bp, true);
2161 }
2162
/*
 * Update the parent keys of the given level, progressing towards the root.
 */
STATIC int
xfs_btree_update_keys(
	struct xfs_btree_cur	*cur,
	int			level)
{
	struct xfs_btree_block	*block;
	struct xfs_buf		*bp;
	union xfs_btree_key	*kp;
	union xfs_btree_key	key;
	int			ptr;

	ASSERT(level >= 0);

	block = xfs_btree_get_block(cur, level, &bp);
	/* Overlapping btrees propagate both low and high keys. */
	if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
		return __xfs_btree_updkeys(cur, level, block, bp, false);

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	/*
	 * NOTE(review): "keyp" is not declared in this function; this trace
	 * macro presumably expands to nothing in this configuration --
	 * verify against builds that enable btree cursor tracing.
	 */
	XFS_BTREE_TRACE_ARGIK(cur, level, keyp);

	/*
	 * Go up the tree from this level toward the root.
	 * At each level, update the key value to the value input.
	 * Stop when we reach a level where the cursor isn't pointing
	 * at the first entry in the block.
	 */
	xfs_btree_get_keys(cur, block, &key);
	for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
#ifdef DEBUG
		int		error;
#endif
		block = xfs_btree_get_block(cur, level, &bp);
#ifdef DEBUG
		error = xfs_btree_check_block(cur, block, level, bp);
		if (error) {
			XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
			return error;
		}
#endif
		/* Parent slot that points at the child we came from. */
		ptr = cur->bc_ptrs[level];
		kp = xfs_btree_key_addr(cur, ptr, block);
		xfs_btree_copy_keys(cur, kp, &key, 1);
		xfs_btree_log_keys(cur, bp, ptr, ptr);
	}

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;
}
2214
/*
 * Update the record referred to by cur to the value in the
 * given record. This either works (return 0) or gets an
 * EFSCORRUPTED error.
 *
 * The cursor must already point at the leaf record to overwrite
 * (cur->bc_ptrs[0]); the new contents are logged to the transaction.
 */
int
xfs_btree_update(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec)
{
	struct xfs_btree_block	*block;
	struct xfs_buf		*bp;
	int			error;
	int			ptr;
	union xfs_btree_rec	*rp;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGR(cur, rec);

	/* Pick up the current block. */
	block = xfs_btree_get_block(cur, 0, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, 0, bp);
	if (error)
		goto error0;
#endif
	/* Get the address of the rec to be updated. */
	ptr = cur->bc_ptrs[0];
	rp = xfs_btree_rec_addr(cur, ptr, block);

	/* Fill in the new contents and log them. */
	xfs_btree_copy_recs(cur, rp, rec, 1);
	xfs_btree_log_recs(cur, bp, ptr, ptr);

	/*
	 * If we are tracking the last record in the tree and
	 * we are at the far right edge of the tree, update it.
	 */
	if (xfs_btree_is_lastrec(cur, block, 0)) {
		cur->bc_ops->update_lastrec(cur, block, rec,
					    ptr, LASTREC_UPDATE);
	}

	/*
	 * Pass new key value up to our parent.  For overlapping btrees
	 * this happens for every update, not just for the first record
	 * (see xfs_btree_needs_key_update).
	 */
	if (xfs_btree_needs_key_update(cur, ptr)) {
		error = xfs_btree_update_keys(cur, 0);
		if (error)
			goto error0;
	}

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
2273
/*
 * Move 1 record left from cur/level if possible.
 * Update cur to reflect the new path.
 *
 * Returns 0 with *stat = 1 if a record was shifted, 0 with *stat = 0
 * if the shift was not possible, or a negative errno on failure.
 */
STATIC int					/* error */
xfs_btree_lshift(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	struct xfs_buf		*lbp;		/* left buffer pointer */
	struct xfs_btree_block	*left;		/* left btree block */
	int			lrecs;		/* left record count */
	struct xfs_buf		*rbp;		/* right buffer pointer */
	struct xfs_btree_block	*right;		/* right btree block */
	struct xfs_btree_cur	*tcur;		/* temporary btree cursor */
	int			rrecs;		/* right record count */
	union xfs_btree_ptr	lptr;		/* left btree pointer */
	union xfs_btree_key	*rkp = NULL;	/* right btree key */
	union xfs_btree_ptr	*rpp = NULL;	/* right address pointer */
	union xfs_btree_rec	*rrp = NULL;	/* right record pointer */
	int			error;		/* error return value */
	int			i;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, level);

	/* A root kept in the inode has no siblings to shift into. */
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    level == cur->bc_nlevels - 1)
		goto out0;

	/* Set up variables for this block as "right". */
	right = xfs_btree_get_block(cur, level, &rbp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, right, level, rbp);
	if (error)
		goto error0;
#endif

	/* If we've got no left sibling then we can't shift an entry left. */
	xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
	if (xfs_btree_ptr_is_null(cur, &lptr))
		goto out0;

	/*
	 * If the cursor entry is the one that would be moved, don't
	 * do it... it's too complicated.
	 */
	if (cur->bc_ptrs[level] <= 1)
		goto out0;

	/* Set up the left neighbor as "left". */
	error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
	if (error)
		goto error0;

	/* If it's full, it can't take another entry. */
	lrecs = xfs_btree_get_numrecs(left);
	if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
		goto out0;

	rrecs = xfs_btree_get_numrecs(right);

	/*
	 * We add one entry to the left side and remove one for the right side.
	 * Account for it here, the changes will be updated on disk and logged
	 * later.
	 */
	lrecs++;
	rrecs--;

	XFS_BTREE_STATS_INC(cur, lshift);
	XFS_BTREE_STATS_ADD(cur, moves, 1);

	/*
	 * If non-leaf, copy a key and a ptr to the left block.
	 * Log the changes to the left block.
	 */
	if (level > 0) {
		/* It's a non-leaf.  Move keys and pointers. */
		union xfs_btree_key	*lkp;	/* left btree key */
		union xfs_btree_ptr	*lpp;	/* left address pointer */

		/* lrecs was already bumped: these are the new last slots. */
		lkp = xfs_btree_key_addr(cur, lrecs, left);
		rkp = xfs_btree_key_addr(cur, 1, right);

		lpp = xfs_btree_ptr_addr(cur, lrecs, left);
		rpp = xfs_btree_ptr_addr(cur, 1, right);
#ifdef DEBUG
		error = xfs_btree_check_ptr(cur, rpp, 0, level);
		if (error)
			goto error0;
#endif
		xfs_btree_copy_keys(cur, lkp, rkp, 1);
		xfs_btree_copy_ptrs(cur, lpp, rpp, 1);

		xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
		xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);

		ASSERT(cur->bc_ops->keys_inorder(cur,
			xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
	} else {
		/* It's a leaf.  Move records. */
		union xfs_btree_rec	*lrp;	/* left record pointer */

		lrp = xfs_btree_rec_addr(cur, lrecs, left);
		rrp = xfs_btree_rec_addr(cur, 1, right);

		xfs_btree_copy_recs(cur, lrp, rrp, 1);
		xfs_btree_log_recs(cur, lbp, lrecs, lrecs);

		ASSERT(cur->bc_ops->recs_inorder(cur,
			xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
	}

	xfs_btree_set_numrecs(left, lrecs);
	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);

	xfs_btree_set_numrecs(right, rrecs);
	xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);

	/*
	 * Slide the contents of right down one entry.
	 */
	XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
	if (level > 0) {
		/* It's a nonleaf. operate on keys and ptrs */
#ifdef DEBUG
		int			i;	/* loop index (shadows outer i) */

		for (i = 0; i < rrecs; i++) {
			error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
			if (error)
				goto error0;
		}
#endif
		xfs_btree_shift_keys(cur,
				xfs_btree_key_addr(cur, 2, right),
				-1, rrecs);
		xfs_btree_shift_ptrs(cur,
				xfs_btree_ptr_addr(cur, 2, right),
				-1, rrecs);

		xfs_btree_log_keys(cur, rbp, 1, rrecs);
		xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
	} else {
		/* It's a leaf. operate on records */
		xfs_btree_shift_recs(cur,
			xfs_btree_rec_addr(cur, 2, right),
			-1, rrecs);
		xfs_btree_log_recs(cur, rbp, 1, rrecs);
	}

	/*
	 * Using a temporary cursor, update the parent key values of the
	 * block on the left.
	 */
	if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
		error = xfs_btree_dup_cursor(cur, &tcur);
		if (error)
			goto error0;
		i = xfs_btree_firstrec(tcur, level);
		/*
		 * NOTE(review): on this (should-never-happen) corruption
		 * path the goto targets error0, which does not free tcur;
		 * error1 would.  Possible cursor leak -- confirm upstream.
		 */
		XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0);

		error = xfs_btree_decrement(tcur, level, &i);
		if (error)
			goto error1;

		/* Update the parent high keys of the left block, if needed. */
		error = xfs_btree_update_keys(tcur, level);
		if (error)
			goto error1;

		xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
	}

	/* Update the parent keys of the right block. */
	error = xfs_btree_update_keys(cur, level);
	if (error)
		goto error0;

	/* Slide the cursor value left one. */
	cur->bc_ptrs[level]--;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;

out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;

error1:
	XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
	return error;
}
2477
/*
 * Move 1 record right from cur/level if possible.
 * Update cur to reflect the new path.
 *
 * Returns 0 with *stat = 1 if a record was shifted, 0 with *stat = 0
 * if the shift was not possible, or a negative errno on failure.
 */
STATIC int					/* error */
xfs_btree_rshift(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	struct xfs_buf		*lbp;		/* left buffer pointer */
	struct xfs_btree_block	*left;		/* left btree block */
	struct xfs_buf		*rbp;		/* right buffer pointer */
	struct xfs_btree_block	*right;		/* right btree block */
	struct xfs_btree_cur	*tcur;		/* temporary btree cursor */
	union xfs_btree_ptr	rptr;		/* right block pointer */
	union xfs_btree_key	*rkp;		/* right btree key */
	int			rrecs;		/* right record count */
	int			lrecs;		/* left record count */
	int			error;		/* error return value */
	int			i;		/* loop counter */

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, level);

	/* A root kept in the inode has no siblings to shift into. */
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (level == cur->bc_nlevels - 1))
		goto out0;

	/* Set up variables for this block as "left". */
	left = xfs_btree_get_block(cur, level, &lbp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, left, level, lbp);
	if (error)
		goto error0;
#endif

	/* If we've got no right sibling then we can't shift an entry right. */
	xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
	if (xfs_btree_ptr_is_null(cur, &rptr))
		goto out0;

	/*
	 * If the cursor entry is the one that would be moved, don't
	 * do it... it's too complicated.
	 */
	lrecs = xfs_btree_get_numrecs(left);
	if (cur->bc_ptrs[level] >= lrecs)
		goto out0;

	/* Set up the right neighbor as "right". */
	error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
	if (error)
		goto error0;

	/* If it's full, it can't take another entry. */
	rrecs = xfs_btree_get_numrecs(right);
	if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
		goto out0;

	XFS_BTREE_STATS_INC(cur, rshift);
	XFS_BTREE_STATS_ADD(cur, moves, rrecs);

	/*
	 * Make a hole at the start of the right neighbor block, then
	 * copy the last left block entry to the hole.
	 */
	if (level > 0) {
		/* It's a nonleaf. make a hole in the keys and ptrs */
		union xfs_btree_key	*lkp;
		union xfs_btree_ptr	*lpp;
		union xfs_btree_ptr	*rpp;

		lkp = xfs_btree_key_addr(cur, lrecs, left);
		lpp = xfs_btree_ptr_addr(cur, lrecs, left);
		rkp = xfs_btree_key_addr(cur, 1, right);
		rpp = xfs_btree_ptr_addr(cur, 1, right);

#ifdef DEBUG
		/* Debug-only: verify right's child pointers before moving. */
		for (i = rrecs - 1; i >= 0; i--) {
			error = xfs_btree_check_ptr(cur, rpp, i, level);
			if (error)
				goto error0;
		}
#endif

		/* Open up slot 1 by shifting everything right by one. */
		xfs_btree_shift_keys(cur, rkp, 1, rrecs);
		xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);

#ifdef DEBUG
		error = xfs_btree_check_ptr(cur, lpp, 0, level);
		if (error)
			goto error0;
#endif

		/* Now put the new data in, and log it. */
		xfs_btree_copy_keys(cur, rkp, lkp, 1);
		xfs_btree_copy_ptrs(cur, rpp, lpp, 1);

		xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
		xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);

		ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
			xfs_btree_key_addr(cur, 2, right)));
	} else {
		/* It's a leaf. make a hole in the records */
		union xfs_btree_rec	*lrp;
		union xfs_btree_rec	*rrp;

		lrp = xfs_btree_rec_addr(cur, lrecs, left);
		rrp = xfs_btree_rec_addr(cur, 1, right);

		xfs_btree_shift_recs(cur, rrp, 1, rrecs);

		/* Now put the new data in, and log it. */
		xfs_btree_copy_recs(cur, rrp, lrp, 1);
		xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
	}

	/*
	 * Decrement and log left's numrecs, bump and log right's numrecs.
	 */
	xfs_btree_set_numrecs(left, --lrecs);
	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);

	xfs_btree_set_numrecs(right, ++rrecs);
	xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);

	/*
	 * Using a temporary cursor, update the parent key values of the
	 * block on the right.
	 */
	error = xfs_btree_dup_cursor(cur, &tcur);
	if (error)
		goto error0;
	i = xfs_btree_lastrec(tcur, level);
	/*
	 * NOTE(review): on this (should-never-happen) corruption path the
	 * goto targets error0, which does not free tcur; error1 would.
	 * Possible cursor leak -- confirm upstream.
	 */
	XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0);

	error = xfs_btree_increment(tcur, level, &i);
	if (error)
		goto error1;

	/* Update the parent high keys of the left block, if needed. */
	if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
		error = xfs_btree_update_keys(cur, level);
		if (error)
			goto error1;
	}

	/* Update the parent keys of the right block. */
	error = xfs_btree_update_keys(tcur, level);
	if (error)
		goto error1;

	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;

out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;

error1:
	XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
	return error;
}
2653
/*
 * Split cur/level block in half.
 * Return new block number and the key to its first
 * record (to be inserted into parent).
 *
 * On success *stat = 1, *ptrp points at the new right block, *key holds
 * the keys to insert into the parent, and if there are more levels above,
 * *curp is a duplicated cursor pointing into the right block.
 * *stat = 0 means block allocation failed (caller gives up).
 */
STATIC int					/* error */
__xfs_btree_split(
	struct xfs_btree_cur	*cur,
	int			level,
	union xfs_btree_ptr	*ptrp,
	union xfs_btree_key	*key,
	struct xfs_btree_cur	**curp,
	int			*stat)		/* success/failure */
{
	union xfs_btree_ptr	lptr;		/* left sibling block ptr */
	struct xfs_buf		*lbp;		/* left buffer pointer */
	struct xfs_btree_block	*left;		/* left btree block */
	union xfs_btree_ptr	rptr;		/* right sibling block ptr */
	struct xfs_buf		*rbp;		/* right buffer pointer */
	struct xfs_btree_block	*right;		/* right btree block */
	union xfs_btree_ptr	rrptr;		/* right-right sibling ptr */
	struct xfs_buf		*rrbp;		/* right-right buffer pointer */
	struct xfs_btree_block	*rrblock;	/* right-right btree block */
	int			lrecs;
	int			rrecs;
	int			src_index;
	int			error;		/* error return value */
#ifdef DEBUG
	int			i;
#endif

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);

	XFS_BTREE_STATS_INC(cur, split);

	/* Set up left block (current one). */
	left = xfs_btree_get_block(cur, level, &lbp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, left, level, lbp);
	if (error)
		goto error0;
#endif

	xfs_btree_buf_to_ptr(cur, lbp, &lptr);

	/* Allocate the new block. If we can't do it, we're toast. Give up. */
	error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat);
	if (error)
		goto error0;
	if (*stat == 0)
		goto out0;
	XFS_BTREE_STATS_INC(cur, alloc);

	/* Set up the new block as "right". */
	error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
	if (error)
		goto error0;

	/* Fill in the btree header for the new right block. */
	xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);

	/*
	 * Split the entries between the old and the new block evenly.
	 * Make sure that if there's an odd number of entries now, that
	 * each new block will have the same number of entries.
	 */
	lrecs = xfs_btree_get_numrecs(left);
	rrecs = lrecs / 2;
	/* Bias the odd entry toward wherever the cursor currently points. */
	if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
		rrecs++;
	src_index = (lrecs - rrecs + 1);

	XFS_BTREE_STATS_ADD(cur, moves, rrecs);

	/* Adjust numrecs for the later get_*_keys() calls. */
	lrecs -= rrecs;
	xfs_btree_set_numrecs(left, lrecs);
	xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);

	/*
	 * Copy btree block entries from the left block over to the
	 * new block, the right. Update the right block and log the
	 * changes.
	 */
	if (level > 0) {
		/* It's a non-leaf.  Move keys and pointers. */
		union xfs_btree_key	*lkp;	/* left btree key */
		union xfs_btree_ptr	*lpp;	/* left address pointer */
		union xfs_btree_key	*rkp;	/* right btree key */
		union xfs_btree_ptr	*rpp;	/* right address pointer */

		lkp = xfs_btree_key_addr(cur, src_index, left);
		lpp = xfs_btree_ptr_addr(cur, src_index, left);
		rkp = xfs_btree_key_addr(cur, 1, right);
		rpp = xfs_btree_ptr_addr(cur, 1, right);

#ifdef DEBUG
		/*
		 * NOTE(review): lpp already points at entry src_index, yet
		 * the index range here is [src_index, rrecs) rather than
		 * [0, rrecs) -- confirm the intended bounds of this
		 * debug-only pointer check.
		 */
		for (i = src_index; i < rrecs; i++) {
			error = xfs_btree_check_ptr(cur, lpp, i, level);
			if (error)
				goto error0;
		}
#endif

		/* Copy the keys & pointers to the new block. */
		xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
		xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);

		xfs_btree_log_keys(cur, rbp, 1, rrecs);
		xfs_btree_log_ptrs(cur, rbp, 1, rrecs);

		/* Stash the keys of the new block for later insertion. */
		xfs_btree_get_node_keys(cur, right, key);
	} else {
		/* It's a leaf.  Move records. */
		union xfs_btree_rec	*lrp;	/* left record pointer */
		union xfs_btree_rec	*rrp;	/* right record pointer */

		lrp = xfs_btree_rec_addr(cur, src_index, left);
		rrp = xfs_btree_rec_addr(cur, 1, right);

		/* Copy records to the new block. */
		xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
		xfs_btree_log_recs(cur, rbp, 1, rrecs);

		/* Stash the keys of the new block for later insertion. */
		xfs_btree_get_leaf_keys(cur, right, key);
	}

	/*
	 * Find the left block number by looking in the buffer.
	 * Adjust sibling pointers.
	 */
	xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
	xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
	xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
	xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);

	xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);

	/*
	 * If there's a block to the new block's right, make that block
	 * point back to right instead of to left.
	 */
	if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
		error = xfs_btree_read_buf_block(cur, &rrptr,
							0, &rrblock, &rrbp);
		if (error)
			goto error0;
		xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
		xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
	}

	/* Update the parent high keys of the left block, if needed. */
	if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
		error = xfs_btree_update_keys(cur, level);
		if (error)
			goto error0;
	}

	/*
	 * If the cursor is really in the right block, move it there.
	 * If it's just pointing past the last entry in left, then we'll
	 * insert there, so don't change anything in that case.
	 */
	if (cur->bc_ptrs[level] > lrecs + 1) {
		xfs_btree_setbuf(cur, level, rbp);
		cur->bc_ptrs[level] -= lrecs;
	}
	/*
	 * If there are more levels, we'll need another cursor which refers
	 * the right block, no matter where this cursor was.
	 */
	if (level + 1 < cur->bc_nlevels) {
		error = xfs_btree_dup_cursor(cur, curp);
		if (error)
			goto error0;
		(*curp)->bc_ptrs[level + 1]++;
	}
	*ptrp = rptr;
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;
out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
2849
/*
 * Argument bundle for running a btree split on a workqueue so it gets
 * a fresh (deep) kernel stack; see xfs_btree_split()/xfs_btree_split_worker().
 */
struct xfs_btree_split_args {
	struct xfs_btree_cur	*cur;		/* cursor to split at */
	int			level;		/* level to split */
	union xfs_btree_ptr	*ptrp;		/* out: new right block ptr */
	union xfs_btree_key	*key;		/* out: key for the new block */
	struct xfs_btree_cur	**curp;		/* out: cursor for right block */
	int			*stat;		/* success/failure */
	int			result;		/* __xfs_btree_split() return */
	bool			kswapd;	/* allocation in kswapd context */
	struct completion	*done;		/* signalled when worker is done */
	struct work_struct	work;		/* workqueue item */
};
2862
/*
 * Stack switching interfaces for allocation
 *
 * Workqueue callback: unpack the split arguments, run the split on the
 * worker's stack, and signal the waiting submitter via the completion.
 */
static void
xfs_btree_split_worker(
	struct work_struct	*work)
{
	struct xfs_btree_split_args	*args = container_of(work,
						struct xfs_btree_split_args, work);
	unsigned long		pflags;
	unsigned long		new_pflags = PF_FSTRANS;

	/*
	 * we are in a transaction context here, but may also be doing work
	 * in kswapd context, and hence we may need to inherit that state
	 * temporarily to ensure that we don't block waiting for memory reclaim
	 * in any way.
	 */
	if (args->kswapd)
		new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;

	current_set_flags_nested(&pflags, new_pflags);

	/* Do the split and report the result back through args. */
	args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
					 args->key, args->curp, args->stat);
	complete(args->done);

	/* Restore the task flags we temporarily inherited above. */
	current_restore_flags_nested(&pflags, new_pflags);
}
2892
/*
 * BMBT split requests often come in with little stack to work on. Push
 * them off to a worker thread so there is lots of stack to use. For the other
 * btree types, just call directly to avoid the context switch overhead here.
 */
STATIC int					/* error */
xfs_btree_split(
	struct xfs_btree_cur	*cur,
	int			level,
	union xfs_btree_ptr	*ptrp,
	union xfs_btree_key	*key,
	struct xfs_btree_cur	**curp,
	int			*stat)		/* success/failure */
{
	struct xfs_btree_split_args	args;
	DECLARE_COMPLETION_ONSTACK(done);

	/* Only the bmap btree takes the deep-stack detour. */
	if (cur->bc_btnum != XFS_BTNUM_BMAP)
		return __xfs_btree_split(cur, level, ptrp, key, curp, stat);

	/*
	 * args and done live on our stack; waiting on the completion
	 * below keeps them valid until the worker has finished.
	 */
	args.cur = cur;
	args.level = level;
	args.ptrp = ptrp;
	args.key = key;
	args.curp = curp;
	args.stat = stat;
	args.done = &done;
	args.kswapd = current_is_kswapd();
	INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
	queue_work(xfs_alloc_wq, &args.work);
	wait_for_completion(&done);
	destroy_work_on_stack(&args.work);
	return args.result;
}
2927
2928
/*
 * Copy the old inode root contents into a real block and make the
 * broot point to it.
 *
 * Grows an inode-rooted btree by one level: the old in-inode root is
 * copied into a freshly allocated block, and the in-inode root is
 * shrunk to a single key/pointer referencing that new child.
 */
int						/* error */
xfs_btree_new_iroot(
	struct xfs_btree_cur	*cur,		/* btree cursor */
	int			*logflags,	/* logging flags for inode */
	int			*stat)		/* return status - 0 fail */
{
	struct xfs_buf		*cbp;		/* buffer for cblock */
	struct xfs_btree_block	*block;		/* btree block */
	struct xfs_btree_block	*cblock;	/* child btree block */
	union xfs_btree_key	*ckp;		/* child key pointer */
	union xfs_btree_ptr	*cpp;		/* child ptr pointer */
	union xfs_btree_key	*kp;		/* pointer to btree key */
	union xfs_btree_ptr	*pp;		/* pointer to block addr */
	union xfs_btree_ptr	nptr;		/* new block addr */
	int			level;		/* btree level */
	int			error;		/* error return code */
#ifdef DEBUG
	int			i;		/* loop counter */
#endif

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_STATS_INC(cur, newroot);

	/* Only inode-rooted btrees grow their root this way. */
	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);

	level = cur->bc_nlevels - 1;

	block = xfs_btree_get_iroot(cur);
	pp = xfs_btree_ptr_addr(cur, 1, block);

	/* Allocate the new block. If we can't do it, we're toast. Give up. */
	error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
	if (error)
		goto error0;
	if (*stat == 0) {
		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
		return 0;
	}
	XFS_BTREE_STATS_INC(cur, alloc);

	/* Copy the root into a real block. */
	error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
	if (error)
		goto error0;

	/*
	 * we can't just memcpy() the root in for CRC enabled btree blocks.
	 * In that case have to also ensure the blkno remains correct
	 */
	memcpy(cblock, block, xfs_btree_block_len(cur));
	if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
		/* Self-describing block: stamp the new block's own daddr. */
		if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
			cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn);
		else
			cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn);
	}

	/* The tree gains a level; the old root now has a single child. */
	be16_add_cpu(&block->bb_level, 1);
	xfs_btree_set_numrecs(block, 1);
	cur->bc_nlevels++;
	cur->bc_ptrs[level + 1] = 1;

	kp = xfs_btree_key_addr(cur, 1, block);
	ckp = xfs_btree_key_addr(cur, 1, cblock);
	xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));

	cpp = xfs_btree_ptr_addr(cur, 1, cblock);
#ifdef DEBUG
	for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
		error = xfs_btree_check_ptr(cur, pp, i, level);
		if (error)
			goto error0;
	}
#endif
	xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));

#ifdef DEBUG
	error = xfs_btree_check_ptr(cur, &nptr, 0, level);
	if (error)
		goto error0;
#endif
	/* Point the root's single slot at the new child block. */
	xfs_btree_copy_ptrs(cur, pp, &nptr, 1);

	/*
	 * Resize the incore root; the delta (1 - old numrecs) is negative
	 * whenever the old root held more than one record, shrinking it.
	 */
	xfs_iroot_realloc(cur->bc_private.b.ip,
			  1 - xfs_btree_get_numrecs(cblock),
			  cur->bc_private.b.whichfork);

	xfs_btree_setbuf(cur, level, cbp);

	/*
	 * Do all this logging at the end so that
	 * the root is at the right level.
	 */
	xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
	xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
	xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));

	*logflags |=
		XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
	*stat = 1;
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;
error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
3039
/*
 * Allocate a new root block, fill it in.
 *
 * Called after the old root has been split: the new root gets exactly
 * two entries, pointing at the two halves produced by the split.  The
 * tree grows one level and the cursor is fixed up to reference the new
 * root.  *stat is 1 on success, 0 if no block could be allocated.
 */
STATIC int				/* error */
xfs_btree_new_root(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			*stat)	/* success/failure */
{
	struct xfs_btree_block	*block;	/* one half of the old root block */
	struct xfs_buf		*bp;	/* buffer containing block */
	int			error;	/* error return value */
	struct xfs_buf		*lbp;	/* left buffer pointer */
	struct xfs_btree_block	*left;	/* left btree block */
	struct xfs_buf		*nbp;	/* new (root) buffer */
	struct xfs_btree_block	*new;	/* new (root) btree block */
	int			nptr;	/* new value for key index, 1 or 2 */
	struct xfs_buf		*rbp;	/* right buffer pointer */
	struct xfs_btree_block	*right;	/* right btree block */
	union xfs_btree_ptr	rptr;	/* right sibling / root start ptr */
	union xfs_btree_ptr	lptr;	/* left sibling / new block ptr */

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_STATS_INC(cur, newroot);

	/* initialise our start point from the cursor */
	cur->bc_ops->init_ptr_from_cur(cur, &rptr);

	/* Allocate the new block. If we can't do it, we're toast. Give up. */
	error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat);
	if (error)
		goto error0;
	if (*stat == 0)
		goto out0;
	XFS_BTREE_STATS_INC(cur, alloc);

	/* Set up the new block. */
	error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
	if (error)
		goto error0;

	/* Set the root in the holding structure, increasing the level by 1. */
	cur->bc_ops->set_root(cur, &lptr, 1);

	/*
	 * At the previous root level there are now two blocks: the old root,
	 * and the new block generated when it was split.  We don't know which
	 * one the cursor is pointing at, so we set up variables "left" and
	 * "right" for each case.
	 */
	block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
	if (error)
		goto error0;
#endif

	/* A non-null right sibling means the cursor's block is the left one. */
	xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
	if (!xfs_btree_ptr_is_null(cur, &rptr)) {
		/* Our block is left, pick up the right block. */
		lbp = bp;
		xfs_btree_buf_to_ptr(cur, lbp, &lptr);
		left = block;
		error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
		if (error)
			goto error0;
		bp = rbp;
		nptr = 1;
	} else {
		/* Our block is right, pick up the left block. */
		rbp = bp;
		xfs_btree_buf_to_ptr(cur, rbp, &rptr);
		right = block;
		xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
		error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
		if (error)
			goto error0;
		bp = lbp;
		nptr = 2;
	}

	/* Fill in the new block's btree header and log it. */
	xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
	xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
	ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
	       !xfs_btree_ptr_is_null(cur, &rptr));

	/* Fill in the key data in the new root. */
	if (xfs_btree_get_level(left) > 0) {
		/*
		 * Get the keys for the left block's keys and put them directly
		 * in the parent block.  Do the same for the right block.
		 */
		xfs_btree_get_node_keys(cur, left,
				xfs_btree_key_addr(cur, 1, new));
		xfs_btree_get_node_keys(cur, right,
				xfs_btree_key_addr(cur, 2, new));
	} else {
		/*
		 * Get the keys for the left block's records and put them
		 * directly in the parent block.  Do the same for the right
		 * block.
		 */
		xfs_btree_get_leaf_keys(cur, left,
				xfs_btree_key_addr(cur, 1, new));
		xfs_btree_get_leaf_keys(cur, right,
				xfs_btree_key_addr(cur, 2, new));
	}
	xfs_btree_log_keys(cur, nbp, 1, 2);

	/* Fill in the pointer data in the new root. */
	xfs_btree_copy_ptrs(cur,
		xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
	xfs_btree_copy_ptrs(cur,
		xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
	xfs_btree_log_ptrs(cur, nbp, 1, 2);

	/* Fix up the cursor: nptr records which child the cursor was on. */
	xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
	cur->bc_ptrs[cur->bc_nlevels] = nptr;
	cur->bc_nlevels++;
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;
error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;
}
3172
/*
 * Make room in a full btree block so a new entry can be inserted at
 * @level.  Strategies are tried in order: grow or replace an inode
 * root, shift an entry to the right sibling, shift one to the left
 * sibling, and finally split the block in half.
 *
 * On a split, *nptr/*ncur/*key describe the new block and cursor, and
 * *index (and on an lshift also *oindex) are refreshed from the
 * cursor's possibly-changed position.  *stat is 0 if no room could be
 * made, 1 otherwise.
 */
STATIC int
xfs_btree_make_block_unfull(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			level,	/* btree level */
	int			numrecs,/* # of recs in block */
	int			*oindex,/* old tree index */
	int			*index,	/* new tree index */
	union xfs_btree_ptr	*nptr,	/* new btree ptr */
	struct xfs_btree_cur	**ncur,	/* new btree cursor */
	union xfs_btree_key	*key,	/* key of new block */
	int			*stat)
{
	int			error = 0;

	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    level == cur->bc_nlevels - 1) {
		struct xfs_inode *ip = cur->bc_private.b.ip;

		if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
			/* A root block that can be made bigger. */
			xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
			*stat = 1;
		} else {
			/* A root block that needs replacing */
			int	logflags = 0;

			error = xfs_btree_new_iroot(cur, &logflags, stat);
			if (error || *stat == 0)
				return error;

			xfs_trans_log_inode(cur->bc_tp, ip, logflags);
		}

		return 0;
	}

	/* First, try shifting an entry to the right neighbor. */
	error = xfs_btree_rshift(cur, level, stat);
	if (error || *stat)
		return error;

	/* Next, try shifting an entry to the left neighbor. */
	error = xfs_btree_lshift(cur, level, stat);
	if (error)
		return error;

	if (*stat) {
		/* Record the cursor's (possibly changed) position. */
		*oindex = *index = cur->bc_ptrs[level];
		return 0;
	}

	/*
	 * Next, try splitting the current block in half.
	 *
	 * If this works we have to re-set our variables because we
	 * could be in a different block now.
	 */
	error = xfs_btree_split(cur, level, nptr, key, ncur, stat);
	if (error || *stat == 0)
		return error;

	*index = cur->bc_ptrs[level];
	return 0;
}
3238
/*
 * Insert one record/level.  Return information to the caller
 * allowing the next level up to proceed if necessary.
 *
 * On return, a non-null *ptrp means a new block came into existence at
 * this level; *key holds its key and *curp a cursor for it, which the
 * caller must propagate into the next level up.  *stat is 1 on
 * success, 0 on failure (off the left edge, or no space available).
 */
STATIC int
xfs_btree_insrec(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			level,	/* level to insert record at */
	union xfs_btree_ptr	*ptrp,	/* i/o: block number inserted */
	union xfs_btree_rec	*rec,	/* record to insert */
	union xfs_btree_key	*key,	/* i/o: block key for ptrp */
	struct xfs_btree_cur	**curp,	/* output: new cursor replacing cur */
	int			*stat)	/* success/failure */
{
	struct xfs_btree_block	*block;	/* btree block */
	struct xfs_buf		*bp;	/* buffer for block */
	union xfs_btree_ptr	nptr;	/* new block ptr */
	struct xfs_btree_cur	*ncur;	/* new btree cursor */
	union xfs_btree_key	nkey;	/* new block key */
	union xfs_btree_key	*lkey;
	int			optr;	/* old key/record index */
	int			ptr;	/* key/record index */
	int			numrecs;/* number of records */
	int			error;	/* error return value */
#ifdef DEBUG
	int			i;
#endif
	xfs_daddr_t		old_bn;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, &rec);

	ncur = NULL;
	lkey = &nkey;

	/*
	 * If we have an external root pointer, and we've made it to the
	 * root level, allocate a new root block and we're done.
	 */
	if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (level >= cur->bc_nlevels)) {
		error = xfs_btree_new_root(cur, stat);
		xfs_btree_set_ptr_null(cur, ptrp);

		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
		return error;
	}

	/* If we're off the left edge, return failure. */
	ptr = cur->bc_ptrs[level];
	if (ptr == 0) {
		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
		*stat = 0;
		return 0;
	}

	optr = ptr;

	XFS_BTREE_STATS_INC(cur, insrec);

	/* Get pointers to the btree buffer and block. */
	block = xfs_btree_get_block(cur, level, &bp);
	/* Remember the starting block so we can detect a move further down. */
	old_bn = bp ? bp->b_bn : XFS_BUF_DADDR_NULL;
	numrecs = xfs_btree_get_numrecs(block);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto error0;

	/* Check that the new entry is being inserted in the right place. */
	if (ptr <= numrecs) {
		if (level == 0) {
			ASSERT(cur->bc_ops->recs_inorder(cur, rec,
				xfs_btree_rec_addr(cur, ptr, block)));
		} else {
			ASSERT(cur->bc_ops->keys_inorder(cur, key,
				xfs_btree_key_addr(cur, ptr, block)));
		}
	}
#endif

	/*
	 * If the block is full, we can't insert the new entry until we
	 * make the block un-full.
	 */
	xfs_btree_set_ptr_null(cur, &nptr);
	if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
		error = xfs_btree_make_block_unfull(cur, level, numrecs,
					&optr, &ptr, &nptr, &ncur, lkey, stat);
		if (error || *stat == 0)
			goto error0;
	}

	/*
	 * The current block may have changed if the block was
	 * previously full and we have just made space in it.
	 */
	block = xfs_btree_get_block(cur, level, &bp);
	numrecs = xfs_btree_get_numrecs(block);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		return error;
#endif

	/*
	 * At this point we know there's room for our new entry in the block
	 * we're pointing at.
	 */
	XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);

	if (level > 0) {
		/* It's a nonleaf. make a hole in the keys and ptrs */
		union xfs_btree_key	*kp;
		union xfs_btree_ptr	*pp;

		kp = xfs_btree_key_addr(cur, ptr, block);
		pp = xfs_btree_ptr_addr(cur, ptr, block);

#ifdef DEBUG
		for (i = numrecs - ptr; i >= 0; i--) {
			error = xfs_btree_check_ptr(cur, pp, i, level);
			if (error)
				return error;
		}
#endif

		xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
		xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);

#ifdef DEBUG
		error = xfs_btree_check_ptr(cur, ptrp, 0, level);
		if (error)
			goto error0;
#endif

		/* Now put the new data in, bump numrecs and log it. */
		xfs_btree_copy_keys(cur, kp, key, 1);
		xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
		numrecs++;
		xfs_btree_set_numrecs(block, numrecs);
		xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
		xfs_btree_log_keys(cur, bp, ptr, numrecs);
#ifdef DEBUG
		if (ptr < numrecs) {
			ASSERT(cur->bc_ops->keys_inorder(cur, kp,
				xfs_btree_key_addr(cur, ptr + 1, block)));
		}
#endif
	} else {
		/* It's a leaf. make a hole in the records */
		union xfs_btree_rec	*rp;

		rp = xfs_btree_rec_addr(cur, ptr, block);

		xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);

		/* Now put the new data in, bump numrecs and log it. */
		xfs_btree_copy_recs(cur, rp, rec, 1);
		xfs_btree_set_numrecs(block, ++numrecs);
		xfs_btree_log_recs(cur, bp, ptr, numrecs);
#ifdef DEBUG
		if (ptr < numrecs) {
			ASSERT(cur->bc_ops->recs_inorder(cur, rp,
				xfs_btree_rec_addr(cur, ptr + 1, block)));
		}
#endif
	}

	/* Log the new number of records in the btree header. */
	xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);

	/*
	 * If we just inserted into a new tree block, we have to
	 * recalculate nkey here because nkey is out of date.
	 *
	 * Otherwise we're just updating an existing block (having shoved
	 * some records into the new tree block), so use the regular key
	 * update mechanism.
	 */
	if (bp && bp->b_bn != old_bn) {
		xfs_btree_get_keys(cur, block, lkey);
	} else if (xfs_btree_needs_key_update(cur, optr)) {
		error = xfs_btree_update_keys(cur, level);
		if (error)
			goto error0;
	}

	/*
	 * If we are tracking the last record in the tree and
	 * we are at the far right edge of the tree, update it.
	 */
	if (xfs_btree_is_lastrec(cur, block, level)) {
		cur->bc_ops->update_lastrec(cur, block, rec,
					    ptr, LASTREC_INSREC);
	}

	/*
	 * Return the new block number, if any.
	 * If there is one, give back a record value and a cursor too.
	 */
	*ptrp = nptr;
	if (!xfs_btree_ptr_is_null(cur, &nptr)) {
		xfs_btree_copy_keys(cur, key, lkey, 1);
		*curp = ncur;
	}

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
3456
/*
 * Insert the record at the point referenced by cur.
 *
 * A multi-level split of the tree on insert will invalidate the original
 * cursor.  All callers of this function should assume that the cursor is
 * no longer valid and revalidate it.
 *
 * Walks up the tree one level at a time, calling xfs_btree_insrec() at
 * each; a split at one level hands back a new block pointer (nptr) and
 * cursor (ncur) that seed the insert at the level above.  *stat is 1 on
 * success, 0 on failure to insert.
 */
int
xfs_btree_insert(
	struct xfs_btree_cur	*cur,
	int			*stat)
{
	int			error;	/* error return value */
	int			i;	/* result value, 0 for failure */
	int			level;	/* current level number in btree */
	union xfs_btree_ptr	nptr;	/* new block number (split result) */
	struct xfs_btree_cur	*ncur;	/* new cursor (split result) */
	struct xfs_btree_cur	*pcur;	/* previous level's cursor */
	union xfs_btree_key	bkey;	/* key of block to insert */
	union xfs_btree_key	*key;
	union xfs_btree_rec	rec;	/* record to insert */

	level = 0;
	ncur = NULL;
	pcur = cur;
	key = &bkey;

	xfs_btree_set_ptr_null(cur, &nptr);

	/* Make a key out of the record data to be inserted, and save it. */
	cur->bc_ops->init_rec_from_cur(cur, &rec);
	cur->bc_ops->init_key_from_rec(key, &rec);

	/*
	 * Loop going up the tree, starting at the leaf level.
	 * Stop when we don't get a split block, that must mean that
	 * the insert is finished with this level.
	 */
	do {
		/*
		 * Insert nrec/nptr into this level of the tree.
		 * Note if we fail, nptr will be null.
		 */
		error = xfs_btree_insrec(pcur, level, &nptr, &rec, key,
				&ncur, &i);
		if (error) {
			/* Don't leak a cursor we created for an upper level. */
			if (pcur != cur)
				xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
			goto error0;
		}

		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
		level++;

		/*
		 * See if the cursor we just used is trash.
		 * Can't trash the caller's cursor, but otherwise we should
		 * if ncur is a new cursor or we're about to be done.
		 */
		if (pcur != cur &&
		    (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
			/* Save the state from the cursor before we trash it */
			if (cur->bc_ops->update_cursor)
				cur->bc_ops->update_cursor(pcur, cur);
			cur->bc_nlevels = pcur->bc_nlevels;
			xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
		}
		/* If we got a new cursor, switch to it. */
		if (ncur) {
			pcur = ncur;
			ncur = NULL;
		}
	} while (!xfs_btree_ptr_is_null(cur, &nptr));

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = i;
	return 0;
error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
3538
/*
 * Try to merge a non-leaf block back into the inode root.
 *
 * Note: the killroot names comes from the fact that we're effectively
 * killing the old root block.  But because we can't just delete the
 * inode we have to copy the single block it was pointing to into the
 * inode.
 *
 * Bails out (returning 0 without changes) when the root is one level
 * above a leaf, has more than one child, or the child won't fit in the
 * inode fork.
 */
STATIC int
xfs_btree_kill_iroot(
	struct xfs_btree_cur	*cur)
{
	int			whichfork = cur->bc_private.b.whichfork;
	struct xfs_inode	*ip = cur->bc_private.b.ip;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_btree_block	*block;
	struct xfs_btree_block	*cblock;
	union xfs_btree_key	*kp;
	union xfs_btree_key	*ckp;
	union xfs_btree_ptr	*pp;
	union xfs_btree_ptr	*cpp;
	struct xfs_buf		*cbp;
	int			level;
	int			index;
	int			numrecs;
	int			error;
#ifdef DEBUG
	union xfs_btree_ptr	ptr;
	int			i;
#endif

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);

	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
	ASSERT(cur->bc_nlevels > 1);

	/*
	 * Don't deal with the root block needs to be a leaf case.
	 * We're just going to turn the thing back into extents anyway.
	 */
	level = cur->bc_nlevels - 1;
	if (level == 1)
		goto out0;

	/*
	 * Give up if the root has multiple children.
	 */
	block = xfs_btree_get_iroot(cur);
	if (xfs_btree_get_numrecs(block) != 1)
		goto out0;

	cblock = xfs_btree_get_block(cur, level - 1, &cbp);
	numrecs = xfs_btree_get_numrecs(cblock);

	/*
	 * Only do this if the next level will fit.
	 * Then the data must be copied up to the inode,
	 * instead of freeing the root you free the next level.
	 */
	if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
		goto out0;

	XFS_BTREE_STATS_INC(cur, killroot);

#ifdef DEBUG
	/* The inode root must have no siblings. */
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
	ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
	ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
#endif

	/*
	 * Resize the inode root to hold the child's records; the realloc
	 * may move the broot, so re-read the block pointer afterwards.
	 */
	index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
	if (index) {
		xfs_iroot_realloc(cur->bc_private.b.ip, index,
				  cur->bc_private.b.whichfork);
		block = ifp->if_broot;
	}

	be16_add_cpu(&block->bb_numrecs, index);
	ASSERT(block->bb_numrecs == cblock->bb_numrecs);

	/* Copy the child's keys up into the inode root. */
	kp = xfs_btree_key_addr(cur, 1, block);
	ckp = xfs_btree_key_addr(cur, 1, cblock);
	xfs_btree_copy_keys(cur, kp, ckp, numrecs);

	/* And the child's pointers, validating each one in DEBUG kernels. */
	pp = xfs_btree_ptr_addr(cur, 1, block);
	cpp = xfs_btree_ptr_addr(cur, 1, cblock);
#ifdef DEBUG
	for (i = 0; i < numrecs; i++) {
		error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
		if (error) {
			XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
			return error;
		}
	}
#endif
	xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);

	/* The child block's contents now live in the inode; free it. */
	error = xfs_btree_free_block(cur, cbp);
	if (error) {
		XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
		return error;
	}

	cur->bc_bufs[level - 1] = NULL;
	be16_add_cpu(&block->bb_level, -1);
	xfs_trans_log_inode(cur->bc_tp, ip,
		XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork));
	cur->bc_nlevels--;
out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;
}
3652
/*
 * Kill the current root node, and replace it with it's only child node.
 *
 * @bp/@level identify the old root block; @newroot is the pointer to
 * the surviving child that becomes the new root.  Only used for btrees
 * with an external root (the inode-rooted case goes through
 * xfs_btree_kill_iroot() instead).
 */
STATIC int
xfs_btree_kill_root(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp,
	int			level,
	union xfs_btree_ptr	*newroot)
{
	int			error;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_STATS_INC(cur, killroot);

	/*
	 * Update the root pointer, decreasing the level by 1 and then
	 * free the old root.
	 */
	cur->bc_ops->set_root(cur, newroot, -1);

	error = xfs_btree_free_block(cur, bp);
	if (error) {
		XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
		return error;
	}

	/* Drop the cursor's reference to the dead level. */
	cur->bc_bufs[level] = NULL;
	cur->bc_ra[level] = 0;
	cur->bc_nlevels--;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;
}
3687
3688 STATIC int
xfs_btree_dec_cursor(struct xfs_btree_cur * cur,int level,int * stat)3689 xfs_btree_dec_cursor(
3690 struct xfs_btree_cur *cur,
3691 int level,
3692 int *stat)
3693 {
3694 int error;
3695 int i;
3696
3697 if (level > 0) {
3698 error = xfs_btree_decrement(cur, level, &i);
3699 if (error)
3700 return error;
3701 }
3702
3703 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3704 *stat = 1;
3705 return 0;
3706 }
3707
3708 /*
3709 * Single level of the btree record deletion routine.
3710 * Delete record pointed to by cur/level.
3711 * Remove the record from its block then rebalance the tree.
3712 * Return 0 for error, 1 for done, 2 to go on to the next level.
3713 */
3714 STATIC int /* error */
xfs_btree_delrec(struct xfs_btree_cur * cur,int level,int * stat)3715 xfs_btree_delrec(
3716 struct xfs_btree_cur *cur, /* btree cursor */
3717 int level, /* level removing record from */
3718 int *stat) /* fail/done/go-on */
3719 {
3720 struct xfs_btree_block *block; /* btree block */
3721 union xfs_btree_ptr cptr; /* current block ptr */
3722 struct xfs_buf *bp; /* buffer for block */
3723 int error; /* error return value */
3724 int i; /* loop counter */
3725 union xfs_btree_ptr lptr; /* left sibling block ptr */
3726 struct xfs_buf *lbp; /* left buffer pointer */
3727 struct xfs_btree_block *left; /* left btree block */
3728 int lrecs = 0; /* left record count */
3729 int ptr; /* key/record index */
3730 union xfs_btree_ptr rptr; /* right sibling block ptr */
3731 struct xfs_buf *rbp; /* right buffer pointer */
3732 struct xfs_btree_block *right; /* right btree block */
3733 struct xfs_btree_block *rrblock; /* right-right btree block */
3734 struct xfs_buf *rrbp; /* right-right buffer pointer */
3735 int rrecs = 0; /* right record count */
3736 struct xfs_btree_cur *tcur; /* temporary btree cursor */
3737 int numrecs; /* temporary numrec count */
3738
3739 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3740 XFS_BTREE_TRACE_ARGI(cur, level);
3741
3742 tcur = NULL;
3743
3744 /* Get the index of the entry being deleted, check for nothing there. */
3745 ptr = cur->bc_ptrs[level];
3746 if (ptr == 0) {
3747 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3748 *stat = 0;
3749 return 0;
3750 }
3751
3752 /* Get the buffer & block containing the record or key/ptr. */
3753 block = xfs_btree_get_block(cur, level, &bp);
3754 numrecs = xfs_btree_get_numrecs(block);
3755
3756 #ifdef DEBUG
3757 error = xfs_btree_check_block(cur, block, level, bp);
3758 if (error)
3759 goto error0;
3760 #endif
3761
3762 /* Fail if we're off the end of the block. */
3763 if (ptr > numrecs) {
3764 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3765 *stat = 0;
3766 return 0;
3767 }
3768
3769 XFS_BTREE_STATS_INC(cur, delrec);
3770 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
3771
3772 /* Excise the entries being deleted. */
3773 if (level > 0) {
3774 /* It's a nonleaf. operate on keys and ptrs */
3775 union xfs_btree_key *lkp;
3776 union xfs_btree_ptr *lpp;
3777
3778 lkp = xfs_btree_key_addr(cur, ptr + 1, block);
3779 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
3780
3781 #ifdef DEBUG
3782 for (i = 0; i < numrecs - ptr; i++) {
3783 error = xfs_btree_check_ptr(cur, lpp, i, level);
3784 if (error)
3785 goto error0;
3786 }
3787 #endif
3788
3789 if (ptr < numrecs) {
3790 xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
3791 xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
3792 xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3793 xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3794 }
3795 } else {
3796 /* It's a leaf. operate on records */
3797 if (ptr < numrecs) {
3798 xfs_btree_shift_recs(cur,
3799 xfs_btree_rec_addr(cur, ptr + 1, block),
3800 -1, numrecs - ptr);
3801 xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3802 }
3803 }
3804
3805 /*
3806 * Decrement and log the number of entries in the block.
3807 */
3808 xfs_btree_set_numrecs(block, --numrecs);
3809 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3810
3811 /*
3812 * If we are tracking the last record in the tree and
3813 * we are at the far right edge of the tree, update it.
3814 */
3815 if (xfs_btree_is_lastrec(cur, block, level)) {
3816 cur->bc_ops->update_lastrec(cur, block, NULL,
3817 ptr, LASTREC_DELREC);
3818 }
3819
3820 /*
3821 * We're at the root level. First, shrink the root block in-memory.
3822 * Try to get rid of the next level down. If we can't then there's
3823 * nothing left to do.
3824 */
3825 if (level == cur->bc_nlevels - 1) {
3826 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3827 xfs_iroot_realloc(cur->bc_private.b.ip, -1,
3828 cur->bc_private.b.whichfork);
3829
3830 error = xfs_btree_kill_iroot(cur);
3831 if (error)
3832 goto error0;
3833
3834 error = xfs_btree_dec_cursor(cur, level, stat);
3835 if (error)
3836 goto error0;
3837 *stat = 1;
3838 return 0;
3839 }
3840
3841 /*
3842 * If this is the root level, and there's only one entry left,
3843 * and it's NOT the leaf level, then we can get rid of this
3844 * level.
3845 */
3846 if (numrecs == 1 && level > 0) {
3847 union xfs_btree_ptr *pp;
3848 /*
3849 * pp is still set to the first pointer in the block.
3850 * Make it the new root of the btree.
3851 */
3852 pp = xfs_btree_ptr_addr(cur, 1, block);
3853 error = xfs_btree_kill_root(cur, bp, level, pp);
3854 if (error)
3855 goto error0;
3856 } else if (level > 0) {
3857 error = xfs_btree_dec_cursor(cur, level, stat);
3858 if (error)
3859 goto error0;
3860 }
3861 *stat = 1;
3862 return 0;
3863 }
3864
3865 /*
3866 * If we deleted the leftmost entry in the block, update the
3867 * key values above us in the tree.
3868 */
3869 if (xfs_btree_needs_key_update(cur, ptr)) {
3870 error = xfs_btree_update_keys(cur, level);
3871 if (error)
3872 goto error0;
3873 }
3874
3875 /*
3876 * If the number of records remaining in the block is at least
3877 * the minimum, we're done.
3878 */
3879 if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
3880 error = xfs_btree_dec_cursor(cur, level, stat);
3881 if (error)
3882 goto error0;
3883 return 0;
3884 }
3885
3886 /*
3887 * Otherwise, we have to move some records around to keep the
3888 * tree balanced. Look at the left and right sibling blocks to
3889 * see if we can re-balance by moving only one record.
3890 */
3891 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3892 xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
3893
3894 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3895 /*
3896 * One child of root, need to get a chance to copy its contents
3897 * into the root and delete it. Can't go up to next level,
3898 * there's nothing to delete there.
3899 */
3900 if (xfs_btree_ptr_is_null(cur, &rptr) &&
3901 xfs_btree_ptr_is_null(cur, &lptr) &&
3902 level == cur->bc_nlevels - 2) {
3903 error = xfs_btree_kill_iroot(cur);
3904 if (!error)
3905 error = xfs_btree_dec_cursor(cur, level, stat);
3906 if (error)
3907 goto error0;
3908 return 0;
3909 }
3910 }
3911
3912 ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
3913 !xfs_btree_ptr_is_null(cur, &lptr));
3914
3915 /*
3916 * Duplicate the cursor so our btree manipulations here won't
3917 * disrupt the next level up.
3918 */
3919 error = xfs_btree_dup_cursor(cur, &tcur);
3920 if (error)
3921 goto error0;
3922
3923 /*
3924 * If there's a right sibling, see if it's ok to shift an entry
3925 * out of it.
3926 */
3927 if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3928 /*
3929 * Move the temp cursor to the last entry in the next block.
3930 * Actually any entry but the first would suffice.
3931 */
3932 i = xfs_btree_lastrec(tcur, level);
3933 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3934
3935 error = xfs_btree_increment(tcur, level, &i);
3936 if (error)
3937 goto error0;
3938 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3939
3940 i = xfs_btree_lastrec(tcur, level);
3941 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3942
3943 /* Grab a pointer to the block. */
3944 right = xfs_btree_get_block(tcur, level, &rbp);
3945 #ifdef DEBUG
3946 error = xfs_btree_check_block(tcur, right, level, rbp);
3947 if (error)
3948 goto error0;
3949 #endif
3950 /* Grab the current block number, for future use. */
3951 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
3952
3953 /*
3954 * If right block is full enough so that removing one entry
3955 * won't make it too empty, and left-shifting an entry out
3956 * of right to us works, we're done.
3957 */
3958 if (xfs_btree_get_numrecs(right) - 1 >=
3959 cur->bc_ops->get_minrecs(tcur, level)) {
3960 error = xfs_btree_lshift(tcur, level, &i);
3961 if (error)
3962 goto error0;
3963 if (i) {
3964 ASSERT(xfs_btree_get_numrecs(block) >=
3965 cur->bc_ops->get_minrecs(tcur, level));
3966
3967 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3968 tcur = NULL;
3969
3970 error = xfs_btree_dec_cursor(cur, level, stat);
3971 if (error)
3972 goto error0;
3973 return 0;
3974 }
3975 }
3976
3977 /*
3978 * Otherwise, grab the number of records in right for
3979 * future reference, and fix up the temp cursor to point
3980 * to our block again (last record).
3981 */
3982 rrecs = xfs_btree_get_numrecs(right);
3983 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3984 i = xfs_btree_firstrec(tcur, level);
3985 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3986
3987 error = xfs_btree_decrement(tcur, level, &i);
3988 if (error)
3989 goto error0;
3990 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3991 }
3992 }
3993
3994 /*
3995 * If there's a left sibling, see if it's ok to shift an entry
3996 * out of it.
3997 */
3998 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3999 /*
4000 * Move the temp cursor to the first entry in the
4001 * previous block.
4002 */
4003 i = xfs_btree_firstrec(tcur, level);
4004 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
4005
4006 error = xfs_btree_decrement(tcur, level, &i);
4007 if (error)
4008 goto error0;
4009 i = xfs_btree_firstrec(tcur, level);
4010 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
4011
4012 /* Grab a pointer to the block. */
4013 left = xfs_btree_get_block(tcur, level, &lbp);
4014 #ifdef DEBUG
4015 error = xfs_btree_check_block(cur, left, level, lbp);
4016 if (error)
4017 goto error0;
4018 #endif
4019 /* Grab the current block number, for future use. */
4020 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
4021
4022 /*
4023 * If left block is full enough so that removing one entry
4024 * won't make it too empty, and right-shifting an entry out
4025 * of left to us works, we're done.
4026 */
4027 if (xfs_btree_get_numrecs(left) - 1 >=
4028 cur->bc_ops->get_minrecs(tcur, level)) {
4029 error = xfs_btree_rshift(tcur, level, &i);
4030 if (error)
4031 goto error0;
4032 if (i) {
4033 ASSERT(xfs_btree_get_numrecs(block) >=
4034 cur->bc_ops->get_minrecs(tcur, level));
4035 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
4036 tcur = NULL;
4037 if (level == 0)
4038 cur->bc_ptrs[0]++;
4039 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
4040 *stat = 1;
4041 return 0;
4042 }
4043 }
4044
4045 /*
4046 * Otherwise, grab the number of records in right for
4047 * future reference.
4048 */
4049 lrecs = xfs_btree_get_numrecs(left);
4050 }
4051
4052 /* Delete the temp cursor, we're done with it. */
4053 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
4054 tcur = NULL;
4055
4056 /* If here, we need to do a join to keep the tree balanced. */
4057 ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
4058
4059 if (!xfs_btree_ptr_is_null(cur, &lptr) &&
4060 lrecs + xfs_btree_get_numrecs(block) <=
4061 cur->bc_ops->get_maxrecs(cur, level)) {
4062 /*
4063 * Set "right" to be the starting block,
4064 * "left" to be the left neighbor.
4065 */
4066 rptr = cptr;
4067 right = block;
4068 rbp = bp;
4069 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
4070 if (error)
4071 goto error0;
4072
4073 /*
4074 * If that won't work, see if we can join with the right neighbor block.
4075 */
4076 } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
4077 rrecs + xfs_btree_get_numrecs(block) <=
4078 cur->bc_ops->get_maxrecs(cur, level)) {
4079 /*
4080 * Set "left" to be the starting block,
4081 * "right" to be the right neighbor.
4082 */
4083 lptr = cptr;
4084 left = block;
4085 lbp = bp;
4086 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
4087 if (error)
4088 goto error0;
4089
4090 /*
4091 * Otherwise, we can't fix the imbalance.
4092 * Just return. This is probably a logic error, but it's not fatal.
4093 */
4094 } else {
4095 error = xfs_btree_dec_cursor(cur, level, stat);
4096 if (error)
4097 goto error0;
4098 return 0;
4099 }
4100
4101 rrecs = xfs_btree_get_numrecs(right);
4102 lrecs = xfs_btree_get_numrecs(left);
4103
4104 /*
4105 * We're now going to join "left" and "right" by moving all the stuff
4106 * in "right" to "left" and deleting "right".
4107 */
4108 XFS_BTREE_STATS_ADD(cur, moves, rrecs);
4109 if (level > 0) {
4110 /* It's a non-leaf. Move keys and pointers. */
4111 union xfs_btree_key *lkp; /* left btree key */
4112 union xfs_btree_ptr *lpp; /* left address pointer */
4113 union xfs_btree_key *rkp; /* right btree key */
4114 union xfs_btree_ptr *rpp; /* right address pointer */
4115
4116 lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
4117 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
4118 rkp = xfs_btree_key_addr(cur, 1, right);
4119 rpp = xfs_btree_ptr_addr(cur, 1, right);
4120 #ifdef DEBUG
4121 for (i = 1; i < rrecs; i++) {
4122 error = xfs_btree_check_ptr(cur, rpp, i, level);
4123 if (error)
4124 goto error0;
4125 }
4126 #endif
4127 xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
4128 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
4129
4130 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
4131 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
4132 } else {
4133 /* It's a leaf. Move records. */
4134 union xfs_btree_rec *lrp; /* left record pointer */
4135 union xfs_btree_rec *rrp; /* right record pointer */
4136
4137 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
4138 rrp = xfs_btree_rec_addr(cur, 1, right);
4139
4140 xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
4141 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
4142 }
4143
4144 XFS_BTREE_STATS_INC(cur, join);
4145
4146 /*
4147 * Fix up the number of records and right block pointer in the
4148 * surviving block, and log it.
4149 */
4150 xfs_btree_set_numrecs(left, lrecs + rrecs);
4151 xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB),
4152 xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
4153 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
4154
4155 /* If there is a right sibling, point it to the remaining block. */
4156 xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
4157 if (!xfs_btree_ptr_is_null(cur, &cptr)) {
4158 error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
4159 if (error)
4160 goto error0;
4161 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
4162 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
4163 }
4164
4165 /* Free the deleted block. */
4166 error = xfs_btree_free_block(cur, rbp);
4167 if (error)
4168 goto error0;
4169
4170 /*
4171 * If we joined with the left neighbor, set the buffer in the
4172 * cursor to the left block, and fix up the index.
4173 */
4174 if (bp != lbp) {
4175 cur->bc_bufs[level] = lbp;
4176 cur->bc_ptrs[level] += lrecs;
4177 cur->bc_ra[level] = 0;
4178 }
4179 /*
4180 * If we joined with the right neighbor and there's a level above
4181 * us, increment the cursor at that level.
4182 */
4183 else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
4184 (level + 1 < cur->bc_nlevels)) {
4185 error = xfs_btree_increment(cur, level + 1, &i);
4186 if (error)
4187 goto error0;
4188 }
4189
4190 /*
4191 * Readjust the ptr at this level if it's not a leaf, since it's
4192 * still pointing at the deletion point, which makes the cursor
4193 * inconsistent. If this makes the ptr 0, the caller fixes it up.
4194 * We can't use decrement because it would change the next level up.
4195 */
4196 if (level > 0)
4197 cur->bc_ptrs[level]--;
4198
4199 /*
4200 * We combined blocks, so we have to update the parent keys if the
4201 * btree supports overlapped intervals. However, bc_ptrs[level + 1]
4202 * points to the old block so that the caller knows which record to
4203 * delete. Therefore, the caller must be savvy enough to call updkeys
4204 * for us if we return stat == 2. The other exit points from this
4205 * function don't require deletions further up the tree, so they can
4206 * call updkeys directly.
4207 */
4208
4209 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
4210 /* Return value means the next level up has something to do. */
4211 *stat = 2;
4212 return 0;
4213
4214 error0:
4215 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
4216 if (tcur)
4217 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
4218 return error;
4219 }
4220
4221 /*
4222 * Delete the record pointed to by cur.
4223 * The cursor refers to the place where the record was (could be inserted)
4224 * when the operation returns.
4225 */
4226 int /* error */
xfs_btree_delete(struct xfs_btree_cur * cur,int * stat)4227 xfs_btree_delete(
4228 struct xfs_btree_cur *cur,
4229 int *stat) /* success/failure */
4230 {
4231 int error; /* error return value */
4232 int level;
4233 int i;
4234 bool joined = false;
4235
4236 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
4237
4238 /*
4239 * Go up the tree, starting at leaf level.
4240 *
4241 * If 2 is returned then a join was done; go to the next level.
4242 * Otherwise we are done.
4243 */
4244 for (level = 0, i = 2; i == 2; level++) {
4245 error = xfs_btree_delrec(cur, level, &i);
4246 if (error)
4247 goto error0;
4248 if (i == 2)
4249 joined = true;
4250 }
4251
4252 /*
4253 * If we combined blocks as part of deleting the record, delrec won't
4254 * have updated the parent high keys so we have to do that here.
4255 */
4256 if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) {
4257 error = xfs_btree_updkeys_force(cur, 0);
4258 if (error)
4259 goto error0;
4260 }
4261
4262 if (i == 0) {
4263 for (level = 1; level < cur->bc_nlevels; level++) {
4264 if (cur->bc_ptrs[level] == 0) {
4265 error = xfs_btree_decrement(cur, level, &i);
4266 if (error)
4267 goto error0;
4268 break;
4269 }
4270 }
4271 }
4272
4273 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
4274 *stat = i;
4275 return 0;
4276 error0:
4277 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
4278 return error;
4279 }
4280
4281 /*
4282 * Get the data from the pointed-to record.
4283 */
4284 int /* error */
xfs_btree_get_rec(struct xfs_btree_cur * cur,union xfs_btree_rec ** recp,int * stat)4285 xfs_btree_get_rec(
4286 struct xfs_btree_cur *cur, /* btree cursor */
4287 union xfs_btree_rec **recp, /* output: btree record */
4288 int *stat) /* output: success/failure */
4289 {
4290 struct xfs_btree_block *block; /* btree block */
4291 struct xfs_buf *bp; /* buffer pointer */
4292 int ptr; /* record number */
4293 #ifdef DEBUG
4294 int error; /* error return value */
4295 #endif
4296
4297 ptr = cur->bc_ptrs[0];
4298 block = xfs_btree_get_block(cur, 0, &bp);
4299
4300 #ifdef DEBUG
4301 error = xfs_btree_check_block(cur, block, 0, bp);
4302 if (error)
4303 return error;
4304 #endif
4305
4306 /*
4307 * Off the right end or left end, return failure.
4308 */
4309 if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
4310 *stat = 0;
4311 return 0;
4312 }
4313
4314 /*
4315 * Point to the record and extract its data.
4316 */
4317 *recp = xfs_btree_rec_addr(cur, ptr, block);
4318 *stat = 1;
4319 return 0;
4320 }
4321
4322 /* Visit a block in a btree. */
4323 STATIC int
xfs_btree_visit_block(struct xfs_btree_cur * cur,int level,xfs_btree_visit_blocks_fn fn,void * data)4324 xfs_btree_visit_block(
4325 struct xfs_btree_cur *cur,
4326 int level,
4327 xfs_btree_visit_blocks_fn fn,
4328 void *data)
4329 {
4330 struct xfs_btree_block *block;
4331 struct xfs_buf *bp;
4332 union xfs_btree_ptr rptr;
4333 int error;
4334
4335 /* do right sibling readahead */
4336 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
4337 block = xfs_btree_get_block(cur, level, &bp);
4338
4339 /* process the block */
4340 error = fn(cur, level, data);
4341 if (error)
4342 return error;
4343
4344 /* now read rh sibling block for next iteration */
4345 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
4346 if (xfs_btree_ptr_is_null(cur, &rptr))
4347 return -ENOENT;
4348
4349 return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
4350 }
4351
4352
4353 /* Visit every block in a btree. */
4354 int
xfs_btree_visit_blocks(struct xfs_btree_cur * cur,xfs_btree_visit_blocks_fn fn,void * data)4355 xfs_btree_visit_blocks(
4356 struct xfs_btree_cur *cur,
4357 xfs_btree_visit_blocks_fn fn,
4358 void *data)
4359 {
4360 union xfs_btree_ptr lptr;
4361 int level;
4362 struct xfs_btree_block *block = NULL;
4363 int error = 0;
4364
4365 cur->bc_ops->init_ptr_from_cur(cur, &lptr);
4366
4367 /* for each level */
4368 for (level = cur->bc_nlevels - 1; level >= 0; level--) {
4369 /* grab the left hand block */
4370 error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
4371 if (error)
4372 return error;
4373
4374 /* readahead the left most block for the next level down */
4375 if (level > 0) {
4376 union xfs_btree_ptr *ptr;
4377
4378 ptr = xfs_btree_ptr_addr(cur, 1, block);
4379 xfs_btree_readahead_ptr(cur, ptr, 1);
4380
4381 /* save for the next iteration of the loop */
4382 xfs_btree_copy_ptrs(cur, &lptr, ptr, 1);
4383 }
4384
4385 /* for each buffer in the level */
4386 do {
4387 error = xfs_btree_visit_block(cur, level, fn, data);
4388 } while (!error);
4389
4390 if (error != -ENOENT)
4391 return error;
4392 }
4393
4394 return 0;
4395 }
4396
4397 /*
4398 * Change the owner of a btree.
4399 *
 * The mechanism we use here is ordered buffer logging. Because we don't know
 * how many buffers we are going to need to modify, we don't really want to
 * have to make transaction reservations for the worst case of every buffer in a
 * full size btree as that may be more space than we can fit in the log....
4404 *
4405 * We do the btree walk in the most optimal manner possible - we have sibling
4406 * pointers so we can just walk all the blocks on each level from left to right
4407 * in a single pass, and then move to the next level and do the same. We can
4408 * also do readahead on the sibling pointers to get IO moving more quickly,
4409 * though for slow disks this is unlikely to make much difference to performance
4410 * as the amount of CPU work we have to do before moving to the next block is
4411 * relatively small.
4412 *
4413 * For each btree block that we load, modify the owner appropriately, set the
4414 * buffer as an ordered buffer and log it appropriately. We need to ensure that
4415 * we mark the region we change dirty so that if the buffer is relogged in
4416 * a subsequent transaction the changes we make here as an ordered buffer are
4417 * correctly relogged in that transaction. If we are in recovery context, then
4418 * just queue the modified buffer as delayed write buffer so the transaction
4419 * recovery completion writes the changes to disk.
4420 */
/* Context passed through xfs_btree_visit_blocks() when rewriting owners. */
struct xfs_btree_block_change_owner_info {
	__uint64_t		new_owner;	/* owner value to stamp into each block */
	struct list_head	*buffer_list;	/* delwri queue, used when there is no transaction */
};
4425
4426 static int
xfs_btree_block_change_owner(struct xfs_btree_cur * cur,int level,void * data)4427 xfs_btree_block_change_owner(
4428 struct xfs_btree_cur *cur,
4429 int level,
4430 void *data)
4431 {
4432 struct xfs_btree_block_change_owner_info *bbcoi = data;
4433 struct xfs_btree_block *block;
4434 struct xfs_buf *bp;
4435
4436 /* modify the owner */
4437 block = xfs_btree_get_block(cur, level, &bp);
4438 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
4439 if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner))
4440 return 0;
4441 block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
4442 } else {
4443 if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner))
4444 return 0;
4445 block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
4446 }
4447
4448 /*
4449 * If the block is a root block hosted in an inode, we might not have a
4450 * buffer pointer here and we shouldn't attempt to log the change as the
4451 * information is already held in the inode and discarded when the root
4452 * block is formatted into the on-disk inode fork. We still change it,
4453 * though, so everything is consistent in memory.
4454 */
4455 if (!bp) {
4456 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
4457 ASSERT(level == cur->bc_nlevels - 1);
4458 return 0;
4459 }
4460
4461 if (cur->bc_tp) {
4462 if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) {
4463 xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
4464 return -EAGAIN;
4465 }
4466 } else {
4467 xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
4468 }
4469
4470 return 0;
4471 }
4472
4473 int
xfs_btree_change_owner(struct xfs_btree_cur * cur,__uint64_t new_owner,struct list_head * buffer_list)4474 xfs_btree_change_owner(
4475 struct xfs_btree_cur *cur,
4476 __uint64_t new_owner,
4477 struct list_head *buffer_list)
4478 {
4479 struct xfs_btree_block_change_owner_info bbcoi;
4480
4481 bbcoi.new_owner = new_owner;
4482 bbcoi.buffer_list = buffer_list;
4483
4484 return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner,
4485 &bbcoi);
4486 }
4487
4488 /**
4489 * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
4490 * btree block
4491 *
4492 * @bp: buffer containing the btree block
4493 * @max_recs: pointer to the m_*_mxr max records field in the xfs mount
4494 * @pag_max_level: pointer to the per-ag max level field
4495 */
4496 bool
xfs_btree_sblock_v5hdr_verify(struct xfs_buf * bp)4497 xfs_btree_sblock_v5hdr_verify(
4498 struct xfs_buf *bp)
4499 {
4500 struct xfs_mount *mp = bp->b_target->bt_mount;
4501 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4502 struct xfs_perag *pag = bp->b_pag;
4503
4504 if (!xfs_sb_version_hascrc(&mp->m_sb))
4505 return false;
4506 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
4507 return false;
4508 if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
4509 return false;
4510 if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
4511 return false;
4512 return true;
4513 }
4514
4515 /**
4516 * xfs_btree_sblock_verify() -- verify a short-format btree block
4517 *
4518 * @bp: buffer containing the btree block
4519 * @max_recs: maximum records allowed in this btree node
4520 */
4521 bool
xfs_btree_sblock_verify(struct xfs_buf * bp,unsigned int max_recs)4522 xfs_btree_sblock_verify(
4523 struct xfs_buf *bp,
4524 unsigned int max_recs)
4525 {
4526 struct xfs_mount *mp = bp->b_target->bt_mount;
4527 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4528
4529 /* numrecs verification */
4530 if (be16_to_cpu(block->bb_numrecs) > max_recs)
4531 return false;
4532
4533 /* sibling pointer verification */
4534 if (!block->bb_u.s.bb_leftsib ||
4535 (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
4536 block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
4537 return false;
4538 if (!block->bb_u.s.bb_rightsib ||
4539 (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
4540 block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
4541 return false;
4542
4543 return true;
4544 }
4545
4546 /*
4547 * Calculate the number of btree levels needed to store a given number of
4548 * records in a short-format btree.
4549 */
4550 uint
xfs_btree_compute_maxlevels(struct xfs_mount * mp,uint * limits,unsigned long len)4551 xfs_btree_compute_maxlevels(
4552 struct xfs_mount *mp,
4553 uint *limits,
4554 unsigned long len)
4555 {
4556 uint level;
4557 unsigned long maxblocks;
4558
4559 maxblocks = (len + limits[0] - 1) / limits[0];
4560 for (level = 1; maxblocks > 1; level++)
4561 maxblocks = (maxblocks + limits[1] - 1) / limits[1];
4562 return level;
4563 }
4564
4565 /*
4566 * Query a regular btree for all records overlapping a given interval.
4567 * Start with a LE lookup of the key of low_rec and return all records
4568 * until we find a record with a key greater than the key of high_rec.
4569 */
4570 STATIC int
xfs_btree_simple_query_range(struct xfs_btree_cur * cur,union xfs_btree_key * low_key,union xfs_btree_key * high_key,xfs_btree_query_range_fn fn,void * priv)4571 xfs_btree_simple_query_range(
4572 struct xfs_btree_cur *cur,
4573 union xfs_btree_key *low_key,
4574 union xfs_btree_key *high_key,
4575 xfs_btree_query_range_fn fn,
4576 void *priv)
4577 {
4578 union xfs_btree_rec *recp;
4579 union xfs_btree_key rec_key;
4580 __int64_t diff;
4581 int stat;
4582 bool firstrec = true;
4583 int error;
4584
4585 ASSERT(cur->bc_ops->init_high_key_from_rec);
4586 ASSERT(cur->bc_ops->diff_two_keys);
4587
4588 /*
4589 * Find the leftmost record. The btree cursor must be set
4590 * to the low record used to generate low_key.
4591 */
4592 stat = 0;
4593 error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
4594 if (error)
4595 goto out;
4596
4597 /* Nothing? See if there's anything to the right. */
4598 if (!stat) {
4599 error = xfs_btree_increment(cur, 0, &stat);
4600 if (error)
4601 goto out;
4602 }
4603
4604 while (stat) {
4605 /* Find the record. */
4606 error = xfs_btree_get_rec(cur, &recp, &stat);
4607 if (error || !stat)
4608 break;
4609
4610 /* Skip if high_key(rec) < low_key. */
4611 if (firstrec) {
4612 cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
4613 firstrec = false;
4614 diff = cur->bc_ops->diff_two_keys(cur, low_key,
4615 &rec_key);
4616 if (diff > 0)
4617 goto advloop;
4618 }
4619
4620 /* Stop if high_key < low_key(rec). */
4621 cur->bc_ops->init_key_from_rec(&rec_key, recp);
4622 diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key);
4623 if (diff > 0)
4624 break;
4625
4626 /* Callback */
4627 error = fn(cur, recp, priv);
4628 if (error < 0 || error == XFS_BTREE_QUERY_RANGE_ABORT)
4629 break;
4630
4631 advloop:
4632 /* Move on to the next record. */
4633 error = xfs_btree_increment(cur, 0, &stat);
4634 if (error)
4635 break;
4636 }
4637
4638 out:
4639 return error;
4640 }
4641
4642 /*
4643 * Query an overlapped interval btree for all records overlapping a given
4644 * interval. This function roughly follows the algorithm given in
4645 * "Interval Trees" of _Introduction to Algorithms_, which is section
4646 * 14.3 in the 2nd and 3rd editions.
4647 *
4648 * First, generate keys for the low and high records passed in.
4649 *
4650 * For any leaf node, generate the high and low keys for the record.
4651 * If the record keys overlap with the query low/high keys, pass the
4652 * record to the function iterator.
4653 *
4654 * For any internal node, compare the low and high keys of each
4655 * pointer against the query low/high keys. If there's an overlap,
4656 * follow the pointer.
4657 *
4658 * As an optimization, we stop scanning a block when we find a low key
4659 * that is greater than the query's high key.
4660 */
STATIC int
xfs_btree_overlapped_query_range(
	struct xfs_btree_cur		*cur,
	union xfs_btree_key		*low_key,
	union xfs_btree_key		*high_key,
	xfs_btree_query_range_fn	fn,
	void				*priv)
{
	union xfs_btree_ptr		ptr;	/* root block pointer */
	union xfs_btree_ptr		*pp;	/* child pointer under scan */
	union xfs_btree_key		rec_key;	/* low key of current record */
	union xfs_btree_key		rec_hkey;	/* high key of current record */
	union xfs_btree_key		*lkp;	/* low key of current keyptr */
	union xfs_btree_key		*hkp;	/* high key of current keyptr */
	union xfs_btree_rec		*recp;	/* current leaf record */
	struct xfs_btree_block		*block;	/* current btree block */
	__int64_t			ldiff;	/* low-key comparison result */
	__int64_t			hdiff;	/* high-key comparison result */
	int				level;	/* current btree level */
	struct xfs_buf			*bp;	/* buffer for current block */
	int				i;
	int				error;

	/* Load the root of the btree. */
	level = cur->bc_nlevels - 1;
	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
	error = xfs_btree_lookup_get_block(cur, level, &ptr, &block);
	if (error)
		return error;
	xfs_btree_get_block(cur, level, &bp);
	trace_xfs_btree_overlapped_query_range(cur, level, bp);
#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto out;
#endif
	cur->bc_ptrs[level] = 1;

	/*
	 * Iterative depth-first walk: bc_ptrs[] holds the 1-based position
	 * within each block on the path from the root down to the current
	 * block, so we can descend into children and pop back up.
	 */
	while (level < cur->bc_nlevels) {
		block = xfs_btree_get_block(cur, level, &bp);

		/* End of node, pop back towards the root. */
		if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
pop_up:
			/* advance the parent past the child we just finished */
			if (level < cur->bc_nlevels - 1)
				cur->bc_ptrs[level + 1]++;
			level++;
			continue;
		}

		if (level == 0) {
			/* Handle a leaf node. */
			recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);

			cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp);
			ldiff = cur->bc_ops->diff_two_keys(cur, &rec_hkey,
					low_key);

			cur->bc_ops->init_key_from_rec(&rec_key, recp);
			hdiff = cur->bc_ops->diff_two_keys(cur, high_key,
					&rec_key);

			/*
			 * If (record's high key >= query's low key) and
			 * (query's high key >= record's low key), then
			 * this record overlaps the query range; callback.
			 */
			if (ldiff >= 0 && hdiff >= 0) {
				error = fn(cur, recp, priv);
				if (error < 0 ||
				    error == XFS_BTREE_QUERY_RANGE_ABORT)
					break;
			} else if (hdiff < 0) {
				/* Record is larger than high key; pop. */
				goto pop_up;
			}
			cur->bc_ptrs[level]++;
			continue;
		}

		/* Handle an internal node. */
		lkp = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);
		hkp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
		pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);

		ldiff = cur->bc_ops->diff_two_keys(cur, hkp, low_key);
		hdiff = cur->bc_ops->diff_two_keys(cur, high_key, lkp);

		/*
		 * If (pointer's high key >= query's low key) and
		 * (query's high key >= pointer's low key), then
		 * this record overlaps the query range; follow pointer.
		 */
		if (ldiff >= 0 && hdiff >= 0) {
			/* descend into the overlapping child block */
			level--;
			error = xfs_btree_lookup_get_block(cur, level, pp,
					&block);
			if (error)
				goto out;
			xfs_btree_get_block(cur, level, &bp);
			trace_xfs_btree_overlapped_query_range(cur, level, bp);
#ifdef DEBUG
			error = xfs_btree_check_block(cur, block, level, bp);
			if (error)
				goto out;
#endif
			cur->bc_ptrs[level] = 1;
			continue;
		} else if (hdiff < 0) {
			/* The low key is larger than the upper range; pop. */
			goto pop_up;
		}
		cur->bc_ptrs[level]++;
	}

out:
	/*
	 * If we don't end this function with the cursor pointing at a record
	 * block, a subsequent non-error cursor deletion will not release
	 * node-level buffers, causing a buffer leak. This is quite possible
	 * with a zero-results range query, so release the buffers if we
	 * failed to return any results.
	 */
	if (cur->bc_bufs[0] == NULL) {
		for (i = 0; i < cur->bc_nlevels; i++) {
			if (cur->bc_bufs[i]) {
				xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
				cur->bc_bufs[i] = NULL;
				cur->bc_ptrs[i] = 0;
				cur->bc_ra[i] = 0;
			}
		}
	}

	return error;
}
4797
4798 /*
4799 * Query a btree for all records overlapping a given interval of keys. The
4800 * supplied function will be called with each record found; return one of the
4801 * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error
4802 * code. This function returns XFS_BTREE_QUERY_RANGE_ABORT, zero, or a
4803 * negative error code.
4804 */
4805 int
xfs_btree_query_range(struct xfs_btree_cur * cur,union xfs_btree_irec * low_rec,union xfs_btree_irec * high_rec,xfs_btree_query_range_fn fn,void * priv)4806 xfs_btree_query_range(
4807 struct xfs_btree_cur *cur,
4808 union xfs_btree_irec *low_rec,
4809 union xfs_btree_irec *high_rec,
4810 xfs_btree_query_range_fn fn,
4811 void *priv)
4812 {
4813 union xfs_btree_rec rec;
4814 union xfs_btree_key low_key;
4815 union xfs_btree_key high_key;
4816
4817 /* Find the keys of both ends of the interval. */
4818 cur->bc_rec = *high_rec;
4819 cur->bc_ops->init_rec_from_cur(cur, &rec);
4820 cur->bc_ops->init_key_from_rec(&high_key, &rec);
4821
4822 cur->bc_rec = *low_rec;
4823 cur->bc_ops->init_rec_from_cur(cur, &rec);
4824 cur->bc_ops->init_key_from_rec(&low_key, &rec);
4825
4826 /* Enforce low key < high key. */
4827 if (cur->bc_ops->diff_two_keys(cur, &low_key, &high_key) > 0)
4828 return -EINVAL;
4829
4830 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
4831 return xfs_btree_simple_query_range(cur, &low_key,
4832 &high_key, fn, priv);
4833 return xfs_btree_overlapped_query_range(cur, &low_key, &high_key,
4834 fn, priv);
4835 }
4836
4837 /*
4838 * Calculate the number of blocks needed to store a given number of records
4839 * in a short-format (per-AG metadata) btree.
4840 */
4841 xfs_extlen_t
xfs_btree_calc_size(struct xfs_mount * mp,uint * limits,unsigned long long len)4842 xfs_btree_calc_size(
4843 struct xfs_mount *mp,
4844 uint *limits,
4845 unsigned long long len)
4846 {
4847 int level;
4848 int maxrecs;
4849 xfs_extlen_t rval;
4850
4851 maxrecs = limits[0];
4852 for (level = 0, rval = 0; len > 1; level++) {
4853 len += maxrecs - 1;
4854 do_div(len, maxrecs);
4855 maxrecs = limits[1];
4856 rval += len;
4857 }
4858 return rval;
4859 }
4860
4861 static int
xfs_btree_count_blocks_helper(struct xfs_btree_cur * cur,int level,void * data)4862 xfs_btree_count_blocks_helper(
4863 struct xfs_btree_cur *cur,
4864 int level,
4865 void *data)
4866 {
4867 xfs_extlen_t *blocks = data;
4868 (*blocks)++;
4869
4870 return 0;
4871 }
4872
4873 /* Count the blocks in a btree and return the result in *blocks. */
4874 int
xfs_btree_count_blocks(struct xfs_btree_cur * cur,xfs_extlen_t * blocks)4875 xfs_btree_count_blocks(
4876 struct xfs_btree_cur *cur,
4877 xfs_extlen_t *blocks)
4878 {
4879 *blocks = 0;
4880 return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
4881 blocks);
4882 }
4883