• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir2.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h"
30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir2_sf.h"
34 #include "xfs_attr_sf.h"
35 #include "xfs_dinode.h"
36 #include "xfs_inode.h"
37 #include "xfs_inode_item.h"
38 #include "xfs_btree.h"
39 #include "xfs_btree_trace.h"
40 #include "xfs_ialloc.h"
41 #include "xfs_error.h"
42 
43 /*
44  * Cursor allocation zone.
45  */
46 kmem_zone_t	*xfs_btree_cur_zone;
47 
48 /*
49  * Btree magic numbers.
50  */
51 const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
52 	XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
53 };
54 
55 
56 STATIC int				/* error (0 or EFSCORRUPTED) */
xfs_btree_check_lblock(struct xfs_btree_cur * cur,struct xfs_btree_block * block,int level,struct xfs_buf * bp)57 xfs_btree_check_lblock(
58 	struct xfs_btree_cur	*cur,	/* btree cursor */
59 	struct xfs_btree_block	*block,	/* btree long form block pointer */
60 	int			level,	/* level of the btree block */
61 	struct xfs_buf		*bp)	/* buffer for block, if any */
62 {
63 	int			lblock_ok; /* block passes checks */
64 	struct xfs_mount	*mp;	/* file system mount point */
65 
66 	mp = cur->bc_mp;
67 	lblock_ok =
68 		be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
69 		be16_to_cpu(block->bb_level) == level &&
70 		be16_to_cpu(block->bb_numrecs) <=
71 			cur->bc_ops->get_maxrecs(cur, level) &&
72 		block->bb_u.l.bb_leftsib &&
73 		(be64_to_cpu(block->bb_u.l.bb_leftsib) == NULLDFSBNO ||
74 		 XFS_FSB_SANITY_CHECK(mp,
75 		 	be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
76 		block->bb_u.l.bb_rightsib &&
77 		(be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO ||
78 		 XFS_FSB_SANITY_CHECK(mp,
79 		 	be64_to_cpu(block->bb_u.l.bb_rightsib)));
80 	if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
81 			XFS_ERRTAG_BTREE_CHECK_LBLOCK,
82 			XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
83 		if (bp)
84 			xfs_buftrace("LBTREE ERROR", bp);
85 		XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
86 				 mp);
87 		return XFS_ERROR(EFSCORRUPTED);
88 	}
89 	return 0;
90 }
91 
92 STATIC int				/* error (0 or EFSCORRUPTED) */
xfs_btree_check_sblock(struct xfs_btree_cur * cur,struct xfs_btree_block * block,int level,struct xfs_buf * bp)93 xfs_btree_check_sblock(
94 	struct xfs_btree_cur	*cur,	/* btree cursor */
95 	struct xfs_btree_block	*block,	/* btree short form block pointer */
96 	int			level,	/* level of the btree block */
97 	struct xfs_buf		*bp)	/* buffer containing block */
98 {
99 	struct xfs_buf		*agbp;	/* buffer for ag. freespace struct */
100 	struct xfs_agf		*agf;	/* ag. freespace structure */
101 	xfs_agblock_t		agflen;	/* native ag. freespace length */
102 	int			sblock_ok; /* block passes checks */
103 
104 	agbp = cur->bc_private.a.agbp;
105 	agf = XFS_BUF_TO_AGF(agbp);
106 	agflen = be32_to_cpu(agf->agf_length);
107 	sblock_ok =
108 		be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
109 		be16_to_cpu(block->bb_level) == level &&
110 		be16_to_cpu(block->bb_numrecs) <=
111 			cur->bc_ops->get_maxrecs(cur, level) &&
112 		(be32_to_cpu(block->bb_u.s.bb_leftsib) == NULLAGBLOCK ||
113 		 be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
114 		block->bb_u.s.bb_leftsib &&
115 		(be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK ||
116 		 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
117 		block->bb_u.s.bb_rightsib;
118 	if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
119 			XFS_ERRTAG_BTREE_CHECK_SBLOCK,
120 			XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
121 		if (bp)
122 			xfs_buftrace("SBTREE ERROR", bp);
123 		XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW,
124 				 cur->bc_mp);
125 		return XFS_ERROR(EFSCORRUPTED);
126 	}
127 	return 0;
128 }
129 
130 /*
131  * Debug routine: check that block header is ok.
132  */
133 int
xfs_btree_check_block(struct xfs_btree_cur * cur,struct xfs_btree_block * block,int level,struct xfs_buf * bp)134 xfs_btree_check_block(
135 	struct xfs_btree_cur	*cur,	/* btree cursor */
136 	struct xfs_btree_block	*block,	/* generic btree block pointer */
137 	int			level,	/* level of the btree block */
138 	struct xfs_buf		*bp)	/* buffer containing block, if any */
139 {
140 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
141 		return xfs_btree_check_lblock(cur, block, level, bp);
142 	else
143 		return xfs_btree_check_sblock(cur, block, level, bp);
144 }
145 
146 /*
147  * Check that (long) pointer is ok.
148  */
149 int					/* error (0 or EFSCORRUPTED) */
xfs_btree_check_lptr(struct xfs_btree_cur * cur,xfs_dfsbno_t bno,int level)150 xfs_btree_check_lptr(
151 	struct xfs_btree_cur	*cur,	/* btree cursor */
152 	xfs_dfsbno_t		bno,	/* btree block disk address */
153 	int			level)	/* btree block level */
154 {
155 	XFS_WANT_CORRUPTED_RETURN(
156 		level > 0 &&
157 		bno != NULLDFSBNO &&
158 		XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
159 	return 0;
160 }
161 
162 #ifdef DEBUG
163 /*
164  * Check that (short) pointer is ok.
165  */
166 STATIC int				/* error (0 or EFSCORRUPTED) */
xfs_btree_check_sptr(struct xfs_btree_cur * cur,xfs_agblock_t bno,int level)167 xfs_btree_check_sptr(
168 	struct xfs_btree_cur	*cur,	/* btree cursor */
169 	xfs_agblock_t		bno,	/* btree block disk address */
170 	int			level)	/* btree block level */
171 {
172 	xfs_agblock_t		agblocks = cur->bc_mp->m_sb.sb_agblocks;
173 
174 	XFS_WANT_CORRUPTED_RETURN(
175 		level > 0 &&
176 		bno != NULLAGBLOCK &&
177 		bno != 0 &&
178 		bno < agblocks);
179 	return 0;
180 }
181 
182 /*
183  * Check that block ptr is ok.
184  */
185 STATIC int				/* error (0 or EFSCORRUPTED) */
xfs_btree_check_ptr(struct xfs_btree_cur * cur,union xfs_btree_ptr * ptr,int index,int level)186 xfs_btree_check_ptr(
187 	struct xfs_btree_cur	*cur,	/* btree cursor */
188 	union xfs_btree_ptr	*ptr,	/* btree block disk address */
189 	int			index,	/* offset from ptr to check */
190 	int			level)	/* btree block level */
191 {
192 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
193 		return xfs_btree_check_lptr(cur,
194 				be64_to_cpu((&ptr->l)[index]), level);
195 	} else {
196 		return xfs_btree_check_sptr(cur,
197 				be32_to_cpu((&ptr->s)[index]), level);
198 	}
199 }
200 #endif
201 
202 /*
203  * Delete the btree cursor.
204  */
205 void
xfs_btree_del_cursor(xfs_btree_cur_t * cur,int error)206 xfs_btree_del_cursor(
207 	xfs_btree_cur_t	*cur,		/* btree cursor */
208 	int		error)		/* del because of error */
209 {
210 	int		i;		/* btree level */
211 
212 	/*
213 	 * Clear the buffer pointers, and release the buffers.
214 	 * If we're doing this in the face of an error, we
215 	 * need to make sure to inspect all of the entries
216 	 * in the bc_bufs array for buffers to be unlocked.
217 	 * This is because some of the btree code works from
218 	 * level n down to 0, and if we get an error along
219 	 * the way we won't have initialized all the entries
220 	 * down to 0.
221 	 */
222 	for (i = 0; i < cur->bc_nlevels; i++) {
223 		if (cur->bc_bufs[i])
224 			xfs_btree_setbuf(cur, i, NULL);
225 		else if (!error)
226 			break;
227 	}
228 	/*
229 	 * Can't free a bmap cursor without having dealt with the
230 	 * allocated indirect blocks' accounting.
231 	 */
232 	ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
233 	       cur->bc_private.b.allocated == 0);
234 	/*
235 	 * Free the cursor.
236 	 */
237 	kmem_zone_free(xfs_btree_cur_zone, cur);
238 }
239 
240 /*
241  * Duplicate the btree cursor.
242  * Allocate a new one, copy the record, re-get the buffers.
243  */
244 int					/* error */
xfs_btree_dup_cursor(xfs_btree_cur_t * cur,xfs_btree_cur_t ** ncur)245 xfs_btree_dup_cursor(
246 	xfs_btree_cur_t	*cur,		/* input cursor */
247 	xfs_btree_cur_t	**ncur)		/* output cursor */
248 {
249 	xfs_buf_t	*bp;		/* btree block's buffer pointer */
250 	int		error;		/* error return value */
251 	int		i;		/* level number of btree block */
252 	xfs_mount_t	*mp;		/* mount structure for filesystem */
253 	xfs_btree_cur_t	*new;		/* new cursor value */
254 	xfs_trans_t	*tp;		/* transaction pointer, can be NULL */
255 
256 	tp = cur->bc_tp;
257 	mp = cur->bc_mp;
258 
259 	/*
260 	 * Allocate a new cursor like the old one.
261 	 */
262 	new = cur->bc_ops->dup_cursor(cur);
263 
264 	/*
265 	 * Copy the record currently in the cursor.
266 	 */
267 	new->bc_rec = cur->bc_rec;
268 
269 	/*
270 	 * For each level current, re-get the buffer and copy the ptr value.
271 	 */
272 	for (i = 0; i < new->bc_nlevels; i++) {
273 		new->bc_ptrs[i] = cur->bc_ptrs[i];
274 		new->bc_ra[i] = cur->bc_ra[i];
275 		if ((bp = cur->bc_bufs[i])) {
276 			if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
277 				XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) {
278 				xfs_btree_del_cursor(new, error);
279 				*ncur = NULL;
280 				return error;
281 			}
282 			new->bc_bufs[i] = bp;
283 			ASSERT(bp);
284 			ASSERT(!XFS_BUF_GETERROR(bp));
285 		} else
286 			new->bc_bufs[i] = NULL;
287 	}
288 	*ncur = new;
289 	return 0;
290 }
291 
292 /*
293  * XFS btree block layout and addressing:
294  *
295  * There are two types of blocks in the btree: leaf and non-leaf blocks.
296  *
297  * The leaf record start with a header then followed by records containing
298  * the values.  A non-leaf block also starts with the same header, and
299  * then first contains lookup keys followed by an equal number of pointers
300  * to the btree blocks at the previous level.
301  *
302  *		+--------+-------+-------+-------+-------+-------+-------+
303  * Leaf:	| header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
304  *		+--------+-------+-------+-------+-------+-------+-------+
305  *
306  *		+--------+-------+-------+-------+-------+-------+-------+
307  * Non-Leaf:	| header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
308  *		+--------+-------+-------+-------+-------+-------+-------+
309  *
310  * The header is called struct xfs_btree_block for reasons better left unknown
311  * and comes in different versions for short (32bit) and long (64bit) block
312  * pointers.  The record and key structures are defined by the btree instances
313  * and opaque to the btree core.  The block pointers are simple disk endian
314  * integers, available in a short (32bit) and long (64bit) variant.
315  *
316  * The helpers below calculate the offset of a given record, key or pointer
317  * into a btree block (xfs_btree_*_offset) or return a pointer to the given
318  * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
319  * inside the btree block is done using indices starting at one, not zero!
320  */
321 
322 /*
323  * Return size of the btree block header for this btree instance.
324  */
xfs_btree_block_len(struct xfs_btree_cur * cur)325 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
326 {
327 	return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
328 		XFS_BTREE_LBLOCK_LEN :
329 		XFS_BTREE_SBLOCK_LEN;
330 }
331 
332 /*
333  * Return size of btree block pointers for this btree instance.
334  */
xfs_btree_ptr_len(struct xfs_btree_cur * cur)335 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
336 {
337 	return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
338 		sizeof(__be64) : sizeof(__be32);
339 }
340 
341 /*
342  * Calculate offset of the n-th record in a btree block.
343  */
344 STATIC size_t
xfs_btree_rec_offset(struct xfs_btree_cur * cur,int n)345 xfs_btree_rec_offset(
346 	struct xfs_btree_cur	*cur,
347 	int			n)
348 {
349 	return xfs_btree_block_len(cur) +
350 		(n - 1) * cur->bc_ops->rec_len;
351 }
352 
353 /*
354  * Calculate offset of the n-th key in a btree block.
355  */
356 STATIC size_t
xfs_btree_key_offset(struct xfs_btree_cur * cur,int n)357 xfs_btree_key_offset(
358 	struct xfs_btree_cur	*cur,
359 	int			n)
360 {
361 	return xfs_btree_block_len(cur) +
362 		(n - 1) * cur->bc_ops->key_len;
363 }
364 
365 /*
366  * Calculate offset of the n-th block pointer in a btree block.
367  */
368 STATIC size_t
xfs_btree_ptr_offset(struct xfs_btree_cur * cur,int n,int level)369 xfs_btree_ptr_offset(
370 	struct xfs_btree_cur	*cur,
371 	int			n,
372 	int			level)
373 {
374 	return xfs_btree_block_len(cur) +
375 		cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
376 		(n - 1) * xfs_btree_ptr_len(cur);
377 }
378 
379 /*
380  * Return a pointer to the n-th record in the btree block.
381  */
382 STATIC union xfs_btree_rec *
xfs_btree_rec_addr(struct xfs_btree_cur * cur,int n,struct xfs_btree_block * block)383 xfs_btree_rec_addr(
384 	struct xfs_btree_cur	*cur,
385 	int			n,
386 	struct xfs_btree_block	*block)
387 {
388 	return (union xfs_btree_rec *)
389 		((char *)block + xfs_btree_rec_offset(cur, n));
390 }
391 
392 /*
393  * Return a pointer to the n-th key in the btree block.
394  */
395 STATIC union xfs_btree_key *
xfs_btree_key_addr(struct xfs_btree_cur * cur,int n,struct xfs_btree_block * block)396 xfs_btree_key_addr(
397 	struct xfs_btree_cur	*cur,
398 	int			n,
399 	struct xfs_btree_block	*block)
400 {
401 	return (union xfs_btree_key *)
402 		((char *)block + xfs_btree_key_offset(cur, n));
403 }
404 
405 /*
406  * Return a pointer to the n-th block pointer in the btree block.
407  */
408 STATIC union xfs_btree_ptr *
xfs_btree_ptr_addr(struct xfs_btree_cur * cur,int n,struct xfs_btree_block * block)409 xfs_btree_ptr_addr(
410 	struct xfs_btree_cur	*cur,
411 	int			n,
412 	struct xfs_btree_block	*block)
413 {
414 	int			level = xfs_btree_get_level(block);
415 
416 	ASSERT(block->bb_level != 0);
417 
418 	return (union xfs_btree_ptr *)
419 		((char *)block + xfs_btree_ptr_offset(cur, n, level));
420 }
421 
422 /*
423  * Get a the root block which is stored in the inode.
424  *
425  * For now this btree implementation assumes the btree root is always
426  * stored in the if_broot field of an inode fork.
427  */
428 STATIC struct xfs_btree_block *
xfs_btree_get_iroot(struct xfs_btree_cur * cur)429 xfs_btree_get_iroot(
430        struct xfs_btree_cur    *cur)
431 {
432        struct xfs_ifork        *ifp;
433 
434        ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
435        return (struct xfs_btree_block *)ifp->if_broot;
436 }
437 
438 /*
439  * Retrieve the block pointer from the cursor at the given level.
440  * This may be an inode btree root or from a buffer.
441  */
442 STATIC struct xfs_btree_block *		/* generic btree block pointer */
xfs_btree_get_block(struct xfs_btree_cur * cur,int level,struct xfs_buf ** bpp)443 xfs_btree_get_block(
444 	struct xfs_btree_cur	*cur,	/* btree cursor */
445 	int			level,	/* level in btree */
446 	struct xfs_buf		**bpp)	/* buffer containing the block */
447 {
448 	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
449 	    (level == cur->bc_nlevels - 1)) {
450 		*bpp = NULL;
451 		return xfs_btree_get_iroot(cur);
452 	}
453 
454 	*bpp = cur->bc_bufs[level];
455 	return XFS_BUF_TO_BLOCK(*bpp);
456 }
457 
458 /*
459  * Get a buffer for the block, return it with no data read.
460  * Long-form addressing.
461  */
462 xfs_buf_t *				/* buffer for fsbno */
xfs_btree_get_bufl(xfs_mount_t * mp,xfs_trans_t * tp,xfs_fsblock_t fsbno,uint lock)463 xfs_btree_get_bufl(
464 	xfs_mount_t	*mp,		/* file system mount point */
465 	xfs_trans_t	*tp,		/* transaction pointer */
466 	xfs_fsblock_t	fsbno,		/* file system block number */
467 	uint		lock)		/* lock flags for get_buf */
468 {
469 	xfs_buf_t	*bp;		/* buffer pointer (return value) */
470 	xfs_daddr_t		d;		/* real disk block address */
471 
472 	ASSERT(fsbno != NULLFSBLOCK);
473 	d = XFS_FSB_TO_DADDR(mp, fsbno);
474 	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
475 	ASSERT(bp);
476 	ASSERT(!XFS_BUF_GETERROR(bp));
477 	return bp;
478 }
479 
480 /*
481  * Get a buffer for the block, return it with no data read.
482  * Short-form addressing.
483  */
484 xfs_buf_t *				/* buffer for agno/agbno */
xfs_btree_get_bufs(xfs_mount_t * mp,xfs_trans_t * tp,xfs_agnumber_t agno,xfs_agblock_t agbno,uint lock)485 xfs_btree_get_bufs(
486 	xfs_mount_t	*mp,		/* file system mount point */
487 	xfs_trans_t	*tp,		/* transaction pointer */
488 	xfs_agnumber_t	agno,		/* allocation group number */
489 	xfs_agblock_t	agbno,		/* allocation group block number */
490 	uint		lock)		/* lock flags for get_buf */
491 {
492 	xfs_buf_t	*bp;		/* buffer pointer (return value) */
493 	xfs_daddr_t		d;		/* real disk block address */
494 
495 	ASSERT(agno != NULLAGNUMBER);
496 	ASSERT(agbno != NULLAGBLOCK);
497 	d = XFS_AGB_TO_DADDR(mp, agno, agbno);
498 	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
499 	ASSERT(bp);
500 	ASSERT(!XFS_BUF_GETERROR(bp));
501 	return bp;
502 }
503 
504 /*
505  * Check for the cursor referring to the last block at the given level.
506  */
507 int					/* 1=is last block, 0=not last block */
xfs_btree_islastblock(xfs_btree_cur_t * cur,int level)508 xfs_btree_islastblock(
509 	xfs_btree_cur_t		*cur,	/* btree cursor */
510 	int			level)	/* level to check */
511 {
512 	struct xfs_btree_block	*block;	/* generic btree block pointer */
513 	xfs_buf_t		*bp;	/* buffer containing block */
514 
515 	block = xfs_btree_get_block(cur, level, &bp);
516 	xfs_btree_check_block(cur, block, level, bp);
517 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
518 		return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
519 	else
520 		return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
521 }
522 
523 /*
524  * Change the cursor to point to the first record at the given level.
525  * Other levels are unaffected.
526  */
527 STATIC int				/* success=1, failure=0 */
xfs_btree_firstrec(xfs_btree_cur_t * cur,int level)528 xfs_btree_firstrec(
529 	xfs_btree_cur_t		*cur,	/* btree cursor */
530 	int			level)	/* level to change */
531 {
532 	struct xfs_btree_block	*block;	/* generic btree block pointer */
533 	xfs_buf_t		*bp;	/* buffer containing block */
534 
535 	/*
536 	 * Get the block pointer for this level.
537 	 */
538 	block = xfs_btree_get_block(cur, level, &bp);
539 	xfs_btree_check_block(cur, block, level, bp);
540 	/*
541 	 * It's empty, there is no such record.
542 	 */
543 	if (!block->bb_numrecs)
544 		return 0;
545 	/*
546 	 * Set the ptr value to 1, that's the first record/key.
547 	 */
548 	cur->bc_ptrs[level] = 1;
549 	return 1;
550 }
551 
552 /*
553  * Change the cursor to point to the last record in the current block
554  * at the given level.  Other levels are unaffected.
555  */
556 STATIC int				/* success=1, failure=0 */
xfs_btree_lastrec(xfs_btree_cur_t * cur,int level)557 xfs_btree_lastrec(
558 	xfs_btree_cur_t		*cur,	/* btree cursor */
559 	int			level)	/* level to change */
560 {
561 	struct xfs_btree_block	*block;	/* generic btree block pointer */
562 	xfs_buf_t		*bp;	/* buffer containing block */
563 
564 	/*
565 	 * Get the block pointer for this level.
566 	 */
567 	block = xfs_btree_get_block(cur, level, &bp);
568 	xfs_btree_check_block(cur, block, level, bp);
569 	/*
570 	 * It's empty, there is no such record.
571 	 */
572 	if (!block->bb_numrecs)
573 		return 0;
574 	/*
575 	 * Set the ptr value to numrecs, that's the last record/key.
576 	 */
577 	cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
578 	return 1;
579 }
580 
581 /*
582  * Compute first and last byte offsets for the fields given.
583  * Interprets the offsets table, which contains struct field offsets.
584  */
585 void
xfs_btree_offsets(__int64_t fields,const short * offsets,int nbits,int * first,int * last)586 xfs_btree_offsets(
587 	__int64_t	fields,		/* bitmask of fields */
588 	const short	*offsets,	/* table of field offsets */
589 	int		nbits,		/* number of bits to inspect */
590 	int		*first,		/* output: first byte offset */
591 	int		*last)		/* output: last byte offset */
592 {
593 	int		i;		/* current bit number */
594 	__int64_t	imask;		/* mask for current bit number */
595 
596 	ASSERT(fields != 0);
597 	/*
598 	 * Find the lowest bit, so the first byte offset.
599 	 */
600 	for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
601 		if (imask & fields) {
602 			*first = offsets[i];
603 			break;
604 		}
605 	}
606 	/*
607 	 * Find the highest bit, so the last byte offset.
608 	 */
609 	for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
610 		if (imask & fields) {
611 			*last = offsets[i + 1] - 1;
612 			break;
613 		}
614 	}
615 }
616 
617 /*
618  * Get a buffer for the block, return it read in.
619  * Long-form addressing.
620  */
621 int					/* error */
xfs_btree_read_bufl(xfs_mount_t * mp,xfs_trans_t * tp,xfs_fsblock_t fsbno,uint lock,xfs_buf_t ** bpp,int refval)622 xfs_btree_read_bufl(
623 	xfs_mount_t	*mp,		/* file system mount point */
624 	xfs_trans_t	*tp,		/* transaction pointer */
625 	xfs_fsblock_t	fsbno,		/* file system block number */
626 	uint		lock,		/* lock flags for read_buf */
627 	xfs_buf_t	**bpp,		/* buffer for fsbno */
628 	int		refval)		/* ref count value for buffer */
629 {
630 	xfs_buf_t	*bp;		/* return value */
631 	xfs_daddr_t		d;		/* real disk block address */
632 	int		error;
633 
634 	ASSERT(fsbno != NULLFSBLOCK);
635 	d = XFS_FSB_TO_DADDR(mp, fsbno);
636 	if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
637 			mp->m_bsize, lock, &bp))) {
638 		return error;
639 	}
640 	ASSERT(!bp || !XFS_BUF_GETERROR(bp));
641 	if (bp != NULL) {
642 		XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
643 	}
644 	*bpp = bp;
645 	return 0;
646 }
647 
648 /*
649  * Get a buffer for the block, return it read in.
650  * Short-form addressing.
651  */
652 int					/* error */
xfs_btree_read_bufs(xfs_mount_t * mp,xfs_trans_t * tp,xfs_agnumber_t agno,xfs_agblock_t agbno,uint lock,xfs_buf_t ** bpp,int refval)653 xfs_btree_read_bufs(
654 	xfs_mount_t	*mp,		/* file system mount point */
655 	xfs_trans_t	*tp,		/* transaction pointer */
656 	xfs_agnumber_t	agno,		/* allocation group number */
657 	xfs_agblock_t	agbno,		/* allocation group block number */
658 	uint		lock,		/* lock flags for read_buf */
659 	xfs_buf_t	**bpp,		/* buffer for agno/agbno */
660 	int		refval)		/* ref count value for buffer */
661 {
662 	xfs_buf_t	*bp;		/* return value */
663 	xfs_daddr_t	d;		/* real disk block address */
664 	int		error;
665 
666 	ASSERT(agno != NULLAGNUMBER);
667 	ASSERT(agbno != NULLAGBLOCK);
668 	d = XFS_AGB_TO_DADDR(mp, agno, agbno);
669 	if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
670 					mp->m_bsize, lock, &bp))) {
671 		return error;
672 	}
673 	ASSERT(!bp || !XFS_BUF_GETERROR(bp));
674 	if (bp != NULL) {
675 		switch (refval) {
676 		case XFS_ALLOC_BTREE_REF:
677 			XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
678 			break;
679 		case XFS_INO_BTREE_REF:
680 			XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval);
681 			break;
682 		}
683 	}
684 	*bpp = bp;
685 	return 0;
686 }
687 
688 /*
689  * Read-ahead the block, don't wait for it, don't return a buffer.
690  * Long-form addressing.
691  */
692 /* ARGSUSED */
693 void
xfs_btree_reada_bufl(xfs_mount_t * mp,xfs_fsblock_t fsbno,xfs_extlen_t count)694 xfs_btree_reada_bufl(
695 	xfs_mount_t	*mp,		/* file system mount point */
696 	xfs_fsblock_t	fsbno,		/* file system block number */
697 	xfs_extlen_t	count)		/* count of filesystem blocks */
698 {
699 	xfs_daddr_t		d;
700 
701 	ASSERT(fsbno != NULLFSBLOCK);
702 	d = XFS_FSB_TO_DADDR(mp, fsbno);
703 	xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
704 }
705 
706 /*
707  * Read-ahead the block, don't wait for it, don't return a buffer.
708  * Short-form addressing.
709  */
710 /* ARGSUSED */
711 void
xfs_btree_reada_bufs(xfs_mount_t * mp,xfs_agnumber_t agno,xfs_agblock_t agbno,xfs_extlen_t count)712 xfs_btree_reada_bufs(
713 	xfs_mount_t	*mp,		/* file system mount point */
714 	xfs_agnumber_t	agno,		/* allocation group number */
715 	xfs_agblock_t	agbno,		/* allocation group block number */
716 	xfs_extlen_t	count)		/* count of filesystem blocks */
717 {
718 	xfs_daddr_t		d;
719 
720 	ASSERT(agno != NULLAGNUMBER);
721 	ASSERT(agbno != NULLAGBLOCK);
722 	d = XFS_AGB_TO_DADDR(mp, agno, agbno);
723 	xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
724 }
725 
726 STATIC int
xfs_btree_readahead_lblock(struct xfs_btree_cur * cur,int lr,struct xfs_btree_block * block)727 xfs_btree_readahead_lblock(
728 	struct xfs_btree_cur	*cur,
729 	int			lr,
730 	struct xfs_btree_block	*block)
731 {
732 	int			rval = 0;
733 	xfs_dfsbno_t		left = be64_to_cpu(block->bb_u.l.bb_leftsib);
734 	xfs_dfsbno_t		right = be64_to_cpu(block->bb_u.l.bb_rightsib);
735 
736 	if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
737 		xfs_btree_reada_bufl(cur->bc_mp, left, 1);
738 		rval++;
739 	}
740 
741 	if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
742 		xfs_btree_reada_bufl(cur->bc_mp, right, 1);
743 		rval++;
744 	}
745 
746 	return rval;
747 }
748 
749 STATIC int
xfs_btree_readahead_sblock(struct xfs_btree_cur * cur,int lr,struct xfs_btree_block * block)750 xfs_btree_readahead_sblock(
751 	struct xfs_btree_cur	*cur,
752 	int			lr,
753 	struct xfs_btree_block *block)
754 {
755 	int			rval = 0;
756 	xfs_agblock_t		left = be32_to_cpu(block->bb_u.s.bb_leftsib);
757 	xfs_agblock_t		right = be32_to_cpu(block->bb_u.s.bb_rightsib);
758 
759 
760 	if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
761 		xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
762 				     left, 1);
763 		rval++;
764 	}
765 
766 	if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
767 		xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
768 				     right, 1);
769 		rval++;
770 	}
771 
772 	return rval;
773 }
774 
775 /*
776  * Read-ahead btree blocks, at the given level.
777  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
778  */
779 STATIC int
xfs_btree_readahead(struct xfs_btree_cur * cur,int lev,int lr)780 xfs_btree_readahead(
781 	struct xfs_btree_cur	*cur,		/* btree cursor */
782 	int			lev,		/* level in btree */
783 	int			lr)		/* left/right bits */
784 {
785 	struct xfs_btree_block	*block;
786 
787 	/*
788 	 * No readahead needed if we are at the root level and the
789 	 * btree root is stored in the inode.
790 	 */
791 	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
792 	    (lev == cur->bc_nlevels - 1))
793 		return 0;
794 
795 	if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
796 		return 0;
797 
798 	cur->bc_ra[lev] |= lr;
799 	block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
800 
801 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
802 		return xfs_btree_readahead_lblock(cur, lr, block);
803 	return xfs_btree_readahead_sblock(cur, lr, block);
804 }
805 
806 /*
807  * Set the buffer for level "lev" in the cursor to bp, releasing
808  * any previous buffer.
809  */
810 void
xfs_btree_setbuf(xfs_btree_cur_t * cur,int lev,xfs_buf_t * bp)811 xfs_btree_setbuf(
812 	xfs_btree_cur_t		*cur,	/* btree cursor */
813 	int			lev,	/* level in btree */
814 	xfs_buf_t		*bp)	/* new buffer to set */
815 {
816 	struct xfs_btree_block	*b;	/* btree block */
817 	xfs_buf_t		*obp;	/* old buffer pointer */
818 
819 	obp = cur->bc_bufs[lev];
820 	if (obp)
821 		xfs_trans_brelse(cur->bc_tp, obp);
822 	cur->bc_bufs[lev] = bp;
823 	cur->bc_ra[lev] = 0;
824 	if (!bp)
825 		return;
826 	b = XFS_BUF_TO_BLOCK(bp);
827 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
828 		if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
829 			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
830 		if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
831 			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
832 	} else {
833 		if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK)
834 			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
835 		if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK)
836 			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
837 	}
838 }
839 
840 STATIC int
xfs_btree_ptr_is_null(struct xfs_btree_cur * cur,union xfs_btree_ptr * ptr)841 xfs_btree_ptr_is_null(
842 	struct xfs_btree_cur	*cur,
843 	union xfs_btree_ptr	*ptr)
844 {
845 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
846 		return be64_to_cpu(ptr->l) == NULLDFSBNO;
847 	else
848 		return be32_to_cpu(ptr->s) == NULLAGBLOCK;
849 }
850 
851 STATIC void
xfs_btree_set_ptr_null(struct xfs_btree_cur * cur,union xfs_btree_ptr * ptr)852 xfs_btree_set_ptr_null(
853 	struct xfs_btree_cur	*cur,
854 	union xfs_btree_ptr	*ptr)
855 {
856 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
857 		ptr->l = cpu_to_be64(NULLDFSBNO);
858 	else
859 		ptr->s = cpu_to_be32(NULLAGBLOCK);
860 }
861 
862 /*
863  * Get/set/init sibling pointers
864  */
865 STATIC void
xfs_btree_get_sibling(struct xfs_btree_cur * cur,struct xfs_btree_block * block,union xfs_btree_ptr * ptr,int lr)866 xfs_btree_get_sibling(
867 	struct xfs_btree_cur	*cur,
868 	struct xfs_btree_block	*block,
869 	union xfs_btree_ptr	*ptr,
870 	int			lr)
871 {
872 	ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
873 
874 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
875 		if (lr == XFS_BB_RIGHTSIB)
876 			ptr->l = block->bb_u.l.bb_rightsib;
877 		else
878 			ptr->l = block->bb_u.l.bb_leftsib;
879 	} else {
880 		if (lr == XFS_BB_RIGHTSIB)
881 			ptr->s = block->bb_u.s.bb_rightsib;
882 		else
883 			ptr->s = block->bb_u.s.bb_leftsib;
884 	}
885 }
886 
887 STATIC void
xfs_btree_set_sibling(struct xfs_btree_cur * cur,struct xfs_btree_block * block,union xfs_btree_ptr * ptr,int lr)888 xfs_btree_set_sibling(
889 	struct xfs_btree_cur	*cur,
890 	struct xfs_btree_block	*block,
891 	union xfs_btree_ptr	*ptr,
892 	int			lr)
893 {
894 	ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
895 
896 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
897 		if (lr == XFS_BB_RIGHTSIB)
898 			block->bb_u.l.bb_rightsib = ptr->l;
899 		else
900 			block->bb_u.l.bb_leftsib = ptr->l;
901 	} else {
902 		if (lr == XFS_BB_RIGHTSIB)
903 			block->bb_u.s.bb_rightsib = ptr->s;
904 		else
905 			block->bb_u.s.bb_leftsib = ptr->s;
906 	}
907 }
908 
909 STATIC void
xfs_btree_init_block(struct xfs_btree_cur * cur,int level,int numrecs,struct xfs_btree_block * new)910 xfs_btree_init_block(
911 	struct xfs_btree_cur	*cur,
912 	int			level,
913 	int			numrecs,
914 	struct xfs_btree_block	*new)	/* new block */
915 {
916 	new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
917 	new->bb_level = cpu_to_be16(level);
918 	new->bb_numrecs = cpu_to_be16(numrecs);
919 
920 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
921 		new->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
922 		new->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
923 	} else {
924 		new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
925 		new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
926 	}
927 }
928 
929 /*
930  * Return true if ptr is the last record in the btree and
931  * we need to track updateѕ to this record.  The decision
932  * will be further refined in the update_lastrec method.
933  */
934 STATIC int
xfs_btree_is_lastrec(struct xfs_btree_cur * cur,struct xfs_btree_block * block,int level)935 xfs_btree_is_lastrec(
936 	struct xfs_btree_cur	*cur,
937 	struct xfs_btree_block	*block,
938 	int			level)
939 {
940 	union xfs_btree_ptr	ptr;
941 
942 	if (level > 0)
943 		return 0;
944 	if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
945 		return 0;
946 
947 	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
948 	if (!xfs_btree_ptr_is_null(cur, &ptr))
949 		return 0;
950 	return 1;
951 }
952 
953 STATIC void
xfs_btree_buf_to_ptr(struct xfs_btree_cur * cur,struct xfs_buf * bp,union xfs_btree_ptr * ptr)954 xfs_btree_buf_to_ptr(
955 	struct xfs_btree_cur	*cur,
956 	struct xfs_buf		*bp,
957 	union xfs_btree_ptr	*ptr)
958 {
959 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
960 		ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
961 					XFS_BUF_ADDR(bp)));
962 	else {
963 		ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
964 					XFS_BUF_ADDR(bp)));
965 	}
966 }
967 
968 STATIC xfs_daddr_t
xfs_btree_ptr_to_daddr(struct xfs_btree_cur * cur,union xfs_btree_ptr * ptr)969 xfs_btree_ptr_to_daddr(
970 	struct xfs_btree_cur	*cur,
971 	union xfs_btree_ptr	*ptr)
972 {
973 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
974 		ASSERT(be64_to_cpu(ptr->l) != NULLDFSBNO);
975 
976 		return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
977 	} else {
978 		ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
979 		ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK);
980 
981 		return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
982 					be32_to_cpu(ptr->s));
983 	}
984 }
985 
986 STATIC void
xfs_btree_set_refs(struct xfs_btree_cur * cur,struct xfs_buf * bp)987 xfs_btree_set_refs(
988 	struct xfs_btree_cur	*cur,
989 	struct xfs_buf		*bp)
990 {
991 	switch (cur->bc_btnum) {
992 	case XFS_BTNUM_BNO:
993 	case XFS_BTNUM_CNT:
994 		XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
995 		break;
996 	case XFS_BTNUM_INO:
997 		XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF);
998 		break;
999 	case XFS_BTNUM_BMAP:
1000 		XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF);
1001 		break;
1002 	default:
1003 		ASSERT(0);
1004 	}
1005 }
1006 
1007 STATIC int
xfs_btree_get_buf_block(struct xfs_btree_cur * cur,union xfs_btree_ptr * ptr,int flags,struct xfs_btree_block ** block,struct xfs_buf ** bpp)1008 xfs_btree_get_buf_block(
1009 	struct xfs_btree_cur	*cur,
1010 	union xfs_btree_ptr	*ptr,
1011 	int			flags,
1012 	struct xfs_btree_block	**block,
1013 	struct xfs_buf		**bpp)
1014 {
1015 	struct xfs_mount	*mp = cur->bc_mp;
1016 	xfs_daddr_t		d;
1017 
1018 	/* need to sort out how callers deal with failures first */
1019 	ASSERT(!(flags & XFS_BUF_TRYLOCK));
1020 
1021 	d = xfs_btree_ptr_to_daddr(cur, ptr);
1022 	*bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
1023 				 mp->m_bsize, flags);
1024 
1025 	ASSERT(*bpp);
1026 	ASSERT(!XFS_BUF_GETERROR(*bpp));
1027 
1028 	*block = XFS_BUF_TO_BLOCK(*bpp);
1029 	return 0;
1030 }
1031 
1032 /*
1033  * Read in the buffer at the given ptr and return the buffer and
1034  * the block pointer within the buffer.
1035  */
1036 STATIC int
xfs_btree_read_buf_block(struct xfs_btree_cur * cur,union xfs_btree_ptr * ptr,int level,int flags,struct xfs_btree_block ** block,struct xfs_buf ** bpp)1037 xfs_btree_read_buf_block(
1038 	struct xfs_btree_cur	*cur,
1039 	union xfs_btree_ptr	*ptr,
1040 	int			level,
1041 	int			flags,
1042 	struct xfs_btree_block	**block,
1043 	struct xfs_buf		**bpp)
1044 {
1045 	struct xfs_mount	*mp = cur->bc_mp;
1046 	xfs_daddr_t		d;
1047 	int			error;
1048 
1049 	/* need to sort out how callers deal with failures first */
1050 	ASSERT(!(flags & XFS_BUF_TRYLOCK));
1051 
1052 	d = xfs_btree_ptr_to_daddr(cur, ptr);
1053 	error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
1054 				   mp->m_bsize, flags, bpp);
1055 	if (error)
1056 		return error;
1057 
1058 	ASSERT(*bpp != NULL);
1059 	ASSERT(!XFS_BUF_GETERROR(*bpp));
1060 
1061 	xfs_btree_set_refs(cur, *bpp);
1062 	*block = XFS_BUF_TO_BLOCK(*bpp);
1063 
1064 	error = xfs_btree_check_block(cur, *block, level, *bpp);
1065 	if (error)
1066 		xfs_trans_brelse(cur->bc_tp, *bpp);
1067 	return error;
1068 }
1069 
1070 /*
1071  * Copy keys from one btree block to another.
1072  */
1073 STATIC void
xfs_btree_copy_keys(struct xfs_btree_cur * cur,union xfs_btree_key * dst_key,union xfs_btree_key * src_key,int numkeys)1074 xfs_btree_copy_keys(
1075 	struct xfs_btree_cur	*cur,
1076 	union xfs_btree_key	*dst_key,
1077 	union xfs_btree_key	*src_key,
1078 	int			numkeys)
1079 {
1080 	ASSERT(numkeys >= 0);
1081 	memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1082 }
1083 
1084 /*
1085  * Copy records from one btree block to another.
1086  */
1087 STATIC void
xfs_btree_copy_recs(struct xfs_btree_cur * cur,union xfs_btree_rec * dst_rec,union xfs_btree_rec * src_rec,int numrecs)1088 xfs_btree_copy_recs(
1089 	struct xfs_btree_cur	*cur,
1090 	union xfs_btree_rec	*dst_rec,
1091 	union xfs_btree_rec	*src_rec,
1092 	int			numrecs)
1093 {
1094 	ASSERT(numrecs >= 0);
1095 	memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1096 }
1097 
1098 /*
1099  * Copy block pointers from one btree block to another.
1100  */
1101 STATIC void
xfs_btree_copy_ptrs(struct xfs_btree_cur * cur,union xfs_btree_ptr * dst_ptr,union xfs_btree_ptr * src_ptr,int numptrs)1102 xfs_btree_copy_ptrs(
1103 	struct xfs_btree_cur	*cur,
1104 	union xfs_btree_ptr	*dst_ptr,
1105 	union xfs_btree_ptr	*src_ptr,
1106 	int			numptrs)
1107 {
1108 	ASSERT(numptrs >= 0);
1109 	memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1110 }
1111 
1112 /*
1113  * Shift keys one index left/right inside a single btree block.
1114  */
1115 STATIC void
xfs_btree_shift_keys(struct xfs_btree_cur * cur,union xfs_btree_key * key,int dir,int numkeys)1116 xfs_btree_shift_keys(
1117 	struct xfs_btree_cur	*cur,
1118 	union xfs_btree_key	*key,
1119 	int			dir,
1120 	int			numkeys)
1121 {
1122 	char			*dst_key;
1123 
1124 	ASSERT(numkeys >= 0);
1125 	ASSERT(dir == 1 || dir == -1);
1126 
1127 	dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1128 	memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1129 }
1130 
1131 /*
1132  * Shift records one index left/right inside a single btree block.
1133  */
1134 STATIC void
xfs_btree_shift_recs(struct xfs_btree_cur * cur,union xfs_btree_rec * rec,int dir,int numrecs)1135 xfs_btree_shift_recs(
1136 	struct xfs_btree_cur	*cur,
1137 	union xfs_btree_rec	*rec,
1138 	int			dir,
1139 	int			numrecs)
1140 {
1141 	char			*dst_rec;
1142 
1143 	ASSERT(numrecs >= 0);
1144 	ASSERT(dir == 1 || dir == -1);
1145 
1146 	dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1147 	memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1148 }
1149 
1150 /*
1151  * Shift block pointers one index left/right inside a single btree block.
1152  */
1153 STATIC void
xfs_btree_shift_ptrs(struct xfs_btree_cur * cur,union xfs_btree_ptr * ptr,int dir,int numptrs)1154 xfs_btree_shift_ptrs(
1155 	struct xfs_btree_cur	*cur,
1156 	union xfs_btree_ptr	*ptr,
1157 	int			dir,
1158 	int			numptrs)
1159 {
1160 	char			*dst_ptr;
1161 
1162 	ASSERT(numptrs >= 0);
1163 	ASSERT(dir == 1 || dir == -1);
1164 
1165 	dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1166 	memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1167 }
1168 
1169 /*
1170  * Log key values from the btree block.
1171  */
1172 STATIC void
xfs_btree_log_keys(struct xfs_btree_cur * cur,struct xfs_buf * bp,int first,int last)1173 xfs_btree_log_keys(
1174 	struct xfs_btree_cur	*cur,
1175 	struct xfs_buf		*bp,
1176 	int			first,
1177 	int			last)
1178 {
1179 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1180 	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1181 
1182 	if (bp) {
1183 		xfs_trans_log_buf(cur->bc_tp, bp,
1184 				  xfs_btree_key_offset(cur, first),
1185 				  xfs_btree_key_offset(cur, last + 1) - 1);
1186 	} else {
1187 		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1188 				xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1189 	}
1190 
1191 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1192 }
1193 
1194 /*
1195  * Log record values from the btree block.
1196  */
1197 void
xfs_btree_log_recs(struct xfs_btree_cur * cur,struct xfs_buf * bp,int first,int last)1198 xfs_btree_log_recs(
1199 	struct xfs_btree_cur	*cur,
1200 	struct xfs_buf		*bp,
1201 	int			first,
1202 	int			last)
1203 {
1204 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1205 	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1206 
1207 	xfs_trans_log_buf(cur->bc_tp, bp,
1208 			  xfs_btree_rec_offset(cur, first),
1209 			  xfs_btree_rec_offset(cur, last + 1) - 1);
1210 
1211 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1212 }
1213 
1214 /*
1215  * Log block pointer fields from a btree block (nonleaf).
1216  */
1217 STATIC void
xfs_btree_log_ptrs(struct xfs_btree_cur * cur,struct xfs_buf * bp,int first,int last)1218 xfs_btree_log_ptrs(
1219 	struct xfs_btree_cur	*cur,	/* btree cursor */
1220 	struct xfs_buf		*bp,	/* buffer containing btree block */
1221 	int			first,	/* index of first pointer to log */
1222 	int			last)	/* index of last pointer to log */
1223 {
1224 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1225 	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1226 
1227 	if (bp) {
1228 		struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
1229 		int			level = xfs_btree_get_level(block);
1230 
1231 		xfs_trans_log_buf(cur->bc_tp, bp,
1232 				xfs_btree_ptr_offset(cur, first, level),
1233 				xfs_btree_ptr_offset(cur, last + 1, level) - 1);
1234 	} else {
1235 		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1236 			xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1237 	}
1238 
1239 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1240 }
1241 
1242 /*
1243  * Log fields from a btree block header.
1244  */
1245 void
xfs_btree_log_block(struct xfs_btree_cur * cur,struct xfs_buf * bp,int fields)1246 xfs_btree_log_block(
1247 	struct xfs_btree_cur	*cur,	/* btree cursor */
1248 	struct xfs_buf		*bp,	/* buffer containing btree block */
1249 	int			fields)	/* mask of fields: XFS_BB_... */
1250 {
1251 	int			first;	/* first byte offset logged */
1252 	int			last;	/* last byte offset logged */
1253 	static const short	soffsets[] = {	/* table of offsets (short) */
1254 		offsetof(struct xfs_btree_block, bb_magic),
1255 		offsetof(struct xfs_btree_block, bb_level),
1256 		offsetof(struct xfs_btree_block, bb_numrecs),
1257 		offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
1258 		offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
1259 		XFS_BTREE_SBLOCK_LEN
1260 	};
1261 	static const short	loffsets[] = {	/* table of offsets (long) */
1262 		offsetof(struct xfs_btree_block, bb_magic),
1263 		offsetof(struct xfs_btree_block, bb_level),
1264 		offsetof(struct xfs_btree_block, bb_numrecs),
1265 		offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
1266 		offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
1267 		XFS_BTREE_LBLOCK_LEN
1268 	};
1269 
1270 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1271 	XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
1272 
1273 	if (bp) {
1274 		xfs_btree_offsets(fields,
1275 				  (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
1276 					loffsets : soffsets,
1277 				  XFS_BB_NUM_BITS, &first, &last);
1278 		xfs_trans_log_buf(cur->bc_tp, bp, first, last);
1279 	} else {
1280 		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1281 			xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1282 	}
1283 
1284 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1285 }
1286 
1287 /*
1288  * Increment cursor by one record at the level.
1289  * For nonzero levels the leaf-ward information is untouched.
1290  */
1291 int						/* error */
xfs_btree_increment(struct xfs_btree_cur * cur,int level,int * stat)1292 xfs_btree_increment(
1293 	struct xfs_btree_cur	*cur,
1294 	int			level,
1295 	int			*stat)		/* success/failure */
1296 {
1297 	struct xfs_btree_block	*block;
1298 	union xfs_btree_ptr	ptr;
1299 	struct xfs_buf		*bp;
1300 	int			error;		/* error return value */
1301 	int			lev;
1302 
1303 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1304 	XFS_BTREE_TRACE_ARGI(cur, level);
1305 
1306 	ASSERT(level < cur->bc_nlevels);
1307 
1308 	/* Read-ahead to the right at this level. */
1309 	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1310 
1311 	/* Get a pointer to the btree block. */
1312 	block = xfs_btree_get_block(cur, level, &bp);
1313 
1314 #ifdef DEBUG
1315 	error = xfs_btree_check_block(cur, block, level, bp);
1316 	if (error)
1317 		goto error0;
1318 #endif
1319 
1320 	/* We're done if we remain in the block after the increment. */
1321 	if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
1322 		goto out1;
1323 
1324 	/* Fail if we just went off the right edge of the tree. */
1325 	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1326 	if (xfs_btree_ptr_is_null(cur, &ptr))
1327 		goto out0;
1328 
1329 	XFS_BTREE_STATS_INC(cur, increment);
1330 
1331 	/*
1332 	 * March up the tree incrementing pointers.
1333 	 * Stop when we don't go off the right edge of a block.
1334 	 */
1335 	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1336 		block = xfs_btree_get_block(cur, lev, &bp);
1337 
1338 #ifdef DEBUG
1339 		error = xfs_btree_check_block(cur, block, lev, bp);
1340 		if (error)
1341 			goto error0;
1342 #endif
1343 
1344 		if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
1345 			break;
1346 
1347 		/* Read-ahead the right block for the next loop. */
1348 		xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1349 	}
1350 
1351 	/*
1352 	 * If we went off the root then we are either seriously
1353 	 * confused or have the tree root in an inode.
1354 	 */
1355 	if (lev == cur->bc_nlevels) {
1356 		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1357 			goto out0;
1358 		ASSERT(0);
1359 		error = EFSCORRUPTED;
1360 		goto error0;
1361 	}
1362 	ASSERT(lev < cur->bc_nlevels);
1363 
1364 	/*
1365 	 * Now walk back down the tree, fixing up the cursor's buffer
1366 	 * pointers and key numbers.
1367 	 */
1368 	for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1369 		union xfs_btree_ptr	*ptrp;
1370 
1371 		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1372 		error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1373 							0, &block, &bp);
1374 		if (error)
1375 			goto error0;
1376 
1377 		xfs_btree_setbuf(cur, lev, bp);
1378 		cur->bc_ptrs[lev] = 1;
1379 	}
1380 out1:
1381 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1382 	*stat = 1;
1383 	return 0;
1384 
1385 out0:
1386 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1387 	*stat = 0;
1388 	return 0;
1389 
1390 error0:
1391 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1392 	return error;
1393 }
1394 
1395 /*
1396  * Decrement cursor by one record at the level.
1397  * For nonzero levels the leaf-ward information is untouched.
1398  */
1399 int						/* error */
xfs_btree_decrement(struct xfs_btree_cur * cur,int level,int * stat)1400 xfs_btree_decrement(
1401 	struct xfs_btree_cur	*cur,
1402 	int			level,
1403 	int			*stat)		/* success/failure */
1404 {
1405 	struct xfs_btree_block	*block;
1406 	xfs_buf_t		*bp;
1407 	int			error;		/* error return value */
1408 	int			lev;
1409 	union xfs_btree_ptr	ptr;
1410 
1411 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1412 	XFS_BTREE_TRACE_ARGI(cur, level);
1413 
1414 	ASSERT(level < cur->bc_nlevels);
1415 
1416 	/* Read-ahead to the left at this level. */
1417 	xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1418 
1419 	/* We're done if we remain in the block after the decrement. */
1420 	if (--cur->bc_ptrs[level] > 0)
1421 		goto out1;
1422 
1423 	/* Get a pointer to the btree block. */
1424 	block = xfs_btree_get_block(cur, level, &bp);
1425 
1426 #ifdef DEBUG
1427 	error = xfs_btree_check_block(cur, block, level, bp);
1428 	if (error)
1429 		goto error0;
1430 #endif
1431 
1432 	/* Fail if we just went off the left edge of the tree. */
1433 	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
1434 	if (xfs_btree_ptr_is_null(cur, &ptr))
1435 		goto out0;
1436 
1437 	XFS_BTREE_STATS_INC(cur, decrement);
1438 
1439 	/*
1440 	 * March up the tree decrementing pointers.
1441 	 * Stop when we don't go off the left edge of a block.
1442 	 */
1443 	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1444 		if (--cur->bc_ptrs[lev] > 0)
1445 			break;
1446 		/* Read-ahead the left block for the next loop. */
1447 		xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1448 	}
1449 
1450 	/*
1451 	 * If we went off the root then we are seriously confused.
1452 	 * or the root of the tree is in an inode.
1453 	 */
1454 	if (lev == cur->bc_nlevels) {
1455 		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1456 			goto out0;
1457 		ASSERT(0);
1458 		error = EFSCORRUPTED;
1459 		goto error0;
1460 	}
1461 	ASSERT(lev < cur->bc_nlevels);
1462 
1463 	/*
1464 	 * Now walk back down the tree, fixing up the cursor's buffer
1465 	 * pointers and key numbers.
1466 	 */
1467 	for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1468 		union xfs_btree_ptr	*ptrp;
1469 
1470 		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1471 		error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1472 							0, &block, &bp);
1473 		if (error)
1474 			goto error0;
1475 		xfs_btree_setbuf(cur, lev, bp);
1476 		cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
1477 	}
1478 out1:
1479 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1480 	*stat = 1;
1481 	return 0;
1482 
1483 out0:
1484 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1485 	*stat = 0;
1486 	return 0;
1487 
1488 error0:
1489 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1490 	return error;
1491 }
1492 
1493 STATIC int
xfs_btree_lookup_get_block(struct xfs_btree_cur * cur,int level,union xfs_btree_ptr * pp,struct xfs_btree_block ** blkp)1494 xfs_btree_lookup_get_block(
1495 	struct xfs_btree_cur	*cur,	/* btree cursor */
1496 	int			level,	/* level in the btree */
1497 	union xfs_btree_ptr	*pp,	/* ptr to btree block */
1498 	struct xfs_btree_block	**blkp) /* return btree block */
1499 {
1500 	struct xfs_buf		*bp;	/* buffer pointer for btree block */
1501 	int			error = 0;
1502 
1503 	/* special case the root block if in an inode */
1504 	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1505 	    (level == cur->bc_nlevels - 1)) {
1506 		*blkp = xfs_btree_get_iroot(cur);
1507 		return 0;
1508 	}
1509 
1510 	/*
1511 	 * If the old buffer at this level for the disk address we are
1512 	 * looking for re-use it.
1513 	 *
1514 	 * Otherwise throw it away and get a new one.
1515 	 */
1516 	bp = cur->bc_bufs[level];
1517 	if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
1518 		*blkp = XFS_BUF_TO_BLOCK(bp);
1519 		return 0;
1520 	}
1521 
1522 	error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp);
1523 	if (error)
1524 		return error;
1525 
1526 	xfs_btree_setbuf(cur, level, bp);
1527 	return 0;
1528 }
1529 
1530 /*
1531  * Get current search key.  For level 0 we don't actually have a key
1532  * structure so we make one up from the record.  For all other levels
1533  * we just return the right key.
1534  */
1535 STATIC union xfs_btree_key *
xfs_lookup_get_search_key(struct xfs_btree_cur * cur,int level,int keyno,struct xfs_btree_block * block,union xfs_btree_key * kp)1536 xfs_lookup_get_search_key(
1537 	struct xfs_btree_cur	*cur,
1538 	int			level,
1539 	int			keyno,
1540 	struct xfs_btree_block	*block,
1541 	union xfs_btree_key	*kp)
1542 {
1543 	if (level == 0) {
1544 		cur->bc_ops->init_key_from_rec(kp,
1545 				xfs_btree_rec_addr(cur, keyno, block));
1546 		return kp;
1547 	}
1548 
1549 	return xfs_btree_key_addr(cur, keyno, block);
1550 }
1551 
1552 /*
1553  * Lookup the record.  The cursor is made to point to it, based on dir.
1554  * Return 0 if can't find any such record, 1 for success.
1555  */
1556 int					/* error */
xfs_btree_lookup(struct xfs_btree_cur * cur,xfs_lookup_t dir,int * stat)1557 xfs_btree_lookup(
1558 	struct xfs_btree_cur	*cur,	/* btree cursor */
1559 	xfs_lookup_t		dir,	/* <=, ==, or >= */
1560 	int			*stat)	/* success/failure */
1561 {
1562 	struct xfs_btree_block	*block;	/* current btree block */
1563 	__int64_t		diff;	/* difference for the current key */
1564 	int			error;	/* error return value */
1565 	int			keyno;	/* current key number */
1566 	int			level;	/* level in the btree */
1567 	union xfs_btree_ptr	*pp;	/* ptr to btree block */
1568 	union xfs_btree_ptr	ptr;	/* ptr to btree block */
1569 
1570 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1571 	XFS_BTREE_TRACE_ARGI(cur, dir);
1572 
1573 	XFS_BTREE_STATS_INC(cur, lookup);
1574 
1575 	block = NULL;
1576 	keyno = 0;
1577 
1578 	/* initialise start pointer from cursor */
1579 	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
1580 	pp = &ptr;
1581 
1582 	/*
1583 	 * Iterate over each level in the btree, starting at the root.
1584 	 * For each level above the leaves, find the key we need, based
1585 	 * on the lookup record, then follow the corresponding block
1586 	 * pointer down to the next level.
1587 	 */
1588 	for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
1589 		/* Get the block we need to do the lookup on. */
1590 		error = xfs_btree_lookup_get_block(cur, level, pp, &block);
1591 		if (error)
1592 			goto error0;
1593 
1594 		if (diff == 0) {
1595 			/*
1596 			 * If we already had a key match at a higher level, we
1597 			 * know we need to use the first entry in this block.
1598 			 */
1599 			keyno = 1;
1600 		} else {
1601 			/* Otherwise search this block. Do a binary search. */
1602 
1603 			int	high;	/* high entry number */
1604 			int	low;	/* low entry number */
1605 
1606 			/* Set low and high entry numbers, 1-based. */
1607 			low = 1;
1608 			high = xfs_btree_get_numrecs(block);
1609 			if (!high) {
1610 				/* Block is empty, must be an empty leaf. */
1611 				ASSERT(level == 0 && cur->bc_nlevels == 1);
1612 
1613 				cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1614 				XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1615 				*stat = 0;
1616 				return 0;
1617 			}
1618 
1619 			/* Binary search the block. */
1620 			while (low <= high) {
1621 				union xfs_btree_key	key;
1622 				union xfs_btree_key	*kp;
1623 
1624 				XFS_BTREE_STATS_INC(cur, compare);
1625 
1626 				/* keyno is average of low and high. */
1627 				keyno = (low + high) >> 1;
1628 
1629 				/* Get current search key */
1630 				kp = xfs_lookup_get_search_key(cur, level,
1631 						keyno, block, &key);
1632 
1633 				/*
1634 				 * Compute difference to get next direction:
1635 				 *  - less than, move right
1636 				 *  - greater than, move left
1637 				 *  - equal, we're done
1638 				 */
1639 				diff = cur->bc_ops->key_diff(cur, kp);
1640 				if (diff < 0)
1641 					low = keyno + 1;
1642 				else if (diff > 0)
1643 					high = keyno - 1;
1644 				else
1645 					break;
1646 			}
1647 		}
1648 
1649 		/*
1650 		 * If there are more levels, set up for the next level
1651 		 * by getting the block number and filling in the cursor.
1652 		 */
1653 		if (level > 0) {
1654 			/*
1655 			 * If we moved left, need the previous key number,
1656 			 * unless there isn't one.
1657 			 */
1658 			if (diff > 0 && --keyno < 1)
1659 				keyno = 1;
1660 			pp = xfs_btree_ptr_addr(cur, keyno, block);
1661 
1662 #ifdef DEBUG
1663 			error = xfs_btree_check_ptr(cur, pp, 0, level);
1664 			if (error)
1665 				goto error0;
1666 #endif
1667 			cur->bc_ptrs[level] = keyno;
1668 		}
1669 	}
1670 
1671 	/* Done with the search. See if we need to adjust the results. */
1672 	if (dir != XFS_LOOKUP_LE && diff < 0) {
1673 		keyno++;
1674 		/*
1675 		 * If ge search and we went off the end of the block, but it's
1676 		 * not the last block, we're in the wrong block.
1677 		 */
1678 		xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1679 		if (dir == XFS_LOOKUP_GE &&
1680 		    keyno > xfs_btree_get_numrecs(block) &&
1681 		    !xfs_btree_ptr_is_null(cur, &ptr)) {
1682 			int	i;
1683 
1684 			cur->bc_ptrs[0] = keyno;
1685 			error = xfs_btree_increment(cur, 0, &i);
1686 			if (error)
1687 				goto error0;
1688 			XFS_WANT_CORRUPTED_RETURN(i == 1);
1689 			XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1690 			*stat = 1;
1691 			return 0;
1692 		}
1693 	} else if (dir == XFS_LOOKUP_LE && diff > 0)
1694 		keyno--;
1695 	cur->bc_ptrs[0] = keyno;
1696 
1697 	/* Return if we succeeded or not. */
1698 	if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
1699 		*stat = 0;
1700 	else if (dir != XFS_LOOKUP_EQ || diff == 0)
1701 		*stat = 1;
1702 	else
1703 		*stat = 0;
1704 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1705 	return 0;
1706 
1707 error0:
1708 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1709 	return error;
1710 }
1711 
1712 /*
1713  * Update keys at all levels from here to the root along the cursor's path.
1714  */
1715 STATIC int
xfs_btree_updkey(struct xfs_btree_cur * cur,union xfs_btree_key * keyp,int level)1716 xfs_btree_updkey(
1717 	struct xfs_btree_cur	*cur,
1718 	union xfs_btree_key	*keyp,
1719 	int			level)
1720 {
1721 	struct xfs_btree_block	*block;
1722 	struct xfs_buf		*bp;
1723 	union xfs_btree_key	*kp;
1724 	int			ptr;
1725 
1726 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1727 	XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
1728 
1729 	ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
1730 
1731 	/*
1732 	 * Go up the tree from this level toward the root.
1733 	 * At each level, update the key value to the value input.
1734 	 * Stop when we reach a level where the cursor isn't pointing
1735 	 * at the first entry in the block.
1736 	 */
1737 	for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1738 #ifdef DEBUG
1739 		int		error;
1740 #endif
1741 		block = xfs_btree_get_block(cur, level, &bp);
1742 #ifdef DEBUG
1743 		error = xfs_btree_check_block(cur, block, level, bp);
1744 		if (error) {
1745 			XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1746 			return error;
1747 		}
1748 #endif
1749 		ptr = cur->bc_ptrs[level];
1750 		kp = xfs_btree_key_addr(cur, ptr, block);
1751 		xfs_btree_copy_keys(cur, kp, keyp, 1);
1752 		xfs_btree_log_keys(cur, bp, ptr, ptr);
1753 	}
1754 
1755 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1756 	return 0;
1757 }
1758 
1759 /*
1760  * Update the record referred to by cur to the value in the
1761  * given record. This either works (return 0) or gets an
1762  * EFSCORRUPTED error.
1763  */
1764 int
xfs_btree_update(struct xfs_btree_cur * cur,union xfs_btree_rec * rec)1765 xfs_btree_update(
1766 	struct xfs_btree_cur	*cur,
1767 	union xfs_btree_rec	*rec)
1768 {
1769 	struct xfs_btree_block	*block;
1770 	struct xfs_buf		*bp;
1771 	int			error;
1772 	int			ptr;
1773 	union xfs_btree_rec	*rp;
1774 
1775 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1776 	XFS_BTREE_TRACE_ARGR(cur, rec);
1777 
1778 	/* Pick up the current block. */
1779 	block = xfs_btree_get_block(cur, 0, &bp);
1780 
1781 #ifdef DEBUG
1782 	error = xfs_btree_check_block(cur, block, 0, bp);
1783 	if (error)
1784 		goto error0;
1785 #endif
1786 	/* Get the address of the rec to be updated. */
1787 	ptr = cur->bc_ptrs[0];
1788 	rp = xfs_btree_rec_addr(cur, ptr, block);
1789 
1790 	/* Fill in the new contents and log them. */
1791 	xfs_btree_copy_recs(cur, rp, rec, 1);
1792 	xfs_btree_log_recs(cur, bp, ptr, ptr);
1793 
1794 	/*
1795 	 * If we are tracking the last record in the tree and
1796 	 * we are at the far right edge of the tree, update it.
1797 	 */
1798 	if (xfs_btree_is_lastrec(cur, block, 0)) {
1799 		cur->bc_ops->update_lastrec(cur, block, rec,
1800 					    ptr, LASTREC_UPDATE);
1801 	}
1802 
1803 	/* Updating first rec in leaf. Pass new key value up to our parent. */
1804 	if (ptr == 1) {
1805 		union xfs_btree_key	key;
1806 
1807 		cur->bc_ops->init_key_from_rec(&key, rec);
1808 		error = xfs_btree_updkey(cur, &key, 1);
1809 		if (error)
1810 			goto error0;
1811 	}
1812 
1813 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1814 	return 0;
1815 
1816 error0:
1817 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1818 	return error;
1819 }
1820 
1821 /*
1822  * Move 1 record left from cur/level if possible.
1823  * Update cur to reflect the new path.
1824  */
1825 STATIC int					/* error */
xfs_btree_lshift(struct xfs_btree_cur * cur,int level,int * stat)1826 xfs_btree_lshift(
1827 	struct xfs_btree_cur	*cur,
1828 	int			level,
1829 	int			*stat)		/* success/failure */
1830 {
1831 	union xfs_btree_key	key;		/* btree key */
1832 	struct xfs_buf		*lbp;		/* left buffer pointer */
1833 	struct xfs_btree_block	*left;		/* left btree block */
1834 	int			lrecs;		/* left record count */
1835 	struct xfs_buf		*rbp;		/* right buffer pointer */
1836 	struct xfs_btree_block	*right;		/* right btree block */
1837 	int			rrecs;		/* right record count */
1838 	union xfs_btree_ptr	lptr;		/* left btree pointer */
1839 	union xfs_btree_key	*rkp = NULL;	/* right btree key */
1840 	union xfs_btree_ptr	*rpp = NULL;	/* right address pointer */
1841 	union xfs_btree_rec	*rrp = NULL;	/* right record pointer */
1842 	int			error;		/* error return value */
1843 
1844 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1845 	XFS_BTREE_TRACE_ARGI(cur, level);
1846 
1847 	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1848 	    level == cur->bc_nlevels - 1)
1849 		goto out0;
1850 
1851 	/* Set up variables for this block as "right". */
1852 	right = xfs_btree_get_block(cur, level, &rbp);
1853 
1854 #ifdef DEBUG
1855 	error = xfs_btree_check_block(cur, right, level, rbp);
1856 	if (error)
1857 		goto error0;
1858 #endif
1859 
1860 	/* If we've got no left sibling then we can't shift an entry left. */
1861 	xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
1862 	if (xfs_btree_ptr_is_null(cur, &lptr))
1863 		goto out0;
1864 
1865 	/*
1866 	 * If the cursor entry is the one that would be moved, don't
1867 	 * do it... it's too complicated.
1868 	 */
1869 	if (cur->bc_ptrs[level] <= 1)
1870 		goto out0;
1871 
1872 	/* Set up the left neighbor as "left". */
1873 	error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp);
1874 	if (error)
1875 		goto error0;
1876 
1877 	/* If it's full, it can't take another entry. */
1878 	lrecs = xfs_btree_get_numrecs(left);
1879 	if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
1880 		goto out0;
1881 
1882 	rrecs = xfs_btree_get_numrecs(right);
1883 
1884 	/*
1885 	 * We add one entry to the left side and remove one for the right side.
1886 	 * Accout for it here, the changes will be updated on disk and logged
1887 	 * later.
1888 	 */
1889 	lrecs++;
1890 	rrecs--;
1891 
1892 	XFS_BTREE_STATS_INC(cur, lshift);
1893 	XFS_BTREE_STATS_ADD(cur, moves, 1);
1894 
1895 	/*
1896 	 * If non-leaf, copy a key and a ptr to the left block.
1897 	 * Log the changes to the left block.
1898 	 */
1899 	if (level > 0) {
1900 		/* It's a non-leaf.  Move keys and pointers. */
1901 		union xfs_btree_key	*lkp;	/* left btree key */
1902 		union xfs_btree_ptr	*lpp;	/* left address pointer */
1903 
1904 		lkp = xfs_btree_key_addr(cur, lrecs, left);
1905 		rkp = xfs_btree_key_addr(cur, 1, right);
1906 
1907 		lpp = xfs_btree_ptr_addr(cur, lrecs, left);
1908 		rpp = xfs_btree_ptr_addr(cur, 1, right);
1909 #ifdef DEBUG
1910 		error = xfs_btree_check_ptr(cur, rpp, 0, level);
1911 		if (error)
1912 			goto error0;
1913 #endif
1914 		xfs_btree_copy_keys(cur, lkp, rkp, 1);
1915 		xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
1916 
1917 		xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
1918 		xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
1919 
1920 		ASSERT(cur->bc_ops->keys_inorder(cur,
1921 			xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
1922 	} else {
1923 		/* It's a leaf.  Move records.  */
1924 		union xfs_btree_rec	*lrp;	/* left record pointer */
1925 
1926 		lrp = xfs_btree_rec_addr(cur, lrecs, left);
1927 		rrp = xfs_btree_rec_addr(cur, 1, right);
1928 
1929 		xfs_btree_copy_recs(cur, lrp, rrp, 1);
1930 		xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
1931 
1932 		ASSERT(cur->bc_ops->recs_inorder(cur,
1933 			xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
1934 	}
1935 
1936 	xfs_btree_set_numrecs(left, lrecs);
1937 	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
1938 
1939 	xfs_btree_set_numrecs(right, rrecs);
1940 	xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
1941 
1942 	/*
1943 	 * Slide the contents of right down one entry.
1944 	 */
1945 	XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
1946 	if (level > 0) {
1947 		/* It's a nonleaf. operate on keys and ptrs */
1948 #ifdef DEBUG
1949 		int			i;		/* loop index */
1950 
1951 		for (i = 0; i < rrecs; i++) {
1952 			error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
1953 			if (error)
1954 				goto error0;
1955 		}
1956 #endif
1957 		xfs_btree_shift_keys(cur,
1958 				xfs_btree_key_addr(cur, 2, right),
1959 				-1, rrecs);
1960 		xfs_btree_shift_ptrs(cur,
1961 				xfs_btree_ptr_addr(cur, 2, right),
1962 				-1, rrecs);
1963 
1964 		xfs_btree_log_keys(cur, rbp, 1, rrecs);
1965 		xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
1966 	} else {
1967 		/* It's a leaf. operate on records */
1968 		xfs_btree_shift_recs(cur,
1969 			xfs_btree_rec_addr(cur, 2, right),
1970 			-1, rrecs);
1971 		xfs_btree_log_recs(cur, rbp, 1, rrecs);
1972 
1973 		/*
1974 		 * If it's the first record in the block, we'll need a key
1975 		 * structure to pass up to the next level (updkey).
1976 		 */
1977 		cur->bc_ops->init_key_from_rec(&key,
1978 			xfs_btree_rec_addr(cur, 1, right));
1979 		rkp = &key;
1980 	}
1981 
1982 	/* Update the parent key values of right. */
1983 	error = xfs_btree_updkey(cur, rkp, level + 1);
1984 	if (error)
1985 		goto error0;
1986 
1987 	/* Slide the cursor value left one. */
1988 	cur->bc_ptrs[level]--;
1989 
1990 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1991 	*stat = 1;
1992 	return 0;
1993 
1994 out0:
1995 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1996 	*stat = 0;
1997 	return 0;
1998 
1999 error0:
2000 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2001 	return error;
2002 }
2003 
2004 /*
2005  * Move 1 record right from cur/level if possible.
2006  * Update cur to reflect the new path.
2007  */
2008 STATIC int					/* error */
xfs_btree_rshift(struct xfs_btree_cur * cur,int level,int * stat)2009 xfs_btree_rshift(
2010 	struct xfs_btree_cur	*cur,
2011 	int			level,
2012 	int			*stat)		/* success/failure */
2013 {
2014 	union xfs_btree_key	key;		/* btree key */
2015 	struct xfs_buf		*lbp;		/* left buffer pointer */
2016 	struct xfs_btree_block	*left;		/* left btree block */
2017 	struct xfs_buf		*rbp;		/* right buffer pointer */
2018 	struct xfs_btree_block	*right;		/* right btree block */
2019 	struct xfs_btree_cur	*tcur;		/* temporary btree cursor */
2020 	union xfs_btree_ptr	rptr;		/* right block pointer */
2021 	union xfs_btree_key	*rkp;		/* right btree key */
2022 	int			rrecs;		/* right record count */
2023 	int			lrecs;		/* left record count */
2024 	int			error;		/* error return value */
2025 	int			i;		/* loop counter */
2026 
2027 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2028 	XFS_BTREE_TRACE_ARGI(cur, level);
2029 
2030 	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2031 	    (level == cur->bc_nlevels - 1))
2032 		goto out0;
2033 
2034 	/* Set up variables for this block as "left". */
2035 	left = xfs_btree_get_block(cur, level, &lbp);
2036 
2037 #ifdef DEBUG
2038 	error = xfs_btree_check_block(cur, left, level, lbp);
2039 	if (error)
2040 		goto error0;
2041 #endif
2042 
2043 	/* If we've got no right sibling then we can't shift an entry right. */
2044 	xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2045 	if (xfs_btree_ptr_is_null(cur, &rptr))
2046 		goto out0;
2047 
2048 	/*
2049 	 * If the cursor entry is the one that would be moved, don't
2050 	 * do it... it's too complicated.
2051 	 */
2052 	lrecs = xfs_btree_get_numrecs(left);
2053 	if (cur->bc_ptrs[level] >= lrecs)
2054 		goto out0;
2055 
2056 	/* Set up the right neighbor as "right". */
2057 	error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp);
2058 	if (error)
2059 		goto error0;
2060 
2061 	/* If it's full, it can't take another entry. */
2062 	rrecs = xfs_btree_get_numrecs(right);
2063 	if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
2064 		goto out0;
2065 
2066 	XFS_BTREE_STATS_INC(cur, rshift);
2067 	XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2068 
2069 	/*
2070 	 * Make a hole at the start of the right neighbor block, then
2071 	 * copy the last left block entry to the hole.
2072 	 */
2073 	if (level > 0) {
2074 		/* It's a nonleaf. make a hole in the keys and ptrs */
2075 		union xfs_btree_key	*lkp;
2076 		union xfs_btree_ptr	*lpp;
2077 		union xfs_btree_ptr	*rpp;
2078 
2079 		lkp = xfs_btree_key_addr(cur, lrecs, left);
2080 		lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2081 		rkp = xfs_btree_key_addr(cur, 1, right);
2082 		rpp = xfs_btree_ptr_addr(cur, 1, right);
2083 
2084 #ifdef DEBUG
2085 		for (i = rrecs - 1; i >= 0; i--) {
2086 			error = xfs_btree_check_ptr(cur, rpp, i, level);
2087 			if (error)
2088 				goto error0;
2089 		}
2090 #endif
2091 
2092 		xfs_btree_shift_keys(cur, rkp, 1, rrecs);
2093 		xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
2094 
2095 #ifdef DEBUG
2096 		error = xfs_btree_check_ptr(cur, lpp, 0, level);
2097 		if (error)
2098 			goto error0;
2099 #endif
2100 
2101 		/* Now put the new data in, and log it. */
2102 		xfs_btree_copy_keys(cur, rkp, lkp, 1);
2103 		xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
2104 
2105 		xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
2106 		xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
2107 
2108 		ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
2109 			xfs_btree_key_addr(cur, 2, right)));
2110 	} else {
2111 		/* It's a leaf. make a hole in the records */
2112 		union xfs_btree_rec	*lrp;
2113 		union xfs_btree_rec	*rrp;
2114 
2115 		lrp = xfs_btree_rec_addr(cur, lrecs, left);
2116 		rrp = xfs_btree_rec_addr(cur, 1, right);
2117 
2118 		xfs_btree_shift_recs(cur, rrp, 1, rrecs);
2119 
2120 		/* Now put the new data in, and log it. */
2121 		xfs_btree_copy_recs(cur, rrp, lrp, 1);
2122 		xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
2123 
2124 		cur->bc_ops->init_key_from_rec(&key, rrp);
2125 		rkp = &key;
2126 
2127 		ASSERT(cur->bc_ops->recs_inorder(cur, rrp,
2128 			xfs_btree_rec_addr(cur, 2, right)));
2129 	}
2130 
2131 	/*
2132 	 * Decrement and log left's numrecs, bump and log right's numrecs.
2133 	 */
2134 	xfs_btree_set_numrecs(left, --lrecs);
2135 	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2136 
2137 	xfs_btree_set_numrecs(right, ++rrecs);
2138 	xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2139 
2140 	/*
2141 	 * Using a temporary cursor, update the parent key values of the
2142 	 * block on the right.
2143 	 */
2144 	error = xfs_btree_dup_cursor(cur, &tcur);
2145 	if (error)
2146 		goto error0;
2147 	i = xfs_btree_lastrec(tcur, level);
2148 	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2149 
2150 	error = xfs_btree_increment(tcur, level, &i);
2151 	if (error)
2152 		goto error1;
2153 
2154 	error = xfs_btree_updkey(tcur, rkp, level + 1);
2155 	if (error)
2156 		goto error1;
2157 
2158 	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2159 
2160 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2161 	*stat = 1;
2162 	return 0;
2163 
2164 out0:
2165 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2166 	*stat = 0;
2167 	return 0;
2168 
2169 error0:
2170 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2171 	return error;
2172 
2173 error1:
2174 	XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
2175 	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2176 	return error;
2177 }
2178 
2179 /*
2180  * Split cur/level block in half.
2181  * Return new block number and the key to its first
2182  * record (to be inserted into parent).
2183  */
2184 STATIC int					/* error */
xfs_btree_split(struct xfs_btree_cur * cur,int level,union xfs_btree_ptr * ptrp,union xfs_btree_key * key,struct xfs_btree_cur ** curp,int * stat)2185 xfs_btree_split(
2186 	struct xfs_btree_cur	*cur,
2187 	int			level,
2188 	union xfs_btree_ptr	*ptrp,
2189 	union xfs_btree_key	*key,
2190 	struct xfs_btree_cur	**curp,
2191 	int			*stat)		/* success/failure */
2192 {
2193 	union xfs_btree_ptr	lptr;		/* left sibling block ptr */
2194 	struct xfs_buf		*lbp;		/* left buffer pointer */
2195 	struct xfs_btree_block	*left;		/* left btree block */
2196 	union xfs_btree_ptr	rptr;		/* right sibling block ptr */
2197 	struct xfs_buf		*rbp;		/* right buffer pointer */
2198 	struct xfs_btree_block	*right;		/* right btree block */
2199 	union xfs_btree_ptr	rrptr;		/* right-right sibling ptr */
2200 	struct xfs_buf		*rrbp;		/* right-right buffer pointer */
2201 	struct xfs_btree_block	*rrblock;	/* right-right btree block */
2202 	int			lrecs;
2203 	int			rrecs;
2204 	int			src_index;
2205 	int			error;		/* error return value */
2206 #ifdef DEBUG
2207 	int			i;
2208 #endif
2209 
2210 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2211 	XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);
2212 
2213 	XFS_BTREE_STATS_INC(cur, split);
2214 
2215 	/* Set up left block (current one). */
2216 	left = xfs_btree_get_block(cur, level, &lbp);
2217 
2218 #ifdef DEBUG
2219 	error = xfs_btree_check_block(cur, left, level, lbp);
2220 	if (error)
2221 		goto error0;
2222 #endif
2223 
2224 	xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2225 
2226 	/* Allocate the new block. If we can't do it, we're toast. Give up. */
2227 	error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat);
2228 	if (error)
2229 		goto error0;
2230 	if (*stat == 0)
2231 		goto out0;
2232 	XFS_BTREE_STATS_INC(cur, alloc);
2233 
2234 	/* Set up the new block as "right". */
2235 	error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
2236 	if (error)
2237 		goto error0;
2238 
2239 	/* Fill in the btree header for the new right block. */
2240 	xfs_btree_init_block(cur, xfs_btree_get_level(left), 0, right);
2241 
2242 	/*
2243 	 * Split the entries between the old and the new block evenly.
2244 	 * Make sure that if there's an odd number of entries now, that
2245 	 * each new block will have the same number of entries.
2246 	 */
2247 	lrecs = xfs_btree_get_numrecs(left);
2248 	rrecs = lrecs / 2;
2249 	if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
2250 		rrecs++;
2251 	src_index = (lrecs - rrecs + 1);
2252 
2253 	XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2254 
2255 	/*
2256 	 * Copy btree block entries from the left block over to the
2257 	 * new block, the right. Update the right block and log the
2258 	 * changes.
2259 	 */
2260 	if (level > 0) {
2261 		/* It's a non-leaf.  Move keys and pointers. */
2262 		union xfs_btree_key	*lkp;	/* left btree key */
2263 		union xfs_btree_ptr	*lpp;	/* left address pointer */
2264 		union xfs_btree_key	*rkp;	/* right btree key */
2265 		union xfs_btree_ptr	*rpp;	/* right address pointer */
2266 
2267 		lkp = xfs_btree_key_addr(cur, src_index, left);
2268 		lpp = xfs_btree_ptr_addr(cur, src_index, left);
2269 		rkp = xfs_btree_key_addr(cur, 1, right);
2270 		rpp = xfs_btree_ptr_addr(cur, 1, right);
2271 
2272 #ifdef DEBUG
2273 		for (i = src_index; i < rrecs; i++) {
2274 			error = xfs_btree_check_ptr(cur, lpp, i, level);
2275 			if (error)
2276 				goto error0;
2277 		}
2278 #endif
2279 
2280 		xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
2281 		xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
2282 
2283 		xfs_btree_log_keys(cur, rbp, 1, rrecs);
2284 		xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2285 
2286 		/* Grab the keys to the entries moved to the right block */
2287 		xfs_btree_copy_keys(cur, key, rkp, 1);
2288 	} else {
2289 		/* It's a leaf.  Move records.  */
2290 		union xfs_btree_rec	*lrp;	/* left record pointer */
2291 		union xfs_btree_rec	*rrp;	/* right record pointer */
2292 
2293 		lrp = xfs_btree_rec_addr(cur, src_index, left);
2294 		rrp = xfs_btree_rec_addr(cur, 1, right);
2295 
2296 		xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
2297 		xfs_btree_log_recs(cur, rbp, 1, rrecs);
2298 
2299 		cur->bc_ops->init_key_from_rec(key,
2300 			xfs_btree_rec_addr(cur, 1, right));
2301 	}
2302 
2303 
2304 	/*
2305 	 * Find the left block number by looking in the buffer.
2306 	 * Adjust numrecs, sibling pointers.
2307 	 */
2308 	xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
2309 	xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
2310 	xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2311 	xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2312 
2313 	lrecs -= rrecs;
2314 	xfs_btree_set_numrecs(left, lrecs);
2315 	xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2316 
2317 	xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
2318 	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
2319 
2320 	/*
2321 	 * If there's a block to the new block's right, make that block
2322 	 * point back to right instead of to left.
2323 	 */
2324 	if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
2325 		error = xfs_btree_read_buf_block(cur, &rrptr, level,
2326 							0, &rrblock, &rrbp);
2327 		if (error)
2328 			goto error0;
2329 		xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
2330 		xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
2331 	}
2332 	/*
2333 	 * If the cursor is really in the right block, move it there.
2334 	 * If it's just pointing past the last entry in left, then we'll
2335 	 * insert there, so don't change anything in that case.
2336 	 */
2337 	if (cur->bc_ptrs[level] > lrecs + 1) {
2338 		xfs_btree_setbuf(cur, level, rbp);
2339 		cur->bc_ptrs[level] -= lrecs;
2340 	}
2341 	/*
2342 	 * If there are more levels, we'll need another cursor which refers
2343 	 * the right block, no matter where this cursor was.
2344 	 */
2345 	if (level + 1 < cur->bc_nlevels) {
2346 		error = xfs_btree_dup_cursor(cur, curp);
2347 		if (error)
2348 			goto error0;
2349 		(*curp)->bc_ptrs[level + 1]++;
2350 	}
2351 	*ptrp = rptr;
2352 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2353 	*stat = 1;
2354 	return 0;
2355 out0:
2356 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2357 	*stat = 0;
2358 	return 0;
2359 
2360 error0:
2361 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2362 	return error;
2363 }
2364 
2365 /*
2366  * Copy the old inode root contents into a real block and make the
2367  * broot point to it.
2368  */
2369 int						/* error */
xfs_btree_new_iroot(struct xfs_btree_cur * cur,int * logflags,int * stat)2370 xfs_btree_new_iroot(
2371 	struct xfs_btree_cur	*cur,		/* btree cursor */
2372 	int			*logflags,	/* logging flags for inode */
2373 	int			*stat)		/* return status - 0 fail */
2374 {
2375 	struct xfs_buf		*cbp;		/* buffer for cblock */
2376 	struct xfs_btree_block	*block;		/* btree block */
2377 	struct xfs_btree_block	*cblock;	/* child btree block */
2378 	union xfs_btree_key	*ckp;		/* child key pointer */
2379 	union xfs_btree_ptr	*cpp;		/* child ptr pointer */
2380 	union xfs_btree_key	*kp;		/* pointer to btree key */
2381 	union xfs_btree_ptr	*pp;		/* pointer to block addr */
2382 	union xfs_btree_ptr	nptr;		/* new block addr */
2383 	int			level;		/* btree level */
2384 	int			error;		/* error return code */
2385 #ifdef DEBUG
2386 	int			i;		/* loop counter */
2387 #endif
2388 
2389 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2390 	XFS_BTREE_STATS_INC(cur, newroot);
2391 
2392 	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2393 
2394 	level = cur->bc_nlevels - 1;
2395 
2396 	block = xfs_btree_get_iroot(cur);
2397 	pp = xfs_btree_ptr_addr(cur, 1, block);
2398 
2399 	/* Allocate the new block. If we can't do it, we're toast. Give up. */
2400 	error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat);
2401 	if (error)
2402 		goto error0;
2403 	if (*stat == 0) {
2404 		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2405 		return 0;
2406 	}
2407 	XFS_BTREE_STATS_INC(cur, alloc);
2408 
2409 	/* Copy the root into a real block. */
2410 	error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
2411 	if (error)
2412 		goto error0;
2413 
2414 	memcpy(cblock, block, xfs_btree_block_len(cur));
2415 
2416 	be16_add_cpu(&block->bb_level, 1);
2417 	xfs_btree_set_numrecs(block, 1);
2418 	cur->bc_nlevels++;
2419 	cur->bc_ptrs[level + 1] = 1;
2420 
2421 	kp = xfs_btree_key_addr(cur, 1, block);
2422 	ckp = xfs_btree_key_addr(cur, 1, cblock);
2423 	xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
2424 
2425 	cpp = xfs_btree_ptr_addr(cur, 1, cblock);
2426 #ifdef DEBUG
2427 	for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
2428 		error = xfs_btree_check_ptr(cur, pp, i, level);
2429 		if (error)
2430 			goto error0;
2431 	}
2432 #endif
2433 	xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
2434 
2435 #ifdef DEBUG
2436 	error = xfs_btree_check_ptr(cur, &nptr, 0, level);
2437 	if (error)
2438 		goto error0;
2439 #endif
2440 	xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
2441 
2442 	xfs_iroot_realloc(cur->bc_private.b.ip,
2443 			  1 - xfs_btree_get_numrecs(cblock),
2444 			  cur->bc_private.b.whichfork);
2445 
2446 	xfs_btree_setbuf(cur, level, cbp);
2447 
2448 	/*
2449 	 * Do all this logging at the end so that
2450 	 * the root is at the right level.
2451 	 */
2452 	xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
2453 	xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2454 	xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2455 
2456 	*logflags |=
2457 		XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
2458 	*stat = 1;
2459 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2460 	return 0;
2461 error0:
2462 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2463 	return error;
2464 }
2465 
2466 /*
2467  * Allocate a new root block, fill it in.
2468  */
2469 STATIC int				/* error */
xfs_btree_new_root(struct xfs_btree_cur * cur,int * stat)2470 xfs_btree_new_root(
2471 	struct xfs_btree_cur	*cur,	/* btree cursor */
2472 	int			*stat)	/* success/failure */
2473 {
2474 	struct xfs_btree_block	*block;	/* one half of the old root block */
2475 	struct xfs_buf		*bp;	/* buffer containing block */
2476 	int			error;	/* error return value */
2477 	struct xfs_buf		*lbp;	/* left buffer pointer */
2478 	struct xfs_btree_block	*left;	/* left btree block */
2479 	struct xfs_buf		*nbp;	/* new (root) buffer */
2480 	struct xfs_btree_block	*new;	/* new (root) btree block */
2481 	int			nptr;	/* new value for key index, 1 or 2 */
2482 	struct xfs_buf		*rbp;	/* right buffer pointer */
2483 	struct xfs_btree_block	*right;	/* right btree block */
2484 	union xfs_btree_ptr	rptr;
2485 	union xfs_btree_ptr	lptr;
2486 
2487 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2488 	XFS_BTREE_STATS_INC(cur, newroot);
2489 
2490 	/* initialise our start point from the cursor */
2491 	cur->bc_ops->init_ptr_from_cur(cur, &rptr);
2492 
2493 	/* Allocate the new block. If we can't do it, we're toast. Give up. */
2494 	error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat);
2495 	if (error)
2496 		goto error0;
2497 	if (*stat == 0)
2498 		goto out0;
2499 	XFS_BTREE_STATS_INC(cur, alloc);
2500 
2501 	/* Set up the new block. */
2502 	error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
2503 	if (error)
2504 		goto error0;
2505 
2506 	/* Set the root in the holding structure  increasing the level by 1. */
2507 	cur->bc_ops->set_root(cur, &lptr, 1);
2508 
2509 	/*
2510 	 * At the previous root level there are now two blocks: the old root,
2511 	 * and the new block generated when it was split.  We don't know which
2512 	 * one the cursor is pointing at, so we set up variables "left" and
2513 	 * "right" for each case.
2514 	 */
2515 	block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
2516 
2517 #ifdef DEBUG
2518 	error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
2519 	if (error)
2520 		goto error0;
2521 #endif
2522 
2523 	xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
2524 	if (!xfs_btree_ptr_is_null(cur, &rptr)) {
2525 		/* Our block is left, pick up the right block. */
2526 		lbp = bp;
2527 		xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2528 		left = block;
2529 		error = xfs_btree_read_buf_block(cur, &rptr,
2530 					cur->bc_nlevels - 1, 0, &right, &rbp);
2531 		if (error)
2532 			goto error0;
2533 		bp = rbp;
2534 		nptr = 1;
2535 	} else {
2536 		/* Our block is right, pick up the left block. */
2537 		rbp = bp;
2538 		xfs_btree_buf_to_ptr(cur, rbp, &rptr);
2539 		right = block;
2540 		xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2541 		error = xfs_btree_read_buf_block(cur, &lptr,
2542 					cur->bc_nlevels - 1, 0, &left, &lbp);
2543 		if (error)
2544 			goto error0;
2545 		bp = lbp;
2546 		nptr = 2;
2547 	}
2548 	/* Fill in the new block's btree header and log it. */
2549 	xfs_btree_init_block(cur, cur->bc_nlevels, 2, new);
2550 	xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
2551 	ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
2552 			!xfs_btree_ptr_is_null(cur, &rptr));
2553 
2554 	/* Fill in the key data in the new root. */
2555 	if (xfs_btree_get_level(left) > 0) {
2556 		xfs_btree_copy_keys(cur,
2557 				xfs_btree_key_addr(cur, 1, new),
2558 				xfs_btree_key_addr(cur, 1, left), 1);
2559 		xfs_btree_copy_keys(cur,
2560 				xfs_btree_key_addr(cur, 2, new),
2561 				xfs_btree_key_addr(cur, 1, right), 1);
2562 	} else {
2563 		cur->bc_ops->init_key_from_rec(
2564 				xfs_btree_key_addr(cur, 1, new),
2565 				xfs_btree_rec_addr(cur, 1, left));
2566 		cur->bc_ops->init_key_from_rec(
2567 				xfs_btree_key_addr(cur, 2, new),
2568 				xfs_btree_rec_addr(cur, 1, right));
2569 	}
2570 	xfs_btree_log_keys(cur, nbp, 1, 2);
2571 
2572 	/* Fill in the pointer data in the new root. */
2573 	xfs_btree_copy_ptrs(cur,
2574 		xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
2575 	xfs_btree_copy_ptrs(cur,
2576 		xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
2577 	xfs_btree_log_ptrs(cur, nbp, 1, 2);
2578 
2579 	/* Fix up the cursor. */
2580 	xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
2581 	cur->bc_ptrs[cur->bc_nlevels] = nptr;
2582 	cur->bc_nlevels++;
2583 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2584 	*stat = 1;
2585 	return 0;
2586 error0:
2587 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2588 	return error;
2589 out0:
2590 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2591 	*stat = 0;
2592 	return 0;
2593 }
2594 
2595 STATIC int
xfs_btree_make_block_unfull(struct xfs_btree_cur * cur,int level,int numrecs,int * oindex,int * index,union xfs_btree_ptr * nptr,struct xfs_btree_cur ** ncur,union xfs_btree_rec * nrec,int * stat)2596 xfs_btree_make_block_unfull(
2597 	struct xfs_btree_cur	*cur,	/* btree cursor */
2598 	int			level,	/* btree level */
2599 	int			numrecs,/* # of recs in block */
2600 	int			*oindex,/* old tree index */
2601 	int			*index,	/* new tree index */
2602 	union xfs_btree_ptr	*nptr,	/* new btree ptr */
2603 	struct xfs_btree_cur	**ncur,	/* new btree cursor */
2604 	union xfs_btree_rec	*nrec,	/* new record */
2605 	int			*stat)
2606 {
2607 	union xfs_btree_key	key;	/* new btree key value */
2608 	int			error = 0;
2609 
2610 	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2611 	    level == cur->bc_nlevels - 1) {
2612 	    	struct xfs_inode *ip = cur->bc_private.b.ip;
2613 
2614 		if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
2615 			/* A root block that can be made bigger. */
2616 
2617 			xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
2618 		} else {
2619 			/* A root block that needs replacing */
2620 			int	logflags = 0;
2621 
2622 			error = xfs_btree_new_iroot(cur, &logflags, stat);
2623 			if (error || *stat == 0)
2624 				return error;
2625 
2626 			xfs_trans_log_inode(cur->bc_tp, ip, logflags);
2627 		}
2628 
2629 		return 0;
2630 	}
2631 
2632 	/* First, try shifting an entry to the right neighbor. */
2633 	error = xfs_btree_rshift(cur, level, stat);
2634 	if (error || *stat)
2635 		return error;
2636 
2637 	/* Next, try shifting an entry to the left neighbor. */
2638 	error = xfs_btree_lshift(cur, level, stat);
2639 	if (error)
2640 		return error;
2641 
2642 	if (*stat) {
2643 		*oindex = *index = cur->bc_ptrs[level];
2644 		return 0;
2645 	}
2646 
2647 	/*
2648 	 * Next, try splitting the current block in half.
2649 	 *
2650 	 * If this works we have to re-set our variables because we
2651 	 * could be in a different block now.
2652 	 */
2653 	error = xfs_btree_split(cur, level, nptr, &key, ncur, stat);
2654 	if (error || *stat == 0)
2655 		return error;
2656 
2657 
2658 	*index = cur->bc_ptrs[level];
2659 	cur->bc_ops->init_rec_from_key(&key, nrec);
2660 	return 0;
2661 }
2662 
2663 /*
2664  * Insert one record/level.  Return information to the caller
2665  * allowing the next level up to proceed if necessary.
2666  */
2667 STATIC int
xfs_btree_insrec(struct xfs_btree_cur * cur,int level,union xfs_btree_ptr * ptrp,union xfs_btree_rec * recp,struct xfs_btree_cur ** curp,int * stat)2668 xfs_btree_insrec(
2669 	struct xfs_btree_cur	*cur,	/* btree cursor */
2670 	int			level,	/* level to insert record at */
2671 	union xfs_btree_ptr	*ptrp,	/* i/o: block number inserted */
2672 	union xfs_btree_rec	*recp,	/* i/o: record data inserted */
2673 	struct xfs_btree_cur	**curp,	/* output: new cursor replacing cur */
2674 	int			*stat)	/* success/failure */
2675 {
2676 	struct xfs_btree_block	*block;	/* btree block */
2677 	struct xfs_buf		*bp;	/* buffer for block */
2678 	union xfs_btree_key	key;	/* btree key */
2679 	union xfs_btree_ptr	nptr;	/* new block ptr */
2680 	struct xfs_btree_cur	*ncur;	/* new btree cursor */
2681 	union xfs_btree_rec	nrec;	/* new record count */
2682 	int			optr;	/* old key/record index */
2683 	int			ptr;	/* key/record index */
2684 	int			numrecs;/* number of records */
2685 	int			error;	/* error return value */
2686 #ifdef DEBUG
2687 	int			i;
2688 #endif
2689 
2690 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2691 	XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp);
2692 
2693 	ncur = NULL;
2694 
2695 	/*
2696 	 * If we have an external root pointer, and we've made it to the
2697 	 * root level, allocate a new root block and we're done.
2698 	 */
2699 	if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2700 	    (level >= cur->bc_nlevels)) {
2701 		error = xfs_btree_new_root(cur, stat);
2702 		xfs_btree_set_ptr_null(cur, ptrp);
2703 
2704 		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2705 		return error;
2706 	}
2707 
2708 	/* If we're off the left edge, return failure. */
2709 	ptr = cur->bc_ptrs[level];
2710 	if (ptr == 0) {
2711 		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2712 		*stat = 0;
2713 		return 0;
2714 	}
2715 
2716 	/* Make a key out of the record data to be inserted, and save it. */
2717 	cur->bc_ops->init_key_from_rec(&key, recp);
2718 
2719 	optr = ptr;
2720 
2721 	XFS_BTREE_STATS_INC(cur, insrec);
2722 
2723 	/* Get pointers to the btree buffer and block. */
2724 	block = xfs_btree_get_block(cur, level, &bp);
2725 	numrecs = xfs_btree_get_numrecs(block);
2726 
2727 #ifdef DEBUG
2728 	error = xfs_btree_check_block(cur, block, level, bp);
2729 	if (error)
2730 		goto error0;
2731 
2732 	/* Check that the new entry is being inserted in the right place. */
2733 	if (ptr <= numrecs) {
2734 		if (level == 0) {
2735 			ASSERT(cur->bc_ops->recs_inorder(cur, recp,
2736 				xfs_btree_rec_addr(cur, ptr, block)));
2737 		} else {
2738 			ASSERT(cur->bc_ops->keys_inorder(cur, &key,
2739 				xfs_btree_key_addr(cur, ptr, block)));
2740 		}
2741 	}
2742 #endif
2743 
2744 	/*
2745 	 * If the block is full, we can't insert the new entry until we
2746 	 * make the block un-full.
2747 	 */
2748 	xfs_btree_set_ptr_null(cur, &nptr);
2749 	if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
2750 		error = xfs_btree_make_block_unfull(cur, level, numrecs,
2751 					&optr, &ptr, &nptr, &ncur, &nrec, stat);
2752 		if (error || *stat == 0)
2753 			goto error0;
2754 	}
2755 
2756 	/*
2757 	 * The current block may have changed if the block was
2758 	 * previously full and we have just made space in it.
2759 	 */
2760 	block = xfs_btree_get_block(cur, level, &bp);
2761 	numrecs = xfs_btree_get_numrecs(block);
2762 
2763 #ifdef DEBUG
2764 	error = xfs_btree_check_block(cur, block, level, bp);
2765 	if (error)
2766 		return error;
2767 #endif
2768 
2769 	/*
2770 	 * At this point we know there's room for our new entry in the block
2771 	 * we're pointing at.
2772 	 */
2773 	XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
2774 
2775 	if (level > 0) {
2776 		/* It's a nonleaf. make a hole in the keys and ptrs */
2777 		union xfs_btree_key	*kp;
2778 		union xfs_btree_ptr	*pp;
2779 
2780 		kp = xfs_btree_key_addr(cur, ptr, block);
2781 		pp = xfs_btree_ptr_addr(cur, ptr, block);
2782 
2783 #ifdef DEBUG
2784 		for (i = numrecs - ptr; i >= 0; i--) {
2785 			error = xfs_btree_check_ptr(cur, pp, i, level);
2786 			if (error)
2787 				return error;
2788 		}
2789 #endif
2790 
2791 		xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
2792 		xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
2793 
2794 #ifdef DEBUG
2795 		error = xfs_btree_check_ptr(cur, ptrp, 0, level);
2796 		if (error)
2797 			goto error0;
2798 #endif
2799 
2800 		/* Now put the new data in, bump numrecs and log it. */
2801 		xfs_btree_copy_keys(cur, kp, &key, 1);
2802 		xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
2803 		numrecs++;
2804 		xfs_btree_set_numrecs(block, numrecs);
2805 		xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
2806 		xfs_btree_log_keys(cur, bp, ptr, numrecs);
2807 #ifdef DEBUG
2808 		if (ptr < numrecs) {
2809 			ASSERT(cur->bc_ops->keys_inorder(cur, kp,
2810 				xfs_btree_key_addr(cur, ptr + 1, block)));
2811 		}
2812 #endif
2813 	} else {
2814 		/* It's a leaf. make a hole in the records */
2815 		union xfs_btree_rec             *rp;
2816 
2817 		rp = xfs_btree_rec_addr(cur, ptr, block);
2818 
2819 		xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
2820 
2821 		/* Now put the new data in, bump numrecs and log it. */
2822 		xfs_btree_copy_recs(cur, rp, recp, 1);
2823 		xfs_btree_set_numrecs(block, ++numrecs);
2824 		xfs_btree_log_recs(cur, bp, ptr, numrecs);
2825 #ifdef DEBUG
2826 		if (ptr < numrecs) {
2827 			ASSERT(cur->bc_ops->recs_inorder(cur, rp,
2828 				xfs_btree_rec_addr(cur, ptr + 1, block)));
2829 		}
2830 #endif
2831 	}
2832 
2833 	/* Log the new number of records in the btree header. */
2834 	xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
2835 
2836 	/* If we inserted at the start of a block, update the parents' keys. */
2837 	if (optr == 1) {
2838 		error = xfs_btree_updkey(cur, &key, level + 1);
2839 		if (error)
2840 			goto error0;
2841 	}
2842 
2843 	/*
2844 	 * If we are tracking the last record in the tree and
2845 	 * we are at the far right edge of the tree, update it.
2846 	 */
2847 	if (xfs_btree_is_lastrec(cur, block, level)) {
2848 		cur->bc_ops->update_lastrec(cur, block, recp,
2849 					    ptr, LASTREC_INSREC);
2850 	}
2851 
2852 	/*
2853 	 * Return the new block number, if any.
2854 	 * If there is one, give back a record value and a cursor too.
2855 	 */
2856 	*ptrp = nptr;
2857 	if (!xfs_btree_ptr_is_null(cur, &nptr)) {
2858 		*recp = nrec;
2859 		*curp = ncur;
2860 	}
2861 
2862 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2863 	*stat = 1;
2864 	return 0;
2865 
2866 error0:
2867 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2868 	return error;
2869 }
2870 
2871 /*
2872  * Insert the record at the point referenced by cur.
2873  *
2874  * A multi-level split of the tree on insert will invalidate the original
2875  * cursor.  All callers of this function should assume that the cursor is
2876  * no longer valid and revalidate it.
2877  */
2878 int
xfs_btree_insert(struct xfs_btree_cur * cur,int * stat)2879 xfs_btree_insert(
2880 	struct xfs_btree_cur	*cur,
2881 	int			*stat)
2882 {
2883 	int			error;	/* error return value */
2884 	int			i;	/* result value, 0 for failure */
2885 	int			level;	/* current level number in btree */
2886 	union xfs_btree_ptr	nptr;	/* new block number (split result) */
2887 	struct xfs_btree_cur	*ncur;	/* new cursor (split result) */
2888 	struct xfs_btree_cur	*pcur;	/* previous level's cursor */
2889 	union xfs_btree_rec	rec;	/* record to insert */
2890 
2891 	level = 0;
2892 	ncur = NULL;
2893 	pcur = cur;
2894 
2895 	xfs_btree_set_ptr_null(cur, &nptr);
2896 	cur->bc_ops->init_rec_from_cur(cur, &rec);
2897 
2898 	/*
2899 	 * Loop going up the tree, starting at the leaf level.
2900 	 * Stop when we don't get a split block, that must mean that
2901 	 * the insert is finished with this level.
2902 	 */
2903 	do {
2904 		/*
2905 		 * Insert nrec/nptr into this level of the tree.
2906 		 * Note if we fail, nptr will be null.
2907 		 */
2908 		error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i);
2909 		if (error) {
2910 			if (pcur != cur)
2911 				xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
2912 			goto error0;
2913 		}
2914 
2915 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2916 		level++;
2917 
2918 		/*
2919 		 * See if the cursor we just used is trash.
2920 		 * Can't trash the caller's cursor, but otherwise we should
2921 		 * if ncur is a new cursor or we're about to be done.
2922 		 */
2923 		if (pcur != cur &&
2924 		    (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
2925 			/* Save the state from the cursor before we trash it */
2926 			if (cur->bc_ops->update_cursor)
2927 				cur->bc_ops->update_cursor(pcur, cur);
2928 			cur->bc_nlevels = pcur->bc_nlevels;
2929 			xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
2930 		}
2931 		/* If we got a new cursor, switch to it. */
2932 		if (ncur) {
2933 			pcur = ncur;
2934 			ncur = NULL;
2935 		}
2936 	} while (!xfs_btree_ptr_is_null(cur, &nptr));
2937 
2938 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2939 	*stat = i;
2940 	return 0;
2941 error0:
2942 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2943 	return error;
2944 }
2945 
2946 /*
2947  * Try to merge a non-leaf block back into the inode root.
2948  *
2949  * Note: the killroot names comes from the fact that we're effectively
2950  * killing the old root block.  But because we can't just delete the
2951  * inode we have to copy the single block it was pointing to into the
2952  * inode.
2953  */
2954 int
xfs_btree_kill_iroot(struct xfs_btree_cur * cur)2955 xfs_btree_kill_iroot(
2956 	struct xfs_btree_cur	*cur)
2957 {
2958 	int			whichfork = cur->bc_private.b.whichfork;
2959 	struct xfs_inode	*ip = cur->bc_private.b.ip;
2960 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
2961 	struct xfs_btree_block	*block;
2962 	struct xfs_btree_block	*cblock;
2963 	union xfs_btree_key	*kp;
2964 	union xfs_btree_key	*ckp;
2965 	union xfs_btree_ptr	*pp;
2966 	union xfs_btree_ptr	*cpp;
2967 	struct xfs_buf		*cbp;
2968 	int			level;
2969 	int			index;
2970 	int			numrecs;
2971 #ifdef DEBUG
2972 	union xfs_btree_ptr	ptr;
2973 	int			i;
2974 #endif
2975 
2976 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2977 
2978 	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2979 	ASSERT(cur->bc_nlevels > 1);
2980 
2981 	/*
2982 	 * Don't deal with the root block needs to be a leaf case.
2983 	 * We're just going to turn the thing back into extents anyway.
2984 	 */
2985 	level = cur->bc_nlevels - 1;
2986 	if (level == 1)
2987 		goto out0;
2988 
2989 	/*
2990 	 * Give up if the root has multiple children.
2991 	 */
2992 	block = xfs_btree_get_iroot(cur);
2993 	if (xfs_btree_get_numrecs(block) != 1)
2994 		goto out0;
2995 
2996 	cblock = xfs_btree_get_block(cur, level - 1, &cbp);
2997 	numrecs = xfs_btree_get_numrecs(cblock);
2998 
2999 	/*
3000 	 * Only do this if the next level will fit.
3001 	 * Then the data must be copied up to the inode,
3002 	 * instead of freeing the root you free the next level.
3003 	 */
3004 	if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
3005 		goto out0;
3006 
3007 	XFS_BTREE_STATS_INC(cur, killroot);
3008 
3009 #ifdef DEBUG
3010 	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
3011 	ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3012 	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
3013 	ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3014 #endif
3015 
3016 	index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
3017 	if (index) {
3018 		xfs_iroot_realloc(cur->bc_private.b.ip, index,
3019 				  cur->bc_private.b.whichfork);
3020 		block = ifp->if_broot;
3021 	}
3022 
3023 	be16_add_cpu(&block->bb_numrecs, index);
3024 	ASSERT(block->bb_numrecs == cblock->bb_numrecs);
3025 
3026 	kp = xfs_btree_key_addr(cur, 1, block);
3027 	ckp = xfs_btree_key_addr(cur, 1, cblock);
3028 	xfs_btree_copy_keys(cur, kp, ckp, numrecs);
3029 
3030 	pp = xfs_btree_ptr_addr(cur, 1, block);
3031 	cpp = xfs_btree_ptr_addr(cur, 1, cblock);
3032 #ifdef DEBUG
3033 	for (i = 0; i < numrecs; i++) {
3034 		int		error;
3035 
3036 		error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
3037 		if (error) {
3038 			XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3039 			return error;
3040 		}
3041 	}
3042 #endif
3043 	xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
3044 
3045 	cur->bc_ops->free_block(cur, cbp);
3046 	XFS_BTREE_STATS_INC(cur, free);
3047 
3048 	cur->bc_bufs[level - 1] = NULL;
3049 	be16_add_cpu(&block->bb_level, -1);
3050 	xfs_trans_log_inode(cur->bc_tp, ip,
3051 		XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork));
3052 	cur->bc_nlevels--;
3053 out0:
3054 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3055 	return 0;
3056 }
3057 
3058 STATIC int
xfs_btree_dec_cursor(struct xfs_btree_cur * cur,int level,int * stat)3059 xfs_btree_dec_cursor(
3060 	struct xfs_btree_cur	*cur,
3061 	int			level,
3062 	int			*stat)
3063 {
3064 	int			error;
3065 	int			i;
3066 
3067 	if (level > 0) {
3068 		error = xfs_btree_decrement(cur, level, &i);
3069 		if (error)
3070 			return error;
3071 	}
3072 
3073 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3074 	*stat = 1;
3075 	return 0;
3076 }
3077 
3078 /*
3079  * Single level of the btree record deletion routine.
3080  * Delete record pointed to by cur/level.
3081  * Remove the record from its block then rebalance the tree.
3082  * Return 0 for error, 1 for done, 2 to go on to the next level.
3083  */
3084 STATIC int					/* error */
xfs_btree_delrec(struct xfs_btree_cur * cur,int level,int * stat)3085 xfs_btree_delrec(
3086 	struct xfs_btree_cur	*cur,		/* btree cursor */
3087 	int			level,		/* level removing record from */
3088 	int			*stat)		/* fail/done/go-on */
3089 {
3090 	struct xfs_btree_block	*block;		/* btree block */
3091 	union xfs_btree_ptr	cptr;		/* current block ptr */
3092 	struct xfs_buf		*bp;		/* buffer for block */
3093 	int			error;		/* error return value */
3094 	int			i;		/* loop counter */
3095 	union xfs_btree_key	key;		/* storage for keyp */
3096 	union xfs_btree_key	*keyp = &key;	/* passed to the next level */
3097 	union xfs_btree_ptr	lptr;		/* left sibling block ptr */
3098 	struct xfs_buf		*lbp;		/* left buffer pointer */
3099 	struct xfs_btree_block	*left;		/* left btree block */
3100 	int			lrecs = 0;	/* left record count */
3101 	int			ptr;		/* key/record index */
3102 	union xfs_btree_ptr	rptr;		/* right sibling block ptr */
3103 	struct xfs_buf		*rbp;		/* right buffer pointer */
3104 	struct xfs_btree_block	*right;		/* right btree block */
3105 	struct xfs_btree_block	*rrblock;	/* right-right btree block */
3106 	struct xfs_buf		*rrbp;		/* right-right buffer pointer */
3107 	int			rrecs = 0;	/* right record count */
3108 	struct xfs_btree_cur	*tcur;		/* temporary btree cursor */
3109 	int			numrecs;	/* temporary numrec count */
3110 
3111 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3112 	XFS_BTREE_TRACE_ARGI(cur, level);
3113 
3114 	tcur = NULL;
3115 
3116 	/* Get the index of the entry being deleted, check for nothing there. */
3117 	ptr = cur->bc_ptrs[level];
3118 	if (ptr == 0) {
3119 		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3120 		*stat = 0;
3121 		return 0;
3122 	}
3123 
3124 	/* Get the buffer & block containing the record or key/ptr. */
3125 	block = xfs_btree_get_block(cur, level, &bp);
3126 	numrecs = xfs_btree_get_numrecs(block);
3127 
3128 #ifdef DEBUG
3129 	error = xfs_btree_check_block(cur, block, level, bp);
3130 	if (error)
3131 		goto error0;
3132 #endif
3133 
3134 	/* Fail if we're off the end of the block. */
3135 	if (ptr > numrecs) {
3136 		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3137 		*stat = 0;
3138 		return 0;
3139 	}
3140 
3141 	XFS_BTREE_STATS_INC(cur, delrec);
3142 	XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
3143 
3144 	/* Excise the entries being deleted. */
3145 	if (level > 0) {
3146 		/* It's a nonleaf. operate on keys and ptrs */
3147 		union xfs_btree_key	*lkp;
3148 		union xfs_btree_ptr	*lpp;
3149 
3150 		lkp = xfs_btree_key_addr(cur, ptr + 1, block);
3151 		lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
3152 
3153 #ifdef DEBUG
3154 		for (i = 0; i < numrecs - ptr; i++) {
3155 			error = xfs_btree_check_ptr(cur, lpp, i, level);
3156 			if (error)
3157 				goto error0;
3158 		}
3159 #endif
3160 
3161 		if (ptr < numrecs) {
3162 			xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
3163 			xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
3164 			xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3165 			xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3166 		}
3167 
3168 		/*
3169 		 * If it's the first record in the block, we'll need to pass a
3170 		 * key up to the next level (updkey).
3171 		 */
3172 		if (ptr == 1)
3173 			keyp = xfs_btree_key_addr(cur, 1, block);
3174 	} else {
3175 		/* It's a leaf. operate on records */
3176 		if (ptr < numrecs) {
3177 			xfs_btree_shift_recs(cur,
3178 				xfs_btree_rec_addr(cur, ptr + 1, block),
3179 				-1, numrecs - ptr);
3180 			xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3181 		}
3182 
3183 		/*
3184 		 * If it's the first record in the block, we'll need a key
3185 		 * structure to pass up to the next level (updkey).
3186 		 */
3187 		if (ptr == 1) {
3188 			cur->bc_ops->init_key_from_rec(&key,
3189 					xfs_btree_rec_addr(cur, 1, block));
3190 			keyp = &key;
3191 		}
3192 	}
3193 
3194 	/*
3195 	 * Decrement and log the number of entries in the block.
3196 	 */
3197 	xfs_btree_set_numrecs(block, --numrecs);
3198 	xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3199 
3200 	/*
3201 	 * If we are tracking the last record in the tree and
3202 	 * we are at the far right edge of the tree, update it.
3203 	 */
3204 	if (xfs_btree_is_lastrec(cur, block, level)) {
3205 		cur->bc_ops->update_lastrec(cur, block, NULL,
3206 					    ptr, LASTREC_DELREC);
3207 	}
3208 
3209 	/*
3210 	 * We're at the root level.  First, shrink the root block in-memory.
3211 	 * Try to get rid of the next level down.  If we can't then there's
3212 	 * nothing left to do.
3213 	 */
3214 	if (level == cur->bc_nlevels - 1) {
3215 		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3216 			xfs_iroot_realloc(cur->bc_private.b.ip, -1,
3217 					  cur->bc_private.b.whichfork);
3218 
3219 			error = xfs_btree_kill_iroot(cur);
3220 			if (error)
3221 				goto error0;
3222 
3223 			error = xfs_btree_dec_cursor(cur, level, stat);
3224 			if (error)
3225 				goto error0;
3226 			*stat = 1;
3227 			return 0;
3228 		}
3229 
3230 		/*
3231 		 * If this is the root level, and there's only one entry left,
3232 		 * and it's NOT the leaf level, then we can get rid of this
3233 		 * level.
3234 		 */
3235 		if (numrecs == 1 && level > 0) {
3236 			union xfs_btree_ptr	*pp;
3237 			/*
3238 			 * pp is still set to the first pointer in the block.
3239 			 * Make it the new root of the btree.
3240 			 */
3241 			pp = xfs_btree_ptr_addr(cur, 1, block);
3242 			error = cur->bc_ops->kill_root(cur, bp, level, pp);
3243 			if (error)
3244 				goto error0;
3245 		} else if (level > 0) {
3246 			error = xfs_btree_dec_cursor(cur, level, stat);
3247 			if (error)
3248 				goto error0;
3249 		}
3250 		*stat = 1;
3251 		return 0;
3252 	}
3253 
3254 	/*
3255 	 * If we deleted the leftmost entry in the block, update the
3256 	 * key values above us in the tree.
3257 	 */
3258 	if (ptr == 1) {
3259 		error = xfs_btree_updkey(cur, keyp, level + 1);
3260 		if (error)
3261 			goto error0;
3262 	}
3263 
3264 	/*
3265 	 * If the number of records remaining in the block is at least
3266 	 * the minimum, we're done.
3267 	 */
3268 	if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
3269 		error = xfs_btree_dec_cursor(cur, level, stat);
3270 		if (error)
3271 			goto error0;
3272 		return 0;
3273 	}
3274 
3275 	/*
3276 	 * Otherwise, we have to move some records around to keep the
3277 	 * tree balanced.  Look at the left and right sibling blocks to
3278 	 * see if we can re-balance by moving only one record.
3279 	 */
3280 	xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3281 	xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
3282 
3283 	if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3284 		/*
3285 		 * One child of root, need to get a chance to copy its contents
3286 		 * into the root and delete it. Can't go up to next level,
3287 		 * there's nothing to delete there.
3288 		 */
3289 		if (xfs_btree_ptr_is_null(cur, &rptr) &&
3290 		    xfs_btree_ptr_is_null(cur, &lptr) &&
3291 		    level == cur->bc_nlevels - 2) {
3292 			error = xfs_btree_kill_iroot(cur);
3293 			if (!error)
3294 				error = xfs_btree_dec_cursor(cur, level, stat);
3295 			if (error)
3296 				goto error0;
3297 			return 0;
3298 		}
3299 	}
3300 
3301 	ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
3302 	       !xfs_btree_ptr_is_null(cur, &lptr));
3303 
3304 	/*
3305 	 * Duplicate the cursor so our btree manipulations here won't
3306 	 * disrupt the next level up.
3307 	 */
3308 	error = xfs_btree_dup_cursor(cur, &tcur);
3309 	if (error)
3310 		goto error0;
3311 
3312 	/*
3313 	 * If there's a right sibling, see if it's ok to shift an entry
3314 	 * out of it.
3315 	 */
3316 	if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3317 		/*
3318 		 * Move the temp cursor to the last entry in the next block.
3319 		 * Actually any entry but the first would suffice.
3320 		 */
3321 		i = xfs_btree_lastrec(tcur, level);
3322 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3323 
3324 		error = xfs_btree_increment(tcur, level, &i);
3325 		if (error)
3326 			goto error0;
3327 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3328 
3329 		i = xfs_btree_lastrec(tcur, level);
3330 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3331 
3332 		/* Grab a pointer to the block. */
3333 		right = xfs_btree_get_block(tcur, level, &rbp);
3334 #ifdef DEBUG
3335 		error = xfs_btree_check_block(tcur, right, level, rbp);
3336 		if (error)
3337 			goto error0;
3338 #endif
3339 		/* Grab the current block number, for future use. */
3340 		xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
3341 
3342 		/*
3343 		 * If right block is full enough so that removing one entry
3344 		 * won't make it too empty, and left-shifting an entry out
3345 		 * of right to us works, we're done.
3346 		 */
3347 		if (xfs_btree_get_numrecs(right) - 1 >=
3348 		    cur->bc_ops->get_minrecs(tcur, level)) {
3349 			error = xfs_btree_lshift(tcur, level, &i);
3350 			if (error)
3351 				goto error0;
3352 			if (i) {
3353 				ASSERT(xfs_btree_get_numrecs(block) >=
3354 				       cur->bc_ops->get_minrecs(tcur, level));
3355 
3356 				xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3357 				tcur = NULL;
3358 
3359 				error = xfs_btree_dec_cursor(cur, level, stat);
3360 				if (error)
3361 					goto error0;
3362 				return 0;
3363 			}
3364 		}
3365 
3366 		/*
3367 		 * Otherwise, grab the number of records in right for
3368 		 * future reference, and fix up the temp cursor to point
3369 		 * to our block again (last record).
3370 		 */
3371 		rrecs = xfs_btree_get_numrecs(right);
3372 		if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3373 			i = xfs_btree_firstrec(tcur, level);
3374 			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3375 
3376 			error = xfs_btree_decrement(tcur, level, &i);
3377 			if (error)
3378 				goto error0;
3379 			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3380 		}
3381 	}
3382 
3383 	/*
3384 	 * If there's a left sibling, see if it's ok to shift an entry
3385 	 * out of it.
3386 	 */
3387 	if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3388 		/*
3389 		 * Move the temp cursor to the first entry in the
3390 		 * previous block.
3391 		 */
3392 		i = xfs_btree_firstrec(tcur, level);
3393 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3394 
3395 		error = xfs_btree_decrement(tcur, level, &i);
3396 		if (error)
3397 			goto error0;
3398 		i = xfs_btree_firstrec(tcur, level);
3399 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3400 
3401 		/* Grab a pointer to the block. */
3402 		left = xfs_btree_get_block(tcur, level, &lbp);
3403 #ifdef DEBUG
3404 		error = xfs_btree_check_block(cur, left, level, lbp);
3405 		if (error)
3406 			goto error0;
3407 #endif
3408 		/* Grab the current block number, for future use. */
3409 		xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
3410 
3411 		/*
3412 		 * If left block is full enough so that removing one entry
3413 		 * won't make it too empty, and right-shifting an entry out
3414 		 * of left to us works, we're done.
3415 		 */
3416 		if (xfs_btree_get_numrecs(left) - 1 >=
3417 		    cur->bc_ops->get_minrecs(tcur, level)) {
3418 			error = xfs_btree_rshift(tcur, level, &i);
3419 			if (error)
3420 				goto error0;
3421 			if (i) {
3422 				ASSERT(xfs_btree_get_numrecs(block) >=
3423 				       cur->bc_ops->get_minrecs(tcur, level));
3424 				xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3425 				tcur = NULL;
3426 				if (level == 0)
3427 					cur->bc_ptrs[0]++;
3428 				XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3429 				*stat = 1;
3430 				return 0;
3431 			}
3432 		}
3433 
3434 		/*
3435 		 * Otherwise, grab the number of records in right for
3436 		 * future reference.
3437 		 */
3438 		lrecs = xfs_btree_get_numrecs(left);
3439 	}
3440 
3441 	/* Delete the temp cursor, we're done with it. */
3442 	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3443 	tcur = NULL;
3444 
3445 	/* If here, we need to do a join to keep the tree balanced. */
3446 	ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
3447 
3448 	if (!xfs_btree_ptr_is_null(cur, &lptr) &&
3449 	    lrecs + xfs_btree_get_numrecs(block) <=
3450 			cur->bc_ops->get_maxrecs(cur, level)) {
3451 		/*
3452 		 * Set "right" to be the starting block,
3453 		 * "left" to be the left neighbor.
3454 		 */
3455 		rptr = cptr;
3456 		right = block;
3457 		rbp = bp;
3458 		error = xfs_btree_read_buf_block(cur, &lptr, level,
3459 							0, &left, &lbp);
3460 		if (error)
3461 			goto error0;
3462 
3463 	/*
3464 	 * If that won't work, see if we can join with the right neighbor block.
3465 	 */
3466 	} else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
3467 		   rrecs + xfs_btree_get_numrecs(block) <=
3468 			cur->bc_ops->get_maxrecs(cur, level)) {
3469 		/*
3470 		 * Set "left" to be the starting block,
3471 		 * "right" to be the right neighbor.
3472 		 */
3473 		lptr = cptr;
3474 		left = block;
3475 		lbp = bp;
3476 		error = xfs_btree_read_buf_block(cur, &rptr, level,
3477 							0, &right, &rbp);
3478 		if (error)
3479 			goto error0;
3480 
3481 	/*
3482 	 * Otherwise, we can't fix the imbalance.
3483 	 * Just return.  This is probably a logic error, but it's not fatal.
3484 	 */
3485 	} else {
3486 		error = xfs_btree_dec_cursor(cur, level, stat);
3487 		if (error)
3488 			goto error0;
3489 		return 0;
3490 	}
3491 
3492 	rrecs = xfs_btree_get_numrecs(right);
3493 	lrecs = xfs_btree_get_numrecs(left);
3494 
3495 	/*
3496 	 * We're now going to join "left" and "right" by moving all the stuff
3497 	 * in "right" to "left" and deleting "right".
3498 	 */
3499 	XFS_BTREE_STATS_ADD(cur, moves, rrecs);
3500 	if (level > 0) {
3501 		/* It's a non-leaf.  Move keys and pointers. */
3502 		union xfs_btree_key	*lkp;	/* left btree key */
3503 		union xfs_btree_ptr	*lpp;	/* left address pointer */
3504 		union xfs_btree_key	*rkp;	/* right btree key */
3505 		union xfs_btree_ptr	*rpp;	/* right address pointer */
3506 
3507 		lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
3508 		lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
3509 		rkp = xfs_btree_key_addr(cur, 1, right);
3510 		rpp = xfs_btree_ptr_addr(cur, 1, right);
3511 #ifdef DEBUG
3512 		for (i = 1; i < rrecs; i++) {
3513 			error = xfs_btree_check_ptr(cur, rpp, i, level);
3514 			if (error)
3515 				goto error0;
3516 		}
3517 #endif
3518 		xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
3519 		xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
3520 
3521 		xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
3522 		xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
3523 	} else {
3524 		/* It's a leaf.  Move records.  */
3525 		union xfs_btree_rec	*lrp;	/* left record pointer */
3526 		union xfs_btree_rec	*rrp;	/* right record pointer */
3527 
3528 		lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
3529 		rrp = xfs_btree_rec_addr(cur, 1, right);
3530 
3531 		xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
3532 		xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
3533 	}
3534 
3535 	XFS_BTREE_STATS_INC(cur, join);
3536 
3537 	/*
3538 	 * Fix up the the number of records and right block pointer in the
3539 	 * surviving block, and log it.
3540 	 */
3541 	xfs_btree_set_numrecs(left, lrecs + rrecs);
3542 	xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB),
3543 	xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3544 	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
3545 
3546 	/* If there is a right sibling, point it to the remaining block. */
3547 	xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3548 	if (!xfs_btree_ptr_is_null(cur, &cptr)) {
3549 		error = xfs_btree_read_buf_block(cur, &cptr, level,
3550 							0, &rrblock, &rrbp);
3551 		if (error)
3552 			goto error0;
3553 		xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
3554 		xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
3555 	}
3556 
3557 	/* Free the deleted block. */
3558 	error = cur->bc_ops->free_block(cur, rbp);
3559 	if (error)
3560 		goto error0;
3561 	XFS_BTREE_STATS_INC(cur, free);
3562 
3563 	/*
3564 	 * If we joined with the left neighbor, set the buffer in the
3565 	 * cursor to the left block, and fix up the index.
3566 	 */
3567 	if (bp != lbp) {
3568 		cur->bc_bufs[level] = lbp;
3569 		cur->bc_ptrs[level] += lrecs;
3570 		cur->bc_ra[level] = 0;
3571 	}
3572 	/*
3573 	 * If we joined with the right neighbor and there's a level above
3574 	 * us, increment the cursor at that level.
3575 	 */
3576 	else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
3577 		   (level + 1 < cur->bc_nlevels)) {
3578 		error = xfs_btree_increment(cur, level + 1, &i);
3579 		if (error)
3580 			goto error0;
3581 	}
3582 
3583 	/*
3584 	 * Readjust the ptr at this level if it's not a leaf, since it's
3585 	 * still pointing at the deletion point, which makes the cursor
3586 	 * inconsistent.  If this makes the ptr 0, the caller fixes it up.
3587 	 * We can't use decrement because it would change the next level up.
3588 	 */
3589 	if (level > 0)
3590 		cur->bc_ptrs[level]--;
3591 
3592 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3593 	/* Return value means the next level up has something to do. */
3594 	*stat = 2;
3595 	return 0;
3596 
3597 error0:
3598 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3599 	if (tcur)
3600 		xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
3601 	return error;
3602 }
3603 
3604 /*
3605  * Delete the record pointed to by cur.
3606  * The cursor refers to the place where the record was (could be inserted)
3607  * when the operation returns.
3608  */
3609 int					/* error */
xfs_btree_delete(struct xfs_btree_cur * cur,int * stat)3610 xfs_btree_delete(
3611 	struct xfs_btree_cur	*cur,
3612 	int			*stat)	/* success/failure */
3613 {
3614 	int			error;	/* error return value */
3615 	int			level;
3616 	int			i;
3617 
3618 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3619 
3620 	/*
3621 	 * Go up the tree, starting at leaf level.
3622 	 *
3623 	 * If 2 is returned then a join was done; go to the next level.
3624 	 * Otherwise we are done.
3625 	 */
3626 	for (level = 0, i = 2; i == 2; level++) {
3627 		error = xfs_btree_delrec(cur, level, &i);
3628 		if (error)
3629 			goto error0;
3630 	}
3631 
3632 	if (i == 0) {
3633 		for (level = 1; level < cur->bc_nlevels; level++) {
3634 			if (cur->bc_ptrs[level] == 0) {
3635 				error = xfs_btree_decrement(cur, level, &i);
3636 				if (error)
3637 					goto error0;
3638 				break;
3639 			}
3640 		}
3641 	}
3642 
3643 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3644 	*stat = i;
3645 	return 0;
3646 error0:
3647 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3648 	return error;
3649 }
3650 
3651 /*
3652  * Get the data from the pointed-to record.
3653  */
3654 int					/* error */
xfs_btree_get_rec(struct xfs_btree_cur * cur,union xfs_btree_rec ** recp,int * stat)3655 xfs_btree_get_rec(
3656 	struct xfs_btree_cur	*cur,	/* btree cursor */
3657 	union xfs_btree_rec	**recp,	/* output: btree record */
3658 	int			*stat)	/* output: success/failure */
3659 {
3660 	struct xfs_btree_block	*block;	/* btree block */
3661 	struct xfs_buf		*bp;	/* buffer pointer */
3662 	int			ptr;	/* record number */
3663 #ifdef DEBUG
3664 	int			error;	/* error return value */
3665 #endif
3666 
3667 	ptr = cur->bc_ptrs[0];
3668 	block = xfs_btree_get_block(cur, 0, &bp);
3669 
3670 #ifdef DEBUG
3671 	error = xfs_btree_check_block(cur, block, 0, bp);
3672 	if (error)
3673 		return error;
3674 #endif
3675 
3676 	/*
3677 	 * Off the right end or left end, return failure.
3678 	 */
3679 	if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
3680 		*stat = 0;
3681 		return 0;
3682 	}
3683 
3684 	/*
3685 	 * Point to the record and extract its data.
3686 	 */
3687 	*recp = xfs_btree_rec_addr(cur, ptr, block);
3688 	*stat = 1;
3689 	return 0;
3690 }
3691