• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2007,2008 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 
19 #include <linux/sched.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "transaction.h"
23 #include "print-tree.h"
24 #include "locking.h"
25 
26 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
27 		      *root, struct btrfs_path *path, int level);
28 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
29 		      *root, struct btrfs_key *ins_key,
30 		      struct btrfs_path *path, int data_size, int extend);
31 static int push_node_left(struct btrfs_trans_handle *trans,
32 			  struct btrfs_root *root, struct extent_buffer *dst,
33 			  struct extent_buffer *src, int empty);
34 static int balance_node_right(struct btrfs_trans_handle *trans,
35 			      struct btrfs_root *root,
36 			      struct extent_buffer *dst_buf,
37 			      struct extent_buffer *src_buf);
38 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
39 		   struct btrfs_path *path, int level, int slot);
40 
btrfs_alloc_path(void)41 struct btrfs_path *btrfs_alloc_path(void)
42 {
43 	struct btrfs_path *path;
44 	path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
45 	if (path)
46 		path->reada = 1;
47 	return path;
48 }
49 
50 /*
51  * set all locked nodes in the path to blocking locks.  This should
52  * be done before scheduling
53  */
btrfs_set_path_blocking(struct btrfs_path * p)54 noinline void btrfs_set_path_blocking(struct btrfs_path *p)
55 {
56 	int i;
57 	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
58 		if (p->nodes[i] && p->locks[i])
59 			btrfs_set_lock_blocking(p->nodes[i]);
60 	}
61 }
62 
63 /*
64  * reset all the locked nodes in the patch to spinning locks.
65  *
66  * held is used to keep lockdep happy, when lockdep is enabled
67  * we set held to a blocking lock before we go around and
68  * retake all the spinlocks in the path.  You can safely use NULL
69  * for held
70  */
btrfs_clear_path_blocking(struct btrfs_path * p,struct extent_buffer * held)71 noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
72 					struct extent_buffer *held)
73 {
74 	int i;
75 
76 #ifdef CONFIG_DEBUG_LOCK_ALLOC
77 	/* lockdep really cares that we take all of these spinlocks
78 	 * in the right order.  If any of the locks in the path are not
79 	 * currently blocking, it is going to complain.  So, make really
80 	 * really sure by forcing the path to blocking before we clear
81 	 * the path blocking.
82 	 */
83 	if (held)
84 		btrfs_set_lock_blocking(held);
85 	btrfs_set_path_blocking(p);
86 #endif
87 
88 	for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
89 		if (p->nodes[i] && p->locks[i])
90 			btrfs_clear_lock_blocking(p->nodes[i]);
91 	}
92 
93 #ifdef CONFIG_DEBUG_LOCK_ALLOC
94 	if (held)
95 		btrfs_clear_lock_blocking(held);
96 #endif
97 }
98 
99 /* this also releases the path */
btrfs_free_path(struct btrfs_path * p)100 void btrfs_free_path(struct btrfs_path *p)
101 {
102 	btrfs_release_path(NULL, p);
103 	kmem_cache_free(btrfs_path_cachep, p);
104 }
105 
106 /*
107  * path release drops references on the extent buffers in the path
108  * and it drops any locks held by this path
109  *
110  * It is safe to call this on paths that no locks or extent buffers held.
111  */
btrfs_release_path(struct btrfs_root * root,struct btrfs_path * p)112 noinline void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
113 {
114 	int i;
115 
116 	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
117 		p->slots[i] = 0;
118 		if (!p->nodes[i])
119 			continue;
120 		if (p->locks[i]) {
121 			btrfs_tree_unlock(p->nodes[i]);
122 			p->locks[i] = 0;
123 		}
124 		free_extent_buffer(p->nodes[i]);
125 		p->nodes[i] = NULL;
126 	}
127 }
128 
129 /*
130  * safely gets a reference on the root node of a tree.  A lock
131  * is not taken, so a concurrent writer may put a different node
132  * at the root of the tree.  See btrfs_lock_root_node for the
133  * looping required.
134  *
135  * The extent buffer returned by this has a reference taken, so
136  * it won't disappear.  It may stop being the root of the tree
137  * at any time because there are no locks held.
138  */
btrfs_root_node(struct btrfs_root * root)139 struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
140 {
141 	struct extent_buffer *eb;
142 	spin_lock(&root->node_lock);
143 	eb = root->node;
144 	extent_buffer_get(eb);
145 	spin_unlock(&root->node_lock);
146 	return eb;
147 }
148 
149 /* loop around taking references on and locking the root node of the
150  * tree until you end up with a lock on the root.  A locked buffer
151  * is returned, with a reference held.
152  */
btrfs_lock_root_node(struct btrfs_root * root)153 struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
154 {
155 	struct extent_buffer *eb;
156 
157 	while (1) {
158 		eb = btrfs_root_node(root);
159 		btrfs_tree_lock(eb);
160 
161 		spin_lock(&root->node_lock);
162 		if (eb == root->node) {
163 			spin_unlock(&root->node_lock);
164 			break;
165 		}
166 		spin_unlock(&root->node_lock);
167 
168 		btrfs_tree_unlock(eb);
169 		free_extent_buffer(eb);
170 	}
171 	return eb;
172 }
173 
174 /* cowonly root (everything not a reference counted cow subvolume), just get
175  * put onto a simple dirty list.  transaction.c walks this to make sure they
176  * get properly updated on disk.
177  */
add_root_to_dirty_list(struct btrfs_root * root)178 static void add_root_to_dirty_list(struct btrfs_root *root)
179 {
180 	if (root->track_dirty && list_empty(&root->dirty_list)) {
181 		list_add(&root->dirty_list,
182 			 &root->fs_info->dirty_cowonly_roots);
183 	}
184 }
185 
186 /*
187  * used by snapshot creation to make a copy of a root for a tree with
188  * a given objectid.  The buffer with the new root node is returned in
189  * cow_ret, and this func returns zero on success or a negative error code.
190  */
btrfs_copy_root(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct extent_buffer * buf,struct extent_buffer ** cow_ret,u64 new_root_objectid)191 int btrfs_copy_root(struct btrfs_trans_handle *trans,
192 		      struct btrfs_root *root,
193 		      struct extent_buffer *buf,
194 		      struct extent_buffer **cow_ret, u64 new_root_objectid)
195 {
196 	struct extent_buffer *cow;
197 	u32 nritems;
198 	int ret = 0;
199 	int level;
200 	struct btrfs_root *new_root;
201 
202 	new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
203 	if (!new_root)
204 		return -ENOMEM;
205 
206 	memcpy(new_root, root, sizeof(*new_root));
207 	new_root->root_key.objectid = new_root_objectid;
208 
209 	WARN_ON(root->ref_cows && trans->transid !=
210 		root->fs_info->running_transaction->transid);
211 	WARN_ON(root->ref_cows && trans->transid != root->last_trans);
212 
213 	level = btrfs_header_level(buf);
214 	nritems = btrfs_header_nritems(buf);
215 
216 	cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0,
217 				     new_root_objectid, trans->transid,
218 				     level, buf->start, 0);
219 	if (IS_ERR(cow)) {
220 		kfree(new_root);
221 		return PTR_ERR(cow);
222 	}
223 
224 	copy_extent_buffer(cow, buf, 0, 0, cow->len);
225 	btrfs_set_header_bytenr(cow, cow->start);
226 	btrfs_set_header_generation(cow, trans->transid);
227 	btrfs_set_header_owner(cow, new_root_objectid);
228 	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
229 
230 	write_extent_buffer(cow, root->fs_info->fsid,
231 			    (unsigned long)btrfs_header_fsid(cow),
232 			    BTRFS_FSID_SIZE);
233 
234 	WARN_ON(btrfs_header_generation(buf) > trans->transid);
235 	ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL);
236 	kfree(new_root);
237 
238 	if (ret)
239 		return ret;
240 
241 	btrfs_mark_buffer_dirty(cow);
242 	*cow_ret = cow;
243 	return 0;
244 }
245 
246 /*
247  * does the dirty work in cow of a single block.  The parent block (if
248  * supplied) is updated to point to the new cow copy.  The new buffer is marked
249  * dirty and returned locked.  If you modify the block it needs to be marked
250  * dirty again.
251  *
252  * search_start -- an allocation hint for the new block
253  *
254  * empty_size -- a hint that you plan on doing more cow.  This is the size in
255  * bytes the allocator should try to find free next to the block it returns.
256  * This is just a hint and may be ignored by the allocator.
257  *
258  * prealloc_dest -- if you have already reserved a destination for the cow,
259  * this uses that block instead of allocating a new one.
260  * btrfs_alloc_reserved_extent is used to finish the allocation.
261  */
__btrfs_cow_block(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct extent_buffer * buf,struct extent_buffer * parent,int parent_slot,struct extent_buffer ** cow_ret,u64 search_start,u64 empty_size,u64 prealloc_dest)262 static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
263 			     struct btrfs_root *root,
264 			     struct extent_buffer *buf,
265 			     struct extent_buffer *parent, int parent_slot,
266 			     struct extent_buffer **cow_ret,
267 			     u64 search_start, u64 empty_size,
268 			     u64 prealloc_dest)
269 {
270 	u64 parent_start;
271 	struct extent_buffer *cow;
272 	u32 nritems;
273 	int ret = 0;
274 	int level;
275 	int unlock_orig = 0;
276 
277 	if (*cow_ret == buf)
278 		unlock_orig = 1;
279 
280 	btrfs_assert_tree_locked(buf);
281 
282 	if (parent)
283 		parent_start = parent->start;
284 	else
285 		parent_start = 0;
286 
287 	WARN_ON(root->ref_cows && trans->transid !=
288 		root->fs_info->running_transaction->transid);
289 	WARN_ON(root->ref_cows && trans->transid != root->last_trans);
290 
291 	level = btrfs_header_level(buf);
292 	nritems = btrfs_header_nritems(buf);
293 
294 	if (prealloc_dest) {
295 		struct btrfs_key ins;
296 
297 		ins.objectid = prealloc_dest;
298 		ins.offset = buf->len;
299 		ins.type = BTRFS_EXTENT_ITEM_KEY;
300 
301 		ret = btrfs_alloc_reserved_extent(trans, root, parent_start,
302 						  root->root_key.objectid,
303 						  trans->transid, level, &ins);
304 		BUG_ON(ret);
305 		cow = btrfs_init_new_buffer(trans, root, prealloc_dest,
306 					    buf->len, level);
307 	} else {
308 		cow = btrfs_alloc_free_block(trans, root, buf->len,
309 					     parent_start,
310 					     root->root_key.objectid,
311 					     trans->transid, level,
312 					     search_start, empty_size);
313 	}
314 	if (IS_ERR(cow))
315 		return PTR_ERR(cow);
316 
317 	/* cow is set to blocking by btrfs_init_new_buffer */
318 
319 	copy_extent_buffer(cow, buf, 0, 0, cow->len);
320 	btrfs_set_header_bytenr(cow, cow->start);
321 	btrfs_set_header_generation(cow, trans->transid);
322 	btrfs_set_header_owner(cow, root->root_key.objectid);
323 	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
324 
325 	write_extent_buffer(cow, root->fs_info->fsid,
326 			    (unsigned long)btrfs_header_fsid(cow),
327 			    BTRFS_FSID_SIZE);
328 
329 	WARN_ON(btrfs_header_generation(buf) > trans->transid);
330 	if (btrfs_header_generation(buf) != trans->transid) {
331 		u32 nr_extents;
332 		ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents);
333 		if (ret)
334 			return ret;
335 
336 		ret = btrfs_cache_ref(trans, root, buf, nr_extents);
337 		WARN_ON(ret);
338 	} else if (btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID) {
339 		/*
340 		 * There are only two places that can drop reference to
341 		 * tree blocks owned by living reloc trees, one is here,
342 		 * the other place is btrfs_drop_subtree. In both places,
343 		 * we check reference count while tree block is locked.
344 		 * Furthermore, if reference count is one, it won't get
345 		 * increased by someone else.
346 		 */
347 		u32 refs;
348 		ret = btrfs_lookup_extent_ref(trans, root, buf->start,
349 					      buf->len, &refs);
350 		BUG_ON(ret);
351 		if (refs == 1) {
352 			ret = btrfs_update_ref(trans, root, buf, cow,
353 					       0, nritems);
354 			clean_tree_block(trans, root, buf);
355 		} else {
356 			ret = btrfs_inc_ref(trans, root, buf, cow, NULL);
357 		}
358 		BUG_ON(ret);
359 	} else {
360 		ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems);
361 		if (ret)
362 			return ret;
363 		clean_tree_block(trans, root, buf);
364 	}
365 
366 	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
367 		ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start);
368 		WARN_ON(ret);
369 	}
370 
371 	if (buf == root->node) {
372 		WARN_ON(parent && parent != buf);
373 
374 		spin_lock(&root->node_lock);
375 		root->node = cow;
376 		extent_buffer_get(cow);
377 		spin_unlock(&root->node_lock);
378 
379 		if (buf != root->commit_root) {
380 			btrfs_free_extent(trans, root, buf->start,
381 					  buf->len, buf->start,
382 					  root->root_key.objectid,
383 					  btrfs_header_generation(buf),
384 					  level, 1);
385 		}
386 		free_extent_buffer(buf);
387 		add_root_to_dirty_list(root);
388 	} else {
389 		btrfs_set_node_blockptr(parent, parent_slot,
390 					cow->start);
391 		WARN_ON(trans->transid == 0);
392 		btrfs_set_node_ptr_generation(parent, parent_slot,
393 					      trans->transid);
394 		btrfs_mark_buffer_dirty(parent);
395 		WARN_ON(btrfs_header_generation(parent) != trans->transid);
396 		btrfs_free_extent(trans, root, buf->start, buf->len,
397 				  parent_start, btrfs_header_owner(parent),
398 				  btrfs_header_generation(parent), level, 1);
399 	}
400 	if (unlock_orig)
401 		btrfs_tree_unlock(buf);
402 	free_extent_buffer(buf);
403 	btrfs_mark_buffer_dirty(cow);
404 	*cow_ret = cow;
405 	return 0;
406 }
407 
408 /*
409  * cows a single block, see __btrfs_cow_block for the real work.
410  * This version of it has extra checks so that a block isn't cow'd more than
411  * once per transaction, as long as it hasn't been written yet
412  */
btrfs_cow_block(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct extent_buffer * buf,struct extent_buffer * parent,int parent_slot,struct extent_buffer ** cow_ret,u64 prealloc_dest)413 noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
414 		    struct btrfs_root *root, struct extent_buffer *buf,
415 		    struct extent_buffer *parent, int parent_slot,
416 		    struct extent_buffer **cow_ret, u64 prealloc_dest)
417 {
418 	u64 search_start;
419 	int ret;
420 
421 	if (trans->transaction != root->fs_info->running_transaction) {
422 		printk(KERN_CRIT "trans %llu running %llu\n",
423 		       (unsigned long long)trans->transid,
424 		       (unsigned long long)
425 		       root->fs_info->running_transaction->transid);
426 		WARN_ON(1);
427 	}
428 	if (trans->transid != root->fs_info->generation) {
429 		printk(KERN_CRIT "trans %llu running %llu\n",
430 		       (unsigned long long)trans->transid,
431 		       (unsigned long long)root->fs_info->generation);
432 		WARN_ON(1);
433 	}
434 
435 	if (btrfs_header_generation(buf) == trans->transid &&
436 	    btrfs_header_owner(buf) == root->root_key.objectid &&
437 	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
438 		*cow_ret = buf;
439 		WARN_ON(prealloc_dest);
440 		return 0;
441 	}
442 
443 	search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
444 
445 	if (parent)
446 		btrfs_set_lock_blocking(parent);
447 	btrfs_set_lock_blocking(buf);
448 
449 	ret = __btrfs_cow_block(trans, root, buf, parent,
450 				 parent_slot, cow_ret, search_start, 0,
451 				 prealloc_dest);
452 	return ret;
453 }
454 
455 /*
456  * helper function for defrag to decide if two blocks pointed to by a
457  * node are actually close by
458  */
close_blocks(u64 blocknr,u64 other,u32 blocksize)459 static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
460 {
461 	if (blocknr < other && other - (blocknr + blocksize) < 32768)
462 		return 1;
463 	if (blocknr > other && blocknr - (other + blocksize) < 32768)
464 		return 1;
465 	return 0;
466 }
467 
468 /*
469  * compare two keys in a memcmp fashion
470  */
comp_keys(struct btrfs_disk_key * disk,struct btrfs_key * k2)471 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
472 {
473 	struct btrfs_key k1;
474 
475 	btrfs_disk_key_to_cpu(&k1, disk);
476 
477 	if (k1.objectid > k2->objectid)
478 		return 1;
479 	if (k1.objectid < k2->objectid)
480 		return -1;
481 	if (k1.type > k2->type)
482 		return 1;
483 	if (k1.type < k2->type)
484 		return -1;
485 	if (k1.offset > k2->offset)
486 		return 1;
487 	if (k1.offset < k2->offset)
488 		return -1;
489 	return 0;
490 }
491 
492 /*
493  * same as comp_keys only with two btrfs_key's
494  */
comp_cpu_keys(struct btrfs_key * k1,struct btrfs_key * k2)495 static int comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
496 {
497 	if (k1->objectid > k2->objectid)
498 		return 1;
499 	if (k1->objectid < k2->objectid)
500 		return -1;
501 	if (k1->type > k2->type)
502 		return 1;
503 	if (k1->type < k2->type)
504 		return -1;
505 	if (k1->offset > k2->offset)
506 		return 1;
507 	if (k1->offset < k2->offset)
508 		return -1;
509 	return 0;
510 }
511 
512 /*
513  * this is used by the defrag code to go through all the
514  * leaves pointed to by a node and reallocate them so that
515  * disk order is close to key order
516  */
btrfs_realloc_node(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct extent_buffer * parent,int start_slot,int cache_only,u64 * last_ret,struct btrfs_key * progress)517 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
518 		       struct btrfs_root *root, struct extent_buffer *parent,
519 		       int start_slot, int cache_only, u64 *last_ret,
520 		       struct btrfs_key *progress)
521 {
522 	struct extent_buffer *cur;
523 	u64 blocknr;
524 	u64 gen;
525 	u64 search_start = *last_ret;
526 	u64 last_block = 0;
527 	u64 other;
528 	u32 parent_nritems;
529 	int end_slot;
530 	int i;
531 	int err = 0;
532 	int parent_level;
533 	int uptodate;
534 	u32 blocksize;
535 	int progress_passed = 0;
536 	struct btrfs_disk_key disk_key;
537 
538 	parent_level = btrfs_header_level(parent);
539 	if (cache_only && parent_level != 1)
540 		return 0;
541 
542 	if (trans->transaction != root->fs_info->running_transaction)
543 		WARN_ON(1);
544 	if (trans->transid != root->fs_info->generation)
545 		WARN_ON(1);
546 
547 	parent_nritems = btrfs_header_nritems(parent);
548 	blocksize = btrfs_level_size(root, parent_level - 1);
549 	end_slot = parent_nritems;
550 
551 	if (parent_nritems == 1)
552 		return 0;
553 
554 	btrfs_set_lock_blocking(parent);
555 
556 	for (i = start_slot; i < end_slot; i++) {
557 		int close = 1;
558 
559 		if (!parent->map_token) {
560 			map_extent_buffer(parent,
561 					btrfs_node_key_ptr_offset(i),
562 					sizeof(struct btrfs_key_ptr),
563 					&parent->map_token, &parent->kaddr,
564 					&parent->map_start, &parent->map_len,
565 					KM_USER1);
566 		}
567 		btrfs_node_key(parent, &disk_key, i);
568 		if (!progress_passed && comp_keys(&disk_key, progress) < 0)
569 			continue;
570 
571 		progress_passed = 1;
572 		blocknr = btrfs_node_blockptr(parent, i);
573 		gen = btrfs_node_ptr_generation(parent, i);
574 		if (last_block == 0)
575 			last_block = blocknr;
576 
577 		if (i > 0) {
578 			other = btrfs_node_blockptr(parent, i - 1);
579 			close = close_blocks(blocknr, other, blocksize);
580 		}
581 		if (!close && i < end_slot - 2) {
582 			other = btrfs_node_blockptr(parent, i + 1);
583 			close = close_blocks(blocknr, other, blocksize);
584 		}
585 		if (close) {
586 			last_block = blocknr;
587 			continue;
588 		}
589 		if (parent->map_token) {
590 			unmap_extent_buffer(parent, parent->map_token,
591 					    KM_USER1);
592 			parent->map_token = NULL;
593 		}
594 
595 		cur = btrfs_find_tree_block(root, blocknr, blocksize);
596 		if (cur)
597 			uptodate = btrfs_buffer_uptodate(cur, gen);
598 		else
599 			uptodate = 0;
600 		if (!cur || !uptodate) {
601 			if (cache_only) {
602 				free_extent_buffer(cur);
603 				continue;
604 			}
605 			if (!cur) {
606 				cur = read_tree_block(root, blocknr,
607 							 blocksize, gen);
608 			} else if (!uptodate) {
609 				btrfs_read_buffer(cur, gen);
610 			}
611 		}
612 		if (search_start == 0)
613 			search_start = last_block;
614 
615 		btrfs_tree_lock(cur);
616 		btrfs_set_lock_blocking(cur);
617 		err = __btrfs_cow_block(trans, root, cur, parent, i,
618 					&cur, search_start,
619 					min(16 * blocksize,
620 					    (end_slot - i) * blocksize), 0);
621 		if (err) {
622 			btrfs_tree_unlock(cur);
623 			free_extent_buffer(cur);
624 			break;
625 		}
626 		search_start = cur->start;
627 		last_block = cur->start;
628 		*last_ret = search_start;
629 		btrfs_tree_unlock(cur);
630 		free_extent_buffer(cur);
631 	}
632 	if (parent->map_token) {
633 		unmap_extent_buffer(parent, parent->map_token,
634 				    KM_USER1);
635 		parent->map_token = NULL;
636 	}
637 	return err;
638 }
639 
640 /*
641  * The leaf data grows from end-to-front in the node.
642  * this returns the address of the start of the last item,
643  * which is the stop of the leaf data stack
644  */
leaf_data_end(struct btrfs_root * root,struct extent_buffer * leaf)645 static inline unsigned int leaf_data_end(struct btrfs_root *root,
646 					 struct extent_buffer *leaf)
647 {
648 	u32 nr = btrfs_header_nritems(leaf);
649 	if (nr == 0)
650 		return BTRFS_LEAF_DATA_SIZE(root);
651 	return btrfs_item_offset_nr(leaf, nr - 1);
652 }
653 
654 /*
655  * extra debugging checks to make sure all the items in a key are
656  * well formed and in the proper order
657  */
check_node(struct btrfs_root * root,struct btrfs_path * path,int level)658 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
659 		      int level)
660 {
661 	struct extent_buffer *parent = NULL;
662 	struct extent_buffer *node = path->nodes[level];
663 	struct btrfs_disk_key parent_key;
664 	struct btrfs_disk_key node_key;
665 	int parent_slot;
666 	int slot;
667 	struct btrfs_key cpukey;
668 	u32 nritems = btrfs_header_nritems(node);
669 
670 	if (path->nodes[level + 1])
671 		parent = path->nodes[level + 1];
672 
673 	slot = path->slots[level];
674 	BUG_ON(nritems == 0);
675 	if (parent) {
676 		parent_slot = path->slots[level + 1];
677 		btrfs_node_key(parent, &parent_key, parent_slot);
678 		btrfs_node_key(node, &node_key, 0);
679 		BUG_ON(memcmp(&parent_key, &node_key,
680 			      sizeof(struct btrfs_disk_key)));
681 		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
682 		       btrfs_header_bytenr(node));
683 	}
684 	BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
685 	if (slot != 0) {
686 		btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
687 		btrfs_node_key(node, &node_key, slot);
688 		BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
689 	}
690 	if (slot < nritems - 1) {
691 		btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
692 		btrfs_node_key(node, &node_key, slot);
693 		BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
694 	}
695 	return 0;
696 }
697 
698 /*
699  * extra checking to make sure all the items in a leaf are
700  * well formed and in the proper order
701  */
check_leaf(struct btrfs_root * root,struct btrfs_path * path,int level)702 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
703 		      int level)
704 {
705 	struct extent_buffer *leaf = path->nodes[level];
706 	struct extent_buffer *parent = NULL;
707 	int parent_slot;
708 	struct btrfs_key cpukey;
709 	struct btrfs_disk_key parent_key;
710 	struct btrfs_disk_key leaf_key;
711 	int slot = path->slots[0];
712 
713 	u32 nritems = btrfs_header_nritems(leaf);
714 
715 	if (path->nodes[level + 1])
716 		parent = path->nodes[level + 1];
717 
718 	if (nritems == 0)
719 		return 0;
720 
721 	if (parent) {
722 		parent_slot = path->slots[level + 1];
723 		btrfs_node_key(parent, &parent_key, parent_slot);
724 		btrfs_item_key(leaf, &leaf_key, 0);
725 
726 		BUG_ON(memcmp(&parent_key, &leaf_key,
727 		       sizeof(struct btrfs_disk_key)));
728 		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
729 		       btrfs_header_bytenr(leaf));
730 	}
731 	if (slot != 0 && slot < nritems - 1) {
732 		btrfs_item_key(leaf, &leaf_key, slot);
733 		btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
734 		if (comp_keys(&leaf_key, &cpukey) <= 0) {
735 			btrfs_print_leaf(root, leaf);
736 			printk(KERN_CRIT "slot %d offset bad key\n", slot);
737 			BUG_ON(1);
738 		}
739 		if (btrfs_item_offset_nr(leaf, slot - 1) !=
740 		       btrfs_item_end_nr(leaf, slot)) {
741 			btrfs_print_leaf(root, leaf);
742 			printk(KERN_CRIT "slot %d offset bad\n", slot);
743 			BUG_ON(1);
744 		}
745 	}
746 	if (slot < nritems - 1) {
747 		btrfs_item_key(leaf, &leaf_key, slot);
748 		btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
749 		BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
750 		if (btrfs_item_offset_nr(leaf, slot) !=
751 			btrfs_item_end_nr(leaf, slot + 1)) {
752 			btrfs_print_leaf(root, leaf);
753 			printk(KERN_CRIT "slot %d offset bad\n", slot);
754 			BUG_ON(1);
755 		}
756 	}
757 	BUG_ON(btrfs_item_offset_nr(leaf, 0) +
758 	       btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
759 	return 0;
760 }
761 
check_block(struct btrfs_root * root,struct btrfs_path * path,int level)762 static noinline int check_block(struct btrfs_root *root,
763 				struct btrfs_path *path, int level)
764 {
765 	return 0;
766 	if (level == 0)
767 		return check_leaf(root, path, level);
768 	return check_node(root, path, level);
769 }
770 
771 /*
772  * search for key in the extent_buffer.  The items start at offset p,
773  * and they are item_size apart.  There are 'max' items in p.
774  *
775  * the slot in the array is returned via slot, and it points to
776  * the place where you would insert key if it is not found in
777  * the array.
778  *
779  * slot may point to max if the key is bigger than all of the keys
780  */
generic_bin_search(struct extent_buffer * eb,unsigned long p,int item_size,struct btrfs_key * key,int max,int * slot)781 static noinline int generic_bin_search(struct extent_buffer *eb,
782 				       unsigned long p,
783 				       int item_size, struct btrfs_key *key,
784 				       int max, int *slot)
785 {
786 	int low = 0;
787 	int high = max;
788 	int mid;
789 	int ret;
790 	struct btrfs_disk_key *tmp = NULL;
791 	struct btrfs_disk_key unaligned;
792 	unsigned long offset;
793 	char *map_token = NULL;
794 	char *kaddr = NULL;
795 	unsigned long map_start = 0;
796 	unsigned long map_len = 0;
797 	int err;
798 
799 	while (low < high) {
800 		mid = (low + high) / 2;
801 		offset = p + mid * item_size;
802 
803 		if (!map_token || offset < map_start ||
804 		    (offset + sizeof(struct btrfs_disk_key)) >
805 		    map_start + map_len) {
806 			if (map_token) {
807 				unmap_extent_buffer(eb, map_token, KM_USER0);
808 				map_token = NULL;
809 			}
810 
811 			err = map_private_extent_buffer(eb, offset,
812 						sizeof(struct btrfs_disk_key),
813 						&map_token, &kaddr,
814 						&map_start, &map_len, KM_USER0);
815 
816 			if (!err) {
817 				tmp = (struct btrfs_disk_key *)(kaddr + offset -
818 							map_start);
819 			} else {
820 				read_extent_buffer(eb, &unaligned,
821 						   offset, sizeof(unaligned));
822 				tmp = &unaligned;
823 			}
824 
825 		} else {
826 			tmp = (struct btrfs_disk_key *)(kaddr + offset -
827 							map_start);
828 		}
829 		ret = comp_keys(tmp, key);
830 
831 		if (ret < 0)
832 			low = mid + 1;
833 		else if (ret > 0)
834 			high = mid;
835 		else {
836 			*slot = mid;
837 			if (map_token)
838 				unmap_extent_buffer(eb, map_token, KM_USER0);
839 			return 0;
840 		}
841 	}
842 	*slot = low;
843 	if (map_token)
844 		unmap_extent_buffer(eb, map_token, KM_USER0);
845 	return 1;
846 }
847 
848 /*
849  * simple bin_search frontend that does the right thing for
850  * leaves vs nodes
851  */
bin_search(struct extent_buffer * eb,struct btrfs_key * key,int level,int * slot)852 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
853 		      int level, int *slot)
854 {
855 	if (level == 0) {
856 		return generic_bin_search(eb,
857 					  offsetof(struct btrfs_leaf, items),
858 					  sizeof(struct btrfs_item),
859 					  key, btrfs_header_nritems(eb),
860 					  slot);
861 	} else {
862 		return generic_bin_search(eb,
863 					  offsetof(struct btrfs_node, ptrs),
864 					  sizeof(struct btrfs_key_ptr),
865 					  key, btrfs_header_nritems(eb),
866 					  slot);
867 	}
868 	return -1;
869 }
870 
871 /* given a node and slot number, this reads the blocks it points to.  The
872  * extent buffer is returned with a reference taken (but unlocked).
873  * NULL is returned on error.
874  */
read_node_slot(struct btrfs_root * root,struct extent_buffer * parent,int slot)875 static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
876 				   struct extent_buffer *parent, int slot)
877 {
878 	int level = btrfs_header_level(parent);
879 	if (slot < 0)
880 		return NULL;
881 	if (slot >= btrfs_header_nritems(parent))
882 		return NULL;
883 
884 	BUG_ON(level == 0);
885 
886 	return read_tree_block(root, btrfs_node_blockptr(parent, slot),
887 		       btrfs_level_size(root, level - 1),
888 		       btrfs_node_ptr_generation(parent, slot));
889 }
890 
891 /*
892  * node level balancing, used to make sure nodes are in proper order for
893  * item deletion.  We balance from the top down, so we have to make sure
894  * that a deletion won't leave an node completely empty later on.
895  */
balance_level(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,int level)896 static noinline int balance_level(struct btrfs_trans_handle *trans,
897 			 struct btrfs_root *root,
898 			 struct btrfs_path *path, int level)
899 {
900 	struct extent_buffer *right = NULL;
901 	struct extent_buffer *mid;
902 	struct extent_buffer *left = NULL;
903 	struct extent_buffer *parent = NULL;
904 	int ret = 0;
905 	int wret;
906 	int pslot;
907 	int orig_slot = path->slots[level];
908 	int err_on_enospc = 0;
909 	u64 orig_ptr;
910 
911 	if (level == 0)
912 		return 0;
913 
914 	mid = path->nodes[level];
915 
916 	WARN_ON(!path->locks[level]);
917 	WARN_ON(btrfs_header_generation(mid) != trans->transid);
918 
919 	orig_ptr = btrfs_node_blockptr(mid, orig_slot);
920 
921 	if (level < BTRFS_MAX_LEVEL - 1)
922 		parent = path->nodes[level + 1];
923 	pslot = path->slots[level + 1];
924 
925 	/*
926 	 * deal with the case where there is only one pointer in the root
927 	 * by promoting the node below to a root
928 	 */
929 	if (!parent) {
930 		struct extent_buffer *child;
931 
932 		if (btrfs_header_nritems(mid) != 1)
933 			return 0;
934 
935 		/* promote the child to a root */
936 		child = read_node_slot(root, mid, 0);
937 		BUG_ON(!child);
938 		btrfs_tree_lock(child);
939 		btrfs_set_lock_blocking(child);
940 		ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0);
941 		BUG_ON(ret);
942 
943 		spin_lock(&root->node_lock);
944 		root->node = child;
945 		spin_unlock(&root->node_lock);
946 
947 		ret = btrfs_update_extent_ref(trans, root, child->start,
948 					      mid->start, child->start,
949 					      root->root_key.objectid,
950 					      trans->transid, level - 1);
951 		BUG_ON(ret);
952 
953 		add_root_to_dirty_list(root);
954 		btrfs_tree_unlock(child);
955 
956 		path->locks[level] = 0;
957 		path->nodes[level] = NULL;
958 		clean_tree_block(trans, root, mid);
959 		btrfs_tree_unlock(mid);
960 		/* once for the path */
961 		free_extent_buffer(mid);
962 		ret = btrfs_free_extent(trans, root, mid->start, mid->len,
963 					mid->start, root->root_key.objectid,
964 					btrfs_header_generation(mid),
965 					level, 1);
966 		/* once for the root ptr */
967 		free_extent_buffer(mid);
968 		return ret;
969 	}
970 	if (btrfs_header_nritems(mid) >
971 	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
972 		return 0;
973 
974 	if (btrfs_header_nritems(mid) < 2)
975 		err_on_enospc = 1;
976 
977 	left = read_node_slot(root, parent, pslot - 1);
978 	if (left) {
979 		btrfs_tree_lock(left);
980 		btrfs_set_lock_blocking(left);
981 		wret = btrfs_cow_block(trans, root, left,
982 				       parent, pslot - 1, &left, 0);
983 		if (wret) {
984 			ret = wret;
985 			goto enospc;
986 		}
987 	}
988 	right = read_node_slot(root, parent, pslot + 1);
989 	if (right) {
990 		btrfs_tree_lock(right);
991 		btrfs_set_lock_blocking(right);
992 		wret = btrfs_cow_block(trans, root, right,
993 				       parent, pslot + 1, &right, 0);
994 		if (wret) {
995 			ret = wret;
996 			goto enospc;
997 		}
998 	}
999 
1000 	/* first, try to make some room in the middle buffer */
1001 	if (left) {
1002 		orig_slot += btrfs_header_nritems(left);
1003 		wret = push_node_left(trans, root, left, mid, 1);
1004 		if (wret < 0)
1005 			ret = wret;
1006 		if (btrfs_header_nritems(mid) < 2)
1007 			err_on_enospc = 1;
1008 	}
1009 
1010 	/*
1011 	 * then try to empty the right most buffer into the middle
1012 	 */
1013 	if (right) {
1014 		wret = push_node_left(trans, root, mid, right, 1);
1015 		if (wret < 0 && wret != -ENOSPC)
1016 			ret = wret;
1017 		if (btrfs_header_nritems(right) == 0) {
1018 			u64 bytenr = right->start;
1019 			u64 generation = btrfs_header_generation(parent);
1020 			u32 blocksize = right->len;
1021 
1022 			clean_tree_block(trans, root, right);
1023 			btrfs_tree_unlock(right);
1024 			free_extent_buffer(right);
1025 			right = NULL;
1026 			wret = del_ptr(trans, root, path, level + 1, pslot +
1027 				       1);
1028 			if (wret)
1029 				ret = wret;
1030 			wret = btrfs_free_extent(trans, root, bytenr,
1031 						 blocksize, parent->start,
1032 						 btrfs_header_owner(parent),
1033 						 generation, level, 1);
1034 			if (wret)
1035 				ret = wret;
1036 		} else {
1037 			struct btrfs_disk_key right_key;
1038 			btrfs_node_key(right, &right_key, 0);
1039 			btrfs_set_node_key(parent, &right_key, pslot + 1);
1040 			btrfs_mark_buffer_dirty(parent);
1041 		}
1042 	}
1043 	if (btrfs_header_nritems(mid) == 1) {
1044 		/*
1045 		 * we're not allowed to leave a node with one item in the
1046 		 * tree during a delete.  A deletion from lower in the tree
1047 		 * could try to delete the only pointer in this node.
1048 		 * So, pull some keys from the left.
1049 		 * There has to be a left pointer at this point because
1050 		 * otherwise we would have pulled some pointers from the
1051 		 * right
1052 		 */
1053 		BUG_ON(!left);
1054 		wret = balance_node_right(trans, root, mid, left);
1055 		if (wret < 0) {
1056 			ret = wret;
1057 			goto enospc;
1058 		}
1059 		if (wret == 1) {
1060 			wret = push_node_left(trans, root, left, mid, 1);
1061 			if (wret < 0)
1062 				ret = wret;
1063 		}
1064 		BUG_ON(wret == 1);
1065 	}
1066 	if (btrfs_header_nritems(mid) == 0) {
1067 		/* we've managed to empty the middle node, drop it */
1068 		u64 root_gen = btrfs_header_generation(parent);
1069 		u64 bytenr = mid->start;
1070 		u32 blocksize = mid->len;
1071 
1072 		clean_tree_block(trans, root, mid);
1073 		btrfs_tree_unlock(mid);
1074 		free_extent_buffer(mid);
1075 		mid = NULL;
1076 		wret = del_ptr(trans, root, path, level + 1, pslot);
1077 		if (wret)
1078 			ret = wret;
1079 		wret = btrfs_free_extent(trans, root, bytenr, blocksize,
1080 					 parent->start,
1081 					 btrfs_header_owner(parent),
1082 					 root_gen, level, 1);
1083 		if (wret)
1084 			ret = wret;
1085 	} else {
1086 		/* update the parent key to reflect our changes */
1087 		struct btrfs_disk_key mid_key;
1088 		btrfs_node_key(mid, &mid_key, 0);
1089 		btrfs_set_node_key(parent, &mid_key, pslot);
1090 		btrfs_mark_buffer_dirty(parent);
1091 	}
1092 
1093 	/* update the path */
1094 	if (left) {
1095 		if (btrfs_header_nritems(left) > orig_slot) {
1096 			extent_buffer_get(left);
1097 			/* left was locked after cow */
1098 			path->nodes[level] = left;
1099 			path->slots[level + 1] -= 1;
1100 			path->slots[level] = orig_slot;
1101 			if (mid) {
1102 				btrfs_tree_unlock(mid);
1103 				free_extent_buffer(mid);
1104 			}
1105 		} else {
1106 			orig_slot -= btrfs_header_nritems(left);
1107 			path->slots[level] = orig_slot;
1108 		}
1109 	}
1110 	/* double check we haven't messed things up */
1111 	check_block(root, path, level);
1112 	if (orig_ptr !=
1113 	    btrfs_node_blockptr(path->nodes[level], path->slots[level]))
1114 		BUG();
1115 enospc:
1116 	if (right) {
1117 		btrfs_tree_unlock(right);
1118 		free_extent_buffer(right);
1119 	}
1120 	if (left) {
1121 		if (path->nodes[level] != left)
1122 			btrfs_tree_unlock(left);
1123 		free_extent_buffer(left);
1124 	}
1125 	return ret;
1126 }
1127 
1128 /* Node balancing for insertion.  Here we only split or push nodes around
1129  * when they are completely full.  This is also done top down, so we
1130  * have to be pessimistic.
1131  */
push_nodes_for_insert(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,int level)1132 static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
1133 					  struct btrfs_root *root,
1134 					  struct btrfs_path *path, int level)
1135 {
1136 	struct extent_buffer *right = NULL;
1137 	struct extent_buffer *mid;
1138 	struct extent_buffer *left = NULL;
1139 	struct extent_buffer *parent = NULL;
1140 	int ret = 0;
1141 	int wret;
1142 	int pslot;
1143 	int orig_slot = path->slots[level];
1144 	u64 orig_ptr;
1145 
1146 	if (level == 0)
1147 		return 1;
1148 
1149 	mid = path->nodes[level];
1150 	WARN_ON(btrfs_header_generation(mid) != trans->transid);
1151 	orig_ptr = btrfs_node_blockptr(mid, orig_slot);
1152 
1153 	if (level < BTRFS_MAX_LEVEL - 1)
1154 		parent = path->nodes[level + 1];
1155 	pslot = path->slots[level + 1];
1156 
1157 	if (!parent)
1158 		return 1;
1159 
1160 	left = read_node_slot(root, parent, pslot - 1);
1161 
1162 	/* first, try to make some room in the middle buffer */
1163 	if (left) {
1164 		u32 left_nr;
1165 
1166 		btrfs_tree_lock(left);
1167 		btrfs_set_lock_blocking(left);
1168 
1169 		left_nr = btrfs_header_nritems(left);
1170 		if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1171 			wret = 1;
1172 		} else {
1173 			ret = btrfs_cow_block(trans, root, left, parent,
1174 					      pslot - 1, &left, 0);
1175 			if (ret)
1176 				wret = 1;
1177 			else {
1178 				wret = push_node_left(trans, root,
1179 						      left, mid, 0);
1180 			}
1181 		}
1182 		if (wret < 0)
1183 			ret = wret;
1184 		if (wret == 0) {
1185 			struct btrfs_disk_key disk_key;
1186 			orig_slot += left_nr;
1187 			btrfs_node_key(mid, &disk_key, 0);
1188 			btrfs_set_node_key(parent, &disk_key, pslot);
1189 			btrfs_mark_buffer_dirty(parent);
1190 			if (btrfs_header_nritems(left) > orig_slot) {
1191 				path->nodes[level] = left;
1192 				path->slots[level + 1] -= 1;
1193 				path->slots[level] = orig_slot;
1194 				btrfs_tree_unlock(mid);
1195 				free_extent_buffer(mid);
1196 			} else {
1197 				orig_slot -=
1198 					btrfs_header_nritems(left);
1199 				path->slots[level] = orig_slot;
1200 				btrfs_tree_unlock(left);
1201 				free_extent_buffer(left);
1202 			}
1203 			return 0;
1204 		}
1205 		btrfs_tree_unlock(left);
1206 		free_extent_buffer(left);
1207 	}
1208 	right = read_node_slot(root, parent, pslot + 1);
1209 
1210 	/*
1211 	 * then try to empty the right most buffer into the middle
1212 	 */
1213 	if (right) {
1214 		u32 right_nr;
1215 
1216 		btrfs_tree_lock(right);
1217 		btrfs_set_lock_blocking(right);
1218 
1219 		right_nr = btrfs_header_nritems(right);
1220 		if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1221 			wret = 1;
1222 		} else {
1223 			ret = btrfs_cow_block(trans, root, right,
1224 					      parent, pslot + 1,
1225 					      &right, 0);
1226 			if (ret)
1227 				wret = 1;
1228 			else {
1229 				wret = balance_node_right(trans, root,
1230 							  right, mid);
1231 			}
1232 		}
1233 		if (wret < 0)
1234 			ret = wret;
1235 		if (wret == 0) {
1236 			struct btrfs_disk_key disk_key;
1237 
1238 			btrfs_node_key(right, &disk_key, 0);
1239 			btrfs_set_node_key(parent, &disk_key, pslot + 1);
1240 			btrfs_mark_buffer_dirty(parent);
1241 
1242 			if (btrfs_header_nritems(mid) <= orig_slot) {
1243 				path->nodes[level] = right;
1244 				path->slots[level + 1] += 1;
1245 				path->slots[level] = orig_slot -
1246 					btrfs_header_nritems(mid);
1247 				btrfs_tree_unlock(mid);
1248 				free_extent_buffer(mid);
1249 			} else {
1250 				btrfs_tree_unlock(right);
1251 				free_extent_buffer(right);
1252 			}
1253 			return 0;
1254 		}
1255 		btrfs_tree_unlock(right);
1256 		free_extent_buffer(right);
1257 	}
1258 	return 1;
1259 }
1260 
1261 /*
1262  * readahead one full node of leaves, finding things that are close
1263  * to the block in 'slot', and triggering ra on them.
1264  */
reada_for_search(struct btrfs_root * root,struct btrfs_path * path,int level,int slot,u64 objectid)1265 static noinline void reada_for_search(struct btrfs_root *root,
1266 				      struct btrfs_path *path,
1267 				      int level, int slot, u64 objectid)
1268 {
1269 	struct extent_buffer *node;
1270 	struct btrfs_disk_key disk_key;
1271 	u32 nritems;
1272 	u64 search;
1273 	u64 target;
1274 	u64 nread = 0;
1275 	int direction = path->reada;
1276 	struct extent_buffer *eb;
1277 	u32 nr;
1278 	u32 blocksize;
1279 	u32 nscan = 0;
1280 
1281 	if (level != 1)
1282 		return;
1283 
1284 	if (!path->nodes[level])
1285 		return;
1286 
1287 	node = path->nodes[level];
1288 
1289 	search = btrfs_node_blockptr(node, slot);
1290 	blocksize = btrfs_level_size(root, level - 1);
1291 	eb = btrfs_find_tree_block(root, search, blocksize);
1292 	if (eb) {
1293 		free_extent_buffer(eb);
1294 		return;
1295 	}
1296 
1297 	target = search;
1298 
1299 	nritems = btrfs_header_nritems(node);
1300 	nr = slot;
1301 	while (1) {
1302 		if (direction < 0) {
1303 			if (nr == 0)
1304 				break;
1305 			nr--;
1306 		} else if (direction > 0) {
1307 			nr++;
1308 			if (nr >= nritems)
1309 				break;
1310 		}
1311 		if (path->reada < 0 && objectid) {
1312 			btrfs_node_key(node, &disk_key, nr);
1313 			if (btrfs_disk_key_objectid(&disk_key) != objectid)
1314 				break;
1315 		}
1316 		search = btrfs_node_blockptr(node, nr);
1317 		if ((search <= target && target - search <= 65536) ||
1318 		    (search > target && search - target <= 65536)) {
1319 			readahead_tree_block(root, search, blocksize,
1320 				     btrfs_node_ptr_generation(node, nr));
1321 			nread += blocksize;
1322 		}
1323 		nscan++;
1324 		if ((nread > 65536 || nscan > 32))
1325 			break;
1326 	}
1327 }
1328 
1329 /*
1330  * returns -EAGAIN if it had to drop the path, or zero if everything was in
1331  * cache
1332  */
reada_for_balance(struct btrfs_root * root,struct btrfs_path * path,int level)1333 static noinline int reada_for_balance(struct btrfs_root *root,
1334 				      struct btrfs_path *path, int level)
1335 {
1336 	int slot;
1337 	int nritems;
1338 	struct extent_buffer *parent;
1339 	struct extent_buffer *eb;
1340 	u64 gen;
1341 	u64 block1 = 0;
1342 	u64 block2 = 0;
1343 	int ret = 0;
1344 	int blocksize;
1345 
1346 	parent = path->nodes[level - 1];
1347 	if (!parent)
1348 		return 0;
1349 
1350 	nritems = btrfs_header_nritems(parent);
1351 	slot = path->slots[level];
1352 	blocksize = btrfs_level_size(root, level);
1353 
1354 	if (slot > 0) {
1355 		block1 = btrfs_node_blockptr(parent, slot - 1);
1356 		gen = btrfs_node_ptr_generation(parent, slot - 1);
1357 		eb = btrfs_find_tree_block(root, block1, blocksize);
1358 		if (eb && btrfs_buffer_uptodate(eb, gen))
1359 			block1 = 0;
1360 		free_extent_buffer(eb);
1361 	}
1362 	if (slot < nritems) {
1363 		block2 = btrfs_node_blockptr(parent, slot + 1);
1364 		gen = btrfs_node_ptr_generation(parent, slot + 1);
1365 		eb = btrfs_find_tree_block(root, block2, blocksize);
1366 		if (eb && btrfs_buffer_uptodate(eb, gen))
1367 			block2 = 0;
1368 		free_extent_buffer(eb);
1369 	}
1370 	if (block1 || block2) {
1371 		ret = -EAGAIN;
1372 		btrfs_release_path(root, path);
1373 		if (block1)
1374 			readahead_tree_block(root, block1, blocksize, 0);
1375 		if (block2)
1376 			readahead_tree_block(root, block2, blocksize, 0);
1377 
1378 		if (block1) {
1379 			eb = read_tree_block(root, block1, blocksize, 0);
1380 			free_extent_buffer(eb);
1381 		}
1382 		if (block1) {
1383 			eb = read_tree_block(root, block2, blocksize, 0);
1384 			free_extent_buffer(eb);
1385 		}
1386 	}
1387 	return ret;
1388 }
1389 
1390 
1391 /*
1392  * when we walk down the tree, it is usually safe to unlock the higher layers
1393  * in the tree.  The exceptions are when our path goes through slot 0, because
1394  * operations on the tree might require changing key pointers higher up in the
1395  * tree.
1396  *
1397  * callers might also have set path->keep_locks, which tells this code to keep
1398  * the lock if the path points to the last slot in the block.  This is part of
1399  * walking through the tree, and selecting the next slot in the higher block.
1400  *
1401  * lowest_unlock sets the lowest level in the tree we're allowed to unlock.  so
1402  * if lowest_unlock is 1, level 0 won't be unlocked
1403  */
unlock_up(struct btrfs_path * path,int level,int lowest_unlock)1404 static noinline void unlock_up(struct btrfs_path *path, int level,
1405 			       int lowest_unlock)
1406 {
1407 	int i;
1408 	int skip_level = level;
1409 	int no_skips = 0;
1410 	struct extent_buffer *t;
1411 
1412 	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1413 		if (!path->nodes[i])
1414 			break;
1415 		if (!path->locks[i])
1416 			break;
1417 		if (!no_skips && path->slots[i] == 0) {
1418 			skip_level = i + 1;
1419 			continue;
1420 		}
1421 		if (!no_skips && path->keep_locks) {
1422 			u32 nritems;
1423 			t = path->nodes[i];
1424 			nritems = btrfs_header_nritems(t);
1425 			if (nritems < 1 || path->slots[i] >= nritems - 1) {
1426 				skip_level = i + 1;
1427 				continue;
1428 			}
1429 		}
1430 		if (skip_level < i && i >= lowest_unlock)
1431 			no_skips = 1;
1432 
1433 		t = path->nodes[i];
1434 		if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
1435 			btrfs_tree_unlock(t);
1436 			path->locks[i] = 0;
1437 		}
1438 	}
1439 }
1440 
1441 /*
1442  * This releases any locks held in the path starting at level and
1443  * going all the way up to the root.
1444  *
1445  * btrfs_search_slot will keep the lock held on higher nodes in a few
1446  * corner cases, such as COW of the block at slot zero in the node.  This
1447  * ignores those rules, and it should only be called when there are no
1448  * more updates to be done higher up in the tree.
1449  */
btrfs_unlock_up_safe(struct btrfs_path * path,int level)1450 noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
1451 {
1452 	int i;
1453 
1454 	if (path->keep_locks || path->lowest_level)
1455 		return;
1456 
1457 	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1458 		if (!path->nodes[i])
1459 			continue;
1460 		if (!path->locks[i])
1461 			continue;
1462 		btrfs_tree_unlock(path->nodes[i]);
1463 		path->locks[i] = 0;
1464 	}
1465 }
1466 
1467 /*
1468  * look for key in the tree.  path is filled in with nodes along the way
1469  * if key is found, we return zero and you can find the item in the leaf
1470  * level of the path (level 0)
1471  *
1472  * If the key isn't found, the path points to the slot where it should
1473  * be inserted, and 1 is returned.  If there are other errors during the
1474  * search a negative error number is returned.
1475  *
1476  * if ins_len > 0, nodes and leaves will be split as we walk down the
1477  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
1478  * possible)
1479  */
btrfs_search_slot(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_key * key,struct btrfs_path * p,int ins_len,int cow)1480 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1481 		      *root, struct btrfs_key *key, struct btrfs_path *p, int
1482 		      ins_len, int cow)
1483 {
1484 	struct extent_buffer *b;
1485 	struct extent_buffer *tmp;
1486 	int slot;
1487 	int ret;
1488 	int level;
1489 	int should_reada = p->reada;
1490 	int lowest_unlock = 1;
1491 	int blocksize;
1492 	u8 lowest_level = 0;
1493 	u64 blocknr;
1494 	u64 gen;
1495 	struct btrfs_key prealloc_block;
1496 
1497 	lowest_level = p->lowest_level;
1498 	WARN_ON(lowest_level && ins_len > 0);
1499 	WARN_ON(p->nodes[0] != NULL);
1500 
1501 	if (ins_len < 0)
1502 		lowest_unlock = 2;
1503 
1504 	prealloc_block.objectid = 0;
1505 
1506 again:
1507 	if (p->skip_locking)
1508 		b = btrfs_root_node(root);
1509 	else
1510 		b = btrfs_lock_root_node(root);
1511 
1512 	while (b) {
1513 		level = btrfs_header_level(b);
1514 
1515 		/*
1516 		 * setup the path here so we can release it under lock
1517 		 * contention with the cow code
1518 		 */
1519 		p->nodes[level] = b;
1520 		if (!p->skip_locking)
1521 			p->locks[level] = 1;
1522 
1523 		if (cow) {
1524 			int wret;
1525 
1526 			/* is a cow on this block not required */
1527 			if (btrfs_header_generation(b) == trans->transid &&
1528 			    btrfs_header_owner(b) == root->root_key.objectid &&
1529 			    !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
1530 				goto cow_done;
1531 			}
1532 
1533 			/* ok, we have to cow, is our old prealloc the right
1534 			 * size?
1535 			 */
1536 			if (prealloc_block.objectid &&
1537 			    prealloc_block.offset != b->len) {
1538 				btrfs_release_path(root, p);
1539 				btrfs_free_reserved_extent(root,
1540 					   prealloc_block.objectid,
1541 					   prealloc_block.offset);
1542 				prealloc_block.objectid = 0;
1543 				goto again;
1544 			}
1545 
1546 			/*
1547 			 * for higher level blocks, try not to allocate blocks
1548 			 * with the block and the parent locks held.
1549 			 */
1550 			if (level > 0 && !prealloc_block.objectid) {
1551 				u32 size = b->len;
1552 				u64 hint = b->start;
1553 
1554 				btrfs_release_path(root, p);
1555 				ret = btrfs_reserve_extent(trans, root,
1556 							   size, size, 0,
1557 							   hint, (u64)-1,
1558 							   &prealloc_block, 0);
1559 				BUG_ON(ret);
1560 				goto again;
1561 			}
1562 
1563 			btrfs_set_path_blocking(p);
1564 
1565 			wret = btrfs_cow_block(trans, root, b,
1566 					       p->nodes[level + 1],
1567 					       p->slots[level + 1],
1568 					       &b, prealloc_block.objectid);
1569 			prealloc_block.objectid = 0;
1570 			if (wret) {
1571 				free_extent_buffer(b);
1572 				ret = wret;
1573 				goto done;
1574 			}
1575 		}
1576 cow_done:
1577 		BUG_ON(!cow && ins_len);
1578 		if (level != btrfs_header_level(b))
1579 			WARN_ON(1);
1580 		level = btrfs_header_level(b);
1581 
1582 		p->nodes[level] = b;
1583 		if (!p->skip_locking)
1584 			p->locks[level] = 1;
1585 
1586 		btrfs_clear_path_blocking(p, NULL);
1587 
1588 		/*
1589 		 * we have a lock on b and as long as we aren't changing
1590 		 * the tree, there is no way to for the items in b to change.
1591 		 * It is safe to drop the lock on our parent before we
1592 		 * go through the expensive btree search on b.
1593 		 *
1594 		 * If cow is true, then we might be changing slot zero,
1595 		 * which may require changing the parent.  So, we can't
1596 		 * drop the lock until after we know which slot we're
1597 		 * operating on.
1598 		 */
1599 		if (!cow)
1600 			btrfs_unlock_up_safe(p, level + 1);
1601 
1602 		ret = check_block(root, p, level);
1603 		if (ret) {
1604 			ret = -1;
1605 			goto done;
1606 		}
1607 
1608 		ret = bin_search(b, key, level, &slot);
1609 
1610 		if (level != 0) {
1611 			if (ret && slot > 0)
1612 				slot -= 1;
1613 			p->slots[level] = slot;
1614 			if ((p->search_for_split || ins_len > 0) &&
1615 			    btrfs_header_nritems(b) >=
1616 			    BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
1617 				int sret;
1618 
1619 				sret = reada_for_balance(root, p, level);
1620 				if (sret)
1621 					goto again;
1622 
1623 				btrfs_set_path_blocking(p);
1624 				sret = split_node(trans, root, p, level);
1625 				btrfs_clear_path_blocking(p, NULL);
1626 
1627 				BUG_ON(sret > 0);
1628 				if (sret) {
1629 					ret = sret;
1630 					goto done;
1631 				}
1632 				b = p->nodes[level];
1633 				slot = p->slots[level];
1634 			} else if (ins_len < 0 &&
1635 				   btrfs_header_nritems(b) <
1636 				   BTRFS_NODEPTRS_PER_BLOCK(root) / 4) {
1637 				int sret;
1638 
1639 				sret = reada_for_balance(root, p, level);
1640 				if (sret)
1641 					goto again;
1642 
1643 				btrfs_set_path_blocking(p);
1644 				sret = balance_level(trans, root, p, level);
1645 				btrfs_clear_path_blocking(p, NULL);
1646 
1647 				if (sret) {
1648 					ret = sret;
1649 					goto done;
1650 				}
1651 				b = p->nodes[level];
1652 				if (!b) {
1653 					btrfs_release_path(NULL, p);
1654 					goto again;
1655 				}
1656 				slot = p->slots[level];
1657 				BUG_ON(btrfs_header_nritems(b) == 1);
1658 			}
1659 			unlock_up(p, level, lowest_unlock);
1660 
1661 			/* this is only true while dropping a snapshot */
1662 			if (level == lowest_level) {
1663 				ret = 0;
1664 				goto done;
1665 			}
1666 
1667 			blocknr = btrfs_node_blockptr(b, slot);
1668 			gen = btrfs_node_ptr_generation(b, slot);
1669 			blocksize = btrfs_level_size(root, level - 1);
1670 
1671 			tmp = btrfs_find_tree_block(root, blocknr, blocksize);
1672 			if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
1673 				b = tmp;
1674 			} else {
1675 				/*
1676 				 * reduce lock contention at high levels
1677 				 * of the btree by dropping locks before
1678 				 * we read.
1679 				 */
1680 				if (level > 0) {
1681 					btrfs_release_path(NULL, p);
1682 					if (tmp)
1683 						free_extent_buffer(tmp);
1684 					if (should_reada)
1685 						reada_for_search(root, p,
1686 								 level, slot,
1687 								 key->objectid);
1688 
1689 					tmp = read_tree_block(root, blocknr,
1690 							 blocksize, gen);
1691 					if (tmp)
1692 						free_extent_buffer(tmp);
1693 					goto again;
1694 				} else {
1695 					btrfs_set_path_blocking(p);
1696 					if (tmp)
1697 						free_extent_buffer(tmp);
1698 					if (should_reada)
1699 						reada_for_search(root, p,
1700 								 level, slot,
1701 								 key->objectid);
1702 					b = read_node_slot(root, b, slot);
1703 				}
1704 			}
1705 			if (!p->skip_locking) {
1706 				int lret;
1707 
1708 				btrfs_clear_path_blocking(p, NULL);
1709 				lret = btrfs_try_spin_lock(b);
1710 
1711 				if (!lret) {
1712 					btrfs_set_path_blocking(p);
1713 					btrfs_tree_lock(b);
1714 					btrfs_clear_path_blocking(p, b);
1715 				}
1716 			}
1717 		} else {
1718 			p->slots[level] = slot;
1719 			if (ins_len > 0 &&
1720 			    btrfs_leaf_free_space(root, b) < ins_len) {
1721 				int sret;
1722 
1723 				btrfs_set_path_blocking(p);
1724 				sret = split_leaf(trans, root, key,
1725 						      p, ins_len, ret == 0);
1726 				btrfs_clear_path_blocking(p, NULL);
1727 
1728 				BUG_ON(sret > 0);
1729 				if (sret) {
1730 					ret = sret;
1731 					goto done;
1732 				}
1733 			}
1734 			if (!p->search_for_split)
1735 				unlock_up(p, level, lowest_unlock);
1736 			goto done;
1737 		}
1738 	}
1739 	ret = 1;
1740 done:
1741 	/*
1742 	 * we don't really know what they plan on doing with the path
1743 	 * from here on, so for now just mark it as blocking
1744 	 */
1745 	btrfs_set_path_blocking(p);
1746 	if (prealloc_block.objectid) {
1747 		btrfs_free_reserved_extent(root,
1748 			   prealloc_block.objectid,
1749 			   prealloc_block.offset);
1750 	}
1751 	return ret;
1752 }
1753 
btrfs_merge_path(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_key * node_keys,u64 * nodes,int lowest_level)1754 int btrfs_merge_path(struct btrfs_trans_handle *trans,
1755 		     struct btrfs_root *root,
1756 		     struct btrfs_key *node_keys,
1757 		     u64 *nodes, int lowest_level)
1758 {
1759 	struct extent_buffer *eb;
1760 	struct extent_buffer *parent;
1761 	struct btrfs_key key;
1762 	u64 bytenr;
1763 	u64 generation;
1764 	u32 blocksize;
1765 	int level;
1766 	int slot;
1767 	int key_match;
1768 	int ret;
1769 
1770 	eb = btrfs_lock_root_node(root);
1771 	ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0);
1772 	BUG_ON(ret);
1773 
1774 	btrfs_set_lock_blocking(eb);
1775 
1776 	parent = eb;
1777 	while (1) {
1778 		level = btrfs_header_level(parent);
1779 		if (level == 0 || level <= lowest_level)
1780 			break;
1781 
1782 		ret = bin_search(parent, &node_keys[lowest_level], level,
1783 				 &slot);
1784 		if (ret && slot > 0)
1785 			slot--;
1786 
1787 		bytenr = btrfs_node_blockptr(parent, slot);
1788 		if (nodes[level - 1] == bytenr)
1789 			break;
1790 
1791 		blocksize = btrfs_level_size(root, level - 1);
1792 		generation = btrfs_node_ptr_generation(parent, slot);
1793 		btrfs_node_key_to_cpu(eb, &key, slot);
1794 		key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key));
1795 
1796 		if (generation == trans->transid) {
1797 			eb = read_tree_block(root, bytenr, blocksize,
1798 					     generation);
1799 			btrfs_tree_lock(eb);
1800 			btrfs_set_lock_blocking(eb);
1801 		}
1802 
1803 		/*
1804 		 * if node keys match and node pointer hasn't been modified
1805 		 * in the running transaction, we can merge the path. for
1806 		 * blocks owened by reloc trees, the node pointer check is
1807 		 * skipped, this is because these blocks are fully controlled
1808 		 * by the space balance code, no one else can modify them.
1809 		 */
1810 		if (!nodes[level - 1] || !key_match ||
1811 		    (generation == trans->transid &&
1812 		     btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID)) {
1813 			if (level == 1 || level == lowest_level + 1) {
1814 				if (generation == trans->transid) {
1815 					btrfs_tree_unlock(eb);
1816 					free_extent_buffer(eb);
1817 				}
1818 				break;
1819 			}
1820 
1821 			if (generation != trans->transid) {
1822 				eb = read_tree_block(root, bytenr, blocksize,
1823 						generation);
1824 				btrfs_tree_lock(eb);
1825 				btrfs_set_lock_blocking(eb);
1826 			}
1827 
1828 			ret = btrfs_cow_block(trans, root, eb, parent, slot,
1829 					      &eb, 0);
1830 			BUG_ON(ret);
1831 
1832 			if (root->root_key.objectid ==
1833 			    BTRFS_TREE_RELOC_OBJECTID) {
1834 				if (!nodes[level - 1]) {
1835 					nodes[level - 1] = eb->start;
1836 					memcpy(&node_keys[level - 1], &key,
1837 					       sizeof(node_keys[0]));
1838 				} else {
1839 					WARN_ON(1);
1840 				}
1841 			}
1842 
1843 			btrfs_tree_unlock(parent);
1844 			free_extent_buffer(parent);
1845 			parent = eb;
1846 			continue;
1847 		}
1848 
1849 		btrfs_set_node_blockptr(parent, slot, nodes[level - 1]);
1850 		btrfs_set_node_ptr_generation(parent, slot, trans->transid);
1851 		btrfs_mark_buffer_dirty(parent);
1852 
1853 		ret = btrfs_inc_extent_ref(trans, root,
1854 					nodes[level - 1],
1855 					blocksize, parent->start,
1856 					btrfs_header_owner(parent),
1857 					btrfs_header_generation(parent),
1858 					level - 1);
1859 		BUG_ON(ret);
1860 
1861 		/*
1862 		 * If the block was created in the running transaction,
1863 		 * it's possible this is the last reference to it, so we
1864 		 * should drop the subtree.
1865 		 */
1866 		if (generation == trans->transid) {
1867 			ret = btrfs_drop_subtree(trans, root, eb, parent);
1868 			BUG_ON(ret);
1869 			btrfs_tree_unlock(eb);
1870 			free_extent_buffer(eb);
1871 		} else {
1872 			ret = btrfs_free_extent(trans, root, bytenr,
1873 					blocksize, parent->start,
1874 					btrfs_header_owner(parent),
1875 					btrfs_header_generation(parent),
1876 					level - 1, 1);
1877 			BUG_ON(ret);
1878 		}
1879 		break;
1880 	}
1881 	btrfs_tree_unlock(parent);
1882 	free_extent_buffer(parent);
1883 	return 0;
1884 }
1885 
1886 /*
1887  * adjust the pointers going up the tree, starting at level
1888  * making sure the right key of each node is points to 'key'.
1889  * This is used after shifting pointers to the left, so it stops
1890  * fixing up pointers when a given leaf/node is not in slot 0 of the
1891  * higher levels
1892  *
1893  * If this fails to write a tree block, it returns -1, but continues
1894  * fixing up the blocks in ram so the tree is consistent.
1895  */
fixup_low_keys(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct btrfs_disk_key * key,int level)1896 static int fixup_low_keys(struct btrfs_trans_handle *trans,
1897 			  struct btrfs_root *root, struct btrfs_path *path,
1898 			  struct btrfs_disk_key *key, int level)
1899 {
1900 	int i;
1901 	int ret = 0;
1902 	struct extent_buffer *t;
1903 
1904 	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1905 		int tslot = path->slots[i];
1906 		if (!path->nodes[i])
1907 			break;
1908 		t = path->nodes[i];
1909 		btrfs_set_node_key(t, key, tslot);
1910 		btrfs_mark_buffer_dirty(path->nodes[i]);
1911 		if (tslot != 0)
1912 			break;
1913 	}
1914 	return ret;
1915 }
1916 
1917 /*
1918  * update item key.
1919  *
1920  * This function isn't completely safe. It's the caller's responsibility
1921  * that the new key won't break the order
1922  */
btrfs_set_item_key_safe(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct btrfs_key * new_key)1923 int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
1924 			    struct btrfs_root *root, struct btrfs_path *path,
1925 			    struct btrfs_key *new_key)
1926 {
1927 	struct btrfs_disk_key disk_key;
1928 	struct extent_buffer *eb;
1929 	int slot;
1930 
1931 	eb = path->nodes[0];
1932 	slot = path->slots[0];
1933 	if (slot > 0) {
1934 		btrfs_item_key(eb, &disk_key, slot - 1);
1935 		if (comp_keys(&disk_key, new_key) >= 0)
1936 			return -1;
1937 	}
1938 	if (slot < btrfs_header_nritems(eb) - 1) {
1939 		btrfs_item_key(eb, &disk_key, slot + 1);
1940 		if (comp_keys(&disk_key, new_key) <= 0)
1941 			return -1;
1942 	}
1943 
1944 	btrfs_cpu_key_to_disk(&disk_key, new_key);
1945 	btrfs_set_item_key(eb, &disk_key, slot);
1946 	btrfs_mark_buffer_dirty(eb);
1947 	if (slot == 0)
1948 		fixup_low_keys(trans, root, path, &disk_key, 1);
1949 	return 0;
1950 }
1951 
1952 /*
1953  * try to push data from one node into the next node left in the
1954  * tree.
1955  *
1956  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1957  * error, and > 0 if there was no room in the left hand block.
1958  */
push_node_left(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct extent_buffer * dst,struct extent_buffer * src,int empty)1959 static int push_node_left(struct btrfs_trans_handle *trans,
1960 			  struct btrfs_root *root, struct extent_buffer *dst,
1961 			  struct extent_buffer *src, int empty)
1962 {
1963 	int push_items = 0;
1964 	int src_nritems;
1965 	int dst_nritems;
1966 	int ret = 0;
1967 
1968 	src_nritems = btrfs_header_nritems(src);
1969 	dst_nritems = btrfs_header_nritems(dst);
1970 	push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1971 	WARN_ON(btrfs_header_generation(src) != trans->transid);
1972 	WARN_ON(btrfs_header_generation(dst) != trans->transid);
1973 
1974 	if (!empty && src_nritems <= 8)
1975 		return 1;
1976 
1977 	if (push_items <= 0)
1978 		return 1;
1979 
1980 	if (empty) {
1981 		push_items = min(src_nritems, push_items);
1982 		if (push_items < src_nritems) {
1983 			/* leave at least 8 pointers in the node if
1984 			 * we aren't going to empty it
1985 			 */
1986 			if (src_nritems - push_items < 8) {
1987 				if (push_items <= 8)
1988 					return 1;
1989 				push_items -= 8;
1990 			}
1991 		}
1992 	} else
1993 		push_items = min(src_nritems - 8, push_items);
1994 
1995 	copy_extent_buffer(dst, src,
1996 			   btrfs_node_key_ptr_offset(dst_nritems),
1997 			   btrfs_node_key_ptr_offset(0),
1998 			   push_items * sizeof(struct btrfs_key_ptr));
1999 
2000 	if (push_items < src_nritems) {
2001 		memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
2002 				      btrfs_node_key_ptr_offset(push_items),
2003 				      (src_nritems - push_items) *
2004 				      sizeof(struct btrfs_key_ptr));
2005 	}
2006 	btrfs_set_header_nritems(src, src_nritems - push_items);
2007 	btrfs_set_header_nritems(dst, dst_nritems + push_items);
2008 	btrfs_mark_buffer_dirty(src);
2009 	btrfs_mark_buffer_dirty(dst);
2010 
2011 	ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items);
2012 	BUG_ON(ret);
2013 
2014 	return ret;
2015 }
2016 
2017 /*
2018  * try to push data from one node into the next node right in the
2019  * tree.
2020  *
2021  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
2022  * error, and > 0 if there was no room in the right hand block.
2023  *
2024  * this will  only push up to 1/2 the contents of the left node over
2025  */
balance_node_right(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct extent_buffer * dst,struct extent_buffer * src)2026 static int balance_node_right(struct btrfs_trans_handle *trans,
2027 			      struct btrfs_root *root,
2028 			      struct extent_buffer *dst,
2029 			      struct extent_buffer *src)
2030 {
2031 	int push_items = 0;
2032 	int max_push;
2033 	int src_nritems;
2034 	int dst_nritems;
2035 	int ret = 0;
2036 
2037 	WARN_ON(btrfs_header_generation(src) != trans->transid);
2038 	WARN_ON(btrfs_header_generation(dst) != trans->transid);
2039 
2040 	src_nritems = btrfs_header_nritems(src);
2041 	dst_nritems = btrfs_header_nritems(dst);
2042 	push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
2043 	if (push_items <= 0)
2044 		return 1;
2045 
2046 	if (src_nritems < 4)
2047 		return 1;
2048 
2049 	max_push = src_nritems / 2 + 1;
2050 	/* don't try to empty the node */
2051 	if (max_push >= src_nritems)
2052 		return 1;
2053 
2054 	if (max_push < push_items)
2055 		push_items = max_push;
2056 
2057 	memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
2058 				      btrfs_node_key_ptr_offset(0),
2059 				      (dst_nritems) *
2060 				      sizeof(struct btrfs_key_ptr));
2061 
2062 	copy_extent_buffer(dst, src,
2063 			   btrfs_node_key_ptr_offset(0),
2064 			   btrfs_node_key_ptr_offset(src_nritems - push_items),
2065 			   push_items * sizeof(struct btrfs_key_ptr));
2066 
2067 	btrfs_set_header_nritems(src, src_nritems - push_items);
2068 	btrfs_set_header_nritems(dst, dst_nritems + push_items);
2069 
2070 	btrfs_mark_buffer_dirty(src);
2071 	btrfs_mark_buffer_dirty(dst);
2072 
2073 	ret = btrfs_update_ref(trans, root, src, dst, 0, push_items);
2074 	BUG_ON(ret);
2075 
2076 	return ret;
2077 }
2078 
2079 /*
2080  * helper function to insert a new root level in the tree.
2081  * A new node is allocated, and a single item is inserted to
2082  * point to the existing root
2083  *
2084  * returns zero on success or < 0 on failure.
2085  */
insert_new_root(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,int level)2086 static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2087 			   struct btrfs_root *root,
2088 			   struct btrfs_path *path, int level)
2089 {
2090 	u64 lower_gen;
2091 	struct extent_buffer *lower;
2092 	struct extent_buffer *c;
2093 	struct extent_buffer *old;
2094 	struct btrfs_disk_key lower_key;
2095 	int ret;
2096 
2097 	BUG_ON(path->nodes[level]);
2098 	BUG_ON(path->nodes[level-1] != root->node);
2099 
2100 	lower = path->nodes[level-1];
2101 	if (level == 1)
2102 		btrfs_item_key(lower, &lower_key, 0);
2103 	else
2104 		btrfs_node_key(lower, &lower_key, 0);
2105 
2106 	c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
2107 				   root->root_key.objectid, trans->transid,
2108 				   level, root->node->start, 0);
2109 	if (IS_ERR(c))
2110 		return PTR_ERR(c);
2111 
2112 	memset_extent_buffer(c, 0, 0, root->nodesize);
2113 	btrfs_set_header_nritems(c, 1);
2114 	btrfs_set_header_level(c, level);
2115 	btrfs_set_header_bytenr(c, c->start);
2116 	btrfs_set_header_generation(c, trans->transid);
2117 	btrfs_set_header_owner(c, root->root_key.objectid);
2118 
2119 	write_extent_buffer(c, root->fs_info->fsid,
2120 			    (unsigned long)btrfs_header_fsid(c),
2121 			    BTRFS_FSID_SIZE);
2122 
2123 	write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
2124 			    (unsigned long)btrfs_header_chunk_tree_uuid(c),
2125 			    BTRFS_UUID_SIZE);
2126 
2127 	btrfs_set_node_key(c, &lower_key, 0);
2128 	btrfs_set_node_blockptr(c, 0, lower->start);
2129 	lower_gen = btrfs_header_generation(lower);
2130 	WARN_ON(lower_gen != trans->transid);
2131 
2132 	btrfs_set_node_ptr_generation(c, 0, lower_gen);
2133 
2134 	btrfs_mark_buffer_dirty(c);
2135 
2136 	spin_lock(&root->node_lock);
2137 	old = root->node;
2138 	root->node = c;
2139 	spin_unlock(&root->node_lock);
2140 
2141 	ret = btrfs_update_extent_ref(trans, root, lower->start,
2142 				      lower->start, c->start,
2143 				      root->root_key.objectid,
2144 				      trans->transid, level - 1);
2145 	BUG_ON(ret);
2146 
2147 	/* the super has an extra ref to root->node */
2148 	free_extent_buffer(old);
2149 
2150 	add_root_to_dirty_list(root);
2151 	extent_buffer_get(c);
2152 	path->nodes[level] = c;
2153 	path->locks[level] = 1;
2154 	path->slots[level] = 0;
2155 	return 0;
2156 }
2157 
2158 /*
2159  * worker function to insert a single pointer in a node.
2160  * the node should have enough room for the pointer already
2161  *
2162  * slot and level indicate where you want the key to go, and
2163  * blocknr is the block the key points to.
2164  *
2165  * returns zero on success and < 0 on any error
2166  */
insert_ptr(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct btrfs_disk_key * key,u64 bytenr,int slot,int level)2167 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
2168 		      *root, struct btrfs_path *path, struct btrfs_disk_key
2169 		      *key, u64 bytenr, int slot, int level)
2170 {
2171 	struct extent_buffer *lower;
2172 	int nritems;
2173 
2174 	BUG_ON(!path->nodes[level]);
2175 	lower = path->nodes[level];
2176 	nritems = btrfs_header_nritems(lower);
2177 	if (slot > nritems)
2178 		BUG();
2179 	if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
2180 		BUG();
2181 	if (slot != nritems) {
2182 		memmove_extent_buffer(lower,
2183 			      btrfs_node_key_ptr_offset(slot + 1),
2184 			      btrfs_node_key_ptr_offset(slot),
2185 			      (nritems - slot) * sizeof(struct btrfs_key_ptr));
2186 	}
2187 	btrfs_set_node_key(lower, key, slot);
2188 	btrfs_set_node_blockptr(lower, slot, bytenr);
2189 	WARN_ON(trans->transid == 0);
2190 	btrfs_set_node_ptr_generation(lower, slot, trans->transid);
2191 	btrfs_set_header_nritems(lower, nritems + 1);
2192 	btrfs_mark_buffer_dirty(lower);
2193 	return 0;
2194 }
2195 
2196 /*
2197  * split the node at the specified level in path in two.
2198  * The path is corrected to point to the appropriate node after the split
2199  *
2200  * Before splitting this tries to make some room in the node by pushing
2201  * left and right, if either one works, it returns right away.
2202  *
2203  * returns 0 on success and < 0 on failure
2204  */
split_node(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,int level)2205 static noinline int split_node(struct btrfs_trans_handle *trans,
2206 			       struct btrfs_root *root,
2207 			       struct btrfs_path *path, int level)
2208 {
2209 	struct extent_buffer *c;
2210 	struct extent_buffer *split;
2211 	struct btrfs_disk_key disk_key;
2212 	int mid;
2213 	int ret;
2214 	int wret;
2215 	u32 c_nritems;
2216 
2217 	c = path->nodes[level];
2218 	WARN_ON(btrfs_header_generation(c) != trans->transid);
2219 	if (c == root->node) {
2220 		/* trying to split the root, lets make a new one */
2221 		ret = insert_new_root(trans, root, path, level + 1);
2222 		if (ret)
2223 			return ret;
2224 	} else {
2225 		ret = push_nodes_for_insert(trans, root, path, level);
2226 		c = path->nodes[level];
2227 		if (!ret && btrfs_header_nritems(c) <
2228 		    BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
2229 			return 0;
2230 		if (ret < 0)
2231 			return ret;
2232 	}
2233 
2234 	c_nritems = btrfs_header_nritems(c);
2235 
2236 	split = btrfs_alloc_free_block(trans, root, root->nodesize,
2237 					path->nodes[level + 1]->start,
2238 					root->root_key.objectid,
2239 					trans->transid, level, c->start, 0);
2240 	if (IS_ERR(split))
2241 		return PTR_ERR(split);
2242 
2243 	btrfs_set_header_flags(split, btrfs_header_flags(c));
2244 	btrfs_set_header_level(split, btrfs_header_level(c));
2245 	btrfs_set_header_bytenr(split, split->start);
2246 	btrfs_set_header_generation(split, trans->transid);
2247 	btrfs_set_header_owner(split, root->root_key.objectid);
2248 	btrfs_set_header_flags(split, 0);
2249 	write_extent_buffer(split, root->fs_info->fsid,
2250 			    (unsigned long)btrfs_header_fsid(split),
2251 			    BTRFS_FSID_SIZE);
2252 	write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
2253 			    (unsigned long)btrfs_header_chunk_tree_uuid(split),
2254 			    BTRFS_UUID_SIZE);
2255 
2256 	mid = (c_nritems + 1) / 2;
2257 
2258 	copy_extent_buffer(split, c,
2259 			   btrfs_node_key_ptr_offset(0),
2260 			   btrfs_node_key_ptr_offset(mid),
2261 			   (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
2262 	btrfs_set_header_nritems(split, c_nritems - mid);
2263 	btrfs_set_header_nritems(c, mid);
2264 	ret = 0;
2265 
2266 	btrfs_mark_buffer_dirty(c);
2267 	btrfs_mark_buffer_dirty(split);
2268 
2269 	btrfs_node_key(split, &disk_key, 0);
2270 	wret = insert_ptr(trans, root, path, &disk_key, split->start,
2271 			  path->slots[level + 1] + 1,
2272 			  level + 1);
2273 	if (wret)
2274 		ret = wret;
2275 
2276 	ret = btrfs_update_ref(trans, root, c, split, 0, c_nritems - mid);
2277 	BUG_ON(ret);
2278 
2279 	if (path->slots[level] >= mid) {
2280 		path->slots[level] -= mid;
2281 		btrfs_tree_unlock(c);
2282 		free_extent_buffer(c);
2283 		path->nodes[level] = split;
2284 		path->slots[level + 1] += 1;
2285 	} else {
2286 		btrfs_tree_unlock(split);
2287 		free_extent_buffer(split);
2288 	}
2289 	return ret;
2290 }
2291 
2292 /*
2293  * how many bytes are required to store the items in a leaf.  start
2294  * and nr indicate which items in the leaf to check.  This totals up the
2295  * space used both by the item structs and the item data
2296  */
leaf_space_used(struct extent_buffer * l,int start,int nr)2297 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
2298 {
2299 	int data_len;
2300 	int nritems = btrfs_header_nritems(l);
2301 	int end = min(nritems, start + nr) - 1;
2302 
2303 	if (!nr)
2304 		return 0;
2305 	data_len = btrfs_item_end_nr(l, start);
2306 	data_len = data_len - btrfs_item_offset_nr(l, end);
2307 	data_len += sizeof(struct btrfs_item) * nr;
2308 	WARN_ON(data_len < 0);
2309 	return data_len;
2310 }
2311 
2312 /*
2313  * The space between the end of the leaf items and
2314  * the start of the leaf data.  IOW, how much room
2315  * the leaf has left for both items and data
2316  */
btrfs_leaf_free_space(struct btrfs_root * root,struct extent_buffer * leaf)2317 noinline int btrfs_leaf_free_space(struct btrfs_root *root,
2318 				   struct extent_buffer *leaf)
2319 {
2320 	int nritems = btrfs_header_nritems(leaf);
2321 	int ret;
2322 	ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
2323 	if (ret < 0) {
2324 		printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, "
2325 		       "used %d nritems %d\n",
2326 		       ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
2327 		       leaf_space_used(leaf, 0, nritems), nritems);
2328 	}
2329 	return ret;
2330 }
2331 
2332 /*
2333  * push some data in the path leaf to the right, trying to free up at
2334  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
2335  *
2336  * returns 1 if the push failed because the other node didn't have enough
2337  * room, 0 if everything worked out and < 0 if there were major errors.
2338  */
push_leaf_right(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,int data_size,int empty)2339 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
2340 			   *root, struct btrfs_path *path, int data_size,
2341 			   int empty)
2342 {
2343 	struct extent_buffer *left = path->nodes[0];
2344 	struct extent_buffer *right;
2345 	struct extent_buffer *upper;
2346 	struct btrfs_disk_key disk_key;
2347 	int slot;
2348 	u32 i;
2349 	int free_space;
2350 	int push_space = 0;
2351 	int push_items = 0;
2352 	struct btrfs_item *item;
2353 	u32 left_nritems;
2354 	u32 nr;
2355 	u32 right_nritems;
2356 	u32 data_end;
2357 	u32 this_item_size;
2358 	int ret;
2359 
2360 	slot = path->slots[1];
2361 	if (!path->nodes[1])
2362 		return 1;
2363 
2364 	upper = path->nodes[1];
2365 	if (slot >= btrfs_header_nritems(upper) - 1)
2366 		return 1;
2367 
2368 	btrfs_assert_tree_locked(path->nodes[1]);
2369 
2370 	right = read_node_slot(root, upper, slot + 1);
2371 	btrfs_tree_lock(right);
2372 	btrfs_set_lock_blocking(right);
2373 
2374 	free_space = btrfs_leaf_free_space(root, right);
2375 	if (free_space < data_size)
2376 		goto out_unlock;
2377 
2378 	/* cow and double check */
2379 	ret = btrfs_cow_block(trans, root, right, upper,
2380 			      slot + 1, &right, 0);
2381 	if (ret)
2382 		goto out_unlock;
2383 
2384 	free_space = btrfs_leaf_free_space(root, right);
2385 	if (free_space < data_size)
2386 		goto out_unlock;
2387 
2388 	left_nritems = btrfs_header_nritems(left);
2389 	if (left_nritems == 0)
2390 		goto out_unlock;
2391 
2392 	if (empty)
2393 		nr = 0;
2394 	else
2395 		nr = 1;
2396 
2397 	if (path->slots[0] >= left_nritems)
2398 		push_space += data_size;
2399 
2400 	i = left_nritems - 1;
2401 	while (i >= nr) {
2402 		item = btrfs_item_nr(left, i);
2403 
2404 		if (!empty && push_items > 0) {
2405 			if (path->slots[0] > i)
2406 				break;
2407 			if (path->slots[0] == i) {
2408 				int space = btrfs_leaf_free_space(root, left);
2409 				if (space + push_space * 2 > free_space)
2410 					break;
2411 			}
2412 		}
2413 
2414 		if (path->slots[0] == i)
2415 			push_space += data_size;
2416 
2417 		if (!left->map_token) {
2418 			map_extent_buffer(left, (unsigned long)item,
2419 					sizeof(struct btrfs_item),
2420 					&left->map_token, &left->kaddr,
2421 					&left->map_start, &left->map_len,
2422 					KM_USER1);
2423 		}
2424 
2425 		this_item_size = btrfs_item_size(left, item);
2426 		if (this_item_size + sizeof(*item) + push_space > free_space)
2427 			break;
2428 
2429 		push_items++;
2430 		push_space += this_item_size + sizeof(*item);
2431 		if (i == 0)
2432 			break;
2433 		i--;
2434 	}
2435 	if (left->map_token) {
2436 		unmap_extent_buffer(left, left->map_token, KM_USER1);
2437 		left->map_token = NULL;
2438 	}
2439 
2440 	if (push_items == 0)
2441 		goto out_unlock;
2442 
2443 	if (!empty && push_items == left_nritems)
2444 		WARN_ON(1);
2445 
2446 	/* push left to right */
2447 	right_nritems = btrfs_header_nritems(right);
2448 
2449 	push_space = btrfs_item_end_nr(left, left_nritems - push_items);
2450 	push_space -= leaf_data_end(root, left);
2451 
2452 	/* make room in the right data area */
2453 	data_end = leaf_data_end(root, right);
2454 	memmove_extent_buffer(right,
2455 			      btrfs_leaf_data(right) + data_end - push_space,
2456 			      btrfs_leaf_data(right) + data_end,
2457 			      BTRFS_LEAF_DATA_SIZE(root) - data_end);
2458 
2459 	/* copy from the left data area */
2460 	copy_extent_buffer(right, left, btrfs_leaf_data(right) +
2461 		     BTRFS_LEAF_DATA_SIZE(root) - push_space,
2462 		     btrfs_leaf_data(left) + leaf_data_end(root, left),
2463 		     push_space);
2464 
2465 	memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
2466 			      btrfs_item_nr_offset(0),
2467 			      right_nritems * sizeof(struct btrfs_item));
2468 
2469 	/* copy the items from left to right */
2470 	copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
2471 		   btrfs_item_nr_offset(left_nritems - push_items),
2472 		   push_items * sizeof(struct btrfs_item));
2473 
2474 	/* update the item pointers */
2475 	right_nritems += push_items;
2476 	btrfs_set_header_nritems(right, right_nritems);
2477 	push_space = BTRFS_LEAF_DATA_SIZE(root);
2478 	for (i = 0; i < right_nritems; i++) {
2479 		item = btrfs_item_nr(right, i);
2480 		if (!right->map_token) {
2481 			map_extent_buffer(right, (unsigned long)item,
2482 					sizeof(struct btrfs_item),
2483 					&right->map_token, &right->kaddr,
2484 					&right->map_start, &right->map_len,
2485 					KM_USER1);
2486 		}
2487 		push_space -= btrfs_item_size(right, item);
2488 		btrfs_set_item_offset(right, item, push_space);
2489 	}
2490 
2491 	if (right->map_token) {
2492 		unmap_extent_buffer(right, right->map_token, KM_USER1);
2493 		right->map_token = NULL;
2494 	}
2495 	left_nritems -= push_items;
2496 	btrfs_set_header_nritems(left, left_nritems);
2497 
2498 	if (left_nritems)
2499 		btrfs_mark_buffer_dirty(left);
2500 	btrfs_mark_buffer_dirty(right);
2501 
2502 	ret = btrfs_update_ref(trans, root, left, right, 0, push_items);
2503 	BUG_ON(ret);
2504 
2505 	btrfs_item_key(right, &disk_key, 0);
2506 	btrfs_set_node_key(upper, &disk_key, slot + 1);
2507 	btrfs_mark_buffer_dirty(upper);
2508 
2509 	/* then fixup the leaf pointer in the path */
2510 	if (path->slots[0] >= left_nritems) {
2511 		path->slots[0] -= left_nritems;
2512 		if (btrfs_header_nritems(path->nodes[0]) == 0)
2513 			clean_tree_block(trans, root, path->nodes[0]);
2514 		btrfs_tree_unlock(path->nodes[0]);
2515 		free_extent_buffer(path->nodes[0]);
2516 		path->nodes[0] = right;
2517 		path->slots[1] += 1;
2518 	} else {
2519 		btrfs_tree_unlock(right);
2520 		free_extent_buffer(right);
2521 	}
2522 	return 0;
2523 
2524 out_unlock:
2525 	btrfs_tree_unlock(right);
2526 	free_extent_buffer(right);
2527 	return 1;
2528 }
2529 
2530 /*
2531  * push some data in the path leaf to the left, trying to free up at
2532  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
2533  */
push_leaf_left(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,int data_size,int empty)2534 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
2535 			  *root, struct btrfs_path *path, int data_size,
2536 			  int empty)
2537 {
2538 	struct btrfs_disk_key disk_key;
2539 	struct extent_buffer *right = path->nodes[0];
2540 	struct extent_buffer *left;
2541 	int slot;
2542 	int i;
2543 	int free_space;
2544 	int push_space = 0;
2545 	int push_items = 0;
2546 	struct btrfs_item *item;
2547 	u32 old_left_nritems;
2548 	u32 right_nritems;
2549 	u32 nr;
2550 	int ret = 0;
2551 	int wret;
2552 	u32 this_item_size;
2553 	u32 old_left_item_size;
2554 
2555 	slot = path->slots[1];
2556 	if (slot == 0)
2557 		return 1;
2558 	if (!path->nodes[1])
2559 		return 1;
2560 
2561 	right_nritems = btrfs_header_nritems(right);
2562 	if (right_nritems == 0)
2563 		return 1;
2564 
2565 	btrfs_assert_tree_locked(path->nodes[1]);
2566 
2567 	left = read_node_slot(root, path->nodes[1], slot - 1);
2568 	btrfs_tree_lock(left);
2569 	btrfs_set_lock_blocking(left);
2570 
2571 	free_space = btrfs_leaf_free_space(root, left);
2572 	if (free_space < data_size) {
2573 		ret = 1;
2574 		goto out;
2575 	}
2576 
2577 	/* cow and double check */
2578 	ret = btrfs_cow_block(trans, root, left,
2579 			      path->nodes[1], slot - 1, &left, 0);
2580 	if (ret) {
2581 		/* we hit -ENOSPC, but it isn't fatal here */
2582 		ret = 1;
2583 		goto out;
2584 	}
2585 
2586 	free_space = btrfs_leaf_free_space(root, left);
2587 	if (free_space < data_size) {
2588 		ret = 1;
2589 		goto out;
2590 	}
2591 
2592 	if (empty)
2593 		nr = right_nritems;
2594 	else
2595 		nr = right_nritems - 1;
2596 
2597 	for (i = 0; i < nr; i++) {
2598 		item = btrfs_item_nr(right, i);
2599 		if (!right->map_token) {
2600 			map_extent_buffer(right, (unsigned long)item,
2601 					sizeof(struct btrfs_item),
2602 					&right->map_token, &right->kaddr,
2603 					&right->map_start, &right->map_len,
2604 					KM_USER1);
2605 		}
2606 
2607 		if (!empty && push_items > 0) {
2608 			if (path->slots[0] < i)
2609 				break;
2610 			if (path->slots[0] == i) {
2611 				int space = btrfs_leaf_free_space(root, right);
2612 				if (space + push_space * 2 > free_space)
2613 					break;
2614 			}
2615 		}
2616 
2617 		if (path->slots[0] == i)
2618 			push_space += data_size;
2619 
2620 		this_item_size = btrfs_item_size(right, item);
2621 		if (this_item_size + sizeof(*item) + push_space > free_space)
2622 			break;
2623 
2624 		push_items++;
2625 		push_space += this_item_size + sizeof(*item);
2626 	}
2627 
2628 	if (right->map_token) {
2629 		unmap_extent_buffer(right, right->map_token, KM_USER1);
2630 		right->map_token = NULL;
2631 	}
2632 
2633 	if (push_items == 0) {
2634 		ret = 1;
2635 		goto out;
2636 	}
2637 	if (!empty && push_items == btrfs_header_nritems(right))
2638 		WARN_ON(1);
2639 
2640 	/* push data from right to left */
2641 	copy_extent_buffer(left, right,
2642 			   btrfs_item_nr_offset(btrfs_header_nritems(left)),
2643 			   btrfs_item_nr_offset(0),
2644 			   push_items * sizeof(struct btrfs_item));
2645 
2646 	push_space = BTRFS_LEAF_DATA_SIZE(root) -
2647 		     btrfs_item_offset_nr(right, push_items - 1);
2648 
2649 	copy_extent_buffer(left, right, btrfs_leaf_data(left) +
2650 		     leaf_data_end(root, left) - push_space,
2651 		     btrfs_leaf_data(right) +
2652 		     btrfs_item_offset_nr(right, push_items - 1),
2653 		     push_space);
2654 	old_left_nritems = btrfs_header_nritems(left);
2655 	BUG_ON(old_left_nritems <= 0);
2656 
2657 	old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
2658 	for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
2659 		u32 ioff;
2660 
2661 		item = btrfs_item_nr(left, i);
2662 		if (!left->map_token) {
2663 			map_extent_buffer(left, (unsigned long)item,
2664 					sizeof(struct btrfs_item),
2665 					&left->map_token, &left->kaddr,
2666 					&left->map_start, &left->map_len,
2667 					KM_USER1);
2668 		}
2669 
2670 		ioff = btrfs_item_offset(left, item);
2671 		btrfs_set_item_offset(left, item,
2672 		      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
2673 	}
2674 	btrfs_set_header_nritems(left, old_left_nritems + push_items);
2675 	if (left->map_token) {
2676 		unmap_extent_buffer(left, left->map_token, KM_USER1);
2677 		left->map_token = NULL;
2678 	}
2679 
2680 	/* fixup right node */
2681 	if (push_items > right_nritems) {
2682 		printk(KERN_CRIT "push items %d nr %u\n", push_items,
2683 		       right_nritems);
2684 		WARN_ON(1);
2685 	}
2686 
2687 	if (push_items < right_nritems) {
2688 		push_space = btrfs_item_offset_nr(right, push_items - 1) -
2689 						  leaf_data_end(root, right);
2690 		memmove_extent_buffer(right, btrfs_leaf_data(right) +
2691 				      BTRFS_LEAF_DATA_SIZE(root) - push_space,
2692 				      btrfs_leaf_data(right) +
2693 				      leaf_data_end(root, right), push_space);
2694 
2695 		memmove_extent_buffer(right, btrfs_item_nr_offset(0),
2696 			      btrfs_item_nr_offset(push_items),
2697 			     (btrfs_header_nritems(right) - push_items) *
2698 			     sizeof(struct btrfs_item));
2699 	}
2700 	right_nritems -= push_items;
2701 	btrfs_set_header_nritems(right, right_nritems);
2702 	push_space = BTRFS_LEAF_DATA_SIZE(root);
2703 	for (i = 0; i < right_nritems; i++) {
2704 		item = btrfs_item_nr(right, i);
2705 
2706 		if (!right->map_token) {
2707 			map_extent_buffer(right, (unsigned long)item,
2708 					sizeof(struct btrfs_item),
2709 					&right->map_token, &right->kaddr,
2710 					&right->map_start, &right->map_len,
2711 					KM_USER1);
2712 		}
2713 
2714 		push_space = push_space - btrfs_item_size(right, item);
2715 		btrfs_set_item_offset(right, item, push_space);
2716 	}
2717 	if (right->map_token) {
2718 		unmap_extent_buffer(right, right->map_token, KM_USER1);
2719 		right->map_token = NULL;
2720 	}
2721 
2722 	btrfs_mark_buffer_dirty(left);
2723 	if (right_nritems)
2724 		btrfs_mark_buffer_dirty(right);
2725 
2726 	ret = btrfs_update_ref(trans, root, right, left,
2727 			       old_left_nritems, push_items);
2728 	BUG_ON(ret);
2729 
2730 	btrfs_item_key(right, &disk_key, 0);
2731 	wret = fixup_low_keys(trans, root, path, &disk_key, 1);
2732 	if (wret)
2733 		ret = wret;
2734 
2735 	/* then fixup the leaf pointer in the path */
2736 	if (path->slots[0] < push_items) {
2737 		path->slots[0] += old_left_nritems;
2738 		if (btrfs_header_nritems(path->nodes[0]) == 0)
2739 			clean_tree_block(trans, root, path->nodes[0]);
2740 		btrfs_tree_unlock(path->nodes[0]);
2741 		free_extent_buffer(path->nodes[0]);
2742 		path->nodes[0] = left;
2743 		path->slots[1] -= 1;
2744 	} else {
2745 		btrfs_tree_unlock(left);
2746 		free_extent_buffer(left);
2747 		path->slots[0] -= push_items;
2748 	}
2749 	BUG_ON(path->slots[0] < 0);
2750 	return ret;
2751 out:
2752 	btrfs_tree_unlock(left);
2753 	free_extent_buffer(left);
2754 	return ret;
2755 }
2756 
2757 /*
2758  * split the path's leaf in two, making sure there is at least data_size
2759  * available for the resulting leaf level of the path.
2760  *
2761  * returns 0 if all went well and < 0 on failure.
2762  */
split_leaf(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_key * ins_key,struct btrfs_path * path,int data_size,int extend)2763 static noinline int split_leaf(struct btrfs_trans_handle *trans,
2764 			       struct btrfs_root *root,
2765 			       struct btrfs_key *ins_key,
2766 			       struct btrfs_path *path, int data_size,
2767 			       int extend)
2768 {
2769 	struct extent_buffer *l;
2770 	u32 nritems;
2771 	int mid;
2772 	int slot;
2773 	struct extent_buffer *right;
2774 	int data_copy_size;
2775 	int rt_data_off;
2776 	int i;
2777 	int ret = 0;
2778 	int wret;
2779 	int double_split;
2780 	int num_doubles = 0;
2781 	struct btrfs_disk_key disk_key;
2782 
2783 	/* first try to make some room by pushing left and right */
2784 	if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
2785 		wret = push_leaf_right(trans, root, path, data_size, 0);
2786 		if (wret < 0)
2787 			return wret;
2788 		if (wret) {
2789 			wret = push_leaf_left(trans, root, path, data_size, 0);
2790 			if (wret < 0)
2791 				return wret;
2792 		}
2793 		l = path->nodes[0];
2794 
2795 		/* did the pushes work? */
2796 		if (btrfs_leaf_free_space(root, l) >= data_size)
2797 			return 0;
2798 	}
2799 
2800 	if (!path->nodes[1]) {
2801 		ret = insert_new_root(trans, root, path, 1);
2802 		if (ret)
2803 			return ret;
2804 	}
2805 again:
2806 	double_split = 0;
2807 	l = path->nodes[0];
2808 	slot = path->slots[0];
2809 	nritems = btrfs_header_nritems(l);
2810 	mid = (nritems + 1) / 2;
2811 
2812 	right = btrfs_alloc_free_block(trans, root, root->leafsize,
2813 					path->nodes[1]->start,
2814 					root->root_key.objectid,
2815 					trans->transid, 0, l->start, 0);
2816 	if (IS_ERR(right)) {
2817 		BUG_ON(1);
2818 		return PTR_ERR(right);
2819 	}
2820 
2821 	memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
2822 	btrfs_set_header_bytenr(right, right->start);
2823 	btrfs_set_header_generation(right, trans->transid);
2824 	btrfs_set_header_owner(right, root->root_key.objectid);
2825 	btrfs_set_header_level(right, 0);
2826 	write_extent_buffer(right, root->fs_info->fsid,
2827 			    (unsigned long)btrfs_header_fsid(right),
2828 			    BTRFS_FSID_SIZE);
2829 
2830 	write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
2831 			    (unsigned long)btrfs_header_chunk_tree_uuid(right),
2832 			    BTRFS_UUID_SIZE);
2833 	if (mid <= slot) {
2834 		if (nritems == 1 ||
2835 		    leaf_space_used(l, mid, nritems - mid) + data_size >
2836 			BTRFS_LEAF_DATA_SIZE(root)) {
2837 			if (slot >= nritems) {
2838 				btrfs_cpu_key_to_disk(&disk_key, ins_key);
2839 				btrfs_set_header_nritems(right, 0);
2840 				wret = insert_ptr(trans, root, path,
2841 						  &disk_key, right->start,
2842 						  path->slots[1] + 1, 1);
2843 				if (wret)
2844 					ret = wret;
2845 
2846 				btrfs_tree_unlock(path->nodes[0]);
2847 				free_extent_buffer(path->nodes[0]);
2848 				path->nodes[0] = right;
2849 				path->slots[0] = 0;
2850 				path->slots[1] += 1;
2851 				btrfs_mark_buffer_dirty(right);
2852 				return ret;
2853 			}
2854 			mid = slot;
2855 			if (mid != nritems &&
2856 			    leaf_space_used(l, mid, nritems - mid) +
2857 			    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
2858 				double_split = 1;
2859 			}
2860 		}
2861 	} else {
2862 		if (leaf_space_used(l, 0, mid) + data_size >
2863 			BTRFS_LEAF_DATA_SIZE(root)) {
2864 			if (!extend && data_size && slot == 0) {
2865 				btrfs_cpu_key_to_disk(&disk_key, ins_key);
2866 				btrfs_set_header_nritems(right, 0);
2867 				wret = insert_ptr(trans, root, path,
2868 						  &disk_key,
2869 						  right->start,
2870 						  path->slots[1], 1);
2871 				if (wret)
2872 					ret = wret;
2873 				btrfs_tree_unlock(path->nodes[0]);
2874 				free_extent_buffer(path->nodes[0]);
2875 				path->nodes[0] = right;
2876 				path->slots[0] = 0;
2877 				if (path->slots[1] == 0) {
2878 					wret = fixup_low_keys(trans, root,
2879 						      path, &disk_key, 1);
2880 					if (wret)
2881 						ret = wret;
2882 				}
2883 				btrfs_mark_buffer_dirty(right);
2884 				return ret;
2885 			} else if ((extend || !data_size) && slot == 0) {
2886 				mid = 1;
2887 			} else {
2888 				mid = slot;
2889 				if (mid != nritems &&
2890 				    leaf_space_used(l, mid, nritems - mid) +
2891 				    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
2892 					double_split = 1;
2893 				}
2894 			}
2895 		}
2896 	}
2897 	nritems = nritems - mid;
2898 	btrfs_set_header_nritems(right, nritems);
2899 	data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
2900 
2901 	copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
2902 			   btrfs_item_nr_offset(mid),
2903 			   nritems * sizeof(struct btrfs_item));
2904 
2905 	copy_extent_buffer(right, l,
2906 		     btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
2907 		     data_copy_size, btrfs_leaf_data(l) +
2908 		     leaf_data_end(root, l), data_copy_size);
2909 
2910 	rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
2911 		      btrfs_item_end_nr(l, mid);
2912 
2913 	for (i = 0; i < nritems; i++) {
2914 		struct btrfs_item *item = btrfs_item_nr(right, i);
2915 		u32 ioff;
2916 
2917 		if (!right->map_token) {
2918 			map_extent_buffer(right, (unsigned long)item,
2919 					sizeof(struct btrfs_item),
2920 					&right->map_token, &right->kaddr,
2921 					&right->map_start, &right->map_len,
2922 					KM_USER1);
2923 		}
2924 
2925 		ioff = btrfs_item_offset(right, item);
2926 		btrfs_set_item_offset(right, item, ioff + rt_data_off);
2927 	}
2928 
2929 	if (right->map_token) {
2930 		unmap_extent_buffer(right, right->map_token, KM_USER1);
2931 		right->map_token = NULL;
2932 	}
2933 
2934 	btrfs_set_header_nritems(l, mid);
2935 	ret = 0;
2936 	btrfs_item_key(right, &disk_key, 0);
2937 	wret = insert_ptr(trans, root, path, &disk_key, right->start,
2938 			  path->slots[1] + 1, 1);
2939 	if (wret)
2940 		ret = wret;
2941 
2942 	btrfs_mark_buffer_dirty(right);
2943 	btrfs_mark_buffer_dirty(l);
2944 	BUG_ON(path->slots[0] != slot);
2945 
2946 	ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
2947 	BUG_ON(ret);
2948 
2949 	if (mid <= slot) {
2950 		btrfs_tree_unlock(path->nodes[0]);
2951 		free_extent_buffer(path->nodes[0]);
2952 		path->nodes[0] = right;
2953 		path->slots[0] -= mid;
2954 		path->slots[1] += 1;
2955 	} else {
2956 		btrfs_tree_unlock(right);
2957 		free_extent_buffer(right);
2958 	}
2959 
2960 	BUG_ON(path->slots[0] < 0);
2961 
2962 	if (double_split) {
2963 		BUG_ON(num_doubles != 0);
2964 		num_doubles++;
2965 		goto again;
2966 	}
2967 	return ret;
2968 }
2969 
2970 /*
2971  * This function splits a single item into two items,
2972  * giving 'new_key' to the new item and splitting the
2973  * old one at split_offset (from the start of the item).
2974  *
2975  * The path may be released by this operation.  After
2976  * the split, the path is pointing to the old item.  The
2977  * new item is going to be in the same node as the old one.
2978  *
2979  * Note, the item being split must be smaller enough to live alone on
2980  * a tree block with room for one extra struct btrfs_item
2981  *
2982  * This allows us to split the item in place, keeping a lock on the
2983  * leaf the entire time.
2984  */
btrfs_split_item(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct btrfs_key * new_key,unsigned long split_offset)2985 int btrfs_split_item(struct btrfs_trans_handle *trans,
2986 		     struct btrfs_root *root,
2987 		     struct btrfs_path *path,
2988 		     struct btrfs_key *new_key,
2989 		     unsigned long split_offset)
2990 {
2991 	u32 item_size;
2992 	struct extent_buffer *leaf;
2993 	struct btrfs_key orig_key;
2994 	struct btrfs_item *item;
2995 	struct btrfs_item *new_item;
2996 	int ret = 0;
2997 	int slot;
2998 	u32 nritems;
2999 	u32 orig_offset;
3000 	struct btrfs_disk_key disk_key;
3001 	char *buf;
3002 
3003 	leaf = path->nodes[0];
3004 	btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]);
3005 	if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item))
3006 		goto split;
3007 
3008 	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3009 	btrfs_release_path(root, path);
3010 
3011 	path->search_for_split = 1;
3012 	path->keep_locks = 1;
3013 
3014 	ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1);
3015 	path->search_for_split = 0;
3016 
3017 	/* if our item isn't there or got smaller, return now */
3018 	if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0],
3019 							path->slots[0])) {
3020 		path->keep_locks = 0;
3021 		return -EAGAIN;
3022 	}
3023 
3024 	ret = split_leaf(trans, root, &orig_key, path,
3025 			 sizeof(struct btrfs_item), 1);
3026 	path->keep_locks = 0;
3027 	BUG_ON(ret);
3028 
3029 	/*
3030 	 * make sure any changes to the path from split_leaf leave it
3031 	 * in a blocking state
3032 	 */
3033 	btrfs_set_path_blocking(path);
3034 
3035 	leaf = path->nodes[0];
3036 	BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
3037 
3038 split:
3039 	item = btrfs_item_nr(leaf, path->slots[0]);
3040 	orig_offset = btrfs_item_offset(leaf, item);
3041 	item_size = btrfs_item_size(leaf, item);
3042 
3043 
3044 	buf = kmalloc(item_size, GFP_NOFS);
3045 	read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
3046 			    path->slots[0]), item_size);
3047 	slot = path->slots[0] + 1;
3048 	leaf = path->nodes[0];
3049 
3050 	nritems = btrfs_header_nritems(leaf);
3051 
3052 	if (slot != nritems) {
3053 		/* shift the items */
3054 		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
3055 			      btrfs_item_nr_offset(slot),
3056 			      (nritems - slot) * sizeof(struct btrfs_item));
3057 
3058 	}
3059 
3060 	btrfs_cpu_key_to_disk(&disk_key, new_key);
3061 	btrfs_set_item_key(leaf, &disk_key, slot);
3062 
3063 	new_item = btrfs_item_nr(leaf, slot);
3064 
3065 	btrfs_set_item_offset(leaf, new_item, orig_offset);
3066 	btrfs_set_item_size(leaf, new_item, item_size - split_offset);
3067 
3068 	btrfs_set_item_offset(leaf, item,
3069 			      orig_offset + item_size - split_offset);
3070 	btrfs_set_item_size(leaf, item, split_offset);
3071 
3072 	btrfs_set_header_nritems(leaf, nritems + 1);
3073 
3074 	/* write the data for the start of the original item */
3075 	write_extent_buffer(leaf, buf,
3076 			    btrfs_item_ptr_offset(leaf, path->slots[0]),
3077 			    split_offset);
3078 
3079 	/* write the data for the new item */
3080 	write_extent_buffer(leaf, buf + split_offset,
3081 			    btrfs_item_ptr_offset(leaf, slot),
3082 			    item_size - split_offset);
3083 	btrfs_mark_buffer_dirty(leaf);
3084 
3085 	ret = 0;
3086 	if (btrfs_leaf_free_space(root, leaf) < 0) {
3087 		btrfs_print_leaf(root, leaf);
3088 		BUG();
3089 	}
3090 	kfree(buf);
3091 	return ret;
3092 }
3093 
3094 /*
3095  * make the item pointed to by the path smaller.  new_size indicates
3096  * how small to make it, and from_end tells us if we just chop bytes
3097  * off the end of the item or if we shift the item to chop bytes off
3098  * the front.
3099  */
btrfs_truncate_item(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,u32 new_size,int from_end)3100 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
3101 			struct btrfs_root *root,
3102 			struct btrfs_path *path,
3103 			u32 new_size, int from_end)
3104 {
3105 	int ret = 0;
3106 	int slot;
3107 	int slot_orig;
3108 	struct extent_buffer *leaf;
3109 	struct btrfs_item *item;
3110 	u32 nritems;
3111 	unsigned int data_end;
3112 	unsigned int old_data_start;
3113 	unsigned int old_size;
3114 	unsigned int size_diff;
3115 	int i;
3116 
3117 	slot_orig = path->slots[0];
3118 	leaf = path->nodes[0];
3119 	slot = path->slots[0];
3120 
3121 	old_size = btrfs_item_size_nr(leaf, slot);
3122 	if (old_size == new_size)
3123 		return 0;
3124 
3125 	nritems = btrfs_header_nritems(leaf);
3126 	data_end = leaf_data_end(root, leaf);
3127 
3128 	old_data_start = btrfs_item_offset_nr(leaf, slot);
3129 
3130 	size_diff = old_size - new_size;
3131 
3132 	BUG_ON(slot < 0);
3133 	BUG_ON(slot >= nritems);
3134 
3135 	/*
3136 	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3137 	 */
3138 	/* first correct the data pointers */
3139 	for (i = slot; i < nritems; i++) {
3140 		u32 ioff;
3141 		item = btrfs_item_nr(leaf, i);
3142 
3143 		if (!leaf->map_token) {
3144 			map_extent_buffer(leaf, (unsigned long)item,
3145 					sizeof(struct btrfs_item),
3146 					&leaf->map_token, &leaf->kaddr,
3147 					&leaf->map_start, &leaf->map_len,
3148 					KM_USER1);
3149 		}
3150 
3151 		ioff = btrfs_item_offset(leaf, item);
3152 		btrfs_set_item_offset(leaf, item, ioff + size_diff);
3153 	}
3154 
3155 	if (leaf->map_token) {
3156 		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3157 		leaf->map_token = NULL;
3158 	}
3159 
3160 	/* shift the data */
3161 	if (from_end) {
3162 		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3163 			      data_end + size_diff, btrfs_leaf_data(leaf) +
3164 			      data_end, old_data_start + new_size - data_end);
3165 	} else {
3166 		struct btrfs_disk_key disk_key;
3167 		u64 offset;
3168 
3169 		btrfs_item_key(leaf, &disk_key, slot);
3170 
3171 		if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
3172 			unsigned long ptr;
3173 			struct btrfs_file_extent_item *fi;
3174 
3175 			fi = btrfs_item_ptr(leaf, slot,
3176 					    struct btrfs_file_extent_item);
3177 			fi = (struct btrfs_file_extent_item *)(
3178 			     (unsigned long)fi - size_diff);
3179 
3180 			if (btrfs_file_extent_type(leaf, fi) ==
3181 			    BTRFS_FILE_EXTENT_INLINE) {
3182 				ptr = btrfs_item_ptr_offset(leaf, slot);
3183 				memmove_extent_buffer(leaf, ptr,
3184 				      (unsigned long)fi,
3185 				      offsetof(struct btrfs_file_extent_item,
3186 						 disk_bytenr));
3187 			}
3188 		}
3189 
3190 		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3191 			      data_end + size_diff, btrfs_leaf_data(leaf) +
3192 			      data_end, old_data_start - data_end);
3193 
3194 		offset = btrfs_disk_key_offset(&disk_key);
3195 		btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
3196 		btrfs_set_item_key(leaf, &disk_key, slot);
3197 		if (slot == 0)
3198 			fixup_low_keys(trans, root, path, &disk_key, 1);
3199 	}
3200 
3201 	item = btrfs_item_nr(leaf, slot);
3202 	btrfs_set_item_size(leaf, item, new_size);
3203 	btrfs_mark_buffer_dirty(leaf);
3204 
3205 	ret = 0;
3206 	if (btrfs_leaf_free_space(root, leaf) < 0) {
3207 		btrfs_print_leaf(root, leaf);
3208 		BUG();
3209 	}
3210 	return ret;
3211 }
3212 
3213 /*
3214  * make the item pointed to by the path bigger, data_size is the new size.
3215  */
btrfs_extend_item(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,u32 data_size)3216 int btrfs_extend_item(struct btrfs_trans_handle *trans,
3217 		      struct btrfs_root *root, struct btrfs_path *path,
3218 		      u32 data_size)
3219 {
3220 	int ret = 0;
3221 	int slot;
3222 	int slot_orig;
3223 	struct extent_buffer *leaf;
3224 	struct btrfs_item *item;
3225 	u32 nritems;
3226 	unsigned int data_end;
3227 	unsigned int old_data;
3228 	unsigned int old_size;
3229 	int i;
3230 
3231 	slot_orig = path->slots[0];
3232 	leaf = path->nodes[0];
3233 
3234 	nritems = btrfs_header_nritems(leaf);
3235 	data_end = leaf_data_end(root, leaf);
3236 
3237 	if (btrfs_leaf_free_space(root, leaf) < data_size) {
3238 		btrfs_print_leaf(root, leaf);
3239 		BUG();
3240 	}
3241 	slot = path->slots[0];
3242 	old_data = btrfs_item_end_nr(leaf, slot);
3243 
3244 	BUG_ON(slot < 0);
3245 	if (slot >= nritems) {
3246 		btrfs_print_leaf(root, leaf);
3247 		printk(KERN_CRIT "slot %d too large, nritems %d\n",
3248 		       slot, nritems);
3249 		BUG_ON(1);
3250 	}
3251 
3252 	/*
3253 	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3254 	 */
3255 	/* first correct the data pointers */
3256 	for (i = slot; i < nritems; i++) {
3257 		u32 ioff;
3258 		item = btrfs_item_nr(leaf, i);
3259 
3260 		if (!leaf->map_token) {
3261 			map_extent_buffer(leaf, (unsigned long)item,
3262 					sizeof(struct btrfs_item),
3263 					&leaf->map_token, &leaf->kaddr,
3264 					&leaf->map_start, &leaf->map_len,
3265 					KM_USER1);
3266 		}
3267 		ioff = btrfs_item_offset(leaf, item);
3268 		btrfs_set_item_offset(leaf, item, ioff - data_size);
3269 	}
3270 
3271 	if (leaf->map_token) {
3272 		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3273 		leaf->map_token = NULL;
3274 	}
3275 
3276 	/* shift the data */
3277 	memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3278 		      data_end - data_size, btrfs_leaf_data(leaf) +
3279 		      data_end, old_data - data_end);
3280 
3281 	data_end = old_data;
3282 	old_size = btrfs_item_size_nr(leaf, slot);
3283 	item = btrfs_item_nr(leaf, slot);
3284 	btrfs_set_item_size(leaf, item, old_size + data_size);
3285 	btrfs_mark_buffer_dirty(leaf);
3286 
3287 	ret = 0;
3288 	if (btrfs_leaf_free_space(root, leaf) < 0) {
3289 		btrfs_print_leaf(root, leaf);
3290 		BUG();
3291 	}
3292 	return ret;
3293 }
3294 
3295 /*
3296  * Given a key and some data, insert items into the tree.
3297  * This does all the path init required, making room in the tree if needed.
3298  * Returns the number of keys that were inserted.
3299  */
btrfs_insert_some_items(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct btrfs_key * cpu_key,u32 * data_size,int nr)3300 int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
3301 			    struct btrfs_root *root,
3302 			    struct btrfs_path *path,
3303 			    struct btrfs_key *cpu_key, u32 *data_size,
3304 			    int nr)
3305 {
3306 	struct extent_buffer *leaf;
3307 	struct btrfs_item *item;
3308 	int ret = 0;
3309 	int slot;
3310 	int i;
3311 	u32 nritems;
3312 	u32 total_data = 0;
3313 	u32 total_size = 0;
3314 	unsigned int data_end;
3315 	struct btrfs_disk_key disk_key;
3316 	struct btrfs_key found_key;
3317 
3318 	for (i = 0; i < nr; i++) {
3319 		if (total_size + data_size[i] + sizeof(struct btrfs_item) >
3320 		    BTRFS_LEAF_DATA_SIZE(root)) {
3321 			break;
3322 			nr = i;
3323 		}
3324 		total_data += data_size[i];
3325 		total_size += data_size[i] + sizeof(struct btrfs_item);
3326 	}
3327 	BUG_ON(nr == 0);
3328 
3329 	ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
3330 	if (ret == 0)
3331 		return -EEXIST;
3332 	if (ret < 0)
3333 		goto out;
3334 
3335 	leaf = path->nodes[0];
3336 
3337 	nritems = btrfs_header_nritems(leaf);
3338 	data_end = leaf_data_end(root, leaf);
3339 
3340 	if (btrfs_leaf_free_space(root, leaf) < total_size) {
3341 		for (i = nr; i >= 0; i--) {
3342 			total_data -= data_size[i];
3343 			total_size -= data_size[i] + sizeof(struct btrfs_item);
3344 			if (total_size < btrfs_leaf_free_space(root, leaf))
3345 				break;
3346 		}
3347 		nr = i;
3348 	}
3349 
3350 	slot = path->slots[0];
3351 	BUG_ON(slot < 0);
3352 
3353 	if (slot != nritems) {
3354 		unsigned int old_data = btrfs_item_end_nr(leaf, slot);
3355 
3356 		item = btrfs_item_nr(leaf, slot);
3357 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
3358 
3359 		/* figure out how many keys we can insert in here */
3360 		total_data = data_size[0];
3361 		for (i = 1; i < nr; i++) {
3362 			if (comp_cpu_keys(&found_key, cpu_key + i) <= 0)
3363 				break;
3364 			total_data += data_size[i];
3365 		}
3366 		nr = i;
3367 
3368 		if (old_data < data_end) {
3369 			btrfs_print_leaf(root, leaf);
3370 			printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
3371 			       slot, old_data, data_end);
3372 			BUG_ON(1);
3373 		}
3374 		/*
3375 		 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3376 		 */
3377 		/* first correct the data pointers */
3378 		WARN_ON(leaf->map_token);
3379 		for (i = slot; i < nritems; i++) {
3380 			u32 ioff;
3381 
3382 			item = btrfs_item_nr(leaf, i);
3383 			if (!leaf->map_token) {
3384 				map_extent_buffer(leaf, (unsigned long)item,
3385 					sizeof(struct btrfs_item),
3386 					&leaf->map_token, &leaf->kaddr,
3387 					&leaf->map_start, &leaf->map_len,
3388 					KM_USER1);
3389 			}
3390 
3391 			ioff = btrfs_item_offset(leaf, item);
3392 			btrfs_set_item_offset(leaf, item, ioff - total_data);
3393 		}
3394 		if (leaf->map_token) {
3395 			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3396 			leaf->map_token = NULL;
3397 		}
3398 
3399 		/* shift the items */
3400 		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
3401 			      btrfs_item_nr_offset(slot),
3402 			      (nritems - slot) * sizeof(struct btrfs_item));
3403 
3404 		/* shift the data */
3405 		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3406 			      data_end - total_data, btrfs_leaf_data(leaf) +
3407 			      data_end, old_data - data_end);
3408 		data_end = old_data;
3409 	} else {
3410 		/*
3411 		 * this sucks but it has to be done, if we are inserting at
3412 		 * the end of the leaf only insert 1 of the items, since we
3413 		 * have no way of knowing whats on the next leaf and we'd have
3414 		 * to drop our current locks to figure it out
3415 		 */
3416 		nr = 1;
3417 	}
3418 
3419 	/* setup the item for the new data */
3420 	for (i = 0; i < nr; i++) {
3421 		btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
3422 		btrfs_set_item_key(leaf, &disk_key, slot + i);
3423 		item = btrfs_item_nr(leaf, slot + i);
3424 		btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
3425 		data_end -= data_size[i];
3426 		btrfs_set_item_size(leaf, item, data_size[i]);
3427 	}
3428 	btrfs_set_header_nritems(leaf, nritems + nr);
3429 	btrfs_mark_buffer_dirty(leaf);
3430 
3431 	ret = 0;
3432 	if (slot == 0) {
3433 		btrfs_cpu_key_to_disk(&disk_key, cpu_key);
3434 		ret = fixup_low_keys(trans, root, path, &disk_key, 1);
3435 	}
3436 
3437 	if (btrfs_leaf_free_space(root, leaf) < 0) {
3438 		btrfs_print_leaf(root, leaf);
3439 		BUG();
3440 	}
3441 out:
3442 	if (!ret)
3443 		ret = nr;
3444 	return ret;
3445 }
3446 
3447 /*
3448  * Given a key and some data, insert items into the tree.
3449  * This does all the path init required, making room in the tree if needed.
3450  */
btrfs_insert_empty_items(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct btrfs_key * cpu_key,u32 * data_size,int nr)3451 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
3452 			    struct btrfs_root *root,
3453 			    struct btrfs_path *path,
3454 			    struct btrfs_key *cpu_key, u32 *data_size,
3455 			    int nr)
3456 {
3457 	struct extent_buffer *leaf;
3458 	struct btrfs_item *item;
3459 	int ret = 0;
3460 	int slot;
3461 	int slot_orig;
3462 	int i;
3463 	u32 nritems;
3464 	u32 total_size = 0;
3465 	u32 total_data = 0;
3466 	unsigned int data_end;
3467 	struct btrfs_disk_key disk_key;
3468 
3469 	for (i = 0; i < nr; i++)
3470 		total_data += data_size[i];
3471 
3472 	total_size = total_data + (nr * sizeof(struct btrfs_item));
3473 	ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
3474 	if (ret == 0)
3475 		return -EEXIST;
3476 	if (ret < 0)
3477 		goto out;
3478 
3479 	slot_orig = path->slots[0];
3480 	leaf = path->nodes[0];
3481 
3482 	nritems = btrfs_header_nritems(leaf);
3483 	data_end = leaf_data_end(root, leaf);
3484 
3485 	if (btrfs_leaf_free_space(root, leaf) < total_size) {
3486 		btrfs_print_leaf(root, leaf);
3487 		printk(KERN_CRIT "not enough freespace need %u have %d\n",
3488 		       total_size, btrfs_leaf_free_space(root, leaf));
3489 		BUG();
3490 	}
3491 
3492 	slot = path->slots[0];
3493 	BUG_ON(slot < 0);
3494 
3495 	if (slot != nritems) {
3496 		unsigned int old_data = btrfs_item_end_nr(leaf, slot);
3497 
3498 		if (old_data < data_end) {
3499 			btrfs_print_leaf(root, leaf);
3500 			printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
3501 			       slot, old_data, data_end);
3502 			BUG_ON(1);
3503 		}
3504 		/*
3505 		 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3506 		 */
3507 		/* first correct the data pointers */
3508 		WARN_ON(leaf->map_token);
3509 		for (i = slot; i < nritems; i++) {
3510 			u32 ioff;
3511 
3512 			item = btrfs_item_nr(leaf, i);
3513 			if (!leaf->map_token) {
3514 				map_extent_buffer(leaf, (unsigned long)item,
3515 					sizeof(struct btrfs_item),
3516 					&leaf->map_token, &leaf->kaddr,
3517 					&leaf->map_start, &leaf->map_len,
3518 					KM_USER1);
3519 			}
3520 
3521 			ioff = btrfs_item_offset(leaf, item);
3522 			btrfs_set_item_offset(leaf, item, ioff - total_data);
3523 		}
3524 		if (leaf->map_token) {
3525 			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3526 			leaf->map_token = NULL;
3527 		}
3528 
3529 		/* shift the items */
3530 		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
3531 			      btrfs_item_nr_offset(slot),
3532 			      (nritems - slot) * sizeof(struct btrfs_item));
3533 
3534 		/* shift the data */
3535 		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3536 			      data_end - total_data, btrfs_leaf_data(leaf) +
3537 			      data_end, old_data - data_end);
3538 		data_end = old_data;
3539 	}
3540 
3541 	/* setup the item for the new data */
3542 	for (i = 0; i < nr; i++) {
3543 		btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
3544 		btrfs_set_item_key(leaf, &disk_key, slot + i);
3545 		item = btrfs_item_nr(leaf, slot + i);
3546 		btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
3547 		data_end -= data_size[i];
3548 		btrfs_set_item_size(leaf, item, data_size[i]);
3549 	}
3550 	btrfs_set_header_nritems(leaf, nritems + nr);
3551 	btrfs_mark_buffer_dirty(leaf);
3552 
3553 	ret = 0;
3554 	if (slot == 0) {
3555 		btrfs_cpu_key_to_disk(&disk_key, cpu_key);
3556 		ret = fixup_low_keys(trans, root, path, &disk_key, 1);
3557 	}
3558 
3559 	if (btrfs_leaf_free_space(root, leaf) < 0) {
3560 		btrfs_print_leaf(root, leaf);
3561 		BUG();
3562 	}
3563 out:
3564 	btrfs_unlock_up_safe(path, 1);
3565 	return ret;
3566 }
3567 
3568 /*
3569  * Given a key and some data, insert an item into the tree.
3570  * This does all the path init required, making room in the tree if needed.
3571  */
btrfs_insert_item(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_key * cpu_key,void * data,u32 data_size)3572 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
3573 		      *root, struct btrfs_key *cpu_key, void *data, u32
3574 		      data_size)
3575 {
3576 	int ret = 0;
3577 	struct btrfs_path *path;
3578 	struct extent_buffer *leaf;
3579 	unsigned long ptr;
3580 
3581 	path = btrfs_alloc_path();
3582 	BUG_ON(!path);
3583 	ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
3584 	if (!ret) {
3585 		leaf = path->nodes[0];
3586 		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
3587 		write_extent_buffer(leaf, data, ptr, data_size);
3588 		btrfs_mark_buffer_dirty(leaf);
3589 	}
3590 	btrfs_free_path(path);
3591 	return ret;
3592 }
3593 
3594 /*
3595  * delete the pointer from a given node.
3596  *
3597  * the tree should have been previously balanced so the deletion does not
3598  * empty a node.
3599  */
del_ptr(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,int level,int slot)3600 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3601 		   struct btrfs_path *path, int level, int slot)
3602 {
3603 	struct extent_buffer *parent = path->nodes[level];
3604 	u32 nritems;
3605 	int ret = 0;
3606 	int wret;
3607 
3608 	nritems = btrfs_header_nritems(parent);
3609 	if (slot != nritems - 1) {
3610 		memmove_extent_buffer(parent,
3611 			      btrfs_node_key_ptr_offset(slot),
3612 			      btrfs_node_key_ptr_offset(slot + 1),
3613 			      sizeof(struct btrfs_key_ptr) *
3614 			      (nritems - slot - 1));
3615 	}
3616 	nritems--;
3617 	btrfs_set_header_nritems(parent, nritems);
3618 	if (nritems == 0 && parent == root->node) {
3619 		BUG_ON(btrfs_header_level(root->node) != 1);
3620 		/* just turn the root into a leaf and break */
3621 		btrfs_set_header_level(root->node, 0);
3622 	} else if (slot == 0) {
3623 		struct btrfs_disk_key disk_key;
3624 
3625 		btrfs_node_key(parent, &disk_key, 0);
3626 		wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
3627 		if (wret)
3628 			ret = wret;
3629 	}
3630 	btrfs_mark_buffer_dirty(parent);
3631 	return ret;
3632 }
3633 
3634 /*
3635  * a helper function to delete the leaf pointed to by path->slots[1] and
3636  * path->nodes[1].  bytenr is the node block pointer, but since the callers
3637  * already know it, it is faster to have them pass it down than to
3638  * read it out of the node again.
3639  *
3640  * This deletes the pointer in path->nodes[1] and frees the leaf
3641  * block extent.  zero is returned if it all worked out, < 0 otherwise.
3642  *
3643  * The path must have already been setup for deleting the leaf, including
3644  * all the proper balancing.  path->nodes[1] must be locked.
3645  */
btrfs_del_leaf(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,u64 bytenr)3646 noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3647 			    struct btrfs_root *root,
3648 			    struct btrfs_path *path, u64 bytenr)
3649 {
3650 	int ret;
3651 	u64 root_gen = btrfs_header_generation(path->nodes[1]);
3652 	u64 parent_start = path->nodes[1]->start;
3653 	u64 parent_owner = btrfs_header_owner(path->nodes[1]);
3654 
3655 	ret = del_ptr(trans, root, path, 1, path->slots[1]);
3656 	if (ret)
3657 		return ret;
3658 
3659 	/*
3660 	 * btrfs_free_extent is expensive, we want to make sure we
3661 	 * aren't holding any locks when we call it
3662 	 */
3663 	btrfs_unlock_up_safe(path, 0);
3664 
3665 	ret = btrfs_free_extent(trans, root, bytenr,
3666 				btrfs_level_size(root, 0),
3667 				parent_start, parent_owner,
3668 				root_gen, 0, 1);
3669 	return ret;
3670 }
3671 /*
3672  * delete the item at the leaf level in path.  If that empties
3673  * the leaf, remove it from the tree
3674  */
btrfs_del_items(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,int slot,int nr)3675 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3676 		    struct btrfs_path *path, int slot, int nr)
3677 {
3678 	struct extent_buffer *leaf;
3679 	struct btrfs_item *item;
3680 	int last_off;
3681 	int dsize = 0;
3682 	int ret = 0;
3683 	int wret;
3684 	int i;
3685 	u32 nritems;
3686 
3687 	leaf = path->nodes[0];
3688 	last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
3689 
3690 	for (i = 0; i < nr; i++)
3691 		dsize += btrfs_item_size_nr(leaf, slot + i);
3692 
3693 	nritems = btrfs_header_nritems(leaf);
3694 
3695 	if (slot + nr != nritems) {
3696 		int data_end = leaf_data_end(root, leaf);
3697 
3698 		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3699 			      data_end + dsize,
3700 			      btrfs_leaf_data(leaf) + data_end,
3701 			      last_off - data_end);
3702 
3703 		for (i = slot + nr; i < nritems; i++) {
3704 			u32 ioff;
3705 
3706 			item = btrfs_item_nr(leaf, i);
3707 			if (!leaf->map_token) {
3708 				map_extent_buffer(leaf, (unsigned long)item,
3709 					sizeof(struct btrfs_item),
3710 					&leaf->map_token, &leaf->kaddr,
3711 					&leaf->map_start, &leaf->map_len,
3712 					KM_USER1);
3713 			}
3714 			ioff = btrfs_item_offset(leaf, item);
3715 			btrfs_set_item_offset(leaf, item, ioff + dsize);
3716 		}
3717 
3718 		if (leaf->map_token) {
3719 			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3720 			leaf->map_token = NULL;
3721 		}
3722 
3723 		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
3724 			      btrfs_item_nr_offset(slot + nr),
3725 			      sizeof(struct btrfs_item) *
3726 			      (nritems - slot - nr));
3727 	}
3728 	btrfs_set_header_nritems(leaf, nritems - nr);
3729 	nritems -= nr;
3730 
3731 	/* delete the leaf if we've emptied it */
3732 	if (nritems == 0) {
3733 		if (leaf == root->node) {
3734 			btrfs_set_header_level(leaf, 0);
3735 		} else {
3736 			ret = btrfs_del_leaf(trans, root, path, leaf->start);
3737 			BUG_ON(ret);
3738 		}
3739 	} else {
3740 		int used = leaf_space_used(leaf, 0, nritems);
3741 		if (slot == 0) {
3742 			struct btrfs_disk_key disk_key;
3743 
3744 			btrfs_item_key(leaf, &disk_key, 0);
3745 			wret = fixup_low_keys(trans, root, path,
3746 					      &disk_key, 1);
3747 			if (wret)
3748 				ret = wret;
3749 		}
3750 
3751 		/* delete the leaf if it is mostly empty */
3752 		if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
3753 			/* push_leaf_left fixes the path.
3754 			 * make sure the path still points to our leaf
3755 			 * for possible call to del_ptr below
3756 			 */
3757 			slot = path->slots[1];
3758 			extent_buffer_get(leaf);
3759 
3760 			wret = push_leaf_left(trans, root, path, 1, 1);
3761 			if (wret < 0 && wret != -ENOSPC)
3762 				ret = wret;
3763 
3764 			if (path->nodes[0] == leaf &&
3765 			    btrfs_header_nritems(leaf)) {
3766 				wret = push_leaf_right(trans, root, path, 1, 1);
3767 				if (wret < 0 && wret != -ENOSPC)
3768 					ret = wret;
3769 			}
3770 
3771 			if (btrfs_header_nritems(leaf) == 0) {
3772 				path->slots[1] = slot;
3773 				ret = btrfs_del_leaf(trans, root, path,
3774 						     leaf->start);
3775 				BUG_ON(ret);
3776 				free_extent_buffer(leaf);
3777 			} else {
3778 				/* if we're still in the path, make sure
3779 				 * we're dirty.  Otherwise, one of the
3780 				 * push_leaf functions must have already
3781 				 * dirtied this buffer
3782 				 */
3783 				if (path->nodes[0] == leaf)
3784 					btrfs_mark_buffer_dirty(leaf);
3785 				free_extent_buffer(leaf);
3786 			}
3787 		} else {
3788 			btrfs_mark_buffer_dirty(leaf);
3789 		}
3790 	}
3791 	return ret;
3792 }
3793 
3794 /*
3795  * search the tree again to find a leaf with lesser keys
3796  * returns 0 if it found something or 1 if there are no lesser leaves.
3797  * returns < 0 on io errors.
3798  *
3799  * This may release the path, and so you may lose any locks held at the
3800  * time you call it.
3801  */
btrfs_prev_leaf(struct btrfs_root * root,struct btrfs_path * path)3802 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
3803 {
3804 	struct btrfs_key key;
3805 	struct btrfs_disk_key found_key;
3806 	int ret;
3807 
3808 	btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
3809 
3810 	if (key.offset > 0)
3811 		key.offset--;
3812 	else if (key.type > 0)
3813 		key.type--;
3814 	else if (key.objectid > 0)
3815 		key.objectid--;
3816 	else
3817 		return 1;
3818 
3819 	btrfs_release_path(root, path);
3820 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3821 	if (ret < 0)
3822 		return ret;
3823 	btrfs_item_key(path->nodes[0], &found_key, 0);
3824 	ret = comp_keys(&found_key, &key);
3825 	if (ret < 0)
3826 		return 0;
3827 	return 1;
3828 }
3829 
3830 /*
3831  * A helper function to walk down the tree starting at min_key, and looking
3832  * for nodes or leaves that are either in cache or have a minimum
3833  * transaction id.  This is used by the btree defrag code, and tree logging
3834  *
3835  * This does not cow, but it does stuff the starting key it finds back
3836  * into min_key, so you can call btrfs_search_slot with cow=1 on the
3837  * key and get a writable path.
3838  *
3839  * This does lock as it descends, and path->keep_locks should be set
3840  * to 1 by the caller.
3841  *
3842  * This honors path->lowest_level to prevent descent past a given level
3843  * of the tree.
3844  *
3845  * min_trans indicates the oldest transaction that you are interested
3846  * in walking through.  Any nodes or leaves older than min_trans are
3847  * skipped over (without reading them).
3848  *
3849  * returns zero if something useful was found, < 0 on error and 1 if there
3850  * was nothing in the tree that matched the search criteria.
3851  */
btrfs_search_forward(struct btrfs_root * root,struct btrfs_key * min_key,struct btrfs_key * max_key,struct btrfs_path * path,int cache_only,u64 min_trans)3852 int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
3853 			 struct btrfs_key *max_key,
3854 			 struct btrfs_path *path, int cache_only,
3855 			 u64 min_trans)
3856 {
3857 	struct extent_buffer *cur;
3858 	struct btrfs_key found_key;
3859 	int slot;
3860 	int sret;
3861 	u32 nritems;
3862 	int level;
3863 	int ret = 1;
3864 
3865 	WARN_ON(!path->keep_locks);
3866 again:
3867 	cur = btrfs_lock_root_node(root);
3868 	level = btrfs_header_level(cur);
3869 	WARN_ON(path->nodes[level]);
3870 	path->nodes[level] = cur;
3871 	path->locks[level] = 1;
3872 
3873 	if (btrfs_header_generation(cur) < min_trans) {
3874 		ret = 1;
3875 		goto out;
3876 	}
3877 	while (1) {
3878 		nritems = btrfs_header_nritems(cur);
3879 		level = btrfs_header_level(cur);
3880 		sret = bin_search(cur, min_key, level, &slot);
3881 
3882 		/* at the lowest level, we're done, setup the path and exit */
3883 		if (level == path->lowest_level) {
3884 			if (slot >= nritems)
3885 				goto find_next_key;
3886 			ret = 0;
3887 			path->slots[level] = slot;
3888 			btrfs_item_key_to_cpu(cur, &found_key, slot);
3889 			goto out;
3890 		}
3891 		if (sret && slot > 0)
3892 			slot--;
3893 		/*
3894 		 * check this node pointer against the cache_only and
3895 		 * min_trans parameters.  If it isn't in cache or is too
3896 		 * old, skip to the next one.
3897 		 */
3898 		while (slot < nritems) {
3899 			u64 blockptr;
3900 			u64 gen;
3901 			struct extent_buffer *tmp;
3902 			struct btrfs_disk_key disk_key;
3903 
3904 			blockptr = btrfs_node_blockptr(cur, slot);
3905 			gen = btrfs_node_ptr_generation(cur, slot);
3906 			if (gen < min_trans) {
3907 				slot++;
3908 				continue;
3909 			}
3910 			if (!cache_only)
3911 				break;
3912 
3913 			if (max_key) {
3914 				btrfs_node_key(cur, &disk_key, slot);
3915 				if (comp_keys(&disk_key, max_key) >= 0) {
3916 					ret = 1;
3917 					goto out;
3918 				}
3919 			}
3920 
3921 			tmp = btrfs_find_tree_block(root, blockptr,
3922 					    btrfs_level_size(root, level - 1));
3923 
3924 			if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
3925 				free_extent_buffer(tmp);
3926 				break;
3927 			}
3928 			if (tmp)
3929 				free_extent_buffer(tmp);
3930 			slot++;
3931 		}
3932 find_next_key:
3933 		/*
3934 		 * we didn't find a candidate key in this node, walk forward
3935 		 * and find another one
3936 		 */
3937 		if (slot >= nritems) {
3938 			path->slots[level] = slot;
3939 			btrfs_set_path_blocking(path);
3940 			sret = btrfs_find_next_key(root, path, min_key, level,
3941 						  cache_only, min_trans);
3942 			if (sret == 0) {
3943 				btrfs_release_path(root, path);
3944 				goto again;
3945 			} else {
3946 				goto out;
3947 			}
3948 		}
3949 		/* save our key for returning back */
3950 		btrfs_node_key_to_cpu(cur, &found_key, slot);
3951 		path->slots[level] = slot;
3952 		if (level == path->lowest_level) {
3953 			ret = 0;
3954 			unlock_up(path, level, 1);
3955 			goto out;
3956 		}
3957 		btrfs_set_path_blocking(path);
3958 		cur = read_node_slot(root, cur, slot);
3959 
3960 		btrfs_tree_lock(cur);
3961 
3962 		path->locks[level - 1] = 1;
3963 		path->nodes[level - 1] = cur;
3964 		unlock_up(path, level, 1);
3965 		btrfs_clear_path_blocking(path, NULL);
3966 	}
3967 out:
3968 	if (ret == 0)
3969 		memcpy(min_key, &found_key, sizeof(found_key));
3970 	btrfs_set_path_blocking(path);
3971 	return ret;
3972 }
3973 
3974 /*
3975  * this is similar to btrfs_next_leaf, but does not try to preserve
3976  * and fixup the path.  It looks for and returns the next key in the
3977  * tree based on the current path and the cache_only and min_trans
3978  * parameters.
3979  *
3980  * 0 is returned if another key is found, < 0 if there are any errors
3981  * and 1 is returned if there are no higher keys in the tree
3982  *
3983  * path->keep_locks should be set to 1 on the search made before
3984  * calling this function.
3985  */
btrfs_find_next_key(struct btrfs_root * root,struct btrfs_path * path,struct btrfs_key * key,int lowest_level,int cache_only,u64 min_trans)3986 int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
3987 			struct btrfs_key *key, int lowest_level,
3988 			int cache_only, u64 min_trans)
3989 {
3990 	int level = lowest_level;
3991 	int slot;
3992 	struct extent_buffer *c;
3993 
3994 	WARN_ON(!path->keep_locks);
3995 	while (level < BTRFS_MAX_LEVEL) {
3996 		if (!path->nodes[level])
3997 			return 1;
3998 
3999 		slot = path->slots[level] + 1;
4000 		c = path->nodes[level];
4001 next:
4002 		if (slot >= btrfs_header_nritems(c)) {
4003 			level++;
4004 			if (level == BTRFS_MAX_LEVEL)
4005 				return 1;
4006 			continue;
4007 		}
4008 		if (level == 0)
4009 			btrfs_item_key_to_cpu(c, key, slot);
4010 		else {
4011 			u64 blockptr = btrfs_node_blockptr(c, slot);
4012 			u64 gen = btrfs_node_ptr_generation(c, slot);
4013 
4014 			if (cache_only) {
4015 				struct extent_buffer *cur;
4016 				cur = btrfs_find_tree_block(root, blockptr,
4017 					    btrfs_level_size(root, level - 1));
4018 				if (!cur || !btrfs_buffer_uptodate(cur, gen)) {
4019 					slot++;
4020 					if (cur)
4021 						free_extent_buffer(cur);
4022 					goto next;
4023 				}
4024 				free_extent_buffer(cur);
4025 			}
4026 			if (gen < min_trans) {
4027 				slot++;
4028 				goto next;
4029 			}
4030 			btrfs_node_key_to_cpu(c, key, slot);
4031 		}
4032 		return 0;
4033 	}
4034 	return 1;
4035 }
4036 
4037 /*
4038  * search the tree again to find a leaf with greater keys
4039  * returns 0 if it found something or 1 if there are no greater leaves.
4040  * returns < 0 on io errors.
4041  */
btrfs_next_leaf(struct btrfs_root * root,struct btrfs_path * path)4042 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
4043 {
4044 	int slot;
4045 	int level = 1;
4046 	struct extent_buffer *c;
4047 	struct extent_buffer *next = NULL;
4048 	struct btrfs_key key;
4049 	u32 nritems;
4050 	int ret;
4051 
4052 	nritems = btrfs_header_nritems(path->nodes[0]);
4053 	if (nritems == 0)
4054 		return 1;
4055 
4056 	btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
4057 
4058 	btrfs_release_path(root, path);
4059 	path->keep_locks = 1;
4060 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4061 	path->keep_locks = 0;
4062 
4063 	if (ret < 0)
4064 		return ret;
4065 
4066 	btrfs_set_path_blocking(path);
4067 	nritems = btrfs_header_nritems(path->nodes[0]);
4068 	/*
4069 	 * by releasing the path above we dropped all our locks.  A balance
4070 	 * could have added more items next to the key that used to be
4071 	 * at the very end of the block.  So, check again here and
4072 	 * advance the path if there are now more items available.
4073 	 */
4074 	if (nritems > 0 && path->slots[0] < nritems - 1) {
4075 		path->slots[0]++;
4076 		goto done;
4077 	}
4078 
4079 	while (level < BTRFS_MAX_LEVEL) {
4080 		if (!path->nodes[level])
4081 			return 1;
4082 
4083 		slot = path->slots[level] + 1;
4084 		c = path->nodes[level];
4085 		if (slot >= btrfs_header_nritems(c)) {
4086 			level++;
4087 			if (level == BTRFS_MAX_LEVEL)
4088 				return 1;
4089 			continue;
4090 		}
4091 
4092 		if (next) {
4093 			btrfs_tree_unlock(next);
4094 			free_extent_buffer(next);
4095 		}
4096 
4097 		/* the path was set to blocking above */
4098 		if (level == 1 && (path->locks[1] || path->skip_locking) &&
4099 		    path->reada)
4100 			reada_for_search(root, path, level, slot, 0);
4101 
4102 		next = read_node_slot(root, c, slot);
4103 		if (!path->skip_locking) {
4104 			btrfs_assert_tree_locked(c);
4105 			btrfs_tree_lock(next);
4106 			btrfs_set_lock_blocking(next);
4107 		}
4108 		break;
4109 	}
4110 	path->slots[level] = slot;
4111 	while (1) {
4112 		level--;
4113 		c = path->nodes[level];
4114 		if (path->locks[level])
4115 			btrfs_tree_unlock(c);
4116 		free_extent_buffer(c);
4117 		path->nodes[level] = next;
4118 		path->slots[level] = 0;
4119 		if (!path->skip_locking)
4120 			path->locks[level] = 1;
4121 		if (!level)
4122 			break;
4123 
4124 		btrfs_set_path_blocking(path);
4125 		if (level == 1 && path->locks[1] && path->reada)
4126 			reada_for_search(root, path, level, slot, 0);
4127 		next = read_node_slot(root, next, 0);
4128 		if (!path->skip_locking) {
4129 			btrfs_assert_tree_locked(path->nodes[level]);
4130 			btrfs_tree_lock(next);
4131 			btrfs_set_lock_blocking(next);
4132 		}
4133 	}
4134 done:
4135 	unlock_up(path, 0, 1);
4136 	return 0;
4137 }
4138 
4139 /*
4140  * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
4141  * searching until it gets past min_objectid or finds an item of 'type'
4142  *
4143  * returns 0 if something is found, 1 if nothing was found and < 0 on error
4144  */
btrfs_previous_item(struct btrfs_root * root,struct btrfs_path * path,u64 min_objectid,int type)4145 int btrfs_previous_item(struct btrfs_root *root,
4146 			struct btrfs_path *path, u64 min_objectid,
4147 			int type)
4148 {
4149 	struct btrfs_key found_key;
4150 	struct extent_buffer *leaf;
4151 	u32 nritems;
4152 	int ret;
4153 
4154 	while (1) {
4155 		if (path->slots[0] == 0) {
4156 			btrfs_set_path_blocking(path);
4157 			ret = btrfs_prev_leaf(root, path);
4158 			if (ret != 0)
4159 				return ret;
4160 		} else {
4161 			path->slots[0]--;
4162 		}
4163 		leaf = path->nodes[0];
4164 		nritems = btrfs_header_nritems(leaf);
4165 		if (nritems == 0)
4166 			return 1;
4167 		if (path->slots[0] == nritems)
4168 			path->slots[0]--;
4169 
4170 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4171 		if (found_key.type == type)
4172 			return 0;
4173 		if (found_key.objectid < min_objectid)
4174 			break;
4175 		if (found_key.objectid == min_objectid &&
4176 		    found_key.type < type)
4177 			break;
4178 	}
4179 	return 1;
4180 }
4181