1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2011 Fujitsu. All rights reserved.
4 * Written by Miao Xie <miaox@cn.fujitsu.com>
5 */
6
7 #include <linux/slab.h>
8 #include <linux/iversion.h>
9 #include <linux/sched/mm.h>
10 #include "delayed-inode.h"
11 #include "disk-io.h"
12 #include "transaction.h"
13 #include "ctree.h"
14 #include "qgroup.h"
15
16 #define BTRFS_DELAYED_WRITEBACK 512
17 #define BTRFS_DELAYED_BACKGROUND 128
18 #define BTRFS_DELAYED_BATCH 16
19
20 static struct kmem_cache *delayed_node_cache;
21
btrfs_delayed_inode_init(void)22 int __init btrfs_delayed_inode_init(void)
23 {
24 delayed_node_cache = kmem_cache_create("btrfs_delayed_node",
25 sizeof(struct btrfs_delayed_node),
26 0,
27 SLAB_MEM_SPREAD,
28 NULL);
29 if (!delayed_node_cache)
30 return -ENOMEM;
31 return 0;
32 }
33
btrfs_delayed_inode_exit(void)34 void __cold btrfs_delayed_inode_exit(void)
35 {
36 kmem_cache_destroy(delayed_node_cache);
37 }
38
btrfs_init_delayed_node(struct btrfs_delayed_node * delayed_node,struct btrfs_root * root,u64 inode_id)39 static inline void btrfs_init_delayed_node(
40 struct btrfs_delayed_node *delayed_node,
41 struct btrfs_root *root, u64 inode_id)
42 {
43 delayed_node->root = root;
44 delayed_node->inode_id = inode_id;
45 refcount_set(&delayed_node->refs, 0);
46 delayed_node->ins_root = RB_ROOT;
47 delayed_node->del_root = RB_ROOT;
48 mutex_init(&delayed_node->mutex);
49 INIT_LIST_HEAD(&delayed_node->n_list);
50 INIT_LIST_HEAD(&delayed_node->p_list);
51 }
52
btrfs_is_continuous_delayed_item(struct btrfs_delayed_item * item1,struct btrfs_delayed_item * item2)53 static inline int btrfs_is_continuous_delayed_item(
54 struct btrfs_delayed_item *item1,
55 struct btrfs_delayed_item *item2)
56 {
57 if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
58 item1->key.objectid == item2->key.objectid &&
59 item1->key.type == item2->key.type &&
60 item1->key.offset + 1 == item2->key.offset)
61 return 1;
62 return 0;
63 }
64
btrfs_get_delayed_node(struct btrfs_inode * btrfs_inode)65 static struct btrfs_delayed_node *btrfs_get_delayed_node(
66 struct btrfs_inode *btrfs_inode)
67 {
68 struct btrfs_root *root = btrfs_inode->root;
69 u64 ino = btrfs_ino(btrfs_inode);
70 struct btrfs_delayed_node *node;
71
72 node = READ_ONCE(btrfs_inode->delayed_node);
73 if (node) {
74 refcount_inc(&node->refs);
75 return node;
76 }
77
78 spin_lock(&root->inode_lock);
79 node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
80
81 if (node) {
82 if (btrfs_inode->delayed_node) {
83 refcount_inc(&node->refs); /* can be accessed */
84 BUG_ON(btrfs_inode->delayed_node != node);
85 spin_unlock(&root->inode_lock);
86 return node;
87 }
88
89 /*
90 * It's possible that we're racing into the middle of removing
91 * this node from the radix tree. In this case, the refcount
92 * was zero and it should never go back to one. Just return
93 * NULL like it was never in the radix at all; our release
94 * function is in the process of removing it.
95 *
96 * Some implementations of refcount_inc refuse to bump the
97 * refcount once it has hit zero. If we don't do this dance
98 * here, refcount_inc() may decide to just WARN_ONCE() instead
99 * of actually bumping the refcount.
100 *
101 * If this node is properly in the radix, we want to bump the
102 * refcount twice, once for the inode and once for this get
103 * operation.
104 */
105 if (refcount_inc_not_zero(&node->refs)) {
106 refcount_inc(&node->refs);
107 btrfs_inode->delayed_node = node;
108 } else {
109 node = NULL;
110 }
111
112 spin_unlock(&root->inode_lock);
113 return node;
114 }
115 spin_unlock(&root->inode_lock);
116
117 return NULL;
118 }
119
120 /* Will return either the node or PTR_ERR(-ENOMEM) */
btrfs_get_or_create_delayed_node(struct btrfs_inode * btrfs_inode)121 static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
122 struct btrfs_inode *btrfs_inode)
123 {
124 struct btrfs_delayed_node *node;
125 struct btrfs_root *root = btrfs_inode->root;
126 u64 ino = btrfs_ino(btrfs_inode);
127 int ret;
128
129 again:
130 node = btrfs_get_delayed_node(btrfs_inode);
131 if (node)
132 return node;
133
134 node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS);
135 if (!node)
136 return ERR_PTR(-ENOMEM);
137 btrfs_init_delayed_node(node, root, ino);
138
139 /* cached in the btrfs inode and can be accessed */
140 refcount_set(&node->refs, 2);
141
142 ret = radix_tree_preload(GFP_NOFS);
143 if (ret) {
144 kmem_cache_free(delayed_node_cache, node);
145 return ERR_PTR(ret);
146 }
147
148 spin_lock(&root->inode_lock);
149 ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
150 if (ret == -EEXIST) {
151 spin_unlock(&root->inode_lock);
152 kmem_cache_free(delayed_node_cache, node);
153 radix_tree_preload_end();
154 goto again;
155 }
156 btrfs_inode->delayed_node = node;
157 spin_unlock(&root->inode_lock);
158 radix_tree_preload_end();
159
160 return node;
161 }
162
163 /*
164 * Call it when holding delayed_node->mutex
165 *
166 * If mod = 1, add this node into the prepared list.
167 */
btrfs_queue_delayed_node(struct btrfs_delayed_root * root,struct btrfs_delayed_node * node,int mod)168 static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
169 struct btrfs_delayed_node *node,
170 int mod)
171 {
172 spin_lock(&root->lock);
173 if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
174 if (!list_empty(&node->p_list))
175 list_move_tail(&node->p_list, &root->prepare_list);
176 else if (mod)
177 list_add_tail(&node->p_list, &root->prepare_list);
178 } else {
179 list_add_tail(&node->n_list, &root->node_list);
180 list_add_tail(&node->p_list, &root->prepare_list);
181 refcount_inc(&node->refs); /* inserted into list */
182 root->nodes++;
183 set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
184 }
185 spin_unlock(&root->lock);
186 }
187
188 /* Call it when holding delayed_node->mutex */
btrfs_dequeue_delayed_node(struct btrfs_delayed_root * root,struct btrfs_delayed_node * node)189 static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
190 struct btrfs_delayed_node *node)
191 {
192 spin_lock(&root->lock);
193 if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
194 root->nodes--;
195 refcount_dec(&node->refs); /* not in the list */
196 list_del_init(&node->n_list);
197 if (!list_empty(&node->p_list))
198 list_del_init(&node->p_list);
199 clear_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
200 }
201 spin_unlock(&root->lock);
202 }
203
btrfs_first_delayed_node(struct btrfs_delayed_root * delayed_root)204 static struct btrfs_delayed_node *btrfs_first_delayed_node(
205 struct btrfs_delayed_root *delayed_root)
206 {
207 struct list_head *p;
208 struct btrfs_delayed_node *node = NULL;
209
210 spin_lock(&delayed_root->lock);
211 if (list_empty(&delayed_root->node_list))
212 goto out;
213
214 p = delayed_root->node_list.next;
215 node = list_entry(p, struct btrfs_delayed_node, n_list);
216 refcount_inc(&node->refs);
217 out:
218 spin_unlock(&delayed_root->lock);
219
220 return node;
221 }
222
btrfs_next_delayed_node(struct btrfs_delayed_node * node)223 static struct btrfs_delayed_node *btrfs_next_delayed_node(
224 struct btrfs_delayed_node *node)
225 {
226 struct btrfs_delayed_root *delayed_root;
227 struct list_head *p;
228 struct btrfs_delayed_node *next = NULL;
229
230 delayed_root = node->root->fs_info->delayed_root;
231 spin_lock(&delayed_root->lock);
232 if (!test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
233 /* not in the list */
234 if (list_empty(&delayed_root->node_list))
235 goto out;
236 p = delayed_root->node_list.next;
237 } else if (list_is_last(&node->n_list, &delayed_root->node_list))
238 goto out;
239 else
240 p = node->n_list.next;
241
242 next = list_entry(p, struct btrfs_delayed_node, n_list);
243 refcount_inc(&next->refs);
244 out:
245 spin_unlock(&delayed_root->lock);
246
247 return next;
248 }
249
__btrfs_release_delayed_node(struct btrfs_delayed_node * delayed_node,int mod)250 static void __btrfs_release_delayed_node(
251 struct btrfs_delayed_node *delayed_node,
252 int mod)
253 {
254 struct btrfs_delayed_root *delayed_root;
255
256 if (!delayed_node)
257 return;
258
259 delayed_root = delayed_node->root->fs_info->delayed_root;
260
261 mutex_lock(&delayed_node->mutex);
262 if (delayed_node->count)
263 btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
264 else
265 btrfs_dequeue_delayed_node(delayed_root, delayed_node);
266 mutex_unlock(&delayed_node->mutex);
267
268 if (refcount_dec_and_test(&delayed_node->refs)) {
269 struct btrfs_root *root = delayed_node->root;
270
271 spin_lock(&root->inode_lock);
272 /*
273 * Once our refcount goes to zero, nobody is allowed to bump it
274 * back up. We can delete it now.
275 */
276 ASSERT(refcount_read(&delayed_node->refs) == 0);
277 radix_tree_delete(&root->delayed_nodes_tree,
278 delayed_node->inode_id);
279 spin_unlock(&root->inode_lock);
280 kmem_cache_free(delayed_node_cache, delayed_node);
281 }
282 }
283
btrfs_release_delayed_node(struct btrfs_delayed_node * node)284 static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
285 {
286 __btrfs_release_delayed_node(node, 0);
287 }
288
btrfs_first_prepared_delayed_node(struct btrfs_delayed_root * delayed_root)289 static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
290 struct btrfs_delayed_root *delayed_root)
291 {
292 struct list_head *p;
293 struct btrfs_delayed_node *node = NULL;
294
295 spin_lock(&delayed_root->lock);
296 if (list_empty(&delayed_root->prepare_list))
297 goto out;
298
299 p = delayed_root->prepare_list.next;
300 list_del_init(p);
301 node = list_entry(p, struct btrfs_delayed_node, p_list);
302 refcount_inc(&node->refs);
303 out:
304 spin_unlock(&delayed_root->lock);
305
306 return node;
307 }
308
btrfs_release_prepared_delayed_node(struct btrfs_delayed_node * node)309 static inline void btrfs_release_prepared_delayed_node(
310 struct btrfs_delayed_node *node)
311 {
312 __btrfs_release_delayed_node(node, 1);
313 }
314
btrfs_alloc_delayed_item(u32 data_len)315 static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
316 {
317 struct btrfs_delayed_item *item;
318 item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
319 if (item) {
320 item->data_len = data_len;
321 item->ins_or_del = 0;
322 item->bytes_reserved = 0;
323 item->delayed_node = NULL;
324 refcount_set(&item->refs, 1);
325 }
326 return item;
327 }
328
329 /*
330 * __btrfs_lookup_delayed_item - look up the delayed item by key
331 * @delayed_node: pointer to the delayed node
332 * @key: the key to look up
333 * @prev: used to store the prev item if the right item isn't found
334 * @next: used to store the next item if the right item isn't found
335 *
336 * Note: if we don't find the right item, we will return the prev item and
337 * the next item.
338 */
__btrfs_lookup_delayed_item(struct rb_root * root,struct btrfs_key * key,struct btrfs_delayed_item ** prev,struct btrfs_delayed_item ** next)339 static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
340 struct rb_root *root,
341 struct btrfs_key *key,
342 struct btrfs_delayed_item **prev,
343 struct btrfs_delayed_item **next)
344 {
345 struct rb_node *node, *prev_node = NULL;
346 struct btrfs_delayed_item *delayed_item = NULL;
347 int ret = 0;
348
349 node = root->rb_node;
350
351 while (node) {
352 delayed_item = rb_entry(node, struct btrfs_delayed_item,
353 rb_node);
354 prev_node = node;
355 ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
356 if (ret < 0)
357 node = node->rb_right;
358 else if (ret > 0)
359 node = node->rb_left;
360 else
361 return delayed_item;
362 }
363
364 if (prev) {
365 if (!prev_node)
366 *prev = NULL;
367 else if (ret < 0)
368 *prev = delayed_item;
369 else if ((node = rb_prev(prev_node)) != NULL) {
370 *prev = rb_entry(node, struct btrfs_delayed_item,
371 rb_node);
372 } else
373 *prev = NULL;
374 }
375
376 if (next) {
377 if (!prev_node)
378 *next = NULL;
379 else if (ret > 0)
380 *next = delayed_item;
381 else if ((node = rb_next(prev_node)) != NULL) {
382 *next = rb_entry(node, struct btrfs_delayed_item,
383 rb_node);
384 } else
385 *next = NULL;
386 }
387 return NULL;
388 }
389
__btrfs_lookup_delayed_insertion_item(struct btrfs_delayed_node * delayed_node,struct btrfs_key * key)390 static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
391 struct btrfs_delayed_node *delayed_node,
392 struct btrfs_key *key)
393 {
394 return __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
395 NULL, NULL);
396 }
397
__btrfs_add_delayed_item(struct btrfs_delayed_node * delayed_node,struct btrfs_delayed_item * ins,int action)398 static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
399 struct btrfs_delayed_item *ins,
400 int action)
401 {
402 struct rb_node **p, *node;
403 struct rb_node *parent_node = NULL;
404 struct rb_root *root;
405 struct btrfs_delayed_item *item;
406 int cmp;
407
408 if (action == BTRFS_DELAYED_INSERTION_ITEM)
409 root = &delayed_node->ins_root;
410 else if (action == BTRFS_DELAYED_DELETION_ITEM)
411 root = &delayed_node->del_root;
412 else
413 BUG();
414 p = &root->rb_node;
415 node = &ins->rb_node;
416
417 while (*p) {
418 parent_node = *p;
419 item = rb_entry(parent_node, struct btrfs_delayed_item,
420 rb_node);
421
422 cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
423 if (cmp < 0)
424 p = &(*p)->rb_right;
425 else if (cmp > 0)
426 p = &(*p)->rb_left;
427 else
428 return -EEXIST;
429 }
430
431 rb_link_node(node, parent_node, p);
432 rb_insert_color(node, root);
433 ins->delayed_node = delayed_node;
434 ins->ins_or_del = action;
435
436 if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
437 action == BTRFS_DELAYED_INSERTION_ITEM &&
438 ins->key.offset >= delayed_node->index_cnt)
439 delayed_node->index_cnt = ins->key.offset + 1;
440
441 delayed_node->count++;
442 atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
443 return 0;
444 }
445
__btrfs_add_delayed_insertion_item(struct btrfs_delayed_node * node,struct btrfs_delayed_item * item)446 static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
447 struct btrfs_delayed_item *item)
448 {
449 return __btrfs_add_delayed_item(node, item,
450 BTRFS_DELAYED_INSERTION_ITEM);
451 }
452
__btrfs_add_delayed_deletion_item(struct btrfs_delayed_node * node,struct btrfs_delayed_item * item)453 static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
454 struct btrfs_delayed_item *item)
455 {
456 return __btrfs_add_delayed_item(node, item,
457 BTRFS_DELAYED_DELETION_ITEM);
458 }
459
finish_one_item(struct btrfs_delayed_root * delayed_root)460 static void finish_one_item(struct btrfs_delayed_root *delayed_root)
461 {
462 int seq = atomic_inc_return(&delayed_root->items_seq);
463
464 /* atomic_dec_return implies a barrier */
465 if ((atomic_dec_return(&delayed_root->items) <
466 BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0))
467 cond_wake_up_nomb(&delayed_root->wait);
468 }
469
__btrfs_remove_delayed_item(struct btrfs_delayed_item * delayed_item)470 static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
471 {
472 struct rb_root *root;
473 struct btrfs_delayed_root *delayed_root;
474
475 /* Not associated with any delayed_node */
476 if (!delayed_item->delayed_node)
477 return;
478 delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
479
480 BUG_ON(!delayed_root);
481 BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
482 delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
483
484 if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
485 root = &delayed_item->delayed_node->ins_root;
486 else
487 root = &delayed_item->delayed_node->del_root;
488
489 rb_erase(&delayed_item->rb_node, root);
490 delayed_item->delayed_node->count--;
491
492 finish_one_item(delayed_root);
493 }
494
btrfs_release_delayed_item(struct btrfs_delayed_item * item)495 static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
496 {
497 if (item) {
498 __btrfs_remove_delayed_item(item);
499 if (refcount_dec_and_test(&item->refs))
500 kfree(item);
501 }
502 }
503
__btrfs_first_delayed_insertion_item(struct btrfs_delayed_node * delayed_node)504 static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
505 struct btrfs_delayed_node *delayed_node)
506 {
507 struct rb_node *p;
508 struct btrfs_delayed_item *item = NULL;
509
510 p = rb_first(&delayed_node->ins_root);
511 if (p)
512 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
513
514 return item;
515 }
516
__btrfs_first_delayed_deletion_item(struct btrfs_delayed_node * delayed_node)517 static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
518 struct btrfs_delayed_node *delayed_node)
519 {
520 struct rb_node *p;
521 struct btrfs_delayed_item *item = NULL;
522
523 p = rb_first(&delayed_node->del_root);
524 if (p)
525 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
526
527 return item;
528 }
529
__btrfs_next_delayed_item(struct btrfs_delayed_item * item)530 static struct btrfs_delayed_item *__btrfs_next_delayed_item(
531 struct btrfs_delayed_item *item)
532 {
533 struct rb_node *p;
534 struct btrfs_delayed_item *next = NULL;
535
536 p = rb_next(&item->rb_node);
537 if (p)
538 next = rb_entry(p, struct btrfs_delayed_item, rb_node);
539
540 return next;
541 }
542
btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_delayed_item * item)543 static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
544 struct btrfs_root *root,
545 struct btrfs_delayed_item *item)
546 {
547 struct btrfs_block_rsv *src_rsv;
548 struct btrfs_block_rsv *dst_rsv;
549 struct btrfs_fs_info *fs_info = root->fs_info;
550 u64 num_bytes;
551 int ret;
552
553 if (!trans->bytes_reserved)
554 return 0;
555
556 src_rsv = trans->block_rsv;
557 dst_rsv = &fs_info->delayed_block_rsv;
558
559 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
560
561 /*
562 * Here we migrate space rsv from transaction rsv, since have already
563 * reserved space when starting a transaction. So no need to reserve
564 * qgroup space here.
565 */
566 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
567 if (!ret) {
568 trace_btrfs_space_reservation(fs_info, "delayed_item",
569 item->key.objectid,
570 num_bytes, 1);
571 item->bytes_reserved = num_bytes;
572 }
573
574 return ret;
575 }
576
btrfs_delayed_item_release_metadata(struct btrfs_root * root,struct btrfs_delayed_item * item)577 static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
578 struct btrfs_delayed_item *item)
579 {
580 struct btrfs_block_rsv *rsv;
581 struct btrfs_fs_info *fs_info = root->fs_info;
582
583 if (!item->bytes_reserved)
584 return;
585
586 rsv = &fs_info->delayed_block_rsv;
587 /*
588 * Check btrfs_delayed_item_reserve_metadata() to see why we don't need
589 * to release/reserve qgroup space.
590 */
591 trace_btrfs_space_reservation(fs_info, "delayed_item",
592 item->key.objectid, item->bytes_reserved,
593 0);
594 btrfs_block_rsv_release(fs_info, rsv,
595 item->bytes_reserved);
596 }
597
btrfs_delayed_inode_reserve_metadata(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_inode * inode,struct btrfs_delayed_node * node)598 static int btrfs_delayed_inode_reserve_metadata(
599 struct btrfs_trans_handle *trans,
600 struct btrfs_root *root,
601 struct btrfs_inode *inode,
602 struct btrfs_delayed_node *node)
603 {
604 struct btrfs_fs_info *fs_info = root->fs_info;
605 struct btrfs_block_rsv *src_rsv;
606 struct btrfs_block_rsv *dst_rsv;
607 u64 num_bytes;
608 int ret;
609
610 src_rsv = trans->block_rsv;
611 dst_rsv = &fs_info->delayed_block_rsv;
612
613 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
614
615 /*
616 * btrfs_dirty_inode will update the inode under btrfs_join_transaction
617 * which doesn't reserve space for speed. This is a problem since we
618 * still need to reserve space for this update, so try to reserve the
619 * space.
620 *
621 * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
622 * we always reserve enough to update the inode item.
623 */
624 if (!src_rsv || (!trans->bytes_reserved &&
625 src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
626 ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
627 if (ret < 0)
628 return ret;
629 ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
630 BTRFS_RESERVE_NO_FLUSH);
631 /*
632 * Since we're under a transaction reserve_metadata_bytes could
633 * try to commit the transaction which will make it return
634 * EAGAIN to make us stop the transaction we have, so return
635 * ENOSPC instead so that btrfs_dirty_inode knows what to do.
636 */
637 if (ret == -EAGAIN) {
638 ret = -ENOSPC;
639 btrfs_qgroup_free_meta_prealloc(root, num_bytes);
640 }
641 if (!ret) {
642 node->bytes_reserved = num_bytes;
643 trace_btrfs_space_reservation(fs_info,
644 "delayed_inode",
645 btrfs_ino(inode),
646 num_bytes, 1);
647 } else {
648 btrfs_qgroup_free_meta_prealloc(root, fs_info->nodesize);
649 }
650 return ret;
651 }
652
653 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
654 if (!ret) {
655 trace_btrfs_space_reservation(fs_info, "delayed_inode",
656 btrfs_ino(inode), num_bytes, 1);
657 node->bytes_reserved = num_bytes;
658 }
659
660 return ret;
661 }
662
btrfs_delayed_inode_release_metadata(struct btrfs_fs_info * fs_info,struct btrfs_delayed_node * node,bool qgroup_free)663 static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
664 struct btrfs_delayed_node *node,
665 bool qgroup_free)
666 {
667 struct btrfs_block_rsv *rsv;
668
669 if (!node->bytes_reserved)
670 return;
671
672 rsv = &fs_info->delayed_block_rsv;
673 trace_btrfs_space_reservation(fs_info, "delayed_inode",
674 node->inode_id, node->bytes_reserved, 0);
675 btrfs_block_rsv_release(fs_info, rsv,
676 node->bytes_reserved);
677 if (qgroup_free)
678 btrfs_qgroup_free_meta_prealloc(node->root,
679 node->bytes_reserved);
680 else
681 btrfs_qgroup_convert_reserved_meta(node->root,
682 node->bytes_reserved);
683 node->bytes_reserved = 0;
684 }
685
686 /*
687 * This helper will insert some continuous items into the same leaf according
688 * to the free space of the leaf.
689 */
btrfs_batch_insert_items(struct btrfs_root * root,struct btrfs_path * path,struct btrfs_delayed_item * item)690 static int btrfs_batch_insert_items(struct btrfs_root *root,
691 struct btrfs_path *path,
692 struct btrfs_delayed_item *item)
693 {
694 struct btrfs_fs_info *fs_info = root->fs_info;
695 struct btrfs_delayed_item *curr, *next;
696 int free_space;
697 int total_data_size = 0, total_size = 0;
698 struct extent_buffer *leaf;
699 char *data_ptr;
700 struct btrfs_key *keys;
701 u32 *data_size;
702 struct list_head head;
703 int slot;
704 int nitems;
705 int i;
706 int ret = 0;
707
708 BUG_ON(!path->nodes[0]);
709
710 leaf = path->nodes[0];
711 free_space = btrfs_leaf_free_space(fs_info, leaf);
712 INIT_LIST_HEAD(&head);
713
714 next = item;
715 nitems = 0;
716
717 /*
718 * count the number of the continuous items that we can insert in batch
719 */
720 while (total_size + next->data_len + sizeof(struct btrfs_item) <=
721 free_space) {
722 total_data_size += next->data_len;
723 total_size += next->data_len + sizeof(struct btrfs_item);
724 list_add_tail(&next->tree_list, &head);
725 nitems++;
726
727 curr = next;
728 next = __btrfs_next_delayed_item(curr);
729 if (!next)
730 break;
731
732 if (!btrfs_is_continuous_delayed_item(curr, next))
733 break;
734 }
735
736 if (!nitems) {
737 ret = 0;
738 goto out;
739 }
740
741 /*
742 * we need allocate some memory space, but it might cause the task
743 * to sleep, so we set all locked nodes in the path to blocking locks
744 * first.
745 */
746 btrfs_set_path_blocking(path);
747
748 keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS);
749 if (!keys) {
750 ret = -ENOMEM;
751 goto out;
752 }
753
754 data_size = kmalloc_array(nitems, sizeof(u32), GFP_NOFS);
755 if (!data_size) {
756 ret = -ENOMEM;
757 goto error;
758 }
759
760 /* get keys of all the delayed items */
761 i = 0;
762 list_for_each_entry(next, &head, tree_list) {
763 keys[i] = next->key;
764 data_size[i] = next->data_len;
765 i++;
766 }
767
768 /* reset all the locked nodes in the patch to spinning locks. */
769 btrfs_clear_path_blocking(path, NULL, 0);
770
771 /* insert the keys of the items */
772 setup_items_for_insert(root, path, keys, data_size,
773 total_data_size, total_size, nitems);
774
775 /* insert the dir index items */
776 slot = path->slots[0];
777 list_for_each_entry_safe(curr, next, &head, tree_list) {
778 data_ptr = btrfs_item_ptr(leaf, slot, char);
779 write_extent_buffer(leaf, &curr->data,
780 (unsigned long)data_ptr,
781 curr->data_len);
782 slot++;
783
784 btrfs_delayed_item_release_metadata(root, curr);
785
786 list_del(&curr->tree_list);
787 btrfs_release_delayed_item(curr);
788 }
789
790 error:
791 kfree(data_size);
792 kfree(keys);
793 out:
794 return ret;
795 }
796
797 /*
798 * This helper can just do simple insertion that needn't extend item for new
799 * data, such as directory name index insertion, inode insertion.
800 */
btrfs_insert_delayed_item(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct btrfs_delayed_item * delayed_item)801 static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
802 struct btrfs_root *root,
803 struct btrfs_path *path,
804 struct btrfs_delayed_item *delayed_item)
805 {
806 struct extent_buffer *leaf;
807 unsigned int nofs_flag;
808 char *ptr;
809 int ret;
810
811 nofs_flag = memalloc_nofs_save();
812 ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
813 delayed_item->data_len);
814 memalloc_nofs_restore(nofs_flag);
815 if (ret < 0 && ret != -EEXIST)
816 return ret;
817
818 leaf = path->nodes[0];
819
820 ptr = btrfs_item_ptr(leaf, path->slots[0], char);
821
822 write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
823 delayed_item->data_len);
824 btrfs_mark_buffer_dirty(leaf);
825
826 btrfs_delayed_item_release_metadata(root, delayed_item);
827 return 0;
828 }
829
830 /*
831 * we insert an item first, then if there are some continuous items, we try
832 * to insert those items into the same leaf.
833 */
btrfs_insert_delayed_items(struct btrfs_trans_handle * trans,struct btrfs_path * path,struct btrfs_root * root,struct btrfs_delayed_node * node)834 static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
835 struct btrfs_path *path,
836 struct btrfs_root *root,
837 struct btrfs_delayed_node *node)
838 {
839 struct btrfs_delayed_item *curr, *prev;
840 int ret = 0;
841
842 do_again:
843 mutex_lock(&node->mutex);
844 curr = __btrfs_first_delayed_insertion_item(node);
845 if (!curr)
846 goto insert_end;
847
848 ret = btrfs_insert_delayed_item(trans, root, path, curr);
849 if (ret < 0) {
850 btrfs_release_path(path);
851 goto insert_end;
852 }
853
854 prev = curr;
855 curr = __btrfs_next_delayed_item(prev);
856 if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
857 /* insert the continuous items into the same leaf */
858 path->slots[0]++;
859 btrfs_batch_insert_items(root, path, curr);
860 }
861 btrfs_release_delayed_item(prev);
862 btrfs_mark_buffer_dirty(path->nodes[0]);
863
864 btrfs_release_path(path);
865 mutex_unlock(&node->mutex);
866 goto do_again;
867
868 insert_end:
869 mutex_unlock(&node->mutex);
870 return ret;
871 }
872
btrfs_batch_delete_items(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct btrfs_delayed_item * item)873 static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
874 struct btrfs_root *root,
875 struct btrfs_path *path,
876 struct btrfs_delayed_item *item)
877 {
878 struct btrfs_delayed_item *curr, *next;
879 struct extent_buffer *leaf;
880 struct btrfs_key key;
881 struct list_head head;
882 int nitems, i, last_item;
883 int ret = 0;
884
885 BUG_ON(!path->nodes[0]);
886
887 leaf = path->nodes[0];
888
889 i = path->slots[0];
890 last_item = btrfs_header_nritems(leaf) - 1;
891 if (i > last_item)
892 return -ENOENT; /* FIXME: Is errno suitable? */
893
894 next = item;
895 INIT_LIST_HEAD(&head);
896 btrfs_item_key_to_cpu(leaf, &key, i);
897 nitems = 0;
898 /*
899 * count the number of the dir index items that we can delete in batch
900 */
901 while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
902 list_add_tail(&next->tree_list, &head);
903 nitems++;
904
905 curr = next;
906 next = __btrfs_next_delayed_item(curr);
907 if (!next)
908 break;
909
910 if (!btrfs_is_continuous_delayed_item(curr, next))
911 break;
912
913 i++;
914 if (i > last_item)
915 break;
916 btrfs_item_key_to_cpu(leaf, &key, i);
917 }
918
919 if (!nitems)
920 return 0;
921
922 ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
923 if (ret)
924 goto out;
925
926 list_for_each_entry_safe(curr, next, &head, tree_list) {
927 btrfs_delayed_item_release_metadata(root, curr);
928 list_del(&curr->tree_list);
929 btrfs_release_delayed_item(curr);
930 }
931
932 out:
933 return ret;
934 }
935
btrfs_delete_delayed_items(struct btrfs_trans_handle * trans,struct btrfs_path * path,struct btrfs_root * root,struct btrfs_delayed_node * node)936 static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
937 struct btrfs_path *path,
938 struct btrfs_root *root,
939 struct btrfs_delayed_node *node)
940 {
941 struct btrfs_delayed_item *curr, *prev;
942 unsigned int nofs_flag;
943 int ret = 0;
944
945 do_again:
946 mutex_lock(&node->mutex);
947 curr = __btrfs_first_delayed_deletion_item(node);
948 if (!curr)
949 goto delete_fail;
950
951 nofs_flag = memalloc_nofs_save();
952 ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
953 memalloc_nofs_restore(nofs_flag);
954 if (ret < 0)
955 goto delete_fail;
956 else if (ret > 0) {
957 /*
958 * can't find the item which the node points to, so this node
959 * is invalid, just drop it.
960 */
961 prev = curr;
962 curr = __btrfs_next_delayed_item(prev);
963 btrfs_release_delayed_item(prev);
964 ret = 0;
965 btrfs_release_path(path);
966 if (curr) {
967 mutex_unlock(&node->mutex);
968 goto do_again;
969 } else
970 goto delete_fail;
971 }
972
973 btrfs_batch_delete_items(trans, root, path, curr);
974 btrfs_release_path(path);
975 mutex_unlock(&node->mutex);
976 goto do_again;
977
978 delete_fail:
979 btrfs_release_path(path);
980 mutex_unlock(&node->mutex);
981 return ret;
982 }
983
btrfs_release_delayed_inode(struct btrfs_delayed_node * delayed_node)984 static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
985 {
986 struct btrfs_delayed_root *delayed_root;
987
988 if (delayed_node &&
989 test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
990 BUG_ON(!delayed_node->root);
991 clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
992 delayed_node->count--;
993
994 delayed_root = delayed_node->root->fs_info->delayed_root;
995 finish_one_item(delayed_root);
996 }
997 }
998
btrfs_release_delayed_iref(struct btrfs_delayed_node * delayed_node)999 static void btrfs_release_delayed_iref(struct btrfs_delayed_node *delayed_node)
1000 {
1001 struct btrfs_delayed_root *delayed_root;
1002
1003 ASSERT(delayed_node->root);
1004 clear_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
1005 delayed_node->count--;
1006
1007 delayed_root = delayed_node->root->fs_info->delayed_root;
1008 finish_one_item(delayed_root);
1009 }
1010
__btrfs_update_delayed_inode(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct btrfs_delayed_node * node)1011 static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1012 struct btrfs_root *root,
1013 struct btrfs_path *path,
1014 struct btrfs_delayed_node *node)
1015 {
1016 struct btrfs_fs_info *fs_info = root->fs_info;
1017 struct btrfs_key key;
1018 struct btrfs_inode_item *inode_item;
1019 struct extent_buffer *leaf;
1020 unsigned int nofs_flag;
1021 int mod;
1022 int ret;
1023
1024 key.objectid = node->inode_id;
1025 key.type = BTRFS_INODE_ITEM_KEY;
1026 key.offset = 0;
1027
1028 if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1029 mod = -1;
1030 else
1031 mod = 1;
1032
1033 nofs_flag = memalloc_nofs_save();
1034 ret = btrfs_lookup_inode(trans, root, path, &key, mod);
1035 memalloc_nofs_restore(nofs_flag);
1036 if (ret > 0) {
1037 btrfs_release_path(path);
1038 return -ENOENT;
1039 } else if (ret < 0) {
1040 return ret;
1041 }
1042
1043 leaf = path->nodes[0];
1044 inode_item = btrfs_item_ptr(leaf, path->slots[0],
1045 struct btrfs_inode_item);
1046 write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
1047 sizeof(struct btrfs_inode_item));
1048 btrfs_mark_buffer_dirty(leaf);
1049
1050 if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1051 goto no_iref;
1052
1053 path->slots[0]++;
1054 if (path->slots[0] >= btrfs_header_nritems(leaf))
1055 goto search;
1056 again:
1057 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1058 if (key.objectid != node->inode_id)
1059 goto out;
1060
1061 if (key.type != BTRFS_INODE_REF_KEY &&
1062 key.type != BTRFS_INODE_EXTREF_KEY)
1063 goto out;
1064
1065 /*
1066 * Delayed iref deletion is for the inode who has only one link,
1067 * so there is only one iref. The case that several irefs are
1068 * in the same item doesn't exist.
1069 */
1070 btrfs_del_item(trans, root, path);
1071 out:
1072 btrfs_release_delayed_iref(node);
1073 no_iref:
1074 btrfs_release_path(path);
1075 err_out:
1076 btrfs_delayed_inode_release_metadata(fs_info, node, (ret < 0));
1077 btrfs_release_delayed_inode(node);
1078
1079 return ret;
1080
1081 search:
1082 btrfs_release_path(path);
1083
1084 key.type = BTRFS_INODE_EXTREF_KEY;
1085 key.offset = -1;
1086
1087 nofs_flag = memalloc_nofs_save();
1088 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1089 memalloc_nofs_restore(nofs_flag);
1090 if (ret < 0)
1091 goto err_out;
1092 ASSERT(ret);
1093
1094 ret = 0;
1095 leaf = path->nodes[0];
1096 path->slots[0]--;
1097 goto again;
1098 }
1099
btrfs_update_delayed_inode(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct btrfs_delayed_node * node)1100 static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1101 struct btrfs_root *root,
1102 struct btrfs_path *path,
1103 struct btrfs_delayed_node *node)
1104 {
1105 int ret;
1106
1107 mutex_lock(&node->mutex);
1108 if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &node->flags)) {
1109 mutex_unlock(&node->mutex);
1110 return 0;
1111 }
1112
1113 ret = __btrfs_update_delayed_inode(trans, root, path, node);
1114 mutex_unlock(&node->mutex);
1115 return ret;
1116 }
1117
1118 static inline int
__btrfs_commit_inode_delayed_items(struct btrfs_trans_handle * trans,struct btrfs_path * path,struct btrfs_delayed_node * node)1119 __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1120 struct btrfs_path *path,
1121 struct btrfs_delayed_node *node)
1122 {
1123 int ret;
1124
1125 ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1126 if (ret)
1127 return ret;
1128
1129 ret = btrfs_delete_delayed_items(trans, path, node->root, node);
1130 if (ret)
1131 return ret;
1132
1133 ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1134 return ret;
1135 }
1136
1137 /*
1138 * Called when committing the transaction.
1139 * Returns 0 on success.
1140 * Returns < 0 on error and returns with an aborted transaction with any
1141 * outstanding delayed items cleaned up.
1142 */
__btrfs_run_delayed_items(struct btrfs_trans_handle * trans,int nr)1143 static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
1144 {
1145 struct btrfs_fs_info *fs_info = trans->fs_info;
1146 struct btrfs_delayed_root *delayed_root;
1147 struct btrfs_delayed_node *curr_node, *prev_node;
1148 struct btrfs_path *path;
1149 struct btrfs_block_rsv *block_rsv;
1150 int ret = 0;
1151 bool count = (nr > 0);
1152
1153 if (trans->aborted)
1154 return -EIO;
1155
1156 path = btrfs_alloc_path();
1157 if (!path)
1158 return -ENOMEM;
1159 path->leave_spinning = 1;
1160
1161 block_rsv = trans->block_rsv;
1162 trans->block_rsv = &fs_info->delayed_block_rsv;
1163
1164 delayed_root = fs_info->delayed_root;
1165
1166 curr_node = btrfs_first_delayed_node(delayed_root);
1167 while (curr_node && (!count || (count && nr--))) {
1168 ret = __btrfs_commit_inode_delayed_items(trans, path,
1169 curr_node);
1170 if (ret) {
1171 btrfs_release_delayed_node(curr_node);
1172 curr_node = NULL;
1173 btrfs_abort_transaction(trans, ret);
1174 break;
1175 }
1176
1177 prev_node = curr_node;
1178 curr_node = btrfs_next_delayed_node(curr_node);
1179 btrfs_release_delayed_node(prev_node);
1180 }
1181
1182 if (curr_node)
1183 btrfs_release_delayed_node(curr_node);
1184 btrfs_free_path(path);
1185 trans->block_rsv = block_rsv;
1186
1187 return ret;
1188 }
1189
btrfs_run_delayed_items(struct btrfs_trans_handle * trans)1190 int btrfs_run_delayed_items(struct btrfs_trans_handle *trans)
1191 {
1192 return __btrfs_run_delayed_items(trans, -1);
1193 }
1194
btrfs_run_delayed_items_nr(struct btrfs_trans_handle * trans,int nr)1195 int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, int nr)
1196 {
1197 return __btrfs_run_delayed_items(trans, nr);
1198 }
1199
btrfs_commit_inode_delayed_items(struct btrfs_trans_handle * trans,struct btrfs_inode * inode)1200 int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1201 struct btrfs_inode *inode)
1202 {
1203 struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1204 struct btrfs_path *path;
1205 struct btrfs_block_rsv *block_rsv;
1206 int ret;
1207
1208 if (!delayed_node)
1209 return 0;
1210
1211 mutex_lock(&delayed_node->mutex);
1212 if (!delayed_node->count) {
1213 mutex_unlock(&delayed_node->mutex);
1214 btrfs_release_delayed_node(delayed_node);
1215 return 0;
1216 }
1217 mutex_unlock(&delayed_node->mutex);
1218
1219 path = btrfs_alloc_path();
1220 if (!path) {
1221 btrfs_release_delayed_node(delayed_node);
1222 return -ENOMEM;
1223 }
1224 path->leave_spinning = 1;
1225
1226 block_rsv = trans->block_rsv;
1227 trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
1228
1229 ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1230
1231 btrfs_release_delayed_node(delayed_node);
1232 btrfs_free_path(path);
1233 trans->block_rsv = block_rsv;
1234
1235 return ret;
1236 }
1237
btrfs_commit_inode_delayed_inode(struct btrfs_inode * inode)1238 int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode)
1239 {
1240 struct btrfs_fs_info *fs_info = inode->root->fs_info;
1241 struct btrfs_trans_handle *trans;
1242 struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1243 struct btrfs_path *path;
1244 struct btrfs_block_rsv *block_rsv;
1245 int ret;
1246
1247 if (!delayed_node)
1248 return 0;
1249
1250 mutex_lock(&delayed_node->mutex);
1251 if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1252 mutex_unlock(&delayed_node->mutex);
1253 btrfs_release_delayed_node(delayed_node);
1254 return 0;
1255 }
1256 mutex_unlock(&delayed_node->mutex);
1257
1258 trans = btrfs_join_transaction(delayed_node->root);
1259 if (IS_ERR(trans)) {
1260 ret = PTR_ERR(trans);
1261 goto out;
1262 }
1263
1264 path = btrfs_alloc_path();
1265 if (!path) {
1266 ret = -ENOMEM;
1267 goto trans_out;
1268 }
1269 path->leave_spinning = 1;
1270
1271 block_rsv = trans->block_rsv;
1272 trans->block_rsv = &fs_info->delayed_block_rsv;
1273
1274 mutex_lock(&delayed_node->mutex);
1275 if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags))
1276 ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
1277 path, delayed_node);
1278 else
1279 ret = 0;
1280 mutex_unlock(&delayed_node->mutex);
1281
1282 btrfs_free_path(path);
1283 trans->block_rsv = block_rsv;
1284 trans_out:
1285 btrfs_end_transaction(trans);
1286 btrfs_btree_balance_dirty(fs_info);
1287 out:
1288 btrfs_release_delayed_node(delayed_node);
1289
1290 return ret;
1291 }
1292
btrfs_remove_delayed_node(struct btrfs_inode * inode)1293 void btrfs_remove_delayed_node(struct btrfs_inode *inode)
1294 {
1295 struct btrfs_delayed_node *delayed_node;
1296
1297 delayed_node = READ_ONCE(inode->delayed_node);
1298 if (!delayed_node)
1299 return;
1300
1301 inode->delayed_node = NULL;
1302 btrfs_release_delayed_node(delayed_node);
1303 }
1304
1305 struct btrfs_async_delayed_work {
1306 struct btrfs_delayed_root *delayed_root;
1307 int nr;
1308 struct btrfs_work work;
1309 };
1310
btrfs_async_run_delayed_root(struct btrfs_work * work)1311 static void btrfs_async_run_delayed_root(struct btrfs_work *work)
1312 {
1313 struct btrfs_async_delayed_work *async_work;
1314 struct btrfs_delayed_root *delayed_root;
1315 struct btrfs_trans_handle *trans;
1316 struct btrfs_path *path;
1317 struct btrfs_delayed_node *delayed_node = NULL;
1318 struct btrfs_root *root;
1319 struct btrfs_block_rsv *block_rsv;
1320 int total_done = 0;
1321
1322 async_work = container_of(work, struct btrfs_async_delayed_work, work);
1323 delayed_root = async_work->delayed_root;
1324
1325 path = btrfs_alloc_path();
1326 if (!path)
1327 goto out;
1328
1329 do {
1330 if (atomic_read(&delayed_root->items) <
1331 BTRFS_DELAYED_BACKGROUND / 2)
1332 break;
1333
1334 delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
1335 if (!delayed_node)
1336 break;
1337
1338 path->leave_spinning = 1;
1339 root = delayed_node->root;
1340
1341 trans = btrfs_join_transaction(root);
1342 if (IS_ERR(trans)) {
1343 btrfs_release_path(path);
1344 btrfs_release_prepared_delayed_node(delayed_node);
1345 total_done++;
1346 continue;
1347 }
1348
1349 block_rsv = trans->block_rsv;
1350 trans->block_rsv = &root->fs_info->delayed_block_rsv;
1351
1352 __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1353
1354 trans->block_rsv = block_rsv;
1355 btrfs_end_transaction(trans);
1356 btrfs_btree_balance_dirty_nodelay(root->fs_info);
1357
1358 btrfs_release_path(path);
1359 btrfs_release_prepared_delayed_node(delayed_node);
1360 total_done++;
1361
1362 } while ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK)
1363 || total_done < async_work->nr);
1364
1365 btrfs_free_path(path);
1366 out:
1367 wake_up(&delayed_root->wait);
1368 kfree(async_work);
1369 }
1370
1371
btrfs_wq_run_delayed_node(struct btrfs_delayed_root * delayed_root,struct btrfs_fs_info * fs_info,int nr)1372 static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1373 struct btrfs_fs_info *fs_info, int nr)
1374 {
1375 struct btrfs_async_delayed_work *async_work;
1376
1377 async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
1378 if (!async_work)
1379 return -ENOMEM;
1380
1381 async_work->delayed_root = delayed_root;
1382 btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
1383 btrfs_async_run_delayed_root, NULL, NULL);
1384 async_work->nr = nr;
1385
1386 btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
1387 return 0;
1388 }
1389
btrfs_assert_delayed_root_empty(struct btrfs_fs_info * fs_info)1390 void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info)
1391 {
1392 WARN_ON(btrfs_first_delayed_node(fs_info->delayed_root));
1393 }
1394
could_end_wait(struct btrfs_delayed_root * delayed_root,int seq)1395 static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
1396 {
1397 int val = atomic_read(&delayed_root->items_seq);
1398
1399 if (val < seq || val >= seq + BTRFS_DELAYED_BATCH)
1400 return 1;
1401
1402 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1403 return 1;
1404
1405 return 0;
1406 }
1407
btrfs_balance_delayed_items(struct btrfs_fs_info * fs_info)1408 void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
1409 {
1410 struct btrfs_delayed_root *delayed_root = fs_info->delayed_root;
1411
1412 if ((atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) ||
1413 btrfs_workqueue_normal_congested(fs_info->delayed_workers))
1414 return;
1415
1416 if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1417 int seq;
1418 int ret;
1419
1420 seq = atomic_read(&delayed_root->items_seq);
1421
1422 ret = btrfs_wq_run_delayed_node(delayed_root, fs_info, 0);
1423 if (ret)
1424 return;
1425
1426 wait_event_interruptible(delayed_root->wait,
1427 could_end_wait(delayed_root, seq));
1428 return;
1429 }
1430
1431 btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
1432 }
1433
1434 /* Will return 0 or -ENOMEM */
btrfs_insert_delayed_dir_index(struct btrfs_trans_handle * trans,const char * name,int name_len,struct btrfs_inode * dir,struct btrfs_disk_key * disk_key,u8 type,u64 index)1435 int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1436 const char *name, int name_len,
1437 struct btrfs_inode *dir,
1438 struct btrfs_disk_key *disk_key, u8 type,
1439 u64 index)
1440 {
1441 struct btrfs_delayed_node *delayed_node;
1442 struct btrfs_delayed_item *delayed_item;
1443 struct btrfs_dir_item *dir_item;
1444 int ret;
1445
1446 delayed_node = btrfs_get_or_create_delayed_node(dir);
1447 if (IS_ERR(delayed_node))
1448 return PTR_ERR(delayed_node);
1449
1450 delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
1451 if (!delayed_item) {
1452 ret = -ENOMEM;
1453 goto release_node;
1454 }
1455
1456 delayed_item->key.objectid = btrfs_ino(dir);
1457 delayed_item->key.type = BTRFS_DIR_INDEX_KEY;
1458 delayed_item->key.offset = index;
1459
1460 dir_item = (struct btrfs_dir_item *)delayed_item->data;
1461 dir_item->location = *disk_key;
1462 btrfs_set_stack_dir_transid(dir_item, trans->transid);
1463 btrfs_set_stack_dir_data_len(dir_item, 0);
1464 btrfs_set_stack_dir_name_len(dir_item, name_len);
1465 btrfs_set_stack_dir_type(dir_item, type);
1466 memcpy((char *)(dir_item + 1), name, name_len);
1467
1468 ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, delayed_item);
1469 /*
1470 * we have reserved enough space when we start a new transaction,
1471 * so reserving metadata failure is impossible
1472 */
1473 BUG_ON(ret);
1474
1475 mutex_lock(&delayed_node->mutex);
1476 ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
1477 if (unlikely(ret)) {
1478 btrfs_err(trans->fs_info,
1479 "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
1480 name_len, name, delayed_node->root->objectid,
1481 delayed_node->inode_id, ret);
1482 BUG();
1483 }
1484 mutex_unlock(&delayed_node->mutex);
1485
1486 release_node:
1487 btrfs_release_delayed_node(delayed_node);
1488 return ret;
1489 }
1490
btrfs_delete_delayed_insertion_item(struct btrfs_fs_info * fs_info,struct btrfs_delayed_node * node,struct btrfs_key * key)1491 static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
1492 struct btrfs_delayed_node *node,
1493 struct btrfs_key *key)
1494 {
1495 struct btrfs_delayed_item *item;
1496
1497 mutex_lock(&node->mutex);
1498 item = __btrfs_lookup_delayed_insertion_item(node, key);
1499 if (!item) {
1500 mutex_unlock(&node->mutex);
1501 return 1;
1502 }
1503
1504 btrfs_delayed_item_release_metadata(node->root, item);
1505 btrfs_release_delayed_item(item);
1506 mutex_unlock(&node->mutex);
1507 return 0;
1508 }
1509
btrfs_delete_delayed_dir_index(struct btrfs_trans_handle * trans,struct btrfs_inode * dir,u64 index)1510 int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1511 struct btrfs_inode *dir, u64 index)
1512 {
1513 struct btrfs_delayed_node *node;
1514 struct btrfs_delayed_item *item;
1515 struct btrfs_key item_key;
1516 int ret;
1517
1518 node = btrfs_get_or_create_delayed_node(dir);
1519 if (IS_ERR(node))
1520 return PTR_ERR(node);
1521
1522 item_key.objectid = btrfs_ino(dir);
1523 item_key.type = BTRFS_DIR_INDEX_KEY;
1524 item_key.offset = index;
1525
1526 ret = btrfs_delete_delayed_insertion_item(trans->fs_info, node,
1527 &item_key);
1528 if (!ret)
1529 goto end;
1530
1531 item = btrfs_alloc_delayed_item(0);
1532 if (!item) {
1533 ret = -ENOMEM;
1534 goto end;
1535 }
1536
1537 item->key = item_key;
1538
1539 ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, item);
1540 /*
1541 * we have reserved enough space when we start a new transaction,
1542 * so reserving metadata failure is impossible.
1543 */
1544 if (ret < 0) {
1545 btrfs_err(trans->fs_info,
1546 "metadata reservation failed for delayed dir item deltiona, should have been reserved");
1547 btrfs_release_delayed_item(item);
1548 goto end;
1549 }
1550
1551 mutex_lock(&node->mutex);
1552 ret = __btrfs_add_delayed_deletion_item(node, item);
1553 if (unlikely(ret)) {
1554 btrfs_err(trans->fs_info,
1555 "err add delayed dir index item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
1556 index, node->root->objectid, node->inode_id, ret);
1557 btrfs_delayed_item_release_metadata(dir->root, item);
1558 btrfs_release_delayed_item(item);
1559 }
1560 mutex_unlock(&node->mutex);
1561 end:
1562 btrfs_release_delayed_node(node);
1563 return ret;
1564 }
1565
btrfs_inode_delayed_dir_index_count(struct btrfs_inode * inode)1566 int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
1567 {
1568 struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1569
1570 if (!delayed_node)
1571 return -ENOENT;
1572
1573 /*
1574 * Since we have held i_mutex of this directory, it is impossible that
1575 * a new directory index is added into the delayed node and index_cnt
1576 * is updated now. So we needn't lock the delayed node.
1577 */
1578 if (!delayed_node->index_cnt) {
1579 btrfs_release_delayed_node(delayed_node);
1580 return -EINVAL;
1581 }
1582
1583 inode->index_cnt = delayed_node->index_cnt;
1584 btrfs_release_delayed_node(delayed_node);
1585 return 0;
1586 }
1587
btrfs_readdir_get_delayed_items(struct inode * inode,struct list_head * ins_list,struct list_head * del_list)1588 bool btrfs_readdir_get_delayed_items(struct inode *inode,
1589 struct list_head *ins_list,
1590 struct list_head *del_list)
1591 {
1592 struct btrfs_delayed_node *delayed_node;
1593 struct btrfs_delayed_item *item;
1594
1595 delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
1596 if (!delayed_node)
1597 return false;
1598
1599 /*
1600 * We can only do one readdir with delayed items at a time because of
1601 * item->readdir_list.
1602 */
1603 inode_unlock_shared(inode);
1604 inode_lock(inode);
1605
1606 mutex_lock(&delayed_node->mutex);
1607 item = __btrfs_first_delayed_insertion_item(delayed_node);
1608 while (item) {
1609 refcount_inc(&item->refs);
1610 list_add_tail(&item->readdir_list, ins_list);
1611 item = __btrfs_next_delayed_item(item);
1612 }
1613
1614 item = __btrfs_first_delayed_deletion_item(delayed_node);
1615 while (item) {
1616 refcount_inc(&item->refs);
1617 list_add_tail(&item->readdir_list, del_list);
1618 item = __btrfs_next_delayed_item(item);
1619 }
1620 mutex_unlock(&delayed_node->mutex);
1621 /*
1622 * This delayed node is still cached in the btrfs inode, so refs
1623 * must be > 1 now, and we needn't check it is going to be freed
1624 * or not.
1625 *
1626 * Besides that, this function is used to read dir, we do not
1627 * insert/delete delayed items in this period. So we also needn't
1628 * requeue or dequeue this delayed node.
1629 */
1630 refcount_dec(&delayed_node->refs);
1631
1632 return true;
1633 }
1634
btrfs_readdir_put_delayed_items(struct inode * inode,struct list_head * ins_list,struct list_head * del_list)1635 void btrfs_readdir_put_delayed_items(struct inode *inode,
1636 struct list_head *ins_list,
1637 struct list_head *del_list)
1638 {
1639 struct btrfs_delayed_item *curr, *next;
1640
1641 list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1642 list_del(&curr->readdir_list);
1643 if (refcount_dec_and_test(&curr->refs))
1644 kfree(curr);
1645 }
1646
1647 list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1648 list_del(&curr->readdir_list);
1649 if (refcount_dec_and_test(&curr->refs))
1650 kfree(curr);
1651 }
1652
1653 /*
1654 * The VFS is going to do up_read(), so we need to downgrade back to a
1655 * read lock.
1656 */
1657 downgrade_write(&inode->i_rwsem);
1658 }
1659
btrfs_should_delete_dir_index(struct list_head * del_list,u64 index)1660 int btrfs_should_delete_dir_index(struct list_head *del_list,
1661 u64 index)
1662 {
1663 struct btrfs_delayed_item *curr;
1664 int ret = 0;
1665
1666 list_for_each_entry(curr, del_list, readdir_list) {
1667 if (curr->key.offset > index)
1668 break;
1669 if (curr->key.offset == index) {
1670 ret = 1;
1671 break;
1672 }
1673 }
1674 return ret;
1675 }
1676
1677 /*
1678 * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
1679 *
1680 */
btrfs_readdir_delayed_dir_index(struct dir_context * ctx,struct list_head * ins_list)1681 int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
1682 struct list_head *ins_list)
1683 {
1684 struct btrfs_dir_item *di;
1685 struct btrfs_delayed_item *curr, *next;
1686 struct btrfs_key location;
1687 char *name;
1688 int name_len;
1689 int over = 0;
1690 unsigned char d_type;
1691
1692 if (list_empty(ins_list))
1693 return 0;
1694
1695 /*
1696 * Changing the data of the delayed item is impossible. So
1697 * we needn't lock them. And we have held i_mutex of the
1698 * directory, nobody can delete any directory indexes now.
1699 */
1700 list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1701 list_del(&curr->readdir_list);
1702
1703 if (curr->key.offset < ctx->pos) {
1704 if (refcount_dec_and_test(&curr->refs))
1705 kfree(curr);
1706 continue;
1707 }
1708
1709 ctx->pos = curr->key.offset;
1710
1711 di = (struct btrfs_dir_item *)curr->data;
1712 name = (char *)(di + 1);
1713 name_len = btrfs_stack_dir_name_len(di);
1714
1715 d_type = btrfs_filetype_table[di->type];
1716 btrfs_disk_key_to_cpu(&location, &di->location);
1717
1718 over = !dir_emit(ctx, name, name_len,
1719 location.objectid, d_type);
1720
1721 if (refcount_dec_and_test(&curr->refs))
1722 kfree(curr);
1723
1724 if (over)
1725 return 1;
1726 ctx->pos++;
1727 }
1728 return 0;
1729 }
1730
fill_stack_inode_item(struct btrfs_trans_handle * trans,struct btrfs_inode_item * inode_item,struct inode * inode)1731 static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1732 struct btrfs_inode_item *inode_item,
1733 struct inode *inode)
1734 {
1735 btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
1736 btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
1737 btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
1738 btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
1739 btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
1740 btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
1741 btrfs_set_stack_inode_generation(inode_item,
1742 BTRFS_I(inode)->generation);
1743 btrfs_set_stack_inode_sequence(inode_item,
1744 inode_peek_iversion(inode));
1745 btrfs_set_stack_inode_transid(inode_item, trans->transid);
1746 btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1747 btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1748 btrfs_set_stack_inode_block_group(inode_item, 0);
1749
1750 btrfs_set_stack_timespec_sec(&inode_item->atime,
1751 inode->i_atime.tv_sec);
1752 btrfs_set_stack_timespec_nsec(&inode_item->atime,
1753 inode->i_atime.tv_nsec);
1754
1755 btrfs_set_stack_timespec_sec(&inode_item->mtime,
1756 inode->i_mtime.tv_sec);
1757 btrfs_set_stack_timespec_nsec(&inode_item->mtime,
1758 inode->i_mtime.tv_nsec);
1759
1760 btrfs_set_stack_timespec_sec(&inode_item->ctime,
1761 inode->i_ctime.tv_sec);
1762 btrfs_set_stack_timespec_nsec(&inode_item->ctime,
1763 inode->i_ctime.tv_nsec);
1764
1765 btrfs_set_stack_timespec_sec(&inode_item->otime,
1766 BTRFS_I(inode)->i_otime.tv_sec);
1767 btrfs_set_stack_timespec_nsec(&inode_item->otime,
1768 BTRFS_I(inode)->i_otime.tv_nsec);
1769 }
1770
btrfs_fill_inode(struct inode * inode,u32 * rdev)1771 int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1772 {
1773 struct btrfs_delayed_node *delayed_node;
1774 struct btrfs_inode_item *inode_item;
1775
1776 delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
1777 if (!delayed_node)
1778 return -ENOENT;
1779
1780 mutex_lock(&delayed_node->mutex);
1781 if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1782 mutex_unlock(&delayed_node->mutex);
1783 btrfs_release_delayed_node(delayed_node);
1784 return -ENOENT;
1785 }
1786
1787 inode_item = &delayed_node->inode_item;
1788
1789 i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
1790 i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
1791 btrfs_i_size_write(BTRFS_I(inode), btrfs_stack_inode_size(inode_item));
1792 inode->i_mode = btrfs_stack_inode_mode(inode_item);
1793 set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1794 inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1795 BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1796 BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item);
1797
1798 inode_set_iversion_queried(inode,
1799 btrfs_stack_inode_sequence(inode_item));
1800 inode->i_rdev = 0;
1801 *rdev = btrfs_stack_inode_rdev(inode_item);
1802 BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
1803
1804 inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime);
1805 inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime);
1806
1807 inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime);
1808 inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime);
1809
1810 inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime);
1811 inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime);
1812
1813 BTRFS_I(inode)->i_otime.tv_sec =
1814 btrfs_stack_timespec_sec(&inode_item->otime);
1815 BTRFS_I(inode)->i_otime.tv_nsec =
1816 btrfs_stack_timespec_nsec(&inode_item->otime);
1817
1818 inode->i_generation = BTRFS_I(inode)->generation;
1819 BTRFS_I(inode)->index_cnt = (u64)-1;
1820
1821 mutex_unlock(&delayed_node->mutex);
1822 btrfs_release_delayed_node(delayed_node);
1823 return 0;
1824 }
1825
btrfs_delayed_update_inode(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct inode * inode)1826 int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1827 struct btrfs_root *root, struct inode *inode)
1828 {
1829 struct btrfs_delayed_node *delayed_node;
1830 int ret = 0;
1831
1832 delayed_node = btrfs_get_or_create_delayed_node(BTRFS_I(inode));
1833 if (IS_ERR(delayed_node))
1834 return PTR_ERR(delayed_node);
1835
1836 mutex_lock(&delayed_node->mutex);
1837 if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1838 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1839 goto release_node;
1840 }
1841
1842 ret = btrfs_delayed_inode_reserve_metadata(trans, root, BTRFS_I(inode),
1843 delayed_node);
1844 if (ret)
1845 goto release_node;
1846
1847 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1848 set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
1849 delayed_node->count++;
1850 atomic_inc(&root->fs_info->delayed_root->items);
1851 release_node:
1852 mutex_unlock(&delayed_node->mutex);
1853 btrfs_release_delayed_node(delayed_node);
1854 return ret;
1855 }
1856
btrfs_delayed_delete_inode_ref(struct btrfs_inode * inode)1857 int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
1858 {
1859 struct btrfs_fs_info *fs_info = inode->root->fs_info;
1860 struct btrfs_delayed_node *delayed_node;
1861
1862 /*
1863 * we don't do delayed inode updates during log recovery because it
1864 * leads to enospc problems. This means we also can't do
1865 * delayed inode refs
1866 */
1867 if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
1868 return -EAGAIN;
1869
1870 delayed_node = btrfs_get_or_create_delayed_node(inode);
1871 if (IS_ERR(delayed_node))
1872 return PTR_ERR(delayed_node);
1873
1874 /*
1875 * We don't reserve space for inode ref deletion is because:
1876 * - We ONLY do async inode ref deletion for the inode who has only
1877 * one link(i_nlink == 1), it means there is only one inode ref.
1878 * And in most case, the inode ref and the inode item are in the
1879 * same leaf, and we will deal with them at the same time.
1880 * Since we are sure we will reserve the space for the inode item,
1881 * it is unnecessary to reserve space for inode ref deletion.
1882 * - If the inode ref and the inode item are not in the same leaf,
1883 * We also needn't worry about enospc problem, because we reserve
1884 * much more space for the inode update than it needs.
1885 * - At the worst, we can steal some space from the global reservation.
1886 * It is very rare.
1887 */
1888 mutex_lock(&delayed_node->mutex);
1889 if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1890 goto release_node;
1891
1892 set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
1893 delayed_node->count++;
1894 atomic_inc(&fs_info->delayed_root->items);
1895 release_node:
1896 mutex_unlock(&delayed_node->mutex);
1897 btrfs_release_delayed_node(delayed_node);
1898 return 0;
1899 }
1900
__btrfs_kill_delayed_node(struct btrfs_delayed_node * delayed_node)1901 static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1902 {
1903 struct btrfs_root *root = delayed_node->root;
1904 struct btrfs_fs_info *fs_info = root->fs_info;
1905 struct btrfs_delayed_item *curr_item, *prev_item;
1906
1907 mutex_lock(&delayed_node->mutex);
1908 curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
1909 while (curr_item) {
1910 btrfs_delayed_item_release_metadata(root, curr_item);
1911 prev_item = curr_item;
1912 curr_item = __btrfs_next_delayed_item(prev_item);
1913 btrfs_release_delayed_item(prev_item);
1914 }
1915
1916 curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
1917 while (curr_item) {
1918 btrfs_delayed_item_release_metadata(root, curr_item);
1919 prev_item = curr_item;
1920 curr_item = __btrfs_next_delayed_item(prev_item);
1921 btrfs_release_delayed_item(prev_item);
1922 }
1923
1924 if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1925 btrfs_release_delayed_iref(delayed_node);
1926
1927 if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1928 btrfs_delayed_inode_release_metadata(fs_info, delayed_node, false);
1929 btrfs_release_delayed_inode(delayed_node);
1930 }
1931 mutex_unlock(&delayed_node->mutex);
1932 }
1933
btrfs_kill_delayed_inode_items(struct btrfs_inode * inode)1934 void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode)
1935 {
1936 struct btrfs_delayed_node *delayed_node;
1937
1938 delayed_node = btrfs_get_delayed_node(inode);
1939 if (!delayed_node)
1940 return;
1941
1942 __btrfs_kill_delayed_node(delayed_node);
1943 btrfs_release_delayed_node(delayed_node);
1944 }
1945
btrfs_kill_all_delayed_nodes(struct btrfs_root * root)1946 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
1947 {
1948 u64 inode_id = 0;
1949 struct btrfs_delayed_node *delayed_nodes[8];
1950 int i, n;
1951
1952 while (1) {
1953 spin_lock(&root->inode_lock);
1954 n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
1955 (void **)delayed_nodes, inode_id,
1956 ARRAY_SIZE(delayed_nodes));
1957 if (!n) {
1958 spin_unlock(&root->inode_lock);
1959 break;
1960 }
1961
1962 inode_id = delayed_nodes[n - 1]->inode_id + 1;
1963 for (i = 0; i < n; i++) {
1964 /*
1965 * Don't increase refs in case the node is dead and
1966 * about to be removed from the tree in the loop below
1967 */
1968 if (!refcount_inc_not_zero(&delayed_nodes[i]->refs))
1969 delayed_nodes[i] = NULL;
1970 }
1971 spin_unlock(&root->inode_lock);
1972
1973 for (i = 0; i < n; i++) {
1974 if (!delayed_nodes[i])
1975 continue;
1976 __btrfs_kill_delayed_node(delayed_nodes[i]);
1977 btrfs_release_delayed_node(delayed_nodes[i]);
1978 }
1979 }
1980 }
1981
btrfs_destroy_delayed_inodes(struct btrfs_fs_info * fs_info)1982 void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info)
1983 {
1984 struct btrfs_delayed_node *curr_node, *prev_node;
1985
1986 curr_node = btrfs_first_delayed_node(fs_info->delayed_root);
1987 while (curr_node) {
1988 __btrfs_kill_delayed_node(curr_node);
1989
1990 prev_node = curr_node;
1991 curr_node = btrfs_next_delayed_node(curr_node);
1992 btrfs_release_delayed_node(prev_node);
1993 }
1994 }
1995
1996