1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * fs/f2fs/segment.c
4 *
5 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
6 * http://www.samsung.com/
7 */
8 #include <linux/fs.h>
9 #include <linux/f2fs_fs.h>
10 #include <linux/bio.h>
11 #include <linux/blkdev.h>
12 #include <linux/prefetch.h>
13 #include <linux/kthread.h>
14 #include <linux/swap.h>
15 #include <linux/timer.h>
16 #include <linux/freezer.h>
17 #include <linux/sched/signal.h>
18
19 #include "f2fs.h"
20 #include "segment.h"
21 #include "node.h"
22 #include "gc.h"
23 #include <trace/events/f2fs.h>
24
25 #define __reverse_ffz(x) __reverse_ffs(~(x))
26
27 static struct kmem_cache *discard_entry_slab;
28 static struct kmem_cache *discard_cmd_slab;
29 static struct kmem_cache *sit_entry_set_slab;
30 static struct kmem_cache *inmem_entry_slab;
31
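/*
 * Assemble an unsigned long from @str with the first byte in the most
 * significant position, so that the __reverse_* helpers below can treat
 * an f2fs bitmap (bits filled MSB-first within each byte) as a plain word.
 */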
32 static unsigned long __reverse_ulong(unsigned char *str)
33 {
34 unsigned long tmp = 0;
35 int shift = 24, idx = 0;
36
37 #if BITS_PER_LONG == 64
38 shift = 56;
39 #endif
40 while (shift >= 0) {
41 tmp |= (unsigned long)str[idx++] << shift;
42 shift -= BITS_PER_BYTE;
43 }
44 return tmp;
45 }
46
47 /*
48 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
49 * MSB and LSB are reversed in a byte by f2fs_set_bit.
50 */
51 static inline unsigned long __reverse_ffs(unsigned long word)
52 {
53 int num = 0;
54
55 #if BITS_PER_LONG == 64
56 if ((word & 0xffffffff00000000UL) == 0)
57 num += 32;
58 else
59 word >>= 32;
60 #endif
61 if ((word & 0xffff0000) == 0)
62 num += 16;
63 else
64 word >>= 16;
65
66 if ((word & 0xff00) == 0)
67 num += 8;
68 else
69 word >>= 8;
70
71 if ((word & 0xf0) == 0)
72 num += 4;
73 else
74 word >>= 4;
75
76 if ((word & 0xc) == 0)
77 num += 2;
78 else
79 word >>= 2;
80
81 if ((word & 0x2) == 0)
82 num += 1;
83 return num;
84 }
85
86 /*
87 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
88 * f2fs_set_bit makes MSB and LSB reversed in a byte.
89 * @size must be an integral multiple of BITS_PER_LONG.
90 * Example:
91 * MSB <--> LSB
92 * f2fs_set_bit(0, bitmap) => 1000 0000
93 * f2fs_set_bit(7, bitmap) => 0000 0001
94 */
95 static unsigned long __find_rev_next_bit(const unsigned long *addr,
96 unsigned long size, unsigned long offset)
97 {
98 const unsigned long *p = addr + BIT_WORD(offset);
99 unsigned long result = size;
100 unsigned long tmp;
101
102 if (offset >= size)
103 return size;
104
105 size -= (offset & ~(BITS_PER_LONG - 1));
106 offset %= BITS_PER_LONG;
107
108 while (1) {
109 if (*p == 0)
110 goto pass;
111
112 tmp = __reverse_ulong((unsigned char *)p);
113
114 tmp &= ~0UL >> offset;
115 if (size < BITS_PER_LONG)
116 tmp &= (~0UL << (BITS_PER_LONG - size));
117 if (tmp)
118 goto found;
119 pass:
120 if (size <= BITS_PER_LONG)
121 break;
122 size -= BITS_PER_LONG;
123 offset = 0;
124 p++;
125 }
126 return result;
127 found:
128 return result - size + __reverse_ffs(tmp);
129 }
130
131 static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
132 unsigned long size, unsigned long offset)
133 {
134 const unsigned long *p = addr + BIT_WORD(offset);
135 unsigned long result = size;
136 unsigned long tmp;
137
138 if (offset >= size)
139 return size;
140
141 size -= (offset & ~(BITS_PER_LONG - 1));
142 offset %= BITS_PER_LONG;
143
144 while (1) {
145 if (*p == ~0UL)
146 goto pass;
147
148 tmp = __reverse_ulong((unsigned char *)p);
149
150 if (offset)
151 tmp |= ~0UL << (BITS_PER_LONG - offset);
152 if (size < BITS_PER_LONG)
153 tmp |= ~0UL >> size;
154 if (tmp != ~0UL)
155 goto found;
156 pass:
157 if (size <= BITS_PER_LONG)
158 break;
159 size -= BITS_PER_LONG;
160 offset = 0;
161 p++;
162 }
163 return result;
164 found:
165 return result - size + __reverse_ffz(tmp);
166 }
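/*
 * Illustrative sketch (not part of the original code): walking every set
 * bit of a reversed bitmap with these helpers would look like
 *
 *	for (off = __find_rev_next_bit(map, size, 0); off < size;
 *	     off = __find_rev_next_bit(map, size, off + 1))
 *		handle_block(off);
 *
 * where handle_block() is a hypothetical per-block handler.  The dmap scan
 * in add_discard_addrs() further below follows a similar pattern.
 */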
167
168 bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
169 {
170 int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
171 int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
172 int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
173
174 if (f2fs_lfs_mode(sbi))
175 return false;
176 if (sbi->gc_mode == GC_URGENT_HIGH)
177 return true;
178 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
179 return true;
180
181 return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
182 SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
183 }
184
185 void f2fs_register_inmem_page(struct inode *inode, struct page *page)
186 {
187 struct inmem_pages *new;
188
189 set_page_private_atomic(page);
190
191 new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
192
193 /* add atomic page indices to the list */
194 new->page = page;
195 INIT_LIST_HEAD(&new->list);
196
197 /* increase reference count with clean state */
198 get_page(page);
199 mutex_lock(&F2FS_I(inode)->inmem_lock);
200 list_add_tail(&new->list, &F2FS_I(inode)->inmem_pages);
201 inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
202 mutex_unlock(&F2FS_I(inode)->inmem_lock);
203
204 trace_f2fs_register_inmem_page(page, INMEM);
205 }
206
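/*
 * Tear down the in-memory (atomic write) page entries on @head.
 * @drop:    throw the cached update away;
 * @recover: restore the on-disk block address saved in ->old_addr at
 *           commit time;
 * @trylock: use trylock_page() so we never sleep on a page lock while
 *           holding inmem_lock.
 */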
207 static int __revoke_inmem_pages(struct inode *inode,
208 struct list_head *head, bool drop, bool recover,
209 bool trylock)
210 {
211 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
212 struct inmem_pages *cur, *tmp;
213 int err = 0;
214
215 list_for_each_entry_safe(cur, tmp, head, list) {
216 struct page *page = cur->page;
217
218 if (drop)
219 trace_f2fs_commit_inmem_page(page, INMEM_DROP);
220
221 if (trylock) {
222 /*
223 * to avoid a deadlock between the page lock and
224 * inmem_lock.
225 */
226 if (!trylock_page(page))
227 continue;
228 } else {
229 lock_page(page);
230 }
231
232 f2fs_wait_on_page_writeback(page, DATA, true, true);
233
234 if (recover) {
235 struct dnode_of_data dn;
236 struct node_info ni;
237
238 trace_f2fs_commit_inmem_page(page, INMEM_REVOKE);
239 retry:
240 set_new_dnode(&dn, inode, NULL, NULL, 0);
241 err = f2fs_get_dnode_of_data(&dn, page->index,
242 LOOKUP_NODE);
243 if (err) {
244 if (err == -ENOMEM) {
245 congestion_wait(BLK_RW_ASYNC,
246 DEFAULT_IO_TIMEOUT);
247 cond_resched();
248 goto retry;
249 }
250 err = -EAGAIN;
251 goto next;
252 }
253
254 err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
255 if (err) {
256 f2fs_put_dnode(&dn);
257 return err;
258 }
259
260 if (cur->old_addr == NEW_ADDR) {
261 f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
262 f2fs_update_data_blkaddr(&dn, NEW_ADDR);
263 } else
264 f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
265 cur->old_addr, ni.version, true, true);
266 f2fs_put_dnode(&dn);
267 }
268 next:
269 /* we don't need to invalidate this in the successful case */
270 if (drop || recover) {
271 ClearPageUptodate(page);
272 clear_page_private_gcing(page);
273 }
274 detach_page_private(page);
275 set_page_private(page, 0);
276 f2fs_put_page(page, 1);
277
278 list_del(&cur->list);
279 kmem_cache_free(inmem_entry_slab, cur);
280 dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
281 }
282 return err;
283 }
284
285 void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
286 {
287 struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
288 struct inode *inode;
289 struct f2fs_inode_info *fi;
290 unsigned int count = sbi->atomic_files;
291 unsigned int looped = 0;
292 next:
293 spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
294 if (list_empty(head)) {
295 spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
296 return;
297 }
298 fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
299 inode = igrab(&fi->vfs_inode);
300 if (inode)
301 list_move_tail(&fi->inmem_ilist, head);
302 spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
303
304 if (inode) {
305 if (gc_failure) {
306 if (!fi->i_gc_failures[GC_FAILURE_ATOMIC])
307 goto skip;
308 }
309 set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
310 f2fs_drop_inmem_pages(inode);
311 skip:
312 iput(inode);
313 }
314 congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
315 cond_resched();
316 if (gc_failure) {
317 if (++looped >= count)
318 return;
319 }
320 goto next;
321 }
322
323 void f2fs_drop_inmem_pages(struct inode *inode)
324 {
325 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
326 struct f2fs_inode_info *fi = F2FS_I(inode);
327
328 do {
329 mutex_lock(&fi->inmem_lock);
330 if (list_empty(&fi->inmem_pages)) {
331 fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
332
333 spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
334 if (!list_empty(&fi->inmem_ilist))
335 list_del_init(&fi->inmem_ilist);
336 if (f2fs_is_atomic_file(inode)) {
337 clear_inode_flag(inode, FI_ATOMIC_FILE);
338 sbi->atomic_files--;
339 }
340 spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
341
342 mutex_unlock(&fi->inmem_lock);
343 break;
344 }
345 __revoke_inmem_pages(inode, &fi->inmem_pages,
346 true, false, true);
347 mutex_unlock(&fi->inmem_lock);
348 } while (1);
349 }
350
351 void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
352 {
353 struct f2fs_inode_info *fi = F2FS_I(inode);
354 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
355 struct list_head *head = &fi->inmem_pages;
356 struct inmem_pages *cur = NULL;
357 struct inmem_pages *tmp;
358
359 f2fs_bug_on(sbi, !page_private_atomic(page));
360
361 mutex_lock(&fi->inmem_lock);
362 list_for_each_entry(tmp, head, list) {
363 if (tmp->page == page) {
364 cur = tmp;
365 break;
366 }
367 }
368
369 f2fs_bug_on(sbi, !cur);
370 list_del(&cur->list);
371 mutex_unlock(&fi->inmem_lock);
372
373 dec_page_count(sbi, F2FS_INMEM_PAGES);
374 kmem_cache_free(inmem_entry_slab, cur);
375
376 ClearPageUptodate(page);
377 clear_page_private_atomic(page);
378 f2fs_put_page(page, 0);
379
380 detach_page_private(page);
381 set_page_private(page, 0);
382
383 trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
384 }
385
386 static int __f2fs_commit_inmem_pages(struct inode *inode)
387 {
388 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
389 struct f2fs_inode_info *fi = F2FS_I(inode);
390 struct inmem_pages *cur, *tmp;
391 struct f2fs_io_info fio = {
392 .sbi = sbi,
393 .ino = inode->i_ino,
394 .type = DATA,
395 .op = REQ_OP_WRITE,
396 .op_flags = REQ_SYNC | REQ_PRIO,
397 .io_type = FS_DATA_IO,
398 };
399 struct list_head revoke_list;
400 bool submit_bio = false;
401 int err = 0;
402
403 INIT_LIST_HEAD(&revoke_list);
404
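/*
 * Write back every registered atomic page in order.  Pages whose mapping
 * has changed (e.g. truncated) are just moved to revoke_list; pages that
 * are written remember their previous block address in ->old_addr so a
 * failed commit can be rolled back below.
 */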
405 list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
406 struct page *page = cur->page;
407
408 lock_page(page);
409 if (page->mapping == inode->i_mapping) {
410 trace_f2fs_commit_inmem_page(page, INMEM);
411
412 f2fs_wait_on_page_writeback(page, DATA, true, true);
413
414 set_page_dirty(page);
415 if (clear_page_dirty_for_io(page)) {
416 inode_dec_dirty_pages(inode);
417 f2fs_remove_dirty_inode(inode);
418 }
419 retry:
420 fio.page = page;
421 fio.old_blkaddr = NULL_ADDR;
422 fio.encrypted_page = NULL;
423 fio.need_lock = LOCK_DONE;
424 err = f2fs_do_write_data_page(&fio);
425 if (err) {
426 if (err == -ENOMEM) {
427 congestion_wait(BLK_RW_ASYNC,
428 DEFAULT_IO_TIMEOUT);
429 cond_resched();
430 goto retry;
431 }
432 unlock_page(page);
433 break;
434 }
435 /* record old blkaddr for revoking */
436 cur->old_addr = fio.old_blkaddr;
437 submit_bio = true;
438 }
439 unlock_page(page);
440 list_move_tail(&cur->list, &revoke_list);
441 }
442
443 if (submit_bio)
444 f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA);
445
446 if (err) {
447 /*
448 * Try to revoke all committed pages, but this could still fail
449 * due to lack of memory or some other reason.  If that happens,
450 * -EAGAIN will be returned, meaning the transaction is no longer
451 * consistent and the caller should use its journal to recover,
452 * or rewrite and commit the last transaction.  For any other
453 * error number, revoking was done by the filesystem itself.
454 */
455 err = __revoke_inmem_pages(inode, &revoke_list,
456 false, true, false);
457
458 /* drop all uncommitted pages */
459 __revoke_inmem_pages(inode, &fi->inmem_pages,
460 true, false, false);
461 } else {
462 __revoke_inmem_pages(inode, &revoke_list,
463 false, false, false);
464 }
465
466 return err;
467 }
468
469 int f2fs_commit_inmem_pages(struct inode *inode)
470 {
471 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
472 struct f2fs_inode_info *fi = F2FS_I(inode);
473 int err;
474
475 f2fs_balance_fs(sbi, true);
476
477 f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
478
479 f2fs_lock_op(sbi);
480 set_inode_flag(inode, FI_ATOMIC_COMMIT);
481
482 mutex_lock(&fi->inmem_lock);
483 err = __f2fs_commit_inmem_pages(inode);
484 mutex_unlock(&fi->inmem_lock);
485
486 clear_inode_flag(inode, FI_ATOMIC_COMMIT);
487
488 f2fs_unlock_op(sbi);
489 f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
490
491 return err;
492 }
493
494 /*
495 * This function balances dirty node and dentry pages.
496 * In addition, it controls garbage collection.
497 */
498 void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
499 {
500 if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
501 f2fs_show_injection_info(sbi, FAULT_CHECKPOINT);
502 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_FAULT_INJECT);
503 }
504
505 /* f2fs_balance_fs_bg() may still be pending */
506 if (need && excess_cached_nats(sbi))
507 f2fs_balance_fs_bg(sbi, false);
508
509 if (!f2fs_is_checkpoint_ready(sbi))
510 return;
511
512 /*
513 * We should do GC, or end up with a checkpoint, if there are too many
514 * dirty dir/node pages and not enough free segments.
515 */
516 if (has_not_enough_free_secs(sbi, 0, 0)) {
517 if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
518 sbi->gc_thread->f2fs_gc_task) {
519 DEFINE_WAIT(wait);
520
521 prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
522 TASK_UNINTERRUPTIBLE);
523 wake_up(&sbi->gc_thread->gc_wait_queue_head);
524 io_schedule();
525 finish_wait(&sbi->gc_thread->fggc_wq, &wait);
526 } else {
527 f2fs_down_write(&sbi->gc_lock);
528 f2fs_gc(sbi, false, false, false, NULL_SEGNO);
529 }
530 }
531 }
532
533 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
534 {
535 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
536 return;
537
538 /* try to shrink the read extent cache when there is not enough memory */
539 if (!f2fs_available_free_memory(sbi, READ_EXTENT_CACHE))
540 f2fs_shrink_read_extent_tree(sbi,
541 READ_EXTENT_CACHE_SHRINK_NUMBER);
542
543 /* try to shrink the age extent cache when there is not enough memory */
544 if (!f2fs_available_free_memory(sbi, AGE_EXTENT_CACHE))
545 f2fs_shrink_age_extent_tree(sbi,
546 AGE_EXTENT_CACHE_SHRINK_NUMBER);
547
548 /* check the # of cached NAT entries */
549 if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
550 f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
551
552 if (!f2fs_available_free_memory(sbi, FREE_NIDS))
553 f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
554 else
555 f2fs_build_free_nids(sbi, false, false);
556
557 if (excess_dirty_nats(sbi) || excess_dirty_nodes(sbi) ||
558 excess_prefree_segs(sbi))
559 goto do_sync;
560
561 /* there is in-flight background IO, or a foreground operation happened recently */
562 if (is_inflight_io(sbi, REQ_TIME) ||
563 (!f2fs_time_over(sbi, REQ_TIME) && f2fs_rwsem_is_locked(&sbi->cp_rwsem)))
564 return;
565
566 /* the periodic checkpoint timeout threshold has been exceeded */
567 if (f2fs_time_over(sbi, CP_TIME))
568 goto do_sync;
569
570 /* checkpoint is the only way to shrink partial cached entries */
571 if (f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
572 f2fs_available_free_memory(sbi, INO_ENTRIES))
573 return;
574
575 do_sync:
576 if (test_opt(sbi, DATA_FLUSH) && from_bg) {
577 struct blk_plug plug;
578
579 mutex_lock(&sbi->flush_lock);
580
581 blk_start_plug(&plug);
582 f2fs_sync_dirty_inodes(sbi, FILE_INODE, NULL);
583 blk_finish_plug(&plug);
584
585 mutex_unlock(&sbi->flush_lock);
586 }
587 f2fs_sync_fs(sbi->sb, true);
588 stat_inc_bg_cp_count(sbi->stat_info);
589 }
590
591 static int __submit_flush_wait(struct f2fs_sb_info *sbi,
592 struct block_device *bdev)
593 {
594 int ret = blkdev_issue_flush(bdev, GFP_NOFS);
595
596 trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
597 test_opt(sbi, FLUSH_MERGE), ret);
598 return ret;
599 }
600
601 static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
602 {
603 int ret = 0;
604 int i;
605
606 if (!f2fs_is_multi_device(sbi))
607 return __submit_flush_wait(sbi, sbi->sb->s_bdev);
608
609 for (i = 0; i < sbi->s_ndevs; i++) {
610 if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
611 continue;
612 ret = __submit_flush_wait(sbi, FDEV(i).bdev);
613 if (ret)
614 break;
615 }
616 return ret;
617 }
618
619 static int issue_flush_thread(void *data)
620 {
621 struct f2fs_sb_info *sbi = data;
622 struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
623 wait_queue_head_t *q = &fcc->flush_wait_queue;
624 repeat:
625 if (kthread_should_stop())
626 return 0;
627
628 if (!llist_empty(&fcc->issue_list)) {
629 struct flush_cmd *cmd, *next;
630 int ret;
631
632 fcc->dispatch_list = llist_del_all(&fcc->issue_list);
633 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
634
635 cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);
636
637 ret = submit_flush_wait(sbi, cmd->ino);
638 atomic_inc(&fcc->issued_flush);
639
640 llist_for_each_entry_safe(cmd, next,
641 fcc->dispatch_list, llnode) {
642 cmd->ret = ret;
643 complete(&cmd->wait);
644 }
645 fcc->dispatch_list = NULL;
646 }
647
648 wait_event_interruptible(*q,
649 kthread_should_stop() || !llist_empty(&fcc->issue_list));
650 goto repeat;
651 }
652
653 int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
654 {
655 struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
656 struct flush_cmd cmd;
657 int ret;
658
659 if (test_opt(sbi, NOBARRIER))
660 return 0;
661
662 if (!test_opt(sbi, FLUSH_MERGE)) {
663 atomic_inc(&fcc->queued_flush);
664 ret = submit_flush_wait(sbi, ino);
665 atomic_dec(&fcc->queued_flush);
666 atomic_inc(&fcc->issued_flush);
667 return ret;
668 }
669
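/*
 * If we are the only flush in the queue there is nothing to merge with,
 * and on multi-device setups submit_flush_wait() only targets the devices
 * dirtied by this inode, so issue the flush directly in both cases.
 */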
670 if (atomic_inc_return(&fcc->queued_flush) == 1 ||
671 f2fs_is_multi_device(sbi)) {
672 ret = submit_flush_wait(sbi, ino);
673 atomic_dec(&fcc->queued_flush);
674
675 atomic_inc(&fcc->issued_flush);
676 return ret;
677 }
678
679 cmd.ino = ino;
680 init_completion(&cmd.wait);
681
682 llist_add(&cmd.llnode, &fcc->issue_list);
683
684 /*
685 * Update issue_list before we wake up the issue_flush thread; this
686 * smp_mb() pairs with another barrier in ___wait_event().  See the
687 * comments of waitqueue_active() for more details.
688 */
689 smp_mb();
690
691 if (waitqueue_active(&fcc->flush_wait_queue))
692 wake_up(&fcc->flush_wait_queue);
693
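/*
 * If the flush thread is running, wait for it to service our command.
 * Otherwise grab the whole issue_list ourselves: if someone else already
 * took it, our command will be completed by them; else submit the flush
 * here and complete every queued waiter.
 */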
694 if (fcc->f2fs_issue_flush) {
695 wait_for_completion(&cmd.wait);
696 atomic_dec(&fcc->queued_flush);
697 } else {
698 struct llist_node *list;
699
700 list = llist_del_all(&fcc->issue_list);
701 if (!list) {
702 wait_for_completion(&cmd.wait);
703 atomic_dec(&fcc->queued_flush);
704 } else {
705 struct flush_cmd *tmp, *next;
706
707 ret = submit_flush_wait(sbi, ino);
708
709 llist_for_each_entry_safe(tmp, next, list, llnode) {
710 if (tmp == &cmd) {
711 cmd.ret = ret;
712 atomic_dec(&fcc->queued_flush);
713 continue;
714 }
715 tmp->ret = ret;
716 complete(&tmp->wait);
717 }
718 }
719 }
720
721 return cmd.ret;
722 }
723
724 int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
725 {
726 dev_t dev = sbi->sb->s_bdev->bd_dev;
727 struct flush_cmd_control *fcc;
728 int err = 0;
729
730 if (SM_I(sbi)->fcc_info) {
731 fcc = SM_I(sbi)->fcc_info;
732 if (fcc->f2fs_issue_flush)
733 return err;
734 goto init_thread;
735 }
736
737 fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
738 if (!fcc)
739 return -ENOMEM;
740 atomic_set(&fcc->issued_flush, 0);
741 atomic_set(&fcc->queued_flush, 0);
742 init_waitqueue_head(&fcc->flush_wait_queue);
743 init_llist_head(&fcc->issue_list);
744 SM_I(sbi)->fcc_info = fcc;
745 if (!test_opt(sbi, FLUSH_MERGE))
746 return err;
747
748 init_thread:
749 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
750 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
751 if (IS_ERR(fcc->f2fs_issue_flush)) {
752 err = PTR_ERR(fcc->f2fs_issue_flush);
753 kfree(fcc);
754 SM_I(sbi)->fcc_info = NULL;
755 return err;
756 }
757
758 return err;
759 }
760
761 void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
762 {
763 struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
764
765 if (fcc && fcc->f2fs_issue_flush) {
766 struct task_struct *flush_thread = fcc->f2fs_issue_flush;
767
768 fcc->f2fs_issue_flush = NULL;
769 kthread_stop(flush_thread);
770 }
771 if (free) {
772 kfree(fcc);
773 SM_I(sbi)->fcc_info = NULL;
774 }
775 }
776
777 int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
778 {
779 int ret = 0, i;
780
781 if (!f2fs_is_multi_device(sbi))
782 return 0;
783
784 if (test_opt(sbi, NOBARRIER))
785 return 0;
786
787 for (i = 1; i < sbi->s_ndevs; i++) {
788 if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
789 continue;
790 ret = __submit_flush_wait(sbi, FDEV(i).bdev);
791 if (ret) {
792 f2fs_stop_checkpoint(sbi, false,
793 STOP_CP_REASON_FLUSH_FAIL);
794 break;
795 }
796
797 spin_lock(&sbi->dev_lock);
798 f2fs_clear_bit(i, (char *)&sbi->dirty_device);
799 spin_unlock(&sbi->dev_lock);
800 }
801
802 return ret;
803 }
804
805 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
806 enum dirty_type dirty_type)
807 {
808 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
809
810 /* need not be added */
811 if (IS_CURSEG(sbi, segno))
812 return;
813
814 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
815 dirty_i->nr_dirty[dirty_type]++;
816
817 if (dirty_type == DIRTY) {
818 struct seg_entry *sentry = get_seg_entry(sbi, segno);
819 enum dirty_type t = sentry->type;
820
821 if (unlikely(t >= DIRTY)) {
822 f2fs_bug_on(sbi, 1);
823 return;
824 }
825 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
826 dirty_i->nr_dirty[t]++;
827
828 if (__is_large_section(sbi)) {
829 unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
830 block_t valid_blocks =
831 get_valid_blocks(sbi, segno, true);
832
833 f2fs_bug_on(sbi, unlikely(!valid_blocks ||
834 valid_blocks == BLKS_PER_SEC(sbi)));
835
836 if (!IS_CURSEC(sbi, secno))
837 set_bit(secno, dirty_i->dirty_secmap);
838 }
839 }
840 }
841
842 static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
843 enum dirty_type dirty_type)
844 {
845 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
846 block_t valid_blocks;
847
848 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
849 dirty_i->nr_dirty[dirty_type]--;
850
851 if (dirty_type == DIRTY) {
852 struct seg_entry *sentry = get_seg_entry(sbi, segno);
853 enum dirty_type t = sentry->type;
854
855 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
856 dirty_i->nr_dirty[t]--;
857
858 valid_blocks = get_valid_blocks(sbi, segno, true);
859 if (valid_blocks == 0) {
860 clear_bit(GET_SEC_FROM_SEG(sbi, segno),
861 dirty_i->victim_secmap);
862 #ifdef CONFIG_F2FS_CHECK_FS
863 clear_bit(segno, SIT_I(sbi)->invalid_segmap);
864 #endif
865 }
866 if (__is_large_section(sbi)) {
867 unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
868
869 if (!valid_blocks ||
870 valid_blocks == BLKS_PER_SEC(sbi)) {
871 clear_bit(secno, dirty_i->dirty_secmap);
872 return;
873 }
874
875 if (!IS_CURSEC(sbi, secno))
876 set_bit(secno, dirty_i->dirty_secmap);
877 }
878 }
879 }
880
881 /*
882 * Errors such as -ENOMEM should not occur here.
883 * Adding a dirty entry into the seglist is not a critical operation.
884 * If a given segment is one of the current working segments, it won't be added.
885 */
886 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
887 {
888 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
889 unsigned short valid_blocks, ckpt_valid_blocks;
890 unsigned int usable_blocks;
891
892 if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
893 return;
894
895 usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
896 mutex_lock(&dirty_i->seglist_lock);
897
898 valid_blocks = get_valid_blocks(sbi, segno, false);
899 ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno, false);
900
901 if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
902 ckpt_valid_blocks == usable_blocks)) {
903 __locate_dirty_segment(sbi, segno, PRE);
904 __remove_dirty_segment(sbi, segno, DIRTY);
905 } else if (valid_blocks < usable_blocks) {
906 __locate_dirty_segment(sbi, segno, DIRTY);
907 } else {
908 /* Recovery routine with SSR needs this */
909 __remove_dirty_segment(sbi, segno, DIRTY);
910 }
911
912 mutex_unlock(&dirty_i->seglist_lock);
913 }
914
915 /* This moves currently empty dirty blocks to prefree. Must hold seglist_lock */
916 void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
917 {
918 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
919 unsigned int segno;
920
921 mutex_lock(&dirty_i->seglist_lock);
922 for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
923 if (get_valid_blocks(sbi, segno, false))
924 continue;
925 if (IS_CURSEG(sbi, segno))
926 continue;
927 __locate_dirty_segment(sbi, segno, PRE);
928 __remove_dirty_segment(sbi, segno, DIRTY);
929 }
930 mutex_unlock(&dirty_i->seglist_lock);
931 }
932
933 block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
934 {
935 int ovp_hole_segs =
936 (overprovision_segments(sbi) - reserved_segments(sbi));
937 block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg;
938 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
939 block_t holes[2] = {0, 0}; /* DATA and NODE */
940 block_t unusable;
941 struct seg_entry *se;
942 unsigned int segno;
943
944 mutex_lock(&dirty_i->seglist_lock);
945 for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
946 se = get_seg_entry(sbi, segno);
947 if (IS_NODESEG(se->type))
948 holes[NODE] += f2fs_usable_blks_in_seg(sbi, segno) -
949 se->valid_blocks;
950 else
951 holes[DATA] += f2fs_usable_blks_in_seg(sbi, segno) -
952 se->valid_blocks;
953 }
954 mutex_unlock(&dirty_i->seglist_lock);
955
956 unusable = holes[DATA] > holes[NODE] ? holes[DATA] : holes[NODE];
957 if (unusable > ovp_holes)
958 return unusable - ovp_holes;
959 return 0;
960 }
961
962 int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
963 {
964 int ovp_hole_segs =
965 (overprovision_segments(sbi) - reserved_segments(sbi));
966 if (unusable > F2FS_OPTION(sbi).unusable_cap)
967 return -EAGAIN;
968 if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
969 dirty_segments(sbi) > ovp_hole_segs)
970 return -EAGAIN;
971 return 0;
972 }
973
974 /* This is only used by SBI_CP_DISABLED */
975 static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
976 {
977 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
978 unsigned int segno = 0;
979
980 mutex_lock(&dirty_i->seglist_lock);
981 for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
982 if (get_valid_blocks(sbi, segno, false))
983 continue;
984 if (get_ckpt_valid_blocks(sbi, segno, false))
985 continue;
986 mutex_unlock(&dirty_i->seglist_lock);
987 return segno;
988 }
989 mutex_unlock(&dirty_i->seglist_lock);
990 return NULL_SEGNO;
991 }
992
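/*
 * Allocate a discard command covering the logical range
 * [lstart, lstart + len) at physical block @start on @bdev, and queue it
 * on the pending list bucket selected by its length.
 */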
993 static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
994 struct block_device *bdev, block_t lstart,
995 block_t start, block_t len)
996 {
997 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
998 struct list_head *pend_list;
999 struct discard_cmd *dc;
1000
1001 f2fs_bug_on(sbi, !len);
1002
1003 pend_list = &dcc->pend_list[plist_idx(len)];
1004
1005 dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
1006 INIT_LIST_HEAD(&dc->list);
1007 dc->bdev = bdev;
1008 dc->lstart = lstart;
1009 dc->start = start;
1010 dc->len = len;
1011 dc->ref = 0;
1012 dc->state = D_PREP;
1013 dc->queued = 0;
1014 dc->error = 0;
1015 init_completion(&dc->wait);
1016 list_add_tail(&dc->list, pend_list);
1017 spin_lock_init(&dc->lock);
1018 dc->bio_ref = 0;
1019 atomic_inc(&dcc->discard_cmd_cnt);
1020 dcc->undiscard_blks += len;
1021
1022 return dc;
1023 }
1024
1025 static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
1026 struct block_device *bdev, block_t lstart,
1027 block_t start, block_t len,
1028 struct rb_node *parent, struct rb_node **p,
1029 bool leftmost)
1030 {
1031 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1032 struct discard_cmd *dc;
1033
1034 dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
1035
1036 rb_link_node(&dc->rb_node, parent, p);
1037 rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
1038
1039 return dc;
1040 }
1041
1042 static void __detach_discard_cmd(struct discard_cmd_control *dcc,
1043 struct discard_cmd *dc)
1044 {
1045 if (dc->state == D_DONE)
1046 atomic_sub(dc->queued, &dcc->queued_discard);
1047
1048 list_del(&dc->list);
1049 rb_erase_cached(&dc->rb_node, &dcc->root);
1050 dcc->undiscard_blks -= dc->len;
1051
1052 kmem_cache_free(discard_cmd_slab, dc);
1053
1054 atomic_dec(&dcc->discard_cmd_cnt);
1055 }
1056
1057 static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
1058 struct discard_cmd *dc)
1059 {
1060 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1061 unsigned long flags;
1062
1063 trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);
1064
1065 spin_lock_irqsave(&dc->lock, flags);
1066 if (dc->bio_ref) {
1067 spin_unlock_irqrestore(&dc->lock, flags);
1068 return;
1069 }
1070 spin_unlock_irqrestore(&dc->lock, flags);
1071
1072 f2fs_bug_on(sbi, dc->ref);
1073
1074 if (dc->error == -EOPNOTSUPP)
1075 dc->error = 0;
1076
1077 if (dc->error)
1078 printk_ratelimited(
1079 "%sF2FS-fs (%s): Issue discard(%u, %u, %u) failed, ret: %d",
1080 KERN_INFO, sbi->sb->s_id,
1081 dc->lstart, dc->start, dc->len, dc->error);
1082 __detach_discard_cmd(dcc, dc);
1083 }
1084
1085 static void f2fs_submit_discard_endio(struct bio *bio)
1086 {
1087 struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
1088 unsigned long flags;
1089
1090 spin_lock_irqsave(&dc->lock, flags);
1091 if (!dc->error)
1092 dc->error = blk_status_to_errno(bio->bi_status);
1093 dc->bio_ref--;
1094 if (!dc->bio_ref && dc->state == D_SUBMIT) {
1095 dc->state = D_DONE;
1096 complete_all(&dc->wait);
1097 }
1098 spin_unlock_irqrestore(&dc->lock, flags);
1099 bio_put(bio);
1100 }
1101
1102 static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
1103 block_t start, block_t end)
1104 {
1105 #ifdef CONFIG_F2FS_CHECK_FS
1106 struct seg_entry *sentry;
1107 unsigned int segno;
1108 block_t blk = start;
1109 unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
1110 unsigned long *map;
1111
1112 while (blk < end) {
1113 segno = GET_SEGNO(sbi, blk);
1114 sentry = get_seg_entry(sbi, segno);
1115 offset = GET_BLKOFF_FROM_SEG0(sbi, blk);
1116
1117 if (end < START_BLOCK(sbi, segno + 1))
1118 size = GET_BLKOFF_FROM_SEG0(sbi, end);
1119 else
1120 size = max_blocks;
1121 map = (unsigned long *)(sentry->cur_valid_map);
1122 offset = __find_rev_next_bit(map, size, offset);
1123 f2fs_bug_on(sbi, offset != size);
1124 blk = START_BLOCK(sbi, segno + 1);
1125 }
1126 #endif
1127 }
1128
1129 static void __init_discard_policy(struct f2fs_sb_info *sbi,
1130 struct discard_policy *dpolicy,
1131 int discard_type, unsigned int granularity)
1132 {
1133 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1134
1135 /* common policy */
1136 dpolicy->type = discard_type;
1137 dpolicy->sync = true;
1138 dpolicy->ordered = false;
1139 dpolicy->granularity = granularity;
1140
1141 dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
1142 dpolicy->io_aware_gran = MAX_PLIST_NUM;
1143 dpolicy->timeout = false;
1144
1145 if (discard_type == DPOLICY_BG) {
1146 dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1147 dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
1148 dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
1149 dpolicy->io_aware = true;
1150 dpolicy->sync = false;
1151 dpolicy->ordered = true;
1152 if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
1153 dpolicy->granularity = 1;
1154 if (atomic_read(&dcc->discard_cmd_cnt))
1155 dpolicy->max_interval =
1156 DEF_MIN_DISCARD_ISSUE_TIME;
1157 }
1158 } else if (discard_type == DPOLICY_FORCE) {
1159 dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1160 dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
1161 dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
1162 dpolicy->io_aware = false;
1163 } else if (discard_type == DPOLICY_FSTRIM) {
1164 dpolicy->io_aware = false;
1165 } else if (discard_type == DPOLICY_UMOUNT) {
1166 dpolicy->io_aware = false;
1167 /* we need to issue all to keep CP_TRIMMED_FLAG */
1168 dpolicy->granularity = 1;
1169 dpolicy->timeout = true;
1170 }
1171 }
1172
1173 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1174 struct block_device *bdev, block_t lstart,
1175 block_t start, block_t len);
1176 /* this function is copied from blkdev_issue_discard() in block/blk-lib.c */
1177 static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
1178 struct discard_policy *dpolicy,
1179 struct discard_cmd *dc,
1180 unsigned int *issued)
1181 {
1182 struct block_device *bdev = dc->bdev;
1183 struct request_queue *q = bdev_get_queue(bdev);
1184 unsigned int max_discard_blocks =
1185 SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
1186 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1187 struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1188 &(dcc->fstrim_list) : &(dcc->wait_list);
1189 int flag = dpolicy->sync ? REQ_SYNC : 0;
1190 block_t lstart, start, len, total_len;
1191 int err = 0;
1192
1193 if (dc->state != D_PREP)
1194 return 0;
1195
1196 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
1197 return 0;
1198
1199 trace_f2fs_issue_discard(bdev, dc->start, dc->len);
1200
1201 lstart = dc->lstart;
1202 start = dc->start;
1203 len = dc->len;
1204 total_len = len;
1205
1206 dc->len = 0;
1207
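/*
 * Issue the range in chunks of at most max_discard_blocks.  dc->len is
 * rebuilt to cover only what was actually submitted; any remainder left
 * when the request budget runs out is put back into the discard tree
 * after the loop.
 */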
1208 while (total_len && *issued < dpolicy->max_requests && !err) {
1209 struct bio *bio = NULL;
1210 unsigned long flags;
1211 bool last = true;
1212
1213 if (len > max_discard_blocks) {
1214 len = max_discard_blocks;
1215 last = false;
1216 }
1217
1218 (*issued)++;
1219 if (*issued == dpolicy->max_requests)
1220 last = true;
1221
1222 dc->len += len;
1223
1224 if (time_to_inject(sbi, FAULT_DISCARD)) {
1225 f2fs_show_injection_info(sbi, FAULT_DISCARD);
1226 err = -EIO;
1227 goto submit;
1228 }
1229 err = __blkdev_issue_discard(bdev,
1230 SECTOR_FROM_BLOCK(start),
1231 SECTOR_FROM_BLOCK(len),
1232 GFP_NOFS, 0, &bio);
1233 submit:
1234 if (err) {
1235 spin_lock_irqsave(&dc->lock, flags);
1236 if (dc->state == D_PARTIAL)
1237 dc->state = D_SUBMIT;
1238 spin_unlock_irqrestore(&dc->lock, flags);
1239
1240 break;
1241 }
1242
1243 f2fs_bug_on(sbi, !bio);
1244
1245 /*
1246 * this must be done before submission to avoid the state
1247 * becoming D_DONE right away
1248 */
1249 spin_lock_irqsave(&dc->lock, flags);
1250 if (last)
1251 dc->state = D_SUBMIT;
1252 else
1253 dc->state = D_PARTIAL;
1254 dc->bio_ref++;
1255 spin_unlock_irqrestore(&dc->lock, flags);
1256
1257 atomic_inc(&dcc->queued_discard);
1258 dc->queued++;
1259 list_move_tail(&dc->list, wait_list);
1260
1261 /* sanity check on discard range */
1262 __check_sit_bitmap(sbi, lstart, lstart + len);
1263
1264 bio->bi_private = dc;
1265 bio->bi_end_io = f2fs_submit_discard_endio;
1266 bio->bi_opf |= flag;
1267 submit_bio(bio);
1268
1269 atomic_inc(&dcc->issued_discard);
1270
1271 f2fs_update_iostat(sbi, FS_DISCARD, 1);
1272
1273 lstart += len;
1274 start += len;
1275 total_len -= len;
1276 len = total_len;
1277 }
1278
1279 if (!err && len) {
1280 dcc->undiscard_blks -= len;
1281 __update_discard_tree_range(sbi, bdev, lstart, start, len);
1282 }
1283 return err;
1284 }
1285
1286 static void __insert_discard_tree(struct f2fs_sb_info *sbi,
1287 struct block_device *bdev, block_t lstart,
1288 block_t start, block_t len,
1289 struct rb_node **insert_p,
1290 struct rb_node *insert_parent)
1291 {
1292 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1293 struct rb_node **p;
1294 struct rb_node *parent = NULL;
1295 bool leftmost = true;
1296
1297 if (insert_p && insert_parent) {
1298 parent = insert_parent;
1299 p = insert_p;
1300 goto do_insert;
1301 }
1302
1303 p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent,
1304 lstart, &leftmost);
1305 do_insert:
1306 __attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
1307 p, leftmost);
1308 }
1309
1310 static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
1311 struct discard_cmd *dc)
1312 {
1313 list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]);
1314 }
1315
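/*
 * Carve @blkaddr out of a pending discard command: shrink the command for
 * the part in front of @blkaddr and, if needed, insert a second command
 * for the part behind it.  Commands that are already done or only one
 * block long are removed entirely.
 */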
1316 static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
1317 struct discard_cmd *dc, block_t blkaddr)
1318 {
1319 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1320 struct discard_info di = dc->di;
1321 bool modified = false;
1322
1323 if (dc->state == D_DONE || dc->len == 1) {
1324 __remove_discard_cmd(sbi, dc);
1325 return;
1326 }
1327
1328 dcc->undiscard_blks -= di.len;
1329
1330 if (blkaddr > di.lstart) {
1331 dc->len = blkaddr - dc->lstart;
1332 dcc->undiscard_blks += dc->len;
1333 __relocate_discard_cmd(dcc, dc);
1334 modified = true;
1335 }
1336
1337 if (blkaddr < di.lstart + di.len - 1) {
1338 if (modified) {
1339 __insert_discard_tree(sbi, dc->bdev, blkaddr + 1,
1340 di.start + blkaddr + 1 - di.lstart,
1341 di.lstart + di.len - 1 - blkaddr,
1342 NULL, NULL);
1343 } else {
1344 dc->lstart++;
1345 dc->len--;
1346 dc->start++;
1347 dcc->undiscard_blks += dc->len;
1348 __relocate_discard_cmd(dcc, dc);
1349 }
1350 }
1351 }
1352
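/*
 * Merge the logical range [lstart, lstart + len) into the discard rb-tree,
 * extending an adjacent D_PREP command on the same bdev when the combined
 * length stays within max_discard_blocks, and inserting new commands for
 * any uncovered gaps.
 */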
1353 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1354 struct block_device *bdev, block_t lstart,
1355 block_t start, block_t len)
1356 {
1357 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1358 struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1359 struct discard_cmd *dc;
1360 struct discard_info di = {0};
1361 struct rb_node **insert_p = NULL, *insert_parent = NULL;
1362 struct request_queue *q = bdev_get_queue(bdev);
1363 unsigned int max_discard_blocks =
1364 SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
1365 block_t end = lstart + len;
1366
1367 dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
1368 NULL, lstart,
1369 (struct rb_entry **)&prev_dc,
1370 (struct rb_entry **)&next_dc,
1371 &insert_p, &insert_parent, true, NULL);
1372 if (dc)
1373 prev_dc = dc;
1374
1375 if (!prev_dc) {
1376 di.lstart = lstart;
1377 di.len = next_dc ? next_dc->lstart - lstart : len;
1378 di.len = min(di.len, len);
1379 di.start = start;
1380 }
1381
1382 while (1) {
1383 struct rb_node *node;
1384 bool merged = false;
1385 struct discard_cmd *tdc = NULL;
1386
1387 if (prev_dc) {
1388 di.lstart = prev_dc->lstart + prev_dc->len;
1389 if (di.lstart < lstart)
1390 di.lstart = lstart;
1391 if (di.lstart >= end)
1392 break;
1393
1394 if (!next_dc || next_dc->lstart > end)
1395 di.len = end - di.lstart;
1396 else
1397 di.len = next_dc->lstart - di.lstart;
1398 di.start = start + di.lstart - lstart;
1399 }
1400
1401 if (!di.len)
1402 goto next;
1403
1404 if (prev_dc && prev_dc->state == D_PREP &&
1405 prev_dc->bdev == bdev &&
1406 __is_discard_back_mergeable(&di, &prev_dc->di,
1407 max_discard_blocks)) {
1408 prev_dc->di.len += di.len;
1409 dcc->undiscard_blks += di.len;
1410 __relocate_discard_cmd(dcc, prev_dc);
1411 di = prev_dc->di;
1412 tdc = prev_dc;
1413 merged = true;
1414 }
1415
1416 if (next_dc && next_dc->state == D_PREP &&
1417 next_dc->bdev == bdev &&
1418 __is_discard_front_mergeable(&di, &next_dc->di,
1419 max_discard_blocks)) {
1420 next_dc->di.lstart = di.lstart;
1421 next_dc->di.len += di.len;
1422 next_dc->di.start = di.start;
1423 dcc->undiscard_blks += di.len;
1424 __relocate_discard_cmd(dcc, next_dc);
1425 if (tdc)
1426 __remove_discard_cmd(sbi, tdc);
1427 merged = true;
1428 }
1429
1430 if (!merged) {
1431 __insert_discard_tree(sbi, bdev, di.lstart, di.start,
1432 di.len, NULL, NULL);
1433 }
1434 next:
1435 prev_dc = next_dc;
1436 if (!prev_dc)
1437 break;
1438
1439 node = rb_next(&prev_dc->rb_node);
1440 next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1441 }
1442 }
1443
1444 static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
1445 struct block_device *bdev, block_t blkstart, block_t blklen)
1446 {
1447 block_t lblkstart = blkstart;
1448
1449 if (!f2fs_bdev_support_discard(bdev))
1450 return 0;
1451
1452 trace_f2fs_queue_discard(bdev, blkstart, blklen);
1453
1454 if (f2fs_is_multi_device(sbi)) {
1455 int devi = f2fs_target_device_index(sbi, blkstart);
1456
1457 blkstart -= FDEV(devi).start_blk;
1458 }
1459 mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1460 __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
1461 mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1462 return 0;
1463 }
1464
1465 static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
1466 struct discard_policy *dpolicy)
1467 {
1468 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1469 struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1470 struct rb_node **insert_p = NULL, *insert_parent = NULL;
1471 struct discard_cmd *dc;
1472 struct blk_plug plug;
1473 unsigned int pos = dcc->next_pos;
1474 unsigned int issued = 0;
1475 bool io_interrupted = false;
1476
1477 mutex_lock(&dcc->cmd_lock);
1478 dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
1479 NULL, pos,
1480 (struct rb_entry **)&prev_dc,
1481 (struct rb_entry **)&next_dc,
1482 &insert_p, &insert_parent, true, NULL);
1483 if (!dc)
1484 dc = next_dc;
1485
1486 blk_start_plug(&plug);
1487
1488 while (dc) {
1489 struct rb_node *node;
1490 int err = 0;
1491
1492 if (dc->state != D_PREP)
1493 goto next;
1494
1495 if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
1496 io_interrupted = true;
1497 break;
1498 }
1499
1500 dcc->next_pos = dc->lstart + dc->len;
1501 err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
1502
1503 if (issued >= dpolicy->max_requests)
1504 break;
1505 next:
1506 node = rb_next(&dc->rb_node);
1507 if (err)
1508 __remove_discard_cmd(sbi, dc);
1509 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1510 }
1511
1512 blk_finish_plug(&plug);
1513
1514 if (!dc)
1515 dcc->next_pos = 0;
1516
1517 mutex_unlock(&dcc->cmd_lock);
1518
1519 if (!issued && io_interrupted)
1520 issued = -1;
1521
1522 return issued;
1523 }
1524 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1525 struct discard_policy *dpolicy);
1526
1527 static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
1528 struct discard_policy *dpolicy)
1529 {
1530 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1531 struct list_head *pend_list;
1532 struct discard_cmd *dc, *tmp;
1533 struct blk_plug plug;
1534 int i, issued;
1535 bool io_interrupted = false;
1536
1537 if (dpolicy->timeout)
1538 f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT);
1539
1540 retry:
1541 issued = 0;
1542 for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1543 if (dpolicy->timeout &&
1544 f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
1545 break;
1546
1547 if (i + 1 < dpolicy->granularity)
1548 break;
1549
1550 if (i + 1 < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
1551 return __issue_discard_cmd_orderly(sbi, dpolicy);
1552
1553 pend_list = &dcc->pend_list[i];
1554
1555 mutex_lock(&dcc->cmd_lock);
1556 if (list_empty(pend_list))
1557 goto next;
1558 if (unlikely(dcc->rbtree_check))
1559 f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
1560 &dcc->root, false));
1561 blk_start_plug(&plug);
1562 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1563 f2fs_bug_on(sbi, dc->state != D_PREP);
1564
1565 if (dpolicy->timeout &&
1566 f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
1567 break;
1568
1569 if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
1570 !is_idle(sbi, DISCARD_TIME)) {
1571 io_interrupted = true;
1572 break;
1573 }
1574
1575 __submit_discard_cmd(sbi, dpolicy, dc, &issued);
1576
1577 if (issued >= dpolicy->max_requests)
1578 break;
1579 }
1580 blk_finish_plug(&plug);
1581 next:
1582 mutex_unlock(&dcc->cmd_lock);
1583
1584 if (issued >= dpolicy->max_requests || io_interrupted)
1585 break;
1586 }
1587
1588 if (dpolicy->type == DPOLICY_UMOUNT && issued) {
1589 __wait_all_discard_cmd(sbi, dpolicy);
1590 goto retry;
1591 }
1592
1593 if (!issued && io_interrupted)
1594 issued = -1;
1595
1596 return issued;
1597 }
1598
1599 static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
1600 {
1601 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1602 struct list_head *pend_list;
1603 struct discard_cmd *dc, *tmp;
1604 int i;
1605 bool dropped = false;
1606
1607 mutex_lock(&dcc->cmd_lock);
1608 for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1609 pend_list = &dcc->pend_list[i];
1610 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1611 f2fs_bug_on(sbi, dc->state != D_PREP);
1612 __remove_discard_cmd(sbi, dc);
1613 dropped = true;
1614 }
1615 }
1616 mutex_unlock(&dcc->cmd_lock);
1617
1618 return dropped;
1619 }
1620
1621 void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
1622 {
1623 __drop_discard_cmd(sbi);
1624 }
1625
1626 static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
1627 struct discard_cmd *dc)
1628 {
1629 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1630 unsigned int len = 0;
1631
1632 wait_for_completion_io(&dc->wait);
1633 mutex_lock(&dcc->cmd_lock);
1634 f2fs_bug_on(sbi, dc->state != D_DONE);
1635 dc->ref--;
1636 if (!dc->ref) {
1637 if (!dc->error)
1638 len = dc->len;
1639 __remove_discard_cmd(sbi, dc);
1640 }
1641 mutex_unlock(&dcc->cmd_lock);
1642
1643 return len;
1644 }
1645
1646 static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
1647 struct discard_policy *dpolicy,
1648 block_t start, block_t end)
1649 {
1650 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1651 struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1652 &(dcc->fstrim_list) : &(dcc->wait_list);
1653 struct discard_cmd *dc, *tmp;
1654 bool need_wait;
1655 unsigned int trimmed = 0;
1656
1657 next:
1658 need_wait = false;
1659
1660 mutex_lock(&dcc->cmd_lock);
1661 list_for_each_entry_safe(dc, tmp, wait_list, list) {
1662 if (dc->lstart + dc->len <= start || end <= dc->lstart)
1663 continue;
1664 if (dc->len < dpolicy->granularity)
1665 continue;
1666 if (dc->state == D_DONE && !dc->ref) {
1667 wait_for_completion_io(&dc->wait);
1668 if (!dc->error)
1669 trimmed += dc->len;
1670 __remove_discard_cmd(sbi, dc);
1671 } else {
1672 dc->ref++;
1673 need_wait = true;
1674 break;
1675 }
1676 }
1677 mutex_unlock(&dcc->cmd_lock);
1678
1679 if (need_wait) {
1680 trimmed += __wait_one_discard_bio(sbi, dc);
1681 goto next;
1682 }
1683
1684 return trimmed;
1685 }
1686
1687 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1688 struct discard_policy *dpolicy)
1689 {
1690 struct discard_policy dp;
1691 unsigned int discard_blks;
1692
1693 if (dpolicy)
1694 return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
1695
1696 /* wait all */
1697 __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
1698 discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1699 __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
1700 discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1701
1702 return discard_blks;
1703 }
1704
1705 /* This should be covered by global mutex, &sit_i->sentry_lock */
1706 static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
1707 {
1708 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1709 struct discard_cmd *dc;
1710 bool need_wait = false;
1711
1712 mutex_lock(&dcc->cmd_lock);
1713 dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root,
1714 NULL, blkaddr);
1715 if (dc) {
1716 if (dc->state == D_PREP) {
1717 __punch_discard_cmd(sbi, dc, blkaddr);
1718 } else {
1719 dc->ref++;
1720 need_wait = true;
1721 }
1722 }
1723 mutex_unlock(&dcc->cmd_lock);
1724
1725 if (need_wait)
1726 __wait_one_discard_bio(sbi, dc);
1727 }
1728
1729 void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
1730 {
1731 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1732
1733 if (dcc && dcc->f2fs_issue_discard) {
1734 struct task_struct *discard_thread = dcc->f2fs_issue_discard;
1735
1736 dcc->f2fs_issue_discard = NULL;
1737 kthread_stop(discard_thread);
1738 }
1739 }
1740
1741 /* This comes from f2fs_put_super */
1742 bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
1743 {
1744 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1745 struct discard_policy dpolicy;
1746 bool dropped;
1747
1748 __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
1749 dcc->discard_granularity);
1750 __issue_discard_cmd(sbi, &dpolicy);
1751 dropped = __drop_discard_cmd(sbi);
1752
1753 /* just to make sure there are no pending discard commands */
1754 __wait_all_discard_cmd(sbi, NULL);
1755
1756 f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
1757 return dropped;
1758 }
1759
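/*
 * Background discard thread: pick a policy based on urgency and memory
 * pressure, issue pending discard commands, then sleep for an interval
 * that depends on how much work was done in this round.
 */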
1760 static int issue_discard_thread(void *data)
1761 {
1762 struct f2fs_sb_info *sbi = data;
1763 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1764 wait_queue_head_t *q = &dcc->discard_wait_queue;
1765 struct discard_policy dpolicy;
1766 unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
1767 int issued;
1768
1769 set_freezable();
1770
1771 do {
1772 if (sbi->gc_mode == GC_URGENT_HIGH ||
1773 !f2fs_available_free_memory(sbi, DISCARD_CACHE))
1774 __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
1775 else
1776 __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
1777 dcc->discard_granularity);
1778
1779 if (!atomic_read(&dcc->discard_cmd_cnt))
1780 wait_ms = dpolicy.max_interval;
1781
1782 wait_event_interruptible_timeout(*q,
1783 kthread_should_stop() || freezing(current) ||
1784 dcc->discard_wake,
1785 msecs_to_jiffies(wait_ms));
1786
1787 if (dcc->discard_wake)
1788 dcc->discard_wake = 0;
1789
1790 /* clean up pending candidates before going to sleep */
1791 if (atomic_read(&dcc->queued_discard))
1792 __wait_all_discard_cmd(sbi, NULL);
1793
1794 if (try_to_freeze())
1795 continue;
1796 if (f2fs_readonly(sbi->sb))
1797 continue;
1798 if (kthread_should_stop())
1799 return 0;
1800 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
1801 wait_ms = dpolicy.max_interval;
1802 continue;
1803 }
1804 if (!atomic_read(&dcc->discard_cmd_cnt))
1805 continue;
1806
1807 sb_start_intwrite(sbi->sb);
1808
1809 issued = __issue_discard_cmd(sbi, &dpolicy);
1810 if (issued > 0) {
1811 __wait_all_discard_cmd(sbi, &dpolicy);
1812 wait_ms = dpolicy.min_interval;
1813 } else if (issued == -1) {
1814 wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
1815 if (!wait_ms)
1816 wait_ms = dpolicy.mid_interval;
1817 } else {
1818 wait_ms = dpolicy.max_interval;
1819 }
1820
1821 sb_end_intwrite(sbi->sb);
1822
1823 } while (!kthread_should_stop());
1824 return 0;
1825 }
1826
1827 #ifdef CONFIG_BLK_DEV_ZONED
1828 static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
1829 struct block_device *bdev, block_t blkstart, block_t blklen)
1830 {
1831 sector_t sector, nr_sects;
1832 block_t lblkstart = blkstart;
1833 int devi = 0;
1834
1835 if (f2fs_is_multi_device(sbi)) {
1836 devi = f2fs_target_device_index(sbi, blkstart);
1837 if (blkstart < FDEV(devi).start_blk ||
1838 blkstart > FDEV(devi).end_blk) {
1839 f2fs_err(sbi, "Invalid block %x", blkstart);
1840 return -EIO;
1841 }
1842 blkstart -= FDEV(devi).start_blk;
1843 }
1844
1845 /* For sequential zones, reset the zone write pointer */
1846 if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
1847 sector = SECTOR_FROM_BLOCK(blkstart);
1848 nr_sects = SECTOR_FROM_BLOCK(blklen);
1849
1850 if (sector & (bdev_zone_sectors(bdev) - 1) ||
1851 nr_sects != bdev_zone_sectors(bdev)) {
1852 f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
1853 devi, sbi->s_ndevs ? FDEV(devi).path : "",
1854 blkstart, blklen);
1855 return -EIO;
1856 }
1857 trace_f2fs_issue_reset_zone(bdev, blkstart);
1858 return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
1859 sector, nr_sects, GFP_NOFS);
1860 }
1861
1862 /* For conventional zones, use regular discard if supported */
1863 return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
1864 }
1865 #endif
1866
1867 static int __issue_discard_async(struct f2fs_sb_info *sbi,
1868 struct block_device *bdev, block_t blkstart, block_t blklen)
1869 {
1870 #ifdef CONFIG_BLK_DEV_ZONED
1871 if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
1872 return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
1873 #endif
1874 return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
1875 }
1876
1877 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
1878 block_t blkstart, block_t blklen)
1879 {
1880 sector_t start = blkstart, len = 0;
1881 struct block_device *bdev;
1882 struct seg_entry *se;
1883 unsigned int offset;
1884 block_t i;
1885 int err = 0;
1886
1887 bdev = f2fs_target_device(sbi, blkstart, NULL);
1888
1889 for (i = blkstart; i < blkstart + blklen; i++, len++) {
1890 if (i != start) {
1891 struct block_device *bdev2 =
1892 f2fs_target_device(sbi, i, NULL);
1893
1894 if (bdev2 != bdev) {
1895 err = __issue_discard_async(sbi, bdev,
1896 start, len);
1897 if (err)
1898 return err;
1899 bdev = bdev2;
1900 start = i;
1901 len = 0;
1902 }
1903 }
1904
1905 se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
1906 offset = GET_BLKOFF_FROM_SEG0(sbi, i);
1907
1908 if (!f2fs_test_and_set_bit(offset, se->discard_map))
1909 sbi->discard_blks--;
1910 }
1911
1912 if (len)
1913 err = __issue_discard_async(sbi, bdev, start, len);
1914 return err;
1915 }
1916
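/*
 * Collect discard candidates for one segment during checkpoint: build a
 * bitmap of blocks that became invalid and queue them as discard entries.
 * When @check_only is set, only report whether any candidate exists.
 */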
1917 static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
1918 bool check_only)
1919 {
1920 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
1921 int max_blocks = sbi->blocks_per_seg;
1922 struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
1923 unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
1924 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
1925 unsigned long *discard_map = (unsigned long *)se->discard_map;
1926 unsigned long *dmap = SIT_I(sbi)->tmp_map;
1927 unsigned int start = 0, end = -1;
1928 bool force = (cpc->reason & CP_DISCARD);
1929 struct discard_entry *de = NULL;
1930 struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
1931 int i;
1932
1933 if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi))
1934 return false;
1935
1936 if (!force) {
1937 if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
1938 SM_I(sbi)->dcc_info->nr_discards >=
1939 SM_I(sbi)->dcc_info->max_discards)
1940 return false;
1941 }
1942
1943 /* SIT_VBLOCK_MAP_SIZE should be a multiple of sizeof(unsigned long) */
1944 for (i = 0; i < entries; i++)
1945 dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
1946 (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
1947
1948 while (force || SM_I(sbi)->dcc_info->nr_discards <=
1949 SM_I(sbi)->dcc_info->max_discards) {
1950 start = __find_rev_next_bit(dmap, max_blocks, end + 1);
1951 if (start >= max_blocks)
1952 break;
1953
1954 end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
1955 if (force && start && end != max_blocks
1956 && (end - start) < cpc->trim_minlen)
1957 continue;
1958
1959 if (check_only)
1960 return true;
1961
1962 if (!de) {
1963 de = f2fs_kmem_cache_alloc(discard_entry_slab,
1964 GFP_F2FS_ZERO);
1965 de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
1966 list_add_tail(&de->list, head);
1967 }
1968
1969 for (i = start; i < end; i++)
1970 __set_bit_le(i, (void *)de->discard_map);
1971
1972 SM_I(sbi)->dcc_info->nr_discards += end - start;
1973 }
1974 return false;
1975 }
1976
1977 static void release_discard_addr(struct discard_entry *entry)
1978 {
1979 list_del(&entry->list);
1980 kmem_cache_free(discard_entry_slab, entry);
1981 }
1982
1983 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
1984 {
1985 struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
1986 struct discard_entry *entry, *this;
1987
1988 /* drop caches */
1989 list_for_each_entry_safe(entry, this, head, list)
1990 release_discard_addr(entry);
1991 }
1992
1993 /*
1994 * Callers should call f2fs_clear_prefree_segments() after the checkpoint is done.
1995 */
1996 static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
1997 {
1998 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1999 unsigned int segno;
2000
2001 mutex_lock(&dirty_i->seglist_lock);
2002 for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
2003 __set_test_and_free(sbi, segno, false);
2004 mutex_unlock(&dirty_i->seglist_lock);
2005 }
2006
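/*
 * Called after the checkpoint completes: clear prefree segments from the
 * dirty segmap and issue discards for them (section-aligned in LFS mode
 * with large sections), then send the small discards collected by
 * add_discard_addrs().
 */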
2007 void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
2008 struct cp_control *cpc)
2009 {
2010 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2011 struct list_head *head = &dcc->entry_list;
2012 struct discard_entry *entry, *this;
2013 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2014 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
2015 unsigned int start = 0, end = -1;
2016 unsigned int secno, start_segno;
2017 bool force = (cpc->reason & CP_DISCARD);
2018 bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi);
2019
2020 mutex_lock(&dirty_i->seglist_lock);
2021
2022 while (1) {
2023 int i;
2024
2025 if (need_align && end != -1)
2026 end--;
2027 start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
2028 if (start >= MAIN_SEGS(sbi))
2029 break;
2030 end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
2031 start + 1);
2032
2033 if (need_align) {
2034 start = rounddown(start, sbi->segs_per_sec);
2035 end = roundup(end, sbi->segs_per_sec);
2036 }
2037
2038 for (i = start; i < end; i++) {
2039 if (test_and_clear_bit(i, prefree_map))
2040 dirty_i->nr_dirty[PRE]--;
2041 }
2042
2043 if (!f2fs_realtime_discard_enable(sbi))
2044 continue;
2045
2046 if (force && start >= cpc->trim_start &&
2047 (end - 1) <= cpc->trim_end)
2048 continue;
2049
2050 if (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi)) {
2051 f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
2052 (end - start) << sbi->log_blocks_per_seg);
2053 continue;
2054 }
2055 next:
2056 secno = GET_SEC_FROM_SEG(sbi, start);
2057 start_segno = GET_SEG_FROM_SEC(sbi, secno);
2058 if (!IS_CURSEC(sbi, secno) &&
2059 !get_valid_blocks(sbi, start, true))
2060 f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
2061 sbi->segs_per_sec << sbi->log_blocks_per_seg);
2062
2063 start = start_segno + sbi->segs_per_sec;
2064 if (start < end)
2065 goto next;
2066 else
2067 end = start - 1;
2068 }
2069 mutex_unlock(&dirty_i->seglist_lock);
2070
2071 /* send small discards */
2072 list_for_each_entry_safe(entry, this, head, list) {
2073 unsigned int cur_pos = 0, next_pos, len, total_len = 0;
2074 bool is_valid = test_bit_le(0, entry->discard_map);
2075
2076 find_next:
2077 if (is_valid) {
2078 next_pos = find_next_zero_bit_le(entry->discard_map,
2079 sbi->blocks_per_seg, cur_pos);
2080 len = next_pos - cur_pos;
2081
2082 if (f2fs_sb_has_blkzoned(sbi) ||
2083 (force && len < cpc->trim_minlen))
2084 goto skip;
2085
2086 f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
2087 len);
2088 total_len += len;
2089 } else {
2090 next_pos = find_next_bit_le(entry->discard_map,
2091 sbi->blocks_per_seg, cur_pos);
2092 }
2093 skip:
2094 cur_pos = next_pos;
2095 is_valid = !is_valid;
2096
2097 if (cur_pos < sbi->blocks_per_seg)
2098 goto find_next;
2099
2100 release_discard_addr(entry);
2101 dcc->nr_discards -= total_len;
2102 }
2103
2104 wake_up_discard_thread(sbi, false);
2105 }
2106
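/*
 * Allocate and initialize the discard command control structure if it
 * does not exist yet, then start the background discard thread.
 */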
2107 static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
2108 {
2109 dev_t dev = sbi->sb->s_bdev->bd_dev;
2110 struct discard_cmd_control *dcc;
2111 int err = 0, i;
2112
2113 if (SM_I(sbi)->dcc_info) {
2114 dcc = SM_I(sbi)->dcc_info;
2115 goto init_thread;
2116 }
2117
2118 dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
2119 if (!dcc)
2120 return -ENOMEM;
2121
2122 dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
2123 INIT_LIST_HEAD(&dcc->entry_list);
2124 for (i = 0; i < MAX_PLIST_NUM; i++)
2125 INIT_LIST_HEAD(&dcc->pend_list[i]);
2126 INIT_LIST_HEAD(&dcc->wait_list);
2127 INIT_LIST_HEAD(&dcc->fstrim_list);
2128 mutex_init(&dcc->cmd_lock);
2129 atomic_set(&dcc->issued_discard, 0);
2130 atomic_set(&dcc->queued_discard, 0);
2131 atomic_set(&dcc->discard_cmd_cnt, 0);
2132 dcc->nr_discards = 0;
2133 dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
2134 dcc->undiscard_blks = 0;
2135 dcc->next_pos = 0;
2136 dcc->root = RB_ROOT_CACHED;
2137 dcc->rbtree_check = false;
2138
2139 init_waitqueue_head(&dcc->discard_wait_queue);
2140 SM_I(sbi)->dcc_info = dcc;
2141 init_thread:
2142 dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
2143 "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
2144 if (IS_ERR(dcc->f2fs_issue_discard)) {
2145 err = PTR_ERR(dcc->f2fs_issue_discard);
2146 kfree(dcc);
2147 SM_I(sbi)->dcc_info = NULL;
2148 return err;
2149 }
2150
2151 return err;
2152 }
2153
2154 static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
2155 {
2156 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2157
2158 if (!dcc)
2159 return;
2160
2161 f2fs_stop_discard_thread(sbi);
2162
2163 /*
2164 * Recovery can cache discard commands, so the error path of
2165 * fill_super() needs a chance to issue them.
2166 */
2167 if (unlikely(atomic_read(&dcc->discard_cmd_cnt)))
2168 f2fs_issue_discard_timeout(sbi);
2169
2170 kfree(dcc);
2171 SM_I(sbi)->dcc_info = NULL;
2172 }
2173
2174 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
2175 {
2176 struct sit_info *sit_i = SIT_I(sbi);
2177
2178 if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
2179 sit_i->dirty_sentries++;
2180 return false;
2181 }
2182
2183 return true;
2184 }
2185
2186 static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
2187 unsigned int segno, int modified)
2188 {
2189 struct seg_entry *se = get_seg_entry(sbi, segno);
2190
2191 se->type = type;
2192 if (modified)
2193 __mark_sit_entry_dirty(sbi, segno);
2194 }
2195
2196 static inline unsigned long long get_segment_mtime(struct f2fs_sb_info *sbi,
2197 block_t blkaddr)
2198 {
2199 unsigned int segno = GET_SEGNO(sbi, blkaddr);
2200
2201 if (segno == NULL_SEGNO)
2202 return 0;
2203 return get_seg_entry(sbi, segno)->mtime;
2204 }
2205
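/*
 * Fold the given (or current) modification time into the segment's
 * average mtime, weighted by its number of valid blocks, and track the
 * filesystem-wide maximum mtime.
 */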
2206 static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr,
2207 unsigned long long old_mtime)
2208 {
2209 struct seg_entry *se;
2210 unsigned int segno = GET_SEGNO(sbi, blkaddr);
2211 unsigned long long ctime = get_mtime(sbi, false);
2212 unsigned long long mtime = old_mtime ? old_mtime : ctime;
2213
2214 if (segno == NULL_SEGNO)
2215 return;
2216
2217 se = get_seg_entry(sbi, segno);
2218
2219 if (!se->mtime)
2220 se->mtime = mtime;
2221 else
2222 se->mtime = div_u64(se->mtime * se->valid_blocks + mtime,
2223 se->valid_blocks + 1);
2224
2225 if (ctime > SIT_I(sbi)->max_mtime)
2226 SIT_I(sbi)->max_mtime = ctime;
2227 }
2228
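/*
 * Apply a +1/-1 valid-block delta at @blkaddr: update the segment's valid
 * block count and its current/checkpoint/discard bitmaps, cross-check the
 * mirror bitmap under CONFIG_F2FS_CHECK_FS, and mark the SIT entry dirty.
 */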
2229 static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
2230 {
2231 struct seg_entry *se;
2232 unsigned int segno, offset;
2233 long int new_vblocks;
2234 bool exist;
2235 #ifdef CONFIG_F2FS_CHECK_FS
2236 bool mir_exist;
2237 #endif
2238
2239 segno = GET_SEGNO(sbi, blkaddr);
2240
2241 se = get_seg_entry(sbi, segno);
2242 new_vblocks = se->valid_blocks + del;
2243 offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2244
2245 f2fs_bug_on(sbi, (new_vblocks < 0 ||
2246 (new_vblocks > f2fs_usable_blks_in_seg(sbi, segno))));
2247
2248 se->valid_blocks = new_vblocks;
2249
2250 /* Update valid block bitmap */
2251 if (del > 0) {
2252 exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
2253 #ifdef CONFIG_F2FS_CHECK_FS
2254 mir_exist = f2fs_test_and_set_bit(offset,
2255 se->cur_valid_map_mir);
2256 if (unlikely(exist != mir_exist)) {
2257 f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d",
2258 blkaddr, exist);
2259 f2fs_bug_on(sbi, 1);
2260 }
2261 #endif
2262 if (unlikely(exist)) {
2263 f2fs_err(sbi, "Bitmap was wrongly set, blk:%u",
2264 blkaddr);
2265 f2fs_bug_on(sbi, 1);
2266 se->valid_blocks--;
2267 del = 0;
2268 }
2269
2270 if (!f2fs_test_and_set_bit(offset, se->discard_map))
2271 sbi->discard_blks--;
2272
2273 /*
2274 * SSR should never reuse a block which is checkpointed
2275 * or newly invalidated.
2276 */
2277 if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
2278 if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
2279 se->ckpt_valid_blocks++;
2280 }
2281 } else {
2282 exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
2283 #ifdef CONFIG_F2FS_CHECK_FS
2284 mir_exist = f2fs_test_and_clear_bit(offset,
2285 se->cur_valid_map_mir);
2286 if (unlikely(exist != mir_exist)) {
2287 f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d",
2288 blkaddr, exist);
2289 f2fs_bug_on(sbi, 1);
2290 }
2291 #endif
2292 if (unlikely(!exist)) {
2293 f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u",
2294 blkaddr);
2295 f2fs_bug_on(sbi, 1);
2296 se->valid_blocks++;
2297 del = 0;
2298 } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2299 /*
2300 * If checkpoints are off, we must not reuse data that
2301 * was used in the previous checkpoint. If it was used
2302 * before, we must track that to know how much space we
2303 * really have.
2304 */
2305 if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
2306 spin_lock(&sbi->stat_lock);
2307 sbi->unusable_block_count++;
2308 spin_unlock(&sbi->stat_lock);
2309 }
2310 }
2311
2312 if (f2fs_test_and_clear_bit(offset, se->discard_map))
2313 sbi->discard_blks++;
2314 }
2315 if (!f2fs_test_bit(offset, se->ckpt_valid_map))
2316 se->ckpt_valid_blocks += del;
2317
2318 __mark_sit_entry_dirty(sbi, segno);
2319
2320 /* update total number of valid blocks to be written in ckpt area */
2321 SIT_I(sbi)->written_valid_blocks += del;
2322
2323 if (__is_large_section(sbi))
2324 get_sec_entry(sbi, segno)->valid_blocks += del;
2325 }
2326
2327 void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
2328 {
2329 unsigned int segno = GET_SEGNO(sbi, addr);
2330 struct sit_info *sit_i = SIT_I(sbi);
2331
2332 f2fs_bug_on(sbi, addr == NULL_ADDR);
2333 if (addr == NEW_ADDR || addr == COMPRESS_ADDR)
2334 return;
2335
2336 invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);
2337 f2fs_invalidate_compress_page(sbi, addr);
2338
2339 /* add it into sit main buffer */
2340 down_write(&sit_i->sentry_lock);
2341
2342 update_segment_mtime(sbi, addr, 0);
2343 update_sit_entry(sbi, addr, -1);
2344
2345 /* add it into dirty seglist */
2346 locate_dirty_segment(sbi, segno);
2347
2348 up_write(&sit_i->sentry_lock);
2349 }
2350
2351 bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
2352 {
2353 struct sit_info *sit_i = SIT_I(sbi);
2354 unsigned int segno, offset;
2355 struct seg_entry *se;
2356 bool is_cp = false;
2357
2358 if (!__is_valid_data_blkaddr(blkaddr))
2359 return true;
2360
2361 down_read(&sit_i->sentry_lock);
2362
2363 segno = GET_SEGNO(sbi, blkaddr);
2364 se = get_seg_entry(sbi, segno);
2365 offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2366
2367 if (f2fs_test_bit(offset, se->ckpt_valid_map))
2368 is_cp = true;
2369
2370 up_read(&sit_i->sentry_lock);
2371
2372 return is_cp;
2373 }
2374
2375 /*
2376 * This function must be called with curseg_mutex held.
2377 */
2378 static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
2379 struct f2fs_summary *sum)
2380 {
2381 struct curseg_info *curseg = CURSEG_I(sbi, type);
2382 void *addr = curseg->sum_blk;
2383
2384 addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
2385 memcpy(addr, sum, sizeof(struct f2fs_summary));
2386 }
2387
2388 /*
2389 * Calculate the number of current summary pages for writing
2390 */
2391 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
2392 {
2393 int valid_sum_count = 0;
2394 int i, sum_in_page;
2395
2396 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2397 if (sbi->ckpt->alloc_type[i] == SSR)
2398 valid_sum_count += sbi->blocks_per_seg;
2399 else {
2400 if (for_ra)
2401 valid_sum_count += le16_to_cpu(
2402 F2FS_CKPT(sbi)->cur_data_blkoff[i]);
2403 else
2404 valid_sum_count += curseg_blkoff(sbi, i);
2405 }
2406 }
2407
2408 sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
2409 SUM_FOOTER_SIZE) / SUMMARY_SIZE;
2410 if (valid_sum_count <= sum_in_page)
2411 return 1;
2412 else if ((valid_sum_count - sum_in_page) <=
2413 (PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
2414 return 2;
2415 return 3;
2416 }
2417
2418 /*
2419 * Caller should put this summary page
2420 */
2421 struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
2422 {
2423 if (unlikely(f2fs_cp_error(sbi)))
2424 return ERR_PTR(-EIO);
2425 return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno));
2426 }
2427
2428 void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
2429 void *src, block_t blk_addr)
2430 {
2431 struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2432
2433 memcpy(page_address(page), src, PAGE_SIZE);
2434 set_page_dirty(page);
2435 f2fs_put_page(page, 1);
2436 }
2437
2438 static void write_sum_page(struct f2fs_sb_info *sbi,
2439 struct f2fs_summary_block *sum_blk, block_t blk_addr)
2440 {
2441 f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
2442 }
2443
2444 static void write_current_sum_page(struct f2fs_sb_info *sbi,
2445 int type, block_t blk_addr)
2446 {
2447 struct curseg_info *curseg = CURSEG_I(sbi, type);
2448 struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2449 struct f2fs_summary_block *src = curseg->sum_blk;
2450 struct f2fs_summary_block *dst;
2451
2452 dst = (struct f2fs_summary_block *)page_address(page);
2453 memset(dst, 0, PAGE_SIZE);
2454
2455 mutex_lock(&curseg->curseg_mutex);
2456
2457 down_read(&curseg->journal_rwsem);
2458 memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
2459 up_read(&curseg->journal_rwsem);
2460
2461 memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
2462 memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
2463
2464 mutex_unlock(&curseg->curseg_mutex);
2465
2466 set_page_dirty(page);
2467 f2fs_put_page(page, 1);
2468 }
2469
2470 static int is_next_segment_free(struct f2fs_sb_info *sbi,
2471 struct curseg_info *curseg, int type)
2472 {
2473 unsigned int segno = curseg->segno + 1;
2474 struct free_segmap_info *free_i = FREE_I(sbi);
2475
2476 if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
2477 return !test_bit(segno, free_i->free_segmap);
2478 return 0;
2479 }
2480
2481 /*
2482 * Find a new segment in the free segment bitmap, searching in the
2483 * requested allocation order. This function must succeed; otherwise BUG.
2484 */
2485 static void get_new_segment(struct f2fs_sb_info *sbi,
2486 unsigned int *newseg, bool new_sec, int dir)
2487 {
2488 struct free_segmap_info *free_i = FREE_I(sbi);
2489 unsigned int segno, secno, zoneno;
2490 unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
2491 unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
2492 unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
2493 unsigned int left_start = hint;
2494 bool init = true;
2495 int go_left = 0;
2496 int i;
2497
2498 spin_lock(&free_i->segmap_lock);
2499
2500 if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
2501 segno = find_next_zero_bit(free_i->free_segmap,
2502 GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
2503 if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
2504 goto got_it;
2505 }
2506 find_other_zone:
2507 secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
2508 if (secno >= MAIN_SECS(sbi)) {
2509 if (dir == ALLOC_RIGHT) {
2510 secno = find_next_zero_bit(free_i->free_secmap,
2511 MAIN_SECS(sbi), 0);
2512 f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
2513 } else {
2514 go_left = 1;
2515 left_start = hint - 1;
2516 }
2517 }
2518 if (go_left == 0)
2519 goto skip_left;
2520
2521 while (test_bit(left_start, free_i->free_secmap)) {
2522 if (left_start > 0) {
2523 left_start--;
2524 continue;
2525 }
2526 left_start = find_next_zero_bit(free_i->free_secmap,
2527 MAIN_SECS(sbi), 0);
2528 f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
2529 break;
2530 }
2531 secno = left_start;
2532 skip_left:
2533 segno = GET_SEG_FROM_SEC(sbi, secno);
2534 zoneno = GET_ZONE_FROM_SEC(sbi, secno);
2535
2536 /* give up on finding another zone */
2537 if (!init)
2538 goto got_it;
2539 if (sbi->secs_per_zone == 1)
2540 goto got_it;
2541 if (zoneno == old_zoneno)
2542 goto got_it;
2543 if (dir == ALLOC_LEFT) {
2544 if (!go_left && zoneno + 1 >= total_zones)
2545 goto got_it;
2546 if (go_left && zoneno == 0)
2547 goto got_it;
2548 }
2549 for (i = 0; i < NR_CURSEG_TYPE; i++)
2550 if (CURSEG_I(sbi, i)->zone == zoneno)
2551 break;
2552
2553 if (i < NR_CURSEG_TYPE) {
2554 /* zone is in use, try another */
2555 if (go_left)
2556 hint = zoneno * sbi->secs_per_zone - 1;
2557 else if (zoneno + 1 >= total_zones)
2558 hint = 0;
2559 else
2560 hint = (zoneno + 1) * sbi->secs_per_zone;
2561 init = false;
2562 goto find_other_zone;
2563 }
2564 got_it:
2565 /* set it as dirty segment in free segmap */
2566 f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
2567 __set_inuse(sbi, segno);
2568 *newseg = segno;
2569 spin_unlock(&free_i->segmap_lock);
2570 }
2571
2572 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
2573 {
2574 struct curseg_info *curseg = CURSEG_I(sbi, type);
2575 struct summary_footer *sum_footer;
2576 unsigned short seg_type = curseg->seg_type;
2577
2578 curseg->inited = true;
2579 curseg->segno = curseg->next_segno;
2580 curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
2581 curseg->next_blkoff = 0;
2582 curseg->next_segno = NULL_SEGNO;
2583
2584 sum_footer = &(curseg->sum_blk->footer);
2585 memset(sum_footer, 0, sizeof(struct summary_footer));
2586
2587 sanity_check_seg_type(sbi, seg_type);
2588
2589 if (IS_DATASEG(seg_type))
2590 SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
2591 if (IS_NODESEG(seg_type))
2592 SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
2593 __set_sit_entry_type(sbi, seg_type, curseg->segno, modified);
2594 }
2595
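/*
 * Pick the segment number used as the search hint for a new current
 * segment, taking NOHEAP, ALLOC_MODE_REUSE and the last GC victim into
 * account.
 */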
2596 static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
2597 {
2598 struct curseg_info *curseg = CURSEG_I(sbi, type);
2599 unsigned short seg_type = curseg->seg_type;
2600
2601 sanity_check_seg_type(sbi, seg_type);
2602
2603 /* if segs_per_sec is larger than 1, we need to keep the original policy. */
2604 if (__is_large_section(sbi))
2605 return curseg->segno;
2606
2607 /* the inmem log may not be located on any segment after mount */
2608 if (!curseg->inited)
2609 return 0;
2610
2611 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2612 return 0;
2613
2614 if (test_opt(sbi, NOHEAP) &&
2615 (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type)))
2616 return 0;
2617
2618 if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
2619 return SIT_I(sbi)->last_victim[ALLOC_NEXT];
2620
2621 /* find segments from 0 to reuse freed segments */
2622 if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
2623 return 0;
2624
2625 return curseg->segno;
2626 }
2627
2628 /*
2629 * Allocate a current working segment.
2630 * This function always allocates a free segment in LFS manner.
2631 */
2632 static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
2633 {
2634 struct curseg_info *curseg = CURSEG_I(sbi, type);
2635 unsigned short seg_type = curseg->seg_type;
2636 unsigned int segno = curseg->segno;
2637 int dir = ALLOC_LEFT;
2638
2639 if (curseg->inited)
2640 write_sum_page(sbi, curseg->sum_blk,
2641 GET_SUM_BLOCK(sbi, segno));
2642 if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
2643 dir = ALLOC_RIGHT;
2644
2645 if (test_opt(sbi, NOHEAP))
2646 dir = ALLOC_RIGHT;
2647
2648 segno = __get_next_segno(sbi, type);
2649 get_new_segment(sbi, &segno, new_sec, dir);
2650 curseg->next_segno = segno;
2651 reset_curseg(sbi, type, 1);
2652 curseg->alloc_type = LFS;
2653 }
2654
2655 static int __next_free_blkoff(struct f2fs_sb_info *sbi,
2656 int segno, block_t start)
2657 {
2658 struct seg_entry *se = get_seg_entry(sbi, segno);
2659 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2660 unsigned long *target_map = SIT_I(sbi)->tmp_map;
2661 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2662 unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2663 int i;
2664
2665 for (i = 0; i < entries; i++)
2666 target_map[i] = ckpt_map[i] | cur_map[i];
2667
2668 return __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
2669 }
2670
2671 /*
2672 * If a segment is written in LFS manner, the next block offset is simply
2673 * obtained by increasing the current block offset. However, if a segment is
2674 * written in SSR manner, the next block offset is obtained by calling __next_free_blkoff().
2675 */
2676 static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
2677 struct curseg_info *seg)
2678 {
2679 if (seg->alloc_type == SSR)
2680 seg->next_blkoff =
2681 __next_free_blkoff(sbi, seg->segno,
2682 seg->next_blkoff + 1);
2683 else
2684 seg->next_blkoff++;
2685 }
2686
2687 bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
2688 {
2689 return __next_free_blkoff(sbi, segno, 0) < sbi->blocks_per_seg;
2690 }
2691
2692 /*
2693 * This function always allocates a used segment (from the dirty seglist) in
2694 * SSR manner, so it must recover the existing segment information for the valid blocks.
2695 */
2696 static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush)
2697 {
2698 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2699 struct curseg_info *curseg = CURSEG_I(sbi, type);
2700 unsigned int new_segno = curseg->next_segno;
2701 struct f2fs_summary_block *sum_node;
2702 struct page *sum_page;
2703
2704 if (flush)
2705 write_sum_page(sbi, curseg->sum_blk,
2706 GET_SUM_BLOCK(sbi, curseg->segno));
2707
2708 __set_test_and_inuse(sbi, new_segno);
2709
2710 mutex_lock(&dirty_i->seglist_lock);
2711 __remove_dirty_segment(sbi, new_segno, PRE);
2712 __remove_dirty_segment(sbi, new_segno, DIRTY);
2713 mutex_unlock(&dirty_i->seglist_lock);
2714
2715 reset_curseg(sbi, type, 1);
2716 curseg->alloc_type = SSR;
2717 curseg->next_blkoff = __next_free_blkoff(sbi, curseg->segno, 0);
2718
2719 sum_page = f2fs_get_sum_page(sbi, new_segno);
2720 if (IS_ERR(sum_page)) {
2721 /* GC won't be able to use stale summary pages due to cp_error */
2722 memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
2723 return;
2724 }
2725 sum_node = (struct f2fs_summary_block *)page_address(sum_page);
2726 memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
2727 f2fs_put_page(sum_page, 1);
2728 }
2729
2730 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
2731 int alloc_mode, unsigned long long age);
2732
2733 static void get_atssr_segment(struct f2fs_sb_info *sbi, int type,
2734 int target_type, int alloc_mode,
2735 unsigned long long age)
2736 {
2737 struct curseg_info *curseg = CURSEG_I(sbi, type);
2738
2739 curseg->seg_type = target_type;
2740
2741 if (get_ssr_segment(sbi, type, alloc_mode, age)) {
2742 struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno);
2743
2744 curseg->seg_type = se->type;
2745 change_curseg(sbi, type, true);
2746 } else {
2747 /* allocate cold segment by default */
2748 curseg->seg_type = CURSEG_COLD_DATA;
2749 new_curseg(sbi, type, true);
2750 }
2751 stat_inc_seg_type(sbi, curseg);
2752 }
2753
2754 static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
2755 {
2756 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC);
2757
2758 if (!sbi->am.atgc_enabled)
2759 return;
2760
2761 f2fs_down_read(&SM_I(sbi)->curseg_lock);
2762
2763 mutex_lock(&curseg->curseg_mutex);
2764 down_write(&SIT_I(sbi)->sentry_lock);
2765
2766 get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, CURSEG_COLD_DATA, SSR, 0);
2767
2768 up_write(&SIT_I(sbi)->sentry_lock);
2769 mutex_unlock(&curseg->curseg_mutex);
2770
2771 f2fs_up_read(&SM_I(sbi)->curseg_lock);
2772
2773 }
2774 void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
2775 {
2776 __f2fs_init_atgc_curseg(sbi);
2777 }
2778
2779 static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
2780 {
2781 struct curseg_info *curseg = CURSEG_I(sbi, type);
2782
2783 mutex_lock(&curseg->curseg_mutex);
2784 if (!curseg->inited)
2785 goto out;
2786
2787 if (get_valid_blocks(sbi, curseg->segno, false)) {
2788 write_sum_page(sbi, curseg->sum_blk,
2789 GET_SUM_BLOCK(sbi, curseg->segno));
2790 } else {
2791 mutex_lock(&DIRTY_I(sbi)->seglist_lock);
2792 __set_test_and_free(sbi, curseg->segno, true);
2793 mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
2794 }
2795 out:
2796 mutex_unlock(&curseg->curseg_mutex);
2797 }
2798
2799 void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi)
2800 {
2801 __f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
2802
2803 if (sbi->am.atgc_enabled)
2804 __f2fs_save_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
2805 }
2806
2807 static void __f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
2808 {
2809 struct curseg_info *curseg = CURSEG_I(sbi, type);
2810
2811 mutex_lock(&curseg->curseg_mutex);
2812 if (!curseg->inited)
2813 goto out;
2814 if (get_valid_blocks(sbi, curseg->segno, false))
2815 goto out;
2816
2817 mutex_lock(&DIRTY_I(sbi)->seglist_lock);
2818 __set_test_and_inuse(sbi, curseg->segno);
2819 mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
2820 out:
2821 mutex_unlock(&curseg->curseg_mutex);
2822 }
2823
2824 void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi)
2825 {
2826 __f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
2827
2828 if (sbi->am.atgc_enabled)
2829 __f2fs_restore_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
2830 }
2831
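/*
 * Select a partially valid segment for SSR allocation: try a victim of
 * the requested temperature first, then the other log types, and finally
 * any free segment when checkpointing is disabled. Returns 1 and sets
 * curseg->next_segno on success, 0 otherwise.
 */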
2832 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
2833 int alloc_mode, unsigned long long age)
2834 {
2835 struct curseg_info *curseg = CURSEG_I(sbi, type);
2836 const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
2837 unsigned segno = NULL_SEGNO;
2838 unsigned short seg_type = curseg->seg_type;
2839 int i, cnt;
2840 bool reversed = false;
2841
2842 sanity_check_seg_type(sbi, seg_type);
2843
2844 /* f2fs_need_SSR() has already forced us to do this */
2845 if (!v_ops->get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) {
2846 curseg->next_segno = segno;
2847 return 1;
2848 }
2849
2850 /* For node segments, let's do SSR more intensively */
2851 if (IS_NODESEG(seg_type)) {
2852 if (seg_type >= CURSEG_WARM_NODE) {
2853 reversed = true;
2854 i = CURSEG_COLD_NODE;
2855 } else {
2856 i = CURSEG_HOT_NODE;
2857 }
2858 cnt = NR_CURSEG_NODE_TYPE;
2859 } else {
2860 if (seg_type >= CURSEG_WARM_DATA) {
2861 reversed = true;
2862 i = CURSEG_COLD_DATA;
2863 } else {
2864 i = CURSEG_HOT_DATA;
2865 }
2866 cnt = NR_CURSEG_DATA_TYPE;
2867 }
2868
2869 for (; cnt-- > 0; reversed ? i-- : i++) {
2870 if (i == seg_type)
2871 continue;
2872 if (!v_ops->get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) {
2873 curseg->next_segno = segno;
2874 return 1;
2875 }
2876 }
2877
2878 /* find valid_blocks=0 in dirty list */
2879 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2880 segno = get_free_segment(sbi);
2881 if (segno != NULL_SEGNO) {
2882 curseg->next_segno = segno;
2883 return 1;
2884 }
2885 }
2886 return 0;
2887 }
2888
2889 /*
2890 * Flush out the current segment and replace it with a new segment.
2891 * This function must succeed; otherwise BUG.
2892 */
2893 static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
2894 int type, bool force)
2895 {
2896 struct curseg_info *curseg = CURSEG_I(sbi, type);
2897
2898 if (force)
2899 new_curseg(sbi, type, true);
2900 else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
2901 curseg->seg_type == CURSEG_WARM_NODE)
2902 new_curseg(sbi, type, false);
2903 else if (curseg->alloc_type == LFS &&
2904 is_next_segment_free(sbi, curseg, type) &&
2905 likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2906 new_curseg(sbi, type, false);
2907 else if (f2fs_need_SSR(sbi) &&
2908 get_ssr_segment(sbi, type, SSR, 0))
2909 change_curseg(sbi, type, true);
2910 else
2911 new_curseg(sbi, type, false);
2912
2913 stat_inc_seg_type(sbi, curseg);
2914 }
2915
2916 void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
2917 unsigned int start, unsigned int end)
2918 {
2919 struct curseg_info *curseg = CURSEG_I(sbi, type);
2920 unsigned int segno;
2921
2922 f2fs_down_read(&SM_I(sbi)->curseg_lock);
2923 mutex_lock(&curseg->curseg_mutex);
2924 down_write(&SIT_I(sbi)->sentry_lock);
2925
2926 segno = CURSEG_I(sbi, type)->segno;
2927 if (segno < start || segno > end)
2928 goto unlock;
2929
2930 if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0))
2931 change_curseg(sbi, type, true);
2932 else
2933 new_curseg(sbi, type, true);
2934
2935 stat_inc_seg_type(sbi, curseg);
2936
2937 locate_dirty_segment(sbi, segno);
2938 unlock:
2939 up_write(&SIT_I(sbi)->sentry_lock);
2940
2941 if (segno != curseg->segno)
2942 f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u",
2943 type, segno, curseg->segno);
2944
2945 mutex_unlock(&curseg->curseg_mutex);
2946 f2fs_up_read(&SM_I(sbi)->curseg_lock);
2947 }
2948
2949 static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
2950 bool new_sec, bool force)
2951 {
2952 struct curseg_info *curseg = CURSEG_I(sbi, type);
2953 unsigned int old_segno;
2954
2955 if (!curseg->inited)
2956 goto alloc;
2957
2958 if (force || curseg->next_blkoff ||
2959 get_valid_blocks(sbi, curseg->segno, new_sec))
2960 goto alloc;
2961
2962 if (!get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
2963 return;
2964 alloc:
2965 old_segno = curseg->segno;
2966 SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
2967 locate_dirty_segment(sbi, old_segno);
2968 }
2969
2970 static void __allocate_new_section(struct f2fs_sb_info *sbi,
2971 int type, bool force)
2972 {
2973 __allocate_new_segment(sbi, type, true, force);
2974 }
2975
2976 void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
2977 {
2978 f2fs_down_read(&SM_I(sbi)->curseg_lock);
2979 down_write(&SIT_I(sbi)->sentry_lock);
2980 __allocate_new_section(sbi, type, force);
2981 up_write(&SIT_I(sbi)->sentry_lock);
2982 f2fs_up_read(&SM_I(sbi)->curseg_lock);
2983 }
2984
2985 void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
2986 {
2987 int i;
2988
2989 f2fs_down_read(&SM_I(sbi)->curseg_lock);
2990 down_write(&SIT_I(sbi)->sentry_lock);
2991 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
2992 __allocate_new_segment(sbi, i, false, false);
2993 up_write(&SIT_I(sbi)->sentry_lock);
2994 f2fs_up_read(&SM_I(sbi)->curseg_lock);
2995 }
2996
2997 static const struct segment_allocation default_salloc_ops = {
2998 .allocate_segment = allocate_segment_by_default,
2999 };
3000
3001 bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
3002 struct cp_control *cpc)
3003 {
3004 __u64 trim_start = cpc->trim_start;
3005 bool has_candidate = false;
3006
3007 down_write(&SIT_I(sbi)->sentry_lock);
3008 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
3009 if (add_discard_addrs(sbi, cpc, true)) {
3010 has_candidate = true;
3011 break;
3012 }
3013 }
3014 up_write(&SIT_I(sbi)->sentry_lock);
3015
3016 cpc->trim_start = trim_start;
3017 return has_candidate;
3018 }
3019
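/*
 * Issue pending discard commands whose logical start falls within
 * [start, end], batching up to dpolicy->max_requests per pass and waiting
 * for completion between passes. Used by the FSTRIM discard policy.
 */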
3020 static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
3021 struct discard_policy *dpolicy,
3022 unsigned int start, unsigned int end)
3023 {
3024 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
3025 struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
3026 struct rb_node **insert_p = NULL, *insert_parent = NULL;
3027 struct discard_cmd *dc;
3028 struct blk_plug plug;
3029 int issued;
3030 unsigned int trimmed = 0;
3031
3032 next:
3033 issued = 0;
3034
3035 mutex_lock(&dcc->cmd_lock);
3036 if (unlikely(dcc->rbtree_check))
3037 f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
3038 &dcc->root, false));
3039
3040 dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
3041 NULL, start,
3042 (struct rb_entry **)&prev_dc,
3043 (struct rb_entry **)&next_dc,
3044 &insert_p, &insert_parent, true, NULL);
3045 if (!dc)
3046 dc = next_dc;
3047
3048 blk_start_plug(&plug);
3049
3050 while (dc && dc->lstart <= end) {
3051 struct rb_node *node;
3052 int err = 0;
3053
3054 if (dc->len < dpolicy->granularity)
3055 goto skip;
3056
3057 if (dc->state != D_PREP) {
3058 list_move_tail(&dc->list, &dcc->fstrim_list);
3059 goto skip;
3060 }
3061
3062 err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
3063
3064 if (issued >= dpolicy->max_requests) {
3065 start = dc->lstart + dc->len;
3066
3067 if (err)
3068 __remove_discard_cmd(sbi, dc);
3069
3070 blk_finish_plug(&plug);
3071 mutex_unlock(&dcc->cmd_lock);
3072 trimmed += __wait_all_discard_cmd(sbi, NULL);
3073 congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
3074 goto next;
3075 }
3076 skip:
3077 node = rb_next(&dc->rb_node);
3078 if (err)
3079 __remove_discard_cmd(sbi, dc);
3080 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
3081
3082 if (fatal_signal_pending(current))
3083 break;
3084 }
3085
3086 blk_finish_plug(&plug);
3087 mutex_unlock(&dcc->cmd_lock);
3088
3089 return trimmed;
3090 }
3091
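/*
 * FITRIM entry point: write a checkpoint with CP_DISCARD to collect
 * discard candidates, then, unless runtime discard is enabled, issue and
 * wait for discards over the requested block range.
 */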
3092 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
3093 {
3094 __u64 start = F2FS_BYTES_TO_BLK(range->start);
3095 __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
3096 unsigned int start_segno, end_segno;
3097 block_t start_block, end_block;
3098 struct cp_control cpc;
3099 struct discard_policy dpolicy;
3100 unsigned long long trimmed = 0;
3101 int err = 0;
3102 bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi);
3103
3104 if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
3105 return -EINVAL;
3106
3107 if (end < MAIN_BLKADDR(sbi))
3108 goto out;
3109
3110 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
3111 f2fs_warn(sbi, "Found FS corruption, run fsck to fix.");
3112 return -EFSCORRUPTED;
3113 }
3114
3115 /* start/end segment number in main_area */
3116 start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
3117 end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
3118 GET_SEGNO(sbi, end);
3119 if (need_align) {
3120 start_segno = rounddown(start_segno, sbi->segs_per_sec);
3121 end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
3122 }
3123
3124 cpc.reason = CP_DISCARD;
3125 cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
3126 cpc.trim_start = start_segno;
3127 cpc.trim_end = end_segno;
3128
3129 if (sbi->discard_blks == 0)
3130 goto out;
3131
3132 f2fs_down_write(&sbi->gc_lock);
3133 err = f2fs_write_checkpoint(sbi, &cpc);
3134 f2fs_up_write(&sbi->gc_lock);
3135 if (err)
3136 goto out;
3137
3138 /*
3139 * We filed discard candidates, but we don't actually need to wait for
3140 * all of them, since they will be issued during idle time together with
3141 * the runtime discard option. The user configuration appears to prefer
3142 * runtime discard or periodic fstrim over waiting here.
3143 */
3144 if (f2fs_realtime_discard_enable(sbi))
3145 goto out;
3146
3147 start_block = START_BLOCK(sbi, start_segno);
3148 end_block = START_BLOCK(sbi, end_segno + 1);
3149
3150 __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
3151 trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
3152 start_block, end_block);
3153
3154 trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
3155 start_block, end_block);
3156 out:
3157 if (!err)
3158 range->len = F2FS_BLK_TO_BYTES(trimmed);
3159 return err;
3160 }
3161
3162 static bool __has_curseg_space(struct f2fs_sb_info *sbi,
3163 struct curseg_info *curseg)
3164 {
3165 return curseg->next_blkoff < f2fs_usable_blks_in_seg(sbi,
3166 curseg->segno);
3167 }
3168
3169 int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
3170 {
3171 switch (hint) {
3172 case WRITE_LIFE_SHORT:
3173 return CURSEG_HOT_DATA;
3174 case WRITE_LIFE_EXTREME:
3175 return CURSEG_COLD_DATA;
3176 default:
3177 return CURSEG_WARM_DATA;
3178 }
3179 }
3180
3181 /* This returns write hints for each segment type. These hints will be
3182 * passed down to the block layer. There are mapping tables which depend on
3183 * the mount option 'whint_mode'.
3184 *
3185 * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET.
3186 *
3187 * 2) whint_mode=user-based. F2FS tries to pass down hints given by users.
3188 *
3189 * User F2FS Block
3190 * ---- ---- -----
3191 * META WRITE_LIFE_NOT_SET
3192 * HOT_NODE "
3193 * WARM_NODE "
3194 * COLD_NODE "
3195 * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
3196 * extension list " "
3197 *
3198 * -- buffered io
3199 * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
3200 * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
3201 * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
3202 * WRITE_LIFE_NONE " "
3203 * WRITE_LIFE_MEDIUM " "
3204 * WRITE_LIFE_LONG " "
3205 *
3206 * -- direct io
3207 * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
3208 * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
3209 * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
3210 * WRITE_LIFE_NONE " WRITE_LIFE_NONE
3211 * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
3212 * WRITE_LIFE_LONG " WRITE_LIFE_LONG
3213 *
3214 * 3) whint_mode=fs-based. F2FS passes down hints with its policy.
3215 *
3216 * User F2FS Block
3217 * ---- ---- -----
3218 * META WRITE_LIFE_MEDIUM;
3219 * HOT_NODE WRITE_LIFE_NOT_SET
3220 * WARM_NODE "
3221 * COLD_NODE WRITE_LIFE_NONE
3222 * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
3223 * extension list " "
3224 *
3225 * -- buffered io
3226 * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
3227 * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
3228 * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG
3229 * WRITE_LIFE_NONE " "
3230 * WRITE_LIFE_MEDIUM " "
3231 * WRITE_LIFE_LONG " "
3232 *
3233 * -- direct io
3234 * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
3235 * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
3236 * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
3237 * WRITE_LIFE_NONE " WRITE_LIFE_NONE
3238 * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
3239 * WRITE_LIFE_LONG " WRITE_LIFE_LONG
3240 */
3241
3242 enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
3243 enum page_type type, enum temp_type temp)
3244 {
3245 if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) {
3246 if (type == DATA) {
3247 if (temp == WARM)
3248 return WRITE_LIFE_NOT_SET;
3249 else if (temp == HOT)
3250 return WRITE_LIFE_SHORT;
3251 else if (temp == COLD)
3252 return WRITE_LIFE_EXTREME;
3253 } else {
3254 return WRITE_LIFE_NOT_SET;
3255 }
3256 } else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) {
3257 if (type == DATA) {
3258 if (temp == WARM)
3259 return WRITE_LIFE_LONG;
3260 else if (temp == HOT)
3261 return WRITE_LIFE_SHORT;
3262 else if (temp == COLD)
3263 return WRITE_LIFE_EXTREME;
3264 } else if (type == NODE) {
3265 if (temp == WARM || temp == HOT)
3266 return WRITE_LIFE_NOT_SET;
3267 else if (temp == COLD)
3268 return WRITE_LIFE_NONE;
3269 } else if (type == META) {
3270 return WRITE_LIFE_MEDIUM;
3271 }
3272 }
3273 return WRITE_LIFE_NOT_SET;
3274 }
3275
3276 static int __get_segment_type_2(struct f2fs_io_info *fio)
3277 {
3278 if (fio->type == DATA)
3279 return CURSEG_HOT_DATA;
3280 else
3281 return CURSEG_HOT_NODE;
3282 }
3283
3284 static int __get_segment_type_4(struct f2fs_io_info *fio)
3285 {
3286 if (fio->type == DATA) {
3287 struct inode *inode = fio->page->mapping->host;
3288
3289 if (S_ISDIR(inode->i_mode))
3290 return CURSEG_HOT_DATA;
3291 else
3292 return CURSEG_COLD_DATA;
3293 } else {
3294 if (IS_DNODE(fio->page) && is_cold_node(fio->page))
3295 return CURSEG_WARM_NODE;
3296 else
3297 return CURSEG_COLD_NODE;
3298 }
3299 }
3300
3301 static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs)
3302 {
3303 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3304 struct extent_info ei = {};
3305
3306 if (f2fs_lookup_age_extent_cache(inode, pgofs, &ei)) {
3307 if (!ei.age)
3308 return NO_CHECK_TYPE;
3309 if (ei.age <= sbi->hot_data_age_threshold)
3310 return CURSEG_HOT_DATA;
3311 if (ei.age <= sbi->warm_data_age_threshold)
3312 return CURSEG_WARM_DATA;
3313 return CURSEG_COLD_DATA;
3314 }
3315 return NO_CHECK_TYPE;
3316 }
3317
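/*
 * Six-log policy: classify data pages as hot/warm/cold based on pinning,
 * GC state, file temperature flags, the age extent cache and the inode's
 * write hint; node pages are split by direct-node status and coldness.
 */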
3318 static int __get_segment_type_6(struct f2fs_io_info *fio)
3319 {
3320 if (fio->type == DATA) {
3321 struct inode *inode = fio->page->mapping->host;
3322 int type;
3323
3324 if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
3325 return CURSEG_COLD_DATA_PINNED;
3326
3327 if (page_private_gcing(fio->page)) {
3328 if (fio->sbi->am.atgc_enabled &&
3329 (fio->io_type == FS_DATA_IO) &&
3330 (fio->sbi->gc_mode != GC_URGENT_HIGH))
3331 return CURSEG_ALL_DATA_ATGC;
3332 else
3333 return CURSEG_COLD_DATA;
3334 }
3335 if (file_is_cold(inode) || f2fs_need_compress_data(inode))
3336 return CURSEG_COLD_DATA;
3337
3338 type = __get_age_segment_type(inode, fio->page->index);
3339 if (type != NO_CHECK_TYPE)
3340 return type;
3341
3342 if (file_is_hot(inode) ||
3343 is_inode_flag_set(inode, FI_HOT_DATA) ||
3344 f2fs_is_atomic_file(inode) ||
3345 f2fs_is_volatile_file(inode))
3346 return CURSEG_HOT_DATA;
3347 return f2fs_rw_hint_to_seg_type(inode->i_write_hint);
3348 } else {
3349 if (IS_DNODE(fio->page))
3350 return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
3351 CURSEG_HOT_NODE;
3352 return CURSEG_COLD_NODE;
3353 }
3354 }
3355
3356 static int __get_segment_type(struct f2fs_io_info *fio)
3357 {
3358 int type = 0;
3359
3360 switch (F2FS_OPTION(fio->sbi).active_logs) {
3361 case 2:
3362 type = __get_segment_type_2(fio);
3363 break;
3364 case 4:
3365 type = __get_segment_type_4(fio);
3366 break;
3367 case 6:
3368 type = __get_segment_type_6(fio);
3369 break;
3370 default:
3371 f2fs_bug_on(fio->sbi, true);
3372 }
3373
3374 if (IS_HOT(type))
3375 fio->temp = HOT;
3376 else if (IS_WARM(type))
3377 fio->temp = WARM;
3378 else
3379 fio->temp = COLD;
3380 return type;
3381 }
3382
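/*
 * Core block allocator: reserve the next free block of the current
 * segment of @type, record the summary entry, update mtime and SIT
 * information for the old and new blocks, and move on to a new segment
 * once the current one is full. If @fio is given, the page is also
 * queued on the per-temperature write list.
 */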
3383 void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
3384 block_t old_blkaddr, block_t *new_blkaddr,
3385 struct f2fs_summary *sum, int type,
3386 struct f2fs_io_info *fio)
3387 {
3388 struct sit_info *sit_i = SIT_I(sbi);
3389 struct curseg_info *curseg = CURSEG_I(sbi, type);
3390 unsigned long long old_mtime;
3391 bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
3392 struct seg_entry *se = NULL;
3393
3394 f2fs_down_read(&SM_I(sbi)->curseg_lock);
3395
3396 mutex_lock(&curseg->curseg_mutex);
3397 down_write(&sit_i->sentry_lock);
3398
3399 if (from_gc) {
3400 f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO);
3401 se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr));
3402 sanity_check_seg_type(sbi, se->type);
3403 f2fs_bug_on(sbi, IS_NODESEG(se->type));
3404 }
3405 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
3406
3407 f2fs_bug_on(sbi, curseg->next_blkoff >= sbi->blocks_per_seg);
3408
3409 f2fs_wait_discard_bio(sbi, *new_blkaddr);
3410
3411 /*
3412 * __add_sum_entry() must be called with curseg_mutex held
3413 * because this function updates a summary entry in the
3414 * current summary block.
3415 */
3416 __add_sum_entry(sbi, type, sum);
3417
3418 __refresh_next_blkoff(sbi, curseg);
3419
3420 stat_inc_block_count(sbi, curseg);
3421
3422 if (from_gc) {
3423 old_mtime = get_segment_mtime(sbi, old_blkaddr);
3424 } else {
3425 update_segment_mtime(sbi, old_blkaddr, 0);
3426 old_mtime = 0;
3427 }
3428 update_segment_mtime(sbi, *new_blkaddr, old_mtime);
3429
3430 /*
3431 * SIT information should be updated before segment allocation,
3432 * since SSR needs the latest valid block information.
3433 */
3434 update_sit_entry(sbi, *new_blkaddr, 1);
3435 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
3436 update_sit_entry(sbi, old_blkaddr, -1);
3437
3438 if (!__has_curseg_space(sbi, curseg)) {
3439 if (from_gc)
3440 get_atssr_segment(sbi, type, se->type,
3441 AT_SSR, se->mtime);
3442 else
3443 sit_i->s_ops->allocate_segment(sbi, type, false);
3444 }
3445 /*
3446 * The segment dirty status should be updated after segment allocation,
3447 * so we only need to update the status once, after the previous
3448 * segment has been closed.
3449 */
3450 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3451 locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
3452
3453 if (IS_DATASEG(type))
3454 atomic64_inc(&sbi->allocated_data_blocks);
3455
3456 up_write(&sit_i->sentry_lock);
3457
3458 if (page && IS_NODESEG(type)) {
3459 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
3460
3461 f2fs_inode_chksum_set(sbi, page);
3462 }
3463
3464 if (fio) {
3465 struct f2fs_bio_info *io;
3466
3467 if (F2FS_IO_ALIGNED(sbi))
3468 fio->retry = false;
3469
3470 INIT_LIST_HEAD(&fio->list);
3471 fio->in_list = true;
3472 io = sbi->write_io[fio->type] + fio->temp;
3473 spin_lock(&io->io_lock);
3474 list_add_tail(&fio->list, &io->io_list);
3475 spin_unlock(&io->io_lock);
3476 }
3477
3478 mutex_unlock(&curseg->curseg_mutex);
3479
3480 f2fs_up_read(&SM_I(sbi)->curseg_lock);
3481 }
3482
3483 static void update_device_state(struct f2fs_io_info *fio)
3484 {
3485 struct f2fs_sb_info *sbi = fio->sbi;
3486 unsigned int devidx;
3487
3488 if (!f2fs_is_multi_device(sbi))
3489 return;
3490
3491 devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
3492
3493 /* update device state for fsync */
3494 f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
3495
3496 /* update device state for checkpoint */
3497 if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
3498 spin_lock(&sbi->dev_lock);
3499 f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
3500 spin_unlock(&sbi->dev_lock);
3501 }
3502 }
3503
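/*
 * Allocate a new block for the page and submit the write, retrying the
 * allocation when the submission path requests it via fio->retry.
 */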
3504 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
3505 {
3506 int type = __get_segment_type(fio);
3507 bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA);
3508
3509 if (keep_order)
3510 f2fs_down_read(&fio->sbi->io_order_lock);
3511 reallocate:
3512 f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
3513 &fio->new_blkaddr, sum, type, fio);
3514 if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) {
3515 invalidate_mapping_pages(META_MAPPING(fio->sbi),
3516 fio->old_blkaddr, fio->old_blkaddr);
3517 f2fs_invalidate_compress_page(fio->sbi, fio->old_blkaddr);
3518 }
3519
3520 /* write out the dirty page to the bdev */
3521 f2fs_submit_page_write(fio);
3522 if (fio->retry) {
3523 fio->old_blkaddr = fio->new_blkaddr;
3524 goto reallocate;
3525 }
3526
3527 update_device_state(fio);
3528
3529 if (keep_order)
3530 f2fs_up_read(&fio->sbi->io_order_lock);
3531 }
3532
3533 void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
3534 enum iostat_type io_type)
3535 {
3536 struct f2fs_io_info fio = {
3537 .sbi = sbi,
3538 .type = META,
3539 .temp = HOT,
3540 .op = REQ_OP_WRITE,
3541 .op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
3542 .old_blkaddr = page->index,
3543 .new_blkaddr = page->index,
3544 .page = page,
3545 .encrypted_page = NULL,
3546 .in_list = false,
3547 };
3548
3549 if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
3550 fio.op_flags &= ~REQ_META;
3551
3552 set_page_writeback(page);
3553 ClearPageError(page);
3554 f2fs_submit_page_write(&fio);
3555
3556 stat_inc_meta_count(sbi, page->index);
3557 f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
3558 }
3559
3560 void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
3561 {
3562 struct f2fs_summary sum;
3563
3564 set_summary(&sum, nid, 0, 0);
3565 do_write_page(&sum, fio);
3566
3567 f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
3568 }
3569
3570 void f2fs_outplace_write_data(struct dnode_of_data *dn,
3571 struct f2fs_io_info *fio)
3572 {
3573 struct f2fs_sb_info *sbi = fio->sbi;
3574 struct f2fs_summary sum;
3575
3576 f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
3577 if (fio->io_type == FS_DATA_IO || fio->io_type == FS_CP_DATA_IO)
3578 f2fs_update_age_extent_cache(dn);
3579 set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
3580 do_write_page(&sum, fio);
3581 f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
3582
3583 f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE);
3584 }
3585
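/*
 * In-place update: rewrite the block at its existing address after
 * checking that it belongs to a data segment, failing the cached bio on
 * corruption or checkpoint error.
 */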
3586 int f2fs_inplace_write_data(struct f2fs_io_info *fio)
3587 {
3588 int err;
3589 struct f2fs_sb_info *sbi = fio->sbi;
3590 unsigned int segno;
3591
3592 fio->new_blkaddr = fio->old_blkaddr;
3593 /* i/o temperature is needed for passing down write hints */
3594 __get_segment_type(fio);
3595
3596 segno = GET_SEGNO(sbi, fio->new_blkaddr);
3597
3598 if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
3599 set_sbi_flag(sbi, SBI_NEED_FSCK);
3600 f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
3601 __func__, segno);
3602 err = -EFSCORRUPTED;
3603 goto drop_bio;
3604 }
3605
3606 if (f2fs_cp_error(sbi)) {
3607 err = -EIO;
3608 goto drop_bio;
3609 }
3610
3611 if (fio->post_read)
3612 invalidate_mapping_pages(META_MAPPING(sbi),
3613 fio->new_blkaddr, fio->new_blkaddr);
3614
3615 stat_inc_inplace_blocks(fio->sbi);
3616
3617 if (fio->bio && !(SM_I(sbi)->ipu_policy & (1 << F2FS_IPU_NOCACHE)))
3618 err = f2fs_merge_page_bio(fio);
3619 else
3620 err = f2fs_submit_page_bio(fio);
3621 if (!err) {
3622 update_device_state(fio);
3623 f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
3624 }
3625
3626 return err;
3627 drop_bio:
3628 if (fio->bio && *(fio->bio)) {
3629 struct bio *bio = *(fio->bio);
3630
3631 bio->bi_status = BLK_STS_IOERR;
3632 bio_endio(bio);
3633 *(fio->bio) = NULL;
3634 }
3635 return err;
3636 }
3637
3638 static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
3639 unsigned int segno)
3640 {
3641 int i;
3642
3643 for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
3644 if (CURSEG_I(sbi, i)->segno == segno)
3645 break;
3646 }
3647 return i;
3648 }
3649
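/*
 * Used by recovery and block migration to place a block at a specific
 * address: temporarily switch the current segment to the target segment,
 * record the summary and SIT updates for the old and new addresses, and
 * optionally restore the previous current segment afterwards.
 */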
3650 void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
3651 block_t old_blkaddr, block_t new_blkaddr,
3652 bool recover_curseg, bool recover_newaddr,
3653 bool from_gc)
3654 {
3655 struct sit_info *sit_i = SIT_I(sbi);
3656 struct curseg_info *curseg;
3657 unsigned int segno, old_cursegno;
3658 struct seg_entry *se;
3659 int type;
3660 unsigned short old_blkoff;
3661 unsigned char old_alloc_type;
3662
3663 segno = GET_SEGNO(sbi, new_blkaddr);
3664 se = get_seg_entry(sbi, segno);
3665 type = se->type;
3666
3667 f2fs_down_write(&SM_I(sbi)->curseg_lock);
3668
3669 if (!recover_curseg) {
3670 /* for recovery flow */
3671 if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
3672 if (old_blkaddr == NULL_ADDR)
3673 type = CURSEG_COLD_DATA;
3674 else
3675 type = CURSEG_WARM_DATA;
3676 }
3677 } else {
3678 if (IS_CURSEG(sbi, segno)) {
3679 /* se->type is volatile due to SSR allocation */
3680 type = __f2fs_get_curseg(sbi, segno);
3681 f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
3682 } else {
3683 type = CURSEG_WARM_DATA;
3684 }
3685 }
3686
3687 f2fs_bug_on(sbi, !IS_DATASEG(type));
3688 curseg = CURSEG_I(sbi, type);
3689
3690 mutex_lock(&curseg->curseg_mutex);
3691 down_write(&sit_i->sentry_lock);
3692
3693 old_cursegno = curseg->segno;
3694 old_blkoff = curseg->next_blkoff;
3695 old_alloc_type = curseg->alloc_type;
3696
3697 /* change the current segment */
3698 if (segno != curseg->segno) {
3699 curseg->next_segno = segno;
3700 change_curseg(sbi, type, true);
3701 }
3702
3703 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
3704 __add_sum_entry(sbi, type, sum);
3705
3706 if (!recover_curseg || recover_newaddr) {
3707 if (!from_gc)
3708 update_segment_mtime(sbi, new_blkaddr, 0);
3709 update_sit_entry(sbi, new_blkaddr, 1);
3710 }
3711 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
3712 invalidate_mapping_pages(META_MAPPING(sbi),
3713 old_blkaddr, old_blkaddr);
3714 f2fs_invalidate_compress_page(sbi, old_blkaddr);
3715 if (!from_gc)
3716 update_segment_mtime(sbi, old_blkaddr, 0);
3717 update_sit_entry(sbi, old_blkaddr, -1);
3718 }
3719
3720 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3721 locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
3722
3723 locate_dirty_segment(sbi, old_cursegno);
3724
3725 if (recover_curseg) {
3726 if (old_cursegno != curseg->segno) {
3727 curseg->next_segno = old_cursegno;
3728 change_curseg(sbi, type, true);
3729 }
3730 curseg->next_blkoff = old_blkoff;
3731 curseg->alloc_type = old_alloc_type;
3732 }
3733
3734 up_write(&sit_i->sentry_lock);
3735 mutex_unlock(&curseg->curseg_mutex);
3736 f2fs_up_write(&SM_I(sbi)->curseg_lock);
3737 }
3738
3739 void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
3740 block_t old_addr, block_t new_addr,
3741 unsigned char version, bool recover_curseg,
3742 bool recover_newaddr)
3743 {
3744 struct f2fs_summary sum;
3745
3746 set_summary(&sum, dn->nid, dn->ofs_in_node, version);
3747
3748 f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
3749 recover_curseg, recover_newaddr, false);
3750
3751 f2fs_update_data_blkaddr(dn, new_addr);
3752 }
3753
3754 void f2fs_wait_on_page_writeback(struct page *page,
3755 enum page_type type, bool ordered, bool locked)
3756 {
3757 if (PageWriteback(page)) {
3758 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
3759
3760 /* submit cached LFS IO */
3761 f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
3762 /* submit cached IPU IO */
3763 f2fs_submit_merged_ipu_write(sbi, NULL, page);
3764 if (ordered) {
3765 wait_on_page_writeback(page);
3766 f2fs_bug_on(sbi, locked && PageWriteback(page));
3767 } else {
3768 wait_for_stable_page(page);
3769 }
3770 }
3771 }
3772
3773 void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
3774 {
3775 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3776 struct page *cpage;
3777
3778 if (!f2fs_post_read_required(inode))
3779 return;
3780
3781 if (!__is_valid_data_blkaddr(blkaddr))
3782 return;
3783
3784 cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
3785 if (cpage) {
3786 f2fs_wait_on_page_writeback(cpage, DATA, true, true);
3787 f2fs_put_page(cpage, 1);
3788 }
3789 }
3790
3791 void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
3792 block_t len)
3793 {
3794 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3795 block_t i;
3796
3797 if (!f2fs_post_read_required(inode))
3798 return;
3799
3800 for (i = 0; i < len; i++)
3801 f2fs_wait_on_block_writeback(inode, blkaddr + i);
3802
3803 invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr + len - 1);
3804 }
3805
3806 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
3807 {
3808 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3809 struct curseg_info *seg_i;
3810 unsigned char *kaddr;
3811 struct page *page;
3812 block_t start;
3813 int i, j, offset;
3814
3815 start = start_sum_block(sbi);
3816
3817 page = f2fs_get_meta_page(sbi, start++);
3818 if (IS_ERR(page))
3819 return PTR_ERR(page);
3820 kaddr = (unsigned char *)page_address(page);
3821
3822 /* Step 1: restore nat cache */
3823 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3824 memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
3825
3826 /* Step 2: restore sit cache */
3827 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3828 memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
3829 offset = 2 * SUM_JOURNAL_SIZE;
3830
3831 /* Step 3: restore summary entries */
3832 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
3833 unsigned short blk_off;
3834 unsigned int segno;
3835
3836 seg_i = CURSEG_I(sbi, i);
3837 segno = le32_to_cpu(ckpt->cur_data_segno[i]);
3838 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
3839 seg_i->next_segno = segno;
3840 reset_curseg(sbi, i, 0);
3841 seg_i->alloc_type = ckpt->alloc_type[i];
3842 seg_i->next_blkoff = blk_off;
3843
3844 if (seg_i->alloc_type == SSR)
3845 blk_off = sbi->blocks_per_seg;
3846
3847 for (j = 0; j < blk_off; j++) {
3848 struct f2fs_summary *s;
3849
3850 s = (struct f2fs_summary *)(kaddr + offset);
3851 seg_i->sum_blk->entries[j] = *s;
3852 offset += SUMMARY_SIZE;
3853 if (offset + SUMMARY_SIZE <= PAGE_SIZE -
3854 SUM_FOOTER_SIZE)
3855 continue;
3856
3857 f2fs_put_page(page, 1);
3858 page = NULL;
3859
3860 page = f2fs_get_meta_page(sbi, start++);
3861 if (IS_ERR(page))
3862 return PTR_ERR(page);
3863 kaddr = (unsigned char *)page_address(page);
3864 offset = 0;
3865 }
3866 }
3867 f2fs_put_page(page, 1);
3868 return 0;
3869 }
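/*
 * Illustrative layout sketch (not an on-disk format definition; sizes are
 * taken from the constants used in read_compacted_summaries() above):
 *
 *   block 0: [NAT journal][SIT journal][summary entries ...        ][footer]
 *   block 1: [summary entries ...                                   ][footer]
 *   ...
 *
 * Entries for the HOT/WARM/COLD data logs are packed back to back, and a new
 * meta page is fetched whenever the next entry would cross the
 * PAGE_SIZE - SUM_FOOTER_SIZE boundary.
 */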
3870
3871 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
3872 {
3873 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3874 struct f2fs_summary_block *sum;
3875 struct curseg_info *curseg;
3876 struct page *new;
3877 unsigned short blk_off;
3878 unsigned int segno = 0;
3879 block_t blk_addr = 0;
3880 int err = 0;
3881
3882 /* get segment number and block addr */
3883 if (IS_DATASEG(type)) {
3884 segno = le32_to_cpu(ckpt->cur_data_segno[type]);
3885 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
3886 CURSEG_HOT_DATA]);
3887 if (__exist_node_summaries(sbi))
3888 blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
3889 else
3890 blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
3891 } else {
3892 segno = le32_to_cpu(ckpt->cur_node_segno[type -
3893 CURSEG_HOT_NODE]);
3894 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
3895 CURSEG_HOT_NODE]);
3896 if (__exist_node_summaries(sbi))
3897 blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
3898 type - CURSEG_HOT_NODE);
3899 else
3900 blk_addr = GET_SUM_BLOCK(sbi, segno);
3901 }
3902
3903 new = f2fs_get_meta_page(sbi, blk_addr);
3904 if (IS_ERR(new))
3905 return PTR_ERR(new);
3906 sum = (struct f2fs_summary_block *)page_address(new);
3907
3908 if (IS_NODESEG(type)) {
3909 if (__exist_node_summaries(sbi)) {
3910 struct f2fs_summary *ns = &sum->entries[0];
3911 int i;
3912
3913 for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
3914 ns->version = 0;
3915 ns->ofs_in_node = 0;
3916 }
3917 } else {
3918 err = f2fs_restore_node_summary(sbi, segno, sum);
3919 if (err)
3920 goto out;
3921 }
3922 }
3923
3924 /* restore the uncompleted segment as the current segment */
3925 curseg = CURSEG_I(sbi, type);
3926 mutex_lock(&curseg->curseg_mutex);
3927
3928 /* update journal info */
3929 down_write(&curseg->journal_rwsem);
3930 memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
3931 up_write(&curseg->journal_rwsem);
3932
3933 memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
3934 memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
3935 curseg->next_segno = segno;
3936 reset_curseg(sbi, type, 0);
3937 curseg->alloc_type = ckpt->alloc_type[type];
3938 curseg->next_blkoff = blk_off;
3939 mutex_unlock(&curseg->curseg_mutex);
3940 out:
3941 f2fs_put_page(new, 1);
3942 return err;
3943 }
3944
3945 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
3946 {
3947 struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
3948 struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
3949 int type = CURSEG_HOT_DATA;
3950 int err;
3951
3952 if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
3953 int npages = f2fs_npages_for_summary_flush(sbi, true);
3954
3955 if (npages >= 2)
3956 f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
3957 META_CP, true);
3958
3959 /* restore for compacted data summary */
3960 err = read_compacted_summaries(sbi);
3961 if (err)
3962 return err;
3963 type = CURSEG_HOT_NODE;
3964 }
3965
3966 if (__exist_node_summaries(sbi))
3967 f2fs_ra_meta_pages(sbi,
3968 sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
3969 NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
3970
3971 for (; type <= CURSEG_COLD_NODE; type++) {
3972 err = read_normal_summaries(sbi, type);
3973 if (err)
3974 return err;
3975 }
3976
3977 /* sanity check for summary blocks */
3978 if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
3979 sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) {
3980 f2fs_err(sbi, "invalid journal entries nats %u sits %u",
3981 nats_in_cursum(nat_j), sits_in_cursum(sit_j));
3982 return -EINVAL;
3983 }
3984
3985 return 0;
3986 }
3987
3988 static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
3989 {
3990 struct page *page;
3991 unsigned char *kaddr;
3992 struct f2fs_summary *summary;
3993 struct curseg_info *seg_i;
3994 int written_size = 0;
3995 int i, j;
3996
3997 page = f2fs_grab_meta_page(sbi, blkaddr++);
3998 kaddr = (unsigned char *)page_address(page);
3999 memset(kaddr, 0, PAGE_SIZE);
4000
4001 /* Step 1: write nat cache */
4002 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
4003 memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
4004 written_size += SUM_JOURNAL_SIZE;
4005
4006 /* Step 2: write sit cache */
4007 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
4008 memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
4009 written_size += SUM_JOURNAL_SIZE;
4010
4011 /* Step 3: write summary entries */
4012 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
4013 unsigned short blkoff;
4014
4015 seg_i = CURSEG_I(sbi, i);
4016 if (sbi->ckpt->alloc_type[i] == SSR)
4017 blkoff = sbi->blocks_per_seg;
4018 else
4019 blkoff = curseg_blkoff(sbi, i);
4020
4021 for (j = 0; j < blkoff; j++) {
4022 if (!page) {
4023 page = f2fs_grab_meta_page(sbi, blkaddr++);
4024 kaddr = (unsigned char *)page_address(page);
4025 memset(kaddr, 0, PAGE_SIZE);
4026 written_size = 0;
4027 }
4028 summary = (struct f2fs_summary *)(kaddr + written_size);
4029 *summary = seg_i->sum_blk->entries[j];
4030 written_size += SUMMARY_SIZE;
4031
4032 if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
4033 SUM_FOOTER_SIZE)
4034 continue;
4035
4036 set_page_dirty(page);
4037 f2fs_put_page(page, 1);
4038 page = NULL;
4039 }
4040 }
4041 if (page) {
4042 set_page_dirty(page);
4043 f2fs_put_page(page, 1);
4044 }
4045 }
4046
4047 static void write_normal_summaries(struct f2fs_sb_info *sbi,
4048 block_t blkaddr, int type)
4049 {
4050 int i, end;
4051
4052 if (IS_DATASEG(type))
4053 end = type + NR_CURSEG_DATA_TYPE;
4054 else
4055 end = type + NR_CURSEG_NODE_TYPE;
4056
4057 for (i = type; i < end; i++)
4058 write_current_sum_page(sbi, i, blkaddr + (i - type));
4059 }
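/*
 * Example (illustrative only): with type == CURSEG_HOT_DATA the loop above
 * writes three consecutive summary blocks,
 *
 *   blkaddr + 0 -> CURSEG_HOT_DATA
 *   blkaddr + 1 -> CURSEG_WARM_DATA
 *   blkaddr + 2 -> CURSEG_COLD_DATA
 *
 * and f2fs_write_node_summaries() does the same for the three node logs.
 */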
4060
4061 void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
4062 {
4063 if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
4064 write_compacted_summaries(sbi, start_blk);
4065 else
4066 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
4067 }
4068
4069 void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
4070 {
4071 write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
4072 }
4073
4074 int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
4075 unsigned int val, int alloc)
4076 {
4077 int i;
4078
4079 if (type == NAT_JOURNAL) {
4080 for (i = 0; i < nats_in_cursum(journal); i++) {
4081 if (le32_to_cpu(nid_in_journal(journal, i)) == val)
4082 return i;
4083 }
4084 if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
4085 return update_nats_in_cursum(journal, 1);
4086 } else if (type == SIT_JOURNAL) {
4087 for (i = 0; i < sits_in_cursum(journal); i++)
4088 if (le32_to_cpu(segno_in_journal(journal, i)) == val)
4089 return i;
4090 if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
4091 return update_sits_in_cursum(journal, 1);
4092 }
4093 return -1;
4094 }
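/*
 * Usage sketch (illustrative, mirroring how the SIT flush path below calls
 * this helper): look up a journal slot for a segment and allocate one if it
 * is missing.
 *
 *	int offset = f2fs_lookup_journal_in_cursum(journal, SIT_JOURNAL,
 *						   segno, 1);
 *	if (offset >= 0) {
 *		segno_in_journal(journal, offset) = cpu_to_le32(segno);
 *		seg_info_to_raw_sit(se, &sit_in_journal(journal, offset));
 *	}
 *
 * A negative return value means the journal has no matching entry and, with
 * alloc set, no free slot either, so the caller must fall back to writing
 * the SIT page directly.
 */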
4095
4096 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
4097 unsigned int segno)
4098 {
4099 return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
4100 }
4101
4102 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
4103 unsigned int start)
4104 {
4105 struct sit_info *sit_i = SIT_I(sbi);
4106 struct page *page;
4107 pgoff_t src_off, dst_off;
4108
4109 src_off = current_sit_addr(sbi, start);
4110 dst_off = next_sit_addr(sbi, src_off);
4111
4112 page = f2fs_grab_meta_page(sbi, dst_off);
4113 seg_info_to_sit_page(sbi, page, start);
4114
4115 set_page_dirty(page);
4116 set_to_next_sit(sit_i, start);
4117
4118 return page;
4119 }
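/*
 * Note (illustrative): SIT blocks are kept in two copies and updated in a
 * ping-pong fashion. current_sit_addr() returns the copy valid for this
 * checkpoint, next_sit_addr() returns the other copy, and set_to_next_sit()
 * flips the per-block bit so the freshly written copy becomes current once
 * the next checkpoint commits.
 */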
4120
4121 static struct sit_entry_set *grab_sit_entry_set(void)
4122 {
4123 struct sit_entry_set *ses =
4124 f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);
4125
4126 ses->entry_cnt = 0;
4127 INIT_LIST_HEAD(&ses->set_list);
4128 return ses;
4129 }
4130
4131 static void release_sit_entry_set(struct sit_entry_set *ses)
4132 {
4133 list_del(&ses->set_list);
4134 kmem_cache_free(sit_entry_set_slab, ses);
4135 }
4136
4137 static void adjust_sit_entry_set(struct sit_entry_set *ses,
4138 struct list_head *head)
4139 {
4140 struct sit_entry_set *next = ses;
4141
4142 if (list_is_last(&ses->set_list, head))
4143 return;
4144
4145 list_for_each_entry_continue(next, head, set_list)
4146 if (ses->entry_cnt <= next->entry_cnt)
4147 break;
4148
4149 list_move_tail(&ses->set_list, &next->set_list);
4150 }
4151
4152 static void add_sit_entry(unsigned int segno, struct list_head *head)
4153 {
4154 struct sit_entry_set *ses;
4155 unsigned int start_segno = START_SEGNO(segno);
4156
4157 list_for_each_entry(ses, head, set_list) {
4158 if (ses->start_segno == start_segno) {
4159 ses->entry_cnt++;
4160 adjust_sit_entry_set(ses, head);
4161 return;
4162 }
4163 }
4164
4165 ses = grab_sit_entry_set();
4166
4167 ses->start_segno = start_segno;
4168 ses->entry_cnt++;
4169 list_add(&ses->set_list, head);
4170 }
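/*
 * Example (illustrative): dirty segments are grouped per SIT block, keyed by
 * START_SEGNO(segno), and adjust_sit_entry_set() keeps the list sorted by
 * ascending entry_cnt. Sets with many dirty entries therefore end up at the
 * tail, which lets the flush loop in f2fs_flush_sit_entries() prefer the
 * journal for the small sets and fall back to SIT pages for the large ones.
 */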
4171
4172 static void add_sits_in_set(struct f2fs_sb_info *sbi)
4173 {
4174 struct f2fs_sm_info *sm_info = SM_I(sbi);
4175 struct list_head *set_list = &sm_info->sit_entry_set;
4176 unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
4177 unsigned int segno;
4178
4179 for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
4180 add_sit_entry(segno, set_list);
4181 }
4182
4183 static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
4184 {
4185 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4186 struct f2fs_journal *journal = curseg->journal;
4187 int i;
4188
4189 down_write(&curseg->journal_rwsem);
4190 for (i = 0; i < sits_in_cursum(journal); i++) {
4191 unsigned int segno;
4192 bool dirtied;
4193
4194 segno = le32_to_cpu(segno_in_journal(journal, i));
4195 dirtied = __mark_sit_entry_dirty(sbi, segno);
4196
4197 if (!dirtied)
4198 add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
4199 }
4200 update_sits_in_cursum(journal, -i);
4201 up_write(&curseg->journal_rwsem);
4202 }
4203
4204 /*
4205 * CP calls this function, which flushes SIT entries including sit_journal,
4206 * and moves prefree segs to free segs.
4207 */
4208 void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
4209 {
4210 struct sit_info *sit_i = SIT_I(sbi);
4211 unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
4212 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4213 struct f2fs_journal *journal = curseg->journal;
4214 struct sit_entry_set *ses, *tmp;
4215 struct list_head *head = &SM_I(sbi)->sit_entry_set;
4216 bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS);
4217 struct seg_entry *se;
4218
4219 down_write(&sit_i->sentry_lock);
4220
4221 if (!sit_i->dirty_sentries)
4222 goto out;
4223
4224 /*
4225 * temporarily add and account the sit entries marked in the dirty
4226 * bitmap via the sit entry sets
4227 */
4228 add_sits_in_set(sbi);
4229
4230 /*
4231 * if there is not enough space in the journal to store the dirty sit
4232 * entries, remove all entries from the journal and add and account
4233 * them in the sit entry sets.
4234 */
4235 if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) ||
4236 !to_journal)
4237 remove_sits_in_journal(sbi);
4238
4239 /*
4240 * there are two steps to flush sit entries:
4241 * #1, flush sit entries to journal in current cold data summary block.
4242 * #2, flush sit entries to sit page.
4243 */
4244 list_for_each_entry_safe(ses, tmp, head, set_list) {
4245 struct page *page = NULL;
4246 struct f2fs_sit_block *raw_sit = NULL;
4247 unsigned int start_segno = ses->start_segno;
4248 unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
4249 (unsigned long)MAIN_SEGS(sbi));
4250 unsigned int segno = start_segno;
4251
4252 if (to_journal &&
4253 !__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
4254 to_journal = false;
4255
4256 if (to_journal) {
4257 down_write(&curseg->journal_rwsem);
4258 } else {
4259 page = get_next_sit_page(sbi, start_segno);
4260 raw_sit = page_address(page);
4261 }
4262
4263 /* flush dirty sit entries in region of current sit set */
4264 for_each_set_bit_from(segno, bitmap, end) {
4265 int offset, sit_offset;
4266
4267 se = get_seg_entry(sbi, segno);
4268 #ifdef CONFIG_F2FS_CHECK_FS
4269 if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
4270 SIT_VBLOCK_MAP_SIZE))
4271 f2fs_bug_on(sbi, 1);
4272 #endif
4273
4274 /* add discard candidates */
4275 if (!(cpc->reason & CP_DISCARD)) {
4276 cpc->trim_start = segno;
4277 add_discard_addrs(sbi, cpc, false);
4278 }
4279
4280 if (to_journal) {
4281 offset = f2fs_lookup_journal_in_cursum(journal,
4282 SIT_JOURNAL, segno, 1);
4283 f2fs_bug_on(sbi, offset < 0);
4284 segno_in_journal(journal, offset) =
4285 cpu_to_le32(segno);
4286 seg_info_to_raw_sit(se,
4287 &sit_in_journal(journal, offset));
4288 check_block_count(sbi, segno,
4289 &sit_in_journal(journal, offset));
4290 } else {
4291 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
4292 seg_info_to_raw_sit(se,
4293 &raw_sit->entries[sit_offset]);
4294 check_block_count(sbi, segno,
4295 &raw_sit->entries[sit_offset]);
4296 }
4297
4298 __clear_bit(segno, bitmap);
4299 sit_i->dirty_sentries--;
4300 ses->entry_cnt--;
4301 }
4302
4303 if (to_journal)
4304 up_write(&curseg->journal_rwsem);
4305 else
4306 f2fs_put_page(page, 1);
4307
4308 f2fs_bug_on(sbi, ses->entry_cnt);
4309 release_sit_entry_set(ses);
4310 }
4311
4312 f2fs_bug_on(sbi, !list_empty(head));
4313 f2fs_bug_on(sbi, sit_i->dirty_sentries);
4314 out:
4315 if (cpc->reason & CP_DISCARD) {
4316 __u64 trim_start = cpc->trim_start;
4317
4318 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
4319 add_discard_addrs(sbi, cpc, false);
4320
4321 cpc->trim_start = trim_start;
4322 }
4323 up_write(&sit_i->sentry_lock);
4324
4325 set_prefree_as_free_segments(sbi);
4326 }
4327
4328 static int build_sit_info(struct f2fs_sb_info *sbi)
4329 {
4330 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
4331 struct sit_info *sit_i;
4332 unsigned int sit_segs, start;
4333 char *src_bitmap, *bitmap;
4334 unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size;
4335
4336 /* allocate memory for SIT information */
4337 sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
4338 if (!sit_i)
4339 return -ENOMEM;
4340
4341 SM_I(sbi)->sit_info = sit_i;
4342
4343 sit_i->sentries =
4344 f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
4345 MAIN_SEGS(sbi)),
4346 GFP_KERNEL);
4347 if (!sit_i->sentries)
4348 return -ENOMEM;
4349
4350 main_bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4351 sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, main_bitmap_size,
4352 GFP_KERNEL);
4353 if (!sit_i->dirty_sentries_bitmap)
4354 return -ENOMEM;
4355
4356 #ifdef CONFIG_F2FS_CHECK_FS
4357 bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 4;
4358 #else
4359 bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 3;
4360 #endif
4361 sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4362 if (!sit_i->bitmap)
4363 return -ENOMEM;
4364
4365 bitmap = sit_i->bitmap;
4366
4367 for (start = 0; start < MAIN_SEGS(sbi); start++) {
4368 sit_i->sentries[start].cur_valid_map = bitmap;
4369 bitmap += SIT_VBLOCK_MAP_SIZE;
4370
4371 sit_i->sentries[start].ckpt_valid_map = bitmap;
4372 bitmap += SIT_VBLOCK_MAP_SIZE;
4373
4374 #ifdef CONFIG_F2FS_CHECK_FS
4375 sit_i->sentries[start].cur_valid_map_mir = bitmap;
4376 bitmap += SIT_VBLOCK_MAP_SIZE;
4377 #endif
4378
4379 sit_i->sentries[start].discard_map = bitmap;
4380 bitmap += SIT_VBLOCK_MAP_SIZE;
4381 }
4382
4383 sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
4384 if (!sit_i->tmp_map)
4385 return -ENOMEM;
4386
4387 if (__is_large_section(sbi)) {
4388 sit_i->sec_entries =
4389 f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
4390 MAIN_SECS(sbi)),
4391 GFP_KERNEL);
4392 if (!sit_i->sec_entries)
4393 return -ENOMEM;
4394 }
4395
4396 /* get information related with SIT */
4397 sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
4398
4399 /* set up the SIT bitmap from the checkpoint pack */
4400 sit_bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
4401 src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
4402
4403 sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL);
4404 if (!sit_i->sit_bitmap)
4405 return -ENOMEM;
4406
4407 #ifdef CONFIG_F2FS_CHECK_FS
4408 sit_i->sit_bitmap_mir = kmemdup(src_bitmap,
4409 sit_bitmap_size, GFP_KERNEL);
4410 if (!sit_i->sit_bitmap_mir)
4411 return -ENOMEM;
4412
4413 sit_i->invalid_segmap = f2fs_kvzalloc(sbi,
4414 main_bitmap_size, GFP_KERNEL);
4415 if (!sit_i->invalid_segmap)
4416 return -ENOMEM;
4417 #endif
4418
4419 /* init SIT information */
4420 sit_i->s_ops = &default_salloc_ops;
4421
4422 sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
4423 sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
4424 sit_i->written_valid_blocks = 0;
4425 sit_i->bitmap_size = sit_bitmap_size;
4426 sit_i->dirty_sentries = 0;
4427 sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
4428 sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
4429 sit_i->mounted_time = ktime_get_boottime_seconds();
4430 init_rwsem(&sit_i->sentry_lock);
4431 return 0;
4432 }
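/*
 * Layout note (illustrative): the single sit_i->bitmap allocation above is
 * carved into per-segment slices of SIT_VBLOCK_MAP_SIZE bytes each, in the
 * order cur_valid_map, ckpt_valid_map, (cur_valid_map_mir under
 * CONFIG_F2FS_CHECK_FS), discard_map -- i.e. three or four slices per
 * segment, matching the bitmap_size computation.
 */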
4433
4434 static int build_free_segmap(struct f2fs_sb_info *sbi)
4435 {
4436 struct free_segmap_info *free_i;
4437 unsigned int bitmap_size, sec_bitmap_size;
4438
4439 /* allocate memory for free segmap information */
4440 free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
4441 if (!free_i)
4442 return -ENOMEM;
4443
4444 SM_I(sbi)->free_info = free_i;
4445
4446 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4447 free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
4448 if (!free_i->free_segmap)
4449 return -ENOMEM;
4450
4451 sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4452 free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
4453 if (!free_i->free_secmap)
4454 return -ENOMEM;
4455
4456 /* set all segments as dirty temporarily */
4457 memset(free_i->free_segmap, 0xff, bitmap_size);
4458 memset(free_i->free_secmap, 0xff, sec_bitmap_size);
4459
4460 /* init free segmap information */
4461 free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
4462 free_i->free_segments = 0;
4463 free_i->free_sections = 0;
4464 spin_lock_init(&free_i->segmap_lock);
4465 return 0;
4466 }
4467
4468 static int build_curseg(struct f2fs_sb_info *sbi)
4469 {
4470 struct curseg_info *array;
4471 int i;
4472
4473 array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
4474 sizeof(*array)), GFP_KERNEL);
4475 if (!array)
4476 return -ENOMEM;
4477
4478 SM_I(sbi)->curseg_array = array;
4479
4480 for (i = 0; i < NO_CHECK_TYPE; i++) {
4481 mutex_init(&array[i].curseg_mutex);
4482 array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
4483 if (!array[i].sum_blk)
4484 return -ENOMEM;
4485 init_rwsem(&array[i].journal_rwsem);
4486 array[i].journal = f2fs_kzalloc(sbi,
4487 sizeof(struct f2fs_journal), GFP_KERNEL);
4488 if (!array[i].journal)
4489 return -ENOMEM;
4490 if (i < NR_PERSISTENT_LOG)
4491 array[i].seg_type = CURSEG_HOT_DATA + i;
4492 else if (i == CURSEG_COLD_DATA_PINNED)
4493 array[i].seg_type = CURSEG_COLD_DATA;
4494 else if (i == CURSEG_ALL_DATA_ATGC)
4495 array[i].seg_type = CURSEG_COLD_DATA;
4496 array[i].segno = NULL_SEGNO;
4497 array[i].next_blkoff = 0;
4498 array[i].inited = false;
4499 }
4500 return restore_curseg_summaries(sbi);
4501 }
4502
4503 static int build_sit_entries(struct f2fs_sb_info *sbi)
4504 {
4505 struct sit_info *sit_i = SIT_I(sbi);
4506 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4507 struct f2fs_journal *journal = curseg->journal;
4508 struct seg_entry *se;
4509 struct f2fs_sit_entry sit;
4510 int sit_blk_cnt = SIT_BLK_CNT(sbi);
4511 unsigned int i, start, end;
4512 unsigned int readed, start_blk = 0;
4513 int err = 0;
4514 block_t sit_valid_blocks[2] = {0, 0};
4515
4516 do {
4517 readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
4518 META_SIT, true);
4519
4520 start = start_blk * sit_i->sents_per_block;
4521 end = (start_blk + readed) * sit_i->sents_per_block;
4522
4523 for (; start < end && start < MAIN_SEGS(sbi); start++) {
4524 struct f2fs_sit_block *sit_blk;
4525 struct page *page;
4526
4527 se = &sit_i->sentries[start];
4528 page = get_current_sit_page(sbi, start);
4529 if (IS_ERR(page))
4530 return PTR_ERR(page);
4531 sit_blk = (struct f2fs_sit_block *)page_address(page);
4532 sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
4533 f2fs_put_page(page, 1);
4534
4535 err = check_block_count(sbi, start, &sit);
4536 if (err)
4537 return err;
4538 seg_info_from_raw_sit(se, &sit);
4539
4540 if (se->type >= NR_PERSISTENT_LOG) {
4541 f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
4542 se->type, start);
4543 return -EFSCORRUPTED;
4544 }
4545
4546 sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
4547
4548 /* build discard map only one time */
4549 if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4550 memset(se->discard_map, 0xff,
4551 SIT_VBLOCK_MAP_SIZE);
4552 } else {
4553 memcpy(se->discard_map,
4554 se->cur_valid_map,
4555 SIT_VBLOCK_MAP_SIZE);
4556 sbi->discard_blks +=
4557 sbi->blocks_per_seg -
4558 se->valid_blocks;
4559 }
4560
4561 if (__is_large_section(sbi))
4562 get_sec_entry(sbi, start)->valid_blocks +=
4563 se->valid_blocks;
4564 }
4565 start_blk += readed;
4566 } while (start_blk < sit_blk_cnt);
4567
4568 down_read(&curseg->journal_rwsem);
4569 for (i = 0; i < sits_in_cursum(journal); i++) {
4570 unsigned int old_valid_blocks;
4571
4572 start = le32_to_cpu(segno_in_journal(journal, i));
4573 if (start >= MAIN_SEGS(sbi)) {
4574 f2fs_err(sbi, "Wrong journal entry on segno %u",
4575 start);
4576 err = -EFSCORRUPTED;
4577 break;
4578 }
4579
4580 se = &sit_i->sentries[start];
4581 sit = sit_in_journal(journal, i);
4582
4583 old_valid_blocks = se->valid_blocks;
4584
4585 sit_valid_blocks[SE_PAGETYPE(se)] -= old_valid_blocks;
4586
4587 err = check_block_count(sbi, start, &sit);
4588 if (err)
4589 break;
4590 seg_info_from_raw_sit(se, &sit);
4591
4592 if (se->type >= NR_PERSISTENT_LOG) {
4593 f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
4594 se->type, start);
4595 err = -EFSCORRUPTED;
4596 break;
4597 }
4598
4599 sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
4600
4601 if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4602 memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
4603 } else {
4604 memcpy(se->discard_map, se->cur_valid_map,
4605 SIT_VBLOCK_MAP_SIZE);
4606 sbi->discard_blks += old_valid_blocks;
4607 sbi->discard_blks -= se->valid_blocks;
4608 }
4609
4610 if (__is_large_section(sbi)) {
4611 get_sec_entry(sbi, start)->valid_blocks +=
4612 se->valid_blocks;
4613 get_sec_entry(sbi, start)->valid_blocks -=
4614 old_valid_blocks;
4615 }
4616 }
4617 up_read(&curseg->journal_rwsem);
4618
4619 if (err)
4620 return err;
4621
4622 if (sit_valid_blocks[NODE] != valid_node_count(sbi)) {
4623 f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
4624 sit_valid_blocks[NODE], valid_node_count(sbi));
4625 return -EFSCORRUPTED;
4626 }
4627
4628 if (sit_valid_blocks[DATA] + sit_valid_blocks[NODE] >
4629 valid_user_blocks(sbi)) {
4630 f2fs_err(sbi, "SIT is corrupted data# %u %u vs %u",
4631 sit_valid_blocks[DATA], sit_valid_blocks[NODE],
4632 valid_user_blocks(sbi));
4633 return -EFSCORRUPTED;
4634 }
4635
4636 return 0;
4637 }
4638
4639 static void init_free_segmap(struct f2fs_sb_info *sbi)
4640 {
4641 unsigned int start;
4642 int type;
4643 struct seg_entry *sentry;
4644
4645 for (start = 0; start < MAIN_SEGS(sbi); start++) {
4646 if (f2fs_usable_blks_in_seg(sbi, start) == 0)
4647 continue;
4648 sentry = get_seg_entry(sbi, start);
4649 if (!sentry->valid_blocks)
4650 __set_free(sbi, start);
4651 else
4652 SIT_I(sbi)->written_valid_blocks +=
4653 sentry->valid_blocks;
4654 }
4655
4656 /* mark the current segments as in use */
4657 for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
4658 struct curseg_info *curseg_t = CURSEG_I(sbi, type);
4659
4660 __set_test_and_inuse(sbi, curseg_t->segno);
4661 }
4662 }
4663
4664 static void init_dirty_segmap(struct f2fs_sb_info *sbi)
4665 {
4666 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4667 struct free_segmap_info *free_i = FREE_I(sbi);
4668 unsigned int segno = 0, offset = 0, secno;
4669 block_t valid_blocks, usable_blks_in_seg;
4670 block_t blks_per_sec = BLKS_PER_SEC(sbi);
4671
4672 while (1) {
4673 /* find dirty segment based on free segmap */
4674 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
4675 if (segno >= MAIN_SEGS(sbi))
4676 break;
4677 offset = segno + 1;
4678 valid_blocks = get_valid_blocks(sbi, segno, false);
4679 usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
4680 if (valid_blocks == usable_blks_in_seg || !valid_blocks)
4681 continue;
4682 if (valid_blocks > usable_blks_in_seg) {
4683 f2fs_bug_on(sbi, 1);
4684 continue;
4685 }
4686 mutex_lock(&dirty_i->seglist_lock);
4687 __locate_dirty_segment(sbi, segno, DIRTY);
4688 mutex_unlock(&dirty_i->seglist_lock);
4689 }
4690
4691 if (!__is_large_section(sbi))
4692 return;
4693
4694 mutex_lock(&dirty_i->seglist_lock);
4695 for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
4696 valid_blocks = get_valid_blocks(sbi, segno, true);
4697 secno = GET_SEC_FROM_SEG(sbi, segno);
4698
4699 if (!valid_blocks || valid_blocks == blks_per_sec)
4700 continue;
4701 if (IS_CURSEC(sbi, secno))
4702 continue;
4703 set_bit(secno, dirty_i->dirty_secmap);
4704 }
4705 mutex_unlock(&dirty_i->seglist_lock);
4706 }
4707
4708 static int init_victim_secmap(struct f2fs_sb_info *sbi)
4709 {
4710 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4711 unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4712
4713 dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4714 if (!dirty_i->victim_secmap)
4715 return -ENOMEM;
4716
4717 dirty_i->pinned_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4718 if (!dirty_i->pinned_secmap)
4719 return -ENOMEM;
4720
4721 dirty_i->pinned_secmap_cnt = 0;
4722 dirty_i->enable_pin_section = true;
4723 return 0;
4724 }
4725
4726 static int build_dirty_segmap(struct f2fs_sb_info *sbi)
4727 {
4728 struct dirty_seglist_info *dirty_i;
4729 unsigned int bitmap_size, i;
4730
4731 /* allocate memory for dirty segments list information */
4732 dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
4733 GFP_KERNEL);
4734 if (!dirty_i)
4735 return -ENOMEM;
4736
4737 SM_I(sbi)->dirty_info = dirty_i;
4738 mutex_init(&dirty_i->seglist_lock);
4739
4740 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4741
4742 for (i = 0; i < NR_DIRTY_TYPE; i++) {
4743 dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
4744 GFP_KERNEL);
4745 if (!dirty_i->dirty_segmap[i])
4746 return -ENOMEM;
4747 }
4748
4749 if (__is_large_section(sbi)) {
4750 bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4751 dirty_i->dirty_secmap = f2fs_kvzalloc(sbi,
4752 bitmap_size, GFP_KERNEL);
4753 if (!dirty_i->dirty_secmap)
4754 return -ENOMEM;
4755 }
4756
4757 init_dirty_segmap(sbi);
4758 return init_victim_secmap(sbi);
4759 }
4760
4761 static int sanity_check_curseg(struct f2fs_sb_info *sbi)
4762 {
4763 int i;
4764
4765 /*
4766 * In an LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
4767 * in an LFS curseg, all blkaddrs after .next_blkoff should be unused.
4768 */
4769 for (i = 0; i < NR_PERSISTENT_LOG; i++) {
4770 struct curseg_info *curseg = CURSEG_I(sbi, i);
4771 struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
4772 unsigned int blkofs = curseg->next_blkoff;
4773
4774 if (f2fs_sb_has_readonly(sbi) &&
4775 i != CURSEG_HOT_DATA && i != CURSEG_HOT_NODE)
4776 continue;
4777
4778 sanity_check_seg_type(sbi, curseg->seg_type);
4779
4780 if (curseg->alloc_type != LFS && curseg->alloc_type != SSR) {
4781 f2fs_err(sbi,
4782 "Current segment has invalid alloc_type:%d",
4783 curseg->alloc_type);
4784 return -EFSCORRUPTED;
4785 }
4786
4787 if (f2fs_test_bit(blkofs, se->cur_valid_map))
4788 goto out;
4789
4790 if (curseg->alloc_type == SSR)
4791 continue;
4792
4793 for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) {
4794 if (!f2fs_test_bit(blkofs, se->cur_valid_map))
4795 continue;
4796 out:
4797 f2fs_err(sbi,
4798 "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
4799 i, curseg->segno, curseg->alloc_type,
4800 curseg->next_blkoff, blkofs);
4801 return -EFSCORRUPTED;
4802 }
4803 }
4804 return 0;
4805 }
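/*
 * Example (illustrative): for an LFS curseg with next_blkoff = 5, the bit at
 * offset 5 in cur_valid_map and every later bit must be clear (earlier bits
 * may or may not still be valid). A set bit at or after next_blkoff means
 * the checkpoint and the SIT disagree, and mount fails with -EFSCORRUPTED.
 */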
4806
4807 #ifdef CONFIG_BLK_DEV_ZONED
4808
4809 static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
4810 struct f2fs_dev_info *fdev,
4811 struct blk_zone *zone)
4812 {
4813 unsigned int wp_segno, wp_blkoff, zone_secno, zone_segno, segno;
4814 block_t zone_block, wp_block, last_valid_block;
4815 unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
4816 int i, s, b, ret;
4817 struct seg_entry *se;
4818
4819 if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
4820 return 0;
4821
4822 wp_block = fdev->start_blk + (zone->wp >> log_sectors_per_block);
4823 wp_segno = GET_SEGNO(sbi, wp_block);
4824 wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
4825 zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block);
4826 zone_segno = GET_SEGNO(sbi, zone_block);
4827 zone_secno = GET_SEC_FROM_SEG(sbi, zone_segno);
4828
4829 if (zone_segno >= MAIN_SEGS(sbi))
4830 return 0;
4831
4832 /*
4833 * Skip checking zones that cursegs point to, since
4834 * fix_curseg_write_pointer() checks them.
4835 */
4836 for (i = 0; i < NO_CHECK_TYPE; i++)
4837 if (zone_secno == GET_SEC_FROM_SEG(sbi,
4838 CURSEG_I(sbi, i)->segno))
4839 return 0;
4840
4841 /*
4842 * Get last valid block of the zone.
4843 */
4844 last_valid_block = zone_block - 1;
4845 for (s = sbi->segs_per_sec - 1; s >= 0; s--) {
4846 segno = zone_segno + s;
4847 se = get_seg_entry(sbi, segno);
4848 for (b = sbi->blocks_per_seg - 1; b >= 0; b--)
4849 if (f2fs_test_bit(b, se->cur_valid_map)) {
4850 last_valid_block = START_BLOCK(sbi, segno) + b;
4851 break;
4852 }
4853 if (last_valid_block >= zone_block)
4854 break;
4855 }
4856
4857 /*
4858 * If last valid block is beyond the write pointer, report the
4859 * inconsistency. This inconsistency does not cause a write error
4860 * because the zone will not be selected for write operations until
4861 * it gets discarded. Just report it.
4862 */
4863 if (last_valid_block >= wp_block) {
4864 f2fs_notice(sbi, "Valid block beyond write pointer: "
4865 "valid block[0x%x,0x%x] wp[0x%x,0x%x]",
4866 GET_SEGNO(sbi, last_valid_block),
4867 GET_BLKOFF_FROM_SEG0(sbi, last_valid_block),
4868 wp_segno, wp_blkoff);
4869 return 0;
4870 }
4871
4872 /*
4873 * If there is no valid block in the zone and the write pointer is
4874 * not at the zone start, reset the write pointer.
4875 */
4876 if (last_valid_block + 1 == zone_block && zone->wp != zone->start) {
4877 f2fs_notice(sbi,
4878 "Zone without valid block has non-zero write "
4879 "pointer. Reset the write pointer: wp[0x%x,0x%x]",
4880 wp_segno, wp_blkoff);
4881 ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block,
4882 zone->len >> log_sectors_per_block);
4883 if (ret) {
4884 f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
4885 fdev->path, ret);
4886 return ret;
4887 }
4888 }
4889
4890 return 0;
4891 }
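/*
 * Worked example (illustrative numbers): for a zone starting at segment 100
 * with blocks_per_seg = 512, a write pointer at block offset 0x80 of that
 * segment gives wp_block = START_BLOCK(sbi, 100) + 0x80. If the last valid
 * block found in the zone sits at offset 0x100, it lies beyond the write
 * pointer and only a notice is logged; if the zone holds no valid block at
 * all but zone->wp != zone->start, the zone is reset via
 * __f2fs_issue_discard_zone().
 */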
4892
4893 static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi,
4894 block_t zone_blkaddr)
4895 {
4896 int i;
4897
4898 for (i = 0; i < sbi->s_ndevs; i++) {
4899 if (!bdev_is_zoned(FDEV(i).bdev))
4900 continue;
4901 if (sbi->s_ndevs == 1 || (FDEV(i).start_blk <= zone_blkaddr &&
4902 zone_blkaddr <= FDEV(i).end_blk))
4903 return &FDEV(i);
4904 }
4905
4906 return NULL;
4907 }
4908
4909 static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx,
4910 void *data)
4911 {
4912 memcpy(data, zone, sizeof(struct blk_zone));
4913 return 0;
4914 }
4915
4916 static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
4917 {
4918 struct curseg_info *cs = CURSEG_I(sbi, type);
4919 struct f2fs_dev_info *zbd;
4920 struct blk_zone zone;
4921 unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off;
4922 block_t cs_zone_block, wp_block;
4923 unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
4924 sector_t zone_sector;
4925 int err;
4926
4927 cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
4928 cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
4929
4930 zbd = get_target_zoned_dev(sbi, cs_zone_block);
4931 if (!zbd)
4932 return 0;
4933
4934 /* report zone for the sector the curseg points to */
4935 zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
4936 << log_sectors_per_block;
4937 err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
4938 report_one_zone_cb, &zone);
4939 if (err != 1) {
4940 f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
4941 zbd->path, err);
4942 return err;
4943 }
4944
4945 if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
4946 return 0;
4947
4948 wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block);
4949 wp_segno = GET_SEGNO(sbi, wp_block);
4950 wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
4951 wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0);
4952
4953 if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
4954 wp_sector_off == 0)
4955 return 0;
4956
4957 f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: "
4958 "curseg[0x%x,0x%x] wp[0x%x,0x%x]",
4959 type, cs->segno, cs->next_blkoff, wp_segno, wp_blkoff);
4960
4961 f2fs_notice(sbi, "Assign new section to curseg[%d]: "
4962 "curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff);
4963
4964 f2fs_allocate_new_section(sbi, type, true);
4965
4966 /* check consistency of the zone curseg pointed to */
4967 if (check_zone_write_pointer(sbi, zbd, &zone))
4968 return -EIO;
4969
4970 /* check newly assigned zone */
4971 cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
4972 cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
4973
4974 zbd = get_target_zoned_dev(sbi, cs_zone_block);
4975 if (!zbd)
4976 return 0;
4977
4978 zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
4979 << log_sectors_per_block;
4980 err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
4981 report_one_zone_cb, &zone);
4982 if (err != 1) {
4983 f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
4984 zbd->path, err);
4985 return err;
4986 }
4987
4988 if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
4989 return 0;
4990
4991 if (zone.wp != zone.start) {
4992 f2fs_notice(sbi,
4993 "New zone for curseg[%d] is not yet discarded. "
4994 "Reset the zone: curseg[0x%x,0x%x]",
4995 type, cs->segno, cs->next_blkoff);
4996 err = __f2fs_issue_discard_zone(sbi, zbd->bdev,
4997 zone_sector >> log_sectors_per_block,
4998 zone.len >> log_sectors_per_block);
4999 if (err) {
5000 f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
5001 zbd->path, err);
5002 return err;
5003 }
5004 }
5005
5006 return 0;
5007 }
5008
5009 int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
5010 {
5011 int i, ret;
5012
5013 for (i = 0; i < NR_PERSISTENT_LOG; i++) {
5014 ret = fix_curseg_write_pointer(sbi, i);
5015 if (ret)
5016 return ret;
5017 }
5018
5019 return 0;
5020 }
5021
5022 struct check_zone_write_pointer_args {
5023 struct f2fs_sb_info *sbi;
5024 struct f2fs_dev_info *fdev;
5025 };
5026
5027 static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx,
5028 void *data)
5029 {
5030 struct check_zone_write_pointer_args *args;
5031
5032 args = (struct check_zone_write_pointer_args *)data;
5033
5034 return check_zone_write_pointer(args->sbi, args->fdev, zone);
5035 }
5036
5037 int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
5038 {
5039 int i, ret;
5040 struct check_zone_write_pointer_args args;
5041
5042 for (i = 0; i < sbi->s_ndevs; i++) {
5043 if (!bdev_is_zoned(FDEV(i).bdev))
5044 continue;
5045
5046 args.sbi = sbi;
5047 args.fdev = &FDEV(i);
5048 ret = blkdev_report_zones(FDEV(i).bdev, 0, BLK_ALL_ZONES,
5049 check_zone_write_pointer_cb, &args);
5050 if (ret < 0)
5051 return ret;
5052 }
5053
5054 return 0;
5055 }
5056
5057 /*
5058 * Return the number of usable blocks in a segment. The number of blocks
5059 * returned is always equal to the number of blocks in a segment for
5060 * segments fully contained within a sequential zone capacity or a
5061 * conventional zone. For segments partially contained in a sequential
5062 * zone capacity, the number of usable blocks up to the zone capacity
5063 * is returned. 0 is returned in all other cases.
5064 */
5065 static inline unsigned int f2fs_usable_zone_blks_in_seg(
5066 struct f2fs_sb_info *sbi, unsigned int segno)
5067 {
5068 block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr;
5069 unsigned int secno;
5070
5071 if (!sbi->unusable_blocks_per_sec)
5072 return sbi->blocks_per_seg;
5073
5074 secno = GET_SEC_FROM_SEG(sbi, segno);
5075 seg_start = START_BLOCK(sbi, segno);
5076 sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
5077 sec_cap_blkaddr = sec_start_blkaddr + CAP_BLKS_PER_SEC(sbi);
5078
5079 /*
5080 * If segment starts before zone capacity and spans beyond
5081 * zone capacity, then usable blocks are from seg start to
5082 * zone capacity. If the segment starts after the zone capacity,
5083 * then there are no usable blocks.
5084 */
5085 if (seg_start >= sec_cap_blkaddr)
5086 return 0;
5087 if (seg_start + sbi->blocks_per_seg > sec_cap_blkaddr)
5088 return sec_cap_blkaddr - seg_start;
5089
5090 return sbi->blocks_per_seg;
5091 }
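/*
 * Worked example (illustrative numbers): assume blocks_per_seg = 512 and a
 * zone whose capacity ends 256 blocks into the third segment of the section,
 * i.e. sec_cap_blkaddr = sec_start_blkaddr + 2 * 512 + 256.
 *
 *   segments 0, 1  : seg_start + 512 <= sec_cap_blkaddr -> 512 usable blocks
 *   segment 2      : partially usable                   -> 256 usable blocks
 *   segments 3, ...: seg_start >= sec_cap_blkaddr       ->   0 usable blocks
 */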
5092 #else
5093 int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
5094 {
5095 return 0;
5096 }
5097
5098 int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
5099 {
5100 return 0;
5101 }
5102
5103 static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi,
5104 unsigned int segno)
5105 {
5106 return 0;
5107 }
5108
5109 #endif
5110 unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
5111 unsigned int segno)
5112 {
5113 if (f2fs_sb_has_blkzoned(sbi))
5114 return f2fs_usable_zone_blks_in_seg(sbi, segno);
5115
5116 return sbi->blocks_per_seg;
5117 }
5118
5119 unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
5120 unsigned int segno)
5121 {
5122 if (f2fs_sb_has_blkzoned(sbi))
5123 return CAP_SEGS_PER_SEC(sbi);
5124
5125 return sbi->segs_per_sec;
5126 }
5127
5128 /*
5129 * Update min, max modified time for cost-benefit GC algorithm
5130 */
5131 static void init_min_max_mtime(struct f2fs_sb_info *sbi)
5132 {
5133 struct sit_info *sit_i = SIT_I(sbi);
5134 unsigned int segno;
5135
5136 down_write(&sit_i->sentry_lock);
5137
5138 sit_i->min_mtime = ULLONG_MAX;
5139
5140 for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
5141 unsigned int i;
5142 unsigned long long mtime = 0;
5143
5144 for (i = 0; i < sbi->segs_per_sec; i++)
5145 mtime += get_seg_entry(sbi, segno + i)->mtime;
5146
5147 mtime = div_u64(mtime, sbi->segs_per_sec);
5148
5149 if (sit_i->min_mtime > mtime)
5150 sit_i->min_mtime = mtime;
5151 }
5152 sit_i->max_mtime = get_mtime(sbi, false);
5153 sit_i->dirty_max_mtime = 0;
5154 up_write(&sit_i->sentry_lock);
5155 }
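/*
 * Example (illustrative numbers): for a section of four segments whose
 * per-segment mtimes are 100, 200, 300 and 400, the loop above computes
 * mtime = (100 + 200 + 300 + 400) / 4 = 250 as the candidate for
 * sit_i->min_mtime, so min_mtime tracks the coldest section average while
 * max_mtime is set to the current elapsed time via get_mtime(sbi, false).
 */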
5156
5157 int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
5158 {
5159 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
5160 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
5161 struct f2fs_sm_info *sm_info;
5162 int err;
5163
5164 sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
5165 if (!sm_info)
5166 return -ENOMEM;
5167
5168 /* init sm info */
5169 sbi->sm_info = sm_info;
5170 sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
5171 sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
5172 sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
5173 sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
5174 sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
5175 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
5176 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
5177 sm_info->rec_prefree_segments = sm_info->main_segments *
5178 DEF_RECLAIM_PREFREE_SEGMENTS / 100;
5179 if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
5180 sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;
5181
5182 if (!f2fs_lfs_mode(sbi))
5183 sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
5184 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
5185 sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
5186 sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
5187 sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
5188 sm_info->min_ssr_sections = reserved_sections(sbi);
5189
5190 INIT_LIST_HEAD(&sm_info->sit_entry_set);
5191
5192 init_f2fs_rwsem(&sm_info->curseg_lock);
5193
5194 if (!f2fs_readonly(sbi->sb)) {
5195 err = f2fs_create_flush_cmd_control(sbi);
5196 if (err)
5197 return err;
5198 }
5199
5200 err = create_discard_cmd_control(sbi);
5201 if (err)
5202 return err;
5203
5204 err = build_sit_info(sbi);
5205 if (err)
5206 return err;
5207 err = build_free_segmap(sbi);
5208 if (err)
5209 return err;
5210 err = build_curseg(sbi);
5211 if (err)
5212 return err;
5213
5214 /* reinit free segmap based on SIT */
5215 err = build_sit_entries(sbi);
5216 if (err)
5217 return err;
5218
5219 init_free_segmap(sbi);
5220 err = build_dirty_segmap(sbi);
5221 if (err)
5222 return err;
5223
5224 err = sanity_check_curseg(sbi);
5225 if (err)
5226 return err;
5227
5228 init_min_max_mtime(sbi);
5229 return 0;
5230 }
5231
5232 static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
5233 enum dirty_type dirty_type)
5234 {
5235 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5236
5237 mutex_lock(&dirty_i->seglist_lock);
5238 kvfree(dirty_i->dirty_segmap[dirty_type]);
5239 dirty_i->nr_dirty[dirty_type] = 0;
5240 mutex_unlock(&dirty_i->seglist_lock);
5241 }
5242
5243 static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
5244 {
5245 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5246
5247 kvfree(dirty_i->pinned_secmap);
5248 kvfree(dirty_i->victim_secmap);
5249 }
5250
5251 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
5252 {
5253 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5254 int i;
5255
5256 if (!dirty_i)
5257 return;
5258
5259 /* discard pre-free/dirty segments list */
5260 for (i = 0; i < NR_DIRTY_TYPE; i++)
5261 discard_dirty_segmap(sbi, i);
5262
5263 if (__is_large_section(sbi)) {
5264 mutex_lock(&dirty_i->seglist_lock);
5265 kvfree(dirty_i->dirty_secmap);
5266 mutex_unlock(&dirty_i->seglist_lock);
5267 }
5268
5269 destroy_victim_secmap(sbi);
5270 SM_I(sbi)->dirty_info = NULL;
5271 kfree(dirty_i);
5272 }
5273
5274 static void destroy_curseg(struct f2fs_sb_info *sbi)
5275 {
5276 struct curseg_info *array = SM_I(sbi)->curseg_array;
5277 int i;
5278
5279 if (!array)
5280 return;
5281 SM_I(sbi)->curseg_array = NULL;
5282 for (i = 0; i < NR_CURSEG_TYPE; i++) {
5283 kfree(array[i].sum_blk);
5284 kfree(array[i].journal);
5285 }
5286 kfree(array);
5287 }
5288
5289 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
5290 {
5291 struct free_segmap_info *free_i = SM_I(sbi)->free_info;
5292
5293 if (!free_i)
5294 return;
5295 SM_I(sbi)->free_info = NULL;
5296 kvfree(free_i->free_segmap);
5297 kvfree(free_i->free_secmap);
5298 kfree(free_i);
5299 }
5300
5301 static void destroy_sit_info(struct f2fs_sb_info *sbi)
5302 {
5303 struct sit_info *sit_i = SIT_I(sbi);
5304
5305 if (!sit_i)
5306 return;
5307
5308 if (sit_i->sentries)
5309 kvfree(sit_i->bitmap);
5310 kfree(sit_i->tmp_map);
5311
5312 kvfree(sit_i->sentries);
5313 kvfree(sit_i->sec_entries);
5314 kvfree(sit_i->dirty_sentries_bitmap);
5315
5316 SM_I(sbi)->sit_info = NULL;
5317 kvfree(sit_i->sit_bitmap);
5318 #ifdef CONFIG_F2FS_CHECK_FS
5319 kvfree(sit_i->sit_bitmap_mir);
5320 kvfree(sit_i->invalid_segmap);
5321 #endif
5322 kfree(sit_i);
5323 }
5324
5325 void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
5326 {
5327 struct f2fs_sm_info *sm_info = SM_I(sbi);
5328
5329 if (!sm_info)
5330 return;
5331 f2fs_destroy_flush_cmd_control(sbi, true);
5332 destroy_discard_cmd_control(sbi);
5333 destroy_dirty_segmap(sbi);
5334 destroy_curseg(sbi);
5335 destroy_free_segmap(sbi);
5336 destroy_sit_info(sbi);
5337 sbi->sm_info = NULL;
5338 kfree(sm_info);
5339 }
5340
5341 int __init f2fs_create_segment_manager_caches(void)
5342 {
5343 discard_entry_slab = f2fs_kmem_cache_create("f2fs_discard_entry",
5344 sizeof(struct discard_entry));
5345 if (!discard_entry_slab)
5346 goto fail;
5347
5348 discard_cmd_slab = f2fs_kmem_cache_create("f2fs_discard_cmd",
5349 sizeof(struct discard_cmd));
5350 if (!discard_cmd_slab)
5351 goto destroy_discard_entry;
5352
5353 sit_entry_set_slab = f2fs_kmem_cache_create("f2fs_sit_entry_set",
5354 sizeof(struct sit_entry_set));
5355 if (!sit_entry_set_slab)
5356 goto destroy_discard_cmd;
5357
5358 inmem_entry_slab = f2fs_kmem_cache_create("f2fs_inmem_page_entry",
5359 sizeof(struct inmem_pages));
5360 if (!inmem_entry_slab)
5361 goto destroy_sit_entry_set;
5362 return 0;
5363
5364 destroy_sit_entry_set:
5365 kmem_cache_destroy(sit_entry_set_slab);
5366 destroy_discard_cmd:
5367 kmem_cache_destroy(discard_cmd_slab);
5368 destroy_discard_entry:
5369 kmem_cache_destroy(discard_entry_slab);
5370 fail:
5371 return -ENOMEM;
5372 }
5373
5374 void f2fs_destroy_segment_manager_caches(void)
5375 {
5376 kmem_cache_destroy(sit_entry_set_slab);
5377 kmem_cache_destroy(discard_cmd_slab);
5378 kmem_cache_destroy(discard_entry_slab);
5379 kmem_cache_destroy(inmem_entry_slab);
5380 }
5381